perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* pp_pack.c
	2	*
	3	* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
	4	* 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
	5	*
	6	* You may distribute under the terms of either the GNU General Public
	7	* License or the Artistic License, as specified in the README file.
	8	*
	9	*/
	10
	11	/*
	12	* He still hopefully carried some of his gear in his pack: a small tinder-box,
	13	* two small shallow pans, the smaller fitting into the larger; inside them a
	14	* wooden spoon, a short two-pronged fork and some skewers were stowed; and
	15	* hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
	16	* some salt.
	17	*
	18	* [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
	19	*/
	20
	21	/* This file contains pp ("push/pop") functions that
	22	* execute the opcodes that make up a perl program. A typical pp function
	23	* expects to find its arguments on the stack, and usually pushes its
	24	* results onto the stack, hence the 'pp' terminology. Each OP structure
	25	* contains a pointer to the relevant pp_foo() function.
	26	*
	27	* This particular file just contains pp_pack() and pp_unpack(). See the
	28	* other pp*.c files for the rest of the pp_ functions.
	29	*/
	30
	31	#include "EXTERN.h"
	32	#define PERL_IN_PP_PACK_C
	33	#include "perl.h"
	34
	35	/* Types used by pack/unpack */
	36	typedef enum {
	37	e_no_len, /* no length */
	38	e_number, /* number, [] */
	39	e_star /* asterisk */
	40	} howlen_t;
	41
	42	typedef struct tempsym {
	43	const char* patptr; /* current template char */
	44	const char* patend; /* one after last char */
	45	const char* grpbeg; /* 1st char of ()-group */
	46	const char* grpend; /* end of ()-group */
	47	I32 code; /* template code (!<>) */
	48	I32 length; /* length/repeat count */
	49	howlen_t howlen; /* how length is given */
	50	int level; /* () nesting level */
	51	U32 flags; /* /=4, comma=2, pack=1 */
	52	/* and group modifiers */
	53	STRLEN strbeg; /* offset of group start */
	54	struct tempsym previous; / previous group */
	55	} tempsym_t;
	56
	57	#define TEMPSYM_INIT(symptr, p, e, f) \
	58	STMT_START { \
	59	(symptr)->patptr = (p); \
	60	(symptr)->patend = (e); \
	61	(symptr)->grpbeg = NULL; \
	62	(symptr)->grpend = NULL; \
	63	(symptr)->grpend = NULL; \
	64	(symptr)->code = 0; \
	65	(symptr)->length = 0; \
	66	(symptr)->howlen = e_no_len; \
	67	(symptr)->level = 0; \
	68	(symptr)->flags = (f); \
	69	(symptr)->strbeg = 0; \
	70	(symptr)->previous = NULL; \
	71	} STMT_END
	72
	73	typedef union {
	74	NV nv;
	75	U8 bytes[sizeof(NV)];
	76	} NV_bytes;
	77
	78	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	79	typedef union {
	80	long double ld;
	81	U8 bytes[sizeof(long double)];
	82	} ld_bytes;
	83	#endif
	84
	85	#ifndef CHAR_BIT
	86	# define CHAR_BIT 8
	87	#endif
	88	/* Maximum number of bytes to which a byte can grow due to upgrade */
	89	#define UTF8_EXPAND 2
	90
	91	/*
	92	* Offset for integer pack/unpack.
	93	*
	94	* On architectures where I16 and I32 aren't really 16 and 32 bits,
	95	* which for now are all Crays, pack and unpack have to play games.
	96	*/
	97
	98	/*
	99	* These values are required for portability of pack() output.
	100	* If they're not right on your machine, then pack() and unpack()
	101	* wouldn't work right anyway; you'll need to apply the Cray hack.
	102	* (I'd like to check them with #if, but you can't use sizeof() in
	103	* the preprocessor.) --???
	104	*/
	105	/*
	106	The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
	107	defines are now in config.h. --Andy Dougherty April 1998
	108	*/
	109	#define SIZE16 2
	110	#define SIZE32 4
	111
	112	/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
	113	--jhi Feb 1999 */
	114
	115	#if U16SIZE > SIZE16 \|\| U32SIZE > SIZE32
	116	# if BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 /* little-endian */
	117	# define OFF16(p) ((char*)(p))
	118	# define OFF32(p) ((char*)(p))
	119	# else
	120	# if BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321 /* big-endian */
	121	# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
	122	# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
	123	# else
	124	++++ bad cray byte order
	125	# endif
	126	# endif
	127	#else
	128	# define OFF16(p) ((char *) (p))
	129	# define OFF32(p) ((char *) (p))
	130	#endif
	131
	132	#define PUSH16(utf8, cur, p, needs_swap) \
	133	PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
	134	#define PUSH32(utf8, cur, p, needs_swap) \
	135	PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
	136
	137	#if BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321 /* big-endian */
	138	# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
	139	#elif BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 /* little-endian */
	140	# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
	141	#else
	142	# error "Unsupported byteorder"
	143	/* Need to add code here to re-instate mixed endian support.
	144	NEEDS_SWAP would need to hold a flag indicating which action to
	145	take, and S_reverse_copy and the code in uni_to_bytes would need
	146	logic adding to deal with any mixed-endian transformations needed.
	147	*/
	148	#endif
	149
	150	/* Only to be used inside a loop (see the break) */
	151	#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
	152	STMT_START { \
	153	if (UNLIKELY(utf8)) { \
	154	if (!uni_to_bytes(aTHX_ &s, strend, \
	155	(char *) (buf), len, datumtype)) break; \
	156	} else { \
	157	if (UNLIKELY(needs_swap)) \
	158	S_reverse_copy(s, (char *) (buf), len); \
	159	else \
	160	Copy(s, (char *) (buf), len, char); \
	161	s += len; \
	162	} \
	163	} STMT_END
	164
	165	#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
	166	SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
	167
	168	#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
	169	SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
	170
	171	#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
	172	SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
	173
	174	#define PUSH_VAR(utf8, aptr, var, needs_swap) \
	175	PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
	176
	177	/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
	178	#define MAX_SUB_TEMPLATE_LEVEL 100
	179
	180	/* flags (note that type modifiers can also be used as flags!) */
	181	#define FLAG_WAS_UTF8 0x40
	182	#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
	183	#define FLAG_UNPACK_ONLY_ONE 0x10
	184	#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
	185	#define FLAG_SLASH 0x04
	186	#define FLAG_COMMA 0x02
	187	#define FLAG_PACK 0x01
	188
	189	STATIC SV *
	190	S_mul128(pTHX_ SV *sv, U8 m)
	191	{
	192	STRLEN len;
	193	char *s = SvPV(sv, len);
	194	char *t;
	195
	196	PERL_ARGS_ASSERT_MUL128;
	197
	198	if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
	199	SV * const tmpNew = newSVpvs("0000000000");
	200
	201	sv_catsv(tmpNew, sv);
	202	SvREFCNT_dec(sv); /* free old sv */
	203	sv = tmpNew;
	204	s = SvPV(sv, len);
	205	}
	206	t = s + len - 1;
	207	while (!t) / trailing '\0'? */
	208	t--;
	209	while (t > s) {
	210	const U32 i = ((*t - '0') << 7) + m;
	211	*(t--) = '0' + (char)(i % 10);
	212	m = (char)(i / 10);
	213	}
	214	return (sv);
	215	}
	216
	217	/* Explosives and implosives. */
	218
	219	#if 'I' == 73 && 'J' == 74
	220	/* On an ASCII/ISO kind of system */
	221	#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
	222	#else
	223	/*
	224	Some other sort of character set - use memchr() so we don't match
	225	the null byte.
	226	*/
	227	#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) \|\| (ch) == ' ')
	228	#endif
	229
	230	/* type modifiers */
	231	#define TYPE_IS_SHRIEKING 0x100
	232	#define TYPE_IS_BIG_ENDIAN 0x200
	233	#define TYPE_IS_LITTLE_ENDIAN 0x400
	234	#define TYPE_IS_PACK 0x800
	235	#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN\|TYPE_IS_LITTLE_ENDIAN)
	236	#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
	237	#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
	238
	239	# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
	240	# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
	241
	242	# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
	243
	244	#define PACK_SIZE_CANNOT_CSUM 0x80
	245	#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
	246	#define PACK_SIZE_MASK 0x3F
	247
	248	#include "packsizetables.c"
	249
	250	static void
	251	S_reverse_copy(const char src, char dest, STRLEN len)
	252	{
	253	dest += len;
	254	while (len--)
	255	--dest = src++;
	256	}
	257
	258	STATIC U8
	259	uni_to_byte(pTHX_ const char *s, const char end, I32 datumtype)
	260	{
	261	STRLEN retlen;
	262	UV val = utf8n_to_uvchr((U8 ) s, end-*s, &retlen,
	263	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	264	/* We try to process malformed UTF-8 as much as possible (preferably with
	265	warnings), but these two mean we make no progress in the string and
	266	might enter an infinite loop */
	267	if (retlen == (STRLEN) -1 \|\| retlen == 0)
	268	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
	269	(int) TYPE_NO_MODIFIERS(datumtype));
	270	if (val >= 0x100) {
	271	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
	272	"Character in '%c' format wrapped in unpack",
	273	(int) TYPE_NO_MODIFIERS(datumtype));
	274	val &= 0xff;
	275	}
	276	*s += retlen;
	277	return (U8)val;
	278	}
	279
	280	#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
	281	uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
	282	(U8 )(s)++)
	283
	284	STATIC bool
	285	uni_to_bytes(pTHX_ const char *s, const char end, const char *buf, int buf_len, I32 datumtype)
	286	{
	287	UV val;
	288	STRLEN retlen;
	289	const char from = s;
	290	int bad = 0;
	291	const U32 flags = ckWARN(WARN_UTF8) ?
	292	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY \| UTF8_ALLOW_ANY);
	293	const bool needs_swap = NEEDS_SWAP(datumtype);
	294
	295	if (UNLIKELY(needs_swap))
	296	buf += buf_len;
	297
	298	for (;buf_len > 0; buf_len--) {
	299	if (from >= end) return FALSE;
	300	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
	301	if (retlen == (STRLEN) -1 \|\| retlen == 0) {
	302	from += UTF8SKIP(from);
	303	bad \|= 1;
	304	} else from += retlen;
	305	if (val >= 0x100) {
	306	bad \|= 2;
	307	val &= 0xff;
	308	}
	309	if (UNLIKELY(needs_swap))
	310	(U8 )--buf = (U8)val;
	311	else
	312	(U8 )buf++ = (U8)val;
	313	}
	314	/* We have enough characters for the buffer. Did we have problems ? */
	315	if (bad) {
	316	if (bad & 1) {
	317	/* Rewalk the string fragment while warning */
	318	const char *ptr;
	319	const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
	320	for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
	321	if (ptr >= end) break;
	322	utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
	323	}
	324	if (from > end) from = end;
	325	}
	326	if ((bad & 2))
	327	Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
	328	WARN_PACK : WARN_UNPACK),
	329	"Character(s) in '%c' format wrapped in %s",
	330	(int) TYPE_NO_MODIFIERS(datumtype),
	331	datumtype & TYPE_IS_PACK ? "pack" : "unpack");
	332	}
	333	*s = from;
	334	return TRUE;
	335	}
	336
	337	STATIC bool
	338	next_uni_uu(pTHX_ const char *s, const char end, I32 *out)
	339	{
	340	dVAR;
	341	STRLEN retlen;
	342	const UV val = utf8n_to_uvchr((U8 ) s, end-*s, &retlen, UTF8_CHECK_ONLY);
	343	if (val >= 0x100 \|\| !ISUUCHAR(val) \|\|
	344	retlen == (STRLEN) -1 \|\| retlen == 0) {
	345	*out = 0;
	346	return FALSE;
	347	}
	348	*out = PL_uudmap[val] & 077;
	349	*s += retlen;
	350	return TRUE;
	351	}
	352
	353	STATIC char *
	354	S_bytes_to_uni(const U8 start, STRLEN len, char dest, const bool needs_swap) {
	355	PERL_ARGS_ASSERT_BYTES_TO_UNI;
	356
	357	if (UNLIKELY(needs_swap)) {
	358	const U8 *p = start + len;
	359	while (p-- > start) {
	360	append_utf8_from_native_byte(p, (U8 *) & dest);
	361	}
	362	} else {
	363	const U8 * const end = start + len;
	364	while (start < end) {
	365	append_utf8_from_native_byte(start, (U8 *) & dest);
	366	start++;
	367	}
	368	}
	369	return dest;
	370	}
	371
	372	#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
	373	STMT_START { \
	374	if (UNLIKELY(utf8)) \
	375	(cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
	376	else { \
	377	if (UNLIKELY(needs_swap)) \
	378	S_reverse_copy((char *)(buf), cur, len); \
	379	else \
	380	Copy(buf, cur, len, char); \
	381	(cur) += (len); \
	382	} \
	383	} STMT_END
	384
	385	#define GROWING(utf8, cat, start, cur, in_len) \
	386	STMT_START { \
	387	STRLEN glen = (in_len); \
	388	if (utf8) glen *= UTF8_EXPAND; \
	389	if ((cur) + glen >= (start) + SvLEN(cat)) { \
	390	(start) = sv_exp_grow(cat, glen); \
	391	(cur) = (start) + SvCUR(cat); \
	392	} \
	393	} STMT_END
	394
	395	#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
	396	STMT_START { \
	397	const STRLEN glen = (in_len); \
	398	STRLEN gl = glen; \
	399	if (utf8) gl *= UTF8_EXPAND; \
	400	if ((cur) + gl >= (start) + SvLEN(cat)) { \
	401	*cur = '\0'; \
	402	SvCUR_set((cat), (cur) - (start)); \
	403	(start) = sv_exp_grow(cat, gl); \
	404	(cur) = (start) + SvCUR(cat); \
	405	} \
	406	PUSH_BYTES(utf8, cur, buf, glen, 0); \
	407	} STMT_END
	408
	409	#define PUSH_BYTE(utf8, s, byte) \
	410	STMT_START { \
	411	if (utf8) { \
	412	const U8 au8 = (byte); \
	413	(s) = S_bytes_to_uni(&au8, 1, (s), 0); \
	414	} else (U8 )(s)++ = (byte); \
	415	} STMT_END
	416
	417	/* Only to be used inside a loop (see the break) */
	418	#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
	419	STMT_START { \
	420	STRLEN retlen; \
	421	if (str >= end) break; \
	422	val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
	423	if (retlen == (STRLEN) -1 \|\| retlen == 0) { \
	424	*cur = '\0'; \
	425	Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
	426	} \
	427	str += retlen; \
	428	} STMT_END
	429
	430	static const char _action( const tempsym_t symptr )
	431	{
	432	return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
	433	}
	434
	435	/* Returns the sizeof() struct described by pat */
	436	STATIC I32
	437	S_measure_struct(pTHX_ tempsym_t* symptr)
	438	{
	439	I32 total = 0;
	440
	441	PERL_ARGS_ASSERT_MEASURE_STRUCT;
	442
	443	while (next_symbol(symptr)) {
	444	I32 len;
	445	int size;
	446
	447	switch (symptr->howlen) {
	448	case e_star:
	449	Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
	450	_action( symptr ) );
	451
	452	default:
	453	/* e_no_len and e_number */
	454	len = symptr->length;
	455	break;
	456	}
	457
	458	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
	459	if (!size) {
	460	int star;
	461	/* endianness doesn't influence the size of a type */
	462	switch(TYPE_NO_ENDIANNESS(symptr->code)) {
	463	default:
	464	Perl_croak(aTHX_ "Invalid type '%c' in %s",
	465	(int)TYPE_NO_MODIFIERS(symptr->code),
	466	_action( symptr ) );
	467	case '.' \| TYPE_IS_SHRIEKING:
	468	case '@' \| TYPE_IS_SHRIEKING:
	469	case '@':
	470	case '.':
	471	case '/':
	472	case 'U': /* XXXX Is it correct? */
	473	case 'w':
	474	case 'u':
	475	Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
	476	(int) TYPE_NO_MODIFIERS(symptr->code),
	477	_action( symptr ) );
	478	case '%':
	479	size = 0;
	480	break;
	481	case '(':
	482	{
	483	tempsym_t savsym = *symptr;
	484	symptr->patptr = savsym.grpbeg;
	485	symptr->patend = savsym.grpend;
	486	/* XXXX Theoretically, we need to measure many times at
	487	different positions, since the subexpression may contain
	488	alignment commands, but be not of aligned length.
	489	Need to detect this and croak(). */
	490	size = measure_struct(symptr);
	491	*symptr = savsym;
	492	break;
	493	}
	494	case 'X' \| TYPE_IS_SHRIEKING:
	495	/* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
	496	*/
	497	if (!len) /* Avoid division by 0 */
	498	len = 1;
	499	len = total % len; /* Assumed: the start is aligned. */
	500	/* FALLTHROUGH */
	501	case 'X':
	502	size = -1;
	503	if (total < len)
	504	Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
	505	break;
	506	case 'x' \| TYPE_IS_SHRIEKING:
	507	if (!len) /* Avoid division by 0 */
	508	len = 1;
	509	star = total % len; /* Assumed: the start is aligned. */
	510	if (star) /* Other portable ways? */
	511	len = len - star;
	512	else
	513	len = 0;
	514	/* FALLTHROUGH */
	515	case 'x':
	516	case 'A':
	517	case 'Z':
	518	case 'a':
	519	size = 1;
	520	break;
	521	case 'B':
	522	case 'b':
	523	len = (len + 7)/8;
	524	size = 1;
	525	break;
	526	case 'H':
	527	case 'h':
	528	len = (len + 1)/2;
	529	size = 1;
	530	break;
	531
	532	case 'P':
	533	len = 1;
	534	size = sizeof(char*);
	535	break;
	536	}
	537	}
	538	total += len * size;
	539	}
	540	return total;
	541	}
	542
	543
	544	/* locate matching closing parenthesis or bracket
	545	* returns char pointer to char after match, or NULL
	546	*/
	547	STATIC const char *
	548	S_group_end(pTHX_ const char patptr, const char patend, char ender)
	549	{
	550	PERL_ARGS_ASSERT_GROUP_END;
	551
	552	while (patptr < patend) {
	553	const char c = *patptr++;
	554
	555	if (isSPACE(c))
	556	continue;
	557	else if (c == ender)
	558	return patptr-1;
	559	else if (c == '#') {
	560	while (patptr < patend && *patptr != '\n')
	561	patptr++;
	562	continue;
	563	} else if (c == '(')
	564	patptr = group_end(patptr, patend, ')') + 1;
	565	else if (c == '[')
	566	patptr = group_end(patptr, patend, ']') + 1;
	567	}
	568	Perl_croak(aTHX_ "No group ending character '%c' found in template",
	569	ender);
	570	NOT_REACHED; /* NOTREACHED */
	571	}
	572
	573
	574	/* Convert unsigned decimal number to binary.
	575	* Expects a pointer to the first digit and address of length variable
	576	* Advances char pointer to 1st non-digit char and returns number
	577	*/
	578	STATIC const char *
	579	S_get_num(pTHX_ const char patptr, I32 lenptr )
	580	{
	581	I32 len = *patptr++ - '0';
	582
	583	PERL_ARGS_ASSERT_GET_NUM;
	584
	585	while (isDIGIT(*patptr)) {
	586	if (len >= 0x7FFFFFFF/10)
	587	Perl_croak(aTHX_ "pack/unpack repeat count overflow");
	588	len = (len * 10) + (*patptr++ - '0');
	589	}
	590	*lenptr = len;
	591	return patptr;
	592	}
	593
	594	/* The marvellous template parsing routine: Using state stored in *symptr,
	595	* locates next template code and count
	596	*/
	597	STATIC bool
	598	S_next_symbol(pTHX_ tempsym_t* symptr )
	599	{
	600	const char* patptr = symptr->patptr;
	601	const char* const patend = symptr->patend;
	602
	603	PERL_ARGS_ASSERT_NEXT_SYMBOL;
	604
	605	symptr->flags &= ~FLAG_SLASH;
	606
	607	while (patptr < patend) {
	608	if (isSPACE(*patptr))
	609	patptr++;
	610	else if (*patptr == '#') {
	611	patptr++;
	612	while (patptr < patend && *patptr != '\n')
	613	patptr++;
	614	if (patptr < patend)
	615	patptr++;
	616	} else {
	617	/* We should have found a template code */
	618	I32 code = *patptr++ & 0xFF;
	619	U32 inherited_modifiers = 0;
	620
	621	if (code == ','){ /* grandfather in commas but with a warning */
	622	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
	623	symptr->flags \|= FLAG_COMMA;
	624	Perl_warner(aTHX_ packWARN(WARN_UNPACK),
	625	"Invalid type ',' in %s", _action( symptr ) );
	626	}
	627	continue;
	628	}
	629
	630	/* for '(', skip to ')' */
	631	if (code == '(') {
	632	if( isDIGIT(patptr) \|\| patptr == '' \|\| patptr == '[' )
	633	Perl_croak(aTHX_ "()-group starts with a count in %s",
	634	_action( symptr ) );
	635	symptr->grpbeg = patptr;
	636	patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
	637	if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
	638	Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
	639	_action( symptr ) );
	640	}
	641
	642	/* look for group modifiers to inherit */
	643	if (TYPE_ENDIANNESS(symptr->flags)) {
	644	if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
	645	inherited_modifiers \|= TYPE_ENDIANNESS(symptr->flags);
	646	}
	647
	648	/* look for modifiers */
	649	while (patptr < patend) {
	650	const char *allowed;
	651	I32 modifier;
	652	switch (*patptr) {
	653	case '!':
	654	modifier = TYPE_IS_SHRIEKING;
	655	allowed = "sSiIlLxXnNvV@.";
	656	break;
	657	case '>':
	658	modifier = TYPE_IS_BIG_ENDIAN;
	659	allowed = ENDIANNESS_ALLOWED_TYPES;
	660	break;
	661	case '<':
	662	modifier = TYPE_IS_LITTLE_ENDIAN;
	663	allowed = ENDIANNESS_ALLOWED_TYPES;
	664	break;
	665	default:
	666	allowed = "";
	667	modifier = 0;
	668	break;
	669	}
	670
	671	if (modifier == 0)
	672	break;
	673
	674	if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
	675	Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
	676	allowed, _action( symptr ) );
	677
	678	if (TYPE_ENDIANNESS(code \| modifier) == TYPE_ENDIANNESS_MASK)
	679	Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
	680	(int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
	681	else if (TYPE_ENDIANNESS(code \| modifier \| inherited_modifiers) ==
	682	TYPE_ENDIANNESS_MASK)
	683	Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
	684	*patptr, _action( symptr ) );
	685
	686	if ((code & modifier)) {
	687	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
	688	"Duplicate modifier '%c' after '%c' in %s",
	689	*patptr, (int) TYPE_NO_MODIFIERS(code),
	690	_action( symptr ) );
	691	}
	692
	693	code \|= modifier;
	694	patptr++;
	695	}
	696
	697	/* inherit modifiers */
	698	code \|= inherited_modifiers;
	699
	700	/* look for count and/or / */
	701	if (patptr < patend) {
	702	if (isDIGIT(*patptr)) {
	703	patptr = get_num( patptr, &symptr->length );
	704	symptr->howlen = e_number;
	705
	706	} else if (patptr == '') {
	707	patptr++;
	708	symptr->howlen = e_star;
	709
	710	} else if (*patptr == '[') {
	711	const char* lenptr = ++patptr;
	712	symptr->howlen = e_number;
	713	patptr = group_end( patptr, patend, ']' ) + 1;
	714	/* what kind of [] is it? */
	715	if (isDIGIT(*lenptr)) {
	716	lenptr = get_num( lenptr, &symptr->length );
	717	if( *lenptr != ']' )
	718	Perl_croak(aTHX_ "Malformed integer in [] in %s",
	719	_action( symptr ) );
	720	} else {
	721	tempsym_t savsym = *symptr;
	722	symptr->patend = patptr-1;
	723	symptr->patptr = lenptr;
	724	savsym.length = measure_struct(symptr);
	725	*symptr = savsym;
	726	}
	727	} else {
	728	symptr->howlen = e_no_len;
	729	symptr->length = 1;
	730	}
	731
	732	/* try to find / */
	733	while (patptr < patend) {
	734	if (isSPACE(*patptr))
	735	patptr++;
	736	else if (*patptr == '#') {
	737	patptr++;
	738	while (patptr < patend && *patptr != '\n')
	739	patptr++;
	740	if (patptr < patend)
	741	patptr++;
	742	} else {
	743	if (*patptr == '/') {
	744	symptr->flags \|= FLAG_SLASH;
	745	patptr++;
	746	if (patptr < patend &&
	747	(isDIGIT(patptr) \|\| patptr == '' \|\| patptr == '['))
	748	Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
	749	_action( symptr ) );
	750	}
	751	break;
	752	}
	753	}
	754	} else {
	755	/* at end - no count, no / */
	756	symptr->howlen = e_no_len;
	757	symptr->length = 1;
	758	}
	759
	760	symptr->code = code;
	761	symptr->patptr = patptr;
	762	return TRUE;
	763	}
	764	}
	765	symptr->patptr = patptr;
	766	return FALSE;
	767	}
	768
	769	/*
	770	There is no way to cleanly handle the case where we should process the
	771	string per byte in its upgraded form while it's really in downgraded form
	772	(e.g. estimates like strend-s as an upper bound for the number of
	773	characters left wouldn't work). So if we foresee the need of this
	774	(pattern starts with U or contains U0), we want to work on the encoded
	775	version of the string. Users are advised to upgrade their pack string
	776	themselves if they need to do a lot of unpacks like this on it
	777	*/
	778	STATIC bool
	779	need_utf8(const char pat, const char patend)
	780	{
	781	bool first = TRUE;
	782
	783	PERL_ARGS_ASSERT_NEED_UTF8;
	784
	785	while (pat < patend) {
	786	if (pat[0] == '#') {
	787	pat++;
	788	pat = (const char *) memchr(pat, '\n', patend-pat);
	789	if (!pat) return FALSE;
	790	} else if (pat[0] == 'U') {
	791	if (first \|\| pat[1] == '0') return TRUE;
	792	} else first = FALSE;
	793	pat++;
	794	}
	795	return FALSE;
	796	}
	797
	798	STATIC char
	799	first_symbol(const char pat, const char patend) {
	800	PERL_ARGS_ASSERT_FIRST_SYMBOL;
	801
	802	while (pat < patend) {
	803	if (pat[0] != '#') return pat[0];
	804	pat++;
	805	pat = (const char *) memchr(pat, '\n', patend-pat);
	806	if (!pat) return 0;
	807	pat++;
	808	}
	809	return 0;
	810	}
	811
	812	/*
	813
	814	=head1 Pack and Unpack
	815
	816	=for apidoc unpackstring
	817
	818	The engine implementing the unpack() Perl function.
	819
	820	Using the template pat..patend, this function unpacks the string
	821	s..strend into a number of mortal SVs, which it pushes onto the perl
	822	argument (@_) stack (so you will need to issue a C<PUTBACK> before and
	823	C<SPAGAIN> after the call to this function). It returns the number of
	824	pushed elements.
	825
	826	The strend and patend pointers should point to the byte following the last
	827	character of each string.
	828
	829	Although this function returns its values on the perl argument stack, it
	830	doesn't take any parameters from that stack (and thus in particular
	831	there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
	832	example).
	833
	834	=cut */
	835
	836	I32
	837	Perl_unpackstring(pTHX_ const char pat, const char patend, const char s, const char strend, U32 flags)
	838	{
	839	tempsym_t sym;
	840
	841	PERL_ARGS_ASSERT_UNPACKSTRING;
	842
	843	if (flags & FLAG_DO_UTF8) flags \|= FLAG_WAS_UTF8;
	844	else if (need_utf8(pat, patend)) {
	845	/* We probably should try to avoid this in case a scalar context call
	846	wouldn't get to the "U0" */
	847	STRLEN len = strend - s;
	848	s = (char ) bytes_to_utf8((U8 ) s, &len);
	849	SAVEFREEPV(s);
	850	strend = s + len;
	851	flags \|= FLAG_DO_UTF8;
	852	}
	853
	854	if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
	855	flags \|= FLAG_PARSE_UTF8;
	856
	857	TEMPSYM_INIT(&sym, pat, patend, flags);
	858
	859	return unpack_rec(&sym, s, s, strend, NULL );
	860	}
	861
	862	STATIC I32
	863	S_unpack_rec(pTHX_ tempsym_t* symptr, const char s, const char strbeg, const char strend, const char *new_s )
	864	{
	865	dVAR; dSP;
	866	SV *sv = NULL;
	867	const I32 start_sp_offset = SP - PL_stack_base;
	868	howlen_t howlen;
	869	I32 checksum = 0;
	870	UV cuv = 0;
	871	NV cdouble = 0.0;
	872	const int bits_in_uv = CHAR_BIT * sizeof(cuv);
	873	bool beyond = FALSE;
	874	bool explicit_length;
	875	const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
	876	bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
	877
	878	PERL_ARGS_ASSERT_UNPACK_REC;
	879
	880	symptr->strbeg = s - strbeg;
	881
	882	while (next_symbol(symptr)) {
	883	packprops_t props;
	884	I32 len;
	885	I32 datumtype = symptr->code;
	886	bool needs_swap;
	887	/* do first one only unless in list context
	888	/ is implemented by unpacking the count, then popping it from the
	889	stack, so must check that we're not in the middle of a / */
	890	if ( unpack_only_one
	891	&& (SP - PL_stack_base == start_sp_offset + 1)
	892	&& (datumtype != '/') ) /* XXX can this be omitted */
	893	break;
	894
	895	switch (howlen = symptr->howlen) {
	896	case e_star:
	897	len = strend - strbeg; /* long enough */
	898	break;
	899	default:
	900	/* e_no_len and e_number */
	901	len = symptr->length;
	902	break;
	903	}
	904
	905	explicit_length = TRUE;
	906	redo_switch:
	907	beyond = s >= strend;
	908
	909	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
	910	if (props) {
	911	/* props nonzero means we can process this letter. */
	912	const long size = props & PACK_SIZE_MASK;
	913	const long howmany = (strend - s) / size;
	914	if (len > howmany)
	915	len = howmany;
	916
	917	if (!checksum \|\| (props & PACK_SIZE_CANNOT_CSUM)) {
	918	if (len && unpack_only_one) len = 1;
	919	EXTEND(SP, len);
	920	EXTEND_MORTAL(len);
	921	}
	922	}
	923
	924	needs_swap = NEEDS_SWAP(datumtype);
	925
	926	switch(TYPE_NO_ENDIANNESS(datumtype)) {
	927	default:
	928	Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
	929
	930	case '%':
	931	if (howlen == e_no_len)
	932	len = 16; /* len is not specified */
	933	checksum = len;
	934	cuv = 0;
	935	cdouble = 0;
	936	continue;
	937
	938	case '(':
	939	{
	940	tempsym_t savsym = *symptr;
	941	const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
	942	symptr->flags \|= group_modifiers;
	943	symptr->patend = savsym.grpend;
	944	symptr->previous = &savsym;
	945	symptr->level++;
	946	PUTBACK;
	947	if (len && unpack_only_one) len = 1;
	948	while (len--) {
	949	symptr->patptr = savsym.grpbeg;
	950	if (utf8) symptr->flags \|= FLAG_PARSE_UTF8;
	951	else symptr->flags &= ~FLAG_PARSE_UTF8;
	952	unpack_rec(symptr, s, strbeg, strend, &s);
	953	if (s == strend && savsym.howlen == e_star)
	954	break; /* No way to continue */
	955	}
	956	SPAGAIN;
	957	savsym.flags = symptr->flags & ~group_modifiers;
	958	*symptr = savsym;
	959	break;
	960	}
	961	case '.' \| TYPE_IS_SHRIEKING:
	962	case '.': {
	963	const char *from;
	964	SV *sv;
	965	const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
	966	if (howlen == e_star) from = strbeg;
	967	else if (len <= 0) from = s;
	968	else {
	969	tempsym_t *group = symptr;
	970
	971	while (--len && group) group = group->previous;
	972	from = group ? strbeg + group->strbeg : strbeg;
	973	}
	974	sv = from <= s ?
	975	newSVuv( u8 ? (UV) utf8_length((const U8)from, (const U8)s) : (UV) (s-from)) :
	976	newSViv(-(u8 ? (IV) utf8_length((const U8)s, (const U8)from) : (IV) (from-s)));
	977	mXPUSHs(sv);
	978	break;
	979	}
	980	case '@' \| TYPE_IS_SHRIEKING:
	981	case '@':
	982	s = strbeg + symptr->strbeg;
	983	if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
	984	{
	985	while (len > 0) {
	986	if (s >= strend)
	987	Perl_croak(aTHX_ "'@' outside of string in unpack");
	988	s += UTF8SKIP(s);
	989	len--;
	990	}
	991	if (s > strend)
	992	Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
	993	} else {
	994	if (strend-s < len)
	995	Perl_croak(aTHX_ "'@' outside of string in unpack");
	996	s += len;
	997	}
	998	break;
	999	case 'X' \| TYPE_IS_SHRIEKING:
	1000	if (!len) /* Avoid division by 0 */
	1001	len = 1;
	1002	if (utf8) {
	1003	const char hop, last;
	1004	I32 l = len;
	1005	hop = last = strbeg;
	1006	while (hop < s) {
	1007	hop += UTF8SKIP(hop);
	1008	if (--l == 0) {
	1009	last = hop;
	1010	l = len;
	1011	}
	1012	}
	1013	if (last > s)
	1014	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1015	s = last;
	1016	break;
	1017	}
	1018	len = (s - strbeg) % len;
	1019	/* FALLTHROUGH */
	1020	case 'X':
	1021	if (utf8) {
	1022	while (len > 0) {
	1023	if (s <= strbeg)
	1024	Perl_croak(aTHX_ "'X' outside of string in unpack");
	1025	while (--s, UTF8_IS_CONTINUATION(*s)) {
	1026	if (s <= strbeg)
	1027	Perl_croak(aTHX_ "'X' outside of string in unpack");
	1028	}
	1029	len--;
	1030	}
	1031	} else {
	1032	if (len > s - strbeg)
	1033	Perl_croak(aTHX_ "'X' outside of string in unpack" );
	1034	s -= len;
	1035	}
	1036	break;
	1037	case 'x' \| TYPE_IS_SHRIEKING: {
	1038	I32 ai32;
	1039	if (!len) /* Avoid division by 0 */
	1040	len = 1;
	1041	if (utf8) ai32 = utf8_length((U8 ) strbeg, (U8 ) s) % len;
	1042	else ai32 = (s - strbeg) % len;
	1043	if (ai32 == 0) break;
	1044	len -= ai32;
	1045	}
	1046	/* FALLTHROUGH */
	1047	case 'x':
	1048	if (utf8) {
	1049	while (len>0) {
	1050	if (s >= strend)
	1051	Perl_croak(aTHX_ "'x' outside of string in unpack");
	1052	s += UTF8SKIP(s);
	1053	len--;
	1054	}
	1055	} else {
	1056	if (len > strend - s)
	1057	Perl_croak(aTHX_ "'x' outside of string in unpack");
	1058	s += len;
	1059	}
	1060	break;
	1061	case '/':
	1062	Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
	1063
	1064	case 'A':
	1065	case 'Z':
	1066	case 'a':
	1067	if (checksum) {
	1068	/* Preliminary length estimate is assumed done in 'W' */
	1069	if (len > strend - s) len = strend - s;
	1070	goto W_checksum;
	1071	}
	1072	if (utf8) {
	1073	I32 l;
	1074	const char *hop;
	1075	for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
	1076	if (hop >= strend) {
	1077	if (hop > strend)
	1078	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1079	break;
	1080	}
	1081	}
	1082	if (hop > strend)
	1083	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1084	len = hop - s;
	1085	} else if (len > strend - s)
	1086	len = strend - s;
	1087
	1088	if (datumtype == 'Z') {
	1089	/* 'Z' strips stuff after first null */
	1090	const char ptr, end;
	1091	end = s + len;
	1092	for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
	1093	sv = newSVpvn(s, ptr-s);
	1094	if (howlen == e_star) /* exact for 'Z' /
	1095	len = ptr-s + (ptr != strend ? 1 : 0);
	1096	} else if (datumtype == 'A') {
	1097	/* 'A' strips both nulls and spaces */
	1098	const char *ptr;
	1099	if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
	1100	for (ptr = s+len-1; ptr >= s; ptr--)
	1101	if (ptr != 0 && !UTF8_IS_CONTINUATION(ptr) &&
	1102	!isSPACE_utf8(ptr)) break;
	1103	if (ptr >= s) ptr += UTF8SKIP(ptr);
	1104	else ptr++;
	1105	if (ptr > s+len)
	1106	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1107	} else {
	1108	for (ptr = s+len-1; ptr >= s; ptr--)
	1109	if (ptr != 0 && !isSPACE(ptr)) break;
	1110	ptr++;
	1111	}
	1112	sv = newSVpvn(s, ptr-s);
	1113	} else sv = newSVpvn(s, len);
	1114
	1115	if (utf8) {
	1116	SvUTF8_on(sv);
	1117	/* Undo any upgrade done due to need_utf8() */
	1118	if (!(symptr->flags & FLAG_WAS_UTF8))
	1119	sv_utf8_downgrade(sv, 0);
	1120	}
	1121	mXPUSHs(sv);
	1122	s += len;
	1123	break;
	1124	case 'B':
	1125	case 'b': {
	1126	char *str;
	1127	if (howlen == e_star \|\| len > (strend - s) * 8)
	1128	len = (strend - s) * 8;
	1129	if (checksum) {
	1130	if (utf8)
	1131	while (len >= 8 && s < strend) {
	1132	cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
	1133	len -= 8;
	1134	}
	1135	else
	1136	while (len >= 8) {
	1137	cuv += PL_bitcount[(U8 )s++];
	1138	len -= 8;
	1139	}
	1140	if (len && s < strend) {
	1141	U8 bits;
	1142	bits = SHIFT_BYTE(utf8, s, strend, datumtype);
	1143	if (datumtype == 'b')
	1144	while (len-- > 0) {
	1145	if (bits & 1) cuv++;
	1146	bits >>= 1;
	1147	}
	1148	else
	1149	while (len-- > 0) {
	1150	if (bits & 0x80) cuv++;
	1151	bits <<= 1;
	1152	}
	1153	}
	1154	break;
	1155	}
	1156
	1157	sv = sv_2mortal(newSV(len ? len : 1));
	1158	SvPOK_on(sv);
	1159	str = SvPVX(sv);
	1160	if (datumtype == 'b') {
	1161	U8 bits = 0;
	1162	const I32 ai32 = len;
	1163	for (len = 0; len < ai32; len++) {
	1164	if (len & 7) bits >>= 1;
	1165	else if (utf8) {
	1166	if (s >= strend) break;
	1167	bits = uni_to_byte(aTHX_ &s, strend, datumtype);
	1168	} else bits = (U8 ) s++;
	1169	*str++ = bits & 1 ? '1' : '0';
	1170	}
	1171	} else {
	1172	U8 bits = 0;
	1173	const I32 ai32 = len;
	1174	for (len = 0; len < ai32; len++) {
	1175	if (len & 7) bits <<= 1;
	1176	else if (utf8) {
	1177	if (s >= strend) break;
	1178	bits = uni_to_byte(aTHX_ &s, strend, datumtype);
	1179	} else bits = (U8 ) s++;
	1180	*str++ = bits & 0x80 ? '1' : '0';
	1181	}
	1182	}
	1183	*str = '\0';
	1184	SvCUR_set(sv, str - SvPVX_const(sv));
	1185	XPUSHs(sv);
	1186	break;
	1187	}
	1188	case 'H':
	1189	case 'h': {
	1190	char *str = NULL;
	1191	/* Preliminary length estimate, acceptable for utf8 too */
	1192	if (howlen == e_star \|\| len > (strend - s) * 2)
	1193	len = (strend - s) * 2;
	1194	if (!checksum) {
	1195	sv = sv_2mortal(newSV(len ? len : 1));
	1196	SvPOK_on(sv);
	1197	str = SvPVX(sv);
	1198	}
	1199	if (datumtype == 'h') {
	1200	U8 bits = 0;
	1201	I32 ai32 = len;
	1202	for (len = 0; len < ai32; len++) {
	1203	if (len & 1) bits >>= 4;
	1204	else if (utf8) {
	1205	if (s >= strend) break;
	1206	bits = uni_to_byte(aTHX_ &s, strend, datumtype);
	1207	} else bits = * (U8 *) s++;
	1208	if (!checksum)
	1209	*str++ = PL_hexdigit[bits & 15];
	1210	}
	1211	} else {
	1212	U8 bits = 0;
	1213	const I32 ai32 = len;
	1214	for (len = 0; len < ai32; len++) {
	1215	if (len & 1) bits <<= 4;
	1216	else if (utf8) {
	1217	if (s >= strend) break;
	1218	bits = uni_to_byte(aTHX_ &s, strend, datumtype);
	1219	} else bits = (U8 ) s++;
	1220	if (!checksum)
	1221	*str++ = PL_hexdigit[(bits >> 4) & 15];
	1222	}
	1223	}
	1224	if (!checksum) {
	1225	*str = '\0';
	1226	SvCUR_set(sv, str - SvPVX_const(sv));
	1227	XPUSHs(sv);
	1228	}
	1229	break;
	1230	}
	1231	case 'C':
	1232	if (len == 0) {
	1233	if (explicit_length)
	1234	/* Switch to "character" mode */
	1235	utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
	1236	break;
	1237	}
	1238	/* FALLTHROUGH */
	1239	case 'c':
	1240	while (len-- > 0 && s < strend) {
	1241	int aint;
	1242	if (utf8)
	1243	{
	1244	STRLEN retlen;
	1245	aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
	1246	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	1247	if (retlen == (STRLEN) -1 \|\| retlen == 0)
	1248	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1249	s += retlen;
	1250	}
	1251	else
	1252	aint = (U8 )(s)++;
	1253	if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
	1254	aint -= 256;
	1255	if (!checksum)
	1256	mPUSHi(aint);
	1257	else if (checksum > bits_in_uv)
	1258	cdouble += (NV)aint;
	1259	else
	1260	cuv += aint;
	1261	}
	1262	break;
	1263	case 'W':
	1264	W_checksum:
	1265	if (utf8) {
	1266	while (len-- > 0 && s < strend) {
	1267	STRLEN retlen;
	1268	const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
	1269	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	1270	if (retlen == (STRLEN) -1 \|\| retlen == 0)
	1271	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1272	s += retlen;
	1273	if (!checksum)
	1274	mPUSHu(val);
	1275	else if (checksum > bits_in_uv)
	1276	cdouble += (NV) val;
	1277	else
	1278	cuv += val;
	1279	}
	1280	} else if (!checksum)
	1281	while (len-- > 0) {
	1282	const U8 ch = (U8 ) s++;
	1283	mPUSHu(ch);
	1284	}
	1285	else if (checksum > bits_in_uv)
	1286	while (len-- > 0) cdouble += (NV) (U8 ) s++;
	1287	else
	1288	while (len-- > 0) cuv += (U8 ) s++;
	1289	break;
	1290	case 'U':
	1291	if (len == 0) {
	1292	if (explicit_length && howlen != e_star) {
	1293	/* Switch to "bytes in UTF-8" mode */
	1294	if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
	1295	else
	1296	/* Should be impossible due to the need_utf8() test */
	1297	Perl_croak(aTHX_ "U0 mode on a byte string");
	1298	}
	1299	break;
	1300	}
	1301	if (len > strend - s) len = strend - s;
	1302	if (!checksum) {
	1303	if (len && unpack_only_one) len = 1;
	1304	EXTEND(SP, len);
	1305	EXTEND_MORTAL(len);
	1306	}
	1307	while (len-- > 0 && s < strend) {
	1308	STRLEN retlen;
	1309	UV auv;
	1310	if (utf8) {
	1311	U8 result[UTF8_MAXLEN];
	1312	const char *ptr = s;
	1313	STRLEN len;
	1314	/* Bug: warns about bad utf8 even if we are short on bytes
	1315	and will break out of the loop */
	1316	if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
	1317	'U'))
	1318	break;
	1319	len = UTF8SKIP(result);
	1320	if (!uni_to_bytes(aTHX_ &ptr, strend,
	1321	(char *) &result[1], len-1, 'U')) break;
	1322	auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
	1323	s = ptr;
	1324	} else {
	1325	auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
	1326	if (retlen == (STRLEN) -1 \|\| retlen == 0)
	1327	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1328	s += retlen;
	1329	}
	1330	if (!checksum)
	1331	mPUSHu(auv);
	1332	else if (checksum > bits_in_uv)
	1333	cdouble += (NV) auv;
	1334	else
	1335	cuv += auv;
	1336	}
	1337	break;
	1338	case 's' \| TYPE_IS_SHRIEKING:
	1339	#if SHORTSIZE != SIZE16
	1340	while (len-- > 0) {
	1341	short ashort;
	1342	SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
	1343	if (!checksum)
	1344	mPUSHi(ashort);
	1345	else if (checksum > bits_in_uv)
	1346	cdouble += (NV)ashort;
	1347	else
	1348	cuv += ashort;
	1349	}
	1350	break;
	1351	#else
	1352	/* FALLTHROUGH */
	1353	#endif
	1354	case 's':
	1355	while (len-- > 0) {
	1356	I16 ai16;
	1357
	1358	#if U16SIZE > SIZE16
	1359	ai16 = 0;
	1360	#endif
	1361	SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
	1362	#if U16SIZE > SIZE16
	1363	if (ai16 > 32767)
	1364	ai16 -= 65536;
	1365	#endif
	1366	if (!checksum)
	1367	mPUSHi(ai16);
	1368	else if (checksum > bits_in_uv)
	1369	cdouble += (NV)ai16;
	1370	else
	1371	cuv += ai16;
	1372	}
	1373	break;
	1374	case 'S' \| TYPE_IS_SHRIEKING:
	1375	#if SHORTSIZE != SIZE16
	1376	while (len-- > 0) {
	1377	unsigned short aushort;
	1378	SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
	1379	needs_swap);
	1380	if (!checksum)
	1381	mPUSHu(aushort);
	1382	else if (checksum > bits_in_uv)
	1383	cdouble += (NV)aushort;
	1384	else
	1385	cuv += aushort;
	1386	}
	1387	break;
	1388	#else
	1389	/* FALLTHROUGH */
	1390	#endif
	1391	case 'v':
	1392	case 'n':
	1393	case 'S':
	1394	while (len-- > 0) {
	1395	U16 au16;
	1396	#if U16SIZE > SIZE16
	1397	au16 = 0;
	1398	#endif
	1399	SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
	1400	if (datumtype == 'n')
	1401	au16 = PerlSock_ntohs(au16);
	1402	if (datumtype == 'v')
	1403	au16 = vtohs(au16);
	1404	if (!checksum)
	1405	mPUSHu(au16);
	1406	else if (checksum > bits_in_uv)
	1407	cdouble += (NV) au16;
	1408	else
	1409	cuv += au16;
	1410	}
	1411	break;
	1412	case 'v' \| TYPE_IS_SHRIEKING:
	1413	case 'n' \| TYPE_IS_SHRIEKING:
	1414	while (len-- > 0) {
	1415	I16 ai16;
	1416	# if U16SIZE > SIZE16
	1417	ai16 = 0;
	1418	# endif
	1419	SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
	1420	/* There should never be any byte-swapping here. */
	1421	assert(!TYPE_ENDIANNESS(datumtype));
	1422	if (datumtype == ('n' \| TYPE_IS_SHRIEKING))
	1423	ai16 = (I16) PerlSock_ntohs((U16) ai16);
	1424	if (datumtype == ('v' \| TYPE_IS_SHRIEKING))
	1425	ai16 = (I16) vtohs((U16) ai16);
	1426	if (!checksum)
	1427	mPUSHi(ai16);
	1428	else if (checksum > bits_in_uv)
	1429	cdouble += (NV) ai16;
	1430	else
	1431	cuv += ai16;
	1432	}
	1433	break;
	1434	case 'i':
	1435	case 'i' \| TYPE_IS_SHRIEKING:
	1436	while (len-- > 0) {
	1437	int aint;
	1438	SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
	1439	if (!checksum)
	1440	mPUSHi(aint);
	1441	else if (checksum > bits_in_uv)
	1442	cdouble += (NV)aint;
	1443	else
	1444	cuv += aint;
	1445	}
	1446	break;
	1447	case 'I':
	1448	case 'I' \| TYPE_IS_SHRIEKING:
	1449	while (len-- > 0) {
	1450	unsigned int auint;
	1451	SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
	1452	if (!checksum)
	1453	mPUSHu(auint);
	1454	else if (checksum > bits_in_uv)
	1455	cdouble += (NV)auint;
	1456	else
	1457	cuv += auint;
	1458	}
	1459	break;
	1460	case 'j':
	1461	while (len-- > 0) {
	1462	IV aiv;
	1463	SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
	1464	if (!checksum)
	1465	mPUSHi(aiv);
	1466	else if (checksum > bits_in_uv)
	1467	cdouble += (NV)aiv;
	1468	else
	1469	cuv += aiv;
	1470	}
	1471	break;
	1472	case 'J':
	1473	while (len-- > 0) {
	1474	UV auv;
	1475	SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
	1476	if (!checksum)
	1477	mPUSHu(auv);
	1478	else if (checksum > bits_in_uv)
	1479	cdouble += (NV)auv;
	1480	else
	1481	cuv += auv;
	1482	}
	1483	break;
	1484	case 'l' \| TYPE_IS_SHRIEKING:
	1485	#if LONGSIZE != SIZE32
	1486	while (len-- > 0) {
	1487	long along;
	1488	SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
	1489	if (!checksum)
	1490	mPUSHi(along);
	1491	else if (checksum > bits_in_uv)
	1492	cdouble += (NV)along;
	1493	else
	1494	cuv += along;
	1495	}
	1496	break;
	1497	#else
	1498	/* FALLTHROUGH */
	1499	#endif
	1500	case 'l':
	1501	while (len-- > 0) {
	1502	I32 ai32;
	1503	#if U32SIZE > SIZE32
	1504	ai32 = 0;
	1505	#endif
	1506	SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
	1507	#if U32SIZE > SIZE32
	1508	if (ai32 > 2147483647) ai32 -= 4294967296;
	1509	#endif
	1510	if (!checksum)
	1511	mPUSHi(ai32);
	1512	else if (checksum > bits_in_uv)
	1513	cdouble += (NV)ai32;
	1514	else
	1515	cuv += ai32;
	1516	}
	1517	break;
	1518	case 'L' \| TYPE_IS_SHRIEKING:
	1519	#if LONGSIZE != SIZE32
	1520	while (len-- > 0) {
	1521	unsigned long aulong;
	1522	SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
	1523	if (!checksum)
	1524	mPUSHu(aulong);
	1525	else if (checksum > bits_in_uv)
	1526	cdouble += (NV)aulong;
	1527	else
	1528	cuv += aulong;
	1529	}
	1530	break;
	1531	#else
	1532	/* FALLTHROUGH */
	1533	#endif
	1534	case 'V':
	1535	case 'N':
	1536	case 'L':
	1537	while (len-- > 0) {
	1538	U32 au32;
	1539	#if U32SIZE > SIZE32
	1540	au32 = 0;
	1541	#endif
	1542	SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
	1543	if (datumtype == 'N')
	1544	au32 = PerlSock_ntohl(au32);
	1545	if (datumtype == 'V')
	1546	au32 = vtohl(au32);
	1547	if (!checksum)
	1548	mPUSHu(au32);
	1549	else if (checksum > bits_in_uv)
	1550	cdouble += (NV)au32;
	1551	else
	1552	cuv += au32;
	1553	}
	1554	break;
	1555	case 'V' \| TYPE_IS_SHRIEKING:
	1556	case 'N' \| TYPE_IS_SHRIEKING:
	1557	while (len-- > 0) {
	1558	I32 ai32;
	1559	#if U32SIZE > SIZE32
	1560	ai32 = 0;
	1561	#endif
	1562	SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
	1563	/* There should never be any byte swapping here. */
	1564	assert(!TYPE_ENDIANNESS(datumtype));
	1565	if (datumtype == ('N' \| TYPE_IS_SHRIEKING))
	1566	ai32 = (I32)PerlSock_ntohl((U32)ai32);
	1567	if (datumtype == ('V' \| TYPE_IS_SHRIEKING))
	1568	ai32 = (I32)vtohl((U32)ai32);
	1569	if (!checksum)
	1570	mPUSHi(ai32);
	1571	else if (checksum > bits_in_uv)
	1572	cdouble += (NV)ai32;
	1573	else
	1574	cuv += ai32;
	1575	}
	1576	break;
	1577	case 'p':
	1578	while (len-- > 0) {
	1579	const char *aptr;
	1580	SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
	1581	/* newSVpv generates undef if aptr is NULL */
	1582	mPUSHs(newSVpv(aptr, 0));
	1583	}
	1584	break;
	1585	case 'w':
	1586	{
	1587	UV auv = 0;
	1588	U32 bytes = 0;
	1589
	1590	while (len > 0 && s < strend) {
	1591	U8 ch;
	1592	ch = SHIFT_BYTE(utf8, s, strend, datumtype);
	1593	auv = (auv << 7) \| (ch & 0x7f);
	1594	/* UTF8_IS_XXXXX not right here - using constant 0x80 */
	1595	if (ch < 0x80) {
	1596	bytes = 0;
	1597	mPUSHu(auv);
	1598	len--;
	1599	auv = 0;
	1600	continue;
	1601	}
	1602	if (++bytes >= sizeof(UV)) { /* promote to string */
	1603	const char *t;
	1604
	1605	sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
	1606	while (s < strend) {
	1607	ch = SHIFT_BYTE(utf8, s, strend, datumtype);
	1608	sv = mul128(sv, (U8)(ch & 0x7f));
	1609	if (!(ch & 0x80)) {
	1610	bytes = 0;
	1611	break;
	1612	}
	1613	}
	1614	t = SvPV_nolen_const(sv);
	1615	while (*t == '0')
	1616	t++;
	1617	sv_chop(sv, t);
	1618	mPUSHs(sv);
	1619	len--;
	1620	auv = 0;
	1621	}
	1622	}
	1623	if ((s >= strend) && bytes)
	1624	Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
	1625	}
	1626	break;
	1627	case 'P':
	1628	if (symptr->howlen == e_star)
	1629	Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
	1630	EXTEND(SP, 1);
	1631	if (s + sizeof(char*) <= strend) {
	1632	char *aptr;
	1633	SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
	1634	/* newSVpvn generates undef if aptr is NULL */
	1635	PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
	1636	}
	1637	break;
	1638	#if defined(HAS_QUAD) && IVSIZE >= 8
	1639	case 'q':
	1640	while (len-- > 0) {
	1641	Quad_t aquad;
	1642	SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
	1643	if (!checksum)
	1644	mPUSHs(newSViv((IV)aquad));
	1645	else if (checksum > bits_in_uv)
	1646	cdouble += (NV)aquad;
	1647	else
	1648	cuv += aquad;
	1649	}
	1650	break;
	1651	case 'Q':
	1652	while (len-- > 0) {
	1653	Uquad_t auquad;
	1654	SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
	1655	if (!checksum)
	1656	mPUSHs(newSVuv((UV)auquad));
	1657	else if (checksum > bits_in_uv)
	1658	cdouble += (NV)auquad;
	1659	else
	1660	cuv += auquad;
	1661	}
	1662	break;
	1663	#endif
	1664	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
	1665	case 'f':
	1666	while (len-- > 0) {
	1667	float afloat;
	1668	SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
	1669	if (!checksum)
	1670	mPUSHn(afloat);
	1671	else
	1672	cdouble += afloat;
	1673	}
	1674	break;
	1675	case 'd':
	1676	while (len-- > 0) {
	1677	double adouble;
	1678	SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
	1679	if (!checksum)
	1680	mPUSHn(adouble);
	1681	else
	1682	cdouble += adouble;
	1683	}
	1684	break;
	1685	case 'F':
	1686	while (len-- > 0) {
	1687	NV_bytes anv;
	1688	SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
	1689	datumtype, needs_swap);
	1690	if (!checksum)
	1691	mPUSHn(anv.nv);
	1692	else
	1693	cdouble += anv.nv;
	1694	}
	1695	break;
	1696	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	1697	case 'D':
	1698	while (len-- > 0) {
	1699	ld_bytes aldouble;
	1700	SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
	1701	sizeof(aldouble.bytes), datumtype, needs_swap);
	1702	if (!checksum)
	1703	mPUSHn(aldouble.ld);
	1704	else
	1705	cdouble += aldouble.ld;
	1706	}
	1707	break;
	1708	#endif
	1709	case 'u':
	1710	if (!checksum) {
	1711	const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
	1712	sv = sv_2mortal(newSV(l));
	1713	if (l) SvPOK_on(sv);
	1714	}
	1715	if (utf8) {
	1716	while (next_uni_uu(aTHX_ &s, strend, &len)) {
	1717	I32 a, b, c, d;
	1718	char hunk[3];
	1719
	1720	while (len > 0) {
	1721	next_uni_uu(aTHX_ &s, strend, &a);
	1722	next_uni_uu(aTHX_ &s, strend, &b);
	1723	next_uni_uu(aTHX_ &s, strend, &c);
	1724	next_uni_uu(aTHX_ &s, strend, &d);
	1725	hunk[0] = (char)((a << 2) \| (b >> 4));
	1726	hunk[1] = (char)((b << 4) \| (c >> 2));
	1727	hunk[2] = (char)((c << 6) \| d);
	1728	if (!checksum)
	1729	sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
	1730	len -= 3;
	1731	}
	1732	if (s < strend) {
	1733	if (*s == '\n') {
	1734	s++;
	1735	}
	1736	else {
	1737	/* possible checksum byte */
	1738	const char *skip = s+UTF8SKIP(s);
	1739	if (skip < strend && *skip == '\n')
	1740	s = skip+1;
	1741	}
	1742	}
	1743	}
	1744	} else {
	1745	while (s < strend && s > ' ' && ISUUCHAR(s)) {
	1746	I32 a, b, c, d;
	1747	char hunk[3];
	1748
	1749	len = PL_uudmap[(U8)s++] & 077;
	1750	while (len > 0) {
	1751	if (s < strend && ISUUCHAR(*s))
	1752	a = PL_uudmap[(U8)s++] & 077;
	1753	else
	1754	a = 0;
	1755	if (s < strend && ISUUCHAR(*s))
	1756	b = PL_uudmap[(U8)s++] & 077;
	1757	else
	1758	b = 0;
	1759	if (s < strend && ISUUCHAR(*s))
	1760	c = PL_uudmap[(U8)s++] & 077;
	1761	else
	1762	c = 0;
	1763	if (s < strend && ISUUCHAR(*s))
	1764	d = PL_uudmap[(U8)s++] & 077;
	1765	else
	1766	d = 0;
	1767	hunk[0] = (char)((a << 2) \| (b >> 4));
	1768	hunk[1] = (char)((b << 4) \| (c >> 2));
	1769	hunk[2] = (char)((c << 6) \| d);
	1770	if (!checksum)
	1771	sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
	1772	len -= 3;
	1773	}
	1774	if (*s == '\n')
	1775	s++;
	1776	else /* possible checksum byte */
	1777	if (s + 1 < strend && s[1] == '\n')
	1778	s += 2;
	1779	}
	1780	}
	1781	if (!checksum)
	1782	XPUSHs(sv);
	1783	break;
	1784	}
	1785
	1786	if (checksum) {
	1787	if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) \|\|
	1788	(checksum > bits_in_uv &&
	1789	strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
	1790	NV trouble, anv;
	1791
	1792	anv = (NV) (1 << (checksum & 15));
	1793	while (checksum >= 16) {
	1794	checksum -= 16;
	1795	anv *= 65536.0;
	1796	}
	1797	while (cdouble < 0.0)
	1798	cdouble += anv;
	1799	cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
	1800	sv = newSVnv(cdouble);
	1801	}
	1802	else {
	1803	if (checksum < bits_in_uv) {
	1804	UV mask = ((UV)1 << checksum) - 1;
	1805	cuv &= mask;
	1806	}
	1807	sv = newSVuv(cuv);
	1808	}
	1809	mXPUSHs(sv);
	1810	checksum = 0;
	1811	}
	1812
	1813	if (symptr->flags & FLAG_SLASH){
	1814	if (SP - PL_stack_base - start_sp_offset <= 0)
	1815	break;
	1816	if( next_symbol(symptr) ){
	1817	if( symptr->howlen == e_number )
	1818	Perl_croak(aTHX_ "Count after length/code in unpack" );
	1819	if( beyond ){
	1820	/* ...end of char buffer then no decent length available */
	1821	Perl_croak(aTHX_ "length/code after end of string in unpack" );
	1822	} else {
	1823	/* take top of stack (hope it's numeric) */
	1824	len = POPi;
	1825	if( len < 0 )
	1826	Perl_croak(aTHX_ "Negative '/' count in unpack" );
	1827	}
	1828	} else {
	1829	Perl_croak(aTHX_ "Code missing after '/' in unpack" );
	1830	}
	1831	datumtype = symptr->code;
	1832	explicit_length = FALSE;
	1833	goto redo_switch;
	1834	}
	1835	}
	1836
	1837	if (new_s)
	1838	*new_s = s;
	1839	PUTBACK;
	1840	return SP - PL_stack_base - start_sp_offset;
	1841	}
	1842
	1843	PP(pp_unpack)
	1844	{
	1845	dVAR;
	1846	dSP;
	1847	dPOPPOPssrl;
	1848	I32 gimme = GIMME_V;
	1849	STRLEN llen;
	1850	STRLEN rlen;
	1851	const char *pat = SvPV_const(left, llen);
	1852	const char *s = SvPV_const(right, rlen);
	1853	const char *strend = s + rlen;
	1854	const char *patend = pat + llen;
	1855	I32 cnt;
	1856
	1857	PUTBACK;
	1858	cnt = unpackstring(pat, patend, s, strend,
	1859	((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
	1860	\| (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
	1861
	1862	SPAGAIN;
	1863	if ( !cnt && gimme == G_SCALAR )
	1864	PUSHs(&PL_sv_undef);
	1865	RETURN;
	1866	}
	1867
	1868	STATIC U8 *
	1869	doencodes(U8 h, const char s, I32 len)
	1870	{
	1871	*h++ = PL_uuemap[len];
	1872	while (len > 2) {
	1873	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
	1874	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) \| ((s[1] >> 4) & 017)))];
	1875	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) \| ((s[2] >> 6) & 03)))];
	1876	*h++ = PL_uuemap[(077 & (s[2] & 077))];
	1877	s += 3;
	1878	len -= 3;
	1879	}
	1880	if (len > 0) {
	1881	const char r = (len > 1 ? s[1] : '\0');
	1882	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
	1883	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) \| ((r >> 4) & 017)))];
	1884	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
	1885	*h++ = PL_uuemap[0];
	1886	}
	1887	*h++ = '\n';
	1888	return h;
	1889	}
	1890
	1891	STATIC SV *
	1892	S_is_an_int(pTHX_ const char *s, STRLEN l)
	1893	{
	1894	SV *result = newSVpvn(s, l);
	1895	char const result_c = SvPV_nolen(result); / convenience */
	1896	char *out = result_c;
	1897	bool skip = 1;
	1898	bool ignore = 0;
	1899
	1900	PERL_ARGS_ASSERT_IS_AN_INT;
	1901
	1902	while (*s) {
	1903	switch (*s) {
	1904	case ' ':
	1905	break;
	1906	case '+':
	1907	if (!skip) {
	1908	SvREFCNT_dec(result);
	1909	return (NULL);
	1910	}
	1911	break;
	1912	case '0':
	1913	case '1':
	1914	case '2':
	1915	case '3':
	1916	case '4':
	1917	case '5':
	1918	case '6':
	1919	case '7':
	1920	case '8':
	1921	case '9':
	1922	skip = 0;
	1923	if (!ignore) {
	1924	(out++) = s;
	1925	}
	1926	break;
	1927	case '.':
	1928	ignore = 1;
	1929	break;
	1930	default:
	1931	SvREFCNT_dec(result);
	1932	return (NULL);
	1933	}
	1934	s++;
	1935	}
	1936	*(out++) = '\0';
	1937	SvCUR_set(result, out - result_c);
	1938	return (result);
	1939	}
	1940
	1941	/* pnum must be '\0' terminated */
	1942	STATIC int
	1943	S_div128(pTHX_ SV pnum, bool done)
	1944	{
	1945	STRLEN len;
	1946	char * const s = SvPV(pnum, len);
	1947	char *t = s;
	1948	int m = 0;
	1949
	1950	PERL_ARGS_ASSERT_DIV128;
	1951
	1952	*done = 1;
	1953	while (*t) {
	1954	const int i = m * 10 + (*t - '0');
	1955	const int r = (i >> 7); /* r < 10 */
	1956	m = i & 0x7F;
	1957	if (r) {
	1958	*done = 0;
	1959	}
	1960	*(t++) = '0' + r;
	1961	}
	1962	*(t++) = '\0';
	1963	SvCUR_set(pnum, (STRLEN) (t - s));
	1964	return (m);
	1965	}
	1966
	1967	/*
	1968	=for apidoc packlist
	1969
	1970	The engine implementing pack() Perl function.
	1971
	1972	=cut
	1973	*/
	1974
	1975	void
	1976	Perl_packlist(pTHX_ SV cat, const char pat, const char patend, SV beglist, SV *endlist )
	1977	{
	1978	dVAR;
	1979	tempsym_t sym;
	1980
	1981	PERL_ARGS_ASSERT_PACKLIST;
	1982
	1983	TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
	1984
	1985	/* We're going to do changes through SvPVX(cat). Make sure it's valid.
	1986	Also make sure any UTF8 flag is loaded */
	1987	SvPV_force_nolen(cat);
	1988	if (DO_UTF8(cat))
	1989	sym.flags \|= FLAG_PARSE_UTF8 \| FLAG_DO_UTF8;
	1990
	1991	(void)pack_rec( cat, &sym, beglist, endlist );
	1992	}
	1993
	1994	/* like sv_utf8_upgrade, but also repoint the group start markers */
	1995	STATIC void
	1996	marked_upgrade(pTHX_ SV sv, tempsym_t sym_ptr) {
	1997	STRLEN len;
	1998	tempsym_t *group;
	1999	const char from_ptr, from_start, from_end, marks, *m;
	2000	char to_start, to_ptr;
	2001
	2002	if (SvUTF8(sv)) return;
	2003
	2004	from_start = SvPVX_const(sv);
	2005	from_end = from_start + SvCUR(sv);
	2006	for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
	2007	if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
	2008	if (from_ptr == from_end) {
	2009	/* Simple case: no character needs to be changed */
	2010	SvUTF8_on(sv);
	2011	return;
	2012	}
	2013
	2014	len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
	2015	Newx(to_start, len, char);
	2016	Copy(from_start, to_start, from_ptr-from_start, char);
	2017	to_ptr = to_start + (from_ptr-from_start);
	2018
	2019	Newx(marks, sym_ptr->level+2, const char *);
	2020	for (group=sym_ptr; group; group = group->previous)
	2021	marks[group->level] = from_start + group->strbeg;
	2022	marks[sym_ptr->level+1] = from_end+1;
	2023	for (m = marks; *m < from_ptr; m++)
	2024	m = to_start + (m-from_start);
	2025
	2026	for (;from_ptr < from_end; from_ptr++) {
	2027	while (m == from_ptr) m++ = to_ptr;
	2028	to_ptr = (char ) uvchr_to_utf8((U8 ) to_ptr, (U8 ) from_ptr);
	2029	}
	2030	*to_ptr = 0;
	2031
	2032	while (m == from_ptr) m++ = to_ptr;
	2033	if (m != marks + sym_ptr->level+1) {
	2034	Safefree(marks);
	2035	Safefree(to_start);
	2036	Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
	2037	"level=%d", m, marks, sym_ptr->level);
	2038	}
	2039	for (group=sym_ptr; group; group = group->previous)
	2040	group->strbeg = marks[group->level] - to_start;
	2041	Safefree(marks);
	2042
	2043	if (SvOOK(sv)) {
	2044	if (SvIVX(sv)) {
	2045	SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
	2046	from_start -= SvIVX(sv);
	2047	SvIV_set(sv, 0);
	2048	}
	2049	SvFLAGS(sv) &= ~SVf_OOK;
	2050	}
	2051	if (SvLEN(sv) != 0)
	2052	Safefree(from_start);
	2053	SvPV_set(sv, to_start);
	2054	SvCUR_set(sv, to_ptr - to_start);
	2055	SvLEN_set(sv, len);
	2056	SvUTF8_on(sv);
	2057	}
	2058
	2059	/* Exponential string grower. Makes string extension effectively O(n)
	2060	needed says how many extra bytes we need (not counting the final '\0')
	2061	Only grows the string if there is an actual lack of space
	2062	*/
	2063	STATIC char *
	2064	S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
	2065	const STRLEN cur = SvCUR(sv);
	2066	const STRLEN len = SvLEN(sv);
	2067	STRLEN extend;
	2068
	2069	PERL_ARGS_ASSERT_SV_EXP_GROW;
	2070
	2071	if (len - cur > needed) return SvPVX(sv);
	2072	extend = needed > len ? needed : len;
	2073	return SvGROW(sv, len+extend+1);
	2074	}
	2075
	2076	STATIC
	2077	SV **
	2078	S_pack_rec(pTHX_ SV cat, tempsym_t symptr, SV beglist, SV endlist )
	2079	{
	2080	dVAR;
	2081	tempsym_t lookahead;
	2082	I32 items = endlist - beglist;
	2083	bool found = next_symbol(symptr);
	2084	bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
	2085	bool warn_utf8 = ckWARN(WARN_UTF8);
	2086	char* from;
	2087
	2088	PERL_ARGS_ASSERT_PACK_REC;
	2089
	2090	if (symptr->level == 0 && found && symptr->code == 'U') {
	2091	marked_upgrade(aTHX_ cat, symptr);
	2092	symptr->flags \|= FLAG_DO_UTF8;
	2093	utf8 = 0;
	2094	}
	2095	symptr->strbeg = SvCUR(cat);
	2096
	2097	while (found) {
	2098	SV *fromstr;
	2099	STRLEN fromlen;
	2100	I32 len;
	2101	SV *lengthcode = NULL;
	2102	I32 datumtype = symptr->code;
	2103	howlen_t howlen = symptr->howlen;
	2104	char *start = SvPVX(cat);
	2105	char *cur = start + SvCUR(cat);
	2106	bool needs_swap;
	2107
	2108	#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
	2109
	2110	switch (howlen) {
	2111	case e_star:
	2112	len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
	2113	0 : items;
	2114	break;
	2115	default:
	2116	/* e_no_len and e_number */
	2117	len = symptr->length;
	2118	break;
	2119	}
	2120
	2121	if (len) {
	2122	packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
	2123
	2124	if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
	2125	/* We can process this letter. */
	2126	STRLEN size = props & PACK_SIZE_MASK;
	2127	GROWING(utf8, cat, start, cur, (STRLEN) len * size);
	2128	}
	2129	}
	2130
	2131	/* Look ahead for next symbol. Do we have code/code? */
	2132	lookahead = *symptr;
	2133	found = next_symbol(&lookahead);
	2134	if (symptr->flags & FLAG_SLASH) {
	2135	IV count;
	2136	if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
	2137	if (strchr("aAZ", lookahead.code)) {
	2138	if (lookahead.howlen == e_number) count = lookahead.length;
	2139	else {
	2140	if (items > 0) {
	2141	count = sv_len_utf8(*beglist);
	2142	}
	2143	else count = 0;
	2144	if (lookahead.code == 'Z') count++;
	2145	}
	2146	} else {
	2147	if (lookahead.howlen == e_number && lookahead.length < items)
	2148	count = lookahead.length;
	2149	else count = items;
	2150	}
	2151	lookahead.howlen = e_number;
	2152	lookahead.length = count;
	2153	lengthcode = sv_2mortal(newSViv(count));
	2154	}
	2155
	2156	needs_swap = NEEDS_SWAP(datumtype);
	2157
	2158	/* Code inside the switch must take care to properly update
	2159	cat (CUR length and '\0' termination) if it updated *cur and
	2160	doesn't simply leave using break */
	2161	switch(TYPE_NO_ENDIANNESS(datumtype)) {
	2162	default:
	2163	Perl_croak(aTHX_ "Invalid type '%c' in pack",
	2164	(int) TYPE_NO_MODIFIERS(datumtype));
	2165	case '%':
	2166	Perl_croak(aTHX_ "'%%' may not be used in pack");
	2167
	2168	case '.' \| TYPE_IS_SHRIEKING:
	2169	case '.':
	2170	if (howlen == e_star) from = start;
	2171	else if (len == 0) from = cur;
	2172	else {
	2173	tempsym_t *group = symptr;
	2174
	2175	while (--len && group) group = group->previous;
	2176	from = group ? start + group->strbeg : start;
	2177	}
	2178	fromstr = NEXTFROM;
	2179	len = SvIV(fromstr);
	2180	goto resize;
	2181	case '@' \| TYPE_IS_SHRIEKING:
	2182	case '@':
	2183	from = start + symptr->strbeg;
	2184	resize:
	2185	if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
	2186	if (len >= 0) {
	2187	while (len && from < cur) {
	2188	from += UTF8SKIP(from);
	2189	len--;
	2190	}
	2191	if (from > cur)
	2192	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2193	if (len) {
	2194	/* Here we know from == cur */
	2195	grow:
	2196	GROWING(0, cat, start, cur, len);
	2197	Zero(cur, len, char);
	2198	cur += len;
	2199	} else if (from < cur) {
	2200	len = cur - from;
	2201	goto shrink;
	2202	} else goto no_change;
	2203	} else {
	2204	cur = from;
	2205	len = -len;
	2206	goto utf8_shrink;
	2207	}
	2208	else {
	2209	len -= cur - from;
	2210	if (len > 0) goto grow;
	2211	if (len == 0) goto no_change;
	2212	len = -len;
	2213	goto shrink;
	2214	}
	2215	break;
	2216
	2217	case '(': {
	2218	tempsym_t savsym = *symptr;
	2219	U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
	2220	symptr->flags \|= group_modifiers;
	2221	symptr->patend = savsym.grpend;
	2222	symptr->level++;
	2223	symptr->previous = &lookahead;
	2224	while (len--) {
	2225	U32 was_utf8;
	2226	if (utf8) symptr->flags \|= FLAG_PARSE_UTF8;
	2227	else symptr->flags &= ~FLAG_PARSE_UTF8;
	2228	was_utf8 = SvUTF8(cat);
	2229	symptr->patptr = savsym.grpbeg;
	2230	beglist = pack_rec(cat, symptr, beglist, endlist);
	2231	if (SvUTF8(cat) != was_utf8)
	2232	/* This had better be an upgrade while in utf8==0 mode */
	2233	utf8 = 1;
	2234
	2235	if (savsym.howlen == e_star && beglist == endlist)
	2236	break; /* No way to continue */
	2237	}
	2238	items = endlist - beglist;
	2239	lookahead.flags = symptr->flags & ~group_modifiers;
	2240	goto no_change;
	2241	}
	2242	case 'X' \| TYPE_IS_SHRIEKING:
	2243	if (!len) /* Avoid division by 0 */
	2244	len = 1;
	2245	if (utf8) {
	2246	char hop, last;
	2247	I32 l = len;
	2248	hop = last = start;
	2249	while (hop < cur) {
	2250	hop += UTF8SKIP(hop);
	2251	if (--l == 0) {
	2252	last = hop;
	2253	l = len;
	2254	}
	2255	}
	2256	if (last > cur)
	2257	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2258	cur = last;
	2259	break;
	2260	}
	2261	len = (cur-start) % len;
	2262	/* FALLTHROUGH */
	2263	case 'X':
	2264	if (utf8) {
	2265	if (len < 1) goto no_change;
	2266	utf8_shrink:
	2267	while (len > 0) {
	2268	if (cur <= start)
	2269	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2270	(int) TYPE_NO_MODIFIERS(datumtype));
	2271	while (--cur, UTF8_IS_CONTINUATION(*cur)) {
	2272	if (cur <= start)
	2273	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2274	(int) TYPE_NO_MODIFIERS(datumtype));
	2275	}
	2276	len--;
	2277	}
	2278	} else {
	2279	shrink:
	2280	if (cur - start < len)
	2281	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2282	(int) TYPE_NO_MODIFIERS(datumtype));
	2283	cur -= len;
	2284	}
	2285	if (cur < start+symptr->strbeg) {
	2286	/* Make sure group starts don't point into the void */
	2287	tempsym_t *group;
	2288	const STRLEN length = cur-start;
	2289	for (group = symptr;
	2290	group && length < group->strbeg;
	2291	group = group->previous) group->strbeg = length;
	2292	lookahead.strbeg = length;
	2293	}
	2294	break;
	2295	case 'x' \| TYPE_IS_SHRIEKING: {
	2296	I32 ai32;
	2297	if (!len) /* Avoid division by 0 */
	2298	len = 1;
	2299	if (utf8) ai32 = utf8_length((U8 ) start, (U8 ) cur) % len;
	2300	else ai32 = (cur - start) % len;
	2301	if (ai32 == 0) goto no_change;
	2302	len -= ai32;
	2303	}
	2304	/* FALLTHROUGH */
	2305	case 'x':
	2306	goto grow;
	2307	case 'A':
	2308	case 'Z':
	2309	case 'a': {
	2310	const char *aptr;
	2311
	2312	fromstr = NEXTFROM;
	2313	aptr = SvPV_const(fromstr, fromlen);
	2314	if (DO_UTF8(fromstr)) {
	2315	const char end, s;
	2316
	2317	if (!utf8 && !SvUTF8(cat)) {
	2318	marked_upgrade(aTHX_ cat, symptr);
	2319	lookahead.flags \|= FLAG_DO_UTF8;
	2320	lookahead.strbeg = symptr->strbeg;
	2321	utf8 = 1;
	2322	start = SvPVX(cat);
	2323	cur = start + SvCUR(cat);
	2324	}
	2325	if (howlen == e_star) {
	2326	if (utf8) goto string_copy;
	2327	len = fromlen+1;
	2328	}
	2329	s = aptr;
	2330	end = aptr + fromlen;
	2331	fromlen = datumtype == 'Z' ? len-1 : len;
	2332	while ((I32) fromlen > 0 && s < end) {
	2333	s += UTF8SKIP(s);
	2334	fromlen--;
	2335	}
	2336	if (s > end)
	2337	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2338	if (utf8) {
	2339	len = fromlen;
	2340	if (datumtype == 'Z') len++;
	2341	fromlen = s-aptr;
	2342	len += fromlen;
	2343
	2344	goto string_copy;
	2345	}
	2346	fromlen = len - fromlen;
	2347	if (datumtype == 'Z') fromlen--;
	2348	if (howlen == e_star) {
	2349	len = fromlen;
	2350	if (datumtype == 'Z') len++;
	2351	}
	2352	GROWING(0, cat, start, cur, len);
	2353	if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
	2354	datumtype \| TYPE_IS_PACK))
	2355	Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
	2356	"for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
	2357	(int)datumtype, aptr, end, cur, (UV)fromlen);
	2358	cur += fromlen;
	2359	len -= fromlen;
	2360	} else if (utf8) {
	2361	if (howlen == e_star) {
	2362	len = fromlen;
	2363	if (datumtype == 'Z') len++;
	2364	}
	2365	if (len <= (I32) fromlen) {
	2366	fromlen = len;
	2367	if (datumtype == 'Z' && fromlen > 0) fromlen--;
	2368	}
	2369	/* assumes a byte expands to at most UTF8_EXPAND bytes on
	2370	upgrade, so:
	2371	expected_length <= from_lenUTF8_EXPAND + (len-from_len) /
	2372	GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
	2373	len -= fromlen;
	2374	while (fromlen > 0) {
	2375	cur = (char ) uvchr_to_utf8((U8 ) cur, * (U8 *) aptr);
	2376	aptr++;
	2377	fromlen--;
	2378	}
	2379	} else {
	2380	string_copy:
	2381	if (howlen == e_star) {
	2382	len = fromlen;
	2383	if (datumtype == 'Z') len++;
	2384	}
	2385	if (len <= (I32) fromlen) {
	2386	fromlen = len;
	2387	if (datumtype == 'Z' && fromlen > 0) fromlen--;
	2388	}
	2389	GROWING(0, cat, start, cur, len);
	2390	Copy(aptr, cur, fromlen, char);
	2391	cur += fromlen;
	2392	len -= fromlen;
	2393	}
	2394	memset(cur, datumtype == 'A' ? ' ' : '\0', len);
	2395	cur += len;
	2396	SvTAINT(cat);
	2397	break;
	2398	}
	2399	case 'B':
	2400	case 'b': {
	2401	const char str, end;
	2402	I32 l, field_len;
	2403	U8 bits;
	2404	bool utf8_source;
	2405	U32 utf8_flags;
	2406
	2407	fromstr = NEXTFROM;
	2408	str = SvPV_const(fromstr, fromlen);
	2409	end = str + fromlen;
	2410	if (DO_UTF8(fromstr)) {
	2411	utf8_source = TRUE;
	2412	utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
	2413	} else {
	2414	utf8_source = FALSE;
	2415	utf8_flags = 0; /* Unused, but keep compilers happy */
	2416	}
	2417	if (howlen == e_star) len = fromlen;
	2418	field_len = (len+7)/8;
	2419	GROWING(utf8, cat, start, cur, field_len);
	2420	if (len > (I32)fromlen) len = fromlen;
	2421	bits = 0;
	2422	l = 0;
	2423	if (datumtype == 'B')
	2424	while (l++ < len) {
	2425	if (utf8_source) {
	2426	UV val = 0;
	2427	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2428	bits \|= val & 1;
	2429	} else bits \|= *str++ & 1;
	2430	if (l & 7) bits <<= 1;
	2431	else {
	2432	PUSH_BYTE(utf8, cur, bits);
	2433	bits = 0;
	2434	}
	2435	}
	2436	else
	2437	/* datumtype == 'b' */
	2438	while (l++ < len) {
	2439	if (utf8_source) {
	2440	UV val = 0;
	2441	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2442	if (val & 1) bits \|= 0x80;
	2443	} else if (*str++ & 1)
	2444	bits \|= 0x80;
	2445	if (l & 7) bits >>= 1;
	2446	else {
	2447	PUSH_BYTE(utf8, cur, bits);
	2448	bits = 0;
	2449	}
	2450	}
	2451	l--;
	2452	if (l & 7) {
	2453	if (datumtype == 'B')
	2454	bits <<= 7 - (l & 7);
	2455	else
	2456	bits >>= 7 - (l & 7);
	2457	PUSH_BYTE(utf8, cur, bits);
	2458	l += 7;
	2459	}
	2460	/* Determine how many chars are left in the requested field */
	2461	l /= 8;
	2462	if (howlen == e_star) field_len = 0;
	2463	else field_len -= l;
	2464	Zero(cur, field_len, char);
	2465	cur += field_len;
	2466	break;
	2467	}
	2468	case 'H':
	2469	case 'h': {
	2470	const char str, end;
	2471	I32 l, field_len;
	2472	U8 bits;
	2473	bool utf8_source;
	2474	U32 utf8_flags;
	2475
	2476	fromstr = NEXTFROM;
	2477	str = SvPV_const(fromstr, fromlen);
	2478	end = str + fromlen;
	2479	if (DO_UTF8(fromstr)) {
	2480	utf8_source = TRUE;
	2481	utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
	2482	} else {
	2483	utf8_source = FALSE;
	2484	utf8_flags = 0; /* Unused, but keep compilers happy */
	2485	}
	2486	if (howlen == e_star) len = fromlen;
	2487	field_len = (len+1)/2;
	2488	GROWING(utf8, cat, start, cur, field_len);
	2489	if (!utf8 && len > (I32)fromlen) len = fromlen;
	2490	bits = 0;
	2491	l = 0;
	2492	if (datumtype == 'H')
	2493	while (l++ < len) {
	2494	if (utf8_source) {
	2495	UV val = 0;
	2496	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2497	if (val < 256 && isALPHA(val))
	2498	bits \|= (val + 9) & 0xf;
	2499	else
	2500	bits \|= val & 0xf;
	2501	} else if (isALPHA(*str))
	2502	bits \|= (*str++ + 9) & 0xf;
	2503	else
	2504	bits \|= *str++ & 0xf;
	2505	if (l & 1) bits <<= 4;
	2506	else {
	2507	PUSH_BYTE(utf8, cur, bits);
	2508	bits = 0;
	2509	}
	2510	}
	2511	else
	2512	while (l++ < len) {
	2513	if (utf8_source) {
	2514	UV val = 0;
	2515	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2516	if (val < 256 && isALPHA(val))
	2517	bits \|= ((val + 9) & 0xf) << 4;
	2518	else
	2519	bits \|= (val & 0xf) << 4;
	2520	} else if (isALPHA(*str))
	2521	bits \|= ((*str++ + 9) & 0xf) << 4;
	2522	else
	2523	bits \|= (*str++ & 0xf) << 4;
	2524	if (l & 1) bits >>= 4;
	2525	else {
	2526	PUSH_BYTE(utf8, cur, bits);
	2527	bits = 0;
	2528	}
	2529	}
	2530	l--;
	2531	if (l & 1) {
	2532	PUSH_BYTE(utf8, cur, bits);
	2533	l++;
	2534	}
	2535	/* Determine how many chars are left in the requested field */
	2536	l /= 2;
	2537	if (howlen == e_star) field_len = 0;
	2538	else field_len -= l;
	2539	Zero(cur, field_len, char);
	2540	cur += field_len;
	2541	break;
	2542	}
	2543	case 'c':
	2544	while (len-- > 0) {
	2545	IV aiv;
	2546	fromstr = NEXTFROM;
	2547	aiv = SvIV(fromstr);
	2548	if ((-128 > aiv \|\| aiv > 127))
	2549	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2550	"Character in 'c' format wrapped in pack");
	2551	PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
	2552	}
	2553	break;
	2554	case 'C':
	2555	if (len == 0) {
	2556	utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
	2557	break;
	2558	}
	2559	while (len-- > 0) {
	2560	IV aiv;
	2561	fromstr = NEXTFROM;
	2562	aiv = SvIV(fromstr);
	2563	if ((0 > aiv \|\| aiv > 0xff))
	2564	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2565	"Character in 'C' format wrapped in pack");
	2566	PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
	2567	}
	2568	break;
	2569	case 'W': {
	2570	char *end;
	2571	U8 in_bytes = (U8)IN_BYTES;
	2572
	2573	end = start+SvLEN(cat)-1;
	2574	if (utf8) end -= UTF8_MAXLEN-1;
	2575	while (len-- > 0) {
	2576	UV auv;
	2577	fromstr = NEXTFROM;
	2578	auv = SvUV(fromstr);
	2579	if (in_bytes) auv = auv % 0x100;
	2580	if (utf8) {
	2581	W_utf8:
	2582	if (cur > end) {
	2583	*cur = '\0';
	2584	SvCUR_set(cat, cur - start);
	2585
	2586	GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
	2587	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2588	}
	2589	cur = (char ) uvchr_to_utf8_flags((U8 ) cur,
	2590	auv,
	2591	warn_utf8 ?
	2592	0 : UNICODE_ALLOW_ANY);
	2593	} else {
	2594	if (auv >= 0x100) {
	2595	if (!SvUTF8(cat)) {
	2596	*cur = '\0';
	2597	SvCUR_set(cat, cur - start);
	2598	marked_upgrade(aTHX_ cat, symptr);
	2599	lookahead.flags \|= FLAG_DO_UTF8;
	2600	lookahead.strbeg = symptr->strbeg;
	2601	utf8 = 1;
	2602	start = SvPVX(cat);
	2603	cur = start + SvCUR(cat);
	2604	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2605	goto W_utf8;
	2606	}
	2607	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2608	"Character in 'W' format wrapped in pack");
	2609	auv &= 0xff;
	2610	}
	2611	if (cur >= end) {
	2612	*cur = '\0';
	2613	SvCUR_set(cat, cur - start);
	2614	GROWING(0, cat, start, cur, len+1);
	2615	end = start+SvLEN(cat)-1;
	2616	}
	2617	(U8 ) cur++ = (U8)auv;
	2618	}
	2619	}
	2620	break;
	2621	}
	2622	case 'U': {
	2623	char *end;
	2624
	2625	if (len == 0) {
	2626	if (!(symptr->flags & FLAG_DO_UTF8)) {
	2627	marked_upgrade(aTHX_ cat, symptr);
	2628	lookahead.flags \|= FLAG_DO_UTF8;
	2629	lookahead.strbeg = symptr->strbeg;
	2630	}
	2631	utf8 = 0;
	2632	goto no_change;
	2633	}
	2634
	2635	end = start+SvLEN(cat);
	2636	if (!utf8) end -= UTF8_MAXLEN;
	2637	while (len-- > 0) {
	2638	UV auv;
	2639	fromstr = NEXTFROM;
	2640	auv = SvUV(fromstr);
	2641	if (utf8) {
	2642	U8 buffer[UTF8_MAXLEN], *endb;
	2643	endb = uvchr_to_utf8_flags(buffer, auv,
	2644	warn_utf8 ?
	2645	0 : UNICODE_ALLOW_ANY);
	2646	if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
	2647	*cur = '\0';
	2648	SvCUR_set(cat, cur - start);
	2649	GROWING(0, cat, start, cur,
	2650	len+(endb-buffer)*UTF8_EXPAND);
	2651	end = start+SvLEN(cat);
	2652	}
	2653	cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
	2654	} else {
	2655	if (cur >= end) {
	2656	*cur = '\0';
	2657	SvCUR_set(cat, cur - start);
	2658	GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
	2659	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2660	}
	2661	cur = (char ) uvchr_to_utf8_flags((U8 ) cur, auv,
	2662	warn_utf8 ?
	2663	0 : UNICODE_ALLOW_ANY);
	2664	}
	2665	}
	2666	break;
	2667	}
	2668	/* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
	2669	case 'f':
	2670	while (len-- > 0) {
	2671	float afloat;
	2672	NV anv;
	2673	fromstr = NEXTFROM;
	2674	anv = SvNV(fromstr);
	2675	# if defined(VMS) && !defined(_IEEE_FP)
	2676	/* IEEE fp overflow shenanigans are unavailable on VAX and optional
	2677	* on Alpha; fake it if we don't have them.
	2678	*/
	2679	if (anv > FLT_MAX)
	2680	afloat = FLT_MAX;
	2681	else if (anv < -FLT_MAX)
	2682	afloat = -FLT_MAX;
	2683	else afloat = (float)anv;
	2684	# else
	2685	afloat = (float)anv;
	2686	# endif
	2687	PUSH_VAR(utf8, cur, afloat, needs_swap);
	2688	}
	2689	break;
	2690	case 'd':
	2691	while (len-- > 0) {
	2692	double adouble;
	2693	NV anv;
	2694	fromstr = NEXTFROM;
	2695	anv = SvNV(fromstr);
	2696	# if defined(VMS) && !defined(_IEEE_FP)
	2697	/* IEEE fp overflow shenanigans are unavailable on VAX and optional
	2698	* on Alpha; fake it if we don't have them.
	2699	*/
	2700	if (anv > DBL_MAX)
	2701	adouble = DBL_MAX;
	2702	else if (anv < -DBL_MAX)
	2703	adouble = -DBL_MAX;
	2704	else adouble = (double)anv;
	2705	# else
	2706	adouble = (double)anv;
	2707	# endif
	2708	PUSH_VAR(utf8, cur, adouble, needs_swap);
	2709	}
	2710	break;
	2711	case 'F': {
	2712	NV_bytes anv;
	2713	Zero(&anv, 1, NV); /* can be long double with unused bits */
	2714	while (len-- > 0) {
	2715	fromstr = NEXTFROM;
	2716	#ifdef __GNUC__
	2717	/* to work round a gcc/x86 bug; don't use SvNV */
	2718	anv.nv = sv_2nv(fromstr);
	2719	#else
	2720	anv.nv = SvNV(fromstr);
	2721	#endif
	2722	PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
	2723	}
	2724	break;
	2725	}
	2726	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	2727	case 'D': {
	2728	ld_bytes aldouble;
	2729	/* long doubles can have unused bits, which may be nonzero */
	2730	Zero(&aldouble, 1, long double);
	2731	while (len-- > 0) {
	2732	fromstr = NEXTFROM;
	2733	# ifdef __GNUC__
	2734	/* to work round a gcc/x86 bug; don't use SvNV */
	2735	aldouble.ld = (long double)sv_2nv(fromstr);
	2736	# else
	2737	aldouble.ld = (long double)SvNV(fromstr);
	2738	# endif
	2739	PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
	2740	needs_swap);
	2741	}
	2742	break;
	2743	}
	2744	#endif
	2745	case 'n' \| TYPE_IS_SHRIEKING:
	2746	case 'n':
	2747	while (len-- > 0) {
	2748	I16 ai16;
	2749	fromstr = NEXTFROM;
	2750	ai16 = (I16)SvIV(fromstr);
	2751	ai16 = PerlSock_htons(ai16);
	2752	PUSH16(utf8, cur, &ai16, FALSE);
	2753	}
	2754	break;
	2755	case 'v' \| TYPE_IS_SHRIEKING:
	2756	case 'v':
	2757	while (len-- > 0) {
	2758	I16 ai16;
	2759	fromstr = NEXTFROM;
	2760	ai16 = (I16)SvIV(fromstr);
	2761	ai16 = htovs(ai16);
	2762	PUSH16(utf8, cur, &ai16, FALSE);
	2763	}
	2764	break;
	2765	case 'S' \| TYPE_IS_SHRIEKING:
	2766	#if SHORTSIZE != SIZE16
	2767	while (len-- > 0) {
	2768	unsigned short aushort;
	2769	fromstr = NEXTFROM;
	2770	aushort = SvUV(fromstr);
	2771	PUSH_VAR(utf8, cur, aushort, needs_swap);
	2772	}
	2773	break;
	2774	#else
	2775	/* FALLTHROUGH */
	2776	#endif
	2777	case 'S':
	2778	while (len-- > 0) {
	2779	U16 au16;
	2780	fromstr = NEXTFROM;
	2781	au16 = (U16)SvUV(fromstr);
	2782	PUSH16(utf8, cur, &au16, needs_swap);
	2783	}
	2784	break;
	2785	case 's' \| TYPE_IS_SHRIEKING:
	2786	#if SHORTSIZE != SIZE16
	2787	while (len-- > 0) {
	2788	short ashort;
	2789	fromstr = NEXTFROM;
	2790	ashort = SvIV(fromstr);
	2791	PUSH_VAR(utf8, cur, ashort, needs_swap);
	2792	}
	2793	break;
	2794	#else
	2795	/* FALLTHROUGH */
	2796	#endif
	2797	case 's':
	2798	while (len-- > 0) {
	2799	I16 ai16;
	2800	fromstr = NEXTFROM;
	2801	ai16 = (I16)SvIV(fromstr);
	2802	PUSH16(utf8, cur, &ai16, needs_swap);
	2803	}
	2804	break;
	2805	case 'I':
	2806	case 'I' \| TYPE_IS_SHRIEKING:
	2807	while (len-- > 0) {
	2808	unsigned int auint;
	2809	fromstr = NEXTFROM;
	2810	auint = SvUV(fromstr);
	2811	PUSH_VAR(utf8, cur, auint, needs_swap);
	2812	}
	2813	break;
	2814	case 'j':
	2815	while (len-- > 0) {
	2816	IV aiv;
	2817	fromstr = NEXTFROM;
	2818	aiv = SvIV(fromstr);
	2819	PUSH_VAR(utf8, cur, aiv, needs_swap);
	2820	}
	2821	break;
	2822	case 'J':
	2823	while (len-- > 0) {
	2824	UV auv;
	2825	fromstr = NEXTFROM;
	2826	auv = SvUV(fromstr);
	2827	PUSH_VAR(utf8, cur, auv, needs_swap);
	2828	}
	2829	break;
	2830	case 'w':
	2831	while (len-- > 0) {
	2832	NV anv;
	2833	fromstr = NEXTFROM;
	2834	anv = SvNV(fromstr);
	2835
	2836	if (anv < 0) {
	2837	*cur = '\0';
	2838	SvCUR_set(cat, cur - start);
	2839	Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
	2840	}
	2841
	2842	/* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
	2843	which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
	2844	any negative IVs will have already been got by the croak()
	2845	above. IOK is untrue for fractions, so we test them
	2846	against UV_MAX_P1. */
	2847	if (SvIOK(fromstr) \|\| anv < UV_MAX_P1) {
	2848	char buf[(sizeof(UV)*CHAR_BIT)/7+1];
	2849	char *in = buf + sizeof(buf);
	2850	UV auv = SvUV(fromstr);
	2851
	2852	do {
	2853	*--in = (char)((auv & 0x7f) \| 0x80);
	2854	auv >>= 7;
	2855	} while (auv);
	2856	buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
	2857	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2858	in, (buf + sizeof(buf)) - in);
	2859	} else if (SvPOKp(fromstr))
	2860	goto w_string;
	2861	else if (SvNOKp(fromstr)) {
	2862	/* 10**NV_MAX_10_EXP is the largest power of 10
	2863	so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
	2864	given 10(NV_MAX_10_EXP+1) == 128 x solve for x:
	2865	x = (NV_MAX_10_EXP+1) * log (10) / log (128)
	2866	And with that many bytes only Inf can overflow.
	2867	Some C compilers are strict about integral constant
	2868	expressions so we conservatively divide by a slightly
	2869	smaller integer instead of multiplying by the exact
	2870	floating-point value.
	2871	*/
	2872	#ifdef NV_MAX_10_EXP
	2873	/* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
	2874	char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
	2875	#else
	2876	/* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
	2877	char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
	2878	#endif
	2879	char *in = buf + sizeof(buf);
	2880
	2881	anv = Perl_floor(anv);
	2882	do {
	2883	const NV next = Perl_floor(anv / 128);
	2884	if (in <= buf) /* this cannot happen ;-) */
	2885	Perl_croak(aTHX_ "Cannot compress integer in pack");
	2886	--in = (unsigned char)(anv - (next 128)) \| 0x80;
	2887	anv = next;
	2888	} while (anv > 0);
	2889	buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
	2890	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2891	in, (buf + sizeof(buf)) - in);
	2892	} else {
	2893	const char *from;
	2894	char result, in;
	2895	SV *norm;
	2896	STRLEN len;
	2897	bool done;
	2898
	2899	w_string:
	2900	/* Copy string and check for compliance */
	2901	from = SvPV_const(fromstr, len);
	2902	if ((norm = is_an_int(from, len)) == NULL)
	2903	Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
	2904
	2905	Newx(result, len, char);
	2906	in = result + len;
	2907	done = FALSE;
	2908	while (!done) *--in = div128(norm, &done) \| 0x80;
	2909	result[len - 1] &= 0x7F; /* clear continue bit */
	2910	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2911	in, (result + len) - in);
	2912	Safefree(result);
	2913	SvREFCNT_dec(norm); /* free norm */
	2914	}
	2915	}
	2916	break;
	2917	case 'i':
	2918	case 'i' \| TYPE_IS_SHRIEKING:
	2919	while (len-- > 0) {
	2920	int aint;
	2921	fromstr = NEXTFROM;
	2922	aint = SvIV(fromstr);
	2923	PUSH_VAR(utf8, cur, aint, needs_swap);
	2924	}
	2925	break;
	2926	case 'N' \| TYPE_IS_SHRIEKING:
	2927	case 'N':
	2928	while (len-- > 0) {
	2929	U32 au32;
	2930	fromstr = NEXTFROM;
	2931	au32 = SvUV(fromstr);
	2932	au32 = PerlSock_htonl(au32);
	2933	PUSH32(utf8, cur, &au32, FALSE);
	2934	}
	2935	break;
	2936	case 'V' \| TYPE_IS_SHRIEKING:
	2937	case 'V':
	2938	while (len-- > 0) {
	2939	U32 au32;
	2940	fromstr = NEXTFROM;
	2941	au32 = SvUV(fromstr);
	2942	au32 = htovl(au32);
	2943	PUSH32(utf8, cur, &au32, FALSE);
	2944	}
	2945	break;
	2946	case 'L' \| TYPE_IS_SHRIEKING:
	2947	#if LONGSIZE != SIZE32
	2948	while (len-- > 0) {
	2949	unsigned long aulong;
	2950	fromstr = NEXTFROM;
	2951	aulong = SvUV(fromstr);
	2952	PUSH_VAR(utf8, cur, aulong, needs_swap);
	2953	}
	2954	break;
	2955	#else
	2956	/* Fall though! */
	2957	#endif
	2958	case 'L':
	2959	while (len-- > 0) {
	2960	U32 au32;
	2961	fromstr = NEXTFROM;
	2962	au32 = SvUV(fromstr);
	2963	PUSH32(utf8, cur, &au32, needs_swap);
	2964	}
	2965	break;
	2966	case 'l' \| TYPE_IS_SHRIEKING:
	2967	#if LONGSIZE != SIZE32
	2968	while (len-- > 0) {
	2969	long along;
	2970	fromstr = NEXTFROM;
	2971	along = SvIV(fromstr);
	2972	PUSH_VAR(utf8, cur, along, needs_swap);
	2973	}
	2974	break;
	2975	#else
	2976	/* Fall though! */
	2977	#endif
	2978	case 'l':
	2979	while (len-- > 0) {
	2980	I32 ai32;
	2981	fromstr = NEXTFROM;
	2982	ai32 = SvIV(fromstr);
	2983	PUSH32(utf8, cur, &ai32, needs_swap);
	2984	}
	2985	break;
	2986	#if defined(HAS_QUAD) && IVSIZE >= 8
	2987	case 'Q':
	2988	while (len-- > 0) {
	2989	Uquad_t auquad;
	2990	fromstr = NEXTFROM;
	2991	auquad = (Uquad_t) SvUV(fromstr);
	2992	PUSH_VAR(utf8, cur, auquad, needs_swap);
	2993	}
	2994	break;
	2995	case 'q':
	2996	while (len-- > 0) {
	2997	Quad_t aquad;
	2998	fromstr = NEXTFROM;
	2999	aquad = (Quad_t)SvIV(fromstr);
	3000	PUSH_VAR(utf8, cur, aquad, needs_swap);
	3001	}
	3002	break;
	3003	#endif
	3004	case 'P':
	3005	len = 1; /* assume SV is correct length */
	3006	GROWING(utf8, cat, start, cur, sizeof(char *));
	3007	/* FALLTHROUGH */
	3008	case 'p':
	3009	while (len-- > 0) {
	3010	const char *aptr;
	3011
	3012	fromstr = NEXTFROM;
	3013	SvGETMAGIC(fromstr);
	3014	if (!SvOK(fromstr)) aptr = NULL;
	3015	else {
	3016	/* XXX better yet, could spirit away the string to
	3017	* a safe spot and hang on to it until the result
	3018	* of pack() (and all copies of the result) are
	3019	* gone.
	3020	*/
	3021	if ((SvTEMP(fromstr) \|\| (SvPADTMP(fromstr) &&
	3022	!SvREADONLY(fromstr)))) {
	3023	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	3024	"Attempt to pack pointer to temporary value");
	3025	}
	3026	if (SvPOK(fromstr) \|\| SvNIOK(fromstr))
	3027	aptr = SvPV_nomg_const_nolen(fromstr);
	3028	else
	3029	aptr = SvPV_force_flags_nolen(fromstr, 0);
	3030	}
	3031	PUSH_VAR(utf8, cur, aptr, needs_swap);
	3032	}
	3033	break;
	3034	case 'u': {
	3035	const char aptr, aend;
	3036	bool from_utf8;
	3037
	3038	fromstr = NEXTFROM;
	3039	if (len <= 2) len = 45;
	3040	else len = len / 3 * 3;
	3041	if (len >= 64) {
	3042	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	3043	"Field too wide in 'u' format in pack");
	3044	len = 63;
	3045	}
	3046	aptr = SvPV_const(fromstr, fromlen);
	3047	from_utf8 = DO_UTF8(fromstr);
	3048	if (from_utf8) {
	3049	aend = aptr + fromlen;
	3050	fromlen = sv_len_utf8_nomg(fromstr);
	3051	} else aend = NULL; /* Unused, but keep compilers happy */
	3052	GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
	3053	while (fromlen > 0) {
	3054	U8 *end;
	3055	I32 todo;
	3056	U8 hunk[1+63/3*4+1];
	3057
	3058	if ((I32)fromlen > len)
	3059	todo = len;
	3060	else
	3061	todo = fromlen;
	3062	if (from_utf8) {
	3063	char buffer[64];
	3064	if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
	3065	'u' \| TYPE_IS_PACK)) {
	3066	*cur = '\0';
	3067	SvCUR_set(cat, cur - start);
	3068	Perl_croak(aTHX_ "panic: string is shorter than advertised, "
	3069	"aptr=%p, aend=%p, buffer=%p, todo=%ld",
	3070	aptr, aend, buffer, (long) todo);
	3071	}
	3072	end = doencodes(hunk, buffer, todo);
	3073	} else {
	3074	end = doencodes(hunk, aptr, todo);
	3075	aptr += todo;
	3076	}
	3077	PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
	3078	fromlen -= todo;
	3079	}
	3080	break;
	3081	}
	3082	}
	3083	*cur = '\0';
	3084	SvCUR_set(cat, cur - start);
	3085	no_change:
	3086	*symptr = lookahead;
	3087	}
	3088	return beglist;
	3089	}
	3090	#undef NEXTFROM
	3091
	3092
	3093	PP(pp_pack)
	3094	{
	3095	dVAR; dSP; dMARK; dORIGMARK; dTARGET;
	3096	SV *cat = TARG;
	3097	STRLEN fromlen;
	3098	SV pat_sv = ++MARK;
	3099	const char *pat = SvPV_const(pat_sv, fromlen);
	3100	const char *patend = pat + fromlen;
	3101
	3102	MARK++;
	3103	sv_setpvs(cat, "");
	3104	SvUTF8_off(cat);
	3105
	3106	packlist(cat, pat, patend, MARK, SP + 1);
	3107
	3108	SvSETMAGIC(cat);
	3109	SP = ORIGMARK;
	3110	PUSHs(cat);
	3111	RETURN;
	3112	}
	3113
	3114	/*
	3115	* Local variables:
	3116	* c-indentation-style: bsd
	3117	* c-basic-offset: 4
	3118	* indent-tabs-mode: nil
	3119	* End:
	3120	*
	3121	* ex: set ts=8 sts=4 sw=4 et:
	3122	*/