perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* pp_pack.c
	2	*
	3	* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
	4	* 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
	5	*
	6	* You may distribute under the terms of either the GNU General Public
	7	* License or the Artistic License, as specified in the README file.
	8	*
	9	*/
	10
	11	/*
	12	* He still hopefully carried some of his gear in his pack: a small tinder-box,
	13	* two small shallow pans, the smaller fitting into the larger; inside them a
	14	* wooden spoon, a short two-pronged fork and some skewers were stowed; and
	15	* hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
	16	* some salt.
	17	*
	18	* [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
	19	*/
	20
	21	/* This file contains pp ("push/pop") functions that
	22	* execute the opcodes that make up a perl program. A typical pp function
	23	* expects to find its arguments on the stack, and usually pushes its
	24	* results onto the stack, hence the 'pp' terminology. Each OP structure
	25	* contains a pointer to the relevant pp_foo() function.
	26	*
	27	* This particular file just contains pp_pack() and pp_unpack(). See the
	28	* other pp*.c files for the rest of the pp_ functions.
	29	*/
	30
	31	#include "EXTERN.h"
	32	#define PERL_IN_PP_PACK_C
	33	#include "perl.h"
	34
	35	/* Types used by pack/unpack */
	36	typedef enum {
	37	e_no_len, /* no length */
	38	e_number, /* number, [] */
	39	e_star /* asterisk */
	40	} howlen_t;
	41
	42	typedef struct tempsym {
	43	const char* patptr; /* current template char */
	44	const char* patend; /* one after last char */
	45	const char* grpbeg; /* 1st char of ()-group */
	46	const char* grpend; /* end of ()-group */
	47	I32 code; /* template code (!<>) */
	48	U32 flags; /* /=4, comma=2, pack=1 */
	49	/* and group modifiers */
	50	SSize_t length; /* length/repeat count */
	51	howlen_t howlen; /* how length is given */
	52	int level; /* () nesting level */
	53	STRLEN strbeg; /* offset of group start */
	54	struct tempsym previous; / previous group */
	55	} tempsym_t;
	56
	57	#define TEMPSYM_INIT(symptr, p, e, f) \
	58	STMT_START { \
	59	(symptr)->patptr = (p); \
	60	(symptr)->patend = (e); \
	61	(symptr)->grpbeg = NULL; \
	62	(symptr)->grpend = NULL; \
	63	(symptr)->grpend = NULL; \
	64	(symptr)->code = 0; \
	65	(symptr)->length = 0; \
	66	(symptr)->howlen = e_no_len; \
	67	(symptr)->level = 0; \
	68	(symptr)->flags = (f); \
	69	(symptr)->strbeg = 0; \
	70	(symptr)->previous = NULL; \
	71	} STMT_END
	72
	73	typedef union {
	74	NV nv;
	75	U8 bytes[sizeof(NV)];
	76	} NV_bytes;
	77
	78	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	79	typedef union {
	80	long double ld;
	81	U8 bytes[sizeof(long double)];
	82	} ld_bytes;
	83	#endif
	84
	85	#ifndef CHAR_BIT
	86	# define CHAR_BIT 8
	87	#endif
	88	/* Maximum number of bytes to which a byte can grow due to upgrade */
	89	#define UTF8_EXPAND 2
	90
	91	/*
	92	* Offset for integer pack/unpack.
	93	*
	94	* On architectures where I16 and I32 aren't really 16 and 32 bits,
	95	* which for now are all Crays, pack and unpack have to play games.
	96	*/
	97
	98	/*
	99	* These values are required for portability of pack() output.
	100	* If they're not right on your machine, then pack() and unpack()
	101	* wouldn't work right anyway; you'll need to apply the Cray hack.
	102	* (I'd like to check them with #if, but you can't use sizeof() in
	103	* the preprocessor.) --???
	104	*/
	105	/*
	106	The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
	107	defines are now in config.h. --Andy Dougherty April 1998
	108	*/
	109	#define SIZE16 2
	110	#define SIZE32 4
	111
	112	/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
	113	--jhi Feb 1999 */
	114
	115	#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
	116	# define OFF16(p) ((char *) (p))
	117	# define OFF32(p) ((char *) (p))
	118	#elif BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 /* little-endian */
	119	# define OFF16(p) ((char*)(p))
	120	# define OFF32(p) ((char*)(p))
	121	#elif BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321 /* big-endian */
	122	# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
	123	# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
	124	#else
	125	# error "bad cray byte order"
	126	#endif
	127
	128	#define PUSH16(utf8, cur, p, needs_swap) \
	129	PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
	130	#define PUSH32(utf8, cur, p, needs_swap) \
	131	PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
	132
	133	#if BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321 /* big-endian */
	134	# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
	135	#elif BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 /* little-endian */
	136	# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
	137	#else
	138	# error "Unsupported byteorder"
	139	/* Need to add code here to re-instate mixed endian support.
	140	NEEDS_SWAP would need to hold a flag indicating which action to
	141	take, and S_reverse_copy and the code in S_utf8_to_bytes would need
	142	logic adding to deal with any mixed-endian transformations needed.
	143	*/
	144	#endif
	145
	146	/* Only to be used inside a loop (see the break) */
	147	#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
	148	STMT_START { \
	149	if (UNLIKELY(utf8)) { \
	150	if (!S_utf8_to_bytes(aTHX_ &s, strend, \
	151	(char *) (buf), len, datumtype)) break; \
	152	} else { \
	153	if (UNLIKELY(needs_swap)) \
	154	S_reverse_copy(s, (char *) (buf), len); \
	155	else \
	156	Copy(s, (char *) (buf), len, char); \
	157	s += len; \
	158	} \
	159	} STMT_END
	160
	161	#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
	162	SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
	163
	164	#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
	165	SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
	166
	167	#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
	168	SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
	169
	170	#define PUSH_VAR(utf8, aptr, var, needs_swap) \
	171	PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
	172
	173	/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
	174	#define MAX_SUB_TEMPLATE_LEVEL 100
	175
	176	/* flags (note that type modifiers can also be used as flags!) */
	177	#define FLAG_WAS_UTF8 0x40
	178	#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
	179	#define FLAG_UNPACK_ONLY_ONE 0x10
	180	#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
	181	#define FLAG_SLASH 0x04
	182	#define FLAG_COMMA 0x02
	183	#define FLAG_PACK 0x01
	184
	185	STATIC SV *
	186	S_mul128(pTHX_ SV *sv, U8 m)
	187	{
	188	STRLEN len;
	189	char *s = SvPV(sv, len);
	190	char *t;
	191
	192	PERL_ARGS_ASSERT_MUL128;
	193
	194	if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
	195	SV * const tmpNew = newSVpvs("0000000000");
	196
	197	sv_catsv(tmpNew, sv);
	198	SvREFCNT_dec(sv); /* free old sv */
	199	sv = tmpNew;
	200	s = SvPV(sv, len);
	201	}
	202	t = s + len - 1;
	203	while (!t) / trailing '\0'? */
	204	t--;
	205	while (t > s) {
	206	const U32 i = ((*t - '0') << 7) + m;
	207	*(t--) = '0' + (char)(i % 10);
	208	m = (char)(i / 10);
	209	}
	210	return (sv);
	211	}
	212
	213	/* Explosives and implosives. */
	214
	215	#define ISUUCHAR(ch) inRANGE(NATIVE_TO_LATIN1(ch), \
	216	NATIVE_TO_LATIN1(' '), \
	217	NATIVE_TO_LATIN1('a') - 1)
	218
	219	/* type modifiers */
	220	#define TYPE_IS_SHRIEKING 0x100
	221	#define TYPE_IS_BIG_ENDIAN 0x200
	222	#define TYPE_IS_LITTLE_ENDIAN 0x400
	223	#define TYPE_IS_PACK 0x800
	224	#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN\|TYPE_IS_LITTLE_ENDIAN)
	225	#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
	226	#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
	227
	228	# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
	229	# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
	230
	231	# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
	232
	233	#define PACK_SIZE_CANNOT_CSUM 0x80
	234	#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
	235	#define PACK_SIZE_MASK 0x3F
	236
	237	#include "packsizetables.inc"
	238
	239	static void
	240	S_reverse_copy(const char src, char dest, STRLEN len)
	241	{
	242	dest += len;
	243	while (len--)
	244	--dest = src++;
	245	}
	246
	247	STATIC U8
	248	utf8_to_byte(pTHX_ const char *s, const char end, I32 datumtype)
	249	{
	250	STRLEN retlen;
	251	UV val;
	252
	253	if (*s >= end) {
	254	goto croak;
	255	}
	256	val = utf8n_to_uvchr((U8 ) s, end-*s, &retlen,
	257	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	258	if (retlen == (STRLEN) -1)
	259	croak:
	260	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
	261	(int) TYPE_NO_MODIFIERS(datumtype));
	262	if (val >= 0x100) {
	263	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
	264	"Character in '%c' format wrapped in unpack",
	265	(int) TYPE_NO_MODIFIERS(datumtype));
	266	val &= 0xff;
	267	}
	268	*s += retlen;
	269	return (U8)val;
	270	}
	271
	272	#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
	273	utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
	274	(U8 )(s)++)
	275
	276	STATIC bool
	277	S_utf8_to_bytes(pTHX_ const char *s, const char end, const char *buf, SSize_t buf_len, I32 datumtype)
	278	{
	279	UV val;
	280	STRLEN retlen;
	281	const char from = s;
	282	int bad = 0;
	283	const U32 flags = ckWARN(WARN_UTF8) ?
	284	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY \| UTF8_ALLOW_ANY);
	285	const bool needs_swap = NEEDS_SWAP(datumtype);
	286
	287	if (UNLIKELY(needs_swap))
	288	buf += buf_len;
	289
	290	for (;buf_len > 0; buf_len--) {
	291	if (from >= end) return FALSE;
	292	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
	293	if (retlen == (STRLEN) -1) {
	294	from += UTF8_SAFE_SKIP(from, end);
	295	bad \|= 1;
	296	} else from += retlen;
	297	if (val >= 0x100) {
	298	bad \|= 2;
	299	val &= 0xff;
	300	}
	301	if (UNLIKELY(needs_swap))
	302	(U8 )--buf = (U8)val;
	303	else
	304	(U8 )buf++ = (U8)val;
	305	}
	306	/* We have enough characters for the buffer. Did we have problems ? */
	307	if (bad) {
	308	if (bad & 1) {
	309	/* Rewalk the string fragment while warning */
	310	const char *ptr;
	311	const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
	312	for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
	313	if (ptr >= end) break;
	314	utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
	315	}
	316	if (from > end) from = end;
	317	}
	318	if ((bad & 2))
	319	Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
	320	WARN_PACK : WARN_UNPACK),
	321	"Character(s) in '%c' format wrapped in %s",
	322	(int) TYPE_NO_MODIFIERS(datumtype),
	323	datumtype & TYPE_IS_PACK ? "pack" : "unpack");
	324	}
	325	*s = from;
	326	return TRUE;
	327	}
	328
	329	STATIC char *
	330	S_my_bytes_to_utf8(const U8 start, STRLEN len, char dest, const bool needs_swap) {
	331	PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
	332
	333	if (UNLIKELY(needs_swap)) {
	334	const U8 *p = start + len;
	335	while (p-- > start) {
	336	append_utf8_from_native_byte(p, (U8 *) & dest);
	337	}
	338	} else {
	339	const U8 * const end = start + len;
	340	while (start < end) {
	341	append_utf8_from_native_byte(start, (U8 *) & dest);
	342	start++;
	343	}
	344	}
	345	return dest;
	346	}
	347
	348	#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
	349	STMT_START { \
	350	if (UNLIKELY(utf8)) \
	351	(cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
	352	else { \
	353	if (UNLIKELY(needs_swap)) \
	354	S_reverse_copy((char *)(buf), cur, len); \
	355	else \
	356	Copy(buf, cur, len, char); \
	357	(cur) += (len); \
	358	} \
	359	} STMT_END
	360
	361	#define SAFE_UTF8_EXPAND(var) \
	362	STMT_START { \
	363	if ((var) > SSize_t_MAX / UTF8_EXPAND) \
	364	Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
	365	(var) = (var) * UTF8_EXPAND; \
	366	} STMT_END
	367
	368	#define GROWING2(utf8, cat, start, cur, item_size, item_count) \
	369	STMT_START { \
	370	if (SSize_t_MAX / (item_size) < (item_count)) \
	371	Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
	372	GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
	373	} STMT_END
	374
	375	#define GROWING(utf8, cat, start, cur, in_len) \
	376	STMT_START { \
	377	STRLEN glen = (in_len); \
	378	STRLEN catcur = (STRLEN)((cur) - (start)); \
	379	if (utf8) SAFE_UTF8_EXPAND(glen); \
	380	if (SSize_t_MAX - glen < catcur) \
	381	Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
	382	if (catcur + glen >= SvLEN(cat)) { \
	383	(start) = sv_exp_grow(cat, glen); \
	384	(cur) = (start) + SvCUR(cat); \
	385	} \
	386	} STMT_END
	387
	388	#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
	389	STMT_START { \
	390	const STRLEN glen = (in_len); \
	391	STRLEN gl = glen; \
	392	if (utf8) SAFE_UTF8_EXPAND(gl); \
	393	if ((cur) + gl >= (start) + SvLEN(cat)) { \
	394	*cur = '\0'; \
	395	SvCUR_set((cat), (cur) - (start)); \
	396	(start) = sv_exp_grow(cat, gl); \
	397	(cur) = (start) + SvCUR(cat); \
	398	} \
	399	PUSH_BYTES(utf8, cur, buf, glen, 0); \
	400	} STMT_END
	401
	402	#define PUSH_BYTE(utf8, s, byte) \
	403	STMT_START { \
	404	if (utf8) { \
	405	const U8 au8 = (byte); \
	406	(s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
	407	} else (U8 )(s)++ = (byte); \
	408	} STMT_END
	409
	410	/* Only to be used inside a loop (see the break) */
	411	#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
	412	STMT_START { \
	413	STRLEN retlen; \
	414	if (str >= end) break; \
	415	val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
	416	if (retlen == (STRLEN) -1) { \
	417	*cur = '\0'; \
	418	Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
	419	} \
	420	str += retlen; \
	421	} STMT_END
	422
	423	static const char _action( const tempsym_t symptr )
	424	{
	425	return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
	426	}
	427
	428	/* Returns the sizeof() struct described by pat */
	429	STATIC SSize_t
	430	S_measure_struct(pTHX_ tempsym_t* symptr)
	431	{
	432	SSize_t total = 0;
	433
	434	PERL_ARGS_ASSERT_MEASURE_STRUCT;
	435
	436	while (next_symbol(symptr)) {
	437	SSize_t len, size;
	438
	439	switch (symptr->howlen) {
	440	case e_star:
	441	Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
	442	_action( symptr ) );
	443
	444	default:
	445	/* e_no_len and e_number */
	446	len = symptr->length;
	447	break;
	448	}
	449
	450	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
	451	if (!size) {
	452	SSize_t star;
	453	/* endianness doesn't influence the size of a type */
	454	switch(TYPE_NO_ENDIANNESS(symptr->code)) {
	455	default:
	456	Perl_croak(aTHX_ "Invalid type '%c' in %s",
	457	(int)TYPE_NO_MODIFIERS(symptr->code),
	458	_action( symptr ) );
	459	case '.' \| TYPE_IS_SHRIEKING:
	460	case '@' \| TYPE_IS_SHRIEKING:
	461	case '@':
	462	case '.':
	463	case '/':
	464	case 'U': /* XXXX Is it correct? */
	465	case 'w':
	466	case 'u':
	467	Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
	468	(int) TYPE_NO_MODIFIERS(symptr->code),
	469	_action( symptr ) );
	470	case '%':
	471	size = 0;
	472	break;
	473	case '(':
	474	{
	475	tempsym_t savsym = *symptr;
	476	symptr->patptr = savsym.grpbeg;
	477	symptr->patend = savsym.grpend;
	478	/* XXXX Theoretically, we need to measure many times at
	479	different positions, since the subexpression may contain
	480	alignment commands, but be not of aligned length.
	481	Need to detect this and croak(). */
	482	size = measure_struct(symptr);
	483	*symptr = savsym;
	484	break;
	485	}
	486	case 'X' \| TYPE_IS_SHRIEKING:
	487	/* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
	488	*/
	489	if (!len) /* Avoid division by 0 */
	490	len = 1;
	491	len = total % len; /* Assumed: the start is aligned. */
	492	/* FALLTHROUGH */
	493	case 'X':
	494	size = -1;
	495	if (total < len)
	496	Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
	497	break;
	498	case 'x' \| TYPE_IS_SHRIEKING:
	499	if (!len) /* Avoid division by 0 */
	500	len = 1;
	501	star = total % len; /* Assumed: the start is aligned. */
	502	if (star) /* Other portable ways? */
	503	len = len - star;
	504	else
	505	len = 0;
	506	/* FALLTHROUGH */
	507	case 'x':
	508	case 'A':
	509	case 'Z':
	510	case 'a':
	511	size = 1;
	512	break;
	513	case 'B':
	514	case 'b':
	515	len = (len + 7)/8;
	516	size = 1;
	517	break;
	518	case 'H':
	519	case 'h':
	520	len = (len + 1)/2;
	521	size = 1;
	522	break;
	523
	524	case 'P':
	525	len = 1;
	526	size = sizeof(char*);
	527	break;
	528	}
	529	}
	530	total += len * size;
	531	}
	532	return total;
	533	}
	534
	535
	536	/* locate matching closing parenthesis or bracket
	537	* returns char pointer to char after match, or NULL
	538	*/
	539	STATIC const char *
	540	S_group_end(pTHX_ const char patptr, const char patend, char ender)
	541	{
	542	PERL_ARGS_ASSERT_GROUP_END;
	543
	544	while (patptr < patend) {
	545	const char c = *patptr++;
	546
	547	if (isSPACE(c))
	548	continue;
	549	else if (c == ender)
	550	return patptr-1;
	551	else if (c == '#') {
	552	while (patptr < patend && *patptr != '\n')
	553	patptr++;
	554	continue;
	555	} else if (c == '(')
	556	patptr = group_end(patptr, patend, ')') + 1;
	557	else if (c == '[')
	558	patptr = group_end(patptr, patend, ']') + 1;
	559	}
	560	Perl_croak(aTHX_ "No group ending character '%c' found in template",
	561	ender);
	562	NOT_REACHED; /* NOTREACHED */
	563	}
	564
	565
	566	/* Convert unsigned decimal number to binary.
	567	* Expects a pointer to the first digit and address of length variable
	568	* Advances char pointer to 1st non-digit char and returns number
	569	*/
	570	STATIC const char *
	571	S_get_num(pTHX_ const char patptr, SSize_t lenptr )
	572	{
	573	SSize_t len = *patptr++ - '0';
	574
	575	PERL_ARGS_ASSERT_GET_NUM;
	576
	577	while (isDIGIT(*patptr)) {
	578	SSize_t nlen = (len * 10) + (*patptr++ - '0');
	579	if (nlen < 0 \|\| nlen/10 != len)
	580	Perl_croak(aTHX_ "pack/unpack repeat count overflow");
	581	len = nlen;
	582	}
	583	*lenptr = len;
	584	return patptr;
	585	}
	586
	587	/* The marvellous template parsing routine: Using state stored in *symptr,
	588	* locates next template code and count
	589	*/
	590	STATIC bool
	591	S_next_symbol(pTHX_ tempsym_t* symptr )
	592	{
	593	const char* patptr = symptr->patptr;
	594	const char* const patend = symptr->patend;
	595
	596	PERL_ARGS_ASSERT_NEXT_SYMBOL;
	597
	598	symptr->flags &= ~FLAG_SLASH;
	599
	600	while (patptr < patend) {
	601	if (isSPACE(*patptr))
	602	patptr++;
	603	else if (*patptr == '#') {
	604	patptr++;
	605	while (patptr < patend && *patptr != '\n')
	606	patptr++;
	607	if (patptr < patend)
	608	patptr++;
	609	} else {
	610	/* We should have found a template code */
	611	I32 code = *patptr++ & 0xFF;
	612	U32 inherited_modifiers = 0;
	613
	614	if (code == ','){ /* grandfather in commas but with a warning */
	615	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
	616	symptr->flags \|= FLAG_COMMA;
	617	Perl_warner(aTHX_ packWARN(WARN_UNPACK),
	618	"Invalid type ',' in %s", _action( symptr ) );
	619	}
	620	continue;
	621	}
	622
	623	/* for '(', skip to ')' */
	624	if (code == '(') {
	625	if( isDIGIT(patptr) \|\| patptr == '' \|\| patptr == '[' )
	626	Perl_croak(aTHX_ "()-group starts with a count in %s",
	627	_action( symptr ) );
	628	symptr->grpbeg = patptr;
	629	patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
	630	if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
	631	Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
	632	_action( symptr ) );
	633	}
	634
	635	/* look for group modifiers to inherit */
	636	if (TYPE_ENDIANNESS(symptr->flags)) {
	637	if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
	638	inherited_modifiers \|= TYPE_ENDIANNESS(symptr->flags);
	639	}
	640
	641	/* look for modifiers */
	642	while (patptr < patend) {
	643	const char *allowed;
	644	I32 modifier;
	645	switch (*patptr) {
	646	case '!':
	647	modifier = TYPE_IS_SHRIEKING;
	648	allowed = "sSiIlLxXnNvV@.";
	649	break;
	650	case '>':
	651	modifier = TYPE_IS_BIG_ENDIAN;
	652	allowed = ENDIANNESS_ALLOWED_TYPES;
	653	break;
	654	case '<':
	655	modifier = TYPE_IS_LITTLE_ENDIAN;
	656	allowed = ENDIANNESS_ALLOWED_TYPES;
	657	break;
	658	default:
	659	allowed = "";
	660	modifier = 0;
	661	break;
	662	}
	663
	664	if (modifier == 0)
	665	break;
	666
	667	if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
	668	Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
	669	allowed, _action( symptr ) );
	670
	671	if (TYPE_ENDIANNESS(code \| modifier) == TYPE_ENDIANNESS_MASK)
	672	Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
	673	(int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
	674	else if (TYPE_ENDIANNESS(code \| modifier \| inherited_modifiers) ==
	675	TYPE_ENDIANNESS_MASK)
	676	Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
	677	*patptr, _action( symptr ) );
	678
	679	if ((code & modifier)) {
	680	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
	681	"Duplicate modifier '%c' after '%c' in %s",
	682	*patptr, (int) TYPE_NO_MODIFIERS(code),
	683	_action( symptr ) );
	684	}
	685
	686	code \|= modifier;
	687	patptr++;
	688	}
	689
	690	/* inherit modifiers */
	691	code \|= inherited_modifiers;
	692
	693	/* look for count and/or / */
	694	if (patptr < patend) {
	695	if (isDIGIT(*patptr)) {
	696	patptr = get_num( patptr, &symptr->length );
	697	symptr->howlen = e_number;
	698
	699	} else if (patptr == '') {
	700	patptr++;
	701	symptr->howlen = e_star;
	702
	703	} else if (*patptr == '[') {
	704	const char* lenptr = ++patptr;
	705	symptr->howlen = e_number;
	706	patptr = group_end( patptr, patend, ']' ) + 1;
	707	/* what kind of [] is it? */
	708	if (isDIGIT(*lenptr)) {
	709	lenptr = get_num( lenptr, &symptr->length );
	710	if( *lenptr != ']' )
	711	Perl_croak(aTHX_ "Malformed integer in [] in %s",
	712	_action( symptr ) );
	713	} else {
	714	tempsym_t savsym = *symptr;
	715	symptr->patend = patptr-1;
	716	symptr->patptr = lenptr;
	717	savsym.length = measure_struct(symptr);
	718	*symptr = savsym;
	719	}
	720	} else {
	721	symptr->howlen = e_no_len;
	722	symptr->length = 1;
	723	}
	724
	725	/* try to find / */
	726	while (patptr < patend) {
	727	if (isSPACE(*patptr))
	728	patptr++;
	729	else if (*patptr == '#') {
	730	patptr++;
	731	while (patptr < patend && *patptr != '\n')
	732	patptr++;
	733	if (patptr < patend)
	734	patptr++;
	735	} else {
	736	if (*patptr == '/') {
	737	symptr->flags \|= FLAG_SLASH;
	738	patptr++;
	739	if (patptr < patend &&
	740	(isDIGIT(patptr) \|\| patptr == '' \|\| patptr == '['))
	741	Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
	742	_action( symptr ) );
	743	}
	744	break;
	745	}
	746	}
	747	} else {
	748	/* at end - no count, no / */
	749	symptr->howlen = e_no_len;
	750	symptr->length = 1;
	751	}
	752
	753	symptr->code = code;
	754	symptr->patptr = patptr;
	755	return TRUE;
	756	}
	757	}
	758	symptr->patptr = patptr;
	759	return FALSE;
	760	}
	761
	762	/*
	763	There is no way to cleanly handle the case where we should process the
	764	string per byte in its upgraded form while it's really in downgraded form
	765	(e.g. estimates like strend-s as an upper bound for the number of
	766	characters left wouldn't work). So if we foresee the need of this
	767	(pattern starts with U or contains U0), we want to work on the encoded
	768	version of the string. Users are advised to upgrade their pack string
	769	themselves if they need to do a lot of unpacks like this on it
	770	*/
	771	STATIC bool
	772	need_utf8(const char pat, const char patend)
	773	{
	774	bool first = TRUE;
	775
	776	PERL_ARGS_ASSERT_NEED_UTF8;
	777
	778	while (pat < patend) {
	779	if (pat[0] == '#') {
	780	pat++;
	781	pat = (const char *) memchr(pat, '\n', patend-pat);
	782	if (!pat) return FALSE;
	783	} else if (pat[0] == 'U') {
	784	if (first \|\| pat[1] == '0') return TRUE;
	785	} else first = FALSE;
	786	pat++;
	787	}
	788	return FALSE;
	789	}
	790
	791	STATIC char
	792	first_symbol(const char pat, const char patend) {
	793	PERL_ARGS_ASSERT_FIRST_SYMBOL;
	794
	795	while (pat < patend) {
	796	if (pat[0] != '#') return pat[0];
	797	pat++;
	798	pat = (const char *) memchr(pat, '\n', patend-pat);
	799	if (!pat) return 0;
	800	pat++;
	801	}
	802	return 0;
	803	}
	804
	805	/*
	806
	807	=for apidoc unpackstring
	808
	809	The engine implementing the C<unpack()> Perl function.
	810
	811	Using the template C<pat..patend>, this function unpacks the string
	812	C<s..strend> into a number of mortal SVs, which it pushes onto the perl
	813	argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
	814	C<SPAGAIN> after the call to this function). It returns the number of
	815	pushed elements.
	816
	817	The C<strend> and C<patend> pointers should point to the byte following the
	818	last character of each string.
	819
	820	Although this function returns its values on the perl argument stack, it
	821	doesn't take any parameters from that stack (and thus in particular
	822	there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
	823	example).
	824
	825	=cut */
	826
	827	SSize_t
	828	Perl_unpackstring(pTHX_ const char pat, const char patend, const char s, const char strend, U32 flags)
	829	{
	830	tempsym_t sym;
	831
	832	PERL_ARGS_ASSERT_UNPACKSTRING;
	833
	834	if (flags & FLAG_DO_UTF8) flags \|= FLAG_WAS_UTF8;
	835	else if (need_utf8(pat, patend)) {
	836	/* We probably should try to avoid this in case a scalar context call
	837	wouldn't get to the "U0" */
	838	STRLEN len = strend - s;
	839	s = (char ) bytes_to_utf8((U8 ) s, &len);
	840	SAVEFREEPV(s);
	841	strend = s + len;
	842	flags \|= FLAG_DO_UTF8;
	843	}
	844
	845	if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
	846	flags \|= FLAG_PARSE_UTF8;
	847
	848	TEMPSYM_INIT(&sym, pat, patend, flags);
	849
	850	return unpack_rec(&sym, s, s, strend, NULL );
	851	}
	852
	853	STATIC SSize_t
	854	S_unpack_rec(pTHX_ tempsym_t* symptr, const char s, const char strbeg, const char strend, const char *new_s )
	855	{
	856	dSP;
	857	SV *sv = NULL;
	858	const SSize_t start_sp_offset = SP - PL_stack_base;
	859	howlen_t howlen;
	860	SSize_t checksum = 0;
	861	UV cuv = 0;
	862	NV cdouble = 0.0;
	863	const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
	864	bool beyond = FALSE;
	865	bool explicit_length;
	866	const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
	867	bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
	868
	869	PERL_ARGS_ASSERT_UNPACK_REC;
	870
	871	symptr->strbeg = s - strbeg;
	872
	873	while (next_symbol(symptr)) {
	874	packprops_t props;
	875	SSize_t len;
	876	I32 datumtype = symptr->code;
	877	bool needs_swap;
	878	/* do first one only unless in list context
	879	/ is implemented by unpacking the count, then popping it from the
	880	stack, so must check that we're not in the middle of a / */
	881	if ( unpack_only_one
	882	&& (SP - PL_stack_base == start_sp_offset + 1)
	883	&& (datumtype != '/') ) /* XXX can this be omitted */
	884	break;
	885
	886	switch (howlen = symptr->howlen) {
	887	case e_star:
	888	len = strend - strbeg; /* long enough */
	889	break;
	890	default:
	891	/* e_no_len and e_number */
	892	len = symptr->length;
	893	break;
	894	}
	895
	896	explicit_length = TRUE;
	897	redo_switch:
	898	beyond = s >= strend;
	899
	900	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
	901	if (props) {
	902	/* props nonzero means we can process this letter. */
	903	const SSize_t size = props & PACK_SIZE_MASK;
	904	const SSize_t howmany = (strend - s) / size;
	905	if (len > howmany)
	906	len = howmany;
	907
	908	if (!checksum \|\| (props & PACK_SIZE_CANNOT_CSUM)) {
	909	if (len && unpack_only_one) len = 1;
	910	EXTEND(SP, len);
	911	EXTEND_MORTAL(len);
	912	}
	913	}
	914
	915	needs_swap = NEEDS_SWAP(datumtype);
	916
	917	switch(TYPE_NO_ENDIANNESS(datumtype)) {
	918	default:
	919	Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
	920
	921	case '%':
	922	if (howlen == e_no_len)
	923	len = 16; /* len is not specified */
	924	checksum = len;
	925	cuv = 0;
	926	cdouble = 0;
	927	continue;
	928
	929	case '(':
	930	{
	931	tempsym_t savsym = *symptr;
	932	const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
	933	symptr->flags \|= group_modifiers;
	934	symptr->patend = savsym.grpend;
	935	symptr->previous = &savsym;
	936	symptr->level++;
	937	PUTBACK;
	938	if (len && unpack_only_one) len = 1;
	939	while (len--) {
	940	symptr->patptr = savsym.grpbeg;
	941	if (utf8) symptr->flags \|= FLAG_PARSE_UTF8;
	942	else symptr->flags &= ~FLAG_PARSE_UTF8;
	943	unpack_rec(symptr, s, strbeg, strend, &s);
	944	if (s == strend && savsym.howlen == e_star)
	945	break; /* No way to continue */
	946	}
	947	SPAGAIN;
	948	savsym.flags = symptr->flags & ~group_modifiers;
	949	*symptr = savsym;
	950	break;
	951	}
	952	case '.' \| TYPE_IS_SHRIEKING:
	953	case '.': {
	954	const char *from;
	955	SV *sv;
	956	const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
	957	if (howlen == e_star) from = strbeg;
	958	else if (len <= 0) from = s;
	959	else {
	960	tempsym_t *group = symptr;
	961
	962	while (--len && group) group = group->previous;
	963	from = group ? strbeg + group->strbeg : strbeg;
	964	}
	965	sv = from <= s ?
	966	newSVuv( u8 ? (UV) utf8_length((const U8)from, (const U8)s) : (UV) (s-from)) :
	967	newSViv(-(u8 ? (IV) utf8_length((const U8)s, (const U8)from) : (IV) (from-s)));
	968	mXPUSHs(sv);
	969	break;
	970	}
	971	case '@' \| TYPE_IS_SHRIEKING:
	972	case '@':
	973	s = strbeg + symptr->strbeg;
	974	if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
	975	{
	976	while (len > 0) {
	977	if (s >= strend)
	978	Perl_croak(aTHX_ "'@' outside of string in unpack");
	979	s += UTF8SKIP(s);
	980	len--;
	981	}
	982	if (s > strend)
	983	Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
	984	} else {
	985	if (strend-s < len)
	986	Perl_croak(aTHX_ "'@' outside of string in unpack");
	987	s += len;
	988	}
	989	break;
	990	case 'X' \| TYPE_IS_SHRIEKING:
	991	if (!len) /* Avoid division by 0 */
	992	len = 1;
	993	if (utf8) {
	994	const char hop, last;
	995	SSize_t l = len;
	996	hop = last = strbeg;
	997	while (hop < s) {
	998	hop += UTF8SKIP(hop);
	999	if (--l == 0) {
	1000	last = hop;
	1001	l = len;
	1002	}
	1003	}
	1004	if (last > s)
	1005	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1006	s = last;
	1007	break;
	1008	}
	1009	len = (s - strbeg) % len;
	1010	/* FALLTHROUGH */
	1011	case 'X':
	1012	if (utf8) {
	1013	while (len > 0) {
	1014	if (s <= strbeg)
	1015	Perl_croak(aTHX_ "'X' outside of string in unpack");
	1016	while (--s, UTF8_IS_CONTINUATION(*s)) {
	1017	if (s <= strbeg)
	1018	Perl_croak(aTHX_ "'X' outside of string in unpack");
	1019	}
	1020	len--;
	1021	}
	1022	} else {
	1023	if (len > s - strbeg)
	1024	Perl_croak(aTHX_ "'X' outside of string in unpack" );
	1025	s -= len;
	1026	}
	1027	break;
	1028	case 'x' \| TYPE_IS_SHRIEKING: {
	1029	SSize_t ai32;
	1030	if (!len) /* Avoid division by 0 */
	1031	len = 1;
	1032	if (utf8) ai32 = utf8_length((U8 ) strbeg, (U8 ) s) % len;
	1033	else ai32 = (s - strbeg) % len;
	1034	if (ai32 == 0) break;
	1035	len -= ai32;
	1036	}
	1037	/* FALLTHROUGH */
	1038	case 'x':
	1039	if (utf8) {
	1040	while (len>0) {
	1041	if (s >= strend)
	1042	Perl_croak(aTHX_ "'x' outside of string in unpack");
	1043	s += UTF8SKIP(s);
	1044	len--;
	1045	}
	1046	} else {
	1047	if (len > strend - s)
	1048	Perl_croak(aTHX_ "'x' outside of string in unpack");
	1049	s += len;
	1050	}
	1051	break;
	1052	case '/':
	1053	Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
	1054
	1055	case 'A':
	1056	case 'Z':
	1057	case 'a':
	1058	if (checksum) {
	1059	/* Preliminary length estimate is assumed done in 'W' */
	1060	if (len > strend - s) len = strend - s;
	1061	goto W_checksum;
	1062	}
	1063	if (utf8) {
	1064	SSize_t l;
	1065	const char *hop;
	1066	for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
	1067	if (hop >= strend) {
	1068	if (hop > strend)
	1069	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1070	break;
	1071	}
	1072	}
	1073	if (hop > strend)
	1074	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1075	len = hop - s;
	1076	} else if (len > strend - s)
	1077	len = strend - s;
	1078
	1079	if (datumtype == 'Z') {
	1080	/* 'Z' strips stuff after first null */
	1081	const char ptr, end;
	1082	end = s + len;
	1083	for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
	1084	sv = newSVpvn(s, ptr-s);
	1085	if (howlen == e_star) /* exact for 'Z' /
	1086	len = ptr-s + (ptr != strend ? 1 : 0);
	1087	} else if (datumtype == 'A') {
	1088	/* 'A' strips both nulls and spaces */
	1089	const char *ptr;
	1090	if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
	1091	for (ptr = s+len-1; ptr >= s; ptr--) {
	1092	if ( *ptr != 0
	1093	&& !UTF8_IS_CONTINUATION(*ptr)
	1094	&& !isSPACE_utf8_safe(ptr, strend))
	1095	{
	1096	break;
	1097	}
	1098	}
	1099	if (ptr >= s) ptr += UTF8SKIP(ptr);
	1100	else ptr++;
	1101	if (ptr > s+len)
	1102	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1103	} else {
	1104	for (ptr = s+len-1; ptr >= s; ptr--)
	1105	if (ptr != 0 && !isSPACE(ptr)) break;
	1106	ptr++;
	1107	}
	1108	sv = newSVpvn(s, ptr-s);
	1109	} else sv = newSVpvn(s, len);
	1110
	1111	if (utf8) {
	1112	SvUTF8_on(sv);
	1113	/* Undo any upgrade done due to need_utf8() */
	1114	if (!(symptr->flags & FLAG_WAS_UTF8))
	1115	sv_utf8_downgrade(sv, 0);
	1116	}
	1117	mXPUSHs(sv);
	1118	s += len;
	1119	break;
	1120	case 'B':
	1121	case 'b': {
	1122	char *str;
	1123	if (howlen == e_star \|\| len > (strend - s) * 8)
	1124	len = (strend - s) * 8;
	1125	if (checksum) {
	1126	if (utf8)
	1127	while (len >= 8 && s < strend) {
	1128	cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
	1129	len -= 8;
	1130	}
	1131	else
	1132	while (len >= 8) {
	1133	cuv += PL_bitcount[(U8 )s++];
	1134	len -= 8;
	1135	}
	1136	if (len && s < strend) {
	1137	U8 bits;
	1138	bits = SHIFT_BYTE(utf8, s, strend, datumtype);
	1139	if (datumtype == 'b')
	1140	while (len-- > 0) {
	1141	if (bits & 1) cuv++;
	1142	bits >>= 1;
	1143	}
	1144	else
	1145	while (len-- > 0) {
	1146	if (bits & 0x80) cuv++;
	1147	bits <<= 1;
	1148	}
	1149	}
	1150	break;
	1151	}
	1152
	1153	sv = sv_2mortal(newSV(len ? len : 1));
	1154	SvPOK_on(sv);
	1155	str = SvPVX(sv);
	1156	if (datumtype == 'b') {
	1157	U8 bits = 0;
	1158	const SSize_t ai32 = len;
	1159	for (len = 0; len < ai32; len++) {
	1160	if (len & 7) bits >>= 1;
	1161	else if (utf8) {
	1162	if (s >= strend) break;
	1163	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1164	} else bits = (U8 ) s++;
	1165	*str++ = bits & 1 ? '1' : '0';
	1166	}
	1167	} else {
	1168	U8 bits = 0;
	1169	const SSize_t ai32 = len;
	1170	for (len = 0; len < ai32; len++) {
	1171	if (len & 7) bits <<= 1;
	1172	else if (utf8) {
	1173	if (s >= strend) break;
	1174	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1175	} else bits = (U8 ) s++;
	1176	*str++ = bits & 0x80 ? '1' : '0';
	1177	}
	1178	}
	1179	*str = '\0';
	1180	SvCUR_set(sv, str - SvPVX_const(sv));
	1181	XPUSHs(sv);
	1182	break;
	1183	}
	1184	case 'H':
	1185	case 'h': {
	1186	char *str = NULL;
	1187	/* Preliminary length estimate, acceptable for utf8 too */
	1188	if (howlen == e_star \|\| len > (strend - s) * 2)
	1189	len = (strend - s) * 2;
	1190	if (!checksum) {
	1191	sv = sv_2mortal(newSV(len ? len : 1));
	1192	SvPOK_on(sv);
	1193	str = SvPVX(sv);
	1194	}
	1195	if (datumtype == 'h') {
	1196	U8 bits = 0;
	1197	SSize_t ai32 = len;
	1198	for (len = 0; len < ai32; len++) {
	1199	if (len & 1) bits >>= 4;
	1200	else if (utf8) {
	1201	if (s >= strend) break;
	1202	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1203	} else bits = * (U8 *) s++;
	1204	if (!checksum)
	1205	*str++ = PL_hexdigit[bits & 15];
	1206	}
	1207	} else {
	1208	U8 bits = 0;
	1209	const SSize_t ai32 = len;
	1210	for (len = 0; len < ai32; len++) {
	1211	if (len & 1) bits <<= 4;
	1212	else if (utf8) {
	1213	if (s >= strend) break;
	1214	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1215	} else bits = (U8 ) s++;
	1216	if (!checksum)
	1217	*str++ = PL_hexdigit[(bits >> 4) & 15];
	1218	}
	1219	}
	1220	if (!checksum) {
	1221	*str = '\0';
	1222	SvCUR_set(sv, str - SvPVX_const(sv));
	1223	XPUSHs(sv);
	1224	}
	1225	break;
	1226	}
	1227	case 'C':
	1228	if (len == 0) {
	1229	if (explicit_length)
	1230	/* Switch to "character" mode */
	1231	utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
	1232	break;
	1233	}
	1234	/* FALLTHROUGH */
	1235	case 'c':
	1236	while (len-- > 0 && s < strend) {
	1237	int aint;
	1238	if (utf8)
	1239	{
	1240	STRLEN retlen;
	1241	aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
	1242	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	1243	if (retlen == (STRLEN) -1)
	1244	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1245	s += retlen;
	1246	}
	1247	else
	1248	aint = (U8 )(s)++;
	1249	if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
	1250	aint -= 256;
	1251	if (!checksum)
	1252	mPUSHi(aint);
	1253	else if (checksum > bits_in_uv)
	1254	cdouble += (NV)aint;
	1255	else
	1256	cuv += aint;
	1257	}
	1258	break;
	1259	case 'W':
	1260	W_checksum:
	1261	if (utf8) {
	1262	while (len-- > 0 && s < strend) {
	1263	STRLEN retlen;
	1264	const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
	1265	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	1266	if (retlen == (STRLEN) -1)
	1267	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1268	s += retlen;
	1269	if (!checksum)
	1270	mPUSHu(val);
	1271	else if (checksum > bits_in_uv)
	1272	cdouble += (NV) val;
	1273	else
	1274	cuv += val;
	1275	}
	1276	} else if (!checksum)
	1277	while (len-- > 0) {
	1278	const U8 ch = (U8 ) s++;
	1279	mPUSHu(ch);
	1280	}
	1281	else if (checksum > bits_in_uv)
	1282	while (len-- > 0) cdouble += (NV) (U8 ) s++;
	1283	else
	1284	while (len-- > 0) cuv += (U8 ) s++;
	1285	break;
	1286	case 'U':
	1287	if (len == 0) {
	1288	if (explicit_length && howlen != e_star) {
	1289	/* Switch to "bytes in UTF-8" mode */
	1290	if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
	1291	else
	1292	/* Should be impossible due to the need_utf8() test */
	1293	Perl_croak(aTHX_ "U0 mode on a byte string");
	1294	}
	1295	break;
	1296	}
	1297	if (len > strend - s) len = strend - s;
	1298	if (!checksum) {
	1299	if (len && unpack_only_one) len = 1;
	1300	EXTEND(SP, len);
	1301	EXTEND_MORTAL(len);
	1302	}
	1303	while (len-- > 0 && s < strend) {
	1304	STRLEN retlen;
	1305	UV auv;
	1306	if (utf8) {
	1307	U8 result[UTF8_MAXLEN+1];
	1308	const char *ptr = s;
	1309	STRLEN len;
	1310	/* Bug: warns about bad utf8 even if we are short on bytes
	1311	and will break out of the loop */
	1312	if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
	1313	'U'))
	1314	break;
	1315	len = UTF8SKIP(result);
	1316	if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
	1317	(char *) &result[1], len-1, 'U')) break;
	1318	auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
	1319	len,
	1320	&retlen,
	1321	UTF8_ALLOW_DEFAULT));
	1322	s = ptr;
	1323	} else {
	1324	auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
	1325	strend - s,
	1326	&retlen,
	1327	UTF8_ALLOW_DEFAULT));
	1328	if (retlen == (STRLEN) -1)
	1329	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1330	s += retlen;
	1331	}
	1332	if (!checksum)
	1333	mPUSHu(auv);
	1334	else if (checksum > bits_in_uv)
	1335	cdouble += (NV) auv;
	1336	else
	1337	cuv += auv;
	1338	}
	1339	break;
	1340	case 's' \| TYPE_IS_SHRIEKING:
	1341	#if SHORTSIZE != SIZE16
	1342	while (len-- > 0) {
	1343	short ashort;
	1344	SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
	1345	if (!checksum)
	1346	mPUSHi(ashort);
	1347	else if (checksum > bits_in_uv)
	1348	cdouble += (NV)ashort;
	1349	else
	1350	cuv += ashort;
	1351	}
	1352	break;
	1353	#else
	1354	/* FALLTHROUGH */
	1355	#endif
	1356	case 's':
	1357	while (len-- > 0) {
	1358	I16 ai16;
	1359
	1360	#if U16SIZE > SIZE16
	1361	ai16 = 0;
	1362	#endif
	1363	SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
	1364	#if U16SIZE > SIZE16
	1365	if (ai16 > 32767)
	1366	ai16 -= 65536;
	1367	#endif
	1368	if (!checksum)
	1369	mPUSHi(ai16);
	1370	else if (checksum > bits_in_uv)
	1371	cdouble += (NV)ai16;
	1372	else
	1373	cuv += ai16;
	1374	}
	1375	break;
	1376	case 'S' \| TYPE_IS_SHRIEKING:
	1377	#if SHORTSIZE != SIZE16
	1378	while (len-- > 0) {
	1379	unsigned short aushort;
	1380	SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
	1381	if (!checksum)
	1382	mPUSHu(aushort);
	1383	else if (checksum > bits_in_uv)
	1384	cdouble += (NV)aushort;
	1385	else
	1386	cuv += aushort;
	1387	}
	1388	break;
	1389	#else
	1390	/* FALLTHROUGH */
	1391	#endif
	1392	case 'v':
	1393	case 'n':
	1394	case 'S':
	1395	while (len-- > 0) {
	1396	U16 au16;
	1397	#if U16SIZE > SIZE16
	1398	au16 = 0;
	1399	#endif
	1400	SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
	1401	if (datumtype == 'n')
	1402	au16 = PerlSock_ntohs(au16);
	1403	if (datumtype == 'v')
	1404	au16 = vtohs(au16);
	1405	if (!checksum)
	1406	mPUSHu(au16);
	1407	else if (checksum > bits_in_uv)
	1408	cdouble += (NV) au16;
	1409	else
	1410	cuv += au16;
	1411	}
	1412	break;
	1413	case 'v' \| TYPE_IS_SHRIEKING:
	1414	case 'n' \| TYPE_IS_SHRIEKING:
	1415	while (len-- > 0) {
	1416	I16 ai16;
	1417	# if U16SIZE > SIZE16
	1418	ai16 = 0;
	1419	# endif
	1420	SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
	1421	/* There should never be any byte-swapping here. */
	1422	assert(!TYPE_ENDIANNESS(datumtype));
	1423	if (datumtype == ('n' \| TYPE_IS_SHRIEKING))
	1424	ai16 = (I16) PerlSock_ntohs((U16) ai16);
	1425	if (datumtype == ('v' \| TYPE_IS_SHRIEKING))
	1426	ai16 = (I16) vtohs((U16) ai16);
	1427	if (!checksum)
	1428	mPUSHi(ai16);
	1429	else if (checksum > bits_in_uv)
	1430	cdouble += (NV) ai16;
	1431	else
	1432	cuv += ai16;
	1433	}
	1434	break;
	1435	case 'i':
	1436	case 'i' \| TYPE_IS_SHRIEKING:
	1437	while (len-- > 0) {
	1438	int aint;
	1439	SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
	1440	if (!checksum)
	1441	mPUSHi(aint);
	1442	else if (checksum > bits_in_uv)
	1443	cdouble += (NV)aint;
	1444	else
	1445	cuv += aint;
	1446	}
	1447	break;
	1448	case 'I':
	1449	case 'I' \| TYPE_IS_SHRIEKING:
	1450	while (len-- > 0) {
	1451	unsigned int auint;
	1452	SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
	1453	if (!checksum)
	1454	mPUSHu(auint);
	1455	else if (checksum > bits_in_uv)
	1456	cdouble += (NV)auint;
	1457	else
	1458	cuv += auint;
	1459	}
	1460	break;
	1461	case 'j':
	1462	while (len-- > 0) {
	1463	IV aiv;
	1464	SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
	1465	if (!checksum)
	1466	mPUSHi(aiv);
	1467	else if (checksum > bits_in_uv)
	1468	cdouble += (NV)aiv;
	1469	else
	1470	cuv += aiv;
	1471	}
	1472	break;
	1473	case 'J':
	1474	while (len-- > 0) {
	1475	UV auv;
	1476	SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
	1477	if (!checksum)
	1478	mPUSHu(auv);
	1479	else if (checksum > bits_in_uv)
	1480	cdouble += (NV)auv;
	1481	else
	1482	cuv += auv;
	1483	}
	1484	break;
	1485	case 'l' \| TYPE_IS_SHRIEKING:
	1486	#if LONGSIZE != SIZE32
	1487	while (len-- > 0) {
	1488	long along;
	1489	SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
	1490	if (!checksum)
	1491	mPUSHi(along);
	1492	else if (checksum > bits_in_uv)
	1493	cdouble += (NV)along;
	1494	else
	1495	cuv += along;
	1496	}
	1497	break;
	1498	#else
	1499	/* FALLTHROUGH */
	1500	#endif
	1501	case 'l':
	1502	while (len-- > 0) {
	1503	I32 ai32;
	1504	#if U32SIZE > SIZE32
	1505	ai32 = 0;
	1506	#endif
	1507	SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
	1508	#if U32SIZE > SIZE32
	1509	if (ai32 > 2147483647) ai32 -= 4294967296;
	1510	#endif
	1511	if (!checksum)
	1512	mPUSHi(ai32);
	1513	else if (checksum > bits_in_uv)
	1514	cdouble += (NV)ai32;
	1515	else
	1516	cuv += ai32;
	1517	}
	1518	break;
	1519	case 'L' \| TYPE_IS_SHRIEKING:
	1520	#if LONGSIZE != SIZE32
	1521	while (len-- > 0) {
	1522	unsigned long aulong;
	1523	SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
	1524	if (!checksum)
	1525	mPUSHu(aulong);
	1526	else if (checksum > bits_in_uv)
	1527	cdouble += (NV)aulong;
	1528	else
	1529	cuv += aulong;
	1530	}
	1531	break;
	1532	#else
	1533	/* FALLTHROUGH */
	1534	#endif
	1535	case 'V':
	1536	case 'N':
	1537	case 'L':
	1538	while (len-- > 0) {
	1539	U32 au32;
	1540	#if U32SIZE > SIZE32
	1541	au32 = 0;
	1542	#endif
	1543	SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
	1544	if (datumtype == 'N')
	1545	au32 = PerlSock_ntohl(au32);
	1546	if (datumtype == 'V')
	1547	au32 = vtohl(au32);
	1548	if (!checksum)
	1549	mPUSHu(au32);
	1550	else if (checksum > bits_in_uv)
	1551	cdouble += (NV)au32;
	1552	else
	1553	cuv += au32;
	1554	}
	1555	break;
	1556	case 'V' \| TYPE_IS_SHRIEKING:
	1557	case 'N' \| TYPE_IS_SHRIEKING:
	1558	while (len-- > 0) {
	1559	I32 ai32;
	1560	#if U32SIZE > SIZE32
	1561	ai32 = 0;
	1562	#endif
	1563	SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
	1564	/* There should never be any byte swapping here. */
	1565	assert(!TYPE_ENDIANNESS(datumtype));
	1566	if (datumtype == ('N' \| TYPE_IS_SHRIEKING))
	1567	ai32 = (I32)PerlSock_ntohl((U32)ai32);
	1568	if (datumtype == ('V' \| TYPE_IS_SHRIEKING))
	1569	ai32 = (I32)vtohl((U32)ai32);
	1570	if (!checksum)
	1571	mPUSHi(ai32);
	1572	else if (checksum > bits_in_uv)
	1573	cdouble += (NV)ai32;
	1574	else
	1575	cuv += ai32;
	1576	}
	1577	break;
	1578	case 'p':
	1579	while (len-- > 0) {
	1580	const char *aptr;
	1581	SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
	1582	/* newSVpv generates undef if aptr is NULL */
	1583	mPUSHs(newSVpv(aptr, 0));
	1584	}
	1585	break;
	1586	case 'w':
	1587	{
	1588	UV auv = 0;
	1589	size_t bytes = 0;
	1590
	1591	while (len > 0 && s < strend) {
	1592	U8 ch;
	1593	ch = SHIFT_BYTE(utf8, s, strend, datumtype);
	1594	auv = (auv << 7) \| (ch & 0x7f);
	1595	/* UTF8_IS_XXXXX not right here because this is a BER, not
	1596	* UTF-8 format - using constant 0x80 */
	1597	if (ch < 0x80) {
	1598	bytes = 0;
	1599	mPUSHu(auv);
	1600	len--;
	1601	auv = 0;
	1602	continue;
	1603	}
	1604	if (++bytes >= sizeof(UV)) { /* promote to string */
	1605	const char *t;
	1606
	1607	sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
	1608	(int)TYPE_DIGITS(UV), auv);
	1609	while (s < strend) {
	1610	ch = SHIFT_BYTE(utf8, s, strend, datumtype);
	1611	sv = mul128(sv, (U8)(ch & 0x7f));
	1612	if (!(ch & 0x80)) {
	1613	bytes = 0;
	1614	break;
	1615	}
	1616	}
	1617	t = SvPV_nolen_const(sv);
	1618	while (*t == '0')
	1619	t++;
	1620	sv_chop(sv, t);
	1621	mPUSHs(sv);
	1622	len--;
	1623	auv = 0;
	1624	}
	1625	}
	1626	if ((s >= strend) && bytes)
	1627	Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
	1628	}
	1629	break;
	1630	case 'P':
	1631	if (symptr->howlen == e_star)
	1632	Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
	1633	EXTEND(SP, 1);
	1634	if (s + sizeof(char*) <= strend) {
	1635	char *aptr;
	1636	SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
	1637	/* newSVpvn generates undef if aptr is NULL */
	1638	PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
	1639	}
	1640	break;
	1641	#if defined(HAS_QUAD) && IVSIZE >= 8
	1642	case 'q':
	1643	while (len-- > 0) {
	1644	Quad_t aquad;
	1645	SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
	1646	if (!checksum)
	1647	mPUSHs(newSViv((IV)aquad));
	1648	else if (checksum > bits_in_uv)
	1649	cdouble += (NV)aquad;
	1650	else
	1651	cuv += aquad;
	1652	}
	1653	break;
	1654	case 'Q':
	1655	while (len-- > 0) {
	1656	Uquad_t auquad;
	1657	SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
	1658	if (!checksum)
	1659	mPUSHs(newSVuv((UV)auquad));
	1660	else if (checksum > bits_in_uv)
	1661	cdouble += (NV)auquad;
	1662	else
	1663	cuv += auquad;
	1664	}
	1665	break;
	1666	#endif
	1667	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
	1668	case 'f':
	1669	while (len-- > 0) {
	1670	float afloat;
	1671	SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
	1672	if (!checksum)
	1673	mPUSHn(afloat);
	1674	else
	1675	cdouble += afloat;
	1676	}
	1677	break;
	1678	case 'd':
	1679	while (len-- > 0) {
	1680	double adouble;
	1681	SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
	1682	if (!checksum)
	1683	mPUSHn(adouble);
	1684	else
	1685	cdouble += adouble;
	1686	}
	1687	break;
	1688	case 'F':
	1689	while (len-- > 0) {
	1690	NV_bytes anv;
	1691	SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
	1692	datumtype, needs_swap);
	1693	if (!checksum)
	1694	mPUSHn(anv.nv);
	1695	else
	1696	cdouble += anv.nv;
	1697	}
	1698	break;
	1699	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	1700	case 'D':
	1701	while (len-- > 0) {
	1702	ld_bytes aldouble;
	1703	SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
	1704	sizeof(aldouble.bytes), datumtype, needs_swap);
	1705	/* The most common long double format, the x86 80-bit
	1706	* extended precision, has either 2 or 6 unused bytes,
	1707	* which may contain garbage, which may contain
	1708	* unintentional data. While we do zero the bytes of
	1709	* the long double data in pack(), here in unpack() we
	1710	* don't, because it's really hard to envision that
	1711	* reading the long double off aldouble would be
	1712	* affected by the unused bytes.
	1713	*
	1714	* Note that trying to unpack 'long doubles' of 'long
	1715	* doubles' packed in another system is in the general
	1716	* case doomed without having more detail. */
	1717	if (!checksum)
	1718	mPUSHn(aldouble.ld);
	1719	else
	1720	cdouble += aldouble.ld;
	1721	}
	1722	break;
	1723	#endif
	1724	case 'u':
	1725	if (!checksum) {
	1726	const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
	1727	sv = sv_2mortal(newSV(l));
	1728	if (l) {
	1729	SvPOK_on(sv);
	1730	*SvEND(sv) = '\0';
	1731	}
	1732	}
	1733
	1734	/* Note that all legal uuencoded strings are ASCII printables, so
	1735	* have the same representation under UTF-8 vs not. This means we
	1736	* can ignore UTF8ness on legal input. For illegal we stop at the
	1737	* first failure, and don't report where/what that is, so again we
	1738	* can ignore UTF8ness */
	1739
	1740	while (s < strend && s != ' ' && ISUUCHAR(s)) {
	1741	I32 a, b, c, d;
	1742	char hunk[3];
	1743
	1744	len = PL_uudmap[(U8)s++] & 077;
	1745	while (len > 0) {
	1746	if (s < strend && ISUUCHAR(*s))
	1747	a = PL_uudmap[(U8)s++] & 077;
	1748	else
	1749	a = 0;
	1750	if (s < strend && ISUUCHAR(*s))
	1751	b = PL_uudmap[(U8)s++] & 077;
	1752	else
	1753	b = 0;
	1754	if (s < strend && ISUUCHAR(*s))
	1755	c = PL_uudmap[(U8)s++] & 077;
	1756	else
	1757	c = 0;
	1758	if (s < strend && ISUUCHAR(*s))
	1759	d = PL_uudmap[(U8)s++] & 077;
	1760	else
	1761	d = 0;
	1762	hunk[0] = (char)((a << 2) \| (b >> 4));
	1763	hunk[1] = (char)((b << 4) \| (c >> 2));
	1764	hunk[2] = (char)((c << 6) \| d);
	1765	if (!checksum)
	1766	sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
	1767	len -= 3;
	1768	}
	1769	if (*s == '\n')
	1770	s++;
	1771	else /* possible checksum byte */
	1772	if (s + 1 < strend && s[1] == '\n')
	1773	s += 2;
	1774	}
	1775	if (!checksum)
	1776	XPUSHs(sv);
	1777	break;
	1778	} /* End of switch */
	1779
	1780	if (checksum) {
	1781	if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) \|\|
	1782	(checksum > bits_in_uv &&
	1783	memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
	1784	NV trouble, anv;
	1785
	1786	anv = (NV) (1 << (checksum & 15));
	1787	while (checksum >= 16) {
	1788	checksum -= 16;
	1789	anv *= 65536.0;
	1790	}
	1791	while (cdouble < 0.0)
	1792	cdouble += anv;
	1793	cdouble = Perl_modf(cdouble / anv, &trouble);
	1794	#ifdef LONGDOUBLE_DOUBLEDOUBLE
	1795	/* Workaround for powerpc doubledouble modfl bug:
	1796	* close to 1.0L and -1.0L cdouble is 0, and trouble
	1797	* is cdouble / anv. */
	1798	if (trouble != Perl_ceil(trouble)) {
	1799	cdouble = trouble;
	1800	if (cdouble > 1.0L) cdouble -= 1.0L;
	1801	if (cdouble < -1.0L) cdouble += 1.0L;
	1802	}
	1803	#endif
	1804	cdouble *= anv;
	1805	sv = newSVnv(cdouble);
	1806	}
	1807	else {
	1808	if (checksum < bits_in_uv) {
	1809	UV mask = nBIT_MASK(checksum);
	1810	cuv &= mask;
	1811	}
	1812	sv = newSVuv(cuv);
	1813	}
	1814	mXPUSHs(sv);
	1815	checksum = 0;
	1816	}
	1817
	1818	if (symptr->flags & FLAG_SLASH){
	1819	if (SP - PL_stack_base - start_sp_offset <= 0)
	1820	break;
	1821	if( next_symbol(symptr) ){
	1822	if( symptr->howlen == e_number )
	1823	Perl_croak(aTHX_ "Count after length/code in unpack" );
	1824	if( beyond ){
	1825	/* ...end of char buffer then no decent length available */
	1826	Perl_croak(aTHX_ "length/code after end of string in unpack" );
	1827	} else {
	1828	/* take top of stack (hope it's numeric) */
	1829	len = POPi;
	1830	if( len < 0 )
	1831	Perl_croak(aTHX_ "Negative '/' count in unpack" );
	1832	}
	1833	} else {
	1834	Perl_croak(aTHX_ "Code missing after '/' in unpack" );
	1835	}
	1836	datumtype = symptr->code;
	1837	explicit_length = FALSE;
	1838	goto redo_switch;
	1839	}
	1840	}
	1841
	1842	if (new_s)
	1843	*new_s = s;
	1844	PUTBACK;
	1845	return SP - PL_stack_base - start_sp_offset;
	1846	}
	1847
	1848	PP(pp_unpack)
	1849	{
	1850	dSP;
	1851	dPOPPOPssrl;
	1852	U8 gimme = GIMME_V;
	1853	STRLEN llen;
	1854	STRLEN rlen;
	1855	const char *pat = SvPV_const(left, llen);
	1856	const char *s = SvPV_const(right, rlen);
	1857	const char *strend = s + rlen;
	1858	const char *patend = pat + llen;
	1859	SSize_t cnt;
	1860
	1861	PUTBACK;
	1862	cnt = unpackstring(pat, patend, s, strend,
	1863	((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
	1864	\| (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
	1865
	1866	SPAGAIN;
	1867	if ( !cnt && gimme == G_SCALAR )
	1868	PUSHs(&PL_sv_undef);
	1869	RETURN;
	1870	}
	1871
	1872	STATIC U8 *
	1873	doencodes(U8 h, const U8 s, SSize_t len)
	1874	{
	1875	*h++ = PL_uuemap[len];
	1876	while (len > 2) {
	1877	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
	1878	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) \| ((s[1] >> 4) & 017)))];
	1879	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) \| ((s[2] >> 6) & 03)))];
	1880	*h++ = PL_uuemap[(077 & (s[2] & 077))];
	1881	s += 3;
	1882	len -= 3;
	1883	}
	1884	if (len > 0) {
	1885	const U8 r = (len > 1 ? s[1] : '\0');
	1886	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
	1887	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) \| ((r >> 4) & 017)))];
	1888	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
	1889	*h++ = PL_uuemap[0];
	1890	}
	1891	*h++ = '\n';
	1892	return h;
	1893	}
	1894
	1895	STATIC SV *
	1896	S_is_an_int(pTHX_ const char *s, STRLEN l)
	1897	{
	1898	SV *result = newSVpvn(s, l);
	1899	char const result_c = SvPV_nolen(result); / convenience */
	1900	char *out = result_c;
	1901	bool skip = 1;
	1902	bool ignore = 0;
	1903
	1904	PERL_ARGS_ASSERT_IS_AN_INT;
	1905
	1906	while (*s) {
	1907	switch (*s) {
	1908	case ' ':
	1909	break;
	1910	case '+':
	1911	if (!skip) {
	1912	SvREFCNT_dec(result);
	1913	return (NULL);
	1914	}
	1915	break;
	1916	case '0':
	1917	case '1':
	1918	case '2':
	1919	case '3':
	1920	case '4':
	1921	case '5':
	1922	case '6':
	1923	case '7':
	1924	case '8':
	1925	case '9':
	1926	skip = 0;
	1927	if (!ignore) {
	1928	(out++) = s;
	1929	}
	1930	break;
	1931	case '.':
	1932	ignore = 1;
	1933	break;
	1934	default:
	1935	SvREFCNT_dec(result);
	1936	return (NULL);
	1937	}
	1938	s++;
	1939	}
	1940	*(out++) = '\0';
	1941	SvCUR_set(result, out - result_c);
	1942	return (result);
	1943	}
	1944
	1945	/* pnum must be '\0' terminated */
	1946	STATIC int
	1947	S_div128(pTHX_ SV pnum, bool done)
	1948	{
	1949	STRLEN len;
	1950	char * const s = SvPV(pnum, len);
	1951	char *t = s;
	1952	int m = 0;
	1953
	1954	PERL_ARGS_ASSERT_DIV128;
	1955
	1956	*done = 1;
	1957	while (*t) {
	1958	const int i = m * 10 + (*t - '0');
	1959	const int r = (i >> 7); /* r < 10 */
	1960	m = i & 0x7F;
	1961	if (r) {
	1962	*done = 0;
	1963	}
	1964	*(t++) = '0' + r;
	1965	}
	1966	*(t++) = '\0';
	1967	SvCUR_set(pnum, (STRLEN) (t - s));
	1968	return (m);
	1969	}
	1970
	1971	/*
	1972	=for apidoc packlist
	1973
	1974	The engine implementing C<pack()> Perl function.
	1975
	1976	=cut
	1977	*/
	1978
	1979	void
	1980	Perl_packlist(pTHX_ SV cat, const char pat, const char patend, SV beglist, SV *endlist )
	1981	{
	1982	tempsym_t sym;
	1983
	1984	PERL_ARGS_ASSERT_PACKLIST;
	1985
	1986	TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
	1987
	1988	/* We're going to do changes through SvPVX(cat). Make sure it's valid.
	1989	Also make sure any UTF8 flag is loaded */
	1990	SvPV_force_nolen(cat);
	1991	if (DO_UTF8(cat))
	1992	sym.flags \|= FLAG_PARSE_UTF8 \| FLAG_DO_UTF8;
	1993
	1994	(void)pack_rec( cat, &sym, beglist, endlist );
	1995	}
	1996
	1997	/* like sv_utf8_upgrade, but also repoint the group start markers */
	1998	STATIC void
	1999	marked_upgrade(pTHX_ SV sv, tempsym_t sym_ptr) {
	2000	STRLEN len;
	2001	tempsym_t *group;
	2002	const char from_ptr, from_start, from_end, marks, *m;
	2003	char to_start, to_ptr;
	2004
	2005	if (SvUTF8(sv)) return;
	2006
	2007	from_start = SvPVX_const(sv);
	2008	from_end = from_start + SvCUR(sv);
	2009	for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
	2010	if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
	2011	if (from_ptr == from_end) {
	2012	/* Simple case: no character needs to be changed */
	2013	SvUTF8_on(sv);
	2014	return;
	2015	}
	2016
	2017	len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
	2018	Newx(to_start, len, char);
	2019	Copy(from_start, to_start, from_ptr-from_start, char);
	2020	to_ptr = to_start + (from_ptr-from_start);
	2021
	2022	Newx(marks, sym_ptr->level+2, const char *);
	2023	for (group=sym_ptr; group; group = group->previous)
	2024	marks[group->level] = from_start + group->strbeg;
	2025	marks[sym_ptr->level+1] = from_end+1;
	2026	for (m = marks; *m < from_ptr; m++)
	2027	m = to_start + (m-from_start);
	2028
	2029	for (;from_ptr < from_end; from_ptr++) {
	2030	while (m == from_ptr) m++ = to_ptr;
	2031	to_ptr = (char ) uvchr_to_utf8((U8 ) to_ptr, (U8 ) from_ptr);
	2032	}
	2033	*to_ptr = 0;
	2034
	2035	while (m == from_ptr) m++ = to_ptr;
	2036	if (m != marks + sym_ptr->level+1) {
	2037	Safefree(marks);
	2038	Safefree(to_start);
	2039	Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
	2040	"level=%d", m, marks, sym_ptr->level);
	2041	}
	2042	for (group=sym_ptr; group; group = group->previous)
	2043	group->strbeg = marks[group->level] - to_start;
	2044	Safefree(marks);
	2045
	2046	if (SvOOK(sv)) {
	2047	if (SvIVX(sv)) {
	2048	SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
	2049	from_start -= SvIVX(sv);
	2050	SvIV_set(sv, 0);
	2051	}
	2052	SvFLAGS(sv) &= ~SVf_OOK;
	2053	}
	2054	if (SvLEN(sv) != 0)
	2055	Safefree(from_start);
	2056	SvPV_set(sv, to_start);
	2057	SvCUR_set(sv, to_ptr - to_start);
	2058	SvLEN_set(sv, len);
	2059	SvUTF8_on(sv);
	2060	}
	2061
	2062	/* Exponential string grower. Makes string extension effectively O(n)
	2063	needed says how many extra bytes we need (not counting the final '\0')
	2064	Only grows the string if there is an actual lack of space
	2065	*/
	2066	STATIC char *
	2067	S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
	2068	const STRLEN cur = SvCUR(sv);
	2069	const STRLEN len = SvLEN(sv);
	2070	STRLEN extend;
	2071
	2072	PERL_ARGS_ASSERT_SV_EXP_GROW;
	2073
	2074	if (len - cur > needed) return SvPVX(sv);
	2075	extend = needed > len ? needed : len;
	2076	return SvGROW(sv, len+extend+1);
	2077	}
	2078
	2079	static SV *
	2080	S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
	2081	{
	2082	SvGETMAGIC(sv);
	2083	if (UNLIKELY(SvAMAGIC(sv)))
	2084	sv = sv_2num(sv);
	2085	if (UNLIKELY(isinfnansv(sv))) {
	2086	const I32 c = TYPE_NO_MODIFIERS(datumtype);
	2087	const NV nv = SvNV_nomg(sv);
	2088	if (c == 'w')
	2089	Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
	2090	else
	2091	Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
	2092	}
	2093	return sv;
	2094	}
	2095
	2096	#define SvIV_no_inf(sv,d) \
	2097	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
	2098	#define SvUV_no_inf(sv,d) \
	2099	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
	2100
	2101	STATIC
	2102	SV **
	2103	S_pack_rec(pTHX_ SV cat, tempsym_t symptr, SV beglist, SV endlist )
	2104	{
	2105	tempsym_t lookahead;
	2106	SSize_t items = endlist - beglist;
	2107	bool found = next_symbol(symptr);
	2108	bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
	2109	bool warn_utf8 = ckWARN(WARN_UTF8);
	2110	char* from;
	2111
	2112	PERL_ARGS_ASSERT_PACK_REC;
	2113
	2114	if (symptr->level == 0 && found && symptr->code == 'U') {
	2115	marked_upgrade(aTHX_ cat, symptr);
	2116	symptr->flags \|= FLAG_DO_UTF8;
	2117	utf8 = 0;
	2118	}
	2119	symptr->strbeg = SvCUR(cat);
	2120
	2121	while (found) {
	2122	SV *fromstr;
	2123	STRLEN fromlen;
	2124	SSize_t len;
	2125	SV *lengthcode = NULL;
	2126	I32 datumtype = symptr->code;
	2127	howlen_t howlen = symptr->howlen;
	2128	char *start = SvPVX(cat);
	2129	char *cur = start + SvCUR(cat);
	2130	bool needs_swap;
	2131
	2132	#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
	2133	#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
	2134
	2135	switch (howlen) {
	2136	case e_star:
	2137	len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
	2138	0 : items;
	2139	break;
	2140	default:
	2141	/* e_no_len and e_number */
	2142	len = symptr->length;
	2143	break;
	2144	}
	2145
	2146	if (len) {
	2147	packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
	2148
	2149	if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
	2150	/* We can process this letter. */
	2151	STRLEN size = props & PACK_SIZE_MASK;
	2152	GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
	2153	}
	2154	}
	2155
	2156	/* Look ahead for next symbol. Do we have code/code? */
	2157	lookahead = *symptr;
	2158	found = next_symbol(&lookahead);
	2159	if (symptr->flags & FLAG_SLASH) {
	2160	IV count;
	2161	if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
	2162	if (memCHRs("aAZ", lookahead.code)) {
	2163	if (lookahead.howlen == e_number) count = lookahead.length;
	2164	else {
	2165	if (items > 0) {
	2166	count = sv_len_utf8(*beglist);
	2167	}
	2168	else count = 0;
	2169	if (lookahead.code == 'Z') count++;
	2170	}
	2171	} else {
	2172	if (lookahead.howlen == e_number && lookahead.length < items)
	2173	count = lookahead.length;
	2174	else count = items;
	2175	}
	2176	lookahead.howlen = e_number;
	2177	lookahead.length = count;
	2178	lengthcode = sv_2mortal(newSViv(count));
	2179	}
	2180
	2181	needs_swap = NEEDS_SWAP(datumtype);
	2182
	2183	/* Code inside the switch must take care to properly update
	2184	cat (CUR length and '\0' termination) if it updated *cur and
	2185	doesn't simply leave using break */
	2186	switch (TYPE_NO_ENDIANNESS(datumtype)) {
	2187	default:
	2188	Perl_croak(aTHX_ "Invalid type '%c' in pack",
	2189	(int) TYPE_NO_MODIFIERS(datumtype));
	2190	case '%':
	2191	Perl_croak(aTHX_ "'%%' may not be used in pack");
	2192
	2193	case '.' \| TYPE_IS_SHRIEKING:
	2194	case '.':
	2195	if (howlen == e_star) from = start;
	2196	else if (len == 0) from = cur;
	2197	else {
	2198	tempsym_t *group = symptr;
	2199
	2200	while (--len && group) group = group->previous;
	2201	from = group ? start + group->strbeg : start;
	2202	}
	2203	fromstr = NEXTFROM;
	2204	len = SvIV_no_inf(fromstr, datumtype);
	2205	goto resize;
	2206	case '@' \| TYPE_IS_SHRIEKING:
	2207	case '@':
	2208	from = start + symptr->strbeg;
	2209	resize:
	2210	if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
	2211	if (len >= 0) {
	2212	while (len && from < cur) {
	2213	from += UTF8SKIP(from);
	2214	len--;
	2215	}
	2216	if (from > cur)
	2217	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2218	if (len) {
	2219	/* Here we know from == cur */
	2220	grow:
	2221	GROWING(0, cat, start, cur, len);
	2222	Zero(cur, len, char);
	2223	cur += len;
	2224	} else if (from < cur) {
	2225	len = cur - from;
	2226	goto shrink;
	2227	} else goto no_change;
	2228	} else {
	2229	cur = from;
	2230	len = -len;
	2231	goto utf8_shrink;
	2232	}
	2233	else {
	2234	len -= cur - from;
	2235	if (len > 0) goto grow;
	2236	if (len == 0) goto no_change;
	2237	len = -len;
	2238	goto shrink;
	2239	}
	2240	break;
	2241
	2242	case '(': {
	2243	tempsym_t savsym = *symptr;
	2244	U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
	2245	symptr->flags \|= group_modifiers;
	2246	symptr->patend = savsym.grpend;
	2247	symptr->level++;
	2248	symptr->previous = &lookahead;
	2249	while (len--) {
	2250	U32 was_utf8;
	2251	if (utf8) symptr->flags \|= FLAG_PARSE_UTF8;
	2252	else symptr->flags &= ~FLAG_PARSE_UTF8;
	2253	was_utf8 = SvUTF8(cat);
	2254	symptr->patptr = savsym.grpbeg;
	2255	beglist = pack_rec(cat, symptr, beglist, endlist);
	2256	if (SvUTF8(cat) != was_utf8)
	2257	/* This had better be an upgrade while in utf8==0 mode */
	2258	utf8 = 1;
	2259
	2260	if (savsym.howlen == e_star && beglist == endlist)
	2261	break; /* No way to continue */
	2262	}
	2263	items = endlist - beglist;
	2264	lookahead.flags = symptr->flags & ~group_modifiers;
	2265	goto no_change;
	2266	}
	2267	case 'X' \| TYPE_IS_SHRIEKING:
	2268	if (!len) /* Avoid division by 0 */
	2269	len = 1;
	2270	if (utf8) {
	2271	char hop, last;
	2272	SSize_t l = len;
	2273	hop = last = start;
	2274	while (hop < cur) {
	2275	hop += UTF8SKIP(hop);
	2276	if (--l == 0) {
	2277	last = hop;
	2278	l = len;
	2279	}
	2280	}
	2281	if (last > cur)
	2282	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2283	cur = last;
	2284	break;
	2285	}
	2286	len = (cur-start) % len;
	2287	/* FALLTHROUGH */
	2288	case 'X':
	2289	if (utf8) {
	2290	if (len < 1) goto no_change;
	2291	utf8_shrink:
	2292	while (len > 0) {
	2293	if (cur <= start)
	2294	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2295	(int) TYPE_NO_MODIFIERS(datumtype));
	2296	while (--cur, UTF8_IS_CONTINUATION(*cur)) {
	2297	if (cur <= start)
	2298	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2299	(int) TYPE_NO_MODIFIERS(datumtype));
	2300	}
	2301	len--;
	2302	}
	2303	} else {
	2304	shrink:
	2305	if (cur - start < len)
	2306	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2307	(int) TYPE_NO_MODIFIERS(datumtype));
	2308	cur -= len;
	2309	}
	2310	if (cur < start+symptr->strbeg) {
	2311	/* Make sure group starts don't point into the void */
	2312	tempsym_t *group;
	2313	const STRLEN length = cur-start;
	2314	for (group = symptr;
	2315	group && length < group->strbeg;
	2316	group = group->previous) group->strbeg = length;
	2317	lookahead.strbeg = length;
	2318	}
	2319	break;
	2320	case 'x' \| TYPE_IS_SHRIEKING: {
	2321	SSize_t ai32;
	2322	if (!len) /* Avoid division by 0 */
	2323	len = 1;
	2324	if (utf8) ai32 = utf8_length((U8 ) start, (U8 ) cur) % len;
	2325	else ai32 = (cur - start) % len;
	2326	if (ai32 == 0) goto no_change;
	2327	len -= ai32;
	2328	}
	2329	/* FALLTHROUGH */
	2330	case 'x':
	2331	goto grow;
	2332	case 'A':
	2333	case 'Z':
	2334	case 'a': {
	2335	const char *aptr;
	2336
	2337	fromstr = NEXTFROM;
	2338	aptr = SvPV_const(fromstr, fromlen);
	2339	if (DO_UTF8(fromstr)) {
	2340	const char end, s;
	2341
	2342	if (!utf8 && !SvUTF8(cat)) {
	2343	marked_upgrade(aTHX_ cat, symptr);
	2344	lookahead.flags \|= FLAG_DO_UTF8;
	2345	lookahead.strbeg = symptr->strbeg;
	2346	utf8 = 1;
	2347	start = SvPVX(cat);
	2348	cur = start + SvCUR(cat);
	2349	}
	2350	if (howlen == e_star) {
	2351	if (utf8) goto string_copy;
	2352	len = fromlen+1;
	2353	}
	2354	s = aptr;
	2355	end = aptr + fromlen;
	2356	fromlen = datumtype == 'Z' ? len-1 : len;
	2357	while ((SSize_t) fromlen > 0 && s < end) {
	2358	s += UTF8SKIP(s);
	2359	fromlen--;
	2360	}
	2361	if (s > end)
	2362	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2363	if (utf8) {
	2364	len = fromlen;
	2365	if (datumtype == 'Z') len++;
	2366	fromlen = s-aptr;
	2367	len += fromlen;
	2368
	2369	goto string_copy;
	2370	}
	2371	fromlen = len - fromlen;
	2372	if (datumtype == 'Z') fromlen--;
	2373	if (howlen == e_star) {
	2374	len = fromlen;
	2375	if (datumtype == 'Z') len++;
	2376	}
	2377	GROWING(0, cat, start, cur, len);
	2378	if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
	2379	datumtype \| TYPE_IS_PACK))
	2380	Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
	2381	"for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
	2382	(int)datumtype, aptr, end, cur, fromlen);
	2383	cur += fromlen;
	2384	len -= fromlen;
	2385	} else if (utf8) {
	2386	if (howlen == e_star) {
	2387	len = fromlen;
	2388	if (datumtype == 'Z') len++;
	2389	}
	2390	if (len <= (SSize_t) fromlen) {
	2391	fromlen = len;
	2392	if (datumtype == 'Z' && fromlen > 0) fromlen--;
	2393	}
	2394	/* assumes a byte expands to at most UTF8_EXPAND bytes on
	2395	upgrade, so:
	2396	expected_length <= from_lenUTF8_EXPAND + (len-from_len) /
	2397	GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
	2398	len -= fromlen;
	2399	while (fromlen > 0) {
	2400	cur = (char ) uvchr_to_utf8((U8 ) cur, * (U8 *) aptr);
	2401	aptr++;
	2402	fromlen--;
	2403	}
	2404	} else {
	2405	string_copy:
	2406	if (howlen == e_star) {
	2407	len = fromlen;
	2408	if (datumtype == 'Z') len++;
	2409	}
	2410	if (len <= (SSize_t) fromlen) {
	2411	fromlen = len;
	2412	if (datumtype == 'Z' && fromlen > 0) fromlen--;
	2413	}
	2414	GROWING(0, cat, start, cur, len);
	2415	Copy(aptr, cur, fromlen, char);
	2416	cur += fromlen;
	2417	len -= fromlen;
	2418	}
	2419	memset(cur, datumtype == 'A' ? ' ' : '\0', len);
	2420	cur += len;
	2421	SvTAINT(cat);
	2422	break;
	2423	}
	2424	case 'B':
	2425	case 'b': {
	2426	const char str, end;
	2427	SSize_t l, field_len;
	2428	U8 bits;
	2429	bool utf8_source;
	2430	U32 utf8_flags;
	2431
	2432	fromstr = NEXTFROM;
	2433	str = SvPV_const(fromstr, fromlen);
	2434	end = str + fromlen;
	2435	if (DO_UTF8(fromstr)) {
	2436	utf8_source = TRUE;
	2437	utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
	2438	} else {
	2439	utf8_source = FALSE;
	2440	utf8_flags = 0; /* Unused, but keep compilers happy */
	2441	}
	2442	if (howlen == e_star) len = fromlen;
	2443	field_len = (len+7)/8;
	2444	GROWING(utf8, cat, start, cur, field_len);
	2445	if (len > (SSize_t)fromlen) len = fromlen;
	2446	bits = 0;
	2447	l = 0;
	2448	if (datumtype == 'B')
	2449	while (l++ < len) {
	2450	if (utf8_source) {
	2451	UV val = 0;
	2452	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2453	bits \|= val & 1;
	2454	} else bits \|= *str++ & 1;
	2455	if (l & 7) bits <<= 1;
	2456	else {
	2457	PUSH_BYTE(utf8, cur, bits);
	2458	bits = 0;
	2459	}
	2460	}
	2461	else
	2462	/* datumtype == 'b' */
	2463	while (l++ < len) {
	2464	if (utf8_source) {
	2465	UV val = 0;
	2466	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2467	if (val & 1) bits \|= 0x80;
	2468	} else if (*str++ & 1)
	2469	bits \|= 0x80;
	2470	if (l & 7) bits >>= 1;
	2471	else {
	2472	PUSH_BYTE(utf8, cur, bits);
	2473	bits = 0;
	2474	}
	2475	}
	2476	l--;
	2477	if (l & 7) {
	2478	if (datumtype == 'B')
	2479	bits <<= 7 - (l & 7);
	2480	else
	2481	bits >>= 7 - (l & 7);
	2482	PUSH_BYTE(utf8, cur, bits);
	2483	l += 7;
	2484	}
	2485	/* Determine how many chars are left in the requested field */
	2486	l /= 8;
	2487	if (howlen == e_star) field_len = 0;
	2488	else field_len -= l;
	2489	Zero(cur, field_len, char);
	2490	cur += field_len;
	2491	break;
	2492	}
	2493	case 'H':
	2494	case 'h': {
	2495	const char str, end;
	2496	SSize_t l, field_len;
	2497	U8 bits;
	2498	bool utf8_source;
	2499	U32 utf8_flags;
	2500
	2501	fromstr = NEXTFROM;
	2502	str = SvPV_const(fromstr, fromlen);
	2503	end = str + fromlen;
	2504	if (DO_UTF8(fromstr)) {
	2505	utf8_source = TRUE;
	2506	utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
	2507	} else {
	2508	utf8_source = FALSE;
	2509	utf8_flags = 0; /* Unused, but keep compilers happy */
	2510	}
	2511	if (howlen == e_star) len = fromlen;
	2512	field_len = (len+1)/2;
	2513	GROWING(utf8, cat, start, cur, field_len);
	2514	if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
	2515	bits = 0;
	2516	l = 0;
	2517	if (datumtype == 'H')
	2518	while (l++ < len) {
	2519	if (utf8_source) {
	2520	UV val = 0;
	2521	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2522	if (val < 256 && isALPHA(val))
	2523	bits \|= (val + 9) & 0xf;
	2524	else
	2525	bits \|= val & 0xf;
	2526	} else if (isALPHA(*str))
	2527	bits \|= (*str++ + 9) & 0xf;
	2528	else
	2529	bits \|= *str++ & 0xf;
	2530	if (l & 1) bits <<= 4;
	2531	else {
	2532	PUSH_BYTE(utf8, cur, bits);
	2533	bits = 0;
	2534	}
	2535	}
	2536	else
	2537	while (l++ < len) {
	2538	if (utf8_source) {
	2539	UV val = 0;
	2540	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2541	if (val < 256 && isALPHA(val))
	2542	bits \|= ((val + 9) & 0xf) << 4;
	2543	else
	2544	bits \|= (val & 0xf) << 4;
	2545	} else if (isALPHA(*str))
	2546	bits \|= ((*str++ + 9) & 0xf) << 4;
	2547	else
	2548	bits \|= (*str++ & 0xf) << 4;
	2549	if (l & 1) bits >>= 4;
	2550	else {
	2551	PUSH_BYTE(utf8, cur, bits);
	2552	bits = 0;
	2553	}
	2554	}
	2555	l--;
	2556	if (l & 1) {
	2557	PUSH_BYTE(utf8, cur, bits);
	2558	l++;
	2559	}
	2560	/* Determine how many chars are left in the requested field */
	2561	l /= 2;
	2562	if (howlen == e_star) field_len = 0;
	2563	else field_len -= l;
	2564	Zero(cur, field_len, char);
	2565	cur += field_len;
	2566	break;
	2567	}
	2568	case 'c':
	2569	while (len-- > 0) {
	2570	IV aiv;
	2571	fromstr = NEXTFROM;
	2572	aiv = SvIV_no_inf(fromstr, datumtype);
	2573	if ((-128 > aiv \|\| aiv > 127))
	2574	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2575	"Character in 'c' format wrapped in pack");
	2576	PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
	2577	}
	2578	break;
	2579	case 'C':
	2580	if (len == 0) {
	2581	utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
	2582	break;
	2583	}
	2584	while (len-- > 0) {
	2585	IV aiv;
	2586	fromstr = NEXTFROM;
	2587	aiv = SvIV_no_inf(fromstr, datumtype);
	2588	if ((0 > aiv \|\| aiv > 0xff))
	2589	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2590	"Character in 'C' format wrapped in pack");
	2591	PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
	2592	}
	2593	break;
	2594	case 'W': {
	2595	char *end;
	2596	U8 in_bytes = (U8)IN_BYTES;
	2597
	2598	end = start+SvLEN(cat)-1;
	2599	if (utf8) end -= UTF8_MAXLEN-1;
	2600	while (len-- > 0) {
	2601	UV auv;
	2602	fromstr = NEXTFROM;
	2603	auv = SvUV_no_inf(fromstr, datumtype);
	2604	if (in_bytes) auv = auv % 0x100;
	2605	if (utf8) {
	2606	W_utf8:
	2607	if (cur >= end) {
	2608	*cur = '\0';
	2609	SvCUR_set(cat, cur - start);
	2610
	2611	GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
	2612	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2613	}
	2614	cur = (char ) uvchr_to_utf8_flags((U8 ) cur, auv, 0);
	2615	} else {
	2616	if (auv >= 0x100) {
	2617	if (!SvUTF8(cat)) {
	2618	*cur = '\0';
	2619	SvCUR_set(cat, cur - start);
	2620	marked_upgrade(aTHX_ cat, symptr);
	2621	lookahead.flags \|= FLAG_DO_UTF8;
	2622	lookahead.strbeg = symptr->strbeg;
	2623	utf8 = 1;
	2624	start = SvPVX(cat);
	2625	cur = start + SvCUR(cat);
	2626	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2627	goto W_utf8;
	2628	}
	2629	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2630	"Character in 'W' format wrapped in pack");
	2631	auv &= 0xff;
	2632	}
	2633	if (cur >= end) {
	2634	*cur = '\0';
	2635	SvCUR_set(cat, cur - start);
	2636	GROWING(0, cat, start, cur, len+1);
	2637	end = start+SvLEN(cat)-1;
	2638	}
	2639	(U8 ) cur++ = (U8)auv;
	2640	}
	2641	}
	2642	break;
	2643	}
	2644	case 'U': {
	2645	char *end;
	2646
	2647	if (len == 0) {
	2648	if (!(symptr->flags & FLAG_DO_UTF8)) {
	2649	marked_upgrade(aTHX_ cat, symptr);
	2650	lookahead.flags \|= FLAG_DO_UTF8;
	2651	lookahead.strbeg = symptr->strbeg;
	2652	}
	2653	utf8 = 0;
	2654	goto no_change;
	2655	}
	2656
	2657	end = start+SvLEN(cat);
	2658	if (!utf8) end -= UTF8_MAXLEN;
	2659	while (len-- > 0) {
	2660	UV auv;
	2661	fromstr = NEXTFROM;
	2662	auv = SvUV_no_inf(fromstr, datumtype);
	2663	if (utf8) {
	2664	U8 buffer[UTF8_MAXLEN+1], *endb;
	2665	endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
	2666	if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
	2667	*cur = '\0';
	2668	SvCUR_set(cat, cur - start);
	2669	GROWING(0, cat, start, cur,
	2670	len+(endb-buffer)*UTF8_EXPAND);
	2671	end = start+SvLEN(cat);
	2672	}
	2673	cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
	2674	} else {
	2675	if (cur >= end) {
	2676	*cur = '\0';
	2677	SvCUR_set(cat, cur - start);
	2678	GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
	2679	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2680	}
	2681	cur = (char ) uvchr_to_utf8_flags((U8 ) cur,
	2682	UNI_TO_NATIVE(auv),
	2683	0);
	2684	}
	2685	}
	2686	break;
	2687	}
	2688	/* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
	2689	case 'f':
	2690	while (len-- > 0) {
	2691	float afloat;
	2692	NV anv;
	2693	fromstr = NEXTFROM;
	2694	anv = SvNV(fromstr);
	2695	# if (defined(VMS) && !defined(_IEEE_FP)) \|\| defined(DOUBLE_IS_VAX_FLOAT)
	2696	/* IEEE fp overflow shenanigans are unavailable on VAX and optional
	2697	* on Alpha; fake it if we don't have them.
	2698	*/
	2699	if (anv > FLT_MAX)
	2700	afloat = FLT_MAX;
	2701	else if (anv < -FLT_MAX)
	2702	afloat = -FLT_MAX;
	2703	else afloat = (float)anv;
	2704	# else
	2705	# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
	2706	if(Perl_isnan(anv))
	2707	afloat = (float)NV_NAN;
	2708	else
	2709	# endif
	2710	# ifdef NV_INF
	2711	/* a simple cast to float is undefined if outside
	2712	* the range of values that can be represented */
	2713	afloat = (float)(anv > FLT_MAX ? NV_INF :
	2714	anv < -FLT_MAX ? -NV_INF : anv);
	2715	# endif
	2716	# endif
	2717	PUSH_VAR(utf8, cur, afloat, needs_swap);
	2718	}
	2719	break;
	2720	case 'd':
	2721	while (len-- > 0) {
	2722	double adouble;
	2723	NV anv;
	2724	fromstr = NEXTFROM;
	2725	anv = SvNV(fromstr);
	2726	# if (defined(VMS) && !defined(_IEEE_FP)) \|\| defined(DOUBLE_IS_VAX_FLOAT)
	2727	/* IEEE fp overflow shenanigans are unavailable on VAX and optional
	2728	* on Alpha; fake it if we don't have them.
	2729	*/
	2730	if (anv > DBL_MAX)
	2731	adouble = DBL_MAX;
	2732	else if (anv < -DBL_MAX)
	2733	adouble = -DBL_MAX;
	2734	else adouble = (double)anv;
	2735	# else
	2736	adouble = (double)anv;
	2737	# endif
	2738	PUSH_VAR(utf8, cur, adouble, needs_swap);
	2739	}
	2740	break;
	2741	case 'F': {
	2742	NV_bytes anv;
	2743	Zero(&anv, 1, NV); /* can be long double with unused bits */
	2744	while (len-- > 0) {
	2745	fromstr = NEXTFROM;
	2746	#ifdef __GNUC__
	2747	/* to work round a gcc/x86 bug; don't use SvNV */
	2748	anv.nv = sv_2nv(fromstr);
	2749	# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
	2750	&& LONG_DOUBLESIZE > 10
	2751	/* GCC sometimes overwrites the padding in the
	2752	assignment above */
	2753	Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
	2754	# endif
	2755	#else
	2756	anv.nv = SvNV(fromstr);
	2757	#endif
	2758	PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
	2759	}
	2760	break;
	2761	}
	2762	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	2763	case 'D': {
	2764	ld_bytes aldouble;
	2765	/* long doubles can have unused bits, which may be nonzero */
	2766	Zero(&aldouble, 1, long double);
	2767	while (len-- > 0) {
	2768	fromstr = NEXTFROM;
	2769	# ifdef __GNUC__
	2770	/* to work round a gcc/x86 bug; don't use SvNV */
	2771	aldouble.ld = (long double)sv_2nv(fromstr);
	2772	# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
	2773	/* GCC sometimes overwrites the padding in the
	2774	assignment above */
	2775	Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
	2776	# endif
	2777	# else
	2778	aldouble.ld = (long double)SvNV(fromstr);
	2779	# endif
	2780	PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
	2781	needs_swap);
	2782	}
	2783	break;
	2784	}
	2785	#endif
	2786	case 'n' \| TYPE_IS_SHRIEKING:
	2787	case 'n':
	2788	while (len-- > 0) {
	2789	I16 ai16;
	2790	fromstr = NEXTFROM;
	2791	ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
	2792	ai16 = PerlSock_htons(ai16);
	2793	PUSH16(utf8, cur, &ai16, FALSE);
	2794	}
	2795	break;
	2796	case 'v' \| TYPE_IS_SHRIEKING:
	2797	case 'v':
	2798	while (len-- > 0) {
	2799	I16 ai16;
	2800	fromstr = NEXTFROM;
	2801	ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
	2802	ai16 = htovs(ai16);
	2803	PUSH16(utf8, cur, &ai16, FALSE);
	2804	}
	2805	break;
	2806	case 'S' \| TYPE_IS_SHRIEKING:
	2807	#if SHORTSIZE != SIZE16
	2808	while (len-- > 0) {
	2809	unsigned short aushort;
	2810	fromstr = NEXTFROM;
	2811	aushort = SvUV_no_inf(fromstr, datumtype);
	2812	PUSH_VAR(utf8, cur, aushort, needs_swap);
	2813	}
	2814	break;
	2815	#else
	2816	/* FALLTHROUGH */
	2817	#endif
	2818	case 'S':
	2819	while (len-- > 0) {
	2820	U16 au16;
	2821	fromstr = NEXTFROM;
	2822	au16 = (U16)SvUV_no_inf(fromstr, datumtype);
	2823	PUSH16(utf8, cur, &au16, needs_swap);
	2824	}
	2825	break;
	2826	case 's' \| TYPE_IS_SHRIEKING:
	2827	#if SHORTSIZE != SIZE16
	2828	while (len-- > 0) {
	2829	short ashort;
	2830	fromstr = NEXTFROM;
	2831	ashort = SvIV_no_inf(fromstr, datumtype);
	2832	PUSH_VAR(utf8, cur, ashort, needs_swap);
	2833	}
	2834	break;
	2835	#else
	2836	/* FALLTHROUGH */
	2837	#endif
	2838	case 's':
	2839	while (len-- > 0) {
	2840	I16 ai16;
	2841	fromstr = NEXTFROM;
	2842	ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
	2843	PUSH16(utf8, cur, &ai16, needs_swap);
	2844	}
	2845	break;
	2846	case 'I':
	2847	case 'I' \| TYPE_IS_SHRIEKING:
	2848	while (len-- > 0) {
	2849	unsigned int auint;
	2850	fromstr = NEXTFROM;
	2851	auint = SvUV_no_inf(fromstr, datumtype);
	2852	PUSH_VAR(utf8, cur, auint, needs_swap);
	2853	}
	2854	break;
	2855	case 'j':
	2856	while (len-- > 0) {
	2857	IV aiv;
	2858	fromstr = NEXTFROM;
	2859	aiv = SvIV_no_inf(fromstr, datumtype);
	2860	PUSH_VAR(utf8, cur, aiv, needs_swap);
	2861	}
	2862	break;
	2863	case 'J':
	2864	while (len-- > 0) {
	2865	UV auv;
	2866	fromstr = NEXTFROM;
	2867	auv = SvUV_no_inf(fromstr, datumtype);
	2868	PUSH_VAR(utf8, cur, auv, needs_swap);
	2869	}
	2870	break;
	2871	case 'w':
	2872	while (len-- > 0) {
	2873	NV anv;
	2874	fromstr = NEXTFROM;
	2875	S_sv_check_infnan(aTHX_ fromstr, datumtype);
	2876	anv = SvNV_nomg(fromstr);
	2877
	2878	if (anv < 0) {
	2879	*cur = '\0';
	2880	SvCUR_set(cat, cur - start);
	2881	Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
	2882	}
	2883
	2884	/* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
	2885	which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
	2886	any negative IVs will have already been got by the croak()
	2887	above. IOK is untrue for fractions, so we test them
	2888	against UV_MAX_P1. */
	2889	if (SvIOK(fromstr) \|\| anv < UV_MAX_P1) {
	2890	char buf[(sizeof(UV)*CHAR_BIT)/7+1];
	2891	char *in = buf + sizeof(buf);
	2892	UV auv = SvUV_nomg(fromstr);
	2893
	2894	do {
	2895	*--in = (char)((auv & 0x7f) \| 0x80);
	2896	auv >>= 7;
	2897	} while (auv);
	2898	buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
	2899	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2900	in, (buf + sizeof(buf)) - in);
	2901	} else if (SvPOKp(fromstr))
	2902	goto w_string;
	2903	else if (SvNOKp(fromstr)) {
	2904	/* 10**NV_MAX_10_EXP is the largest power of 10
	2905	so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
	2906	given 10(NV_MAX_10_EXP+1) == 128 x solve for x:
	2907	x = (NV_MAX_10_EXP+1) * log (10) / log (128)
	2908	And with that many bytes only Inf can overflow.
	2909	Some C compilers are strict about integral constant
	2910	expressions so we conservatively divide by a slightly
	2911	smaller integer instead of multiplying by the exact
	2912	floating-point value.
	2913	*/
	2914	#ifdef NV_MAX_10_EXP
	2915	/* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
	2916	char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
	2917	#else
	2918	/* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
	2919	char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
	2920	#endif
	2921	char *in = buf + sizeof(buf);
	2922
	2923	anv = Perl_floor(anv);
	2924	do {
	2925	const NV next = Perl_floor(anv / 128);
	2926	if (in <= buf) /* this cannot happen ;-) */
	2927	Perl_croak(aTHX_ "Cannot compress integer in pack");
	2928	--in = (unsigned char)(anv - (next 128)) \| 0x80;
	2929	anv = next;
	2930	} while (anv > 0);
	2931	buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
	2932	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2933	in, (buf + sizeof(buf)) - in);
	2934	} else {
	2935	const char *from;
	2936	char result, in;
	2937	SV *norm;
	2938	STRLEN len;
	2939	bool done;
	2940
	2941	w_string:
	2942	/* Copy string and check for compliance */
	2943	from = SvPV_nomg_const(fromstr, len);
	2944	if ((norm = is_an_int(from, len)) == NULL)
	2945	Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
	2946
	2947	Newx(result, len, char);
	2948	in = result + len;
	2949	done = FALSE;
	2950	while (!done) *--in = div128(norm, &done) \| 0x80;
	2951	result[len - 1] &= 0x7F; /* clear continue bit */
	2952	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2953	in, (result + len) - in);
	2954	Safefree(result);
	2955	SvREFCNT_dec(norm); /* free norm */
	2956	}
	2957	}
	2958	break;
	2959	case 'i':
	2960	case 'i' \| TYPE_IS_SHRIEKING:
	2961	while (len-- > 0) {
	2962	int aint;
	2963	fromstr = NEXTFROM;
	2964	aint = SvIV_no_inf(fromstr, datumtype);
	2965	PUSH_VAR(utf8, cur, aint, needs_swap);
	2966	}
	2967	break;
	2968	case 'N' \| TYPE_IS_SHRIEKING:
	2969	case 'N':
	2970	while (len-- > 0) {
	2971	U32 au32;
	2972	fromstr = NEXTFROM;
	2973	au32 = SvUV_no_inf(fromstr, datumtype);
	2974	au32 = PerlSock_htonl(au32);
	2975	PUSH32(utf8, cur, &au32, FALSE);
	2976	}
	2977	break;
	2978	case 'V' \| TYPE_IS_SHRIEKING:
	2979	case 'V':
	2980	while (len-- > 0) {
	2981	U32 au32;
	2982	fromstr = NEXTFROM;
	2983	au32 = SvUV_no_inf(fromstr, datumtype);
	2984	au32 = htovl(au32);
	2985	PUSH32(utf8, cur, &au32, FALSE);
	2986	}
	2987	break;
	2988	case 'L' \| TYPE_IS_SHRIEKING:
	2989	#if LONGSIZE != SIZE32
	2990	while (len-- > 0) {
	2991	unsigned long aulong;
	2992	fromstr = NEXTFROM;
	2993	aulong = SvUV_no_inf(fromstr, datumtype);
	2994	PUSH_VAR(utf8, cur, aulong, needs_swap);
	2995	}
	2996	break;
	2997	#else
	2998	/* Fall though! */
	2999	#endif
	3000	case 'L':
	3001	while (len-- > 0) {
	3002	U32 au32;
	3003	fromstr = NEXTFROM;
	3004	au32 = SvUV_no_inf(fromstr, datumtype);
	3005	PUSH32(utf8, cur, &au32, needs_swap);
	3006	}
	3007	break;
	3008	case 'l' \| TYPE_IS_SHRIEKING:
	3009	#if LONGSIZE != SIZE32
	3010	while (len-- > 0) {
	3011	long along;
	3012	fromstr = NEXTFROM;
	3013	along = SvIV_no_inf(fromstr, datumtype);
	3014	PUSH_VAR(utf8, cur, along, needs_swap);
	3015	}
	3016	break;
	3017	#else
	3018	/* Fall though! */
	3019	#endif
	3020	case 'l':
	3021	while (len-- > 0) {
	3022	I32 ai32;
	3023	fromstr = NEXTFROM;
	3024	ai32 = SvIV_no_inf(fromstr, datumtype);
	3025	PUSH32(utf8, cur, &ai32, needs_swap);
	3026	}
	3027	break;
	3028	#if defined(HAS_QUAD) && IVSIZE >= 8
	3029	case 'Q':
	3030	while (len-- > 0) {
	3031	Uquad_t auquad;
	3032	fromstr = NEXTFROM;
	3033	auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
	3034	PUSH_VAR(utf8, cur, auquad, needs_swap);
	3035	}
	3036	break;
	3037	case 'q':
	3038	while (len-- > 0) {
	3039	Quad_t aquad;
	3040	fromstr = NEXTFROM;
	3041	aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
	3042	PUSH_VAR(utf8, cur, aquad, needs_swap);
	3043	}
	3044	break;
	3045	#endif
	3046	case 'P':
	3047	len = 1; /* assume SV is correct length */
	3048	GROWING(utf8, cat, start, cur, sizeof(char *));
	3049	/* FALLTHROUGH */
	3050	case 'p':
	3051	while (len-- > 0) {
	3052	const char *aptr;
	3053
	3054	fromstr = NEXTFROM;
	3055	SvGETMAGIC(fromstr);
	3056	if (!SvOK(fromstr)) aptr = NULL;
	3057	else {
	3058	/* XXX better yet, could spirit away the string to
	3059	* a safe spot and hang on to it until the result
	3060	* of pack() (and all copies of the result) are
	3061	* gone.
	3062	*/
	3063	if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
	3064	\|\| (SvPADTMP(fromstr) &&
	3065	!SvREADONLY(fromstr)))) {
	3066	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	3067	"Attempt to pack pointer to temporary value");
	3068	}
	3069	if (SvPOK(fromstr) \|\| SvNIOK(fromstr))
	3070	aptr = SvPV_nomg_const_nolen(fromstr);
	3071	else
	3072	aptr = SvPV_force_flags_nolen(fromstr, 0);
	3073	}
	3074	PUSH_VAR(utf8, cur, aptr, needs_swap);
	3075	}
	3076	break;
	3077	case 'u': {
	3078	const char aptr, aend;
	3079	bool from_utf8;
	3080
	3081	fromstr = NEXTFROM;
	3082	if (len <= 2) len = 45;
	3083	else len = len / 3 * 3;
	3084	if (len >= 64) {
	3085	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	3086	"Field too wide in 'u' format in pack");
	3087	len = 63;
	3088	}
	3089	aptr = SvPV_const(fromstr, fromlen);
	3090	from_utf8 = DO_UTF8(fromstr);
	3091	if (from_utf8) {
	3092	aend = aptr + fromlen;
	3093	fromlen = sv_len_utf8_nomg(fromstr);
	3094	} else aend = NULL; /* Unused, but keep compilers happy */
	3095	GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
	3096	while (fromlen > 0) {
	3097	U8 *end;
	3098	SSize_t todo;
	3099	U8 hunk[1+63/3*4+1];
	3100
	3101	if ((SSize_t)fromlen > len)
	3102	todo = len;
	3103	else
	3104	todo = fromlen;
	3105	if (from_utf8) {
	3106	char buffer[64];
	3107	if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
	3108	'u' \| TYPE_IS_PACK)) {
	3109	*cur = '\0';
	3110	SvCUR_set(cat, cur - start);
	3111	Perl_croak(aTHX_ "panic: string is shorter than advertised, "
	3112	"aptr=%p, aend=%p, buffer=%p, todo=%zd",
	3113	aptr, aend, buffer, todo);
	3114	}
	3115	end = doencodes(hunk, (const U8 *)buffer, todo);
	3116	} else {
	3117	end = doencodes(hunk, (const U8 *)aptr, todo);
	3118	aptr += todo;
	3119	}
	3120	PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
	3121	fromlen -= todo;
	3122	}
	3123	break;
	3124	}
	3125	}
	3126	*cur = '\0';
	3127	SvCUR_set(cat, cur - start);
	3128	no_change:
	3129	*symptr = lookahead;
	3130	}
	3131	return beglist;
	3132	}
	3133	#undef NEXTFROM
	3134
	3135
	3136	PP(pp_pack)
	3137	{
	3138	dSP; dMARK; dORIGMARK; dTARGET;
	3139	SV *cat = TARG;
	3140	STRLEN fromlen;
	3141	SV pat_sv = ++MARK;
	3142	const char *pat = SvPV_const(pat_sv, fromlen);
	3143	const char *patend = pat + fromlen;
	3144
	3145	MARK++;
	3146	SvPVCLEAR(cat);
	3147	SvUTF8_off(cat);
	3148
	3149	packlist(cat, pat, patend, MARK, SP + 1);
	3150
	3151	if (SvUTF8(cat)) {
	3152	STRLEN result_len;
	3153	const char * result = SvPV_nomg(cat, result_len);
	3154	const U8 * error_pos;
	3155
	3156	if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
	3157	_force_out_malformed_utf8_message(error_pos,
	3158	(U8 *) result + result_len,
	3159	0, /* no flags */
	3160	1 /* Die */
	3161	);
	3162	NOT_REACHED; /* NOTREACHED */
	3163	}
	3164	}
	3165
	3166	SvSETMAGIC(cat);
	3167	SP = ORIGMARK;
	3168	PUSHs(cat);
	3169	RETURN;
	3170	}
	3171
	3172	/*
	3173	* ex: set ts=8 sts=4 sw=4 et:
	3174	*/