perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* pp_pack.c
	2	*
	3	* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
	4	* 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
	5	*
	6	* You may distribute under the terms of either the GNU General Public
	7	* License or the Artistic License, as specified in the README file.
	8	*
	9	*/
	10
	11	/*
	12	* He still hopefully carried some of his gear in his pack: a small tinder-box,
	13	* two small shallow pans, the smaller fitting into the larger; inside them a
	14	* wooden spoon, a short two-pronged fork and some skewers were stowed; and
	15	* hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
	16	* some salt.
	17	*
	18	* [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
	19	*/
	20
	21	/* This file contains pp ("push/pop") functions that
	22	* execute the opcodes that make up a perl program. A typical pp function
	23	* expects to find its arguments on the stack, and usually pushes its
	24	* results onto the stack, hence the 'pp' terminology. Each OP structure
	25	* contains a pointer to the relevant pp_foo() function.
	26	*
	27	* This particular file just contains pp_pack() and pp_unpack(). See the
	28	* other pp*.c files for the rest of the pp_ functions.
	29	*/
	30
	31	#include "EXTERN.h"
	32	#define PERL_IN_PP_PACK_C
	33	#include "perl.h"
	34
	35	/* Types used by pack/unpack */
	36	typedef enum {
	37	e_no_len, /* no length */
	38	e_number, /* number, [] */
	39	e_star /* asterisk */
	40	} howlen_t;
	41
	42	typedef struct tempsym {
	43	const char* patptr; /* current template char */
	44	const char* patend; /* one after last char */
	45	const char* grpbeg; /* 1st char of ()-group */
	46	const char* grpend; /* end of ()-group */
	47	I32 code; /* template code (!<>) */
	48	U32 flags; /* /=4, comma=2, pack=1 */
	49	/* and group modifiers */
	50	SSize_t length; /* length/repeat count */
	51	howlen_t howlen; /* how length is given */
	52	int level; /* () nesting level */
	53	STRLEN strbeg; /* offset of group start */
	54	struct tempsym previous; / previous group */
	55	} tempsym_t;
	56
	57	#define TEMPSYM_INIT(symptr, p, e, f) \
	58	STMT_START { \
	59	(symptr)->patptr = (p); \
	60	(symptr)->patend = (e); \
	61	(symptr)->grpbeg = NULL; \
	62	(symptr)->grpend = NULL; \
	63	(symptr)->grpend = NULL; \
	64	(symptr)->code = 0; \
	65	(symptr)->length = 0; \
	66	(symptr)->howlen = e_no_len; \
	67	(symptr)->level = 0; \
	68	(symptr)->flags = (f); \
	69	(symptr)->strbeg = 0; \
	70	(symptr)->previous = NULL; \
	71	} STMT_END
	72
	73	typedef union {
	74	NV nv;
	75	U8 bytes[sizeof(NV)];
	76	} NV_bytes;
	77
	78	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	79	typedef union {
	80	long double ld;
	81	U8 bytes[sizeof(long double)];
	82	} ld_bytes;
	83	#endif
	84
	85	#ifndef CHAR_BIT
	86	# define CHAR_BIT 8
	87	#endif
	88	/* Maximum number of bytes to which a byte can grow due to upgrade */
	89	#define UTF8_EXPAND 2
	90
	91	/*
	92	* Offset for integer pack/unpack.
	93	*
	94	* On architectures where I16 and I32 aren't really 16 and 32 bits,
	95	* which for now are all Crays, pack and unpack have to play games.
	96	*/
	97
	98	/*
	99	* These values are required for portability of pack() output.
	100	* If they're not right on your machine, then pack() and unpack()
	101	* wouldn't work right anyway; you'll need to apply the Cray hack.
	102	* (I'd like to check them with #if, but you can't use sizeof() in
	103	* the preprocessor.) --???
	104	*/
	105	/*
	106	The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
	107	defines are now in config.h. --Andy Dougherty April 1998
	108	*/
	109	#define SIZE16 2
	110	#define SIZE32 4
	111
	112	/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
	113	--jhi Feb 1999 */
	114
	115	#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
	116	# define OFF16(p) ((char *) (p))
	117	# define OFF32(p) ((char *) (p))
	118	#elif BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 /* little-endian */
	119	# define OFF16(p) ((char*)(p))
	120	# define OFF32(p) ((char*)(p))
	121	#elif BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321 /* big-endian */
	122	# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
	123	# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
	124	#else
	125	# error "bad cray byte order"
	126	#endif
	127
	128	#define PUSH16(utf8, cur, p, needs_swap) \
	129	PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
	130	#define PUSH32(utf8, cur, p, needs_swap) \
	131	PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
	132
	133	#if BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321 /* big-endian */
	134	# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
	135	#elif BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 /* little-endian */
	136	# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
	137	#else
	138	# error "Unsupported byteorder"
	139	/* Need to add code here to re-instate mixed endian support.
	140	NEEDS_SWAP would need to hold a flag indicating which action to
	141	take, and S_reverse_copy and the code in S_utf8_to_bytes would need
	142	logic adding to deal with any mixed-endian transformations needed.
	143	*/
	144	#endif
	145
	146	/* Only to be used inside a loop (see the break) */
	147	#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
	148	STMT_START { \
	149	if (UNLIKELY(utf8)) { \
	150	if (!S_utf8_to_bytes(aTHX_ &s, strend, \
	151	(char *) (buf), len, datumtype)) break; \
	152	} else { \
	153	if (UNLIKELY(needs_swap)) \
	154	S_reverse_copy(s, (char *) (buf), len); \
	155	else \
	156	Copy(s, (char *) (buf), len, char); \
	157	s += len; \
	158	} \
	159	} STMT_END
	160
	161	#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
	162	SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
	163
	164	#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
	165	SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
	166
	167	#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
	168	SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
	169
	170	#define PUSH_VAR(utf8, aptr, var, needs_swap) \
	171	PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
	172
	173	/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
	174	#define MAX_SUB_TEMPLATE_LEVEL 100
	175
	176	/* flags (note that type modifiers can also be used as flags!) */
	177	#define FLAG_WAS_UTF8 0x40
	178	#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
	179	#define FLAG_UNPACK_ONLY_ONE 0x10
	180	#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
	181	#define FLAG_SLASH 0x04
	182	#define FLAG_COMMA 0x02
	183	#define FLAG_PACK 0x01
	184
	185	STATIC SV *
	186	S_mul128(pTHX_ SV *sv, U8 m)
	187	{
	188	STRLEN len;
	189	char *s = SvPV(sv, len);
	190	char *t;
	191
	192	PERL_ARGS_ASSERT_MUL128;
	193
	194	if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
	195	SV * const tmpNew = newSVpvs("0000000000");
	196
	197	sv_catsv(tmpNew, sv);
	198	SvREFCNT_dec(sv); /* free old sv */
	199	sv = tmpNew;
	200	s = SvPV(sv, len);
	201	}
	202	t = s + len - 1;
	203	while (!t) / trailing '\0'? */
	204	t--;
	205	while (t > s) {
	206	const U32 i = ((*t - '0') << 7) + m;
	207	*(t--) = '0' + (char)(i % 10);
	208	m = (char)(i / 10);
	209	}
	210	return (sv);
	211	}
	212
	213	/* Explosives and implosives. */
	214
	215	#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
	216	&& NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
	217
	218	/* type modifiers */
	219	#define TYPE_IS_SHRIEKING 0x100
	220	#define TYPE_IS_BIG_ENDIAN 0x200
	221	#define TYPE_IS_LITTLE_ENDIAN 0x400
	222	#define TYPE_IS_PACK 0x800
	223	#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN\|TYPE_IS_LITTLE_ENDIAN)
	224	#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
	225	#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
	226
	227	# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
	228	# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
	229
	230	# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
	231
	232	#define PACK_SIZE_CANNOT_CSUM 0x80
	233	#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
	234	#define PACK_SIZE_MASK 0x3F
	235
	236	#include "packsizetables.inc"
	237
	238	static void
	239	S_reverse_copy(const char src, char dest, STRLEN len)
	240	{
	241	dest += len;
	242	while (len--)
	243	--dest = src++;
	244	}
	245
	246	STATIC U8
	247	utf8_to_byte(pTHX_ const char *s, const char end, I32 datumtype)
	248	{
	249	STRLEN retlen;
	250	UV val;
	251
	252	if (*s >= end) {
	253	goto croak;
	254	}
	255	val = utf8n_to_uvchr((U8 ) s, end-*s, &retlen,
	256	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	257	if (retlen == (STRLEN) -1)
	258	croak:
	259	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
	260	(int) TYPE_NO_MODIFIERS(datumtype));
	261	if (val >= 0x100) {
	262	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
	263	"Character in '%c' format wrapped in unpack",
	264	(int) TYPE_NO_MODIFIERS(datumtype));
	265	val &= 0xff;
	266	}
	267	*s += retlen;
	268	return (U8)val;
	269	}
	270
	271	#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
	272	utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
	273	(U8 )(s)++)
	274
	275	STATIC bool
	276	S_utf8_to_bytes(pTHX_ const char *s, const char end, const char *buf, SSize_t buf_len, I32 datumtype)
	277	{
	278	UV val;
	279	STRLEN retlen;
	280	const char from = s;
	281	int bad = 0;
	282	const U32 flags = ckWARN(WARN_UTF8) ?
	283	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY \| UTF8_ALLOW_ANY);
	284	const bool needs_swap = NEEDS_SWAP(datumtype);
	285
	286	if (UNLIKELY(needs_swap))
	287	buf += buf_len;
	288
	289	for (;buf_len > 0; buf_len--) {
	290	if (from >= end) return FALSE;
	291	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
	292	if (retlen == (STRLEN) -1) {
	293	from += UTF8SKIP(from);
	294	bad \|= 1;
	295	} else from += retlen;
	296	if (val >= 0x100) {
	297	bad \|= 2;
	298	val &= 0xff;
	299	}
	300	if (UNLIKELY(needs_swap))
	301	(U8 )--buf = (U8)val;
	302	else
	303	(U8 )buf++ = (U8)val;
	304	}
	305	/* We have enough characters for the buffer. Did we have problems ? */
	306	if (bad) {
	307	if (bad & 1) {
	308	/* Rewalk the string fragment while warning */
	309	const char *ptr;
	310	const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
	311	for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
	312	if (ptr >= end) break;
	313	utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
	314	}
	315	if (from > end) from = end;
	316	}
	317	if ((bad & 2))
	318	Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
	319	WARN_PACK : WARN_UNPACK),
	320	"Character(s) in '%c' format wrapped in %s",
	321	(int) TYPE_NO_MODIFIERS(datumtype),
	322	datumtype & TYPE_IS_PACK ? "pack" : "unpack");
	323	}
	324	*s = from;
	325	return TRUE;
	326	}
	327
	328	STATIC char *
	329	S_my_bytes_to_utf8(const U8 start, STRLEN len, char dest, const bool needs_swap) {
	330	PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
	331
	332	if (UNLIKELY(needs_swap)) {
	333	const U8 *p = start + len;
	334	while (p-- > start) {
	335	append_utf8_from_native_byte(p, (U8 *) & dest);
	336	}
	337	} else {
	338	const U8 * const end = start + len;
	339	while (start < end) {
	340	append_utf8_from_native_byte(start, (U8 *) & dest);
	341	start++;
	342	}
	343	}
	344	return dest;
	345	}
	346
	347	#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
	348	STMT_START { \
	349	if (UNLIKELY(utf8)) \
	350	(cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
	351	else { \
	352	if (UNLIKELY(needs_swap)) \
	353	S_reverse_copy((char *)(buf), cur, len); \
	354	else \
	355	Copy(buf, cur, len, char); \
	356	(cur) += (len); \
	357	} \
	358	} STMT_END
	359
	360	#define SAFE_UTF8_EXPAND(var) \
	361	STMT_START { \
	362	if ((var) > SSize_t_MAX / UTF8_EXPAND) \
	363	Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
	364	(var) = (var) * UTF8_EXPAND; \
	365	} STMT_END
	366
	367	#define GROWING2(utf8, cat, start, cur, item_size, item_count) \
	368	STMT_START { \
	369	if (SSize_t_MAX / (item_size) < (item_count)) \
	370	Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
	371	GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
	372	} STMT_END
	373
	374	#define GROWING(utf8, cat, start, cur, in_len) \
	375	STMT_START { \
	376	STRLEN glen = (in_len); \
	377	STRLEN catcur = (STRLEN)((cur) - (start)); \
	378	if (utf8) SAFE_UTF8_EXPAND(glen); \
	379	if (SSize_t_MAX - glen < catcur) \
	380	Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
	381	if (catcur + glen >= SvLEN(cat)) { \
	382	(start) = sv_exp_grow(cat, glen); \
	383	(cur) = (start) + SvCUR(cat); \
	384	} \
	385	} STMT_END
	386
	387	#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
	388	STMT_START { \
	389	const STRLEN glen = (in_len); \
	390	STRLEN gl = glen; \
	391	if (utf8) SAFE_UTF8_EXPAND(gl); \
	392	if ((cur) + gl >= (start) + SvLEN(cat)) { \
	393	*cur = '\0'; \
	394	SvCUR_set((cat), (cur) - (start)); \
	395	(start) = sv_exp_grow(cat, gl); \
	396	(cur) = (start) + SvCUR(cat); \
	397	} \
	398	PUSH_BYTES(utf8, cur, buf, glen, 0); \
	399	} STMT_END
	400
	401	#define PUSH_BYTE(utf8, s, byte) \
	402	STMT_START { \
	403	if (utf8) { \
	404	const U8 au8 = (byte); \
	405	(s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
	406	} else (U8 )(s)++ = (byte); \
	407	} STMT_END
	408
	409	/* Only to be used inside a loop (see the break) */
	410	#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
	411	STMT_START { \
	412	STRLEN retlen; \
	413	if (str >= end) break; \
	414	val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
	415	if (retlen == (STRLEN) -1) { \
	416	*cur = '\0'; \
	417	Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
	418	} \
	419	str += retlen; \
	420	} STMT_END
	421
	422	static const char _action( const tempsym_t symptr )
	423	{
	424	return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
	425	}
	426
	427	/* Returns the sizeof() struct described by pat */
	428	STATIC SSize_t
	429	S_measure_struct(pTHX_ tempsym_t* symptr)
	430	{
	431	SSize_t total = 0;
	432
	433	PERL_ARGS_ASSERT_MEASURE_STRUCT;
	434
	435	while (next_symbol(symptr)) {
	436	SSize_t len, size;
	437
	438	switch (symptr->howlen) {
	439	case e_star:
	440	Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
	441	_action( symptr ) );
	442
	443	default:
	444	/* e_no_len and e_number */
	445	len = symptr->length;
	446	break;
	447	}
	448
	449	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
	450	if (!size) {
	451	SSize_t star;
	452	/* endianness doesn't influence the size of a type */
	453	switch(TYPE_NO_ENDIANNESS(symptr->code)) {
	454	default:
	455	Perl_croak(aTHX_ "Invalid type '%c' in %s",
	456	(int)TYPE_NO_MODIFIERS(symptr->code),
	457	_action( symptr ) );
	458	case '.' \| TYPE_IS_SHRIEKING:
	459	case '@' \| TYPE_IS_SHRIEKING:
	460	case '@':
	461	case '.':
	462	case '/':
	463	case 'U': /* XXXX Is it correct? */
	464	case 'w':
	465	case 'u':
	466	Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
	467	(int) TYPE_NO_MODIFIERS(symptr->code),
	468	_action( symptr ) );
	469	case '%':
	470	size = 0;
	471	break;
	472	case '(':
	473	{
	474	tempsym_t savsym = *symptr;
	475	symptr->patptr = savsym.grpbeg;
	476	symptr->patend = savsym.grpend;
	477	/* XXXX Theoretically, we need to measure many times at
	478	different positions, since the subexpression may contain
	479	alignment commands, but be not of aligned length.
	480	Need to detect this and croak(). */
	481	size = measure_struct(symptr);
	482	*symptr = savsym;
	483	break;
	484	}
	485	case 'X' \| TYPE_IS_SHRIEKING:
	486	/* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
	487	*/
	488	if (!len) /* Avoid division by 0 */
	489	len = 1;
	490	len = total % len; /* Assumed: the start is aligned. */
	491	/* FALLTHROUGH */
	492	case 'X':
	493	size = -1;
	494	if (total < len)
	495	Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
	496	break;
	497	case 'x' \| TYPE_IS_SHRIEKING:
	498	if (!len) /* Avoid division by 0 */
	499	len = 1;
	500	star = total % len; /* Assumed: the start is aligned. */
	501	if (star) /* Other portable ways? */
	502	len = len - star;
	503	else
	504	len = 0;
	505	/* FALLTHROUGH */
	506	case 'x':
	507	case 'A':
	508	case 'Z':
	509	case 'a':
	510	size = 1;
	511	break;
	512	case 'B':
	513	case 'b':
	514	len = (len + 7)/8;
	515	size = 1;
	516	break;
	517	case 'H':
	518	case 'h':
	519	len = (len + 1)/2;
	520	size = 1;
	521	break;
	522
	523	case 'P':
	524	len = 1;
	525	size = sizeof(char*);
	526	break;
	527	}
	528	}
	529	total += len * size;
	530	}
	531	return total;
	532	}
	533
	534
	535	/* locate matching closing parenthesis or bracket
	536	* returns char pointer to char after match, or NULL
	537	*/
	538	STATIC const char *
	539	S_group_end(pTHX_ const char patptr, const char patend, char ender)
	540	{
	541	PERL_ARGS_ASSERT_GROUP_END;
	542
	543	while (patptr < patend) {
	544	const char c = *patptr++;
	545
	546	if (isSPACE(c))
	547	continue;
	548	else if (c == ender)
	549	return patptr-1;
	550	else if (c == '#') {
	551	while (patptr < patend && *patptr != '\n')
	552	patptr++;
	553	continue;
	554	} else if (c == '(')
	555	patptr = group_end(patptr, patend, ')') + 1;
	556	else if (c == '[')
	557	patptr = group_end(patptr, patend, ']') + 1;
	558	}
	559	Perl_croak(aTHX_ "No group ending character '%c' found in template",
	560	ender);
	561	NOT_REACHED; /* NOTREACHED */
	562	}
	563
	564
	565	/* Convert unsigned decimal number to binary.
	566	* Expects a pointer to the first digit and address of length variable
	567	* Advances char pointer to 1st non-digit char and returns number
	568	*/
	569	STATIC const char *
	570	S_get_num(pTHX_ const char patptr, SSize_t lenptr )
	571	{
	572	SSize_t len = *patptr++ - '0';
	573
	574	PERL_ARGS_ASSERT_GET_NUM;
	575
	576	while (isDIGIT(*patptr)) {
	577	SSize_t nlen = (len * 10) + (*patptr++ - '0');
	578	if (nlen < 0 \|\| nlen/10 != len)
	579	Perl_croak(aTHX_ "pack/unpack repeat count overflow");
	580	len = nlen;
	581	}
	582	*lenptr = len;
	583	return patptr;
	584	}
	585
	586	/* The marvellous template parsing routine: Using state stored in *symptr,
	587	* locates next template code and count
	588	*/
	589	STATIC bool
	590	S_next_symbol(pTHX_ tempsym_t* symptr )
	591	{
	592	const char* patptr = symptr->patptr;
	593	const char* const patend = symptr->patend;
	594
	595	PERL_ARGS_ASSERT_NEXT_SYMBOL;
	596
	597	symptr->flags &= ~FLAG_SLASH;
	598
	599	while (patptr < patend) {
	600	if (isSPACE(*patptr))
	601	patptr++;
	602	else if (*patptr == '#') {
	603	patptr++;
	604	while (patptr < patend && *patptr != '\n')
	605	patptr++;
	606	if (patptr < patend)
	607	patptr++;
	608	} else {
	609	/* We should have found a template code */
	610	I32 code = *patptr++ & 0xFF;
	611	U32 inherited_modifiers = 0;
	612
	613	if (code == ','){ /* grandfather in commas but with a warning */
	614	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
	615	symptr->flags \|= FLAG_COMMA;
	616	Perl_warner(aTHX_ packWARN(WARN_UNPACK),
	617	"Invalid type ',' in %s", _action( symptr ) );
	618	}
	619	continue;
	620	}
	621
	622	/* for '(', skip to ')' */
	623	if (code == '(') {
	624	if( isDIGIT(patptr) \|\| patptr == '' \|\| patptr == '[' )
	625	Perl_croak(aTHX_ "()-group starts with a count in %s",
	626	_action( symptr ) );
	627	symptr->grpbeg = patptr;
	628	patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
	629	if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
	630	Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
	631	_action( symptr ) );
	632	}
	633
	634	/* look for group modifiers to inherit */
	635	if (TYPE_ENDIANNESS(symptr->flags)) {
	636	if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
	637	inherited_modifiers \|= TYPE_ENDIANNESS(symptr->flags);
	638	}
	639
	640	/* look for modifiers */
	641	while (patptr < patend) {
	642	const char *allowed;
	643	I32 modifier;
	644	switch (*patptr) {
	645	case '!':
	646	modifier = TYPE_IS_SHRIEKING;
	647	allowed = "sSiIlLxXnNvV@.";
	648	break;
	649	case '>':
	650	modifier = TYPE_IS_BIG_ENDIAN;
	651	allowed = ENDIANNESS_ALLOWED_TYPES;
	652	break;
	653	case '<':
	654	modifier = TYPE_IS_LITTLE_ENDIAN;
	655	allowed = ENDIANNESS_ALLOWED_TYPES;
	656	break;
	657	default:
	658	allowed = "";
	659	modifier = 0;
	660	break;
	661	}
	662
	663	if (modifier == 0)
	664	break;
	665
	666	if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
	667	Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
	668	allowed, _action( symptr ) );
	669
	670	if (TYPE_ENDIANNESS(code \| modifier) == TYPE_ENDIANNESS_MASK)
	671	Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
	672	(int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
	673	else if (TYPE_ENDIANNESS(code \| modifier \| inherited_modifiers) ==
	674	TYPE_ENDIANNESS_MASK)
	675	Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
	676	*patptr, _action( symptr ) );
	677
	678	if ((code & modifier)) {
	679	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
	680	"Duplicate modifier '%c' after '%c' in %s",
	681	*patptr, (int) TYPE_NO_MODIFIERS(code),
	682	_action( symptr ) );
	683	}
	684
	685	code \|= modifier;
	686	patptr++;
	687	}
	688
	689	/* inherit modifiers */
	690	code \|= inherited_modifiers;
	691
	692	/* look for count and/or / */
	693	if (patptr < patend) {
	694	if (isDIGIT(*patptr)) {
	695	patptr = get_num( patptr, &symptr->length );
	696	symptr->howlen = e_number;
	697
	698	} else if (patptr == '') {
	699	patptr++;
	700	symptr->howlen = e_star;
	701
	702	} else if (*patptr == '[') {
	703	const char* lenptr = ++patptr;
	704	symptr->howlen = e_number;
	705	patptr = group_end( patptr, patend, ']' ) + 1;
	706	/* what kind of [] is it? */
	707	if (isDIGIT(*lenptr)) {
	708	lenptr = get_num( lenptr, &symptr->length );
	709	if( *lenptr != ']' )
	710	Perl_croak(aTHX_ "Malformed integer in [] in %s",
	711	_action( symptr ) );
	712	} else {
	713	tempsym_t savsym = *symptr;
	714	symptr->patend = patptr-1;
	715	symptr->patptr = lenptr;
	716	savsym.length = measure_struct(symptr);
	717	*symptr = savsym;
	718	}
	719	} else {
	720	symptr->howlen = e_no_len;
	721	symptr->length = 1;
	722	}
	723
	724	/* try to find / */
	725	while (patptr < patend) {
	726	if (isSPACE(*patptr))
	727	patptr++;
	728	else if (*patptr == '#') {
	729	patptr++;
	730	while (patptr < patend && *patptr != '\n')
	731	patptr++;
	732	if (patptr < patend)
	733	patptr++;
	734	} else {
	735	if (*patptr == '/') {
	736	symptr->flags \|= FLAG_SLASH;
	737	patptr++;
	738	if (patptr < patend &&
	739	(isDIGIT(patptr) \|\| patptr == '' \|\| patptr == '['))
	740	Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
	741	_action( symptr ) );
	742	}
	743	break;
	744	}
	745	}
	746	} else {
	747	/* at end - no count, no / */
	748	symptr->howlen = e_no_len;
	749	symptr->length = 1;
	750	}
	751
	752	symptr->code = code;
	753	symptr->patptr = patptr;
	754	return TRUE;
	755	}
	756	}
	757	symptr->patptr = patptr;
	758	return FALSE;
	759	}
	760
	761	/*
	762	There is no way to cleanly handle the case where we should process the
	763	string per byte in its upgraded form while it's really in downgraded form
	764	(e.g. estimates like strend-s as an upper bound for the number of
	765	characters left wouldn't work). So if we foresee the need of this
	766	(pattern starts with U or contains U0), we want to work on the encoded
	767	version of the string. Users are advised to upgrade their pack string
	768	themselves if they need to do a lot of unpacks like this on it
	769	*/
	770	STATIC bool
	771	need_utf8(const char pat, const char patend)
	772	{
	773	bool first = TRUE;
	774
	775	PERL_ARGS_ASSERT_NEED_UTF8;
	776
	777	while (pat < patend) {
	778	if (pat[0] == '#') {
	779	pat++;
	780	pat = (const char *) memchr(pat, '\n', patend-pat);
	781	if (!pat) return FALSE;
	782	} else if (pat[0] == 'U') {
	783	if (first \|\| pat[1] == '0') return TRUE;
	784	} else first = FALSE;
	785	pat++;
	786	}
	787	return FALSE;
	788	}
	789
	790	STATIC char
	791	first_symbol(const char pat, const char patend) {
	792	PERL_ARGS_ASSERT_FIRST_SYMBOL;
	793
	794	while (pat < patend) {
	795	if (pat[0] != '#') return pat[0];
	796	pat++;
	797	pat = (const char *) memchr(pat, '\n', patend-pat);
	798	if (!pat) return 0;
	799	pat++;
	800	}
	801	return 0;
	802	}
	803
	804	/*
	805
	806	=head1 Pack and Unpack
	807
	808	=for apidoc unpackstring
	809
	810	The engine implementing the C<unpack()> Perl function.
	811
	812	Using the template C<pat..patend>, this function unpacks the string
	813	C<s..strend> into a number of mortal SVs, which it pushes onto the perl
	814	argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
	815	C<SPAGAIN> after the call to this function). It returns the number of
	816	pushed elements.
	817
	818	The C<strend> and C<patend> pointers should point to the byte following the
	819	last character of each string.
	820
	821	Although this function returns its values on the perl argument stack, it
	822	doesn't take any parameters from that stack (and thus in particular
	823	there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
	824	example).
	825
	826	=cut */
	827
	828	SSize_t
	829	Perl_unpackstring(pTHX_ const char pat, const char patend, const char s, const char strend, U32 flags)
	830	{
	831	tempsym_t sym;
	832
	833	PERL_ARGS_ASSERT_UNPACKSTRING;
	834
	835	if (flags & FLAG_DO_UTF8) flags \|= FLAG_WAS_UTF8;
	836	else if (need_utf8(pat, patend)) {
	837	/* We probably should try to avoid this in case a scalar context call
	838	wouldn't get to the "U0" */
	839	STRLEN len = strend - s;
	840	s = (char ) bytes_to_utf8((U8 ) s, &len);
	841	SAVEFREEPV(s);
	842	strend = s + len;
	843	flags \|= FLAG_DO_UTF8;
	844	}
	845
	846	if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
	847	flags \|= FLAG_PARSE_UTF8;
	848
	849	TEMPSYM_INIT(&sym, pat, patend, flags);
	850
	851	return unpack_rec(&sym, s, s, strend, NULL );
	852	}
	853
	854	STATIC SSize_t
	855	S_unpack_rec(pTHX_ tempsym_t* symptr, const char s, const char strbeg, const char strend, const char *new_s )
	856	{
	857	dSP;
	858	SV *sv = NULL;
	859	const SSize_t start_sp_offset = SP - PL_stack_base;
	860	howlen_t howlen;
	861	SSize_t checksum = 0;
	862	UV cuv = 0;
	863	NV cdouble = 0.0;
	864	const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
	865	bool beyond = FALSE;
	866	bool explicit_length;
	867	const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
	868	bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
	869
	870	PERL_ARGS_ASSERT_UNPACK_REC;
	871
	872	symptr->strbeg = s - strbeg;
	873
	874	while (next_symbol(symptr)) {
	875	packprops_t props;
	876	SSize_t len;
	877	I32 datumtype = symptr->code;
	878	bool needs_swap;
	879	/* do first one only unless in list context
	880	/ is implemented by unpacking the count, then popping it from the
	881	stack, so must check that we're not in the middle of a / */
	882	if ( unpack_only_one
	883	&& (SP - PL_stack_base == start_sp_offset + 1)
	884	&& (datumtype != '/') ) /* XXX can this be omitted */
	885	break;
	886
	887	switch (howlen = symptr->howlen) {
	888	case e_star:
	889	len = strend - strbeg; /* long enough */
	890	break;
	891	default:
	892	/* e_no_len and e_number */
	893	len = symptr->length;
	894	break;
	895	}
	896
	897	explicit_length = TRUE;
	898	redo_switch:
	899	beyond = s >= strend;
	900
	901	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
	902	if (props) {
	903	/* props nonzero means we can process this letter. */
	904	const SSize_t size = props & PACK_SIZE_MASK;
	905	const SSize_t howmany = (strend - s) / size;
	906	if (len > howmany)
	907	len = howmany;
	908
	909	if (!checksum \|\| (props & PACK_SIZE_CANNOT_CSUM)) {
	910	if (len && unpack_only_one) len = 1;
	911	EXTEND(SP, len);
	912	EXTEND_MORTAL(len);
	913	}
	914	}
	915
	916	needs_swap = NEEDS_SWAP(datumtype);
	917
	918	switch(TYPE_NO_ENDIANNESS(datumtype)) {
	919	default:
	920	Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
	921
	922	case '%':
	923	if (howlen == e_no_len)
	924	len = 16; /* len is not specified */
	925	checksum = len;
	926	cuv = 0;
	927	cdouble = 0;
	928	continue;
	929
	930	case '(':
	931	{
	932	tempsym_t savsym = *symptr;
	933	const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
	934	symptr->flags \|= group_modifiers;
	935	symptr->patend = savsym.grpend;
	936	symptr->previous = &savsym;
	937	symptr->level++;
	938	PUTBACK;
	939	if (len && unpack_only_one) len = 1;
	940	while (len--) {
	941	symptr->patptr = savsym.grpbeg;
	942	if (utf8) symptr->flags \|= FLAG_PARSE_UTF8;
	943	else symptr->flags &= ~FLAG_PARSE_UTF8;
	944	unpack_rec(symptr, s, strbeg, strend, &s);
	945	if (s == strend && savsym.howlen == e_star)
	946	break; /* No way to continue */
	947	}
	948	SPAGAIN;
	949	savsym.flags = symptr->flags & ~group_modifiers;
	950	*symptr = savsym;
	951	break;
	952	}
	953	case '.' \| TYPE_IS_SHRIEKING:
	954	case '.': {
	955	const char *from;
	956	SV *sv;
	957	const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
	958	if (howlen == e_star) from = strbeg;
	959	else if (len <= 0) from = s;
	960	else {
	961	tempsym_t *group = symptr;
	962
	963	while (--len && group) group = group->previous;
	964	from = group ? strbeg + group->strbeg : strbeg;
	965	}
	966	sv = from <= s ?
	967	newSVuv( u8 ? (UV) utf8_length((const U8)from, (const U8)s) : (UV) (s-from)) :
	968	newSViv(-(u8 ? (IV) utf8_length((const U8)s, (const U8)from) : (IV) (from-s)));
	969	mXPUSHs(sv);
	970	break;
	971	}
	972	case '@' \| TYPE_IS_SHRIEKING:
	973	case '@':
	974	s = strbeg + symptr->strbeg;
	975	if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
	976	{
	977	while (len > 0) {
	978	if (s >= strend)
	979	Perl_croak(aTHX_ "'@' outside of string in unpack");
	980	s += UTF8SKIP(s);
	981	len--;
	982	}
	983	if (s > strend)
	984	Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
	985	} else {
	986	if (strend-s < len)
	987	Perl_croak(aTHX_ "'@' outside of string in unpack");
	988	s += len;
	989	}
	990	break;
	991	case 'X' \| TYPE_IS_SHRIEKING:
	992	if (!len) /* Avoid division by 0 */
	993	len = 1;
	994	if (utf8) {
	995	const char hop, last;
	996	SSize_t l = len;
	997	hop = last = strbeg;
	998	while (hop < s) {
	999	hop += UTF8SKIP(hop);
	1000	if (--l == 0) {
	1001	last = hop;
	1002	l = len;
	1003	}
	1004	}
	1005	if (last > s)
	1006	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1007	s = last;
	1008	break;
	1009	}
	1010	len = (s - strbeg) % len;
	1011	/* FALLTHROUGH */
	1012	case 'X':
	1013	if (utf8) {
	1014	while (len > 0) {
	1015	if (s <= strbeg)
	1016	Perl_croak(aTHX_ "'X' outside of string in unpack");
	1017	while (--s, UTF8_IS_CONTINUATION(*s)) {
	1018	if (s <= strbeg)
	1019	Perl_croak(aTHX_ "'X' outside of string in unpack");
	1020	}
	1021	len--;
	1022	}
	1023	} else {
	1024	if (len > s - strbeg)
	1025	Perl_croak(aTHX_ "'X' outside of string in unpack" );
	1026	s -= len;
	1027	}
	1028	break;
	1029	case 'x' \| TYPE_IS_SHRIEKING: {
	1030	SSize_t ai32;
	1031	if (!len) /* Avoid division by 0 */
	1032	len = 1;
	1033	if (utf8) ai32 = utf8_length((U8 ) strbeg, (U8 ) s) % len;
	1034	else ai32 = (s - strbeg) % len;
	1035	if (ai32 == 0) break;
	1036	len -= ai32;
	1037	}
	1038	/* FALLTHROUGH */
	1039	case 'x':
	1040	if (utf8) {
	1041	while (len>0) {
	1042	if (s >= strend)
	1043	Perl_croak(aTHX_ "'x' outside of string in unpack");
	1044	s += UTF8SKIP(s);
	1045	len--;
	1046	}
	1047	} else {
	1048	if (len > strend - s)
	1049	Perl_croak(aTHX_ "'x' outside of string in unpack");
	1050	s += len;
	1051	}
	1052	break;
	1053	case '/':
	1054	Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
	1055
	1056	case 'A':
	1057	case 'Z':
	1058	case 'a':
	1059	if (checksum) {
	1060	/* Preliminary length estimate is assumed done in 'W' */
	1061	if (len > strend - s) len = strend - s;
	1062	goto W_checksum;
	1063	}
	1064	if (utf8) {
	1065	SSize_t l;
	1066	const char *hop;
	1067	for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
	1068	if (hop >= strend) {
	1069	if (hop > strend)
	1070	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1071	break;
	1072	}
	1073	}
	1074	if (hop > strend)
	1075	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1076	len = hop - s;
	1077	} else if (len > strend - s)
	1078	len = strend - s;
	1079
	1080	if (datumtype == 'Z') {
	1081	/* 'Z' strips stuff after first null */
	1082	const char ptr, end;
	1083	end = s + len;
	1084	for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
	1085	sv = newSVpvn(s, ptr-s);
	1086	if (howlen == e_star) /* exact for 'Z' /
	1087	len = ptr-s + (ptr != strend ? 1 : 0);
	1088	} else if (datumtype == 'A') {
	1089	/* 'A' strips both nulls and spaces */
	1090	const char *ptr;
	1091	if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
	1092	for (ptr = s+len-1; ptr >= s; ptr--) {
	1093	if ( *ptr != 0
	1094	&& !UTF8_IS_CONTINUATION(*ptr)
	1095	&& !isSPACE_utf8_safe(ptr, strend))
	1096	{
	1097	break;
	1098	}
	1099	}
	1100	if (ptr >= s) ptr += UTF8SKIP(ptr);
	1101	else ptr++;
	1102	if (ptr > s+len)
	1103	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1104	} else {
	1105	for (ptr = s+len-1; ptr >= s; ptr--)
	1106	if (ptr != 0 && !isSPACE(ptr)) break;
	1107	ptr++;
	1108	}
	1109	sv = newSVpvn(s, ptr-s);
	1110	} else sv = newSVpvn(s, len);
	1111
	1112	if (utf8) {
	1113	SvUTF8_on(sv);
	1114	/* Undo any upgrade done due to need_utf8() */
	1115	if (!(symptr->flags & FLAG_WAS_UTF8))
	1116	sv_utf8_downgrade(sv, 0);
	1117	}
	1118	mXPUSHs(sv);
	1119	s += len;
	1120	break;
	1121	case 'B':
	1122	case 'b': {
	1123	char *str;
	1124	if (howlen == e_star \|\| len > (strend - s) * 8)
	1125	len = (strend - s) * 8;
	1126	if (checksum) {
	1127	if (utf8)
	1128	while (len >= 8 && s < strend) {
	1129	cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
	1130	len -= 8;
	1131	}
	1132	else
	1133	while (len >= 8) {
	1134	cuv += PL_bitcount[(U8 )s++];
	1135	len -= 8;
	1136	}
	1137	if (len && s < strend) {
	1138	U8 bits;
	1139	bits = SHIFT_BYTE(utf8, s, strend, datumtype);
	1140	if (datumtype == 'b')
	1141	while (len-- > 0) {
	1142	if (bits & 1) cuv++;
	1143	bits >>= 1;
	1144	}
	1145	else
	1146	while (len-- > 0) {
	1147	if (bits & 0x80) cuv++;
	1148	bits <<= 1;
	1149	}
	1150	}
	1151	break;
	1152	}
	1153
	1154	sv = sv_2mortal(newSV(len ? len : 1));
	1155	SvPOK_on(sv);
	1156	str = SvPVX(sv);
	1157	if (datumtype == 'b') {
	1158	U8 bits = 0;
	1159	const SSize_t ai32 = len;
	1160	for (len = 0; len < ai32; len++) {
	1161	if (len & 7) bits >>= 1;
	1162	else if (utf8) {
	1163	if (s >= strend) break;
	1164	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1165	} else bits = (U8 ) s++;
	1166	*str++ = bits & 1 ? '1' : '0';
	1167	}
	1168	} else {
	1169	U8 bits = 0;
	1170	const SSize_t ai32 = len;
	1171	for (len = 0; len < ai32; len++) {
	1172	if (len & 7) bits <<= 1;
	1173	else if (utf8) {
	1174	if (s >= strend) break;
	1175	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1176	} else bits = (U8 ) s++;
	1177	*str++ = bits & 0x80 ? '1' : '0';
	1178	}
	1179	}
	1180	*str = '\0';
	1181	SvCUR_set(sv, str - SvPVX_const(sv));
	1182	XPUSHs(sv);
	1183	break;
	1184	}
	1185	case 'H':
	1186	case 'h': {
	1187	char *str = NULL;
	1188	/* Preliminary length estimate, acceptable for utf8 too */
	1189	if (howlen == e_star \|\| len > (strend - s) * 2)
	1190	len = (strend - s) * 2;
	1191	if (!checksum) {
	1192	sv = sv_2mortal(newSV(len ? len : 1));
	1193	SvPOK_on(sv);
	1194	str = SvPVX(sv);
	1195	}
	1196	if (datumtype == 'h') {
	1197	U8 bits = 0;
	1198	SSize_t ai32 = len;
	1199	for (len = 0; len < ai32; len++) {
	1200	if (len & 1) bits >>= 4;
	1201	else if (utf8) {
	1202	if (s >= strend) break;
	1203	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1204	} else bits = * (U8 *) s++;
	1205	if (!checksum)
	1206	*str++ = PL_hexdigit[bits & 15];
	1207	}
	1208	} else {
	1209	U8 bits = 0;
	1210	const SSize_t ai32 = len;
	1211	for (len = 0; len < ai32; len++) {
	1212	if (len & 1) bits <<= 4;
	1213	else if (utf8) {
	1214	if (s >= strend) break;
	1215	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1216	} else bits = (U8 ) s++;
	1217	if (!checksum)
	1218	*str++ = PL_hexdigit[(bits >> 4) & 15];
	1219	}
	1220	}
	1221	if (!checksum) {
	1222	*str = '\0';
	1223	SvCUR_set(sv, str - SvPVX_const(sv));
	1224	XPUSHs(sv);
	1225	}
	1226	break;
	1227	}
	1228	case 'C':
	1229	if (len == 0) {
	1230	if (explicit_length)
	1231	/* Switch to "character" mode */
	1232	utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
	1233	break;
	1234	}
	1235	/* FALLTHROUGH */
	1236	case 'c':
	1237	while (len-- > 0 && s < strend) {
	1238	int aint;
	1239	if (utf8)
	1240	{
	1241	STRLEN retlen;
	1242	aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
	1243	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	1244	if (retlen == (STRLEN) -1)
	1245	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1246	s += retlen;
	1247	}
	1248	else
	1249	aint = (U8 )(s)++;
	1250	if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
	1251	aint -= 256;
	1252	if (!checksum)
	1253	mPUSHi(aint);
	1254	else if (checksum > bits_in_uv)
	1255	cdouble += (NV)aint;
	1256	else
	1257	cuv += aint;
	1258	}
	1259	break;
	1260	case 'W':
	1261	W_checksum:
	1262	if (utf8) {
	1263	while (len-- > 0 && s < strend) {
	1264	STRLEN retlen;
	1265	const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
	1266	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	1267	if (retlen == (STRLEN) -1)
	1268	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1269	s += retlen;
	1270	if (!checksum)
	1271	mPUSHu(val);
	1272	else if (checksum > bits_in_uv)
	1273	cdouble += (NV) val;
	1274	else
	1275	cuv += val;
	1276	}
	1277	} else if (!checksum)
	1278	while (len-- > 0) {
	1279	const U8 ch = (U8 ) s++;
	1280	mPUSHu(ch);
	1281	}
	1282	else if (checksum > bits_in_uv)
	1283	while (len-- > 0) cdouble += (NV) (U8 ) s++;
	1284	else
	1285	while (len-- > 0) cuv += (U8 ) s++;
	1286	break;
	1287	case 'U':
	1288	if (len == 0) {
	1289	if (explicit_length && howlen != e_star) {
	1290	/* Switch to "bytes in UTF-8" mode */
	1291	if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
	1292	else
	1293	/* Should be impossible due to the need_utf8() test */
	1294	Perl_croak(aTHX_ "U0 mode on a byte string");
	1295	}
	1296	break;
	1297	}
	1298	if (len > strend - s) len = strend - s;
	1299	if (!checksum) {
	1300	if (len && unpack_only_one) len = 1;
	1301	EXTEND(SP, len);
	1302	EXTEND_MORTAL(len);
	1303	}
	1304	while (len-- > 0 && s < strend) {
	1305	STRLEN retlen;
	1306	UV auv;
	1307	if (utf8) {
	1308	U8 result[UTF8_MAXLEN+1];
	1309	const char *ptr = s;
	1310	STRLEN len;
	1311	/* Bug: warns about bad utf8 even if we are short on bytes
	1312	and will break out of the loop */
	1313	if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
	1314	'U'))
	1315	break;
	1316	len = UTF8SKIP(result);
	1317	if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
	1318	(char *) &result[1], len-1, 'U')) break;
	1319	auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
	1320	len,
	1321	&retlen,
	1322	UTF8_ALLOW_DEFAULT));
	1323	s = ptr;
	1324	} else {
	1325	auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
	1326	strend - s,
	1327	&retlen,
	1328	UTF8_ALLOW_DEFAULT));
	1329	if (retlen == (STRLEN) -1)
	1330	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1331	s += retlen;
	1332	}
	1333	if (!checksum)
	1334	mPUSHu(auv);
	1335	else if (checksum > bits_in_uv)
	1336	cdouble += (NV) auv;
	1337	else
	1338	cuv += auv;
	1339	}
	1340	break;
	1341	case 's' \| TYPE_IS_SHRIEKING:
	1342	#if SHORTSIZE != SIZE16
	1343	while (len-- > 0) {
	1344	short ashort;
	1345	SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
	1346	if (!checksum)
	1347	mPUSHi(ashort);
	1348	else if (checksum > bits_in_uv)
	1349	cdouble += (NV)ashort;
	1350	else
	1351	cuv += ashort;
	1352	}
	1353	break;
	1354	#else
	1355	/* FALLTHROUGH */
	1356	#endif
	1357	case 's':
	1358	while (len-- > 0) {
	1359	I16 ai16;
	1360
	1361	#if U16SIZE > SIZE16
	1362	ai16 = 0;
	1363	#endif
	1364	SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
	1365	#if U16SIZE > SIZE16
	1366	if (ai16 > 32767)
	1367	ai16 -= 65536;
	1368	#endif
	1369	if (!checksum)
	1370	mPUSHi(ai16);
	1371	else if (checksum > bits_in_uv)
	1372	cdouble += (NV)ai16;
	1373	else
	1374	cuv += ai16;
	1375	}
	1376	break;
	1377	case 'S' \| TYPE_IS_SHRIEKING:
	1378	#if SHORTSIZE != SIZE16
	1379	while (len-- > 0) {
	1380	unsigned short aushort;
	1381	SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
	1382	needs_swap);
	1383	if (!checksum)
	1384	mPUSHu(aushort);
	1385	else if (checksum > bits_in_uv)
	1386	cdouble += (NV)aushort;
	1387	else
	1388	cuv += aushort;
	1389	}
	1390	break;
	1391	#else
	1392	/* FALLTHROUGH */
	1393	#endif
	1394	case 'v':
	1395	case 'n':
	1396	case 'S':
	1397	while (len-- > 0) {
	1398	U16 au16;
	1399	#if U16SIZE > SIZE16
	1400	au16 = 0;
	1401	#endif
	1402	SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
	1403	if (datumtype == 'n')
	1404	au16 = PerlSock_ntohs(au16);
	1405	if (datumtype == 'v')
	1406	au16 = vtohs(au16);
	1407	if (!checksum)
	1408	mPUSHu(au16);
	1409	else if (checksum > bits_in_uv)
	1410	cdouble += (NV) au16;
	1411	else
	1412	cuv += au16;
	1413	}
	1414	break;
	1415	case 'v' \| TYPE_IS_SHRIEKING:
	1416	case 'n' \| TYPE_IS_SHRIEKING:
	1417	while (len-- > 0) {
	1418	I16 ai16;
	1419	# if U16SIZE > SIZE16
	1420	ai16 = 0;
	1421	# endif
	1422	SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
	1423	/* There should never be any byte-swapping here. */
	1424	assert(!TYPE_ENDIANNESS(datumtype));
	1425	if (datumtype == ('n' \| TYPE_IS_SHRIEKING))
	1426	ai16 = (I16) PerlSock_ntohs((U16) ai16);
	1427	if (datumtype == ('v' \| TYPE_IS_SHRIEKING))
	1428	ai16 = (I16) vtohs((U16) ai16);
	1429	if (!checksum)
	1430	mPUSHi(ai16);
	1431	else if (checksum > bits_in_uv)
	1432	cdouble += (NV) ai16;
	1433	else
	1434	cuv += ai16;
	1435	}
	1436	break;
	1437	case 'i':
	1438	case 'i' \| TYPE_IS_SHRIEKING:
	1439	while (len-- > 0) {
	1440	int aint;
	1441	SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
	1442	if (!checksum)
	1443	mPUSHi(aint);
	1444	else if (checksum > bits_in_uv)
	1445	cdouble += (NV)aint;
	1446	else
	1447	cuv += aint;
	1448	}
	1449	break;
	1450	case 'I':
	1451	case 'I' \| TYPE_IS_SHRIEKING:
	1452	while (len-- > 0) {
	1453	unsigned int auint;
	1454	SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
	1455	if (!checksum)
	1456	mPUSHu(auint);
	1457	else if (checksum > bits_in_uv)
	1458	cdouble += (NV)auint;
	1459	else
	1460	cuv += auint;
	1461	}
	1462	break;
	1463	case 'j':
	1464	while (len-- > 0) {
	1465	IV aiv;
	1466	SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
	1467	if (!checksum)
	1468	mPUSHi(aiv);
	1469	else if (checksum > bits_in_uv)
	1470	cdouble += (NV)aiv;
	1471	else
	1472	cuv += aiv;
	1473	}
	1474	break;
	1475	case 'J':
	1476	while (len-- > 0) {
	1477	UV auv;
	1478	SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
	1479	if (!checksum)
	1480	mPUSHu(auv);
	1481	else if (checksum > bits_in_uv)
	1482	cdouble += (NV)auv;
	1483	else
	1484	cuv += auv;
	1485	}
	1486	break;
	1487	case 'l' \| TYPE_IS_SHRIEKING:
	1488	#if LONGSIZE != SIZE32
	1489	while (len-- > 0) {
	1490	long along;
	1491	SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
	1492	if (!checksum)
	1493	mPUSHi(along);
	1494	else if (checksum > bits_in_uv)
	1495	cdouble += (NV)along;
	1496	else
	1497	cuv += along;
	1498	}
	1499	break;
	1500	#else
	1501	/* FALLTHROUGH */
	1502	#endif
	1503	case 'l':
	1504	while (len-- > 0) {
	1505	I32 ai32;
	1506	#if U32SIZE > SIZE32
	1507	ai32 = 0;
	1508	#endif
	1509	SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
	1510	#if U32SIZE > SIZE32
	1511	if (ai32 > 2147483647) ai32 -= 4294967296;
	1512	#endif
	1513	if (!checksum)
	1514	mPUSHi(ai32);
	1515	else if (checksum > bits_in_uv)
	1516	cdouble += (NV)ai32;
	1517	else
	1518	cuv += ai32;
	1519	}
	1520	break;
	1521	case 'L' \| TYPE_IS_SHRIEKING:
	1522	#if LONGSIZE != SIZE32
	1523	while (len-- > 0) {
	1524	unsigned long aulong;
	1525	SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
	1526	if (!checksum)
	1527	mPUSHu(aulong);
	1528	else if (checksum > bits_in_uv)
	1529	cdouble += (NV)aulong;
	1530	else
	1531	cuv += aulong;
	1532	}
	1533	break;
	1534	#else
	1535	/* FALLTHROUGH */
	1536	#endif
	1537	case 'V':
	1538	case 'N':
	1539	case 'L':
	1540	while (len-- > 0) {
	1541	U32 au32;
	1542	#if U32SIZE > SIZE32
	1543	au32 = 0;
	1544	#endif
	1545	SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
	1546	if (datumtype == 'N')
	1547	au32 = PerlSock_ntohl(au32);
	1548	if (datumtype == 'V')
	1549	au32 = vtohl(au32);
	1550	if (!checksum)
	1551	mPUSHu(au32);
	1552	else if (checksum > bits_in_uv)
	1553	cdouble += (NV)au32;
	1554	else
	1555	cuv += au32;
	1556	}
	1557	break;
	1558	case 'V' \| TYPE_IS_SHRIEKING:
	1559	case 'N' \| TYPE_IS_SHRIEKING:
	1560	while (len-- > 0) {
	1561	I32 ai32;
	1562	#if U32SIZE > SIZE32
	1563	ai32 = 0;
	1564	#endif
	1565	SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
	1566	/* There should never be any byte swapping here. */
	1567	assert(!TYPE_ENDIANNESS(datumtype));
	1568	if (datumtype == ('N' \| TYPE_IS_SHRIEKING))
	1569	ai32 = (I32)PerlSock_ntohl((U32)ai32);
	1570	if (datumtype == ('V' \| TYPE_IS_SHRIEKING))
	1571	ai32 = (I32)vtohl((U32)ai32);
	1572	if (!checksum)
	1573	mPUSHi(ai32);
	1574	else if (checksum > bits_in_uv)
	1575	cdouble += (NV)ai32;
	1576	else
	1577	cuv += ai32;
	1578	}
	1579	break;
	1580	case 'p':
	1581	while (len-- > 0) {
	1582	const char *aptr;
	1583	SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
	1584	/* newSVpv generates undef if aptr is NULL */
	1585	mPUSHs(newSVpv(aptr, 0));
	1586	}
	1587	break;
	1588	case 'w':
	1589	{
	1590	UV auv = 0;
	1591	size_t bytes = 0;
	1592
	1593	while (len > 0 && s < strend) {
	1594	U8 ch;
	1595	ch = SHIFT_BYTE(utf8, s, strend, datumtype);
	1596	auv = (auv << 7) \| (ch & 0x7f);
	1597	/* UTF8_IS_XXXXX not right here because this is a BER, not
	1598	* UTF-8 format - using constant 0x80 */
	1599	if (ch < 0x80) {
	1600	bytes = 0;
	1601	mPUSHu(auv);
	1602	len--;
	1603	auv = 0;
	1604	continue;
	1605	}
	1606	if (++bytes >= sizeof(UV)) { /* promote to string */
	1607	const char *t;
	1608
	1609	sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
	1610	(int)TYPE_DIGITS(UV), auv);
	1611	while (s < strend) {
	1612	ch = SHIFT_BYTE(utf8, s, strend, datumtype);
	1613	sv = mul128(sv, (U8)(ch & 0x7f));
	1614	if (!(ch & 0x80)) {
	1615	bytes = 0;
	1616	break;
	1617	}
	1618	}
	1619	t = SvPV_nolen_const(sv);
	1620	while (*t == '0')
	1621	t++;
	1622	sv_chop(sv, t);
	1623	mPUSHs(sv);
	1624	len--;
	1625	auv = 0;
	1626	}
	1627	}
	1628	if ((s >= strend) && bytes)
	1629	Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
	1630	}
	1631	break;
	1632	case 'P':
	1633	if (symptr->howlen == e_star)
	1634	Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
	1635	EXTEND(SP, 1);
	1636	if (s + sizeof(char*) <= strend) {
	1637	char *aptr;
	1638	SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
	1639	/* newSVpvn generates undef if aptr is NULL */
	1640	PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
	1641	}
	1642	break;
	1643	#if defined(HAS_QUAD) && IVSIZE >= 8
	1644	case 'q':
	1645	while (len-- > 0) {
	1646	Quad_t aquad;
	1647	SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
	1648	if (!checksum)
	1649	mPUSHs(newSViv((IV)aquad));
	1650	else if (checksum > bits_in_uv)
	1651	cdouble += (NV)aquad;
	1652	else
	1653	cuv += aquad;
	1654	}
	1655	break;
	1656	case 'Q':
	1657	while (len-- > 0) {
	1658	Uquad_t auquad;
	1659	SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
	1660	if (!checksum)
	1661	mPUSHs(newSVuv((UV)auquad));
	1662	else if (checksum > bits_in_uv)
	1663	cdouble += (NV)auquad;
	1664	else
	1665	cuv += auquad;
	1666	}
	1667	break;
	1668	#endif
	1669	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
	1670	case 'f':
	1671	while (len-- > 0) {
	1672	float afloat;
	1673	SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
	1674	if (!checksum)
	1675	mPUSHn(afloat);
	1676	else
	1677	cdouble += afloat;
	1678	}
	1679	break;
	1680	case 'd':
	1681	while (len-- > 0) {
	1682	double adouble;
	1683	SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
	1684	if (!checksum)
	1685	mPUSHn(adouble);
	1686	else
	1687	cdouble += adouble;
	1688	}
	1689	break;
	1690	case 'F':
	1691	while (len-- > 0) {
	1692	NV_bytes anv;
	1693	SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
	1694	datumtype, needs_swap);
	1695	if (!checksum)
	1696	mPUSHn(anv.nv);
	1697	else
	1698	cdouble += anv.nv;
	1699	}
	1700	break;
	1701	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	1702	case 'D':
	1703	while (len-- > 0) {
	1704	ld_bytes aldouble;
	1705	SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
	1706	sizeof(aldouble.bytes), datumtype, needs_swap);
	1707	/* The most common long double format, the x86 80-bit
	1708	* extended precision, has either 2 or 6 unused bytes,
	1709	* which may contain garbage, which may contain
	1710	* unintentional data. While we do zero the bytes of
	1711	* the long double data in pack(), here in unpack() we
	1712	* don't, because it's really hard to envision that
	1713	* reading the long double off aldouble would be
	1714	* affected by the unused bytes.
	1715	*
	1716	* Note that trying to unpack 'long doubles' of 'long
	1717	* doubles' packed in another system is in the general
	1718	* case doomed without having more detail. */
	1719	if (!checksum)
	1720	mPUSHn(aldouble.ld);
	1721	else
	1722	cdouble += aldouble.ld;
	1723	}
	1724	break;
	1725	#endif
	1726	case 'u':
	1727	if (!checksum) {
	1728	const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
	1729	sv = sv_2mortal(newSV(l));
	1730	if (l) SvPOK_on(sv);
	1731	}
	1732
	1733	/* Note that all legal uuencoded strings are ASCII printables, so
	1734	* have the same representation under UTF-8 vs not. This means we
	1735	* can ignore UTF8ness on legal input. For illegal we stop at the
	1736	* first failure, and don't report where/what that is, so again we
	1737	* can ignore UTF8ness */
	1738
	1739	while (s < strend && s != ' ' && ISUUCHAR(s)) {
	1740	I32 a, b, c, d;
	1741	char hunk[3];
	1742
	1743	len = PL_uudmap[(U8)s++] & 077;
	1744	while (len > 0) {
	1745	if (s < strend && ISUUCHAR(*s))
	1746	a = PL_uudmap[(U8)s++] & 077;
	1747	else
	1748	a = 0;
	1749	if (s < strend && ISUUCHAR(*s))
	1750	b = PL_uudmap[(U8)s++] & 077;
	1751	else
	1752	b = 0;
	1753	if (s < strend && ISUUCHAR(*s))
	1754	c = PL_uudmap[(U8)s++] & 077;
	1755	else
	1756	c = 0;
	1757	if (s < strend && ISUUCHAR(*s))
	1758	d = PL_uudmap[(U8)s++] & 077;
	1759	else
	1760	d = 0;
	1761	hunk[0] = (char)((a << 2) \| (b >> 4));
	1762	hunk[1] = (char)((b << 4) \| (c >> 2));
	1763	hunk[2] = (char)((c << 6) \| d);
	1764	if (!checksum)
	1765	sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
	1766	len -= 3;
	1767	}
	1768	if (*s == '\n')
	1769	s++;
	1770	else /* possible checksum byte */
	1771	if (s + 1 < strend && s[1] == '\n')
	1772	s += 2;
	1773	}
	1774	if (!checksum)
	1775	XPUSHs(sv);
	1776	break;
	1777	} /* End of switch */
	1778
	1779	if (checksum) {
	1780	if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) \|\|
	1781	(checksum > bits_in_uv &&
	1782	strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
	1783	NV trouble, anv;
	1784
	1785	anv = (NV) (1 << (checksum & 15));
	1786	while (checksum >= 16) {
	1787	checksum -= 16;
	1788	anv *= 65536.0;
	1789	}
	1790	while (cdouble < 0.0)
	1791	cdouble += anv;
	1792	cdouble = Perl_modf(cdouble / anv, &trouble);
	1793	#ifdef LONGDOUBLE_DOUBLEDOUBLE
	1794	/* Workaround for powerpc doubledouble modfl bug:
	1795	* close to 1.0L and -1.0L cdouble is 0, and trouble
	1796	* is cdouble / anv. */
	1797	if (trouble != Perl_ceil(trouble)) {
	1798	cdouble = trouble;
	1799	if (cdouble > 1.0L) cdouble -= 1.0L;
	1800	if (cdouble < -1.0L) cdouble += 1.0L;
	1801	}
	1802	#endif
	1803	cdouble *= anv;
	1804	sv = newSVnv(cdouble);
	1805	}
	1806	else {
	1807	if (checksum < bits_in_uv) {
	1808	UV mask = ((UV)1 << checksum) - 1;
	1809	cuv &= mask;
	1810	}
	1811	sv = newSVuv(cuv);
	1812	}
	1813	mXPUSHs(sv);
	1814	checksum = 0;
	1815	}
	1816
	1817	if (symptr->flags & FLAG_SLASH){
	1818	if (SP - PL_stack_base - start_sp_offset <= 0)
	1819	break;
	1820	if( next_symbol(symptr) ){
	1821	if( symptr->howlen == e_number )
	1822	Perl_croak(aTHX_ "Count after length/code in unpack" );
	1823	if( beyond ){
	1824	/* ...end of char buffer then no decent length available */
	1825	Perl_croak(aTHX_ "length/code after end of string in unpack" );
	1826	} else {
	1827	/* take top of stack (hope it's numeric) */
	1828	len = POPi;
	1829	if( len < 0 )
	1830	Perl_croak(aTHX_ "Negative '/' count in unpack" );
	1831	}
	1832	} else {
	1833	Perl_croak(aTHX_ "Code missing after '/' in unpack" );
	1834	}
	1835	datumtype = symptr->code;
	1836	explicit_length = FALSE;
	1837	goto redo_switch;
	1838	}
	1839	}
	1840
	1841	if (new_s)
	1842	*new_s = s;
	1843	PUTBACK;
	1844	return SP - PL_stack_base - start_sp_offset;
	1845	}
	1846
	1847	PP(pp_unpack)
	1848	{
	1849	dSP;
	1850	dPOPPOPssrl;
	1851	U8 gimme = GIMME_V;
	1852	STRLEN llen;
	1853	STRLEN rlen;
	1854	const char *pat = SvPV_const(left, llen);
	1855	const char *s = SvPV_const(right, rlen);
	1856	const char *strend = s + rlen;
	1857	const char *patend = pat + llen;
	1858	SSize_t cnt;
	1859
	1860	PUTBACK;
	1861	cnt = unpackstring(pat, patend, s, strend,
	1862	((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
	1863	\| (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
	1864
	1865	SPAGAIN;
	1866	if ( !cnt && gimme == G_SCALAR )
	1867	PUSHs(&PL_sv_undef);
	1868	RETURN;
	1869	}
	1870
	1871	STATIC U8 *
	1872	doencodes(U8 h, const U8 s, SSize_t len)
	1873	{
	1874	*h++ = PL_uuemap[len];
	1875	while (len > 2) {
	1876	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
	1877	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) \| ((s[1] >> 4) & 017)))];
	1878	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) \| ((s[2] >> 6) & 03)))];
	1879	*h++ = PL_uuemap[(077 & (s[2] & 077))];
	1880	s += 3;
	1881	len -= 3;
	1882	}
	1883	if (len > 0) {
	1884	const U8 r = (len > 1 ? s[1] : '\0');
	1885	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
	1886	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) \| ((r >> 4) & 017)))];
	1887	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
	1888	*h++ = PL_uuemap[0];
	1889	}
	1890	*h++ = '\n';
	1891	return h;
	1892	}
	1893
	1894	STATIC SV *
	1895	S_is_an_int(pTHX_ const char *s, STRLEN l)
	1896	{
	1897	SV *result = newSVpvn(s, l);
	1898	char const result_c = SvPV_nolen(result); / convenience */
	1899	char *out = result_c;
	1900	bool skip = 1;
	1901	bool ignore = 0;
	1902
	1903	PERL_ARGS_ASSERT_IS_AN_INT;
	1904
	1905	while (*s) {
	1906	switch (*s) {
	1907	case ' ':
	1908	break;
	1909	case '+':
	1910	if (!skip) {
	1911	SvREFCNT_dec(result);
	1912	return (NULL);
	1913	}
	1914	break;
	1915	case '0':
	1916	case '1':
	1917	case '2':
	1918	case '3':
	1919	case '4':
	1920	case '5':
	1921	case '6':
	1922	case '7':
	1923	case '8':
	1924	case '9':
	1925	skip = 0;
	1926	if (!ignore) {
	1927	(out++) = s;
	1928	}
	1929	break;
	1930	case '.':
	1931	ignore = 1;
	1932	break;
	1933	default:
	1934	SvREFCNT_dec(result);
	1935	return (NULL);
	1936	}
	1937	s++;
	1938	}
	1939	*(out++) = '\0';
	1940	SvCUR_set(result, out - result_c);
	1941	return (result);
	1942	}
	1943
	1944	/* pnum must be '\0' terminated */
	1945	STATIC int
	1946	S_div128(pTHX_ SV pnum, bool done)
	1947	{
	1948	STRLEN len;
	1949	char * const s = SvPV(pnum, len);
	1950	char *t = s;
	1951	int m = 0;
	1952
	1953	PERL_ARGS_ASSERT_DIV128;
	1954
	1955	*done = 1;
	1956	while (*t) {
	1957	const int i = m * 10 + (*t - '0');
	1958	const int r = (i >> 7); /* r < 10 */
	1959	m = i & 0x7F;
	1960	if (r) {
	1961	*done = 0;
	1962	}
	1963	*(t++) = '0' + r;
	1964	}
	1965	*(t++) = '\0';
	1966	SvCUR_set(pnum, (STRLEN) (t - s));
	1967	return (m);
	1968	}
	1969
	1970	/*
	1971	=for apidoc packlist
	1972
	1973	The engine implementing C<pack()> Perl function.
	1974
	1975	=cut
	1976	*/
	1977
	1978	void
	1979	Perl_packlist(pTHX_ SV cat, const char pat, const char patend, SV beglist, SV *endlist )
	1980	{
	1981	tempsym_t sym;
	1982
	1983	PERL_ARGS_ASSERT_PACKLIST;
	1984
	1985	TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
	1986
	1987	/* We're going to do changes through SvPVX(cat). Make sure it's valid.
	1988	Also make sure any UTF8 flag is loaded */
	1989	SvPV_force_nolen(cat);
	1990	if (DO_UTF8(cat))
	1991	sym.flags \|= FLAG_PARSE_UTF8 \| FLAG_DO_UTF8;
	1992
	1993	(void)pack_rec( cat, &sym, beglist, endlist );
	1994	}
	1995
	1996	/* like sv_utf8_upgrade, but also repoint the group start markers */
	1997	STATIC void
	1998	marked_upgrade(pTHX_ SV sv, tempsym_t sym_ptr) {
	1999	STRLEN len;
	2000	tempsym_t *group;
	2001	const char from_ptr, from_start, from_end, marks, *m;
	2002	char to_start, to_ptr;
	2003
	2004	if (SvUTF8(sv)) return;
	2005
	2006	from_start = SvPVX_const(sv);
	2007	from_end = from_start + SvCUR(sv);
	2008	for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
	2009	if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
	2010	if (from_ptr == from_end) {
	2011	/* Simple case: no character needs to be changed */
	2012	SvUTF8_on(sv);
	2013	return;
	2014	}
	2015
	2016	len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
	2017	Newx(to_start, len, char);
	2018	Copy(from_start, to_start, from_ptr-from_start, char);
	2019	to_ptr = to_start + (from_ptr-from_start);
	2020
	2021	Newx(marks, sym_ptr->level+2, const char *);
	2022	for (group=sym_ptr; group; group = group->previous)
	2023	marks[group->level] = from_start + group->strbeg;
	2024	marks[sym_ptr->level+1] = from_end+1;
	2025	for (m = marks; *m < from_ptr; m++)
	2026	m = to_start + (m-from_start);
	2027
	2028	for (;from_ptr < from_end; from_ptr++) {
	2029	while (m == from_ptr) m++ = to_ptr;
	2030	to_ptr = (char ) uvchr_to_utf8((U8 ) to_ptr, (U8 ) from_ptr);
	2031	}
	2032	*to_ptr = 0;
	2033
	2034	while (m == from_ptr) m++ = to_ptr;
	2035	if (m != marks + sym_ptr->level+1) {
	2036	Safefree(marks);
	2037	Safefree(to_start);
	2038	Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
	2039	"level=%d", m, marks, sym_ptr->level);
	2040	}
	2041	for (group=sym_ptr; group; group = group->previous)
	2042	group->strbeg = marks[group->level] - to_start;
	2043	Safefree(marks);
	2044
	2045	if (SvOOK(sv)) {
	2046	if (SvIVX(sv)) {
	2047	SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
	2048	from_start -= SvIVX(sv);
	2049	SvIV_set(sv, 0);
	2050	}
	2051	SvFLAGS(sv) &= ~SVf_OOK;
	2052	}
	2053	if (SvLEN(sv) != 0)
	2054	Safefree(from_start);
	2055	SvPV_set(sv, to_start);
	2056	SvCUR_set(sv, to_ptr - to_start);
	2057	SvLEN_set(sv, len);
	2058	SvUTF8_on(sv);
	2059	}
	2060
	2061	/* Exponential string grower. Makes string extension effectively O(n)
	2062	needed says how many extra bytes we need (not counting the final '\0')
	2063	Only grows the string if there is an actual lack of space
	2064	*/
	2065	STATIC char *
	2066	S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
	2067	const STRLEN cur = SvCUR(sv);
	2068	const STRLEN len = SvLEN(sv);
	2069	STRLEN extend;
	2070
	2071	PERL_ARGS_ASSERT_SV_EXP_GROW;
	2072
	2073	if (len - cur > needed) return SvPVX(sv);
	2074	extend = needed > len ? needed : len;
	2075	return SvGROW(sv, len+extend+1);
	2076	}
	2077
	2078	static SV *
	2079	S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
	2080	{
	2081	SvGETMAGIC(sv);
	2082	if (UNLIKELY(SvAMAGIC(sv)))
	2083	sv = sv_2num(sv);
	2084	if (UNLIKELY(isinfnansv(sv))) {
	2085	const I32 c = TYPE_NO_MODIFIERS(datumtype);
	2086	const NV nv = SvNV_nomg(sv);
	2087	if (c == 'w')
	2088	Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
	2089	else
	2090	Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
	2091	}
	2092	return sv;
	2093	}
	2094
	2095	#define SvIV_no_inf(sv,d) \
	2096	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
	2097	#define SvUV_no_inf(sv,d) \
	2098	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
	2099
	2100	STATIC
	2101	SV **
	2102	S_pack_rec(pTHX_ SV cat, tempsym_t symptr, SV beglist, SV endlist )
	2103	{
	2104	tempsym_t lookahead;
	2105	SSize_t items = endlist - beglist;
	2106	bool found = next_symbol(symptr);
	2107	bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
	2108	bool warn_utf8 = ckWARN(WARN_UTF8);
	2109	char* from;
	2110
	2111	PERL_ARGS_ASSERT_PACK_REC;
	2112
	2113	if (symptr->level == 0 && found && symptr->code == 'U') {
	2114	marked_upgrade(aTHX_ cat, symptr);
	2115	symptr->flags \|= FLAG_DO_UTF8;
	2116	utf8 = 0;
	2117	}
	2118	symptr->strbeg = SvCUR(cat);
	2119
	2120	while (found) {
	2121	SV *fromstr;
	2122	STRLEN fromlen;
	2123	SSize_t len;
	2124	SV *lengthcode = NULL;
	2125	I32 datumtype = symptr->code;
	2126	howlen_t howlen = symptr->howlen;
	2127	char *start = SvPVX(cat);
	2128	char *cur = start + SvCUR(cat);
	2129	bool needs_swap;
	2130
	2131	#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
	2132	#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
	2133
	2134	switch (howlen) {
	2135	case e_star:
	2136	len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
	2137	0 : items;
	2138	break;
	2139	default:
	2140	/* e_no_len and e_number */
	2141	len = symptr->length;
	2142	break;
	2143	}
	2144
	2145	if (len) {
	2146	packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
	2147
	2148	if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
	2149	/* We can process this letter. */
	2150	STRLEN size = props & PACK_SIZE_MASK;
	2151	GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
	2152	}
	2153	}
	2154
	2155	/* Look ahead for next symbol. Do we have code/code? */
	2156	lookahead = *symptr;
	2157	found = next_symbol(&lookahead);
	2158	if (symptr->flags & FLAG_SLASH) {
	2159	IV count;
	2160	if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
	2161	if (strchr("aAZ", lookahead.code)) {
	2162	if (lookahead.howlen == e_number) count = lookahead.length;
	2163	else {
	2164	if (items > 0) {
	2165	count = sv_len_utf8(*beglist);
	2166	}
	2167	else count = 0;
	2168	if (lookahead.code == 'Z') count++;
	2169	}
	2170	} else {
	2171	if (lookahead.howlen == e_number && lookahead.length < items)
	2172	count = lookahead.length;
	2173	else count = items;
	2174	}
	2175	lookahead.howlen = e_number;
	2176	lookahead.length = count;
	2177	lengthcode = sv_2mortal(newSViv(count));
	2178	}
	2179
	2180	needs_swap = NEEDS_SWAP(datumtype);
	2181
	2182	/* Code inside the switch must take care to properly update
	2183	cat (CUR length and '\0' termination) if it updated *cur and
	2184	doesn't simply leave using break */
	2185	switch (TYPE_NO_ENDIANNESS(datumtype)) {
	2186	default:
	2187	Perl_croak(aTHX_ "Invalid type '%c' in pack",
	2188	(int) TYPE_NO_MODIFIERS(datumtype));
	2189	case '%':
	2190	Perl_croak(aTHX_ "'%%' may not be used in pack");
	2191
	2192	case '.' \| TYPE_IS_SHRIEKING:
	2193	case '.':
	2194	if (howlen == e_star) from = start;
	2195	else if (len == 0) from = cur;
	2196	else {
	2197	tempsym_t *group = symptr;
	2198
	2199	while (--len && group) group = group->previous;
	2200	from = group ? start + group->strbeg : start;
	2201	}
	2202	fromstr = NEXTFROM;
	2203	len = SvIV_no_inf(fromstr, datumtype);
	2204	goto resize;
	2205	case '@' \| TYPE_IS_SHRIEKING:
	2206	case '@':
	2207	from = start + symptr->strbeg;
	2208	resize:
	2209	if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
	2210	if (len >= 0) {
	2211	while (len && from < cur) {
	2212	from += UTF8SKIP(from);
	2213	len--;
	2214	}
	2215	if (from > cur)
	2216	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2217	if (len) {
	2218	/* Here we know from == cur */
	2219	grow:
	2220	GROWING(0, cat, start, cur, len);
	2221	Zero(cur, len, char);
	2222	cur += len;
	2223	} else if (from < cur) {
	2224	len = cur - from;
	2225	goto shrink;
	2226	} else goto no_change;
	2227	} else {
	2228	cur = from;
	2229	len = -len;
	2230	goto utf8_shrink;
	2231	}
	2232	else {
	2233	len -= cur - from;
	2234	if (len > 0) goto grow;
	2235	if (len == 0) goto no_change;
	2236	len = -len;
	2237	goto shrink;
	2238	}
	2239	break;
	2240
	2241	case '(': {
	2242	tempsym_t savsym = *symptr;
	2243	U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
	2244	symptr->flags \|= group_modifiers;
	2245	symptr->patend = savsym.grpend;
	2246	symptr->level++;
	2247	symptr->previous = &lookahead;
	2248	while (len--) {
	2249	U32 was_utf8;
	2250	if (utf8) symptr->flags \|= FLAG_PARSE_UTF8;
	2251	else symptr->flags &= ~FLAG_PARSE_UTF8;
	2252	was_utf8 = SvUTF8(cat);
	2253	symptr->patptr = savsym.grpbeg;
	2254	beglist = pack_rec(cat, symptr, beglist, endlist);
	2255	if (SvUTF8(cat) != was_utf8)
	2256	/* This had better be an upgrade while in utf8==0 mode */
	2257	utf8 = 1;
	2258
	2259	if (savsym.howlen == e_star && beglist == endlist)
	2260	break; /* No way to continue */
	2261	}
	2262	items = endlist - beglist;
	2263	lookahead.flags = symptr->flags & ~group_modifiers;
	2264	goto no_change;
	2265	}
	2266	case 'X' \| TYPE_IS_SHRIEKING:
	2267	if (!len) /* Avoid division by 0 */
	2268	len = 1;
	2269	if (utf8) {
	2270	char hop, last;
	2271	SSize_t l = len;
	2272	hop = last = start;
	2273	while (hop < cur) {
	2274	hop += UTF8SKIP(hop);
	2275	if (--l == 0) {
	2276	last = hop;
	2277	l = len;
	2278	}
	2279	}
	2280	if (last > cur)
	2281	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2282	cur = last;
	2283	break;
	2284	}
	2285	len = (cur-start) % len;
	2286	/* FALLTHROUGH */
	2287	case 'X':
	2288	if (utf8) {
	2289	if (len < 1) goto no_change;
	2290	utf8_shrink:
	2291	while (len > 0) {
	2292	if (cur <= start)
	2293	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2294	(int) TYPE_NO_MODIFIERS(datumtype));
	2295	while (--cur, UTF8_IS_CONTINUATION(*cur)) {
	2296	if (cur <= start)
	2297	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2298	(int) TYPE_NO_MODIFIERS(datumtype));
	2299	}
	2300	len--;
	2301	}
	2302	} else {
	2303	shrink:
	2304	if (cur - start < len)
	2305	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2306	(int) TYPE_NO_MODIFIERS(datumtype));
	2307	cur -= len;
	2308	}
	2309	if (cur < start+symptr->strbeg) {
	2310	/* Make sure group starts don't point into the void */
	2311	tempsym_t *group;
	2312	const STRLEN length = cur-start;
	2313	for (group = symptr;
	2314	group && length < group->strbeg;
	2315	group = group->previous) group->strbeg = length;
	2316	lookahead.strbeg = length;
	2317	}
	2318	break;
	2319	case 'x' \| TYPE_IS_SHRIEKING: {
	2320	SSize_t ai32;
	2321	if (!len) /* Avoid division by 0 */
	2322	len = 1;
	2323	if (utf8) ai32 = utf8_length((U8 ) start, (U8 ) cur) % len;
	2324	else ai32 = (cur - start) % len;
	2325	if (ai32 == 0) goto no_change;
	2326	len -= ai32;
	2327	}
	2328	/* FALLTHROUGH */
	2329	case 'x':
	2330	goto grow;
	2331	case 'A':
	2332	case 'Z':
	2333	case 'a': {
	2334	const char *aptr;
	2335
	2336	fromstr = NEXTFROM;
	2337	aptr = SvPV_const(fromstr, fromlen);
	2338	if (DO_UTF8(fromstr)) {
	2339	const char end, s;
	2340
	2341	if (!utf8 && !SvUTF8(cat)) {
	2342	marked_upgrade(aTHX_ cat, symptr);
	2343	lookahead.flags \|= FLAG_DO_UTF8;
	2344	lookahead.strbeg = symptr->strbeg;
	2345	utf8 = 1;
	2346	start = SvPVX(cat);
	2347	cur = start + SvCUR(cat);
	2348	}
	2349	if (howlen == e_star) {
	2350	if (utf8) goto string_copy;
	2351	len = fromlen+1;
	2352	}
	2353	s = aptr;
	2354	end = aptr + fromlen;
	2355	fromlen = datumtype == 'Z' ? len-1 : len;
	2356	while ((SSize_t) fromlen > 0 && s < end) {
	2357	s += UTF8SKIP(s);
	2358	fromlen--;
	2359	}
	2360	if (s > end)
	2361	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2362	if (utf8) {
	2363	len = fromlen;
	2364	if (datumtype == 'Z') len++;
	2365	fromlen = s-aptr;
	2366	len += fromlen;
	2367
	2368	goto string_copy;
	2369	}
	2370	fromlen = len - fromlen;
	2371	if (datumtype == 'Z') fromlen--;
	2372	if (howlen == e_star) {
	2373	len = fromlen;
	2374	if (datumtype == 'Z') len++;
	2375	}
	2376	GROWING(0, cat, start, cur, len);
	2377	if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
	2378	datumtype \| TYPE_IS_PACK))
	2379	Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
	2380	"for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
	2381	(int)datumtype, aptr, end, cur, fromlen);
	2382	cur += fromlen;
	2383	len -= fromlen;
	2384	} else if (utf8) {
	2385	if (howlen == e_star) {
	2386	len = fromlen;
	2387	if (datumtype == 'Z') len++;
	2388	}
	2389	if (len <= (SSize_t) fromlen) {
	2390	fromlen = len;
	2391	if (datumtype == 'Z' && fromlen > 0) fromlen--;
	2392	}
	2393	/* assumes a byte expands to at most UTF8_EXPAND bytes on
	2394	upgrade, so:
	2395	expected_length <= from_lenUTF8_EXPAND + (len-from_len) /
	2396	GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
	2397	len -= fromlen;
	2398	while (fromlen > 0) {
	2399	cur = (char ) uvchr_to_utf8((U8 ) cur, * (U8 *) aptr);
	2400	aptr++;
	2401	fromlen--;
	2402	}
	2403	} else {
	2404	string_copy:
	2405	if (howlen == e_star) {
	2406	len = fromlen;
	2407	if (datumtype == 'Z') len++;
	2408	}
	2409	if (len <= (SSize_t) fromlen) {
	2410	fromlen = len;
	2411	if (datumtype == 'Z' && fromlen > 0) fromlen--;
	2412	}
	2413	GROWING(0, cat, start, cur, len);
	2414	Copy(aptr, cur, fromlen, char);
	2415	cur += fromlen;
	2416	len -= fromlen;
	2417	}
	2418	memset(cur, datumtype == 'A' ? ' ' : '\0', len);
	2419	cur += len;
	2420	SvTAINT(cat);
	2421	break;
	2422	}
	2423	case 'B':
	2424	case 'b': {
	2425	const char str, end;
	2426	SSize_t l, field_len;
	2427	U8 bits;
	2428	bool utf8_source;
	2429	U32 utf8_flags;
	2430
	2431	fromstr = NEXTFROM;
	2432	str = SvPV_const(fromstr, fromlen);
	2433	end = str + fromlen;
	2434	if (DO_UTF8(fromstr)) {
	2435	utf8_source = TRUE;
	2436	utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
	2437	} else {
	2438	utf8_source = FALSE;
	2439	utf8_flags = 0; /* Unused, but keep compilers happy */
	2440	}
	2441	if (howlen == e_star) len = fromlen;
	2442	field_len = (len+7)/8;
	2443	GROWING(utf8, cat, start, cur, field_len);
	2444	if (len > (SSize_t)fromlen) len = fromlen;
	2445	bits = 0;
	2446	l = 0;
	2447	if (datumtype == 'B')
	2448	while (l++ < len) {
	2449	if (utf8_source) {
	2450	UV val = 0;
	2451	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2452	bits \|= val & 1;
	2453	} else bits \|= *str++ & 1;
	2454	if (l & 7) bits <<= 1;
	2455	else {
	2456	PUSH_BYTE(utf8, cur, bits);
	2457	bits = 0;
	2458	}
	2459	}
	2460	else
	2461	/* datumtype == 'b' */
	2462	while (l++ < len) {
	2463	if (utf8_source) {
	2464	UV val = 0;
	2465	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2466	if (val & 1) bits \|= 0x80;
	2467	} else if (*str++ & 1)
	2468	bits \|= 0x80;
	2469	if (l & 7) bits >>= 1;
	2470	else {
	2471	PUSH_BYTE(utf8, cur, bits);
	2472	bits = 0;
	2473	}
	2474	}
	2475	l--;
	2476	if (l & 7) {
	2477	if (datumtype == 'B')
	2478	bits <<= 7 - (l & 7);
	2479	else
	2480	bits >>= 7 - (l & 7);
	2481	PUSH_BYTE(utf8, cur, bits);
	2482	l += 7;
	2483	}
	2484	/* Determine how many chars are left in the requested field */
	2485	l /= 8;
	2486	if (howlen == e_star) field_len = 0;
	2487	else field_len -= l;
	2488	Zero(cur, field_len, char);
	2489	cur += field_len;
	2490	break;
	2491	}
	2492	case 'H':
	2493	case 'h': {
	2494	const char str, end;
	2495	SSize_t l, field_len;
	2496	U8 bits;
	2497	bool utf8_source;
	2498	U32 utf8_flags;
	2499
	2500	fromstr = NEXTFROM;
	2501	str = SvPV_const(fromstr, fromlen);
	2502	end = str + fromlen;
	2503	if (DO_UTF8(fromstr)) {
	2504	utf8_source = TRUE;
	2505	utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
	2506	} else {
	2507	utf8_source = FALSE;
	2508	utf8_flags = 0; /* Unused, but keep compilers happy */
	2509	}
	2510	if (howlen == e_star) len = fromlen;
	2511	field_len = (len+1)/2;
	2512	GROWING(utf8, cat, start, cur, field_len);
	2513	if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
	2514	bits = 0;
	2515	l = 0;
	2516	if (datumtype == 'H')
	2517	while (l++ < len) {
	2518	if (utf8_source) {
	2519	UV val = 0;
	2520	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2521	if (val < 256 && isALPHA(val))
	2522	bits \|= (val + 9) & 0xf;
	2523	else
	2524	bits \|= val & 0xf;
	2525	} else if (isALPHA(*str))
	2526	bits \|= (*str++ + 9) & 0xf;
	2527	else
	2528	bits \|= *str++ & 0xf;
	2529	if (l & 1) bits <<= 4;
	2530	else {
	2531	PUSH_BYTE(utf8, cur, bits);
	2532	bits = 0;
	2533	}
	2534	}
	2535	else
	2536	while (l++ < len) {
	2537	if (utf8_source) {
	2538	UV val = 0;
	2539	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2540	if (val < 256 && isALPHA(val))
	2541	bits \|= ((val + 9) & 0xf) << 4;
	2542	else
	2543	bits \|= (val & 0xf) << 4;
	2544	} else if (isALPHA(*str))
	2545	bits \|= ((*str++ + 9) & 0xf) << 4;
	2546	else
	2547	bits \|= (*str++ & 0xf) << 4;
	2548	if (l & 1) bits >>= 4;
	2549	else {
	2550	PUSH_BYTE(utf8, cur, bits);
	2551	bits = 0;
	2552	}
	2553	}
	2554	l--;
	2555	if (l & 1) {
	2556	PUSH_BYTE(utf8, cur, bits);
	2557	l++;
	2558	}
	2559	/* Determine how many chars are left in the requested field */
	2560	l /= 2;
	2561	if (howlen == e_star) field_len = 0;
	2562	else field_len -= l;
	2563	Zero(cur, field_len, char);
	2564	cur += field_len;
	2565	break;
	2566	}
	2567	case 'c':
	2568	while (len-- > 0) {
	2569	IV aiv;
	2570	fromstr = NEXTFROM;
	2571	aiv = SvIV_no_inf(fromstr, datumtype);
	2572	if ((-128 > aiv \|\| aiv > 127))
	2573	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2574	"Character in 'c' format wrapped in pack");
	2575	PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
	2576	}
	2577	break;
	2578	case 'C':
	2579	if (len == 0) {
	2580	utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
	2581	break;
	2582	}
	2583	while (len-- > 0) {
	2584	IV aiv;
	2585	fromstr = NEXTFROM;
	2586	aiv = SvIV_no_inf(fromstr, datumtype);
	2587	if ((0 > aiv \|\| aiv > 0xff))
	2588	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2589	"Character in 'C' format wrapped in pack");
	2590	PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
	2591	}
	2592	break;
	2593	case 'W': {
	2594	char *end;
	2595	U8 in_bytes = (U8)IN_BYTES;
	2596
	2597	end = start+SvLEN(cat)-1;
	2598	if (utf8) end -= UTF8_MAXLEN-1;
	2599	while (len-- > 0) {
	2600	UV auv;
	2601	fromstr = NEXTFROM;
	2602	auv = SvUV_no_inf(fromstr, datumtype);
	2603	if (in_bytes) auv = auv % 0x100;
	2604	if (utf8) {
	2605	W_utf8:
	2606	if (cur >= end) {
	2607	*cur = '\0';
	2608	SvCUR_set(cat, cur - start);
	2609
	2610	GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
	2611	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2612	}
	2613	cur = (char ) uvchr_to_utf8_flags((U8 ) cur, auv, 0);
	2614	} else {
	2615	if (auv >= 0x100) {
	2616	if (!SvUTF8(cat)) {
	2617	*cur = '\0';
	2618	SvCUR_set(cat, cur - start);
	2619	marked_upgrade(aTHX_ cat, symptr);
	2620	lookahead.flags \|= FLAG_DO_UTF8;
	2621	lookahead.strbeg = symptr->strbeg;
	2622	utf8 = 1;
	2623	start = SvPVX(cat);
	2624	cur = start + SvCUR(cat);
	2625	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2626	goto W_utf8;
	2627	}
	2628	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2629	"Character in 'W' format wrapped in pack");
	2630	auv &= 0xff;
	2631	}
	2632	if (cur >= end) {
	2633	*cur = '\0';
	2634	SvCUR_set(cat, cur - start);
	2635	GROWING(0, cat, start, cur, len+1);
	2636	end = start+SvLEN(cat)-1;
	2637	}
	2638	(U8 ) cur++ = (U8)auv;
	2639	}
	2640	}
	2641	break;
	2642	}
	2643	case 'U': {
	2644	char *end;
	2645
	2646	if (len == 0) {
	2647	if (!(symptr->flags & FLAG_DO_UTF8)) {
	2648	marked_upgrade(aTHX_ cat, symptr);
	2649	lookahead.flags \|= FLAG_DO_UTF8;
	2650	lookahead.strbeg = symptr->strbeg;
	2651	}
	2652	utf8 = 0;
	2653	goto no_change;
	2654	}
	2655
	2656	end = start+SvLEN(cat);
	2657	if (!utf8) end -= UTF8_MAXLEN;
	2658	while (len-- > 0) {
	2659	UV auv;
	2660	fromstr = NEXTFROM;
	2661	auv = SvUV_no_inf(fromstr, datumtype);
	2662	if (utf8) {
	2663	U8 buffer[UTF8_MAXLEN+1], *endb;
	2664	endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
	2665	if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
	2666	*cur = '\0';
	2667	SvCUR_set(cat, cur - start);
	2668	GROWING(0, cat, start, cur,
	2669	len+(endb-buffer)*UTF8_EXPAND);
	2670	end = start+SvLEN(cat);
	2671	}
	2672	cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
	2673	} else {
	2674	if (cur >= end) {
	2675	*cur = '\0';
	2676	SvCUR_set(cat, cur - start);
	2677	GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
	2678	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2679	}
	2680	cur = (char ) uvchr_to_utf8_flags((U8 ) cur,
	2681	UNI_TO_NATIVE(auv),
	2682	0);
	2683	}
	2684	}
	2685	break;
	2686	}
	2687	/* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
	2688	case 'f':
	2689	while (len-- > 0) {
	2690	float afloat;
	2691	NV anv;
	2692	fromstr = NEXTFROM;
	2693	anv = SvNV(fromstr);
	2694	# if (defined(VMS) && !defined(_IEEE_FP)) \|\| defined(DOUBLE_IS_VAX_FLOAT)
	2695	/* IEEE fp overflow shenanigans are unavailable on VAX and optional
	2696	* on Alpha; fake it if we don't have them.
	2697	*/
	2698	if (anv > FLT_MAX)
	2699	afloat = FLT_MAX;
	2700	else if (anv < -FLT_MAX)
	2701	afloat = -FLT_MAX;
	2702	else afloat = (float)anv;
	2703	# else
	2704	# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
	2705	if(Perl_isnan(anv))
	2706	afloat = (float)NV_NAN;
	2707	else
	2708	# endif
	2709	# ifdef NV_INF
	2710	/* a simple cast to float is undefined if outside
	2711	* the range of values that can be represented */
	2712	afloat = (float)(anv > FLT_MAX ? NV_INF :
	2713	anv < -FLT_MAX ? -NV_INF : anv);
	2714	# endif
	2715	# endif
	2716	PUSH_VAR(utf8, cur, afloat, needs_swap);
	2717	}
	2718	break;
	2719	case 'd':
	2720	while (len-- > 0) {
	2721	double adouble;
	2722	NV anv;
	2723	fromstr = NEXTFROM;
	2724	anv = SvNV(fromstr);
	2725	# if (defined(VMS) && !defined(_IEEE_FP)) \|\| defined(DOUBLE_IS_VAX_FLOAT)
	2726	/* IEEE fp overflow shenanigans are unavailable on VAX and optional
	2727	* on Alpha; fake it if we don't have them.
	2728	*/
	2729	if (anv > DBL_MAX)
	2730	adouble = DBL_MAX;
	2731	else if (anv < -DBL_MAX)
	2732	adouble = -DBL_MAX;
	2733	else adouble = (double)anv;
	2734	# else
	2735	adouble = (double)anv;
	2736	# endif
	2737	PUSH_VAR(utf8, cur, adouble, needs_swap);
	2738	}
	2739	break;
	2740	case 'F': {
	2741	NV_bytes anv;
	2742	Zero(&anv, 1, NV); /* can be long double with unused bits */
	2743	while (len-- > 0) {
	2744	fromstr = NEXTFROM;
	2745	#ifdef __GNUC__
	2746	/* to work round a gcc/x86 bug; don't use SvNV */
	2747	anv.nv = sv_2nv(fromstr);
	2748	# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
	2749	&& LONG_DOUBLESIZE > 10
	2750	/* GCC sometimes overwrites the padding in the
	2751	assignment above */
	2752	Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
	2753	# endif
	2754	#else
	2755	anv.nv = SvNV(fromstr);
	2756	#endif
	2757	PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
	2758	}
	2759	break;
	2760	}
	2761	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	2762	case 'D': {
	2763	ld_bytes aldouble;
	2764	/* long doubles can have unused bits, which may be nonzero */
	2765	Zero(&aldouble, 1, long double);
	2766	while (len-- > 0) {
	2767	fromstr = NEXTFROM;
	2768	# ifdef __GNUC__
	2769	/* to work round a gcc/x86 bug; don't use SvNV */
	2770	aldouble.ld = (long double)sv_2nv(fromstr);
	2771	# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
	2772	/* GCC sometimes overwrites the padding in the
	2773	assignment above */
	2774	Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
	2775	# endif
	2776	# else
	2777	aldouble.ld = (long double)SvNV(fromstr);
	2778	# endif
	2779	PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
	2780	needs_swap);
	2781	}
	2782	break;
	2783	}
	2784	#endif
	2785	case 'n' \| TYPE_IS_SHRIEKING:
	2786	case 'n':
	2787	while (len-- > 0) {
	2788	I16 ai16;
	2789	fromstr = NEXTFROM;
	2790	ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
	2791	ai16 = PerlSock_htons(ai16);
	2792	PUSH16(utf8, cur, &ai16, FALSE);
	2793	}
	2794	break;
	2795	case 'v' \| TYPE_IS_SHRIEKING:
	2796	case 'v':
	2797	while (len-- > 0) {
	2798	I16 ai16;
	2799	fromstr = NEXTFROM;
	2800	ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
	2801	ai16 = htovs(ai16);
	2802	PUSH16(utf8, cur, &ai16, FALSE);
	2803	}
	2804	break;
	2805	case 'S' \| TYPE_IS_SHRIEKING:
	2806	#if SHORTSIZE != SIZE16
	2807	while (len-- > 0) {
	2808	unsigned short aushort;
	2809	fromstr = NEXTFROM;
	2810	aushort = SvUV_no_inf(fromstr, datumtype);
	2811	PUSH_VAR(utf8, cur, aushort, needs_swap);
	2812	}
	2813	break;
	2814	#else
	2815	/* FALLTHROUGH */
	2816	#endif
	2817	case 'S':
	2818	while (len-- > 0) {
	2819	U16 au16;
	2820	fromstr = NEXTFROM;
	2821	au16 = (U16)SvUV_no_inf(fromstr, datumtype);
	2822	PUSH16(utf8, cur, &au16, needs_swap);
	2823	}
	2824	break;
	2825	case 's' \| TYPE_IS_SHRIEKING:
	2826	#if SHORTSIZE != SIZE16
	2827	while (len-- > 0) {
	2828	short ashort;
	2829	fromstr = NEXTFROM;
	2830	ashort = SvIV_no_inf(fromstr, datumtype);
	2831	PUSH_VAR(utf8, cur, ashort, needs_swap);
	2832	}
	2833	break;
	2834	#else
	2835	/* FALLTHROUGH */
	2836	#endif
	2837	case 's':
	2838	while (len-- > 0) {
	2839	I16 ai16;
	2840	fromstr = NEXTFROM;
	2841	ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
	2842	PUSH16(utf8, cur, &ai16, needs_swap);
	2843	}
	2844	break;
	2845	case 'I':
	2846	case 'I' \| TYPE_IS_SHRIEKING:
	2847	while (len-- > 0) {
	2848	unsigned int auint;
	2849	fromstr = NEXTFROM;
	2850	auint = SvUV_no_inf(fromstr, datumtype);
	2851	PUSH_VAR(utf8, cur, auint, needs_swap);
	2852	}
	2853	break;
	2854	case 'j':
	2855	while (len-- > 0) {
	2856	IV aiv;
	2857	fromstr = NEXTFROM;
	2858	aiv = SvIV_no_inf(fromstr, datumtype);
	2859	PUSH_VAR(utf8, cur, aiv, needs_swap);
	2860	}
	2861	break;
	2862	case 'J':
	2863	while (len-- > 0) {
	2864	UV auv;
	2865	fromstr = NEXTFROM;
	2866	auv = SvUV_no_inf(fromstr, datumtype);
	2867	PUSH_VAR(utf8, cur, auv, needs_swap);
	2868	}
	2869	break;
	2870	case 'w':
	2871	while (len-- > 0) {
	2872	NV anv;
	2873	fromstr = NEXTFROM;
	2874	S_sv_check_infnan(aTHX_ fromstr, datumtype);
	2875	anv = SvNV_nomg(fromstr);
	2876
	2877	if (anv < 0) {
	2878	*cur = '\0';
	2879	SvCUR_set(cat, cur - start);
	2880	Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
	2881	}
	2882
	2883	/* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
	2884	which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
	2885	any negative IVs will have already been got by the croak()
	2886	above. IOK is untrue for fractions, so we test them
	2887	against UV_MAX_P1. */
	2888	if (SvIOK(fromstr) \|\| anv < UV_MAX_P1) {
	2889	char buf[(sizeof(UV)*CHAR_BIT)/7+1];
	2890	char *in = buf + sizeof(buf);
	2891	UV auv = SvUV_nomg(fromstr);
	2892
	2893	do {
	2894	*--in = (char)((auv & 0x7f) \| 0x80);
	2895	auv >>= 7;
	2896	} while (auv);
	2897	buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
	2898	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2899	in, (buf + sizeof(buf)) - in);
	2900	} else if (SvPOKp(fromstr))
	2901	goto w_string;
	2902	else if (SvNOKp(fromstr)) {
	2903	/* 10**NV_MAX_10_EXP is the largest power of 10
	2904	so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
	2905	given 10(NV_MAX_10_EXP+1) == 128 x solve for x:
	2906	x = (NV_MAX_10_EXP+1) * log (10) / log (128)
	2907	And with that many bytes only Inf can overflow.
	2908	Some C compilers are strict about integral constant
	2909	expressions so we conservatively divide by a slightly
	2910	smaller integer instead of multiplying by the exact
	2911	floating-point value.
	2912	*/
	2913	#ifdef NV_MAX_10_EXP
	2914	/* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
	2915	char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
	2916	#else
	2917	/* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
	2918	char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
	2919	#endif
	2920	char *in = buf + sizeof(buf);
	2921
	2922	anv = Perl_floor(anv);
	2923	do {
	2924	const NV next = Perl_floor(anv / 128);
	2925	if (in <= buf) /* this cannot happen ;-) */
	2926	Perl_croak(aTHX_ "Cannot compress integer in pack");
	2927	--in = (unsigned char)(anv - (next 128)) \| 0x80;
	2928	anv = next;
	2929	} while (anv > 0);
	2930	buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
	2931	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2932	in, (buf + sizeof(buf)) - in);
	2933	} else {
	2934	const char *from;
	2935	char result, in;
	2936	SV *norm;
	2937	STRLEN len;
	2938	bool done;
	2939
	2940	w_string:
	2941	/* Copy string and check for compliance */
	2942	from = SvPV_nomg_const(fromstr, len);
	2943	if ((norm = is_an_int(from, len)) == NULL)
	2944	Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
	2945
	2946	Newx(result, len, char);
	2947	in = result + len;
	2948	done = FALSE;
	2949	while (!done) *--in = div128(norm, &done) \| 0x80;
	2950	result[len - 1] &= 0x7F; /* clear continue bit */
	2951	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2952	in, (result + len) - in);
	2953	Safefree(result);
	2954	SvREFCNT_dec(norm); /* free norm */
	2955	}
	2956	}
	2957	break;
	2958	case 'i':
	2959	case 'i' \| TYPE_IS_SHRIEKING:
	2960	while (len-- > 0) {
	2961	int aint;
	2962	fromstr = NEXTFROM;
	2963	aint = SvIV_no_inf(fromstr, datumtype);
	2964	PUSH_VAR(utf8, cur, aint, needs_swap);
	2965	}
	2966	break;
	2967	case 'N' \| TYPE_IS_SHRIEKING:
	2968	case 'N':
	2969	while (len-- > 0) {
	2970	U32 au32;
	2971	fromstr = NEXTFROM;
	2972	au32 = SvUV_no_inf(fromstr, datumtype);
	2973	au32 = PerlSock_htonl(au32);
	2974	PUSH32(utf8, cur, &au32, FALSE);
	2975	}
	2976	break;
	2977	case 'V' \| TYPE_IS_SHRIEKING:
	2978	case 'V':
	2979	while (len-- > 0) {
	2980	U32 au32;
	2981	fromstr = NEXTFROM;
	2982	au32 = SvUV_no_inf(fromstr, datumtype);
	2983	au32 = htovl(au32);
	2984	PUSH32(utf8, cur, &au32, FALSE);
	2985	}
	2986	break;
	2987	case 'L' \| TYPE_IS_SHRIEKING:
	2988	#if LONGSIZE != SIZE32
	2989	while (len-- > 0) {
	2990	unsigned long aulong;
	2991	fromstr = NEXTFROM;
	2992	aulong = SvUV_no_inf(fromstr, datumtype);
	2993	PUSH_VAR(utf8, cur, aulong, needs_swap);
	2994	}
	2995	break;
	2996	#else
	2997	/* Fall though! */
	2998	#endif
	2999	case 'L':
	3000	while (len-- > 0) {
	3001	U32 au32;
	3002	fromstr = NEXTFROM;
	3003	au32 = SvUV_no_inf(fromstr, datumtype);
	3004	PUSH32(utf8, cur, &au32, needs_swap);
	3005	}
	3006	break;
	3007	case 'l' \| TYPE_IS_SHRIEKING:
	3008	#if LONGSIZE != SIZE32
	3009	while (len-- > 0) {
	3010	long along;
	3011	fromstr = NEXTFROM;
	3012	along = SvIV_no_inf(fromstr, datumtype);
	3013	PUSH_VAR(utf8, cur, along, needs_swap);
	3014	}
	3015	break;
	3016	#else
	3017	/* Fall though! */
	3018	#endif
	3019	case 'l':
	3020	while (len-- > 0) {
	3021	I32 ai32;
	3022	fromstr = NEXTFROM;
	3023	ai32 = SvIV_no_inf(fromstr, datumtype);
	3024	PUSH32(utf8, cur, &ai32, needs_swap);
	3025	}
	3026	break;
	3027	#if defined(HAS_QUAD) && IVSIZE >= 8
	3028	case 'Q':
	3029	while (len-- > 0) {
	3030	Uquad_t auquad;
	3031	fromstr = NEXTFROM;
	3032	auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
	3033	PUSH_VAR(utf8, cur, auquad, needs_swap);
	3034	}
	3035	break;
	3036	case 'q':
	3037	while (len-- > 0) {
	3038	Quad_t aquad;
	3039	fromstr = NEXTFROM;
	3040	aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
	3041	PUSH_VAR(utf8, cur, aquad, needs_swap);
	3042	}
	3043	break;
	3044	#endif
	3045	case 'P':
	3046	len = 1; /* assume SV is correct length */
	3047	GROWING(utf8, cat, start, cur, sizeof(char *));
	3048	/* FALLTHROUGH */
	3049	case 'p':
	3050	while (len-- > 0) {
	3051	const char *aptr;
	3052
	3053	fromstr = NEXTFROM;
	3054	SvGETMAGIC(fromstr);
	3055	if (!SvOK(fromstr)) aptr = NULL;
	3056	else {
	3057	/* XXX better yet, could spirit away the string to
	3058	* a safe spot and hang on to it until the result
	3059	* of pack() (and all copies of the result) are
	3060	* gone.
	3061	*/
	3062	if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
	3063	\|\| (SvPADTMP(fromstr) &&
	3064	!SvREADONLY(fromstr)))) {
	3065	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	3066	"Attempt to pack pointer to temporary value");
	3067	}
	3068	if (SvPOK(fromstr) \|\| SvNIOK(fromstr))
	3069	aptr = SvPV_nomg_const_nolen(fromstr);
	3070	else
	3071	aptr = SvPV_force_flags_nolen(fromstr, 0);
	3072	}
	3073	PUSH_VAR(utf8, cur, aptr, needs_swap);
	3074	}
	3075	break;
	3076	case 'u': {
	3077	const char aptr, aend;
	3078	bool from_utf8;
	3079
	3080	fromstr = NEXTFROM;
	3081	if (len <= 2) len = 45;
	3082	else len = len / 3 * 3;
	3083	if (len >= 64) {
	3084	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	3085	"Field too wide in 'u' format in pack");
	3086	len = 63;
	3087	}
	3088	aptr = SvPV_const(fromstr, fromlen);
	3089	from_utf8 = DO_UTF8(fromstr);
	3090	if (from_utf8) {
	3091	aend = aptr + fromlen;
	3092	fromlen = sv_len_utf8_nomg(fromstr);
	3093	} else aend = NULL; /* Unused, but keep compilers happy */
	3094	GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
	3095	while (fromlen > 0) {
	3096	U8 *end;
	3097	SSize_t todo;
	3098	U8 hunk[1+63/3*4+1];
	3099
	3100	if ((SSize_t)fromlen > len)
	3101	todo = len;
	3102	else
	3103	todo = fromlen;
	3104	if (from_utf8) {
	3105	char buffer[64];
	3106	if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
	3107	'u' \| TYPE_IS_PACK)) {
	3108	*cur = '\0';
	3109	SvCUR_set(cat, cur - start);
	3110	Perl_croak(aTHX_ "panic: string is shorter than advertised, "
	3111	"aptr=%p, aend=%p, buffer=%p, todo=%zd",
	3112	aptr, aend, buffer, todo);
	3113	}
	3114	end = doencodes(hunk, (const U8 *)buffer, todo);
	3115	} else {
	3116	end = doencodes(hunk, (const U8 *)aptr, todo);
	3117	aptr += todo;
	3118	}
	3119	PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
	3120	fromlen -= todo;
	3121	}
	3122	break;
	3123	}
	3124	}
	3125	*cur = '\0';
	3126	SvCUR_set(cat, cur - start);
	3127	no_change:
	3128	*symptr = lookahead;
	3129	}
	3130	return beglist;
	3131	}
	3132	#undef NEXTFROM
	3133
	3134
	3135	PP(pp_pack)
	3136	{
	3137	dSP; dMARK; dORIGMARK; dTARGET;
	3138	SV *cat = TARG;
	3139	STRLEN fromlen;
	3140	SV pat_sv = ++MARK;
	3141	const char *pat = SvPV_const(pat_sv, fromlen);
	3142	const char *patend = pat + fromlen;
	3143
	3144	MARK++;
	3145	SvPVCLEAR(cat);
	3146	SvUTF8_off(cat);
	3147
	3148	packlist(cat, pat, patend, MARK, SP + 1);
	3149
	3150	SvSETMAGIC(cat);
	3151	SP = ORIGMARK;
	3152	PUSHs(cat);
	3153	RETURN;
	3154	}
	3155
	3156	/*
	3157	* ex: set ts=8 sts=4 sw=4 et:
	3158	*/