perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* pp_pack.c
	2	*
	3	* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
	4	* 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
	5	*
	6	* You may distribute under the terms of either the GNU General Public
	7	* License or the Artistic License, as specified in the README file.
	8	*
	9	*/
	10
	11	/*
	12	* He still hopefully carried some of his gear in his pack: a small tinder-box,
	13	* two small shallow pans, the smaller fitting into the larger; inside them a
	14	* wooden spoon, a short two-pronged fork and some skewers were stowed; and
	15	* hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
	16	* some salt.
	17	*
	18	* [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
	19	*/
	20
	21	/* This file contains pp ("push/pop") functions that
	22	* execute the opcodes that make up a perl program. A typical pp function
	23	* expects to find its arguments on the stack, and usually pushes its
	24	* results onto the stack, hence the 'pp' terminology. Each OP structure
	25	* contains a pointer to the relevant pp_foo() function.
	26	*
	27	* This particular file just contains pp_pack() and pp_unpack(). See the
	28	* other pp*.c files for the rest of the pp_ functions.
	29	*/
	30
	31	#include "EXTERN.h"
	32	#define PERL_IN_PP_PACK_C
	33	#include "perl.h"
	34
	35	/* Types used by pack/unpack */
	36	typedef enum {
	37	e_no_len, /* no length */
	38	e_number, /* number, [] */
	39	e_star /* asterisk */
	40	} howlen_t;
	41
	42	typedef struct tempsym {
	43	const char* patptr; /* current template char */
	44	const char* patend; /* one after last char */
	45	const char* grpbeg; /* 1st char of ()-group */
	46	const char* grpend; /* end of ()-group */
	47	I32 code; /* template code (!<>) */
	48	U32 flags; /* /=4, comma=2, pack=1 */
	49	/* and group modifiers */
	50	SSize_t length; /* length/repeat count */
	51	howlen_t howlen; /* how length is given */
	52	int level; /* () nesting level */
	53	STRLEN strbeg; /* offset of group start */
	54	struct tempsym previous; / previous group */
	55	} tempsym_t;
	56
	57	#define TEMPSYM_INIT(symptr, p, e, f) \
	58	STMT_START { \
	59	(symptr)->patptr = (p); \
	60	(symptr)->patend = (e); \
	61	(symptr)->grpbeg = NULL; \
	62	(symptr)->grpend = NULL; \
	63	(symptr)->grpend = NULL; \
	64	(symptr)->code = 0; \
	65	(symptr)->length = 0; \
	66	(symptr)->howlen = e_no_len; \
	67	(symptr)->level = 0; \
	68	(symptr)->flags = (f); \
	69	(symptr)->strbeg = 0; \
	70	(symptr)->previous = NULL; \
	71	} STMT_END
	72
	73	typedef union {
	74	NV nv;
	75	U8 bytes[sizeof(NV)];
	76	} NV_bytes;
	77
	78	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	79	typedef union {
	80	long double ld;
	81	U8 bytes[sizeof(long double)];
	82	} ld_bytes;
	83	#endif
	84
	85	#ifndef CHAR_BIT
	86	# define CHAR_BIT 8
	87	#endif
	88	/* Maximum number of bytes to which a byte can grow due to upgrade */
	89	#define UTF8_EXPAND 2
	90
	91	/*
	92	* Offset for integer pack/unpack.
	93	*
	94	* On architectures where I16 and I32 aren't really 16 and 32 bits,
	95	* which for now are all Crays, pack and unpack have to play games.
	96	*/
	97
	98	/*
	99	* These values are required for portability of pack() output.
	100	* If they're not right on your machine, then pack() and unpack()
	101	* wouldn't work right anyway; you'll need to apply the Cray hack.
	102	* (I'd like to check them with #if, but you can't use sizeof() in
	103	* the preprocessor.) --???
	104	*/
	105	/*
	106	The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
	107	defines are now in config.h. --Andy Dougherty April 1998
	108	*/
	109	#define SIZE16 2
	110	#define SIZE32 4
	111
	112	/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
	113	--jhi Feb 1999 */
	114
	115	#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
	116	# define OFF16(p) ((char *) (p))
	117	# define OFF32(p) ((char *) (p))
	118	#elif BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 /* little-endian */
	119	# define OFF16(p) ((char*)(p))
	120	# define OFF32(p) ((char*)(p))
	121	#elif BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321 /* big-endian */
	122	# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
	123	# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
	124	#else
	125	# error "bad cray byte order"
	126	#endif
	127
	128	#define PUSH16(utf8, cur, p, needs_swap) \
	129	PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
	130	#define PUSH32(utf8, cur, p, needs_swap) \
	131	PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
	132
	133	#if BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321 /* big-endian */
	134	# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
	135	#elif BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 /* little-endian */
	136	# define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
	137	#else
	138	# error "Unsupported byteorder"
	139	/* Need to add code here to re-instate mixed endian support.
	140	NEEDS_SWAP would need to hold a flag indicating which action to
	141	take, and S_reverse_copy and the code in S_utf8_to_bytes would need
	142	logic adding to deal with any mixed-endian transformations needed.
	143	*/
	144	#endif
	145
	146	/* Only to be used inside a loop (see the break) */
	147	#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
	148	STMT_START { \
	149	if (UNLIKELY(utf8)) { \
	150	if (!S_utf8_to_bytes(aTHX_ &s, strend, \
	151	(char *) (buf), len, datumtype)) break; \
	152	} else { \
	153	if (UNLIKELY(needs_swap)) \
	154	S_reverse_copy(s, (char *) (buf), len); \
	155	else \
	156	Copy(s, (char *) (buf), len, char); \
	157	s += len; \
	158	} \
	159	} STMT_END
	160
	161	#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
	162	SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
	163
	164	#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
	165	SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
	166
	167	#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
	168	SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
	169
	170	#define PUSH_VAR(utf8, aptr, var, needs_swap) \
	171	PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
	172
	173	/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
	174	#define MAX_SUB_TEMPLATE_LEVEL 100
	175
	176	/* flags (note that type modifiers can also be used as flags!) */
	177	#define FLAG_WAS_UTF8 0x40
	178	#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
	179	#define FLAG_UNPACK_ONLY_ONE 0x10
	180	#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
	181	#define FLAG_SLASH 0x04
	182	#define FLAG_COMMA 0x02
	183	#define FLAG_PACK 0x01
	184
	185	STATIC SV *
	186	S_mul128(pTHX_ SV *sv, U8 m)
	187	{
	188	STRLEN len;
	189	char *s = SvPV(sv, len);
	190	char *t;
	191
	192	PERL_ARGS_ASSERT_MUL128;
	193
	194	if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
	195	SV * const tmpNew = newSVpvs("0000000000");
	196
	197	sv_catsv(tmpNew, sv);
	198	SvREFCNT_dec(sv); /* free old sv */
	199	sv = tmpNew;
	200	s = SvPV(sv, len);
	201	}
	202	t = s + len - 1;
	203	while (!t) / trailing '\0'? */
	204	t--;
	205	while (t > s) {
	206	const U32 i = ((*t - '0') << 7) + m;
	207	*(t--) = '0' + (char)(i % 10);
	208	m = (char)(i / 10);
	209	}
	210	return (sv);
	211	}
	212
	213	/* Explosives and implosives. */
	214
	215	#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
	216	&& NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
	217
	218	/* type modifiers */
	219	#define TYPE_IS_SHRIEKING 0x100
	220	#define TYPE_IS_BIG_ENDIAN 0x200
	221	#define TYPE_IS_LITTLE_ENDIAN 0x400
	222	#define TYPE_IS_PACK 0x800
	223	#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN\|TYPE_IS_LITTLE_ENDIAN)
	224	#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
	225	#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
	226
	227	# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
	228	# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
	229
	230	# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
	231
	232	#define PACK_SIZE_CANNOT_CSUM 0x80
	233	#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
	234	#define PACK_SIZE_MASK 0x3F
	235
	236	#include "packsizetables.inc"
	237
	238	static void
	239	S_reverse_copy(const char src, char dest, STRLEN len)
	240	{
	241	dest += len;
	242	while (len--)
	243	--dest = src++;
	244	}
	245
	246	STATIC U8
	247	utf8_to_byte(pTHX_ const char *s, const char end, I32 datumtype)
	248	{
	249	STRLEN retlen;
	250	UV val;
	251
	252	if (*s >= end) {
	253	goto croak;
	254	}
	255	val = utf8n_to_uvchr((U8 ) s, end-*s, &retlen,
	256	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	257	if (retlen == (STRLEN) -1)
	258	croak:
	259	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
	260	(int) TYPE_NO_MODIFIERS(datumtype));
	261	if (val >= 0x100) {
	262	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
	263	"Character in '%c' format wrapped in unpack",
	264	(int) TYPE_NO_MODIFIERS(datumtype));
	265	val &= 0xff;
	266	}
	267	*s += retlen;
	268	return (U8)val;
	269	}
	270
	271	#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
	272	utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
	273	(U8 )(s)++)
	274
	275	STATIC bool
	276	S_utf8_to_bytes(pTHX_ const char *s, const char end, const char *buf, SSize_t buf_len, I32 datumtype)
	277	{
	278	UV val;
	279	STRLEN retlen;
	280	const char from = s;
	281	int bad = 0;
	282	const U32 flags = ckWARN(WARN_UTF8) ?
	283	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY \| UTF8_ALLOW_ANY);
	284	const bool needs_swap = NEEDS_SWAP(datumtype);
	285
	286	if (UNLIKELY(needs_swap))
	287	buf += buf_len;
	288
	289	for (;buf_len > 0; buf_len--) {
	290	if (from >= end) return FALSE;
	291	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
	292	if (retlen == (STRLEN) -1) {
	293	from += UTF8_SAFE_SKIP(from, end);
	294	bad \|= 1;
	295	} else from += retlen;
	296	if (val >= 0x100) {
	297	bad \|= 2;
	298	val &= 0xff;
	299	}
	300	if (UNLIKELY(needs_swap))
	301	(U8 )--buf = (U8)val;
	302	else
	303	(U8 )buf++ = (U8)val;
	304	}
	305	/* We have enough characters for the buffer. Did we have problems ? */
	306	if (bad) {
	307	if (bad & 1) {
	308	/* Rewalk the string fragment while warning */
	309	const char *ptr;
	310	const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
	311	for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
	312	if (ptr >= end) break;
	313	utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
	314	}
	315	if (from > end) from = end;
	316	}
	317	if ((bad & 2))
	318	Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
	319	WARN_PACK : WARN_UNPACK),
	320	"Character(s) in '%c' format wrapped in %s",
	321	(int) TYPE_NO_MODIFIERS(datumtype),
	322	datumtype & TYPE_IS_PACK ? "pack" : "unpack");
	323	}
	324	*s = from;
	325	return TRUE;
	326	}
	327
	328	STATIC char *
	329	S_my_bytes_to_utf8(const U8 start, STRLEN len, char dest, const bool needs_swap) {
	330	PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
	331
	332	if (UNLIKELY(needs_swap)) {
	333	const U8 *p = start + len;
	334	while (p-- > start) {
	335	append_utf8_from_native_byte(p, (U8 *) & dest);
	336	}
	337	} else {
	338	const U8 * const end = start + len;
	339	while (start < end) {
	340	append_utf8_from_native_byte(start, (U8 *) & dest);
	341	start++;
	342	}
	343	}
	344	return dest;
	345	}
	346
	347	#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
	348	STMT_START { \
	349	if (UNLIKELY(utf8)) \
	350	(cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
	351	else { \
	352	if (UNLIKELY(needs_swap)) \
	353	S_reverse_copy((char *)(buf), cur, len); \
	354	else \
	355	Copy(buf, cur, len, char); \
	356	(cur) += (len); \
	357	} \
	358	} STMT_END
	359
	360	#define SAFE_UTF8_EXPAND(var) \
	361	STMT_START { \
	362	if ((var) > SSize_t_MAX / UTF8_EXPAND) \
	363	Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
	364	(var) = (var) * UTF8_EXPAND; \
	365	} STMT_END
	366
	367	#define GROWING2(utf8, cat, start, cur, item_size, item_count) \
	368	STMT_START { \
	369	if (SSize_t_MAX / (item_size) < (item_count)) \
	370	Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
	371	GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
	372	} STMT_END
	373
	374	#define GROWING(utf8, cat, start, cur, in_len) \
	375	STMT_START { \
	376	STRLEN glen = (in_len); \
	377	STRLEN catcur = (STRLEN)((cur) - (start)); \
	378	if (utf8) SAFE_UTF8_EXPAND(glen); \
	379	if (SSize_t_MAX - glen < catcur) \
	380	Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
	381	if (catcur + glen >= SvLEN(cat)) { \
	382	(start) = sv_exp_grow(cat, glen); \
	383	(cur) = (start) + SvCUR(cat); \
	384	} \
	385	} STMT_END
	386
	387	#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
	388	STMT_START { \
	389	const STRLEN glen = (in_len); \
	390	STRLEN gl = glen; \
	391	if (utf8) SAFE_UTF8_EXPAND(gl); \
	392	if ((cur) + gl >= (start) + SvLEN(cat)) { \
	393	*cur = '\0'; \
	394	SvCUR_set((cat), (cur) - (start)); \
	395	(start) = sv_exp_grow(cat, gl); \
	396	(cur) = (start) + SvCUR(cat); \
	397	} \
	398	PUSH_BYTES(utf8, cur, buf, glen, 0); \
	399	} STMT_END
	400
	401	#define PUSH_BYTE(utf8, s, byte) \
	402	STMT_START { \
	403	if (utf8) { \
	404	const U8 au8 = (byte); \
	405	(s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
	406	} else (U8 )(s)++ = (byte); \
	407	} STMT_END
	408
	409	/* Only to be used inside a loop (see the break) */
	410	#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
	411	STMT_START { \
	412	STRLEN retlen; \
	413	if (str >= end) break; \
	414	val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
	415	if (retlen == (STRLEN) -1) { \
	416	*cur = '\0'; \
	417	Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
	418	} \
	419	str += retlen; \
	420	} STMT_END
	421
	422	static const char _action( const tempsym_t symptr )
	423	{
	424	return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
	425	}
	426
	427	/* Returns the sizeof() struct described by pat */
	428	STATIC SSize_t
	429	S_measure_struct(pTHX_ tempsym_t* symptr)
	430	{
	431	SSize_t total = 0;
	432
	433	PERL_ARGS_ASSERT_MEASURE_STRUCT;
	434
	435	while (next_symbol(symptr)) {
	436	SSize_t len, size;
	437
	438	switch (symptr->howlen) {
	439	case e_star:
	440	Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
	441	_action( symptr ) );
	442
	443	default:
	444	/* e_no_len and e_number */
	445	len = symptr->length;
	446	break;
	447	}
	448
	449	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
	450	if (!size) {
	451	SSize_t star;
	452	/* endianness doesn't influence the size of a type */
	453	switch(TYPE_NO_ENDIANNESS(symptr->code)) {
	454	default:
	455	Perl_croak(aTHX_ "Invalid type '%c' in %s",
	456	(int)TYPE_NO_MODIFIERS(symptr->code),
	457	_action( symptr ) );
	458	case '.' \| TYPE_IS_SHRIEKING:
	459	case '@' \| TYPE_IS_SHRIEKING:
	460	case '@':
	461	case '.':
	462	case '/':
	463	case 'U': /* XXXX Is it correct? */
	464	case 'w':
	465	case 'u':
	466	Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
	467	(int) TYPE_NO_MODIFIERS(symptr->code),
	468	_action( symptr ) );
	469	case '%':
	470	size = 0;
	471	break;
	472	case '(':
	473	{
	474	tempsym_t savsym = *symptr;
	475	symptr->patptr = savsym.grpbeg;
	476	symptr->patend = savsym.grpend;
	477	/* XXXX Theoretically, we need to measure many times at
	478	different positions, since the subexpression may contain
	479	alignment commands, but be not of aligned length.
	480	Need to detect this and croak(). */
	481	size = measure_struct(symptr);
	482	*symptr = savsym;
	483	break;
	484	}
	485	case 'X' \| TYPE_IS_SHRIEKING:
	486	/* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
	487	*/
	488	if (!len) /* Avoid division by 0 */
	489	len = 1;
	490	len = total % len; /* Assumed: the start is aligned. */
	491	/* FALLTHROUGH */
	492	case 'X':
	493	size = -1;
	494	if (total < len)
	495	Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
	496	break;
	497	case 'x' \| TYPE_IS_SHRIEKING:
	498	if (!len) /* Avoid division by 0 */
	499	len = 1;
	500	star = total % len; /* Assumed: the start is aligned. */
	501	if (star) /* Other portable ways? */
	502	len = len - star;
	503	else
	504	len = 0;
	505	/* FALLTHROUGH */
	506	case 'x':
	507	case 'A':
	508	case 'Z':
	509	case 'a':
	510	size = 1;
	511	break;
	512	case 'B':
	513	case 'b':
	514	len = (len + 7)/8;
	515	size = 1;
	516	break;
	517	case 'H':
	518	case 'h':
	519	len = (len + 1)/2;
	520	size = 1;
	521	break;
	522
	523	case 'P':
	524	len = 1;
	525	size = sizeof(char*);
	526	break;
	527	}
	528	}
	529	total += len * size;
	530	}
	531	return total;
	532	}
	533
	534
	535	/* locate matching closing parenthesis or bracket
	536	* returns char pointer to char after match, or NULL
	537	*/
	538	STATIC const char *
	539	S_group_end(pTHX_ const char patptr, const char patend, char ender)
	540	{
	541	PERL_ARGS_ASSERT_GROUP_END;
	542
	543	while (patptr < patend) {
	544	const char c = *patptr++;
	545
	546	if (isSPACE(c))
	547	continue;
	548	else if (c == ender)
	549	return patptr-1;
	550	else if (c == '#') {
	551	while (patptr < patend && *patptr != '\n')
	552	patptr++;
	553	continue;
	554	} else if (c == '(')
	555	patptr = group_end(patptr, patend, ')') + 1;
	556	else if (c == '[')
	557	patptr = group_end(patptr, patend, ']') + 1;
	558	}
	559	Perl_croak(aTHX_ "No group ending character '%c' found in template",
	560	ender);
	561	NOT_REACHED; /* NOTREACHED */
	562	}
	563
	564
	565	/* Convert unsigned decimal number to binary.
	566	* Expects a pointer to the first digit and address of length variable
	567	* Advances char pointer to 1st non-digit char and returns number
	568	*/
	569	STATIC const char *
	570	S_get_num(pTHX_ const char patptr, SSize_t lenptr )
	571	{
	572	SSize_t len = *patptr++ - '0';
	573
	574	PERL_ARGS_ASSERT_GET_NUM;
	575
	576	while (isDIGIT(*patptr)) {
	577	SSize_t nlen = (len * 10) + (*patptr++ - '0');
	578	if (nlen < 0 \|\| nlen/10 != len)
	579	Perl_croak(aTHX_ "pack/unpack repeat count overflow");
	580	len = nlen;
	581	}
	582	*lenptr = len;
	583	return patptr;
	584	}
	585
	586	/* The marvellous template parsing routine: Using state stored in *symptr,
	587	* locates next template code and count
	588	*/
	589	STATIC bool
	590	S_next_symbol(pTHX_ tempsym_t* symptr )
	591	{
	592	const char* patptr = symptr->patptr;
	593	const char* const patend = symptr->patend;
	594
	595	PERL_ARGS_ASSERT_NEXT_SYMBOL;
	596
	597	symptr->flags &= ~FLAG_SLASH;
	598
	599	while (patptr < patend) {
	600	if (isSPACE(*patptr))
	601	patptr++;
	602	else if (*patptr == '#') {
	603	patptr++;
	604	while (patptr < patend && *patptr != '\n')
	605	patptr++;
	606	if (patptr < patend)
	607	patptr++;
	608	} else {
	609	/* We should have found a template code */
	610	I32 code = *patptr++ & 0xFF;
	611	U32 inherited_modifiers = 0;
	612
	613	if (code == ','){ /* grandfather in commas but with a warning */
	614	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
	615	symptr->flags \|= FLAG_COMMA;
	616	Perl_warner(aTHX_ packWARN(WARN_UNPACK),
	617	"Invalid type ',' in %s", _action( symptr ) );
	618	}
	619	continue;
	620	}
	621
	622	/* for '(', skip to ')' */
	623	if (code == '(') {
	624	if( isDIGIT(patptr) \|\| patptr == '' \|\| patptr == '[' )
	625	Perl_croak(aTHX_ "()-group starts with a count in %s",
	626	_action( symptr ) );
	627	symptr->grpbeg = patptr;
	628	patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
	629	if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
	630	Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
	631	_action( symptr ) );
	632	}
	633
	634	/* look for group modifiers to inherit */
	635	if (TYPE_ENDIANNESS(symptr->flags)) {
	636	if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
	637	inherited_modifiers \|= TYPE_ENDIANNESS(symptr->flags);
	638	}
	639
	640	/* look for modifiers */
	641	while (patptr < patend) {
	642	const char *allowed;
	643	I32 modifier;
	644	switch (*patptr) {
	645	case '!':
	646	modifier = TYPE_IS_SHRIEKING;
	647	allowed = "sSiIlLxXnNvV@.";
	648	break;
	649	case '>':
	650	modifier = TYPE_IS_BIG_ENDIAN;
	651	allowed = ENDIANNESS_ALLOWED_TYPES;
	652	break;
	653	case '<':
	654	modifier = TYPE_IS_LITTLE_ENDIAN;
	655	allowed = ENDIANNESS_ALLOWED_TYPES;
	656	break;
	657	default:
	658	allowed = "";
	659	modifier = 0;
	660	break;
	661	}
	662
	663	if (modifier == 0)
	664	break;
	665
	666	if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
	667	Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
	668	allowed, _action( symptr ) );
	669
	670	if (TYPE_ENDIANNESS(code \| modifier) == TYPE_ENDIANNESS_MASK)
	671	Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
	672	(int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
	673	else if (TYPE_ENDIANNESS(code \| modifier \| inherited_modifiers) ==
	674	TYPE_ENDIANNESS_MASK)
	675	Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
	676	*patptr, _action( symptr ) );
	677
	678	if ((code & modifier)) {
	679	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
	680	"Duplicate modifier '%c' after '%c' in %s",
	681	*patptr, (int) TYPE_NO_MODIFIERS(code),
	682	_action( symptr ) );
	683	}
	684
	685	code \|= modifier;
	686	patptr++;
	687	}
	688
	689	/* inherit modifiers */
	690	code \|= inherited_modifiers;
	691
	692	/* look for count and/or / */
	693	if (patptr < patend) {
	694	if (isDIGIT(*patptr)) {
	695	patptr = get_num( patptr, &symptr->length );
	696	symptr->howlen = e_number;
	697
	698	} else if (patptr == '') {
	699	patptr++;
	700	symptr->howlen = e_star;
	701
	702	} else if (*patptr == '[') {
	703	const char* lenptr = ++patptr;
	704	symptr->howlen = e_number;
	705	patptr = group_end( patptr, patend, ']' ) + 1;
	706	/* what kind of [] is it? */
	707	if (isDIGIT(*lenptr)) {
	708	lenptr = get_num( lenptr, &symptr->length );
	709	if( *lenptr != ']' )
	710	Perl_croak(aTHX_ "Malformed integer in [] in %s",
	711	_action( symptr ) );
	712	} else {
	713	tempsym_t savsym = *symptr;
	714	symptr->patend = patptr-1;
	715	symptr->patptr = lenptr;
	716	savsym.length = measure_struct(symptr);
	717	*symptr = savsym;
	718	}
	719	} else {
	720	symptr->howlen = e_no_len;
	721	symptr->length = 1;
	722	}
	723
	724	/* try to find / */
	725	while (patptr < patend) {
	726	if (isSPACE(*patptr))
	727	patptr++;
	728	else if (*patptr == '#') {
	729	patptr++;
	730	while (patptr < patend && *patptr != '\n')
	731	patptr++;
	732	if (patptr < patend)
	733	patptr++;
	734	} else {
	735	if (*patptr == '/') {
	736	symptr->flags \|= FLAG_SLASH;
	737	patptr++;
	738	if (patptr < patend &&
	739	(isDIGIT(patptr) \|\| patptr == '' \|\| patptr == '['))
	740	Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
	741	_action( symptr ) );
	742	}
	743	break;
	744	}
	745	}
	746	} else {
	747	/* at end - no count, no / */
	748	symptr->howlen = e_no_len;
	749	symptr->length = 1;
	750	}
	751
	752	symptr->code = code;
	753	symptr->patptr = patptr;
	754	return TRUE;
	755	}
	756	}
	757	symptr->patptr = patptr;
	758	return FALSE;
	759	}
	760
	761	/*
	762	There is no way to cleanly handle the case where we should process the
	763	string per byte in its upgraded form while it's really in downgraded form
	764	(e.g. estimates like strend-s as an upper bound for the number of
	765	characters left wouldn't work). So if we foresee the need of this
	766	(pattern starts with U or contains U0), we want to work on the encoded
	767	version of the string. Users are advised to upgrade their pack string
	768	themselves if they need to do a lot of unpacks like this on it
	769	*/
	770	STATIC bool
	771	need_utf8(const char pat, const char patend)
	772	{
	773	bool first = TRUE;
	774
	775	PERL_ARGS_ASSERT_NEED_UTF8;
	776
	777	while (pat < patend) {
	778	if (pat[0] == '#') {
	779	pat++;
	780	pat = (const char *) memchr(pat, '\n', patend-pat);
	781	if (!pat) return FALSE;
	782	} else if (pat[0] == 'U') {
	783	if (first \|\| pat[1] == '0') return TRUE;
	784	} else first = FALSE;
	785	pat++;
	786	}
	787	return FALSE;
	788	}
	789
	790	STATIC char
	791	first_symbol(const char pat, const char patend) {
	792	PERL_ARGS_ASSERT_FIRST_SYMBOL;
	793
	794	while (pat < patend) {
	795	if (pat[0] != '#') return pat[0];
	796	pat++;
	797	pat = (const char *) memchr(pat, '\n', patend-pat);
	798	if (!pat) return 0;
	799	pat++;
	800	}
	801	return 0;
	802	}
	803
	804	/*
	805
	806	=head1 Pack and Unpack
	807
	808	=for apidoc unpackstring
	809
	810	The engine implementing the C<unpack()> Perl function.
	811
	812	Using the template C<pat..patend>, this function unpacks the string
	813	C<s..strend> into a number of mortal SVs, which it pushes onto the perl
	814	argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
	815	C<SPAGAIN> after the call to this function). It returns the number of
	816	pushed elements.
	817
	818	The C<strend> and C<patend> pointers should point to the byte following the
	819	last character of each string.
	820
	821	Although this function returns its values on the perl argument stack, it
	822	doesn't take any parameters from that stack (and thus in particular
	823	there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
	824	example).
	825
	826	=cut */
	827
	828	SSize_t
	829	Perl_unpackstring(pTHX_ const char pat, const char patend, const char s, const char strend, U32 flags)
	830	{
	831	tempsym_t sym;
	832
	833	PERL_ARGS_ASSERT_UNPACKSTRING;
	834
	835	if (flags & FLAG_DO_UTF8) flags \|= FLAG_WAS_UTF8;
	836	else if (need_utf8(pat, patend)) {
	837	/* We probably should try to avoid this in case a scalar context call
	838	wouldn't get to the "U0" */
	839	STRLEN len = strend - s;
	840	s = (char ) bytes_to_utf8((U8 ) s, &len);
	841	SAVEFREEPV(s);
	842	strend = s + len;
	843	flags \|= FLAG_DO_UTF8;
	844	}
	845
	846	if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
	847	flags \|= FLAG_PARSE_UTF8;
	848
	849	TEMPSYM_INIT(&sym, pat, patend, flags);
	850
	851	return unpack_rec(&sym, s, s, strend, NULL );
	852	}
	853
	854	STATIC SSize_t
	855	S_unpack_rec(pTHX_ tempsym_t* symptr, const char s, const char strbeg, const char strend, const char *new_s )
	856	{
	857	dSP;
	858	SV *sv = NULL;
	859	const SSize_t start_sp_offset = SP - PL_stack_base;
	860	howlen_t howlen;
	861	SSize_t checksum = 0;
	862	UV cuv = 0;
	863	NV cdouble = 0.0;
	864	const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
	865	bool beyond = FALSE;
	866	bool explicit_length;
	867	const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
	868	bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
	869
	870	PERL_ARGS_ASSERT_UNPACK_REC;
	871
	872	symptr->strbeg = s - strbeg;
	873
	874	while (next_symbol(symptr)) {
	875	packprops_t props;
	876	SSize_t len;
	877	I32 datumtype = symptr->code;
	878	bool needs_swap;
	879	/* do first one only unless in list context
	880	/ is implemented by unpacking the count, then popping it from the
	881	stack, so must check that we're not in the middle of a / */
	882	if ( unpack_only_one
	883	&& (SP - PL_stack_base == start_sp_offset + 1)
	884	&& (datumtype != '/') ) /* XXX can this be omitted */
	885	break;
	886
	887	switch (howlen = symptr->howlen) {
	888	case e_star:
	889	len = strend - strbeg; /* long enough */
	890	break;
	891	default:
	892	/* e_no_len and e_number */
	893	len = symptr->length;
	894	break;
	895	}
	896
	897	explicit_length = TRUE;
	898	redo_switch:
	899	beyond = s >= strend;
	900
	901	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
	902	if (props) {
	903	/* props nonzero means we can process this letter. */
	904	const SSize_t size = props & PACK_SIZE_MASK;
	905	const SSize_t howmany = (strend - s) / size;
	906	if (len > howmany)
	907	len = howmany;
	908
	909	if (!checksum \|\| (props & PACK_SIZE_CANNOT_CSUM)) {
	910	if (len && unpack_only_one) len = 1;
	911	EXTEND(SP, len);
	912	EXTEND_MORTAL(len);
	913	}
	914	}
	915
	916	needs_swap = NEEDS_SWAP(datumtype);
	917
	918	switch(TYPE_NO_ENDIANNESS(datumtype)) {
	919	default:
	920	Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
	921
	922	case '%':
	923	if (howlen == e_no_len)
	924	len = 16; /* len is not specified */
	925	checksum = len;
	926	cuv = 0;
	927	cdouble = 0;
	928	continue;
	929
	930	case '(':
	931	{
	932	tempsym_t savsym = *symptr;
	933	const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
	934	symptr->flags \|= group_modifiers;
	935	symptr->patend = savsym.grpend;
	936	symptr->previous = &savsym;
	937	symptr->level++;
	938	PUTBACK;
	939	if (len && unpack_only_one) len = 1;
	940	while (len--) {
	941	symptr->patptr = savsym.grpbeg;
	942	if (utf8) symptr->flags \|= FLAG_PARSE_UTF8;
	943	else symptr->flags &= ~FLAG_PARSE_UTF8;
	944	unpack_rec(symptr, s, strbeg, strend, &s);
	945	if (s == strend && savsym.howlen == e_star)
	946	break; /* No way to continue */
	947	}
	948	SPAGAIN;
	949	savsym.flags = symptr->flags & ~group_modifiers;
	950	*symptr = savsym;
	951	break;
	952	}
	953	case '.' \| TYPE_IS_SHRIEKING:
	954	case '.': {
	955	const char *from;
	956	SV *sv;
	957	const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
	958	if (howlen == e_star) from = strbeg;
	959	else if (len <= 0) from = s;
	960	else {
	961	tempsym_t *group = symptr;
	962
	963	while (--len && group) group = group->previous;
	964	from = group ? strbeg + group->strbeg : strbeg;
	965	}
	966	sv = from <= s ?
	967	newSVuv( u8 ? (UV) utf8_length((const U8)from, (const U8)s) : (UV) (s-from)) :
	968	newSViv(-(u8 ? (IV) utf8_length((const U8)s, (const U8)from) : (IV) (from-s)));
	969	mXPUSHs(sv);
	970	break;
	971	}
	972	case '@' \| TYPE_IS_SHRIEKING:
	973	case '@':
	974	s = strbeg + symptr->strbeg;
	975	if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
	976	{
	977	while (len > 0) {
	978	if (s >= strend)
	979	Perl_croak(aTHX_ "'@' outside of string in unpack");
	980	s += UTF8SKIP(s);
	981	len--;
	982	}
	983	if (s > strend)
	984	Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
	985	} else {
	986	if (strend-s < len)
	987	Perl_croak(aTHX_ "'@' outside of string in unpack");
	988	s += len;
	989	}
	990	break;
	991	case 'X' \| TYPE_IS_SHRIEKING:
	992	if (!len) /* Avoid division by 0 */
	993	len = 1;
	994	if (utf8) {
	995	const char hop, last;
	996	SSize_t l = len;
	997	hop = last = strbeg;
	998	while (hop < s) {
	999	hop += UTF8SKIP(hop);
	1000	if (--l == 0) {
	1001	last = hop;
	1002	l = len;
	1003	}
	1004	}
	1005	if (last > s)
	1006	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1007	s = last;
	1008	break;
	1009	}
	1010	len = (s - strbeg) % len;
	1011	/* FALLTHROUGH */
	1012	case 'X':
	1013	if (utf8) {
	1014	while (len > 0) {
	1015	if (s <= strbeg)
	1016	Perl_croak(aTHX_ "'X' outside of string in unpack");
	1017	while (--s, UTF8_IS_CONTINUATION(*s)) {
	1018	if (s <= strbeg)
	1019	Perl_croak(aTHX_ "'X' outside of string in unpack");
	1020	}
	1021	len--;
	1022	}
	1023	} else {
	1024	if (len > s - strbeg)
	1025	Perl_croak(aTHX_ "'X' outside of string in unpack" );
	1026	s -= len;
	1027	}
	1028	break;
	1029	case 'x' \| TYPE_IS_SHRIEKING: {
	1030	SSize_t ai32;
	1031	if (!len) /* Avoid division by 0 */
	1032	len = 1;
	1033	if (utf8) ai32 = utf8_length((U8 ) strbeg, (U8 ) s) % len;
	1034	else ai32 = (s - strbeg) % len;
	1035	if (ai32 == 0) break;
	1036	len -= ai32;
	1037	}
	1038	/* FALLTHROUGH */
	1039	case 'x':
	1040	if (utf8) {
	1041	while (len>0) {
	1042	if (s >= strend)
	1043	Perl_croak(aTHX_ "'x' outside of string in unpack");
	1044	s += UTF8SKIP(s);
	1045	len--;
	1046	}
	1047	} else {
	1048	if (len > strend - s)
	1049	Perl_croak(aTHX_ "'x' outside of string in unpack");
	1050	s += len;
	1051	}
	1052	break;
	1053	case '/':
	1054	Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
	1055
	1056	case 'A':
	1057	case 'Z':
	1058	case 'a':
	1059	if (checksum) {
	1060	/* Preliminary length estimate is assumed done in 'W' */
	1061	if (len > strend - s) len = strend - s;
	1062	goto W_checksum;
	1063	}
	1064	if (utf8) {
	1065	SSize_t l;
	1066	const char *hop;
	1067	for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
	1068	if (hop >= strend) {
	1069	if (hop > strend)
	1070	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1071	break;
	1072	}
	1073	}
	1074	if (hop > strend)
	1075	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1076	len = hop - s;
	1077	} else if (len > strend - s)
	1078	len = strend - s;
	1079
	1080	if (datumtype == 'Z') {
	1081	/* 'Z' strips stuff after first null */
	1082	const char ptr, end;
	1083	end = s + len;
	1084	for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
	1085	sv = newSVpvn(s, ptr-s);
	1086	if (howlen == e_star) /* exact for 'Z' /
	1087	len = ptr-s + (ptr != strend ? 1 : 0);
	1088	} else if (datumtype == 'A') {
	1089	/* 'A' strips both nulls and spaces */
	1090	const char *ptr;
	1091	if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
	1092	for (ptr = s+len-1; ptr >= s; ptr--) {
	1093	if ( *ptr != 0
	1094	&& !UTF8_IS_CONTINUATION(*ptr)
	1095	&& !isSPACE_utf8_safe(ptr, strend))
	1096	{
	1097	break;
	1098	}
	1099	}
	1100	if (ptr >= s) ptr += UTF8SKIP(ptr);
	1101	else ptr++;
	1102	if (ptr > s+len)
	1103	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1104	} else {
	1105	for (ptr = s+len-1; ptr >= s; ptr--)
	1106	if (ptr != 0 && !isSPACE(ptr)) break;
	1107	ptr++;
	1108	}
	1109	sv = newSVpvn(s, ptr-s);
	1110	} else sv = newSVpvn(s, len);
	1111
	1112	if (utf8) {
	1113	SvUTF8_on(sv);
	1114	/* Undo any upgrade done due to need_utf8() */
	1115	if (!(symptr->flags & FLAG_WAS_UTF8))
	1116	sv_utf8_downgrade(sv, 0);
	1117	}
	1118	mXPUSHs(sv);
	1119	s += len;
	1120	break;
	1121	case 'B':
	1122	case 'b': {
	1123	char *str;
	1124	if (howlen == e_star \|\| len > (strend - s) * 8)
	1125	len = (strend - s) * 8;
	1126	if (checksum) {
	1127	if (utf8)
	1128	while (len >= 8 && s < strend) {
	1129	cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
	1130	len -= 8;
	1131	}
	1132	else
	1133	while (len >= 8) {
	1134	cuv += PL_bitcount[(U8 )s++];
	1135	len -= 8;
	1136	}
	1137	if (len && s < strend) {
	1138	U8 bits;
	1139	bits = SHIFT_BYTE(utf8, s, strend, datumtype);
	1140	if (datumtype == 'b')
	1141	while (len-- > 0) {
	1142	if (bits & 1) cuv++;
	1143	bits >>= 1;
	1144	}
	1145	else
	1146	while (len-- > 0) {
	1147	if (bits & 0x80) cuv++;
	1148	bits <<= 1;
	1149	}
	1150	}
	1151	break;
	1152	}
	1153
	1154	sv = sv_2mortal(newSV(len ? len : 1));
	1155	SvPOK_on(sv);
	1156	str = SvPVX(sv);
	1157	if (datumtype == 'b') {
	1158	U8 bits = 0;
	1159	const SSize_t ai32 = len;
	1160	for (len = 0; len < ai32; len++) {
	1161	if (len & 7) bits >>= 1;
	1162	else if (utf8) {
	1163	if (s >= strend) break;
	1164	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1165	} else bits = (U8 ) s++;
	1166	*str++ = bits & 1 ? '1' : '0';
	1167	}
	1168	} else {
	1169	U8 bits = 0;
	1170	const SSize_t ai32 = len;
	1171	for (len = 0; len < ai32; len++) {
	1172	if (len & 7) bits <<= 1;
	1173	else if (utf8) {
	1174	if (s >= strend) break;
	1175	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1176	} else bits = (U8 ) s++;
	1177	*str++ = bits & 0x80 ? '1' : '0';
	1178	}
	1179	}
	1180	*str = '\0';
	1181	SvCUR_set(sv, str - SvPVX_const(sv));
	1182	XPUSHs(sv);
	1183	break;
	1184	}
	1185	case 'H':
	1186	case 'h': {
	1187	char *str = NULL;
	1188	/* Preliminary length estimate, acceptable for utf8 too */
	1189	if (howlen == e_star \|\| len > (strend - s) * 2)
	1190	len = (strend - s) * 2;
	1191	if (!checksum) {
	1192	sv = sv_2mortal(newSV(len ? len : 1));
	1193	SvPOK_on(sv);
	1194	str = SvPVX(sv);
	1195	}
	1196	if (datumtype == 'h') {
	1197	U8 bits = 0;
	1198	SSize_t ai32 = len;
	1199	for (len = 0; len < ai32; len++) {
	1200	if (len & 1) bits >>= 4;
	1201	else if (utf8) {
	1202	if (s >= strend) break;
	1203	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1204	} else bits = * (U8 *) s++;
	1205	if (!checksum)
	1206	*str++ = PL_hexdigit[bits & 15];
	1207	}
	1208	} else {
	1209	U8 bits = 0;
	1210	const SSize_t ai32 = len;
	1211	for (len = 0; len < ai32; len++) {
	1212	if (len & 1) bits <<= 4;
	1213	else if (utf8) {
	1214	if (s >= strend) break;
	1215	bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
	1216	} else bits = (U8 ) s++;
	1217	if (!checksum)
	1218	*str++ = PL_hexdigit[(bits >> 4) & 15];
	1219	}
	1220	}
	1221	if (!checksum) {
	1222	*str = '\0';
	1223	SvCUR_set(sv, str - SvPVX_const(sv));
	1224	XPUSHs(sv);
	1225	}
	1226	break;
	1227	}
	1228	case 'C':
	1229	if (len == 0) {
	1230	if (explicit_length)
	1231	/* Switch to "character" mode */
	1232	utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
	1233	break;
	1234	}
	1235	/* FALLTHROUGH */
	1236	case 'c':
	1237	while (len-- > 0 && s < strend) {
	1238	int aint;
	1239	if (utf8)
	1240	{
	1241	STRLEN retlen;
	1242	aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
	1243	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	1244	if (retlen == (STRLEN) -1)
	1245	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1246	s += retlen;
	1247	}
	1248	else
	1249	aint = (U8 )(s)++;
	1250	if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
	1251	aint -= 256;
	1252	if (!checksum)
	1253	mPUSHi(aint);
	1254	else if (checksum > bits_in_uv)
	1255	cdouble += (NV)aint;
	1256	else
	1257	cuv += aint;
	1258	}
	1259	break;
	1260	case 'W':
	1261	W_checksum:
	1262	if (utf8) {
	1263	while (len-- > 0 && s < strend) {
	1264	STRLEN retlen;
	1265	const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
	1266	ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
	1267	if (retlen == (STRLEN) -1)
	1268	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1269	s += retlen;
	1270	if (!checksum)
	1271	mPUSHu(val);
	1272	else if (checksum > bits_in_uv)
	1273	cdouble += (NV) val;
	1274	else
	1275	cuv += val;
	1276	}
	1277	} else if (!checksum)
	1278	while (len-- > 0) {
	1279	const U8 ch = (U8 ) s++;
	1280	mPUSHu(ch);
	1281	}
	1282	else if (checksum > bits_in_uv)
	1283	while (len-- > 0) cdouble += (NV) (U8 ) s++;
	1284	else
	1285	while (len-- > 0) cuv += (U8 ) s++;
	1286	break;
	1287	case 'U':
	1288	if (len == 0) {
	1289	if (explicit_length && howlen != e_star) {
	1290	/* Switch to "bytes in UTF-8" mode */
	1291	if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
	1292	else
	1293	/* Should be impossible due to the need_utf8() test */
	1294	Perl_croak(aTHX_ "U0 mode on a byte string");
	1295	}
	1296	break;
	1297	}
	1298	if (len > strend - s) len = strend - s;
	1299	if (!checksum) {
	1300	if (len && unpack_only_one) len = 1;
	1301	EXTEND(SP, len);
	1302	EXTEND_MORTAL(len);
	1303	}
	1304	while (len-- > 0 && s < strend) {
	1305	STRLEN retlen;
	1306	UV auv;
	1307	if (utf8) {
	1308	U8 result[UTF8_MAXLEN+1];
	1309	const char *ptr = s;
	1310	STRLEN len;
	1311	/* Bug: warns about bad utf8 even if we are short on bytes
	1312	and will break out of the loop */
	1313	if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
	1314	'U'))
	1315	break;
	1316	len = UTF8SKIP(result);
	1317	if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
	1318	(char *) &result[1], len-1, 'U')) break;
	1319	auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
	1320	len,
	1321	&retlen,
	1322	UTF8_ALLOW_DEFAULT));
	1323	s = ptr;
	1324	} else {
	1325	auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
	1326	strend - s,
	1327	&retlen,
	1328	UTF8_ALLOW_DEFAULT));
	1329	if (retlen == (STRLEN) -1)
	1330	Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
	1331	s += retlen;
	1332	}
	1333	if (!checksum)
	1334	mPUSHu(auv);
	1335	else if (checksum > bits_in_uv)
	1336	cdouble += (NV) auv;
	1337	else
	1338	cuv += auv;
	1339	}
	1340	break;
	1341	case 's' \| TYPE_IS_SHRIEKING:
	1342	#if SHORTSIZE != SIZE16
	1343	while (len-- > 0) {
	1344	short ashort;
	1345	SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
	1346	if (!checksum)
	1347	mPUSHi(ashort);
	1348	else if (checksum > bits_in_uv)
	1349	cdouble += (NV)ashort;
	1350	else
	1351	cuv += ashort;
	1352	}
	1353	break;
	1354	#else
	1355	/* FALLTHROUGH */
	1356	#endif
	1357	case 's':
	1358	while (len-- > 0) {
	1359	I16 ai16;
	1360
	1361	#if U16SIZE > SIZE16
	1362	ai16 = 0;
	1363	#endif
	1364	SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
	1365	#if U16SIZE > SIZE16
	1366	if (ai16 > 32767)
	1367	ai16 -= 65536;
	1368	#endif
	1369	if (!checksum)
	1370	mPUSHi(ai16);
	1371	else if (checksum > bits_in_uv)
	1372	cdouble += (NV)ai16;
	1373	else
	1374	cuv += ai16;
	1375	}
	1376	break;
	1377	case 'S' \| TYPE_IS_SHRIEKING:
	1378	#if SHORTSIZE != SIZE16
	1379	while (len-- > 0) {
	1380	unsigned short aushort;
	1381	SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
	1382	if (!checksum)
	1383	mPUSHu(aushort);
	1384	else if (checksum > bits_in_uv)
	1385	cdouble += (NV)aushort;
	1386	else
	1387	cuv += aushort;
	1388	}
	1389	break;
	1390	#else
	1391	/* FALLTHROUGH */
	1392	#endif
	1393	case 'v':
	1394	case 'n':
	1395	case 'S':
	1396	while (len-- > 0) {
	1397	U16 au16;
	1398	#if U16SIZE > SIZE16
	1399	au16 = 0;
	1400	#endif
	1401	SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
	1402	if (datumtype == 'n')
	1403	au16 = PerlSock_ntohs(au16);
	1404	if (datumtype == 'v')
	1405	au16 = vtohs(au16);
	1406	if (!checksum)
	1407	mPUSHu(au16);
	1408	else if (checksum > bits_in_uv)
	1409	cdouble += (NV) au16;
	1410	else
	1411	cuv += au16;
	1412	}
	1413	break;
	1414	case 'v' \| TYPE_IS_SHRIEKING:
	1415	case 'n' \| TYPE_IS_SHRIEKING:
	1416	while (len-- > 0) {
	1417	I16 ai16;
	1418	# if U16SIZE > SIZE16
	1419	ai16 = 0;
	1420	# endif
	1421	SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
	1422	/* There should never be any byte-swapping here. */
	1423	assert(!TYPE_ENDIANNESS(datumtype));
	1424	if (datumtype == ('n' \| TYPE_IS_SHRIEKING))
	1425	ai16 = (I16) PerlSock_ntohs((U16) ai16);
	1426	if (datumtype == ('v' \| TYPE_IS_SHRIEKING))
	1427	ai16 = (I16) vtohs((U16) ai16);
	1428	if (!checksum)
	1429	mPUSHi(ai16);
	1430	else if (checksum > bits_in_uv)
	1431	cdouble += (NV) ai16;
	1432	else
	1433	cuv += ai16;
	1434	}
	1435	break;
	1436	case 'i':
	1437	case 'i' \| TYPE_IS_SHRIEKING:
	1438	while (len-- > 0) {
	1439	int aint;
	1440	SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
	1441	if (!checksum)
	1442	mPUSHi(aint);
	1443	else if (checksum > bits_in_uv)
	1444	cdouble += (NV)aint;
	1445	else
	1446	cuv += aint;
	1447	}
	1448	break;
	1449	case 'I':
	1450	case 'I' \| TYPE_IS_SHRIEKING:
	1451	while (len-- > 0) {
	1452	unsigned int auint;
	1453	SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
	1454	if (!checksum)
	1455	mPUSHu(auint);
	1456	else if (checksum > bits_in_uv)
	1457	cdouble += (NV)auint;
	1458	else
	1459	cuv += auint;
	1460	}
	1461	break;
	1462	case 'j':
	1463	while (len-- > 0) {
	1464	IV aiv;
	1465	SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
	1466	if (!checksum)
	1467	mPUSHi(aiv);
	1468	else if (checksum > bits_in_uv)
	1469	cdouble += (NV)aiv;
	1470	else
	1471	cuv += aiv;
	1472	}
	1473	break;
	1474	case 'J':
	1475	while (len-- > 0) {
	1476	UV auv;
	1477	SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
	1478	if (!checksum)
	1479	mPUSHu(auv);
	1480	else if (checksum > bits_in_uv)
	1481	cdouble += (NV)auv;
	1482	else
	1483	cuv += auv;
	1484	}
	1485	break;
	1486	case 'l' \| TYPE_IS_SHRIEKING:
	1487	#if LONGSIZE != SIZE32
	1488	while (len-- > 0) {
	1489	long along;
	1490	SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
	1491	if (!checksum)
	1492	mPUSHi(along);
	1493	else if (checksum > bits_in_uv)
	1494	cdouble += (NV)along;
	1495	else
	1496	cuv += along;
	1497	}
	1498	break;
	1499	#else
	1500	/* FALLTHROUGH */
	1501	#endif
	1502	case 'l':
	1503	while (len-- > 0) {
	1504	I32 ai32;
	1505	#if U32SIZE > SIZE32
	1506	ai32 = 0;
	1507	#endif
	1508	SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
	1509	#if U32SIZE > SIZE32
	1510	if (ai32 > 2147483647) ai32 -= 4294967296;
	1511	#endif
	1512	if (!checksum)
	1513	mPUSHi(ai32);
	1514	else if (checksum > bits_in_uv)
	1515	cdouble += (NV)ai32;
	1516	else
	1517	cuv += ai32;
	1518	}
	1519	break;
	1520	case 'L' \| TYPE_IS_SHRIEKING:
	1521	#if LONGSIZE != SIZE32
	1522	while (len-- > 0) {
	1523	unsigned long aulong;
	1524	SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
	1525	if (!checksum)
	1526	mPUSHu(aulong);
	1527	else if (checksum > bits_in_uv)
	1528	cdouble += (NV)aulong;
	1529	else
	1530	cuv += aulong;
	1531	}
	1532	break;
	1533	#else
	1534	/* FALLTHROUGH */
	1535	#endif
	1536	case 'V':
	1537	case 'N':
	1538	case 'L':
	1539	while (len-- > 0) {
	1540	U32 au32;
	1541	#if U32SIZE > SIZE32
	1542	au32 = 0;
	1543	#endif
	1544	SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
	1545	if (datumtype == 'N')
	1546	au32 = PerlSock_ntohl(au32);
	1547	if (datumtype == 'V')
	1548	au32 = vtohl(au32);
	1549	if (!checksum)
	1550	mPUSHu(au32);
	1551	else if (checksum > bits_in_uv)
	1552	cdouble += (NV)au32;
	1553	else
	1554	cuv += au32;
	1555	}
	1556	break;
	1557	case 'V' \| TYPE_IS_SHRIEKING:
	1558	case 'N' \| TYPE_IS_SHRIEKING:
	1559	while (len-- > 0) {
	1560	I32 ai32;
	1561	#if U32SIZE > SIZE32
	1562	ai32 = 0;
	1563	#endif
	1564	SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
	1565	/* There should never be any byte swapping here. */
	1566	assert(!TYPE_ENDIANNESS(datumtype));
	1567	if (datumtype == ('N' \| TYPE_IS_SHRIEKING))
	1568	ai32 = (I32)PerlSock_ntohl((U32)ai32);
	1569	if (datumtype == ('V' \| TYPE_IS_SHRIEKING))
	1570	ai32 = (I32)vtohl((U32)ai32);
	1571	if (!checksum)
	1572	mPUSHi(ai32);
	1573	else if (checksum > bits_in_uv)
	1574	cdouble += (NV)ai32;
	1575	else
	1576	cuv += ai32;
	1577	}
	1578	break;
	1579	case 'p':
	1580	while (len-- > 0) {
	1581	const char *aptr;
	1582	SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
	1583	/* newSVpv generates undef if aptr is NULL */
	1584	mPUSHs(newSVpv(aptr, 0));
	1585	}
	1586	break;
	1587	case 'w':
	1588	{
	1589	UV auv = 0;
	1590	size_t bytes = 0;
	1591
	1592	while (len > 0 && s < strend) {
	1593	U8 ch;
	1594	ch = SHIFT_BYTE(utf8, s, strend, datumtype);
	1595	auv = (auv << 7) \| (ch & 0x7f);
	1596	/* UTF8_IS_XXXXX not right here because this is a BER, not
	1597	* UTF-8 format - using constant 0x80 */
	1598	if (ch < 0x80) {
	1599	bytes = 0;
	1600	mPUSHu(auv);
	1601	len--;
	1602	auv = 0;
	1603	continue;
	1604	}
	1605	if (++bytes >= sizeof(UV)) { /* promote to string */
	1606	const char *t;
	1607
	1608	sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
	1609	(int)TYPE_DIGITS(UV), auv);
	1610	while (s < strend) {
	1611	ch = SHIFT_BYTE(utf8, s, strend, datumtype);
	1612	sv = mul128(sv, (U8)(ch & 0x7f));
	1613	if (!(ch & 0x80)) {
	1614	bytes = 0;
	1615	break;
	1616	}
	1617	}
	1618	t = SvPV_nolen_const(sv);
	1619	while (*t == '0')
	1620	t++;
	1621	sv_chop(sv, t);
	1622	mPUSHs(sv);
	1623	len--;
	1624	auv = 0;
	1625	}
	1626	}
	1627	if ((s >= strend) && bytes)
	1628	Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
	1629	}
	1630	break;
	1631	case 'P':
	1632	if (symptr->howlen == e_star)
	1633	Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
	1634	EXTEND(SP, 1);
	1635	if (s + sizeof(char*) <= strend) {
	1636	char *aptr;
	1637	SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
	1638	/* newSVpvn generates undef if aptr is NULL */
	1639	PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
	1640	}
	1641	break;
	1642	#if defined(HAS_QUAD) && IVSIZE >= 8
	1643	case 'q':
	1644	while (len-- > 0) {
	1645	Quad_t aquad;
	1646	SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
	1647	if (!checksum)
	1648	mPUSHs(newSViv((IV)aquad));
	1649	else if (checksum > bits_in_uv)
	1650	cdouble += (NV)aquad;
	1651	else
	1652	cuv += aquad;
	1653	}
	1654	break;
	1655	case 'Q':
	1656	while (len-- > 0) {
	1657	Uquad_t auquad;
	1658	SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
	1659	if (!checksum)
	1660	mPUSHs(newSVuv((UV)auquad));
	1661	else if (checksum > bits_in_uv)
	1662	cdouble += (NV)auquad;
	1663	else
	1664	cuv += auquad;
	1665	}
	1666	break;
	1667	#endif
	1668	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
	1669	case 'f':
	1670	while (len-- > 0) {
	1671	float afloat;
	1672	SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
	1673	if (!checksum)
	1674	mPUSHn(afloat);
	1675	else
	1676	cdouble += afloat;
	1677	}
	1678	break;
	1679	case 'd':
	1680	while (len-- > 0) {
	1681	double adouble;
	1682	SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
	1683	if (!checksum)
	1684	mPUSHn(adouble);
	1685	else
	1686	cdouble += adouble;
	1687	}
	1688	break;
	1689	case 'F':
	1690	while (len-- > 0) {
	1691	NV_bytes anv;
	1692	SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
	1693	datumtype, needs_swap);
	1694	if (!checksum)
	1695	mPUSHn(anv.nv);
	1696	else
	1697	cdouble += anv.nv;
	1698	}
	1699	break;
	1700	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	1701	case 'D':
	1702	while (len-- > 0) {
	1703	ld_bytes aldouble;
	1704	SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
	1705	sizeof(aldouble.bytes), datumtype, needs_swap);
	1706	/* The most common long double format, the x86 80-bit
	1707	* extended precision, has either 2 or 6 unused bytes,
	1708	* which may contain garbage, which may contain
	1709	* unintentional data. While we do zero the bytes of
	1710	* the long double data in pack(), here in unpack() we
	1711	* don't, because it's really hard to envision that
	1712	* reading the long double off aldouble would be
	1713	* affected by the unused bytes.
	1714	*
	1715	* Note that trying to unpack 'long doubles' of 'long
	1716	* doubles' packed in another system is in the general
	1717	* case doomed without having more detail. */
	1718	if (!checksum)
	1719	mPUSHn(aldouble.ld);
	1720	else
	1721	cdouble += aldouble.ld;
	1722	}
	1723	break;
	1724	#endif
	1725	case 'u':
	1726	if (!checksum) {
	1727	const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
	1728	sv = sv_2mortal(newSV(l));
	1729	if (l) {
	1730	SvPOK_on(sv);
	1731	*SvEND(sv) = '\0';
	1732	}
	1733	}
	1734
	1735	/* Note that all legal uuencoded strings are ASCII printables, so
	1736	* have the same representation under UTF-8 vs not. This means we
	1737	* can ignore UTF8ness on legal input. For illegal we stop at the
	1738	* first failure, and don't report where/what that is, so again we
	1739	* can ignore UTF8ness */
	1740
	1741	while (s < strend && s != ' ' && ISUUCHAR(s)) {
	1742	I32 a, b, c, d;
	1743	char hunk[3];
	1744
	1745	len = PL_uudmap[(U8)s++] & 077;
	1746	while (len > 0) {
	1747	if (s < strend && ISUUCHAR(*s))
	1748	a = PL_uudmap[(U8)s++] & 077;
	1749	else
	1750	a = 0;
	1751	if (s < strend && ISUUCHAR(*s))
	1752	b = PL_uudmap[(U8)s++] & 077;
	1753	else
	1754	b = 0;
	1755	if (s < strend && ISUUCHAR(*s))
	1756	c = PL_uudmap[(U8)s++] & 077;
	1757	else
	1758	c = 0;
	1759	if (s < strend && ISUUCHAR(*s))
	1760	d = PL_uudmap[(U8)s++] & 077;
	1761	else
	1762	d = 0;
	1763	hunk[0] = (char)((a << 2) \| (b >> 4));
	1764	hunk[1] = (char)((b << 4) \| (c >> 2));
	1765	hunk[2] = (char)((c << 6) \| d);
	1766	if (!checksum)
	1767	sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
	1768	len -= 3;
	1769	}
	1770	if (*s == '\n')
	1771	s++;
	1772	else /* possible checksum byte */
	1773	if (s + 1 < strend && s[1] == '\n')
	1774	s += 2;
	1775	}
	1776	if (!checksum)
	1777	XPUSHs(sv);
	1778	break;
	1779	} /* End of switch */
	1780
	1781	if (checksum) {
	1782	if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) \|\|
	1783	(checksum > bits_in_uv &&
	1784	strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
	1785	NV trouble, anv;
	1786
	1787	anv = (NV) (1 << (checksum & 15));
	1788	while (checksum >= 16) {
	1789	checksum -= 16;
	1790	anv *= 65536.0;
	1791	}
	1792	while (cdouble < 0.0)
	1793	cdouble += anv;
	1794	cdouble = Perl_modf(cdouble / anv, &trouble);
	1795	#ifdef LONGDOUBLE_DOUBLEDOUBLE
	1796	/* Workaround for powerpc doubledouble modfl bug:
	1797	* close to 1.0L and -1.0L cdouble is 0, and trouble
	1798	* is cdouble / anv. */
	1799	if (trouble != Perl_ceil(trouble)) {
	1800	cdouble = trouble;
	1801	if (cdouble > 1.0L) cdouble -= 1.0L;
	1802	if (cdouble < -1.0L) cdouble += 1.0L;
	1803	}
	1804	#endif
	1805	cdouble *= anv;
	1806	sv = newSVnv(cdouble);
	1807	}
	1808	else {
	1809	if (checksum < bits_in_uv) {
	1810	UV mask = ((UV)1 << checksum) - 1;
	1811	cuv &= mask;
	1812	}
	1813	sv = newSVuv(cuv);
	1814	}
	1815	mXPUSHs(sv);
	1816	checksum = 0;
	1817	}
	1818
	1819	if (symptr->flags & FLAG_SLASH){
	1820	if (SP - PL_stack_base - start_sp_offset <= 0)
	1821	break;
	1822	if( next_symbol(symptr) ){
	1823	if( symptr->howlen == e_number )
	1824	Perl_croak(aTHX_ "Count after length/code in unpack" );
	1825	if( beyond ){
	1826	/* ...end of char buffer then no decent length available */
	1827	Perl_croak(aTHX_ "length/code after end of string in unpack" );
	1828	} else {
	1829	/* take top of stack (hope it's numeric) */
	1830	len = POPi;
	1831	if( len < 0 )
	1832	Perl_croak(aTHX_ "Negative '/' count in unpack" );
	1833	}
	1834	} else {
	1835	Perl_croak(aTHX_ "Code missing after '/' in unpack" );
	1836	}
	1837	datumtype = symptr->code;
	1838	explicit_length = FALSE;
	1839	goto redo_switch;
	1840	}
	1841	}
	1842
	1843	if (new_s)
	1844	*new_s = s;
	1845	PUTBACK;
	1846	return SP - PL_stack_base - start_sp_offset;
	1847	}
	1848
	1849	PP(pp_unpack)
	1850	{
	1851	dSP;
	1852	dPOPPOPssrl;
	1853	U8 gimme = GIMME_V;
	1854	STRLEN llen;
	1855	STRLEN rlen;
	1856	const char *pat = SvPV_const(left, llen);
	1857	const char *s = SvPV_const(right, rlen);
	1858	const char *strend = s + rlen;
	1859	const char *patend = pat + llen;
	1860	SSize_t cnt;
	1861
	1862	PUTBACK;
	1863	cnt = unpackstring(pat, patend, s, strend,
	1864	((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
	1865	\| (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
	1866
	1867	SPAGAIN;
	1868	if ( !cnt && gimme == G_SCALAR )
	1869	PUSHs(&PL_sv_undef);
	1870	RETURN;
	1871	}
	1872
	1873	STATIC U8 *
	1874	doencodes(U8 h, const U8 s, SSize_t len)
	1875	{
	1876	*h++ = PL_uuemap[len];
	1877	while (len > 2) {
	1878	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
	1879	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) \| ((s[1] >> 4) & 017)))];
	1880	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) \| ((s[2] >> 6) & 03)))];
	1881	*h++ = PL_uuemap[(077 & (s[2] & 077))];
	1882	s += 3;
	1883	len -= 3;
	1884	}
	1885	if (len > 0) {
	1886	const U8 r = (len > 1 ? s[1] : '\0');
	1887	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
	1888	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) \| ((r >> 4) & 017)))];
	1889	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
	1890	*h++ = PL_uuemap[0];
	1891	}
	1892	*h++ = '\n';
	1893	return h;
	1894	}
	1895
	1896	STATIC SV *
	1897	S_is_an_int(pTHX_ const char *s, STRLEN l)
	1898	{
	1899	SV *result = newSVpvn(s, l);
	1900	char const result_c = SvPV_nolen(result); / convenience */
	1901	char *out = result_c;
	1902	bool skip = 1;
	1903	bool ignore = 0;
	1904
	1905	PERL_ARGS_ASSERT_IS_AN_INT;
	1906
	1907	while (*s) {
	1908	switch (*s) {
	1909	case ' ':
	1910	break;
	1911	case '+':
	1912	if (!skip) {
	1913	SvREFCNT_dec(result);
	1914	return (NULL);
	1915	}
	1916	break;
	1917	case '0':
	1918	case '1':
	1919	case '2':
	1920	case '3':
	1921	case '4':
	1922	case '5':
	1923	case '6':
	1924	case '7':
	1925	case '8':
	1926	case '9':
	1927	skip = 0;
	1928	if (!ignore) {
	1929	(out++) = s;
	1930	}
	1931	break;
	1932	case '.':
	1933	ignore = 1;
	1934	break;
	1935	default:
	1936	SvREFCNT_dec(result);
	1937	return (NULL);
	1938	}
	1939	s++;
	1940	}
	1941	*(out++) = '\0';
	1942	SvCUR_set(result, out - result_c);
	1943	return (result);
	1944	}
	1945
	1946	/* pnum must be '\0' terminated */
	1947	STATIC int
	1948	S_div128(pTHX_ SV pnum, bool done)
	1949	{
	1950	STRLEN len;
	1951	char * const s = SvPV(pnum, len);
	1952	char *t = s;
	1953	int m = 0;
	1954
	1955	PERL_ARGS_ASSERT_DIV128;
	1956
	1957	*done = 1;
	1958	while (*t) {
	1959	const int i = m * 10 + (*t - '0');
	1960	const int r = (i >> 7); /* r < 10 */
	1961	m = i & 0x7F;
	1962	if (r) {
	1963	*done = 0;
	1964	}
	1965	*(t++) = '0' + r;
	1966	}
	1967	*(t++) = '\0';
	1968	SvCUR_set(pnum, (STRLEN) (t - s));
	1969	return (m);
	1970	}
	1971
	1972	/*
	1973	=for apidoc packlist
	1974
	1975	The engine implementing C<pack()> Perl function.
	1976
	1977	=cut
	1978	*/
	1979
	1980	void
	1981	Perl_packlist(pTHX_ SV cat, const char pat, const char patend, SV beglist, SV *endlist )
	1982	{
	1983	tempsym_t sym;
	1984
	1985	PERL_ARGS_ASSERT_PACKLIST;
	1986
	1987	TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
	1988
	1989	/* We're going to do changes through SvPVX(cat). Make sure it's valid.
	1990	Also make sure any UTF8 flag is loaded */
	1991	SvPV_force_nolen(cat);
	1992	if (DO_UTF8(cat))
	1993	sym.flags \|= FLAG_PARSE_UTF8 \| FLAG_DO_UTF8;
	1994
	1995	(void)pack_rec( cat, &sym, beglist, endlist );
	1996	}
	1997
	1998	/* like sv_utf8_upgrade, but also repoint the group start markers */
	1999	STATIC void
	2000	marked_upgrade(pTHX_ SV sv, tempsym_t sym_ptr) {
	2001	STRLEN len;
	2002	tempsym_t *group;
	2003	const char from_ptr, from_start, from_end, marks, *m;
	2004	char to_start, to_ptr;
	2005
	2006	if (SvUTF8(sv)) return;
	2007
	2008	from_start = SvPVX_const(sv);
	2009	from_end = from_start + SvCUR(sv);
	2010	for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
	2011	if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
	2012	if (from_ptr == from_end) {
	2013	/* Simple case: no character needs to be changed */
	2014	SvUTF8_on(sv);
	2015	return;
	2016	}
	2017
	2018	len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
	2019	Newx(to_start, len, char);
	2020	Copy(from_start, to_start, from_ptr-from_start, char);
	2021	to_ptr = to_start + (from_ptr-from_start);
	2022
	2023	Newx(marks, sym_ptr->level+2, const char *);
	2024	for (group=sym_ptr; group; group = group->previous)
	2025	marks[group->level] = from_start + group->strbeg;
	2026	marks[sym_ptr->level+1] = from_end+1;
	2027	for (m = marks; *m < from_ptr; m++)
	2028	m = to_start + (m-from_start);
	2029
	2030	for (;from_ptr < from_end; from_ptr++) {
	2031	while (m == from_ptr) m++ = to_ptr;
	2032	to_ptr = (char ) uvchr_to_utf8((U8 ) to_ptr, (U8 ) from_ptr);
	2033	}
	2034	*to_ptr = 0;
	2035
	2036	while (m == from_ptr) m++ = to_ptr;
	2037	if (m != marks + sym_ptr->level+1) {
	2038	Safefree(marks);
	2039	Safefree(to_start);
	2040	Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
	2041	"level=%d", m, marks, sym_ptr->level);
	2042	}
	2043	for (group=sym_ptr; group; group = group->previous)
	2044	group->strbeg = marks[group->level] - to_start;
	2045	Safefree(marks);
	2046
	2047	if (SvOOK(sv)) {
	2048	if (SvIVX(sv)) {
	2049	SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
	2050	from_start -= SvIVX(sv);
	2051	SvIV_set(sv, 0);
	2052	}
	2053	SvFLAGS(sv) &= ~SVf_OOK;
	2054	}
	2055	if (SvLEN(sv) != 0)
	2056	Safefree(from_start);
	2057	SvPV_set(sv, to_start);
	2058	SvCUR_set(sv, to_ptr - to_start);
	2059	SvLEN_set(sv, len);
	2060	SvUTF8_on(sv);
	2061	}
	2062
	2063	/* Exponential string grower. Makes string extension effectively O(n)
	2064	needed says how many extra bytes we need (not counting the final '\0')
	2065	Only grows the string if there is an actual lack of space
	2066	*/
	2067	STATIC char *
	2068	S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
	2069	const STRLEN cur = SvCUR(sv);
	2070	const STRLEN len = SvLEN(sv);
	2071	STRLEN extend;
	2072
	2073	PERL_ARGS_ASSERT_SV_EXP_GROW;
	2074
	2075	if (len - cur > needed) return SvPVX(sv);
	2076	extend = needed > len ? needed : len;
	2077	return SvGROW(sv, len+extend+1);
	2078	}
	2079
	2080	static SV *
	2081	S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
	2082	{
	2083	SvGETMAGIC(sv);
	2084	if (UNLIKELY(SvAMAGIC(sv)))
	2085	sv = sv_2num(sv);
	2086	if (UNLIKELY(isinfnansv(sv))) {
	2087	const I32 c = TYPE_NO_MODIFIERS(datumtype);
	2088	const NV nv = SvNV_nomg(sv);
	2089	if (c == 'w')
	2090	Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
	2091	else
	2092	Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
	2093	}
	2094	return sv;
	2095	}
	2096
	2097	#define SvIV_no_inf(sv,d) \
	2098	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
	2099	#define SvUV_no_inf(sv,d) \
	2100	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
	2101
	2102	STATIC
	2103	SV **
	2104	S_pack_rec(pTHX_ SV cat, tempsym_t symptr, SV beglist, SV endlist )
	2105	{
	2106	tempsym_t lookahead;
	2107	SSize_t items = endlist - beglist;
	2108	bool found = next_symbol(symptr);
	2109	bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
	2110	bool warn_utf8 = ckWARN(WARN_UTF8);
	2111	char* from;
	2112
	2113	PERL_ARGS_ASSERT_PACK_REC;
	2114
	2115	if (symptr->level == 0 && found && symptr->code == 'U') {
	2116	marked_upgrade(aTHX_ cat, symptr);
	2117	symptr->flags \|= FLAG_DO_UTF8;
	2118	utf8 = 0;
	2119	}
	2120	symptr->strbeg = SvCUR(cat);
	2121
	2122	while (found) {
	2123	SV *fromstr;
	2124	STRLEN fromlen;
	2125	SSize_t len;
	2126	SV *lengthcode = NULL;
	2127	I32 datumtype = symptr->code;
	2128	howlen_t howlen = symptr->howlen;
	2129	char *start = SvPVX(cat);
	2130	char *cur = start + SvCUR(cat);
	2131	bool needs_swap;
	2132
	2133	#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
	2134	#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
	2135
	2136	switch (howlen) {
	2137	case e_star:
	2138	len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
	2139	0 : items;
	2140	break;
	2141	default:
	2142	/* e_no_len and e_number */
	2143	len = symptr->length;
	2144	break;
	2145	}
	2146
	2147	if (len) {
	2148	packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
	2149
	2150	if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
	2151	/* We can process this letter. */
	2152	STRLEN size = props & PACK_SIZE_MASK;
	2153	GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
	2154	}
	2155	}
	2156
	2157	/* Look ahead for next symbol. Do we have code/code? */
	2158	lookahead = *symptr;
	2159	found = next_symbol(&lookahead);
	2160	if (symptr->flags & FLAG_SLASH) {
	2161	IV count;
	2162	if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
	2163	if (strchr("aAZ", lookahead.code)) {
	2164	if (lookahead.howlen == e_number) count = lookahead.length;
	2165	else {
	2166	if (items > 0) {
	2167	count = sv_len_utf8(*beglist);
	2168	}
	2169	else count = 0;
	2170	if (lookahead.code == 'Z') count++;
	2171	}
	2172	} else {
	2173	if (lookahead.howlen == e_number && lookahead.length < items)
	2174	count = lookahead.length;
	2175	else count = items;
	2176	}
	2177	lookahead.howlen = e_number;
	2178	lookahead.length = count;
	2179	lengthcode = sv_2mortal(newSViv(count));
	2180	}
	2181
	2182	needs_swap = NEEDS_SWAP(datumtype);
	2183
	2184	/* Code inside the switch must take care to properly update
	2185	cat (CUR length and '\0' termination) if it updated *cur and
	2186	doesn't simply leave using break */
	2187	switch (TYPE_NO_ENDIANNESS(datumtype)) {
	2188	default:
	2189	Perl_croak(aTHX_ "Invalid type '%c' in pack",
	2190	(int) TYPE_NO_MODIFIERS(datumtype));
	2191	case '%':
	2192	Perl_croak(aTHX_ "'%%' may not be used in pack");
	2193
	2194	case '.' \| TYPE_IS_SHRIEKING:
	2195	case '.':
	2196	if (howlen == e_star) from = start;
	2197	else if (len == 0) from = cur;
	2198	else {
	2199	tempsym_t *group = symptr;
	2200
	2201	while (--len && group) group = group->previous;
	2202	from = group ? start + group->strbeg : start;
	2203	}
	2204	fromstr = NEXTFROM;
	2205	len = SvIV_no_inf(fromstr, datumtype);
	2206	goto resize;
	2207	case '@' \| TYPE_IS_SHRIEKING:
	2208	case '@':
	2209	from = start + symptr->strbeg;
	2210	resize:
	2211	if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
	2212	if (len >= 0) {
	2213	while (len && from < cur) {
	2214	from += UTF8SKIP(from);
	2215	len--;
	2216	}
	2217	if (from > cur)
	2218	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2219	if (len) {
	2220	/* Here we know from == cur */
	2221	grow:
	2222	GROWING(0, cat, start, cur, len);
	2223	Zero(cur, len, char);
	2224	cur += len;
	2225	} else if (from < cur) {
	2226	len = cur - from;
	2227	goto shrink;
	2228	} else goto no_change;
	2229	} else {
	2230	cur = from;
	2231	len = -len;
	2232	goto utf8_shrink;
	2233	}
	2234	else {
	2235	len -= cur - from;
	2236	if (len > 0) goto grow;
	2237	if (len == 0) goto no_change;
	2238	len = -len;
	2239	goto shrink;
	2240	}
	2241	break;
	2242
	2243	case '(': {
	2244	tempsym_t savsym = *symptr;
	2245	U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
	2246	symptr->flags \|= group_modifiers;
	2247	symptr->patend = savsym.grpend;
	2248	symptr->level++;
	2249	symptr->previous = &lookahead;
	2250	while (len--) {
	2251	U32 was_utf8;
	2252	if (utf8) symptr->flags \|= FLAG_PARSE_UTF8;
	2253	else symptr->flags &= ~FLAG_PARSE_UTF8;
	2254	was_utf8 = SvUTF8(cat);
	2255	symptr->patptr = savsym.grpbeg;
	2256	beglist = pack_rec(cat, symptr, beglist, endlist);
	2257	if (SvUTF8(cat) != was_utf8)
	2258	/* This had better be an upgrade while in utf8==0 mode */
	2259	utf8 = 1;
	2260
	2261	if (savsym.howlen == e_star && beglist == endlist)
	2262	break; /* No way to continue */
	2263	}
	2264	items = endlist - beglist;
	2265	lookahead.flags = symptr->flags & ~group_modifiers;
	2266	goto no_change;
	2267	}
	2268	case 'X' \| TYPE_IS_SHRIEKING:
	2269	if (!len) /* Avoid division by 0 */
	2270	len = 1;
	2271	if (utf8) {
	2272	char hop, last;
	2273	SSize_t l = len;
	2274	hop = last = start;
	2275	while (hop < cur) {
	2276	hop += UTF8SKIP(hop);
	2277	if (--l == 0) {
	2278	last = hop;
	2279	l = len;
	2280	}
	2281	}
	2282	if (last > cur)
	2283	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2284	cur = last;
	2285	break;
	2286	}
	2287	len = (cur-start) % len;
	2288	/* FALLTHROUGH */
	2289	case 'X':
	2290	if (utf8) {
	2291	if (len < 1) goto no_change;
	2292	utf8_shrink:
	2293	while (len > 0) {
	2294	if (cur <= start)
	2295	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2296	(int) TYPE_NO_MODIFIERS(datumtype));
	2297	while (--cur, UTF8_IS_CONTINUATION(*cur)) {
	2298	if (cur <= start)
	2299	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2300	(int) TYPE_NO_MODIFIERS(datumtype));
	2301	}
	2302	len--;
	2303	}
	2304	} else {
	2305	shrink:
	2306	if (cur - start < len)
	2307	Perl_croak(aTHX_ "'%c' outside of string in pack",
	2308	(int) TYPE_NO_MODIFIERS(datumtype));
	2309	cur -= len;
	2310	}
	2311	if (cur < start+symptr->strbeg) {
	2312	/* Make sure group starts don't point into the void */
	2313	tempsym_t *group;
	2314	const STRLEN length = cur-start;
	2315	for (group = symptr;
	2316	group && length < group->strbeg;
	2317	group = group->previous) group->strbeg = length;
	2318	lookahead.strbeg = length;
	2319	}
	2320	break;
	2321	case 'x' \| TYPE_IS_SHRIEKING: {
	2322	SSize_t ai32;
	2323	if (!len) /* Avoid division by 0 */
	2324	len = 1;
	2325	if (utf8) ai32 = utf8_length((U8 ) start, (U8 ) cur) % len;
	2326	else ai32 = (cur - start) % len;
	2327	if (ai32 == 0) goto no_change;
	2328	len -= ai32;
	2329	}
	2330	/* FALLTHROUGH */
	2331	case 'x':
	2332	goto grow;
	2333	case 'A':
	2334	case 'Z':
	2335	case 'a': {
	2336	const char *aptr;
	2337
	2338	fromstr = NEXTFROM;
	2339	aptr = SvPV_const(fromstr, fromlen);
	2340	if (DO_UTF8(fromstr)) {
	2341	const char end, s;
	2342
	2343	if (!utf8 && !SvUTF8(cat)) {
	2344	marked_upgrade(aTHX_ cat, symptr);
	2345	lookahead.flags \|= FLAG_DO_UTF8;
	2346	lookahead.strbeg = symptr->strbeg;
	2347	utf8 = 1;
	2348	start = SvPVX(cat);
	2349	cur = start + SvCUR(cat);
	2350	}
	2351	if (howlen == e_star) {
	2352	if (utf8) goto string_copy;
	2353	len = fromlen+1;
	2354	}
	2355	s = aptr;
	2356	end = aptr + fromlen;
	2357	fromlen = datumtype == 'Z' ? len-1 : len;
	2358	while ((SSize_t) fromlen > 0 && s < end) {
	2359	s += UTF8SKIP(s);
	2360	fromlen--;
	2361	}
	2362	if (s > end)
	2363	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
	2364	if (utf8) {
	2365	len = fromlen;
	2366	if (datumtype == 'Z') len++;
	2367	fromlen = s-aptr;
	2368	len += fromlen;
	2369
	2370	goto string_copy;
	2371	}
	2372	fromlen = len - fromlen;
	2373	if (datumtype == 'Z') fromlen--;
	2374	if (howlen == e_star) {
	2375	len = fromlen;
	2376	if (datumtype == 'Z') len++;
	2377	}
	2378	GROWING(0, cat, start, cur, len);
	2379	if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
	2380	datumtype \| TYPE_IS_PACK))
	2381	Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
	2382	"for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
	2383	(int)datumtype, aptr, end, cur, fromlen);
	2384	cur += fromlen;
	2385	len -= fromlen;
	2386	} else if (utf8) {
	2387	if (howlen == e_star) {
	2388	len = fromlen;
	2389	if (datumtype == 'Z') len++;
	2390	}
	2391	if (len <= (SSize_t) fromlen) {
	2392	fromlen = len;
	2393	if (datumtype == 'Z' && fromlen > 0) fromlen--;
	2394	}
	2395	/* assumes a byte expands to at most UTF8_EXPAND bytes on
	2396	upgrade, so:
	2397	expected_length <= from_lenUTF8_EXPAND + (len-from_len) /
	2398	GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
	2399	len -= fromlen;
	2400	while (fromlen > 0) {
	2401	cur = (char ) uvchr_to_utf8((U8 ) cur, * (U8 *) aptr);
	2402	aptr++;
	2403	fromlen--;
	2404	}
	2405	} else {
	2406	string_copy:
	2407	if (howlen == e_star) {
	2408	len = fromlen;
	2409	if (datumtype == 'Z') len++;
	2410	}
	2411	if (len <= (SSize_t) fromlen) {
	2412	fromlen = len;
	2413	if (datumtype == 'Z' && fromlen > 0) fromlen--;
	2414	}
	2415	GROWING(0, cat, start, cur, len);
	2416	Copy(aptr, cur, fromlen, char);
	2417	cur += fromlen;
	2418	len -= fromlen;
	2419	}
	2420	memset(cur, datumtype == 'A' ? ' ' : '\0', len);
	2421	cur += len;
	2422	SvTAINT(cat);
	2423	break;
	2424	}
	2425	case 'B':
	2426	case 'b': {
	2427	const char str, end;
	2428	SSize_t l, field_len;
	2429	U8 bits;
	2430	bool utf8_source;
	2431	U32 utf8_flags;
	2432
	2433	fromstr = NEXTFROM;
	2434	str = SvPV_const(fromstr, fromlen);
	2435	end = str + fromlen;
	2436	if (DO_UTF8(fromstr)) {
	2437	utf8_source = TRUE;
	2438	utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
	2439	} else {
	2440	utf8_source = FALSE;
	2441	utf8_flags = 0; /* Unused, but keep compilers happy */
	2442	}
	2443	if (howlen == e_star) len = fromlen;
	2444	field_len = (len+7)/8;
	2445	GROWING(utf8, cat, start, cur, field_len);
	2446	if (len > (SSize_t)fromlen) len = fromlen;
	2447	bits = 0;
	2448	l = 0;
	2449	if (datumtype == 'B')
	2450	while (l++ < len) {
	2451	if (utf8_source) {
	2452	UV val = 0;
	2453	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2454	bits \|= val & 1;
	2455	} else bits \|= *str++ & 1;
	2456	if (l & 7) bits <<= 1;
	2457	else {
	2458	PUSH_BYTE(utf8, cur, bits);
	2459	bits = 0;
	2460	}
	2461	}
	2462	else
	2463	/* datumtype == 'b' */
	2464	while (l++ < len) {
	2465	if (utf8_source) {
	2466	UV val = 0;
	2467	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2468	if (val & 1) bits \|= 0x80;
	2469	} else if (*str++ & 1)
	2470	bits \|= 0x80;
	2471	if (l & 7) bits >>= 1;
	2472	else {
	2473	PUSH_BYTE(utf8, cur, bits);
	2474	bits = 0;
	2475	}
	2476	}
	2477	l--;
	2478	if (l & 7) {
	2479	if (datumtype == 'B')
	2480	bits <<= 7 - (l & 7);
	2481	else
	2482	bits >>= 7 - (l & 7);
	2483	PUSH_BYTE(utf8, cur, bits);
	2484	l += 7;
	2485	}
	2486	/* Determine how many chars are left in the requested field */
	2487	l /= 8;
	2488	if (howlen == e_star) field_len = 0;
	2489	else field_len -= l;
	2490	Zero(cur, field_len, char);
	2491	cur += field_len;
	2492	break;
	2493	}
	2494	case 'H':
	2495	case 'h': {
	2496	const char str, end;
	2497	SSize_t l, field_len;
	2498	U8 bits;
	2499	bool utf8_source;
	2500	U32 utf8_flags;
	2501
	2502	fromstr = NEXTFROM;
	2503	str = SvPV_const(fromstr, fromlen);
	2504	end = str + fromlen;
	2505	if (DO_UTF8(fromstr)) {
	2506	utf8_source = TRUE;
	2507	utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
	2508	} else {
	2509	utf8_source = FALSE;
	2510	utf8_flags = 0; /* Unused, but keep compilers happy */
	2511	}
	2512	if (howlen == e_star) len = fromlen;
	2513	field_len = (len+1)/2;
	2514	GROWING(utf8, cat, start, cur, field_len);
	2515	if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
	2516	bits = 0;
	2517	l = 0;
	2518	if (datumtype == 'H')
	2519	while (l++ < len) {
	2520	if (utf8_source) {
	2521	UV val = 0;
	2522	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2523	if (val < 256 && isALPHA(val))
	2524	bits \|= (val + 9) & 0xf;
	2525	else
	2526	bits \|= val & 0xf;
	2527	} else if (isALPHA(*str))
	2528	bits \|= (*str++ + 9) & 0xf;
	2529	else
	2530	bits \|= *str++ & 0xf;
	2531	if (l & 1) bits <<= 4;
	2532	else {
	2533	PUSH_BYTE(utf8, cur, bits);
	2534	bits = 0;
	2535	}
	2536	}
	2537	else
	2538	while (l++ < len) {
	2539	if (utf8_source) {
	2540	UV val = 0;
	2541	NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
	2542	if (val < 256 && isALPHA(val))
	2543	bits \|= ((val + 9) & 0xf) << 4;
	2544	else
	2545	bits \|= (val & 0xf) << 4;
	2546	} else if (isALPHA(*str))
	2547	bits \|= ((*str++ + 9) & 0xf) << 4;
	2548	else
	2549	bits \|= (*str++ & 0xf) << 4;
	2550	if (l & 1) bits >>= 4;
	2551	else {
	2552	PUSH_BYTE(utf8, cur, bits);
	2553	bits = 0;
	2554	}
	2555	}
	2556	l--;
	2557	if (l & 1) {
	2558	PUSH_BYTE(utf8, cur, bits);
	2559	l++;
	2560	}
	2561	/* Determine how many chars are left in the requested field */
	2562	l /= 2;
	2563	if (howlen == e_star) field_len = 0;
	2564	else field_len -= l;
	2565	Zero(cur, field_len, char);
	2566	cur += field_len;
	2567	break;
	2568	}
	2569	case 'c':
	2570	while (len-- > 0) {
	2571	IV aiv;
	2572	fromstr = NEXTFROM;
	2573	aiv = SvIV_no_inf(fromstr, datumtype);
	2574	if ((-128 > aiv \|\| aiv > 127))
	2575	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2576	"Character in 'c' format wrapped in pack");
	2577	PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
	2578	}
	2579	break;
	2580	case 'C':
	2581	if (len == 0) {
	2582	utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
	2583	break;
	2584	}
	2585	while (len-- > 0) {
	2586	IV aiv;
	2587	fromstr = NEXTFROM;
	2588	aiv = SvIV_no_inf(fromstr, datumtype);
	2589	if ((0 > aiv \|\| aiv > 0xff))
	2590	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2591	"Character in 'C' format wrapped in pack");
	2592	PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
	2593	}
	2594	break;
	2595	case 'W': {
	2596	char *end;
	2597	U8 in_bytes = (U8)IN_BYTES;
	2598
	2599	end = start+SvLEN(cat)-1;
	2600	if (utf8) end -= UTF8_MAXLEN-1;
	2601	while (len-- > 0) {
	2602	UV auv;
	2603	fromstr = NEXTFROM;
	2604	auv = SvUV_no_inf(fromstr, datumtype);
	2605	if (in_bytes) auv = auv % 0x100;
	2606	if (utf8) {
	2607	W_utf8:
	2608	if (cur >= end) {
	2609	*cur = '\0';
	2610	SvCUR_set(cat, cur - start);
	2611
	2612	GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
	2613	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2614	}
	2615	cur = (char ) uvchr_to_utf8_flags((U8 ) cur, auv, 0);
	2616	} else {
	2617	if (auv >= 0x100) {
	2618	if (!SvUTF8(cat)) {
	2619	*cur = '\0';
	2620	SvCUR_set(cat, cur - start);
	2621	marked_upgrade(aTHX_ cat, symptr);
	2622	lookahead.flags \|= FLAG_DO_UTF8;
	2623	lookahead.strbeg = symptr->strbeg;
	2624	utf8 = 1;
	2625	start = SvPVX(cat);
	2626	cur = start + SvCUR(cat);
	2627	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2628	goto W_utf8;
	2629	}
	2630	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	2631	"Character in 'W' format wrapped in pack");
	2632	auv &= 0xff;
	2633	}
	2634	if (cur >= end) {
	2635	*cur = '\0';
	2636	SvCUR_set(cat, cur - start);
	2637	GROWING(0, cat, start, cur, len+1);
	2638	end = start+SvLEN(cat)-1;
	2639	}
	2640	(U8 ) cur++ = (U8)auv;
	2641	}
	2642	}
	2643	break;
	2644	}
	2645	case 'U': {
	2646	char *end;
	2647
	2648	if (len == 0) {
	2649	if (!(symptr->flags & FLAG_DO_UTF8)) {
	2650	marked_upgrade(aTHX_ cat, symptr);
	2651	lookahead.flags \|= FLAG_DO_UTF8;
	2652	lookahead.strbeg = symptr->strbeg;
	2653	}
	2654	utf8 = 0;
	2655	goto no_change;
	2656	}
	2657
	2658	end = start+SvLEN(cat);
	2659	if (!utf8) end -= UTF8_MAXLEN;
	2660	while (len-- > 0) {
	2661	UV auv;
	2662	fromstr = NEXTFROM;
	2663	auv = SvUV_no_inf(fromstr, datumtype);
	2664	if (utf8) {
	2665	U8 buffer[UTF8_MAXLEN+1], *endb;
	2666	endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
	2667	if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
	2668	*cur = '\0';
	2669	SvCUR_set(cat, cur - start);
	2670	GROWING(0, cat, start, cur,
	2671	len+(endb-buffer)*UTF8_EXPAND);
	2672	end = start+SvLEN(cat);
	2673	}
	2674	cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
	2675	} else {
	2676	if (cur >= end) {
	2677	*cur = '\0';
	2678	SvCUR_set(cat, cur - start);
	2679	GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
	2680	end = start+SvLEN(cat)-UTF8_MAXLEN;
	2681	}
	2682	cur = (char ) uvchr_to_utf8_flags((U8 ) cur,
	2683	UNI_TO_NATIVE(auv),
	2684	0);
	2685	}
	2686	}
	2687	break;
	2688	}
	2689	/* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
	2690	case 'f':
	2691	while (len-- > 0) {
	2692	float afloat;
	2693	NV anv;
	2694	fromstr = NEXTFROM;
	2695	anv = SvNV(fromstr);
	2696	# if (defined(VMS) && !defined(_IEEE_FP)) \|\| defined(DOUBLE_IS_VAX_FLOAT)
	2697	/* IEEE fp overflow shenanigans are unavailable on VAX and optional
	2698	* on Alpha; fake it if we don't have them.
	2699	*/
	2700	if (anv > FLT_MAX)
	2701	afloat = FLT_MAX;
	2702	else if (anv < -FLT_MAX)
	2703	afloat = -FLT_MAX;
	2704	else afloat = (float)anv;
	2705	# else
	2706	# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
	2707	if(Perl_isnan(anv))
	2708	afloat = (float)NV_NAN;
	2709	else
	2710	# endif
	2711	# ifdef NV_INF
	2712	/* a simple cast to float is undefined if outside
	2713	* the range of values that can be represented */
	2714	afloat = (float)(anv > FLT_MAX ? NV_INF :
	2715	anv < -FLT_MAX ? -NV_INF : anv);
	2716	# endif
	2717	# endif
	2718	PUSH_VAR(utf8, cur, afloat, needs_swap);
	2719	}
	2720	break;
	2721	case 'd':
	2722	while (len-- > 0) {
	2723	double adouble;
	2724	NV anv;
	2725	fromstr = NEXTFROM;
	2726	anv = SvNV(fromstr);
	2727	# if (defined(VMS) && !defined(_IEEE_FP)) \|\| defined(DOUBLE_IS_VAX_FLOAT)
	2728	/* IEEE fp overflow shenanigans are unavailable on VAX and optional
	2729	* on Alpha; fake it if we don't have them.
	2730	*/
	2731	if (anv > DBL_MAX)
	2732	adouble = DBL_MAX;
	2733	else if (anv < -DBL_MAX)
	2734	adouble = -DBL_MAX;
	2735	else adouble = (double)anv;
	2736	# else
	2737	adouble = (double)anv;
	2738	# endif
	2739	PUSH_VAR(utf8, cur, adouble, needs_swap);
	2740	}
	2741	break;
	2742	case 'F': {
	2743	NV_bytes anv;
	2744	Zero(&anv, 1, NV); /* can be long double with unused bits */
	2745	while (len-- > 0) {
	2746	fromstr = NEXTFROM;
	2747	#ifdef __GNUC__
	2748	/* to work round a gcc/x86 bug; don't use SvNV */
	2749	anv.nv = sv_2nv(fromstr);
	2750	# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
	2751	&& LONG_DOUBLESIZE > 10
	2752	/* GCC sometimes overwrites the padding in the
	2753	assignment above */
	2754	Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
	2755	# endif
	2756	#else
	2757	anv.nv = SvNV(fromstr);
	2758	#endif
	2759	PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
	2760	}
	2761	break;
	2762	}
	2763	#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
	2764	case 'D': {
	2765	ld_bytes aldouble;
	2766	/* long doubles can have unused bits, which may be nonzero */
	2767	Zero(&aldouble, 1, long double);
	2768	while (len-- > 0) {
	2769	fromstr = NEXTFROM;
	2770	# ifdef __GNUC__
	2771	/* to work round a gcc/x86 bug; don't use SvNV */
	2772	aldouble.ld = (long double)sv_2nv(fromstr);
	2773	# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
	2774	/* GCC sometimes overwrites the padding in the
	2775	assignment above */
	2776	Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
	2777	# endif
	2778	# else
	2779	aldouble.ld = (long double)SvNV(fromstr);
	2780	# endif
	2781	PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
	2782	needs_swap);
	2783	}
	2784	break;
	2785	}
	2786	#endif
	2787	case 'n' \| TYPE_IS_SHRIEKING:
	2788	case 'n':
	2789	while (len-- > 0) {
	2790	I16 ai16;
	2791	fromstr = NEXTFROM;
	2792	ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
	2793	ai16 = PerlSock_htons(ai16);
	2794	PUSH16(utf8, cur, &ai16, FALSE);
	2795	}
	2796	break;
	2797	case 'v' \| TYPE_IS_SHRIEKING:
	2798	case 'v':
	2799	while (len-- > 0) {
	2800	I16 ai16;
	2801	fromstr = NEXTFROM;
	2802	ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
	2803	ai16 = htovs(ai16);
	2804	PUSH16(utf8, cur, &ai16, FALSE);
	2805	}
	2806	break;
	2807	case 'S' \| TYPE_IS_SHRIEKING:
	2808	#if SHORTSIZE != SIZE16
	2809	while (len-- > 0) {
	2810	unsigned short aushort;
	2811	fromstr = NEXTFROM;
	2812	aushort = SvUV_no_inf(fromstr, datumtype);
	2813	PUSH_VAR(utf8, cur, aushort, needs_swap);
	2814	}
	2815	break;
	2816	#else
	2817	/* FALLTHROUGH */
	2818	#endif
	2819	case 'S':
	2820	while (len-- > 0) {
	2821	U16 au16;
	2822	fromstr = NEXTFROM;
	2823	au16 = (U16)SvUV_no_inf(fromstr, datumtype);
	2824	PUSH16(utf8, cur, &au16, needs_swap);
	2825	}
	2826	break;
	2827	case 's' \| TYPE_IS_SHRIEKING:
	2828	#if SHORTSIZE != SIZE16
	2829	while (len-- > 0) {
	2830	short ashort;
	2831	fromstr = NEXTFROM;
	2832	ashort = SvIV_no_inf(fromstr, datumtype);
	2833	PUSH_VAR(utf8, cur, ashort, needs_swap);
	2834	}
	2835	break;
	2836	#else
	2837	/* FALLTHROUGH */
	2838	#endif
	2839	case 's':
	2840	while (len-- > 0) {
	2841	I16 ai16;
	2842	fromstr = NEXTFROM;
	2843	ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
	2844	PUSH16(utf8, cur, &ai16, needs_swap);
	2845	}
	2846	break;
	2847	case 'I':
	2848	case 'I' \| TYPE_IS_SHRIEKING:
	2849	while (len-- > 0) {
	2850	unsigned int auint;
	2851	fromstr = NEXTFROM;
	2852	auint = SvUV_no_inf(fromstr, datumtype);
	2853	PUSH_VAR(utf8, cur, auint, needs_swap);
	2854	}
	2855	break;
	2856	case 'j':
	2857	while (len-- > 0) {
	2858	IV aiv;
	2859	fromstr = NEXTFROM;
	2860	aiv = SvIV_no_inf(fromstr, datumtype);
	2861	PUSH_VAR(utf8, cur, aiv, needs_swap);
	2862	}
	2863	break;
	2864	case 'J':
	2865	while (len-- > 0) {
	2866	UV auv;
	2867	fromstr = NEXTFROM;
	2868	auv = SvUV_no_inf(fromstr, datumtype);
	2869	PUSH_VAR(utf8, cur, auv, needs_swap);
	2870	}
	2871	break;
	2872	case 'w':
	2873	while (len-- > 0) {
	2874	NV anv;
	2875	fromstr = NEXTFROM;
	2876	S_sv_check_infnan(aTHX_ fromstr, datumtype);
	2877	anv = SvNV_nomg(fromstr);
	2878
	2879	if (anv < 0) {
	2880	*cur = '\0';
	2881	SvCUR_set(cat, cur - start);
	2882	Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
	2883	}
	2884
	2885	/* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
	2886	which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
	2887	any negative IVs will have already been got by the croak()
	2888	above. IOK is untrue for fractions, so we test them
	2889	against UV_MAX_P1. */
	2890	if (SvIOK(fromstr) \|\| anv < UV_MAX_P1) {
	2891	char buf[(sizeof(UV)*CHAR_BIT)/7+1];
	2892	char *in = buf + sizeof(buf);
	2893	UV auv = SvUV_nomg(fromstr);
	2894
	2895	do {
	2896	*--in = (char)((auv & 0x7f) \| 0x80);
	2897	auv >>= 7;
	2898	} while (auv);
	2899	buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
	2900	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2901	in, (buf + sizeof(buf)) - in);
	2902	} else if (SvPOKp(fromstr))
	2903	goto w_string;
	2904	else if (SvNOKp(fromstr)) {
	2905	/* 10**NV_MAX_10_EXP is the largest power of 10
	2906	so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
	2907	given 10(NV_MAX_10_EXP+1) == 128 x solve for x:
	2908	x = (NV_MAX_10_EXP+1) * log (10) / log (128)
	2909	And with that many bytes only Inf can overflow.
	2910	Some C compilers are strict about integral constant
	2911	expressions so we conservatively divide by a slightly
	2912	smaller integer instead of multiplying by the exact
	2913	floating-point value.
	2914	*/
	2915	#ifdef NV_MAX_10_EXP
	2916	/* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
	2917	char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
	2918	#else
	2919	/* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
	2920	char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
	2921	#endif
	2922	char *in = buf + sizeof(buf);
	2923
	2924	anv = Perl_floor(anv);
	2925	do {
	2926	const NV next = Perl_floor(anv / 128);
	2927	if (in <= buf) /* this cannot happen ;-) */
	2928	Perl_croak(aTHX_ "Cannot compress integer in pack");
	2929	--in = (unsigned char)(anv - (next 128)) \| 0x80;
	2930	anv = next;
	2931	} while (anv > 0);
	2932	buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
	2933	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2934	in, (buf + sizeof(buf)) - in);
	2935	} else {
	2936	const char *from;
	2937	char result, in;
	2938	SV *norm;
	2939	STRLEN len;
	2940	bool done;
	2941
	2942	w_string:
	2943	/* Copy string and check for compliance */
	2944	from = SvPV_nomg_const(fromstr, len);
	2945	if ((norm = is_an_int(from, len)) == NULL)
	2946	Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
	2947
	2948	Newx(result, len, char);
	2949	in = result + len;
	2950	done = FALSE;
	2951	while (!done) *--in = div128(norm, &done) \| 0x80;
	2952	result[len - 1] &= 0x7F; /* clear continue bit */
	2953	PUSH_GROWING_BYTES(utf8, cat, start, cur,
	2954	in, (result + len) - in);
	2955	Safefree(result);
	2956	SvREFCNT_dec(norm); /* free norm */
	2957	}
	2958	}
	2959	break;
	2960	case 'i':
	2961	case 'i' \| TYPE_IS_SHRIEKING:
	2962	while (len-- > 0) {
	2963	int aint;
	2964	fromstr = NEXTFROM;
	2965	aint = SvIV_no_inf(fromstr, datumtype);
	2966	PUSH_VAR(utf8, cur, aint, needs_swap);
	2967	}
	2968	break;
	2969	case 'N' \| TYPE_IS_SHRIEKING:
	2970	case 'N':
	2971	while (len-- > 0) {
	2972	U32 au32;
	2973	fromstr = NEXTFROM;
	2974	au32 = SvUV_no_inf(fromstr, datumtype);
	2975	au32 = PerlSock_htonl(au32);
	2976	PUSH32(utf8, cur, &au32, FALSE);
	2977	}
	2978	break;
	2979	case 'V' \| TYPE_IS_SHRIEKING:
	2980	case 'V':
	2981	while (len-- > 0) {
	2982	U32 au32;
	2983	fromstr = NEXTFROM;
	2984	au32 = SvUV_no_inf(fromstr, datumtype);
	2985	au32 = htovl(au32);
	2986	PUSH32(utf8, cur, &au32, FALSE);
	2987	}
	2988	break;
	2989	case 'L' \| TYPE_IS_SHRIEKING:
	2990	#if LONGSIZE != SIZE32
	2991	while (len-- > 0) {
	2992	unsigned long aulong;
	2993	fromstr = NEXTFROM;
	2994	aulong = SvUV_no_inf(fromstr, datumtype);
	2995	PUSH_VAR(utf8, cur, aulong, needs_swap);
	2996	}
	2997	break;
	2998	#else
	2999	/* Fall though! */
	3000	#endif
	3001	case 'L':
	3002	while (len-- > 0) {
	3003	U32 au32;
	3004	fromstr = NEXTFROM;
	3005	au32 = SvUV_no_inf(fromstr, datumtype);
	3006	PUSH32(utf8, cur, &au32, needs_swap);
	3007	}
	3008	break;
	3009	case 'l' \| TYPE_IS_SHRIEKING:
	3010	#if LONGSIZE != SIZE32
	3011	while (len-- > 0) {
	3012	long along;
	3013	fromstr = NEXTFROM;
	3014	along = SvIV_no_inf(fromstr, datumtype);
	3015	PUSH_VAR(utf8, cur, along, needs_swap);
	3016	}
	3017	break;
	3018	#else
	3019	/* Fall though! */
	3020	#endif
	3021	case 'l':
	3022	while (len-- > 0) {
	3023	I32 ai32;
	3024	fromstr = NEXTFROM;
	3025	ai32 = SvIV_no_inf(fromstr, datumtype);
	3026	PUSH32(utf8, cur, &ai32, needs_swap);
	3027	}
	3028	break;
	3029	#if defined(HAS_QUAD) && IVSIZE >= 8
	3030	case 'Q':
	3031	while (len-- > 0) {
	3032	Uquad_t auquad;
	3033	fromstr = NEXTFROM;
	3034	auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
	3035	PUSH_VAR(utf8, cur, auquad, needs_swap);
	3036	}
	3037	break;
	3038	case 'q':
	3039	while (len-- > 0) {
	3040	Quad_t aquad;
	3041	fromstr = NEXTFROM;
	3042	aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
	3043	PUSH_VAR(utf8, cur, aquad, needs_swap);
	3044	}
	3045	break;
	3046	#endif
	3047	case 'P':
	3048	len = 1; /* assume SV is correct length */
	3049	GROWING(utf8, cat, start, cur, sizeof(char *));
	3050	/* FALLTHROUGH */
	3051	case 'p':
	3052	while (len-- > 0) {
	3053	const char *aptr;
	3054
	3055	fromstr = NEXTFROM;
	3056	SvGETMAGIC(fromstr);
	3057	if (!SvOK(fromstr)) aptr = NULL;
	3058	else {
	3059	/* XXX better yet, could spirit away the string to
	3060	* a safe spot and hang on to it until the result
	3061	* of pack() (and all copies of the result) are
	3062	* gone.
	3063	*/
	3064	if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
	3065	\|\| (SvPADTMP(fromstr) &&
	3066	!SvREADONLY(fromstr)))) {
	3067	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	3068	"Attempt to pack pointer to temporary value");
	3069	}
	3070	if (SvPOK(fromstr) \|\| SvNIOK(fromstr))
	3071	aptr = SvPV_nomg_const_nolen(fromstr);
	3072	else
	3073	aptr = SvPV_force_flags_nolen(fromstr, 0);
	3074	}
	3075	PUSH_VAR(utf8, cur, aptr, needs_swap);
	3076	}
	3077	break;
	3078	case 'u': {
	3079	const char aptr, aend;
	3080	bool from_utf8;
	3081
	3082	fromstr = NEXTFROM;
	3083	if (len <= 2) len = 45;
	3084	else len = len / 3 * 3;
	3085	if (len >= 64) {
	3086	Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
	3087	"Field too wide in 'u' format in pack");
	3088	len = 63;
	3089	}
	3090	aptr = SvPV_const(fromstr, fromlen);
	3091	from_utf8 = DO_UTF8(fromstr);
	3092	if (from_utf8) {
	3093	aend = aptr + fromlen;
	3094	fromlen = sv_len_utf8_nomg(fromstr);
	3095	} else aend = NULL; /* Unused, but keep compilers happy */
	3096	GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
	3097	while (fromlen > 0) {
	3098	U8 *end;
	3099	SSize_t todo;
	3100	U8 hunk[1+63/3*4+1];
	3101
	3102	if ((SSize_t)fromlen > len)
	3103	todo = len;
	3104	else
	3105	todo = fromlen;
	3106	if (from_utf8) {
	3107	char buffer[64];
	3108	if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
	3109	'u' \| TYPE_IS_PACK)) {
	3110	*cur = '\0';
	3111	SvCUR_set(cat, cur - start);
	3112	Perl_croak(aTHX_ "panic: string is shorter than advertised, "
	3113	"aptr=%p, aend=%p, buffer=%p, todo=%zd",
	3114	aptr, aend, buffer, todo);
	3115	}
	3116	end = doencodes(hunk, (const U8 *)buffer, todo);
	3117	} else {
	3118	end = doencodes(hunk, (const U8 *)aptr, todo);
	3119	aptr += todo;
	3120	}
	3121	PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
	3122	fromlen -= todo;
	3123	}
	3124	break;
	3125	}
	3126	}
	3127	*cur = '\0';
	3128	SvCUR_set(cat, cur - start);
	3129	no_change:
	3130	*symptr = lookahead;
	3131	}
	3132	return beglist;
	3133	}
	3134	#undef NEXTFROM
	3135
	3136
	3137	PP(pp_pack)
	3138	{
	3139	dSP; dMARK; dORIGMARK; dTARGET;
	3140	SV *cat = TARG;
	3141	STRLEN fromlen;
	3142	SV pat_sv = ++MARK;
	3143	const char *pat = SvPV_const(pat_sv, fromlen);
	3144	const char *patend = pat + fromlen;
	3145
	3146	MARK++;
	3147	SvPVCLEAR(cat);
	3148	SvUTF8_off(cat);
	3149
	3150	packlist(cat, pat, patend, MARK, SP + 1);
	3151
	3152	if (SvUTF8(cat)) {
	3153	STRLEN result_len;
	3154	const char * result = SvPV_nomg(cat, result_len);
	3155	const U8 * error_pos;
	3156
	3157	if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
	3158	_force_out_malformed_utf8_message(error_pos,
	3159	(U8 *) result + result_len,
	3160	0, /* no flags */
	3161	1 /* Die */
	3162	);
	3163	NOT_REACHED; /* NOTREACHED */
	3164	}
	3165	}
	3166
	3167	SvSETMAGIC(cat);
	3168	SP = ORIGMARK;
	3169	PUSHs(cat);
	3170	RETURN;
	3171	}
	3172
	3173	/*
	3174	* ex: set ts=8 sts=4 sw=4 et:
	3175	*/