perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* inline.h
	2	*
	3	* Copyright (C) 2012 by Larry Wall and others
	4	*
	5	* You may distribute under the terms of either the GNU General Public
	6	* License or the Artistic License, as specified in the README file.
	7	*
	8	* This file is a home for static inline functions that cannot go in other
	9	* headers files, because they depend on proto.h (included after most other
	10	* headers) or struct definitions.
	11	*
	12	* Each section names the header file that the functions "belong" to.
	13	*/
	14
	15	/* ------------------------------- av.h ------------------------------- */
	16
	17	PERL_STATIC_INLINE SSize_t
	18	S_av_top_index(pTHX_ AV *av)
	19	{
	20	PERL_ARGS_ASSERT_AV_TOP_INDEX;
	21	assert(SvTYPE(av) == SVt_PVAV);
	22
	23	return AvFILL(av);
	24	}
	25
	26	/* ------------------------------- cv.h ------------------------------- */
	27
	28	PERL_STATIC_INLINE GV *
	29	S_CvGV(pTHX_ CV *sv)
	30	{
	31	return CvNAMED(sv)
	32	? Perl_cvgv_from_hek(aTHX_ sv)
	33	: ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
	34	}
	35
	36	PERL_STATIC_INLINE I32 *
	37	S_CvDEPTHp(const CV * const sv)
	38	{
	39	assert(SvTYPE(sv) == SVt_PVCV \|\| SvTYPE(sv) == SVt_PVFM);
	40	return &((XPVCV*)SvANY(sv))->xcv_depth;
	41	}
	42
	43	/*
	44	CvPROTO returns the prototype as stored, which is not necessarily what
	45	the interpreter should be using. Specifically, the interpreter assumes
	46	that spaces have been stripped, which has been the case if the prototype
	47	was added by toke.c, but is generally not the case if it was added elsewhere.
	48	Since we can't enforce the spacelessness at assignment time, this routine
	49	provides a temporary copy at parse time with spaces removed.
	50	I<orig> is the start of the original buffer, I<len> is the length of the
	51	prototype and will be updated when this returns.
	52	*/
	53
	54	#ifdef PERL_CORE
	55	PERL_STATIC_INLINE char *
	56	S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
	57	{
	58	SV * tmpsv;
	59	char * tmps;
	60	tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
	61	tmps = SvPVX(tmpsv);
	62	while ((*len)--) {
	63	if (!isSPACE(*orig))
	64	tmps++ = orig;
	65	orig++;
	66	}
	67	*tmps = '\0';
	68	*len = tmps - SvPVX(tmpsv);
	69	return SvPVX(tmpsv);
	70	}
	71	#endif
	72
	73	/* ------------------------------- mg.h ------------------------------- */
	74
	75	#if defined(PERL_CORE) \|\| defined(PERL_EXT)
	76	/* assumes get-magic and stringification have already occurred */
	77	PERL_STATIC_INLINE STRLEN
	78	S_MgBYTEPOS(pTHX_ MAGIC mg, SV sv, const char *s, STRLEN len)
	79	{
	80	assert(mg->mg_type == PERL_MAGIC_regex_global);
	81	assert(mg->mg_len != -1);
	82	if (mg->mg_flags & MGf_BYTES \|\| !DO_UTF8(sv))
	83	return (STRLEN)mg->mg_len;
	84	else {
	85	const STRLEN pos = (STRLEN)mg->mg_len;
	86	/* Without this check, we may read past the end of the buffer: */
	87	if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
	88	return sv_or_pv_pos_u2b(sv, s, pos, NULL);
	89	}
	90	}
	91	#endif
	92
	93	/* ------------------------------- pad.h ------------------------------ */
	94
	95	#if defined(PERL_IN_PAD_C) \|\| defined(PERL_IN_OP_C)
	96	PERL_STATIC_INLINE bool
	97	PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
	98	{
	99	/* is seq within the range _LOW to _HIGH ?
	100	* This is complicated by the fact that PL_cop_seqmax
	101	* may have wrapped around at some point */
	102	if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
	103	return FALSE; /* not yet introduced */
	104
	105	if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
	106	/* in compiling scope */
	107	if (
	108	(seq > COP_SEQ_RANGE_LOW(pn))
	109	? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
	110	: (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
	111	)
	112	return TRUE;
	113	}
	114	else if (
	115	(COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
	116	?
	117	( seq > COP_SEQ_RANGE_LOW(pn)
	118	\|\| seq <= COP_SEQ_RANGE_HIGH(pn))
	119
	120	: ( seq > COP_SEQ_RANGE_LOW(pn)
	121	&& seq <= COP_SEQ_RANGE_HIGH(pn))
	122	)
	123	return TRUE;
	124	return FALSE;
	125	}
	126	#endif
	127
	128	/* ------------------------------- pp.h ------------------------------- */
	129
	130	PERL_STATIC_INLINE I32
	131	S_TOPMARK(pTHX)
	132	{
	133	DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
	134	"MARK top %p %" IVdf "\n",
	135	PL_markstack_ptr,
	136	(IV)*PL_markstack_ptr)));
	137	return *PL_markstack_ptr;
	138	}
	139
	140	PERL_STATIC_INLINE I32
	141	S_POPMARK(pTHX)
	142	{
	143	DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
	144	"MARK pop %p %" IVdf "\n",
	145	(PL_markstack_ptr-1),
	146	(IV)*(PL_markstack_ptr-1))));
	147	assert((PL_markstack_ptr > PL_markstack) \|\| !"MARK underflow");
	148	return *PL_markstack_ptr--;
	149	}
	150
	151	/* ----------------------------- regexp.h ----------------------------- */
	152
	153	PERL_STATIC_INLINE struct regexp *
	154	S_ReANY(const REGEXP * const re)
	155	{
	156	XPV* const p = (XPV*)SvANY(re);
	157	assert(isREGEXP(re));
	158	return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
	159	: (struct regexp *)p;
	160	}
	161
	162	/* ------------------------------- sv.h ------------------------------- */
	163
	164	PERL_STATIC_INLINE SV *
	165	S_SvREFCNT_inc(SV *sv)
	166	{
	167	if (LIKELY(sv != NULL))
	168	SvREFCNT(sv)++;
	169	return sv;
	170	}
	171	PERL_STATIC_INLINE SV *
	172	S_SvREFCNT_inc_NN(SV *sv)
	173	{
	174	SvREFCNT(sv)++;
	175	return sv;
	176	}
	177	PERL_STATIC_INLINE void
	178	S_SvREFCNT_inc_void(SV *sv)
	179	{
	180	if (LIKELY(sv != NULL))
	181	SvREFCNT(sv)++;
	182	}
	183	PERL_STATIC_INLINE void
	184	S_SvREFCNT_dec(pTHX_ SV *sv)
	185	{
	186	if (LIKELY(sv != NULL)) {
	187	U32 rc = SvREFCNT(sv);
	188	if (LIKELY(rc > 1))
	189	SvREFCNT(sv) = rc - 1;
	190	else
	191	Perl_sv_free2(aTHX_ sv, rc);
	192	}
	193	}
	194
	195	PERL_STATIC_INLINE void
	196	S_SvREFCNT_dec_NN(pTHX_ SV *sv)
	197	{
	198	U32 rc = SvREFCNT(sv);
	199	if (LIKELY(rc > 1))
	200	SvREFCNT(sv) = rc - 1;
	201	else
	202	Perl_sv_free2(aTHX_ sv, rc);
	203	}
	204
	205	PERL_STATIC_INLINE void
	206	SvAMAGIC_on(SV *sv)
	207	{
	208	assert(SvROK(sv));
	209	if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
	210	}
	211	PERL_STATIC_INLINE void
	212	SvAMAGIC_off(SV *sv)
	213	{
	214	if (SvROK(sv) && SvOBJECT(SvRV(sv)))
	215	HvAMAGIC_off(SvSTASH(SvRV(sv)));
	216	}
	217
	218	PERL_STATIC_INLINE U32
	219	S_SvPADSTALE_on(SV *sv)
	220	{
	221	assert(!(SvFLAGS(sv) & SVs_PADTMP));
	222	return SvFLAGS(sv) \|= SVs_PADSTALE;
	223	}
	224	PERL_STATIC_INLINE U32
	225	S_SvPADSTALE_off(SV *sv)
	226	{
	227	assert(!(SvFLAGS(sv) & SVs_PADTMP));
	228	return SvFLAGS(sv) &= ~SVs_PADSTALE;
	229	}
	230	#if defined(PERL_CORE) \|\| defined (PERL_EXT)
	231	PERL_STATIC_INLINE STRLEN
	232	S_sv_or_pv_pos_u2b(pTHX_ SV sv, const char pv, STRLEN pos, STRLEN *lenp)
	233	{
	234	PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
	235	if (SvGAMAGIC(sv)) {
	236	U8 hopped = utf8_hop((U8 )pv, pos);
	237	if (lenp) lenp = (STRLEN)(utf8_hop(hopped, lenp) - hopped);
	238	return (STRLEN)(hopped - (U8 *)pv);
	239	}
	240	return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
	241	}
	242	#endif
	243
	244	/* ------------------------------- handy.h ------------------------------- */
	245
	246	/* saves machine code for a common noreturn idiom typically used in Newx() /
	247	GCC_DIAG_IGNORE_DECL(-Wunused-function);
	248	static void
	249	S_croak_memory_wrap(void)
	250	{
	251	Perl_croak_nocontext("%s",PL_memory_wrap);
	252	}
	253	GCC_DIAG_RESTORE_DECL;
	254
	255	/* ------------------------------- utf8.h ------------------------------- */
	256
	257	/*
	258	=head1 Unicode Support
	259	*/
	260
	261	PERL_STATIC_INLINE void
	262	S_append_utf8_from_native_byte(const U8 byte, U8** dest)
	263	{
	264	/* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
	265	* encoded string at 'dest', updating 'dest' to include it */
	266
	267	PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
	268
	269	if (NATIVE_BYTE_IS_INVARIANT(byte))
	270	((dest)++) = byte;
	271	else {
	272	((dest)++) = UTF8_EIGHT_BIT_HI(byte);
	273	((dest)++) = UTF8_EIGHT_BIT_LO(byte);
	274	}
	275	}
	276
	277	/*
	278	=for apidoc valid_utf8_to_uvchr
	279	Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
	280	the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
	281	it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
	282	non-Unicode code points are allowed.
	283
	284	=cut
	285
	286	*/
	287
	288	PERL_STATIC_INLINE UV
	289	Perl_valid_utf8_to_uvchr(const U8 s, STRLEN retlen)
	290	{
	291	const UV expectlen = UTF8SKIP(s);
	292	const U8* send = s + expectlen;
	293	UV uv = *s;
	294
	295	PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
	296
	297	if (retlen) {
	298	*retlen = expectlen;
	299	}
	300
	301	/* An invariant is trivially returned */
	302	if (expectlen == 1) {
	303	return uv;
	304	}
	305
	306	/* Remove the leading bits that indicate the number of bytes, leaving just
	307	* the bits that are part of the value */
	308	uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
	309
	310	/* Now, loop through the remaining bytes, accumulating each into the
	311	* working total as we go. (I khw tried unrolling the loop for up to 4
	312	* bytes, but there was no performance improvement) */
	313	for (++s; s < send; s++) {
	314	uv = UTF8_ACCUMULATE(uv, *s);
	315	}
	316
	317	return UNI_TO_NATIVE(uv);
	318
	319	}
	320
	321	/*
	322	=for apidoc is_utf8_invariant_string
	323
	324	Returns TRUE if the first C<len> bytes of the string C<s> are the same
	325	regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
	326	EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
	327	are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
	328	the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
	329	characters are invariant, but so also are the C1 controls.
	330
	331	If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
	332	use this option, that C<s> can't have embedded C<NUL> characters and has to
	333	have a terminating C<NUL> byte).
	334
	335	See also
	336	C<L</is_utf8_string>>,
	337	C<L</is_utf8_string_flags>>,
	338	C<L</is_utf8_string_loc>>,
	339	C<L</is_utf8_string_loc_flags>>,
	340	C<L</is_utf8_string_loclen>>,
	341	C<L</is_utf8_string_loclen_flags>>,
	342	C<L</is_utf8_fixed_width_buf_flags>>,
	343	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	344	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	345	C<L</is_strict_utf8_string>>,
	346	C<L</is_strict_utf8_string_loc>>,
	347	C<L</is_strict_utf8_string_loclen>>,
	348	C<L</is_c9strict_utf8_string>>,
	349	C<L</is_c9strict_utf8_string_loc>>,
	350	and
	351	C<L</is_c9strict_utf8_string_loclen>>.
	352
	353	=cut
	354
	355	*/
	356
	357	#define is_utf8_invariant_string(s, len) \
	358	is_utf8_invariant_string_loc(s, len, NULL)
	359
	360	/*
	361	=for apidoc is_utf8_invariant_string_loc
	362
	363	Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
	364	the first UTF-8 variant character in the C<ep> pointer; if all characters are
	365	UTF-8 invariant, this function does not change the contents of C<*ep>.
	366
	367	=cut
	368
	369	*/
	370
	371	PERL_STATIC_INLINE bool
	372	S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
	373	{
	374	const U8* send;
	375	const U8* x = s;
	376
	377	PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
	378
	379	if (len == 0) {
	380	len = strlen((const char *)s);
	381	}
	382
	383	send = s + len;
	384
	385	#ifndef EBCDIC
	386
	387	/* This looks like 0x010101... */
	388	#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
	389
	390	/* This looks like 0x808080... */
	391	#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
	392	#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
	393	#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
	394
	395	/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
	396	* or'ing together the lowest bits of 'x'. Hopefully the final term gets
	397	* optimized out completely on a 32-bit system, and its mask gets optimized out
	398	* on a 64-bit system */
	399	#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
	400	\| ( PTR2nat(x) >> 1) \
	401	\| ( ( (PTR2nat(x) \
	402	& PERL_WORD_BOUNDARY_MASK) >> 2))))
	403
	404	/* Do the word-at-a-time iff there is at least one usable full word. That
	405	* means that after advancing to a word boundary, there still is at least a
	406	* full word left. The number of bytes needed to advance is 'wordsize -
	407	* offset' unless offset is 0. */
	408	if ((STRLEN) (send - x) >= PERL_WORDSIZE
	409
	410	/* This term is wordsize if subword; 0 if not */
	411	+ PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
	412
	413	/* 'offset' */
	414	- (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
	415	{
	416
	417	/* Process per-byte until reach word boundary. XXX This loop could be
	418	* eliminated if we knew that this platform had fast unaligned reads */
	419	while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
	420	if (! UTF8_IS_INVARIANT(*x)) {
	421	if (ep) {
	422	*ep = x;
	423	}
	424
	425	return FALSE;
	426	}
	427	x++;
	428	}
	429
	430	/* Here, we know we have at least one full word to process. Process
	431	* per-word as long as we have at least a full word left */
	432	do {
	433	if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
	434
	435	/* Found a variant. Just return if caller doesn't want its
	436	* exact position */
	437	if (! ep) {
	438	return FALSE;
	439	}
	440
	441	#if BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 \
	442	\|\| BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321
	443
	444	ep = x + _variant_byte_number( (PERL_UINTMAX_T *) x);
	445	assert(ep >= s && ep < send);
	446
	447	return FALSE;
	448
	449	#else /* If weird byte order, drop into next loop to do byte-at-a-time
	450	checks. */
	451
	452	break;
	453	#endif
	454	}
	455
	456	x += PERL_WORDSIZE;
	457
	458	} while (x + PERL_WORDSIZE <= send);
	459	}
	460
	461	#endif /* End of ! EBCDIC */
	462
	463	/* Process per-byte */
	464	while (x < send) {
	465	if (! UTF8_IS_INVARIANT(*x)) {
	466	if (ep) {
	467	*ep = x;
	468	}
	469
	470	return FALSE;
	471	}
	472
	473	x++;
	474	}
	475
	476	return TRUE;
	477	}
	478
	479	PERL_STATIC_INLINE unsigned int
	480	S__variant_byte_number(PERL_UINTMAX_T word)
	481	{
	482
	483	/* This returns the position in a word (0..7) of the first variant byte in
	484	* it. This is a helper function. Note that there are no branches */
	485
	486	assert(word);
	487
	488	/* Get just the msb bits of each byte */
	489	word &= PERL_VARIANTS_WORD_MASK;
	490
	491	# ifdef USING_MSVC6 /* VC6 has some issues with the normal code, and the
	492	easiest thing is to hide that from the callers */
	493	{
	494	unsigned int i;
	495	const U8 * s = (U8 *) &word;
	496	dTHX;
	497
	498	for (i = 0; i < sizeof(word); i++ ) {
	499	if (s[i]) {
	500	return i;
	501	}
	502	}
	503
	504	Perl_croak(aTHX_ "panic: %s: %d: unexpected zero word\n",
	505	__FILE__, __LINE__);
	506	}
	507
	508	# elif BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678
	509
	510	/* Bytes are stored like
	511	* Byte8 ... Byte2 Byte1
	512	* 63..56...15...8 7...0
	513	*
	514	* Isolate the lsb;
	515	* https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
	516	*
	517	* The word will look this this, with a rightmost set bit in position 's':
	518	* ('x's are don't cares)
	519	* s
	520	* x..x100..0
	521	* x..xx10..0 Right shift (rightmost 0 is shifted off)
	522	* x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
	523	* the 1 just to their left into a 0; the remainder is
	524	* untouched
	525	* 0..0011..1 The xor with x..xx10..0 clears that remainder, sets
	526	* bottom to all 1
	527	* 0..0100..0 Add 1 to clear the word except for the bit in 's'
	528	*
	529	* Another method is to do 'word &= -word'; but it generates a compiler
	530	* message on some platforms about taking the negative of an unsigned */
	531
	532	word >>= 1;
	533	word = 1 + (word ^ (word - 1));
	534
	535	# elif BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321
	536
	537	/* Bytes are stored like
	538	* Byte1 Byte2 ... Byte8
	539	* 63..56 55..47 ... 7...0
	540	*
	541	* Isolate the msb; http://codeforces.com/blog/entry/10330
	542	*
	543	* Only the most significant set bit matters. Or'ing word with its right
	544	* shift of 1 makes that bit and the next one to its right both 1. Then
	545	* right shifting by 2 makes for 4 1-bits in a row. ... We end with the
	546	* msb and all to the right being 1. */
	547	word \|= word >> 1;
	548	word \|= word >> 2;
	549	word \|= word >> 4;
	550	word \|= word >> 8;
	551	word \|= word >> 16;
	552	word \|= word >> 32; /* This should get optimized out on 32-bit systems. */
	553
	554	/* Then subtracting the right shift by 1 clears all but the left-most of
	555	* the 1 bits, which is our desired result */
	556	word -= (word >> 1);
	557
	558	# else
	559	# error Unexpected byte order
	560	# endif
	561
	562	/* Here 'word' has a single bit set: the msb of the first byte in which it
	563	* is set. Calculate that position in the word. We can use this
	564	* specialized solution: https://stackoverflow.com/a/32339674/1626653,
	565	* assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
	566	* just get shifted off at compile time) */
	567	word = (word >> 7) * ((UINTMAX_C( 7) << 56) \| (UINTMAX_C(15) << 48)
	568	\| (UINTMAX_C(23) << 40) \| (UINTMAX_C(31) << 32)
	569	\| (39 << 24) \| (47 << 16)
	570	\| (55 << 8) \| (63 << 0));
	571	word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
	572
	573	/* Here, word contains the position 7..63 of that bit. Convert to 0..7 */
	574	word = ((word + 1) >> 3) - 1;
	575
	576	# if BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321
	577
	578	/* And invert the result */
	579	word = CHARBITS - word - 1;
	580
	581	# endif
	582
	583	return (unsigned int) word;
	584	}
	585
	586	#if defined(PERL_CORE) \|\| defined(PERL_EXT)
	587
	588	/*
	589	=for apidoc variant_under_utf8_count
	590
	591	This function looks at the sequence of bytes between C<s> and C<e>, which are
	592	assumed to be encoded in ASCII/Latin1, and returns how many of them would
	593	change should the string be translated into UTF-8. Due to the nature of UTF-8,
	594	each of these would occupy two bytes instead of the single one in the input
	595	string. Thus, this function returns the precise number of bytes the string
	596	would expand by when translated to UTF-8.
	597
	598	Unlike most of the other functions that have C<utf8> in their name, the input
	599	to this function is NOT a UTF-8-encoded string. The function name is slightly
	600	I<odd> to emphasize this.
	601
	602	This function is internal to Perl because khw thinks that any XS code that
	603	would want this is probably operating too close to the internals. Presenting a
	604	valid use case could change that.
	605
	606	See also
	607	C<L<perlapi/is_utf8_invariant_string>>
	608	and
	609	C<L<perlapi/is_utf8_invariant_string_loc>>,
	610
	611	=cut
	612
	613	*/
	614
	615	PERL_STATIC_INLINE Size_t
	616	S_variant_under_utf8_count(const U8* const s, const U8* const e)
	617	{
	618	const U8* x = s;
	619	Size_t count = 0;
	620
	621	PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
	622
	623	# ifndef EBCDIC
	624
	625	/* Test if the string is long enough to use word-at-a-time. (Logic is the
	626	* same as for is_utf8_invariant_string()) */
	627	if ((STRLEN) (e - x) >= PERL_WORDSIZE
	628	+ PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
	629	- (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
	630	{
	631
	632	/* Process per-byte until reach word boundary. XXX This loop could be
	633	* eliminated if we knew that this platform had fast unaligned reads */
	634	while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
	635	count += ! UTF8_IS_INVARIANT(*x++);
	636	}
	637
	638	/* Process per-word as long as we have at least a full word left */
	639	do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
	640	explanation of how this works */
	641	count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
	642	* PERL_COUNT_MULTIPLIER)
	643	>> ((PERL_WORDSIZE - 1) * CHARBITS);
	644	x += PERL_WORDSIZE;
	645	} while (x + PERL_WORDSIZE <= e);
	646	}
	647
	648	# endif
	649
	650	/* Process per-byte */
	651	while (x < e) {
	652	if (! UTF8_IS_INVARIANT(*x)) {
	653	count++;
	654	}
	655
	656	x++;
	657	}
	658
	659	return count;
	660	}
	661
	662	#endif
	663
	664	#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
	665	# undef PERL_WORDSIZE
	666	# undef PERL_COUNT_MULTIPLIER
	667	# undef PERL_WORD_BOUNDARY_MASK
	668	# undef PERL_VARIANTS_WORD_MASK
	669	#endif
	670
	671	/*
	672	=for apidoc is_utf8_string
	673
	674	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	675	Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
	676	be calculated using C<strlen(s)> (which means if you use this option, that C<s>
	677	can't have embedded C<NUL> characters and has to have a terminating C<NUL>
	678	byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
	679
	680	This function considers Perl's extended UTF-8 to be valid. That means that
	681	code points above Unicode, surrogates, and non-character code points are
	682	considered valid by this function. Use C<L</is_strict_utf8_string>>,
	683	C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
	684	code points are considered valid.
	685
	686	See also
	687	C<L</is_utf8_invariant_string>>,
	688	C<L</is_utf8_invariant_string_loc>>,
	689	C<L</is_utf8_string_loc>>,
	690	C<L</is_utf8_string_loclen>>,
	691	C<L</is_utf8_fixed_width_buf_flags>>,
	692	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	693	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	694
	695	=cut
	696	*/
	697
	698	#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
	699
	700	#if defined(PERL_CORE) \|\| defined (PERL_EXT)
	701
	702	/*
	703	=for apidoc is_utf8_non_invariant_string
	704
	705	Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
	706	C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
	707	UTF-8; otherwise returns FALSE.
	708
	709	A TRUE return means that at least one code point represented by the sequence
	710	either is a wide character not representable as a single byte, or the
	711	representation differs depending on whether the sequence is encoded in UTF-8 or
	712	not.
	713
	714	See also
	715	C<L<perlapi/is_utf8_invariant_string>>,
	716	C<L<perlapi/is_utf8_string>>
	717
	718	=cut
	719
	720	This is commonly used to determine if a SV's UTF-8 flag should be turned on.
	721	It generally needn't be if its string is entirely UTF-8 invariant, and it
	722	shouldn't be if it otherwise contains invalid UTF-8.
	723
	724	It is an internal function because khw thinks that XS code shouldn't be working
	725	at this low a level. A valid use case could change that.
	726
	727	*/
	728
	729	PERL_STATIC_INLINE bool
	730	S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
	731	{
	732	const U8 * first_variant;
	733
	734	PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
	735
	736	if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
	737	return FALSE;
	738	}
	739
	740	return is_utf8_string(first_variant, len - (first_variant - s));
	741	}
	742
	743	#endif
	744
	745	/*
	746	=for apidoc is_strict_utf8_string
	747
	748	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	749	UTF-8-encoded string that is fully interchangeable by any application using
	750	Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
	751	calculated using C<strlen(s)> (which means if you use this option, that C<s>
	752	can't have embedded C<NUL> characters and has to have a terminating C<NUL>
	753	byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
	754
	755	This function returns FALSE for strings containing any
	756	code points above the Unicode max of 0x10FFFF, surrogate code points, or
	757	non-character code points.
	758
	759	See also
	760	C<L</is_utf8_invariant_string>>,
	761	C<L</is_utf8_invariant_string_loc>>,
	762	C<L</is_utf8_string>>,
	763	C<L</is_utf8_string_flags>>,
	764	C<L</is_utf8_string_loc>>,
	765	C<L</is_utf8_string_loc_flags>>,
	766	C<L</is_utf8_string_loclen>>,
	767	C<L</is_utf8_string_loclen_flags>>,
	768	C<L</is_utf8_fixed_width_buf_flags>>,
	769	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	770	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	771	C<L</is_strict_utf8_string_loc>>,
	772	C<L</is_strict_utf8_string_loclen>>,
	773	C<L</is_c9strict_utf8_string>>,
	774	C<L</is_c9strict_utf8_string_loc>>,
	775	and
	776	C<L</is_c9strict_utf8_string_loclen>>.
	777
	778	=cut
	779	*/
	780
	781	#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
	782
	783	/*
	784	=for apidoc is_c9strict_utf8_string
	785
	786	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	787	UTF-8-encoded string that conforms to
	788	L<Unicode Corrigendum #9\|http://www.unicode.org/versions/corrigendum9.html>;
	789	otherwise it returns FALSE. If C<len> is 0, it will be calculated using
	790	C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
	791	C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
	792	characters being ASCII constitute 'a valid UTF-8 string'.
	793
	794	This function returns FALSE for strings containing any code points above the
	795	Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
	796	code points per
	797	L<Corrigendum #9\|http://www.unicode.org/versions/corrigendum9.html>.
	798
	799	See also
	800	C<L</is_utf8_invariant_string>>,
	801	C<L</is_utf8_invariant_string_loc>>,
	802	C<L</is_utf8_string>>,
	803	C<L</is_utf8_string_flags>>,
	804	C<L</is_utf8_string_loc>>,
	805	C<L</is_utf8_string_loc_flags>>,
	806	C<L</is_utf8_string_loclen>>,
	807	C<L</is_utf8_string_loclen_flags>>,
	808	C<L</is_utf8_fixed_width_buf_flags>>,
	809	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	810	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	811	C<L</is_strict_utf8_string>>,
	812	C<L</is_strict_utf8_string_loc>>,
	813	C<L</is_strict_utf8_string_loclen>>,
	814	C<L</is_c9strict_utf8_string_loc>>,
	815	and
	816	C<L</is_c9strict_utf8_string_loclen>>.
	817
	818	=cut
	819	*/
	820
	821	#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
	822
	823	/*
	824	=for apidoc is_utf8_string_flags
	825
	826	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	827	UTF-8 string, subject to the restrictions imposed by C<flags>;
	828	returns FALSE otherwise. If C<len> is 0, it will be calculated
	829	using C<strlen(s)> (which means if you use this option, that C<s> can't have
	830	embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
	831	that all characters being ASCII constitute 'a valid UTF-8 string'.
	832
	833	If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
	834	C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
	835	as C<L</is_strict_utf8_string>>; and if C<flags> is
	836	C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
	837	C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
	838	combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
	839	C<L</utf8n_to_uvchr>>, with the same meanings.
	840
	841	See also
	842	C<L</is_utf8_invariant_string>>,
	843	C<L</is_utf8_invariant_string_loc>>,
	844	C<L</is_utf8_string>>,
	845	C<L</is_utf8_string_loc>>,
	846	C<L</is_utf8_string_loc_flags>>,
	847	C<L</is_utf8_string_loclen>>,
	848	C<L</is_utf8_string_loclen_flags>>,
	849	C<L</is_utf8_fixed_width_buf_flags>>,
	850	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	851	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	852	C<L</is_strict_utf8_string>>,
	853	C<L</is_strict_utf8_string_loc>>,
	854	C<L</is_strict_utf8_string_loclen>>,
	855	C<L</is_c9strict_utf8_string>>,
	856	C<L</is_c9strict_utf8_string_loc>>,
	857	and
	858	C<L</is_c9strict_utf8_string_loclen>>.
	859
	860	=cut
	861	*/
	862
	863	PERL_STATIC_INLINE bool
	864	S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
	865	{
	866	const U8 * first_variant;
	867
	868	PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
	869	assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
	870	\|UTF8_DISALLOW_PERL_EXTENDED)));
	871
	872	if (len == 0) {
	873	len = strlen((const char *)s);
	874	}
	875
	876	if (flags == 0) {
	877	return is_utf8_string(s, len);
	878	}
	879
	880	if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
	881	== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
	882	{
	883	return is_strict_utf8_string(s, len);
	884	}
	885
	886	if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
	887	== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
	888	{
	889	return is_c9strict_utf8_string(s, len);
	890	}
	891
	892	if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
	893	const U8* const send = s + len;
	894	const U8* x = first_variant;
	895
	896	while (x < send) {
	897	STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
	898	if (UNLIKELY(! cur_len)) {
	899	return FALSE;
	900	}
	901	x += cur_len;
	902	}
	903	}
	904
	905	return TRUE;
	906	}
	907
	908	/*
	909
	910	=for apidoc is_utf8_string_loc
	911
	912	Like C<L</is_utf8_string>> but stores the location of the failure (in the
	913	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	914	"utf8ness success") in the C<ep> pointer.
	915
	916	See also C<L</is_utf8_string_loclen>>.
	917
	918	=cut
	919	*/
	920
	921	#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
	922
	923	/*
	924
	925	=for apidoc is_utf8_string_loclen
	926
	927	Like C<L</is_utf8_string>> but stores the location of the failure (in the
	928	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	929	"utf8ness success") in the C<ep> pointer, and the number of UTF-8
	930	encoded characters in the C<el> pointer.
	931
	932	See also C<L</is_utf8_string_loc>>.
	933
	934	=cut
	935	*/
	936
	937	PERL_STATIC_INLINE bool
	938	Perl_is_utf8_string_loclen(const U8 s, STRLEN len, const U8 ep, STRLEN el)
	939	{
	940	const U8 * first_variant;
	941
	942	PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
	943
	944	if (len == 0) {
	945	len = strlen((const char *) s);
	946	}
	947
	948	if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
	949	if (el)
	950	*el = len;
	951
	952	if (ep) {
	953	*ep = s + len;
	954	}
	955
	956	return TRUE;
	957	}
	958
	959	{
	960	const U8* const send = s + len;
	961	const U8* x = first_variant;
	962	STRLEN outlen = first_variant - s;
	963
	964	while (x < send) {
	965	const STRLEN cur_len = isUTF8_CHAR(x, send);
	966	if (UNLIKELY(! cur_len)) {
	967	break;
	968	}
	969	x += cur_len;
	970	outlen++;
	971	}
	972
	973	if (el)
	974	*el = outlen;
	975
	976	if (ep) {
	977	*ep = x;
	978	}
	979
	980	return (x == send);
	981	}
	982	}
	983
	984	/*
	985
	986	=for apidoc is_strict_utf8_string_loc
	987
	988	Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
	989	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	990	"utf8ness success") in the C<ep> pointer.
	991
	992	See also C<L</is_strict_utf8_string_loclen>>.
	993
	994	=cut
	995	*/
	996
	997	#define is_strict_utf8_string_loc(s, len, ep) \
	998	is_strict_utf8_string_loclen(s, len, ep, 0)
	999
	1000	/*
	1001
	1002	=for apidoc is_strict_utf8_string_loclen
	1003
	1004	Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
	1005	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1006	"utf8ness success") in the C<ep> pointer, and the number of UTF-8
	1007	encoded characters in the C<el> pointer.
	1008
	1009	See also C<L</is_strict_utf8_string_loc>>.
	1010
	1011	=cut
	1012	*/
	1013
	1014	PERL_STATIC_INLINE bool
	1015	S_is_strict_utf8_string_loclen(const U8 s, STRLEN len, const U8 ep, STRLEN el)
	1016	{
	1017	const U8 * first_variant;
	1018
	1019	PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
	1020
	1021	if (len == 0) {
	1022	len = strlen((const char *) s);
	1023	}
	1024
	1025	if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
	1026	if (el)
	1027	*el = len;
	1028
	1029	if (ep) {
	1030	*ep = s + len;
	1031	}
	1032
	1033	return TRUE;
	1034	}
	1035
	1036	{
	1037	const U8* const send = s + len;
	1038	const U8* x = first_variant;
	1039	STRLEN outlen = first_variant - s;
	1040
	1041	while (x < send) {
	1042	const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
	1043	if (UNLIKELY(! cur_len)) {
	1044	break;
	1045	}
	1046	x += cur_len;
	1047	outlen++;
	1048	}
	1049
	1050	if (el)
	1051	*el = outlen;
	1052
	1053	if (ep) {
	1054	*ep = x;
	1055	}
	1056
	1057	return (x == send);
	1058	}
	1059	}
	1060
	1061	/*
	1062
	1063	=for apidoc is_c9strict_utf8_string_loc
	1064
	1065	Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
	1066	the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1067	"utf8ness success") in the C<ep> pointer.
	1068
	1069	See also C<L</is_c9strict_utf8_string_loclen>>.
	1070
	1071	=cut
	1072	*/
	1073
	1074	#define is_c9strict_utf8_string_loc(s, len, ep) \
	1075	is_c9strict_utf8_string_loclen(s, len, ep, 0)
	1076
	1077	/*
	1078
	1079	=for apidoc is_c9strict_utf8_string_loclen
	1080
	1081	Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
	1082	the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1083	"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
	1084	characters in the C<el> pointer.
	1085
	1086	See also C<L</is_c9strict_utf8_string_loc>>.
	1087
	1088	=cut
	1089	*/
	1090
	1091	PERL_STATIC_INLINE bool
	1092	S_is_c9strict_utf8_string_loclen(const U8 s, STRLEN len, const U8 ep, STRLEN el)
	1093	{
	1094	const U8 * first_variant;
	1095
	1096	PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
	1097
	1098	if (len == 0) {
	1099	len = strlen((const char *) s);
	1100	}
	1101
	1102	if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
	1103	if (el)
	1104	*el = len;
	1105
	1106	if (ep) {
	1107	*ep = s + len;
	1108	}
	1109
	1110	return TRUE;
	1111	}
	1112
	1113	{
	1114	const U8* const send = s + len;
	1115	const U8* x = first_variant;
	1116	STRLEN outlen = first_variant - s;
	1117
	1118	while (x < send) {
	1119	const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
	1120	if (UNLIKELY(! cur_len)) {
	1121	break;
	1122	}
	1123	x += cur_len;
	1124	outlen++;
	1125	}
	1126
	1127	if (el)
	1128	*el = outlen;
	1129
	1130	if (ep) {
	1131	*ep = x;
	1132	}
	1133
	1134	return (x == send);
	1135	}
	1136	}
	1137
	1138	/*
	1139
	1140	=for apidoc is_utf8_string_loc_flags
	1141
	1142	Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
	1143	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1144	"utf8ness success") in the C<ep> pointer.
	1145
	1146	See also C<L</is_utf8_string_loclen_flags>>.
	1147
	1148	=cut
	1149	*/
	1150
	1151	#define is_utf8_string_loc_flags(s, len, ep, flags) \
	1152	is_utf8_string_loclen_flags(s, len, ep, 0, flags)
	1153
	1154
	1155	/* The above 3 actual functions could have been moved into the more general one
	1156	* just below, and made #defines that call it with the right 'flags'. They are
	1157	* currently kept separate to increase their chances of getting inlined */
	1158
	1159	/*
	1160
	1161	=for apidoc is_utf8_string_loclen_flags
	1162
	1163	Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
	1164	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1165	"utf8ness success") in the C<ep> pointer, and the number of UTF-8
	1166	encoded characters in the C<el> pointer.
	1167
	1168	See also C<L</is_utf8_string_loc_flags>>.
	1169
	1170	=cut
	1171	*/
	1172
	1173	PERL_STATIC_INLINE bool
	1174	S_is_utf8_string_loclen_flags(const U8 s, STRLEN len, const U8 ep, STRLEN el, const U32 flags)
	1175	{
	1176	const U8 * first_variant;
	1177
	1178	PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
	1179	assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
	1180	\|UTF8_DISALLOW_PERL_EXTENDED)));
	1181
	1182	if (len == 0) {
	1183	len = strlen((const char *) s);
	1184	}
	1185
	1186	if (flags == 0) {
	1187	return is_utf8_string_loclen(s, len, ep, el);
	1188	}
	1189
	1190	if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
	1191	== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
	1192	{
	1193	return is_strict_utf8_string_loclen(s, len, ep, el);
	1194	}
	1195
	1196	if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
	1197	== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
	1198	{
	1199	return is_c9strict_utf8_string_loclen(s, len, ep, el);
	1200	}
	1201
	1202	if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
	1203	if (el)
	1204	*el = len;
	1205
	1206	if (ep) {
	1207	*ep = s + len;
	1208	}
	1209
	1210	return TRUE;
	1211	}
	1212
	1213	{
	1214	const U8* send = s + len;
	1215	const U8* x = first_variant;
	1216	STRLEN outlen = first_variant - s;
	1217
	1218	while (x < send) {
	1219	const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
	1220	if (UNLIKELY(! cur_len)) {
	1221	break;
	1222	}
	1223	x += cur_len;
	1224	outlen++;
	1225	}
	1226
	1227	if (el)
	1228	*el = outlen;
	1229
	1230	if (ep) {
	1231	*ep = x;
	1232	}
	1233
	1234	return (x == send);
	1235	}
	1236	}
	1237
	1238	/*
	1239	=for apidoc utf8_distance
	1240
	1241	Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
	1242	and C<b>.
	1243
	1244	WARNING: use only if you know that the pointers point inside the
	1245	same UTF-8 buffer.
	1246
	1247	=cut
	1248	*/
	1249
	1250	PERL_STATIC_INLINE IV
	1251	Perl_utf8_distance(pTHX_ const U8 a, const U8 b)
	1252	{
	1253	PERL_ARGS_ASSERT_UTF8_DISTANCE;
	1254
	1255	return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
	1256	}
	1257
	1258	/*
	1259	=for apidoc utf8_hop
	1260
	1261	Return the UTF-8 pointer C<s> displaced by C<off> characters, either
	1262	forward or backward.
	1263
	1264	WARNING: do not use the following unless you know C<off> is within
	1265	the UTF-8 data pointed to by C<s> and that on entry C<s> is aligned
	1266	on the first byte of character or just after the last byte of a character.
	1267
	1268	=cut
	1269	*/
	1270
	1271	PERL_STATIC_INLINE U8 *
	1272	Perl_utf8_hop(const U8 *s, SSize_t off)
	1273	{
	1274	PERL_ARGS_ASSERT_UTF8_HOP;
	1275
	1276	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	1277	* the bitops (especially ~) can create illegal UTF-8.
	1278	* In other words: in Perl UTF-8 is not just for Unicode. */
	1279
	1280	if (off >= 0) {
	1281	while (off--)
	1282	s += UTF8SKIP(s);
	1283	}
	1284	else {
	1285	while (off++) {
	1286	s--;
	1287	while (UTF8_IS_CONTINUATION(*s))
	1288	s--;
	1289	}
	1290	}
	1291	GCC_DIAG_IGNORE_STMT(-Wcast-qual);
	1292	return (U8 *)s;
	1293	GCC_DIAG_RESTORE_STMT;
	1294	}
	1295
	1296	/*
	1297	=for apidoc utf8_hop_forward
	1298
	1299	Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
	1300	forward.
	1301
	1302	C<off> must be non-negative.
	1303
	1304	C<s> must be before or equal to C<end>.
	1305
	1306	When moving forward it will not move beyond C<end>.
	1307
	1308	Will not exceed this limit even if the string is not valid "UTF-8".
	1309
	1310	=cut
	1311	*/
	1312
	1313	PERL_STATIC_INLINE U8 *
	1314	Perl_utf8_hop_forward(const U8 s, SSize_t off, const U8 end)
	1315	{
	1316	PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
	1317
	1318	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	1319	* the bitops (especially ~) can create illegal UTF-8.
	1320	* In other words: in Perl UTF-8 is not just for Unicode. */
	1321
	1322	assert(s <= end);
	1323	assert(off >= 0);
	1324
	1325	while (off--) {
	1326	STRLEN skip = UTF8SKIP(s);
	1327	if ((STRLEN)(end - s) <= skip) {
	1328	GCC_DIAG_IGNORE_STMT(-Wcast-qual);
	1329	return (U8 *)end;
	1330	GCC_DIAG_RESTORE_STMT;
	1331	}
	1332	s += skip;
	1333	}
	1334
	1335	GCC_DIAG_IGNORE_STMT(-Wcast-qual);
	1336	return (U8 *)s;
	1337	GCC_DIAG_RESTORE_STMT;
	1338	}
	1339
	1340	/*
	1341	=for apidoc utf8_hop_back
	1342
	1343	Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
	1344	backward.
	1345
	1346	C<off> must be non-positive.
	1347
	1348	C<s> must be after or equal to C<start>.
	1349
	1350	When moving backward it will not move before C<start>.
	1351
	1352	Will not exceed this limit even if the string is not valid "UTF-8".
	1353
	1354	=cut
	1355	*/
	1356
	1357	PERL_STATIC_INLINE U8 *
	1358	Perl_utf8_hop_back(const U8 s, SSize_t off, const U8 start)
	1359	{
	1360	PERL_ARGS_ASSERT_UTF8_HOP_BACK;
	1361
	1362	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	1363	* the bitops (especially ~) can create illegal UTF-8.
	1364	* In other words: in Perl UTF-8 is not just for Unicode. */
	1365
	1366	assert(start <= s);
	1367	assert(off <= 0);
	1368
	1369	while (off++ && s > start) {
	1370	s--;
	1371	while (UTF8_IS_CONTINUATION(*s) && s > start)
	1372	s--;
	1373	}
	1374
	1375	GCC_DIAG_IGNORE_STMT(-Wcast-qual);
	1376	return (U8 *)s;
	1377	GCC_DIAG_RESTORE_STMT;
	1378	}
	1379
	1380	/*
	1381	=for apidoc utf8_hop_safe
	1382
	1383	Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
	1384	either forward or backward.
	1385
	1386	When moving backward it will not move before C<start>.
	1387
	1388	When moving forward it will not move beyond C<end>.
	1389
	1390	Will not exceed those limits even if the string is not valid "UTF-8".
	1391
	1392	=cut
	1393	*/
	1394
	1395	PERL_STATIC_INLINE U8 *
	1396	Perl_utf8_hop_safe(const U8 s, SSize_t off, const U8 start, const U8 *end)
	1397	{
	1398	PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
	1399
	1400	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	1401	* the bitops (especially ~) can create illegal UTF-8.
	1402	* In other words: in Perl UTF-8 is not just for Unicode. */
	1403
	1404	assert(start <= s && s <= end);
	1405
	1406	if (off >= 0) {
	1407	return utf8_hop_forward(s, off, end);
	1408	}
	1409	else {
	1410	return utf8_hop_back(s, off, start);
	1411	}
	1412	}
	1413
	1414	/*
	1415
	1416	=for apidoc is_utf8_valid_partial_char
	1417
	1418	Returns 0 if the sequence of bytes starting at C<s> and looking no further than
	1419	S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
	1420	points. Otherwise, it returns 1 if there exists at least one non-empty
	1421	sequence of bytes that when appended to sequence C<s>, starting at position
	1422	C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
	1423	otherwise returns 0.
	1424
	1425	In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
	1426	point.
	1427
	1428	This is useful when a fixed-length buffer is being tested for being well-formed
	1429	UTF-8, but the final few bytes in it don't comprise a full character; that is,
	1430	it is split somewhere in the middle of the final code point's UTF-8
	1431	representation. (Presumably when the buffer is refreshed with the next chunk
	1432	of data, the new first bytes will complete the partial code point.) This
	1433	function is used to verify that the final bytes in the current buffer are in
	1434	fact the legal beginning of some code point, so that if they aren't, the
	1435	failure can be signalled without having to wait for the next read.
	1436
	1437	=cut
	1438	*/
	1439	#define is_utf8_valid_partial_char(s, e) \
	1440	is_utf8_valid_partial_char_flags(s, e, 0)
	1441
	1442	/*
	1443
	1444	=for apidoc is_utf8_valid_partial_char_flags
	1445
	1446	Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
	1447	or not the input is a valid UTF-8 encoded partial character, but it takes an
	1448	extra parameter, C<flags>, which can further restrict which code points are
	1449	considered valid.
	1450
	1451	If C<flags> is 0, this behaves identically to
	1452	C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
	1453	of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
	1454	there is any sequence of bytes that can complete the input partial character in
	1455	such a way that a non-prohibited character is formed, the function returns
	1456	TRUE; otherwise FALSE. Non character code points cannot be determined based on
	1457	partial character input. But many of the other possible excluded types can be
	1458	determined from just the first one or two bytes.
	1459
	1460	=cut
	1461	*/
	1462
	1463	PERL_STATIC_INLINE bool
	1464	S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
	1465	{
	1466	PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
	1467
	1468	assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
	1469	\|UTF8_DISALLOW_PERL_EXTENDED)));
	1470
	1471	if (s >= e \|\| s + UTF8SKIP(s) <= e) {
	1472	return FALSE;
	1473	}
	1474
	1475	return cBOOL(_is_utf8_char_helper(s, e, flags));
	1476	}
	1477
	1478	/*
	1479
	1480	=for apidoc is_utf8_fixed_width_buf_flags
	1481
	1482	Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
	1483	is entirely valid UTF-8, subject to the restrictions given by C<flags>;
	1484	otherwise it returns FALSE.
	1485
	1486	If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
	1487	without restriction. If the final few bytes of the buffer do not form a
	1488	complete code point, this will return TRUE anyway, provided that
	1489	C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
	1490
	1491	If C<flags> in non-zero, it can be any combination of the
	1492	C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
	1493	same meanings.
	1494
	1495	This function differs from C<L</is_utf8_string_flags>> only in that the latter
	1496	returns FALSE if the final few bytes of the string don't form a complete code
	1497	point.
	1498
	1499	=cut
	1500	*/
	1501	#define is_utf8_fixed_width_buf_flags(s, len, flags) \
	1502	is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
	1503
	1504	/*
	1505
	1506	=for apidoc is_utf8_fixed_width_buf_loc_flags
	1507
	1508	Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
	1509	failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
	1510	to the beginning of any partial character at the end of the buffer; if there is
	1511	no partial character C<*ep> will contain C<s>+C<len>.
	1512
	1513	See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
	1514
	1515	=cut
	1516	*/
	1517
	1518	#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
	1519	is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
	1520
	1521	/*
	1522
	1523	=for apidoc is_utf8_fixed_width_buf_loclen_flags
	1524
	1525	Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
	1526	complete, valid characters found in the C<el> pointer.
	1527
	1528	=cut
	1529	*/
	1530
	1531	PERL_STATIC_INLINE bool
	1532	S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
	1533	STRLEN len,
	1534	const U8 **ep,
	1535	STRLEN *el,
	1536	const U32 flags)
	1537	{
	1538	const U8 * maybe_partial;
	1539
	1540	PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
	1541
	1542	if (! ep) {
	1543	ep = &maybe_partial;
	1544	}
	1545
	1546	/* If it's entirely valid, return that; otherwise see if the only error is
	1547	* that the final few bytes are for a partial character */
	1548	return is_utf8_string_loclen_flags(s, len, ep, el, flags)
	1549	\|\| is_utf8_valid_partial_char_flags(*ep, s + len, flags);
	1550	}
	1551
	1552	/* ------------------------------- perl.h ----------------------------- */
	1553
	1554	/*
	1555	=head1 Miscellaneous Functions
	1556
	1557	=for apidoc AiR\|bool\|is_safe_syscall\|const char pv\|STRLEN len\|const char what\|const char *op_name
	1558
	1559	Test that the given C<pv> doesn't contain any internal C<NUL> characters.
	1560	If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
	1561
	1562	Return TRUE if the name is safe.
	1563
	1564	Used by the C<IS_SAFE_SYSCALL()> macro.
	1565
	1566	=cut
	1567	*/
	1568
	1569	PERL_STATIC_INLINE bool
	1570	S_is_safe_syscall(pTHX_ const char pv, STRLEN len, const char what, const char *op_name) {
	1571	/* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
	1572	* perl itself uses xce*() functions which accept 8-bit strings.
	1573	*/
	1574
	1575	PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
	1576
	1577	if (len > 1) {
	1578	char *null_at;
	1579	if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
	1580	SETERRNO(ENOENT, LIB_INVARG);
	1581	Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
	1582	"Invalid \\0 character in %s for %s: %s\\0%s",
	1583	what, op_name, pv, null_at+1);
	1584	return FALSE;
	1585	}
	1586	}
	1587
	1588	return TRUE;
	1589	}
	1590
	1591	/*
	1592
	1593	Return true if the supplied filename has a newline character
	1594	immediately before the first (hopefully only) NUL.
	1595
	1596	My original look at this incorrectly used the len from SvPV(), but
	1597	that's incorrect, since we allow for a NUL in pv[len-1].
	1598
	1599	So instead, strlen() and work from there.
	1600
	1601	This allow for the user reading a filename, forgetting to chomp it,
	1602	then calling:
	1603
	1604	open my $foo, "$file\0";
	1605
	1606	*/
	1607
	1608	#ifdef PERL_CORE
	1609
	1610	PERL_STATIC_INLINE bool
	1611	S_should_warn_nl(const char *pv) {
	1612	STRLEN len;
	1613
	1614	PERL_ARGS_ASSERT_SHOULD_WARN_NL;
	1615
	1616	len = strlen(pv);
	1617
	1618	return len > 0 && pv[len-1] == '\n';
	1619	}
	1620
	1621	#endif
	1622
	1623	/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
	1624
	1625	#define MAX_CHARSET_NAME_LENGTH 2
	1626
	1627	PERL_STATIC_INLINE const char *
	1628	get_regex_charset_name(const U32 flags, STRLEN* const lenp)
	1629	{
	1630	/* Returns a string that corresponds to the name of the regex character set
	1631	* given by 'flags', and *lenp is set the length of that string, which
	1632	* cannot exceed MAX_CHARSET_NAME_LENGTH characters */
	1633
	1634	*lenp = 1;
	1635	switch (get_regex_charset(flags)) {
	1636	case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
	1637	case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
	1638	case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
	1639	case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
	1640	case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
	1641	*lenp = 2;
	1642	return ASCII_MORE_RESTRICT_PAT_MODS;
	1643	}
	1644	/* The NOT_REACHED; hides an assert() which has a rather complex
	1645	* definition in perl.h. */
	1646	NOT_REACHED; /* NOTREACHED */
	1647	return "?"; /* Unknown */
	1648	}
	1649
	1650	/*
	1651
	1652	Return false if any get magic is on the SV other than taint magic.
	1653
	1654	*/
	1655
	1656	PERL_STATIC_INLINE bool
	1657	S_sv_only_taint_gmagic(SV *sv) {
	1658	MAGIC *mg = SvMAGIC(sv);
	1659
	1660	PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
	1661
	1662	while (mg) {
	1663	if (mg->mg_type != PERL_MAGIC_taint
	1664	&& !(mg->mg_flags & MGf_GSKIP)
	1665	&& mg->mg_virtual->svt_get) {
	1666	return FALSE;
	1667	}
	1668	mg = mg->mg_moremagic;
	1669	}
	1670
	1671	return TRUE;
	1672	}
	1673
	1674	/* ------------------ cop.h ------------------------------------------- */
	1675
	1676
	1677	/* Enter a block. Push a new base context and return its address. */
	1678
	1679	PERL_STATIC_INLINE PERL_CONTEXT *
	1680	S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
	1681	{
	1682	PERL_CONTEXT * cx;
	1683
	1684	PERL_ARGS_ASSERT_CX_PUSHBLOCK;
	1685
	1686	CXINC;
	1687	cx = CX_CUR();
	1688	cx->cx_type = type;
	1689	cx->blk_gimme = gimme;
	1690	cx->blk_oldsaveix = saveix;
	1691	cx->blk_oldsp = (I32)(sp - PL_stack_base);
	1692	cx->blk_oldcop = PL_curcop;
	1693	cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
	1694	cx->blk_oldscopesp = PL_scopestack_ix;
	1695	cx->blk_oldpm = PL_curpm;
	1696	cx->blk_old_tmpsfloor = PL_tmps_floor;
	1697
	1698	PL_tmps_floor = PL_tmps_ix;
	1699	CX_DEBUG(cx, "PUSH");
	1700	return cx;
	1701	}
	1702
	1703
	1704	/* Exit a block (RETURN and LAST). */
	1705
	1706	PERL_STATIC_INLINE void
	1707	S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
	1708	{
	1709	PERL_ARGS_ASSERT_CX_POPBLOCK;
	1710
	1711	CX_DEBUG(cx, "POP");
	1712	/* these 3 are common to cx_popblock and cx_topblock */
	1713	PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
	1714	PL_scopestack_ix = cx->blk_oldscopesp;
	1715	PL_curpm = cx->blk_oldpm;
	1716
	1717	/* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
	1718	* and leaves a CX entry lying around for repeated use, so
	1719	* skip for multicall */ \
	1720	assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
	1721	\|\| PL_savestack_ix == cx->blk_oldsaveix);
	1722	PL_curcop = cx->blk_oldcop;
	1723	PL_tmps_floor = cx->blk_old_tmpsfloor;
	1724	}
	1725
	1726	/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
	1727	* Whereas cx_popblock() restores the state to the point just before
	1728	* cx_pushblock() was called, cx_topblock() restores it to the point just
	1729	* after cx_pushblock() was called. */
	1730
	1731	PERL_STATIC_INLINE void
	1732	S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
	1733	{
	1734	PERL_ARGS_ASSERT_CX_TOPBLOCK;
	1735
	1736	CX_DEBUG(cx, "TOP");
	1737	/* these 3 are common to cx_popblock and cx_topblock */
	1738	PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
	1739	PL_scopestack_ix = cx->blk_oldscopesp;
	1740	PL_curpm = cx->blk_oldpm;
	1741
	1742	PL_stack_sp = PL_stack_base + cx->blk_oldsp;
	1743	}
	1744
	1745
	1746	PERL_STATIC_INLINE void
	1747	S_cx_pushsub(pTHX_ PERL_CONTEXT cx, CV cv, OP *retop, bool hasargs)
	1748	{
	1749	U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
	1750
	1751	PERL_ARGS_ASSERT_CX_PUSHSUB;
	1752
	1753	PERL_DTRACE_PROBE_ENTRY(cv);
	1754	cx->blk_sub.cv = cv;
	1755	cx->blk_sub.olddepth = CvDEPTH(cv);
	1756	cx->blk_sub.prevcomppad = PL_comppad;
	1757	cx->cx_type \|= (hasargs) ? CXp_HASARGS : 0;
	1758	cx->blk_sub.retop = retop;
	1759	SvREFCNT_inc_simple_void_NN(cv);
	1760	cx->blk_u16 = PL_op->op_private & (phlags\|OPpDEREF);
	1761	}
	1762
	1763
	1764	/* subsets of cx_popsub() */
	1765
	1766	PERL_STATIC_INLINE void
	1767	S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
	1768	{
	1769	CV *cv;
	1770
	1771	PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
	1772	assert(CxTYPE(cx) == CXt_SUB);
	1773
	1774	PL_comppad = cx->blk_sub.prevcomppad;
	1775	PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
	1776	cv = cx->blk_sub.cv;
	1777	CvDEPTH(cv) = cx->blk_sub.olddepth;
	1778	cx->blk_sub.cv = NULL;
	1779	SvREFCNT_dec(cv);
	1780	}
	1781
	1782
	1783	/* handle the @_ part of leaving a sub */
	1784
	1785	PERL_STATIC_INLINE void
	1786	S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
	1787	{
	1788	AV *av;
	1789
	1790	PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
	1791	assert(CxTYPE(cx) == CXt_SUB);
	1792	assert(AvARRAY(MUTABLE_AV(
	1793	PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
	1794	CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
	1795
	1796	CX_POP_SAVEARRAY(cx);
	1797	av = MUTABLE_AV(PAD_SVl(0));
	1798	if (UNLIKELY(AvREAL(av)))
	1799	/* abandon @_ if it got reified */
	1800	clear_defarray(av, 0);
	1801	else {
	1802	CLEAR_ARGARRAY(av);
	1803	}
	1804	}
	1805
	1806
	1807	PERL_STATIC_INLINE void
	1808	S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
	1809	{
	1810	PERL_ARGS_ASSERT_CX_POPSUB;
	1811	assert(CxTYPE(cx) == CXt_SUB);
	1812
	1813	PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
	1814
	1815	if (CxHASARGS(cx))
	1816	cx_popsub_args(cx);
	1817	cx_popsub_common(cx);
	1818	}
	1819
	1820
	1821	PERL_STATIC_INLINE void
	1822	S_cx_pushformat(pTHX_ PERL_CONTEXT cx, CV cv, OP retop, GV gv)
	1823	{
	1824	PERL_ARGS_ASSERT_CX_PUSHFORMAT;
	1825
	1826	cx->blk_format.cv = cv;
	1827	cx->blk_format.retop = retop;
	1828	cx->blk_format.gv = gv;
	1829	cx->blk_format.dfoutgv = PL_defoutgv;
	1830	cx->blk_format.prevcomppad = PL_comppad;
	1831	cx->blk_u16 = 0;
	1832
	1833	SvREFCNT_inc_simple_void_NN(cv);
	1834	CvDEPTH(cv)++;
	1835	SvREFCNT_inc_void(cx->blk_format.dfoutgv);
	1836	}
	1837
	1838
	1839	PERL_STATIC_INLINE void
	1840	S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
	1841	{
	1842	CV *cv;
	1843	GV *dfout;
	1844
	1845	PERL_ARGS_ASSERT_CX_POPFORMAT;
	1846	assert(CxTYPE(cx) == CXt_FORMAT);
	1847
	1848	dfout = cx->blk_format.dfoutgv;
	1849	setdefout(dfout);
	1850	cx->blk_format.dfoutgv = NULL;
	1851	SvREFCNT_dec_NN(dfout);
	1852
	1853	PL_comppad = cx->blk_format.prevcomppad;
	1854	PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
	1855	cv = cx->blk_format.cv;
	1856	cx->blk_format.cv = NULL;
	1857	--CvDEPTH(cv);
	1858	SvREFCNT_dec_NN(cv);
	1859	}
	1860
	1861
	1862	PERL_STATIC_INLINE void
	1863	S_cx_pusheval(pTHX_ PERL_CONTEXT cx, OP retop, SV *namesv)
	1864	{
	1865	PERL_ARGS_ASSERT_CX_PUSHEVAL;
	1866
	1867	cx->blk_eval.retop = retop;
	1868	cx->blk_eval.old_namesv = namesv;
	1869	cx->blk_eval.old_eval_root = PL_eval_root;
	1870	cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
	1871	cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
	1872	cx->blk_eval.cur_top_env = PL_top_env;
	1873
	1874	assert(!(PL_in_eval & ~ 0x3F));
	1875	assert(!(PL_op->op_type & ~0x1FF));
	1876	cx->blk_u16 = (PL_in_eval & 0x3F) \| ((U16)PL_op->op_type << 7);
	1877	}
	1878
	1879
	1880	PERL_STATIC_INLINE void
	1881	S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
	1882	{
	1883	SV *sv;
	1884
	1885	PERL_ARGS_ASSERT_CX_POPEVAL;
	1886	assert(CxTYPE(cx) == CXt_EVAL);
	1887
	1888	PL_in_eval = CxOLD_IN_EVAL(cx);
	1889	assert(!(PL_in_eval & 0xc0));
	1890	PL_eval_root = cx->blk_eval.old_eval_root;
	1891	sv = cx->blk_eval.cur_text;
	1892	if (sv && CxEVAL_TXT_REFCNTED(cx)) {
	1893	cx->blk_eval.cur_text = NULL;
	1894	SvREFCNT_dec_NN(sv);
	1895	}
	1896
	1897	sv = cx->blk_eval.old_namesv;
	1898	if (sv) {
	1899	cx->blk_eval.old_namesv = NULL;
	1900	SvREFCNT_dec_NN(sv);
	1901	}
	1902	}
	1903
	1904
	1905	/* push a plain loop, i.e.
	1906	* { block }
	1907	* while (cond) { block }
	1908	* for (init;cond;continue) { block }
	1909	* This loop can be last/redo'ed etc.
	1910	*/
	1911
	1912	PERL_STATIC_INLINE void
	1913	S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
	1914	{
	1915	PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
	1916	cx->blk_loop.my_op = cLOOP;
	1917	}
	1918
	1919
	1920	/* push a true for loop, i.e.
	1921	* for var (list) { block }
	1922	*/
	1923
	1924	PERL_STATIC_INLINE void
	1925	S_cx_pushloop_for(pTHX_ PERL_CONTEXT cx, void itervarp, SV* itersave)
	1926	{
	1927	PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
	1928
	1929	/* this one line is common with cx_pushloop_plain */
	1930	cx->blk_loop.my_op = cLOOP;
	1931
	1932	cx->blk_loop.itervar_u.svp = (SV**)itervarp;
	1933	cx->blk_loop.itersave = itersave;
	1934	#ifdef USE_ITHREADS
	1935	cx->blk_loop.oldcomppad = PL_comppad;
	1936	#endif
	1937	}
	1938
	1939
	1940	/* pop all loop types, including plain */
	1941
	1942	PERL_STATIC_INLINE void
	1943	S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
	1944	{
	1945	PERL_ARGS_ASSERT_CX_POPLOOP;
	1946
	1947	assert(CxTYPE_is_LOOP(cx));
	1948	if ( CxTYPE(cx) == CXt_LOOP_ARY
	1949	\|\| CxTYPE(cx) == CXt_LOOP_LAZYSV)
	1950	{
	1951	/* Free ary or cur. This assumes that state_u.ary.ary
	1952	* aligns with state_u.lazysv.cur. See cx_dup() */
	1953	SV *sv = cx->blk_loop.state_u.lazysv.cur;
	1954	cx->blk_loop.state_u.lazysv.cur = NULL;
	1955	SvREFCNT_dec_NN(sv);
	1956	if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
	1957	sv = cx->blk_loop.state_u.lazysv.end;
	1958	cx->blk_loop.state_u.lazysv.end = NULL;
	1959	SvREFCNT_dec_NN(sv);
	1960	}
	1961	}
	1962	if (cx->cx_type & (CXp_FOR_PAD\|CXp_FOR_GV)) {
	1963	SV *cursv;
	1964	SV **svp = (cx)->blk_loop.itervar_u.svp;
	1965	if ((cx->cx_type & CXp_FOR_GV))
	1966	svp = &GvSV((GV*)svp);
	1967	cursv = *svp;
	1968	*svp = cx->blk_loop.itersave;
	1969	cx->blk_loop.itersave = NULL;
	1970	SvREFCNT_dec(cursv);
	1971	}
	1972	}
	1973
	1974
	1975	PERL_STATIC_INLINE void
	1976	S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
	1977	{
	1978	PERL_ARGS_ASSERT_CX_PUSHWHEN;
	1979
	1980	cx->blk_givwhen.leave_op = cLOGOP->op_other;
	1981	}
	1982
	1983
	1984	PERL_STATIC_INLINE void
	1985	S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
	1986	{
	1987	PERL_ARGS_ASSERT_CX_POPWHEN;
	1988	assert(CxTYPE(cx) == CXt_WHEN);
	1989
	1990	PERL_UNUSED_ARG(cx);
	1991	PERL_UNUSED_CONTEXT;
	1992	/* currently NOOP */
	1993	}
	1994
	1995
	1996	PERL_STATIC_INLINE void
	1997	S_cx_pushgiven(pTHX_ PERL_CONTEXT cx, SV orig_defsv)
	1998	{
	1999	PERL_ARGS_ASSERT_CX_PUSHGIVEN;
	2000
	2001	cx->blk_givwhen.leave_op = cLOGOP->op_other;
	2002	cx->blk_givwhen.defsv_save = orig_defsv;
	2003	}
	2004
	2005
	2006	PERL_STATIC_INLINE void
	2007	S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
	2008	{
	2009	SV *sv;
	2010
	2011	PERL_ARGS_ASSERT_CX_POPGIVEN;
	2012	assert(CxTYPE(cx) == CXt_GIVEN);
	2013
	2014	sv = GvSV(PL_defgv);
	2015	GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
	2016	cx->blk_givwhen.defsv_save = NULL;
	2017	SvREFCNT_dec(sv);
	2018	}
	2019
	2020	/* ------------------ util.h ------------------------------------------- */
	2021
	2022	/*
	2023	=head1 Miscellaneous Functions
	2024
	2025	=for apidoc foldEQ
	2026
	2027	Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
	2028	same
	2029	case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
	2030	match themselves and their opposite case counterparts. Non-cased and non-ASCII
	2031	range bytes match only themselves.
	2032
	2033	=cut
	2034	*/
	2035
	2036	PERL_STATIC_INLINE I32
	2037	Perl_foldEQ(const char s1, const char s2, I32 len)
	2038	{
	2039	const U8 a = (const U8 )s1;
	2040	const U8 b = (const U8 )s2;
	2041
	2042	PERL_ARGS_ASSERT_FOLDEQ;
	2043
	2044	assert(len >= 0);
	2045
	2046	while (len--) {
	2047	if (a != b && a != PL_fold[b])
	2048	return 0;
	2049	a++,b++;
	2050	}
	2051	return 1;
	2052	}
	2053
	2054	PERL_STATIC_INLINE I32
	2055	Perl_foldEQ_latin1(const char s1, const char s2, I32 len)
	2056	{
	2057	/* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
	2058	* MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
	2059	* LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
	2060	* does it check that the strings each have at least 'len' characters */
	2061
	2062	const U8 a = (const U8 )s1;
	2063	const U8 b = (const U8 )s2;
	2064
	2065	PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
	2066
	2067	assert(len >= 0);
	2068
	2069	while (len--) {
	2070	if (a != b && a != PL_fold_latin1[b]) {
	2071	return 0;
	2072	}
	2073	a++, b++;
	2074	}
	2075	return 1;
	2076	}
	2077
	2078	/*
	2079	=for apidoc foldEQ_locale
	2080
	2081	Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
	2082	same case-insensitively in the current locale; false otherwise.
	2083
	2084	=cut
	2085	*/
	2086
	2087	PERL_STATIC_INLINE I32
	2088	Perl_foldEQ_locale(const char s1, const char s2, I32 len)
	2089	{
	2090	dVAR;
	2091	const U8 a = (const U8 )s1;
	2092	const U8 b = (const U8 )s2;
	2093
	2094	PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
	2095
	2096	assert(len >= 0);
	2097
	2098	while (len--) {
	2099	if (a != b && a != PL_fold_locale[b])
	2100	return 0;
	2101	a++,b++;
	2102	}
	2103	return 1;
	2104	}
	2105
	2106	#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) \|\| defined(PERL_EXT))
	2107
	2108	PERL_STATIC_INLINE void *
	2109	S_my_memrchr(const char * s, const char c, const STRLEN len)
	2110	{
	2111	/* memrchr(), since many platforms lack it */
	2112
	2113	const char * t = s + len - 1;
	2114
	2115	PERL_ARGS_ASSERT_MY_MEMRCHR;
	2116
	2117	while (t >= s) {
	2118	if (*t == c) {
	2119	return (void *) t;
	2120	}
	2121	t--;
	2122	}
	2123
	2124	return NULL;
	2125	}
	2126
	2127	#endif
	2128
	2129	/*
	2130	* ex: set ts=8 sts=4 sw=4 et:
	2131	*/