perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* inline.h
	2	*
	3	* Copyright (C) 2012 by Larry Wall and others
	4	*
	5	* You may distribute under the terms of either the GNU General Public
	6	* License or the Artistic License, as specified in the README file.
	7	*
	8	* This file contains tables and code adapted from
	9	* http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which requires this
	10	* copyright notice:
	11
	12	Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
	13
	14	Permission is hereby granted, free of charge, to any person obtaining a copy of
	15	this software and associated documentation files (the "Software"), to deal in
	16	the Software without restriction, including without limitation the rights to
	17	use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
	18	of the Software, and to permit persons to whom the Software is furnished to do
	19	so, subject to the following conditions:
	20
	21	The above copyright notice and this permission notice shall be included in all
	22	copies or substantial portions of the Software.
	23
	24	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	25	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	26	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	27	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	28	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	29	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	30	SOFTWARE.
	31
	32	*
	33	* This file is a home for static inline functions that cannot go in other
	34	* header files, because they depend on proto.h (included after most other
	35	* headers) or struct definitions.
	36	*
	37	* Each section names the header file that the functions "belong" to.
	38	*/
	39
	40	/* ------------------------------- av.h ------------------------------- */
	41
	42	PERL_STATIC_INLINE SSize_t
	43	S_av_top_index(pTHX_ AV *av)
	44	{
	45	PERL_ARGS_ASSERT_AV_TOP_INDEX;
	46	assert(SvTYPE(av) == SVt_PVAV);
	47
	48	return AvFILL(av);
	49	}
	50
	51	/* ------------------------------- cv.h ------------------------------- */
	52
	53	PERL_STATIC_INLINE GV *
	54	S_CvGV(pTHX_ CV *sv)
	55	{
	56	return CvNAMED(sv)
	57	? Perl_cvgv_from_hek(aTHX_ sv)
	58	: ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
	59	}
	60
	61	PERL_STATIC_INLINE I32 *
	62	S_CvDEPTHp(const CV * const sv)
	63	{
	64	assert(SvTYPE(sv) == SVt_PVCV \|\| SvTYPE(sv) == SVt_PVFM);
	65	return &((XPVCV*)SvANY(sv))->xcv_depth;
	66	}
	67
	68	/*
	69	CvPROTO returns the prototype as stored, which is not necessarily what
	70	the interpreter should be using. Specifically, the interpreter assumes
	71	that spaces have been stripped, which has been the case if the prototype
	72	was added by toke.c, but is generally not the case if it was added elsewhere.
	73	Since we can't enforce the spacelessness at assignment time, this routine
	74	provides a temporary copy at parse time with spaces removed.
	75	I<orig> is the start of the original buffer, I<len> is the length of the
	76	prototype and will be updated when this returns.
	77	*/
	78
	79	#ifdef PERL_CORE
	80	PERL_STATIC_INLINE char *
	81	S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
	82	{
	83	SV * tmpsv;
	84	char * tmps;
	85	tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
	86	tmps = SvPVX(tmpsv);
	87	while ((*len)--) {
	88	if (!isSPACE(*orig))
	89	tmps++ = orig;
	90	orig++;
	91	}
	92	*tmps = '\0';
	93	*len = tmps - SvPVX(tmpsv);
	94	return SvPVX(tmpsv);
	95	}
	96	#endif
	97
	98	/* ------------------------------- mg.h ------------------------------- */
	99
	100	#if defined(PERL_CORE) \|\| defined(PERL_EXT)
	101	/* assumes get-magic and stringification have already occurred */
	102	PERL_STATIC_INLINE STRLEN
	103	S_MgBYTEPOS(pTHX_ MAGIC mg, SV sv, const char *s, STRLEN len)
	104	{
	105	assert(mg->mg_type == PERL_MAGIC_regex_global);
	106	assert(mg->mg_len != -1);
	107	if (mg->mg_flags & MGf_BYTES \|\| !DO_UTF8(sv))
	108	return (STRLEN)mg->mg_len;
	109	else {
	110	const STRLEN pos = (STRLEN)mg->mg_len;
	111	/* Without this check, we may read past the end of the buffer: */
	112	if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
	113	return sv_or_pv_pos_u2b(sv, s, pos, NULL);
	114	}
	115	}
	116	#endif
	117
	118	/* ------------------------------- pad.h ------------------------------ */
	119
	120	#if defined(PERL_IN_PAD_C) \|\| defined(PERL_IN_OP_C)
	121	PERL_STATIC_INLINE bool
	122	PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
	123	{
	124	/* is seq within the range _LOW to _HIGH ?
	125	* This is complicated by the fact that PL_cop_seqmax
	126	* may have wrapped around at some point */
	127	if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
	128	return FALSE; /* not yet introduced */
	129
	130	if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
	131	/* in compiling scope */
	132	if (
	133	(seq > COP_SEQ_RANGE_LOW(pn))
	134	? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
	135	: (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
	136	)
	137	return TRUE;
	138	}
	139	else if (
	140	(COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
	141	?
	142	( seq > COP_SEQ_RANGE_LOW(pn)
	143	\|\| seq <= COP_SEQ_RANGE_HIGH(pn))
	144
	145	: ( seq > COP_SEQ_RANGE_LOW(pn)
	146	&& seq <= COP_SEQ_RANGE_HIGH(pn))
	147	)
	148	return TRUE;
	149	return FALSE;
	150	}
	151	#endif
	152
	153	/* ------------------------------- pp.h ------------------------------- */
	154
	155	PERL_STATIC_INLINE I32
	156	S_TOPMARK(pTHX)
	157	{
	158	DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
	159	"MARK top %p %" IVdf "\n",
	160	PL_markstack_ptr,
	161	(IV)*PL_markstack_ptr)));
	162	return *PL_markstack_ptr;
	163	}
	164
	165	PERL_STATIC_INLINE I32
	166	S_POPMARK(pTHX)
	167	{
	168	DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
	169	"MARK pop %p %" IVdf "\n",
	170	(PL_markstack_ptr-1),
	171	(IV)*(PL_markstack_ptr-1))));
	172	assert((PL_markstack_ptr > PL_markstack) \|\| !"MARK underflow");
	173	return *PL_markstack_ptr--;
	174	}
	175
	176	/* ----------------------------- regexp.h ----------------------------- */
	177
	178	PERL_STATIC_INLINE struct regexp *
	179	S_ReANY(const REGEXP * const re)
	180	{
	181	XPV* const p = (XPV*)SvANY(re);
	182	assert(isREGEXP(re));
	183	return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
	184	: (struct regexp *)p;
	185	}
	186
	187	/* ------------------------------- sv.h ------------------------------- */
	188
	189	PERL_STATIC_INLINE SV *
	190	S_SvREFCNT_inc(SV *sv)
	191	{
	192	if (LIKELY(sv != NULL))
	193	SvREFCNT(sv)++;
	194	return sv;
	195	}
	196	PERL_STATIC_INLINE SV *
	197	S_SvREFCNT_inc_NN(SV *sv)
	198	{
	199	SvREFCNT(sv)++;
	200	return sv;
	201	}
	202	PERL_STATIC_INLINE void
	203	S_SvREFCNT_inc_void(SV *sv)
	204	{
	205	if (LIKELY(sv != NULL))
	206	SvREFCNT(sv)++;
	207	}
	208	PERL_STATIC_INLINE void
	209	S_SvREFCNT_dec(pTHX_ SV *sv)
	210	{
	211	if (LIKELY(sv != NULL)) {
	212	U32 rc = SvREFCNT(sv);
	213	if (LIKELY(rc > 1))
	214	SvREFCNT(sv) = rc - 1;
	215	else
	216	Perl_sv_free2(aTHX_ sv, rc);
	217	}
	218	}
	219
	220	PERL_STATIC_INLINE void
	221	S_SvREFCNT_dec_NN(pTHX_ SV *sv)
	222	{
	223	U32 rc = SvREFCNT(sv);
	224	if (LIKELY(rc > 1))
	225	SvREFCNT(sv) = rc - 1;
	226	else
	227	Perl_sv_free2(aTHX_ sv, rc);
	228	}
	229
	230	PERL_STATIC_INLINE void
	231	SvAMAGIC_on(SV *sv)
	232	{
	233	assert(SvROK(sv));
	234	if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
	235	}
	236	PERL_STATIC_INLINE void
	237	SvAMAGIC_off(SV *sv)
	238	{
	239	if (SvROK(sv) && SvOBJECT(SvRV(sv)))
	240	HvAMAGIC_off(SvSTASH(SvRV(sv)));
	241	}
	242
	243	PERL_STATIC_INLINE U32
	244	S_SvPADSTALE_on(SV *sv)
	245	{
	246	assert(!(SvFLAGS(sv) & SVs_PADTMP));
	247	return SvFLAGS(sv) \|= SVs_PADSTALE;
	248	}
	249	PERL_STATIC_INLINE U32
	250	S_SvPADSTALE_off(SV *sv)
	251	{
	252	assert(!(SvFLAGS(sv) & SVs_PADTMP));
	253	return SvFLAGS(sv) &= ~SVs_PADSTALE;
	254	}
	255	#if defined(PERL_CORE) \|\| defined (PERL_EXT)
	256	PERL_STATIC_INLINE STRLEN
	257	S_sv_or_pv_pos_u2b(pTHX_ SV sv, const char pv, STRLEN pos, STRLEN *lenp)
	258	{
	259	PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
	260	if (SvGAMAGIC(sv)) {
	261	U8 hopped = utf8_hop((U8 )pv, pos);
	262	if (lenp) lenp = (STRLEN)(utf8_hop(hopped, lenp) - hopped);
	263	return (STRLEN)(hopped - (U8 *)pv);
	264	}
	265	return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
	266	}
	267	#endif
	268
	269	/* ------------------------------- handy.h ------------------------------- */
	270
	271	/* saves machine code for a common noreturn idiom typically used in Newx() /
	272	GCC_DIAG_IGNORE_DECL(-Wunused-function);
	273	static void
	274	S_croak_memory_wrap(void)
	275	{
	276	Perl_croak_nocontext("%s",PL_memory_wrap);
	277	}
	278	GCC_DIAG_RESTORE_DECL;
	279
	280	/* ------------------------------- utf8.h ------------------------------- */
	281
	282	/*
	283	=head1 Unicode Support
	284	*/
	285
	286	PERL_STATIC_INLINE void
	287	S_append_utf8_from_native_byte(const U8 byte, U8** dest)
	288	{
	289	/* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
	290	* encoded string at 'dest', updating 'dest' to include it */
	291
	292	PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
	293
	294	if (NATIVE_BYTE_IS_INVARIANT(byte))
	295	((dest)++) = byte;
	296	else {
	297	((dest)++) = UTF8_EIGHT_BIT_HI(byte);
	298	((dest)++) = UTF8_EIGHT_BIT_LO(byte);
	299	}
	300	}
	301
	302	/*
	303	=for apidoc valid_utf8_to_uvchr
	304	Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
	305	the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
	306	it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
	307	non-Unicode code points are allowed.
	308
	309	=cut
	310
	311	*/
	312
	313	PERL_STATIC_INLINE UV
	314	Perl_valid_utf8_to_uvchr(const U8 s, STRLEN retlen)
	315	{
	316	const UV expectlen = UTF8SKIP(s);
	317	const U8* send = s + expectlen;
	318	UV uv = *s;
	319
	320	PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
	321
	322	if (retlen) {
	323	*retlen = expectlen;
	324	}
	325
	326	/* An invariant is trivially returned */
	327	if (expectlen == 1) {
	328	return uv;
	329	}
	330
	331	/* Remove the leading bits that indicate the number of bytes, leaving just
	332	* the bits that are part of the value */
	333	uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
	334
	335	/* Now, loop through the remaining bytes, accumulating each into the
	336	* working total as we go. (I khw tried unrolling the loop for up to 4
	337	* bytes, but there was no performance improvement) */
	338	for (++s; s < send; s++) {
	339	uv = UTF8_ACCUMULATE(uv, *s);
	340	}
	341
	342	return UNI_TO_NATIVE(uv);
	343
	344	}
	345
	346	/*
	347	=for apidoc is_utf8_invariant_string
	348
	349	Returns TRUE if the first C<len> bytes of the string C<s> are the same
	350	regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
	351	EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
	352	are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
	353	the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
	354	characters are invariant, but so also are the C1 controls.
	355
	356	If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
	357	use this option, that C<s> can't have embedded C<NUL> characters and has to
	358	have a terminating C<NUL> byte).
	359
	360	See also
	361	C<L</is_utf8_string>>,
	362	C<L</is_utf8_string_flags>>,
	363	C<L</is_utf8_string_loc>>,
	364	C<L</is_utf8_string_loc_flags>>,
	365	C<L</is_utf8_string_loclen>>,
	366	C<L</is_utf8_string_loclen_flags>>,
	367	C<L</is_utf8_fixed_width_buf_flags>>,
	368	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	369	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	370	C<L</is_strict_utf8_string>>,
	371	C<L</is_strict_utf8_string_loc>>,
	372	C<L</is_strict_utf8_string_loclen>>,
	373	C<L</is_c9strict_utf8_string>>,
	374	C<L</is_c9strict_utf8_string_loc>>,
	375	and
	376	C<L</is_c9strict_utf8_string_loclen>>.
	377
	378	=cut
	379
	380	*/
	381
	382	#define is_utf8_invariant_string(s, len) \
	383	is_utf8_invariant_string_loc(s, len, NULL)
	384
	385	/*
	386	=for apidoc is_utf8_invariant_string_loc
	387
	388	Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
	389	the first UTF-8 variant character in the C<ep> pointer; if all characters are
	390	UTF-8 invariant, this function does not change the contents of C<*ep>.
	391
	392	=cut
	393
	394	*/
	395
	396	PERL_STATIC_INLINE bool
	397	S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
	398	{
	399	const U8* send;
	400	const U8* x = s;
	401
	402	PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
	403
	404	if (len == 0) {
	405	len = strlen((const char *)s);
	406	}
	407
	408	send = s + len;
	409
	410	/* This looks like 0x010101... */
	411	# define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
	412
	413	/* This looks like 0x808080... */
	414	# define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
	415	# define PERL_WORDSIZE sizeof(PERL_UINTMAX_T)
	416	# define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
	417
	418	/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
	419	* or'ing together the lowest bits of 'x'. Hopefully the final term gets
	420	* optimized out completely on a 32-bit system, and its mask gets optimized out
	421	* on a 64-bit system */
	422	# define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
	423	\| ( PTR2nat(x) >> 1) \
	424	\| ( ( (PTR2nat(x) \
	425	& PERL_WORD_BOUNDARY_MASK) >> 2))))
	426
	427	#ifndef EBCDIC
	428
	429	/* Do the word-at-a-time iff there is at least one usable full word. That
	430	* means that after advancing to a word boundary, there still is at least a
	431	* full word left. The number of bytes needed to advance is 'wordsize -
	432	* offset' unless offset is 0. */
	433	if ((STRLEN) (send - x) >= PERL_WORDSIZE
	434
	435	/* This term is wordsize if subword; 0 if not */
	436	+ PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
	437
	438	/* 'offset' */
	439	- (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
	440	{
	441
	442	/* Process per-byte until reach word boundary. XXX This loop could be
	443	* eliminated if we knew that this platform had fast unaligned reads */
	444	while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
	445	if (! UTF8_IS_INVARIANT(*x)) {
	446	if (ep) {
	447	*ep = x;
	448	}
	449
	450	return FALSE;
	451	}
	452	x++;
	453	}
	454
	455	/* Here, we know we have at least one full word to process. Process
	456	* per-word as long as we have at least a full word left */
	457	do {
	458	if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
	459
	460	/* Found a variant. Just return if caller doesn't want its
	461	* exact position */
	462	if (! ep) {
	463	return FALSE;
	464	}
	465
	466	# if BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678 \
	467	\|\| BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321
	468
	469	ep = x + _variant_byte_number( (PERL_UINTMAX_T *) x);
	470	assert(ep >= s && ep < send);
	471
	472	return FALSE;
	473
	474	# else /* If weird byte order, drop into next loop to do byte-at-a-time
	475	checks. */
	476
	477	break;
	478	# endif
	479	}
	480
	481	x += PERL_WORDSIZE;
	482
	483	} while (x + PERL_WORDSIZE <= send);
	484	}
	485
	486	#endif /* End of ! EBCDIC */
	487
	488	/* Process per-byte */
	489	while (x < send) {
	490	if (! UTF8_IS_INVARIANT(*x)) {
	491	if (ep) {
	492	*ep = x;
	493	}
	494
	495	return FALSE;
	496	}
	497
	498	x++;
	499	}
	500
	501	return TRUE;
	502	}
	503
	504	#ifndef EBCDIC
	505
	506	PERL_STATIC_INLINE unsigned int
	507	S__variant_byte_number(PERL_UINTMAX_T word)
	508	{
	509
	510	/* This returns the position in a word (0..7) of the first variant byte in
	511	* it. This is a helper function. Note that there are no branches */
	512
	513	assert(word);
	514
	515	/* Get just the msb bits of each byte */
	516	word &= PERL_VARIANTS_WORD_MASK;
	517
	518	# ifdef USING_MSVC6 /* VC6 has some issues with the normal code, and the
	519	easiest thing is to hide that from the callers */
	520	{
	521	unsigned int i;
	522	const U8 * s = (U8 *) &word;
	523	dTHX;
	524
	525	for (i = 0; i < sizeof(word); i++ ) {
	526	if (s[i]) {
	527	return i;
	528	}
	529	}
	530
	531	Perl_croak(aTHX_ "panic: %s: %d: unexpected zero word\n",
	532	__FILE__, __LINE__);
	533	}
	534
	535	# elif BYTEORDER == 0x1234 \|\| BYTEORDER == 0x12345678
	536
	537	/* Bytes are stored like
	538	* Byte8 ... Byte2 Byte1
	539	* 63..56...15...8 7...0
	540	*
	541	* Isolate the lsb;
	542	* https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
	543	*
	544	* The word will look this this, with a rightmost set bit in position 's':
	545	* ('x's are don't cares)
	546	* s
	547	* x..x100..0
	548	* x..xx10..0 Right shift (rightmost 0 is shifted off)
	549	* x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
	550	* the 1 just to their left into a 0; the remainder is
	551	* untouched
	552	* 0..0011..1 The xor with the original, x..xx10..0, clears that
	553	* remainder, sets the bottom to all 1
	554	* 0..0100..0 Add 1 to clear the word except for the bit in 's'
	555	*
	556	* Another method is to do 'word &= -word'; but it generates a compiler
	557	* message on some platforms about taking the negative of an unsigned */
	558
	559	word >>= 1;
	560	word = 1 + (word ^ (word - 1));
	561
	562	# elif BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321
	563
	564	/* Bytes are stored like
	565	* Byte1 Byte2 ... Byte8
	566	* 63..56 55..47 ... 7...0
	567	*
	568	* Isolate the msb; http://codeforces.com/blog/entry/10330
	569	*
	570	* Only the most significant set bit matters. Or'ing word with its right
	571	* shift of 1 makes that bit and the next one to its right both 1. Then
	572	* right shifting by 2 makes for 4 1-bits in a row. ... We end with the
	573	* msb and all to the right being 1. */
	574	word \|= word >> 1;
	575	word \|= word >> 2;
	576	word \|= word >> 4;
	577	word \|= word >> 8;
	578	word \|= word >> 16;
	579	word \|= word >> 32; /* This should get optimized out on 32-bit systems. */
	580
	581	/* Then subtracting the right shift by 1 clears all but the left-most of
	582	* the 1 bits, which is our desired result */
	583	word -= (word >> 1);
	584
	585	# else
	586	# error Unexpected byte order
	587	# endif
	588
	589	/* Here 'word' has a single bit set: the msb of the first byte in which it
	590	* is set. Calculate that position in the word. We can use this
	591	* specialized solution: https://stackoverflow.com/a/32339674/1626653,
	592	* assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
	593	* just get shifted off at compile time) */
	594	word = (word >> 7) * ((UINTMAX_C( 7) << 56) \| (UINTMAX_C(15) << 48)
	595	\| (UINTMAX_C(23) << 40) \| (UINTMAX_C(31) << 32)
	596	\| (39 << 24) \| (47 << 16)
	597	\| (55 << 8) \| (63 << 0));
	598	word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
	599
	600	/* Here, word contains the position 7..63 of that bit. Convert to 0..7 */
	601	word = ((word + 1) >> 3) - 1;
	602
	603	# if BYTEORDER == 0x4321 \|\| BYTEORDER == 0x87654321
	604
	605	/* And invert the result */
	606	word = CHARBITS - word - 1;
	607
	608	# endif
	609
	610	return (unsigned int) word;
	611	}
	612
	613	#endif
	614	#if defined(PERL_CORE) \|\| defined(PERL_EXT)
	615
	616	/*
	617	=for apidoc variant_under_utf8_count
	618
	619	This function looks at the sequence of bytes between C<s> and C<e>, which are
	620	assumed to be encoded in ASCII/Latin1, and returns how many of them would
	621	change should the string be translated into UTF-8. Due to the nature of UTF-8,
	622	each of these would occupy two bytes instead of the single one in the input
	623	string. Thus, this function returns the precise number of bytes the string
	624	would expand by when translated to UTF-8.
	625
	626	Unlike most of the other functions that have C<utf8> in their name, the input
	627	to this function is NOT a UTF-8-encoded string. The function name is slightly
	628	I<odd> to emphasize this.
	629
	630	This function is internal to Perl because khw thinks that any XS code that
	631	would want this is probably operating too close to the internals. Presenting a
	632	valid use case could change that.
	633
	634	See also
	635	C<L<perlapi/is_utf8_invariant_string>>
	636	and
	637	C<L<perlapi/is_utf8_invariant_string_loc>>,
	638
	639	=cut
	640
	641	*/
	642
	643	PERL_STATIC_INLINE Size_t
	644	S_variant_under_utf8_count(const U8* const s, const U8* const e)
	645	{
	646	const U8* x = s;
	647	Size_t count = 0;
	648
	649	PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
	650
	651	# ifndef EBCDIC
	652
	653	/* Test if the string is long enough to use word-at-a-time. (Logic is the
	654	* same as for is_utf8_invariant_string()) */
	655	if ((STRLEN) (e - x) >= PERL_WORDSIZE
	656	+ PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
	657	- (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
	658	{
	659
	660	/* Process per-byte until reach word boundary. XXX This loop could be
	661	* eliminated if we knew that this platform had fast unaligned reads */
	662	while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
	663	count += ! UTF8_IS_INVARIANT(*x++);
	664	}
	665
	666	/* Process per-word as long as we have at least a full word left */
	667	do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
	668	explanation of how this works */
	669	PERL_UINTMAX_T increment
	670	= ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
	671	* PERL_COUNT_MULTIPLIER)
	672	>> ((PERL_WORDSIZE - 1) * CHARBITS);
	673	count += (Size_t) increment;
	674	x += PERL_WORDSIZE;
	675	} while (x + PERL_WORDSIZE <= e);
	676	}
	677
	678	# endif
	679
	680	/* Process per-byte */
	681	while (x < e) {
	682	if (! UTF8_IS_INVARIANT(*x)) {
	683	count++;
	684	}
	685
	686	x++;
	687	}
	688
	689	return count;
	690	}
	691
	692	#endif
	693
	694	#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
	695	# undef PERL_WORDSIZE
	696	# undef PERL_COUNT_MULTIPLIER
	697	# undef PERL_WORD_BOUNDARY_MASK
	698	# undef PERL_VARIANTS_WORD_MASK
	699	#endif
	700
	701	/*
	702	=for apidoc is_utf8_string
	703
	704	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	705	Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
	706	be calculated using C<strlen(s)> (which means if you use this option, that C<s>
	707	can't have embedded C<NUL> characters and has to have a terminating C<NUL>
	708	byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
	709
	710	This function considers Perl's extended UTF-8 to be valid. That means that
	711	code points above Unicode, surrogates, and non-character code points are
	712	considered valid by this function. Use C<L</is_strict_utf8_string>>,
	713	C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
	714	code points are considered valid.
	715
	716	See also
	717	C<L</is_utf8_invariant_string>>,
	718	C<L</is_utf8_invariant_string_loc>>,
	719	C<L</is_utf8_string_loc>>,
	720	C<L</is_utf8_string_loclen>>,
	721	C<L</is_utf8_fixed_width_buf_flags>>,
	722	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	723	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	724
	725	=cut
	726	*/
	727
	728	#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
	729
	730	#if defined(PERL_CORE) \|\| defined (PERL_EXT)
	731
	732	/*
	733	=for apidoc is_utf8_non_invariant_string
	734
	735	Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
	736	C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
	737	UTF-8; otherwise returns FALSE.
	738
	739	A TRUE return means that at least one code point represented by the sequence
	740	either is a wide character not representable as a single byte, or the
	741	representation differs depending on whether the sequence is encoded in UTF-8 or
	742	not.
	743
	744	See also
	745	C<L<perlapi/is_utf8_invariant_string>>,
	746	C<L<perlapi/is_utf8_string>>
	747
	748	=cut
	749
	750	This is commonly used to determine if a SV's UTF-8 flag should be turned on.
	751	It generally needn't be if its string is entirely UTF-8 invariant, and it
	752	shouldn't be if it otherwise contains invalid UTF-8.
	753
	754	It is an internal function because khw thinks that XS code shouldn't be working
	755	at this low a level. A valid use case could change that.
	756
	757	*/
	758
	759	PERL_STATIC_INLINE bool
	760	S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
	761	{
	762	const U8 * first_variant;
	763
	764	PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
	765
	766	if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
	767	return FALSE;
	768	}
	769
	770	return is_utf8_string(first_variant, len - (first_variant - s));
	771	}
	772
	773	#endif
	774
	775	/*
	776	=for apidoc is_strict_utf8_string
	777
	778	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	779	UTF-8-encoded string that is fully interchangeable by any application using
	780	Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
	781	calculated using C<strlen(s)> (which means if you use this option, that C<s>
	782	can't have embedded C<NUL> characters and has to have a terminating C<NUL>
	783	byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
	784
	785	This function returns FALSE for strings containing any
	786	code points above the Unicode max of 0x10FFFF, surrogate code points, or
	787	non-character code points.
	788
	789	See also
	790	C<L</is_utf8_invariant_string>>,
	791	C<L</is_utf8_invariant_string_loc>>,
	792	C<L</is_utf8_string>>,
	793	C<L</is_utf8_string_flags>>,
	794	C<L</is_utf8_string_loc>>,
	795	C<L</is_utf8_string_loc_flags>>,
	796	C<L</is_utf8_string_loclen>>,
	797	C<L</is_utf8_string_loclen_flags>>,
	798	C<L</is_utf8_fixed_width_buf_flags>>,
	799	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	800	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	801	C<L</is_strict_utf8_string_loc>>,
	802	C<L</is_strict_utf8_string_loclen>>,
	803	C<L</is_c9strict_utf8_string>>,
	804	C<L</is_c9strict_utf8_string_loc>>,
	805	and
	806	C<L</is_c9strict_utf8_string_loclen>>.
	807
	808	=cut
	809	*/
	810
	811	#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
	812
	813	/*
	814	=for apidoc is_c9strict_utf8_string
	815
	816	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	817	UTF-8-encoded string that conforms to
	818	L<Unicode Corrigendum #9\|http://www.unicode.org/versions/corrigendum9.html>;
	819	otherwise it returns FALSE. If C<len> is 0, it will be calculated using
	820	C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
	821	C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
	822	characters being ASCII constitute 'a valid UTF-8 string'.
	823
	824	This function returns FALSE for strings containing any code points above the
	825	Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
	826	code points per
	827	L<Corrigendum #9\|http://www.unicode.org/versions/corrigendum9.html>.
	828
	829	See also
	830	C<L</is_utf8_invariant_string>>,
	831	C<L</is_utf8_invariant_string_loc>>,
	832	C<L</is_utf8_string>>,
	833	C<L</is_utf8_string_flags>>,
	834	C<L</is_utf8_string_loc>>,
	835	C<L</is_utf8_string_loc_flags>>,
	836	C<L</is_utf8_string_loclen>>,
	837	C<L</is_utf8_string_loclen_flags>>,
	838	C<L</is_utf8_fixed_width_buf_flags>>,
	839	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	840	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	841	C<L</is_strict_utf8_string>>,
	842	C<L</is_strict_utf8_string_loc>>,
	843	C<L</is_strict_utf8_string_loclen>>,
	844	C<L</is_c9strict_utf8_string_loc>>,
	845	and
	846	C<L</is_c9strict_utf8_string_loclen>>.
	847
	848	=cut
	849	*/
	850
	851	#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
	852
	853	/*
	854	=for apidoc is_utf8_string_flags
	855
	856	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	857	UTF-8 string, subject to the restrictions imposed by C<flags>;
	858	returns FALSE otherwise. If C<len> is 0, it will be calculated
	859	using C<strlen(s)> (which means if you use this option, that C<s> can't have
	860	embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
	861	that all characters being ASCII constitute 'a valid UTF-8 string'.
	862
	863	If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
	864	C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
	865	as C<L</is_strict_utf8_string>>; and if C<flags> is
	866	C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
	867	C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
	868	combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
	869	C<L</utf8n_to_uvchr>>, with the same meanings.
	870
	871	See also
	872	C<L</is_utf8_invariant_string>>,
	873	C<L</is_utf8_invariant_string_loc>>,
	874	C<L</is_utf8_string>>,
	875	C<L</is_utf8_string_loc>>,
	876	C<L</is_utf8_string_loc_flags>>,
	877	C<L</is_utf8_string_loclen>>,
	878	C<L</is_utf8_string_loclen_flags>>,
	879	C<L</is_utf8_fixed_width_buf_flags>>,
	880	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	881	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	882	C<L</is_strict_utf8_string>>,
	883	C<L</is_strict_utf8_string_loc>>,
	884	C<L</is_strict_utf8_string_loclen>>,
	885	C<L</is_c9strict_utf8_string>>,
	886	C<L</is_c9strict_utf8_string_loc>>,
	887	and
	888	C<L</is_c9strict_utf8_string_loclen>>.
	889
	890	=cut
	891	*/
	892
	893	PERL_STATIC_INLINE bool
	894	S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
	895	{
	896	const U8 * first_variant;
	897
	898	PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
	899	assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
	900	\|UTF8_DISALLOW_PERL_EXTENDED)));
	901
	902	if (len == 0) {
	903	len = strlen((const char *)s);
	904	}
	905
	906	if (flags == 0) {
	907	return is_utf8_string(s, len);
	908	}
	909
	910	if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
	911	== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
	912	{
	913	return is_strict_utf8_string(s, len);
	914	}
	915
	916	if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
	917	== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
	918	{
	919	return is_c9strict_utf8_string(s, len);
	920	}
	921
	922	if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
	923	const U8* const send = s + len;
	924	const U8* x = first_variant;
	925
	926	while (x < send) {
	927	STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
	928	if (UNLIKELY(! cur_len)) {
	929	return FALSE;
	930	}
	931	x += cur_len;
	932	}
	933	}
	934
	935	return TRUE;
	936	}
	937
	938	/*
	939
	940	=for apidoc is_utf8_string_loc
	941
	942	Like C<L</is_utf8_string>> but stores the location of the failure (in the
	943	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	944	"utf8ness success") in the C<ep> pointer.
	945
	946	See also C<L</is_utf8_string_loclen>>.
	947
	948	=cut
	949	*/
	950
	951	#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
	952
	953	/*
	954
	955	=for apidoc is_utf8_string_loclen
	956
	957	Like C<L</is_utf8_string>> but stores the location of the failure (in the
	958	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	959	"utf8ness success") in the C<ep> pointer, and the number of UTF-8
	960	encoded characters in the C<el> pointer.
	961
	962	See also C<L</is_utf8_string_loc>>.
	963
	964	=cut
	965	*/
	966
	967	PERL_STATIC_INLINE bool
	968	Perl_is_utf8_string_loclen(const U8 s, STRLEN len, const U8 ep, STRLEN el)
	969	{
	970	const U8 * first_variant;
	971
	972	PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
	973
	974	if (len == 0) {
	975	len = strlen((const char *) s);
	976	}
	977
	978	if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
	979	if (el)
	980	*el = len;
	981
	982	if (ep) {
	983	*ep = s + len;
	984	}
	985
	986	return TRUE;
	987	}
	988
	989	{
	990	const U8* const send = s + len;
	991	const U8* x = first_variant;
	992	STRLEN outlen = first_variant - s;
	993
	994	while (x < send) {
	995	const STRLEN cur_len = isUTF8_CHAR(x, send);
	996	if (UNLIKELY(! cur_len)) {
	997	break;
	998	}
	999	x += cur_len;
	1000	outlen++;
	1001	}
	1002
	1003	if (el)
	1004	*el = outlen;
	1005
	1006	if (ep) {
	1007	*ep = x;
	1008	}
	1009
	1010	return (x == send);
	1011	}
	1012	}
	1013
	1014	/*
	1015
	1016	=for apidoc Am\|STRLEN\|isUTF8_CHAR\|const U8 s\|const U8 e
	1017
	1018	Evaluates to non-zero if the first few bytes of the string starting at C<s> and
	1019	looking no further than S<C<e - 1>> are well-formed UTF-8, as extended by Perl,
	1020	that represents some code point; otherwise it evaluates to 0. If non-zero, the
	1021	value gives how many bytes starting at C<s> comprise the code point's
	1022	representation. Any bytes remaining before C<e>, but beyond the ones needed to
	1023	form the first code point in C<s>, are not examined.
	1024
	1025	The code point can be any that will fit in an IV on this machine, using Perl's
	1026	extension to official UTF-8 to represent those higher than the Unicode maximum
	1027	of 0x10FFFF. That means that this macro is used to efficiently decide if the
	1028	next few bytes in C<s> is legal UTF-8 for a single character.
	1029
	1030	Use C<L</isSTRICT_UTF8_CHAR>> to restrict the acceptable code points to those
	1031	defined by Unicode to be fully interchangeable across applications;
	1032	C<L</isC9_STRICT_UTF8_CHAR>> to use the L<Unicode Corrigendum
	1033	#9\|http://www.unicode.org/versions/corrigendum9.html> definition of allowable
	1034	code points; and C<L</isUTF8_CHAR_flags>> for a more customized definition.
	1035
	1036	Use C<L</is_utf8_string>>, C<L</is_utf8_string_loc>>, and
	1037	C<L</is_utf8_string_loclen>> to check entire strings.
	1038
	1039	Note also that a UTF-8 "invariant" character (i.e. ASCII on non-EBCDIC
	1040	machines) is a valid UTF-8 character.
	1041
	1042	=cut
	1043
	1044	This uses an adaptation of the table and algorithm given in
	1045	http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
	1046	documentation of the original version. A copyright notice for the original
	1047	version is given at the beginning of this file. The Perl adapation is
	1048	documented at the definition of PL_extended_utf8_dfa_tab[].
	1049
	1050	*/
	1051
	1052	PERL_STATIC_INLINE Size_t
	1053	S_isUTF8_CHAR(const U8 * const s0, const U8 * const e)
	1054	{
	1055	const U8 * s = s0;
	1056	UV state = 0;
	1057
	1058	PERL_ARGS_ASSERT_ISUTF8_CHAR;
	1059
	1060	/* This dfa is fast. If it accepts the input, it was for a well-formed,
	1061	* code point, which can be returned immediately. Otherwise, it is either
	1062	* malformed, or for the start byte FF which the dfa doesn't handle (except
	1063	* on 32-bit ASCII platforms where it trivially is an error). Call a
	1064	* helper function for the other platforms. */
	1065
	1066	while (s < e && LIKELY(state != 1)) {
	1067	state = PL_extended_utf8_dfa_tab[256
	1068	+ state
	1069	+ PL_extended_utf8_dfa_tab[*s]];
	1070	if (state != 0) {
	1071	s++;
	1072	continue;
	1073	}
	1074
	1075	return s - s0 + 1;
	1076	}
	1077
	1078	#if defined(UV_IS_QUAD) \|\| defined(EBCDIC)
	1079
	1080	if (NATIVE_UTF8_TO_I8(*s0) == 0xFF && e - s0 >= UTF8_MAXBYTES) {
	1081	return _is_utf8_char_helper(s0, e, 0);
	1082	}
	1083
	1084	#endif
	1085
	1086	return 0;
	1087	}
	1088
	1089	/*
	1090
	1091	=for apidoc isSTRICT_UTF8_CHAR
	1092
	1093	Evaluates to non-zero if the first few bytes of the string starting at C<s> and
	1094	looking no further than S<C<e - 1>> are well-formed UTF-8 that represents some
	1095	Unicode code point completely acceptable for open interchange between all
	1096	applications; otherwise it evaluates to 0. If non-zero, the value gives how
	1097	many bytes starting at C<s> comprise the code point's representation. Any
	1098	bytes remaining before C<e>, but beyond the ones needed to form the first code
	1099	point in C<s>, are not examined.
	1100
	1101	The largest acceptable code point is the Unicode maximum 0x10FFFF, and must not
	1102	be a surrogate nor a non-character code point. Thus this excludes any code
	1103	point from Perl's extended UTF-8.
	1104
	1105	This is used to efficiently decide if the next few bytes in C<s> is
	1106	legal Unicode-acceptable UTF-8 for a single character.
	1107
	1108	Use C<L</isC9_STRICT_UTF8_CHAR>> to use the L<Unicode Corrigendum
	1109	#9\|http://www.unicode.org/versions/corrigendum9.html> definition of allowable
	1110	code points; C<L</isUTF8_CHAR>> to check for Perl's extended UTF-8;
	1111	and C<L</isUTF8_CHAR_flags>> for a more customized definition.
	1112
	1113	Use C<L</is_strict_utf8_string>>, C<L</is_strict_utf8_string_loc>>, and
	1114	C<L</is_strict_utf8_string_loclen>> to check entire strings.
	1115
	1116	=cut
	1117
	1118	This uses an adaptation of the tables and algorithm given in
	1119	http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
	1120	documentation of the original version. A copyright notice for the original
	1121	version is given at the beginning of this file. The Perl adapation is
	1122	documented at the definition of strict_extended_utf8_dfa_tab[].
	1123
	1124	*/
	1125
	1126	PERL_STATIC_INLINE Size_t
	1127	S_isSTRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e)
	1128	{
	1129	const U8 * s = s0;
	1130	UV state = 0;
	1131
	1132	PERL_ARGS_ASSERT_ISSTRICT_UTF8_CHAR;
	1133
	1134	while (s < e && LIKELY(state != 1)) {
	1135	state = PL_strict_utf8_dfa_tab[256 + state + PL_strict_utf8_dfa_tab[*s]];
	1136
	1137	if (state != 0) {
	1138	s++;
	1139	continue;
	1140	}
	1141
	1142	return s - s0 + 1;
	1143	}
	1144
	1145	#ifndef EBCDIC
	1146
	1147	/* The dfa above drops out for certain Hanguls; handle them specially */
	1148	if (is_HANGUL_ED_utf8_safe(s0, e)) {
	1149	return 3;
	1150	}
	1151
	1152	#endif
	1153
	1154	return 0;
	1155	}
	1156
	1157	/*
	1158
	1159	=for apidoc Am\|STRLEN\|isC9_STRICT_UTF8_CHAR\|const U8 s\|const U8 e
	1160
	1161	Evaluates to non-zero if the first few bytes of the string starting at C<s> and
	1162	looking no further than S<C<e - 1>> are well-formed UTF-8 that represents some
	1163	Unicode non-surrogate code point; otherwise it evaluates to 0. If non-zero,
	1164	the value gives how many bytes starting at C<s> comprise the code point's
	1165	representation. Any bytes remaining before C<e>, but beyond the ones needed to
	1166	form the first code point in C<s>, are not examined.
	1167
	1168	The largest acceptable code point is the Unicode maximum 0x10FFFF. This
	1169	differs from C<L</isSTRICT_UTF8_CHAR>> only in that it accepts non-character
	1170	code points. This corresponds to
	1171	L<Unicode Corrigendum #9\|http://www.unicode.org/versions/corrigendum9.html>.
	1172	which said that non-character code points are merely discouraged rather than
	1173	completely forbidden in open interchange. See
	1174	L<perlunicode/Noncharacter code points>.
	1175
	1176	Use C<L</isUTF8_CHAR>> to check for Perl's extended UTF-8; and
	1177	C<L</isUTF8_CHAR_flags>> for a more customized definition.
	1178
	1179	Use C<L</is_c9strict_utf8_string>>, C<L</is_c9strict_utf8_string_loc>>, and
	1180	C<L</is_c9strict_utf8_string_loclen>> to check entire strings.
	1181
	1182	=cut
	1183
	1184	This uses an adaptation of the tables and algorithm given in
	1185	http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
	1186	documentation of the original version. A copyright notice for the original
	1187	version is given at the beginning of this file. The Perl adapation is
	1188	documented at the definition of PL_c9_utf8_dfa_tab[].
	1189
	1190	*/
	1191
	1192	PERL_STATIC_INLINE Size_t
	1193	S_isC9_STRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e)
	1194	{
	1195	const U8 * s = s0;
	1196	UV state = 0;
	1197
	1198	PERL_ARGS_ASSERT_ISC9_STRICT_UTF8_CHAR;
	1199
	1200	while (s < e && LIKELY(state != 1)) {
	1201	state = PL_c9_utf8_dfa_tab[256 + state + PL_c9_utf8_dfa_tab[*s]];
	1202
	1203	if (state != 0) {
	1204	s++;
	1205	continue;
	1206	}
	1207
	1208	return s - s0 + 1;
	1209	}
	1210
	1211	return 0;
	1212	}
	1213
	1214	/*
	1215
	1216	=for apidoc is_strict_utf8_string_loc
	1217
	1218	Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
	1219	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1220	"utf8ness success") in the C<ep> pointer.
	1221
	1222	See also C<L</is_strict_utf8_string_loclen>>.
	1223
	1224	=cut
	1225	*/
	1226
	1227	#define is_strict_utf8_string_loc(s, len, ep) \
	1228	is_strict_utf8_string_loclen(s, len, ep, 0)
	1229
	1230	/*
	1231
	1232	=for apidoc is_strict_utf8_string_loclen
	1233
	1234	Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
	1235	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1236	"utf8ness success") in the C<ep> pointer, and the number of UTF-8
	1237	encoded characters in the C<el> pointer.
	1238
	1239	See also C<L</is_strict_utf8_string_loc>>.
	1240
	1241	=cut
	1242	*/
	1243
	1244	PERL_STATIC_INLINE bool
	1245	S_is_strict_utf8_string_loclen(const U8 s, STRLEN len, const U8 ep, STRLEN el)
	1246	{
	1247	const U8 * first_variant;
	1248
	1249	PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
	1250
	1251	if (len == 0) {
	1252	len = strlen((const char *) s);
	1253	}
	1254
	1255	if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
	1256	if (el)
	1257	*el = len;
	1258
	1259	if (ep) {
	1260	*ep = s + len;
	1261	}
	1262
	1263	return TRUE;
	1264	}
	1265
	1266	{
	1267	const U8* const send = s + len;
	1268	const U8* x = first_variant;
	1269	STRLEN outlen = first_variant - s;
	1270
	1271	while (x < send) {
	1272	const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
	1273	if (UNLIKELY(! cur_len)) {
	1274	break;
	1275	}
	1276	x += cur_len;
	1277	outlen++;
	1278	}
	1279
	1280	if (el)
	1281	*el = outlen;
	1282
	1283	if (ep) {
	1284	*ep = x;
	1285	}
	1286
	1287	return (x == send);
	1288	}
	1289	}
	1290
	1291	/*
	1292
	1293	=for apidoc is_c9strict_utf8_string_loc
	1294
	1295	Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
	1296	the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1297	"utf8ness success") in the C<ep> pointer.
	1298
	1299	See also C<L</is_c9strict_utf8_string_loclen>>.
	1300
	1301	=cut
	1302	*/
	1303
	1304	#define is_c9strict_utf8_string_loc(s, len, ep) \
	1305	is_c9strict_utf8_string_loclen(s, len, ep, 0)
	1306
	1307	/*
	1308
	1309	=for apidoc is_c9strict_utf8_string_loclen
	1310
	1311	Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
	1312	the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1313	"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
	1314	characters in the C<el> pointer.
	1315
	1316	See also C<L</is_c9strict_utf8_string_loc>>.
	1317
	1318	=cut
	1319	*/
	1320
	1321	PERL_STATIC_INLINE bool
	1322	S_is_c9strict_utf8_string_loclen(const U8 s, STRLEN len, const U8 ep, STRLEN el)
	1323	{
	1324	const U8 * first_variant;
	1325
	1326	PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
	1327
	1328	if (len == 0) {
	1329	len = strlen((const char *) s);
	1330	}
	1331
	1332	if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
	1333	if (el)
	1334	*el = len;
	1335
	1336	if (ep) {
	1337	*ep = s + len;
	1338	}
	1339
	1340	return TRUE;
	1341	}
	1342
	1343	{
	1344	const U8* const send = s + len;
	1345	const U8* x = first_variant;
	1346	STRLEN outlen = first_variant - s;
	1347
	1348	while (x < send) {
	1349	const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
	1350	if (UNLIKELY(! cur_len)) {
	1351	break;
	1352	}
	1353	x += cur_len;
	1354	outlen++;
	1355	}
	1356
	1357	if (el)
	1358	*el = outlen;
	1359
	1360	if (ep) {
	1361	*ep = x;
	1362	}
	1363
	1364	return (x == send);
	1365	}
	1366	}
	1367
	1368	/*
	1369
	1370	=for apidoc is_utf8_string_loc_flags
	1371
	1372	Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
	1373	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1374	"utf8ness success") in the C<ep> pointer.
	1375
	1376	See also C<L</is_utf8_string_loclen_flags>>.
	1377
	1378	=cut
	1379	*/
	1380
	1381	#define is_utf8_string_loc_flags(s, len, ep, flags) \
	1382	is_utf8_string_loclen_flags(s, len, ep, 0, flags)
	1383
	1384
	1385	/* The above 3 actual functions could have been moved into the more general one
	1386	* just below, and made #defines that call it with the right 'flags'. They are
	1387	* currently kept separate to increase their chances of getting inlined */
	1388
	1389	/*
	1390
	1391	=for apidoc is_utf8_string_loclen_flags
	1392
	1393	Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
	1394	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	1395	"utf8ness success") in the C<ep> pointer, and the number of UTF-8
	1396	encoded characters in the C<el> pointer.
	1397
	1398	See also C<L</is_utf8_string_loc_flags>>.
	1399
	1400	=cut
	1401	*/
	1402
	1403	PERL_STATIC_INLINE bool
	1404	S_is_utf8_string_loclen_flags(const U8 s, STRLEN len, const U8 ep, STRLEN el, const U32 flags)
	1405	{
	1406	const U8 * first_variant;
	1407
	1408	PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
	1409	assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
	1410	\|UTF8_DISALLOW_PERL_EXTENDED)));
	1411
	1412	if (len == 0) {
	1413	len = strlen((const char *) s);
	1414	}
	1415
	1416	if (flags == 0) {
	1417	return is_utf8_string_loclen(s, len, ep, el);
	1418	}
	1419
	1420	if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
	1421	== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
	1422	{
	1423	return is_strict_utf8_string_loclen(s, len, ep, el);
	1424	}
	1425
	1426	if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
	1427	== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
	1428	{
	1429	return is_c9strict_utf8_string_loclen(s, len, ep, el);
	1430	}
	1431
	1432	if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
	1433	if (el)
	1434	*el = len;
	1435
	1436	if (ep) {
	1437	*ep = s + len;
	1438	}
	1439
	1440	return TRUE;
	1441	}
	1442
	1443	{
	1444	const U8* send = s + len;
	1445	const U8* x = first_variant;
	1446	STRLEN outlen = first_variant - s;
	1447
	1448	while (x < send) {
	1449	const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
	1450	if (UNLIKELY(! cur_len)) {
	1451	break;
	1452	}
	1453	x += cur_len;
	1454	outlen++;
	1455	}
	1456
	1457	if (el)
	1458	*el = outlen;
	1459
	1460	if (ep) {
	1461	*ep = x;
	1462	}
	1463
	1464	return (x == send);
	1465	}
	1466	}
	1467
	1468	/*
	1469	=for apidoc utf8_distance
	1470
	1471	Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
	1472	and C<b>.
	1473
	1474	WARNING: use only if you know that the pointers point inside the
	1475	same UTF-8 buffer.
	1476
	1477	=cut
	1478	*/
	1479
	1480	PERL_STATIC_INLINE IV
	1481	Perl_utf8_distance(pTHX_ const U8 a, const U8 b)
	1482	{
	1483	PERL_ARGS_ASSERT_UTF8_DISTANCE;
	1484
	1485	return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
	1486	}
	1487
	1488	/*
	1489	=for apidoc utf8_hop
	1490
	1491	Return the UTF-8 pointer C<s> displaced by C<off> characters, either
	1492	forward or backward.
	1493
	1494	WARNING: do not use the following unless you know C<off> is within
	1495	the UTF-8 data pointed to by C<s> and that on entry C<s> is aligned
	1496	on the first byte of character or just after the last byte of a character.
	1497
	1498	=cut
	1499	*/
	1500
	1501	PERL_STATIC_INLINE U8 *
	1502	Perl_utf8_hop(const U8 *s, SSize_t off)
	1503	{
	1504	PERL_ARGS_ASSERT_UTF8_HOP;
	1505
	1506	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	1507	* the bitops (especially ~) can create illegal UTF-8.
	1508	* In other words: in Perl UTF-8 is not just for Unicode. */
	1509
	1510	if (off >= 0) {
	1511	while (off--)
	1512	s += UTF8SKIP(s);
	1513	}
	1514	else {
	1515	while (off++) {
	1516	s--;
	1517	while (UTF8_IS_CONTINUATION(*s))
	1518	s--;
	1519	}
	1520	}
	1521	GCC_DIAG_IGNORE(-Wcast-qual)
	1522	return (U8 *)s;
	1523	GCC_DIAG_RESTORE
	1524	}
	1525
	1526	/*
	1527	=for apidoc utf8_hop_forward
	1528
	1529	Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
	1530	forward.
	1531
	1532	C<off> must be non-negative.
	1533
	1534	C<s> must be before or equal to C<end>.
	1535
	1536	When moving forward it will not move beyond C<end>.
	1537
	1538	Will not exceed this limit even if the string is not valid "UTF-8".
	1539
	1540	=cut
	1541	*/
	1542
	1543	PERL_STATIC_INLINE U8 *
	1544	Perl_utf8_hop_forward(const U8 s, SSize_t off, const U8 end)
	1545	{
	1546	PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
	1547
	1548	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	1549	* the bitops (especially ~) can create illegal UTF-8.
	1550	* In other words: in Perl UTF-8 is not just for Unicode. */
	1551
	1552	assert(s <= end);
	1553	assert(off >= 0);
	1554
	1555	while (off--) {
	1556	STRLEN skip = UTF8SKIP(s);
	1557	if ((STRLEN)(end - s) <= skip) {
	1558	GCC_DIAG_IGNORE(-Wcast-qual)
	1559	return (U8 *)end;
	1560	GCC_DIAG_RESTORE
	1561	}
	1562	s += skip;
	1563	}
	1564
	1565	GCC_DIAG_IGNORE(-Wcast-qual)
	1566	return (U8 *)s;
	1567	GCC_DIAG_RESTORE
	1568	}
	1569
	1570	/*
	1571	=for apidoc utf8_hop_back
	1572
	1573	Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
	1574	backward.
	1575
	1576	C<off> must be non-positive.
	1577
	1578	C<s> must be after or equal to C<start>.
	1579
	1580	When moving backward it will not move before C<start>.
	1581
	1582	Will not exceed this limit even if the string is not valid "UTF-8".
	1583
	1584	=cut
	1585	*/
	1586
	1587	PERL_STATIC_INLINE U8 *
	1588	Perl_utf8_hop_back(const U8 s, SSize_t off, const U8 start)
	1589	{
	1590	PERL_ARGS_ASSERT_UTF8_HOP_BACK;
	1591
	1592	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	1593	* the bitops (especially ~) can create illegal UTF-8.
	1594	* In other words: in Perl UTF-8 is not just for Unicode. */
	1595
	1596	assert(start <= s);
	1597	assert(off <= 0);
	1598
	1599	while (off++ && s > start) {
	1600	do {
	1601	s--;
	1602	} while (UTF8_IS_CONTINUATION(*s) && s > start);
	1603	}
	1604
	1605	GCC_DIAG_IGNORE(-Wcast-qual)
	1606	return (U8 *)s;
	1607	GCC_DIAG_RESTORE
	1608	}
	1609
	1610	/*
	1611	=for apidoc utf8_hop_safe
	1612
	1613	Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
	1614	either forward or backward.
	1615
	1616	When moving backward it will not move before C<start>.
	1617
	1618	When moving forward it will not move beyond C<end>.
	1619
	1620	Will not exceed those limits even if the string is not valid "UTF-8".
	1621
	1622	=cut
	1623	*/
	1624
	1625	PERL_STATIC_INLINE U8 *
	1626	Perl_utf8_hop_safe(const U8 s, SSize_t off, const U8 start, const U8 *end)
	1627	{
	1628	PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
	1629
	1630	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	1631	* the bitops (especially ~) can create illegal UTF-8.
	1632	* In other words: in Perl UTF-8 is not just for Unicode. */
	1633
	1634	assert(start <= s && s <= end);
	1635
	1636	if (off >= 0) {
	1637	return utf8_hop_forward(s, off, end);
	1638	}
	1639	else {
	1640	return utf8_hop_back(s, off, start);
	1641	}
	1642	}
	1643
	1644	/*
	1645
	1646	=for apidoc is_utf8_valid_partial_char
	1647
	1648	Returns 0 if the sequence of bytes starting at C<s> and looking no further than
	1649	S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
	1650	points. Otherwise, it returns 1 if there exists at least one non-empty
	1651	sequence of bytes that when appended to sequence C<s>, starting at position
	1652	C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
	1653	otherwise returns 0.
	1654
	1655	In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
	1656	point.
	1657
	1658	This is useful when a fixed-length buffer is being tested for being well-formed
	1659	UTF-8, but the final few bytes in it don't comprise a full character; that is,
	1660	it is split somewhere in the middle of the final code point's UTF-8
	1661	representation. (Presumably when the buffer is refreshed with the next chunk
	1662	of data, the new first bytes will complete the partial code point.) This
	1663	function is used to verify that the final bytes in the current buffer are in
	1664	fact the legal beginning of some code point, so that if they aren't, the
	1665	failure can be signalled without having to wait for the next read.
	1666
	1667	=cut
	1668	*/
	1669	#define is_utf8_valid_partial_char(s, e) \
	1670	is_utf8_valid_partial_char_flags(s, e, 0)
	1671
	1672	/*
	1673
	1674	=for apidoc is_utf8_valid_partial_char_flags
	1675
	1676	Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
	1677	or not the input is a valid UTF-8 encoded partial character, but it takes an
	1678	extra parameter, C<flags>, which can further restrict which code points are
	1679	considered valid.
	1680
	1681	If C<flags> is 0, this behaves identically to
	1682	C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
	1683	of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
	1684	there is any sequence of bytes that can complete the input partial character in
	1685	such a way that a non-prohibited character is formed, the function returns
	1686	TRUE; otherwise FALSE. Non character code points cannot be determined based on
	1687	partial character input. But many of the other possible excluded types can be
	1688	determined from just the first one or two bytes.
	1689
	1690	=cut
	1691	*/
	1692
	1693	PERL_STATIC_INLINE bool
	1694	S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
	1695	{
	1696	PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
	1697
	1698	assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
	1699	\|UTF8_DISALLOW_PERL_EXTENDED)));
	1700
	1701	if (s >= e \|\| s + UTF8SKIP(s) <= e) {
	1702	return FALSE;
	1703	}
	1704
	1705	return cBOOL(_is_utf8_char_helper(s, e, flags));
	1706	}
	1707
	1708	/*
	1709
	1710	=for apidoc is_utf8_fixed_width_buf_flags
	1711
	1712	Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
	1713	is entirely valid UTF-8, subject to the restrictions given by C<flags>;
	1714	otherwise it returns FALSE.
	1715
	1716	If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
	1717	without restriction. If the final few bytes of the buffer do not form a
	1718	complete code point, this will return TRUE anyway, provided that
	1719	C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
	1720
	1721	If C<flags> in non-zero, it can be any combination of the
	1722	C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
	1723	same meanings.
	1724
	1725	This function differs from C<L</is_utf8_string_flags>> only in that the latter
	1726	returns FALSE if the final few bytes of the string don't form a complete code
	1727	point.
	1728
	1729	=cut
	1730	*/
	1731	#define is_utf8_fixed_width_buf_flags(s, len, flags) \
	1732	is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
	1733
	1734	/*
	1735
	1736	=for apidoc is_utf8_fixed_width_buf_loc_flags
	1737
	1738	Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
	1739	failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
	1740	to the beginning of any partial character at the end of the buffer; if there is
	1741	no partial character C<*ep> will contain C<s>+C<len>.
	1742
	1743	See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
	1744
	1745	=cut
	1746	*/
	1747
	1748	#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
	1749	is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
	1750
	1751	/*
	1752
	1753	=for apidoc is_utf8_fixed_width_buf_loclen_flags
	1754
	1755	Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
	1756	complete, valid characters found in the C<el> pointer.
	1757
	1758	=cut
	1759	*/
	1760
	1761	PERL_STATIC_INLINE bool
	1762	S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
	1763	STRLEN len,
	1764	const U8 **ep,
	1765	STRLEN *el,
	1766	const U32 flags)
	1767	{
	1768	const U8 * maybe_partial;
	1769
	1770	PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
	1771
	1772	if (! ep) {
	1773	ep = &maybe_partial;
	1774	}
	1775
	1776	/* If it's entirely valid, return that; otherwise see if the only error is
	1777	* that the final few bytes are for a partial character */
	1778	return is_utf8_string_loclen_flags(s, len, ep, el, flags)
	1779	\|\| is_utf8_valid_partial_char_flags(*ep, s + len, flags);
	1780	}
	1781
	1782	PERL_STATIC_INLINE UV
	1783	S_utf8n_to_uvchr_msgs(const U8 *s,
	1784	STRLEN curlen,
	1785	STRLEN *retlen,
	1786	const U32 flags,
	1787	U32 * errors,
	1788	AV ** msgs)
	1789	{
	1790	/* This is the inlined portion of utf8n_to_uvchr_msgs. It handles the
	1791	* simple cases, and, if necessary calls a helper function to deal with the
	1792	* more complex ones. Almost all well-formed non-problematic code points
	1793	* are considered simple, so that it's unlikely that the helper function
	1794	* will need to be called.
	1795	*
	1796	* This is an adaptation of the tables and algorithm given in
	1797	* http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides
	1798	* comprehensive documentation of the original version. A copyright notice
	1799	* for the original version is given at the beginning of this file. The
	1800	* Perl adapation is documented at the definition of PL_strict_utf8_dfa_tab[].
	1801	*/
	1802
	1803	const U8 * const s0 = s;
	1804	const U8 * send = s0 + curlen;
	1805	UV uv = 0; /* The 0 silences some stupid compilers */
	1806	UV state = 0;
	1807
	1808	PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS;
	1809
	1810	/* This dfa is fast. If it accepts the input, it was for a well-formed,
	1811	* non-problematic code point, which can be returned immediately.
	1812	* Otherwise we call a helper function to figure out the more complicated
	1813	* cases. */
	1814
	1815	while (s < send && LIKELY(state != 1)) {
	1816	UV type = PL_strict_utf8_dfa_tab[*s];
	1817
	1818	uv = (state == 0)
	1819	? ((0xff >> type) & NATIVE_UTF8_TO_I8(*s))
	1820	: UTF8_ACCUMULATE(uv, *s);
	1821	state = PL_strict_utf8_dfa_tab[256 + state + type];
	1822
	1823	if (state != 0) {
	1824	s++;
	1825	continue;
	1826	}
	1827
	1828	if (retlen) {
	1829	*retlen = s - s0 + 1;
	1830	}
	1831	if (errors) {
	1832	*errors = 0;
	1833	}
	1834	if (msgs) {
	1835	*msgs = NULL;
	1836	}
	1837
	1838	return uv;
	1839	}
	1840
	1841	/* Here is potentially problematic. Use the full mechanism */
	1842	return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags, errors, msgs);
	1843	}
	1844
	1845	/* ------------------------------- perl.h ----------------------------- */
	1846
	1847	/*
	1848	=head1 Miscellaneous Functions
	1849
	1850	=for apidoc AiR\|bool\|is_safe_syscall\|const char pv\|STRLEN len\|const char what\|const char *op_name
	1851
	1852	Test that the given C<pv> doesn't contain any internal C<NUL> characters.
	1853	If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
	1854
	1855	Return TRUE if the name is safe.
	1856
	1857	Used by the C<IS_SAFE_SYSCALL()> macro.
	1858
	1859	=cut
	1860	*/
	1861
	1862	PERL_STATIC_INLINE bool
	1863	S_is_safe_syscall(pTHX_ const char pv, STRLEN len, const char what, const char *op_name) {
	1864	/* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
	1865	* perl itself uses xce*() functions which accept 8-bit strings.
	1866	*/
	1867
	1868	PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
	1869
	1870	if (len > 1) {
	1871	char *null_at;
	1872	if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
	1873	SETERRNO(ENOENT, LIB_INVARG);
	1874	Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
	1875	"Invalid \\0 character in %s for %s: %s\\0%s",
	1876	what, op_name, pv, null_at+1);
	1877	return FALSE;
	1878	}
	1879	}
	1880
	1881	return TRUE;
	1882	}
	1883
	1884	/*
	1885
	1886	Return true if the supplied filename has a newline character
	1887	immediately before the first (hopefully only) NUL.
	1888
	1889	My original look at this incorrectly used the len from SvPV(), but
	1890	that's incorrect, since we allow for a NUL in pv[len-1].
	1891
	1892	So instead, strlen() and work from there.
	1893
	1894	This allow for the user reading a filename, forgetting to chomp it,
	1895	then calling:
	1896
	1897	open my $foo, "$file\0";
	1898
	1899	*/
	1900
	1901	#ifdef PERL_CORE
	1902
	1903	PERL_STATIC_INLINE bool
	1904	S_should_warn_nl(const char *pv) {
	1905	STRLEN len;
	1906
	1907	PERL_ARGS_ASSERT_SHOULD_WARN_NL;
	1908
	1909	len = strlen(pv);
	1910
	1911	return len > 0 && pv[len-1] == '\n';
	1912	}
	1913
	1914	#endif
	1915
	1916	#if defined(PERL_IN_PP_C) \|\| defined(PERL_IN_PP_HOT_C)
	1917
	1918	PERL_STATIC_INLINE bool
	1919	S_lossless_NV_to_IV(const NV nv, IV *ivp)
	1920	{
	1921	/* This function determines if the input NV 'nv' may be converted without
	1922	* loss of data to an IV. If not, it returns FALSE taking no other action.
	1923	* But if it is possible, it does the conversion, returning TRUE, and
	1924	* storing the converted result in 'ivp' /
	1925
	1926	PERL_ARGS_ASSERT_LOSSLESS_NV_TO_IV;
	1927
	1928	# if defined(Perl_isnan)
	1929
	1930	if (UNLIKELY(Perl_isnan(nv))) {
	1931	return FALSE;
	1932	}
	1933
	1934	# endif
	1935
	1936	if (UNLIKELY(nv < IV_MIN) \|\| UNLIKELY(nv > IV_MAX)) {
	1937	return FALSE;
	1938	}
	1939
	1940	if ((IV) nv != nv) {
	1941	return FALSE;
	1942	}
	1943
	1944	*ivp = (IV) nv;
	1945	return TRUE;
	1946	}
	1947
	1948	#endif
	1949
	1950	/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
	1951
	1952	#define MAX_CHARSET_NAME_LENGTH 2
	1953
	1954	PERL_STATIC_INLINE const char *
	1955	get_regex_charset_name(const U32 flags, STRLEN* const lenp)
	1956	{
	1957	/* Returns a string that corresponds to the name of the regex character set
	1958	* given by 'flags', and *lenp is set the length of that string, which
	1959	* cannot exceed MAX_CHARSET_NAME_LENGTH characters */
	1960
	1961	*lenp = 1;
	1962	switch (get_regex_charset(flags)) {
	1963	case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
	1964	case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
	1965	case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
	1966	case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
	1967	case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
	1968	*lenp = 2;
	1969	return ASCII_MORE_RESTRICT_PAT_MODS;
	1970	}
	1971	/* The NOT_REACHED; hides an assert() which has a rather complex
	1972	* definition in perl.h. */
	1973	NOT_REACHED; /* NOTREACHED */
	1974	return "?"; /* Unknown */
	1975	}
	1976
	1977	/*
	1978
	1979	Return false if any get magic is on the SV other than taint magic.
	1980
	1981	*/
	1982
	1983	PERL_STATIC_INLINE bool
	1984	S_sv_only_taint_gmagic(SV *sv) {
	1985	MAGIC *mg = SvMAGIC(sv);
	1986
	1987	PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
	1988
	1989	while (mg) {
	1990	if (mg->mg_type != PERL_MAGIC_taint
	1991	&& !(mg->mg_flags & MGf_GSKIP)
	1992	&& mg->mg_virtual->svt_get) {
	1993	return FALSE;
	1994	}
	1995	mg = mg->mg_moremagic;
	1996	}
	1997
	1998	return TRUE;
	1999	}
	2000
	2001	/* ------------------ cop.h ------------------------------------------- */
	2002
	2003
	2004	/* Enter a block. Push a new base context and return its address. */
	2005
	2006	PERL_STATIC_INLINE PERL_CONTEXT *
	2007	S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
	2008	{
	2009	PERL_CONTEXT * cx;
	2010
	2011	PERL_ARGS_ASSERT_CX_PUSHBLOCK;
	2012
	2013	CXINC;
	2014	cx = CX_CUR();
	2015	cx->cx_type = type;
	2016	cx->blk_gimme = gimme;
	2017	cx->blk_oldsaveix = saveix;
	2018	cx->blk_oldsp = (I32)(sp - PL_stack_base);
	2019	cx->blk_oldcop = PL_curcop;
	2020	cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
	2021	cx->blk_oldscopesp = PL_scopestack_ix;
	2022	cx->blk_oldpm = PL_curpm;
	2023	cx->blk_old_tmpsfloor = PL_tmps_floor;
	2024
	2025	PL_tmps_floor = PL_tmps_ix;
	2026	CX_DEBUG(cx, "PUSH");
	2027	return cx;
	2028	}
	2029
	2030
	2031	/* Exit a block (RETURN and LAST). */
	2032
	2033	PERL_STATIC_INLINE void
	2034	S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
	2035	{
	2036	PERL_ARGS_ASSERT_CX_POPBLOCK;
	2037
	2038	CX_DEBUG(cx, "POP");
	2039	/* these 3 are common to cx_popblock and cx_topblock */
	2040	PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
	2041	PL_scopestack_ix = cx->blk_oldscopesp;
	2042	PL_curpm = cx->blk_oldpm;
	2043
	2044	/* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
	2045	* and leaves a CX entry lying around for repeated use, so
	2046	* skip for multicall */ \
	2047	assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
	2048	\|\| PL_savestack_ix == cx->blk_oldsaveix);
	2049	PL_curcop = cx->blk_oldcop;
	2050	PL_tmps_floor = cx->blk_old_tmpsfloor;
	2051	}
	2052
	2053	/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
	2054	* Whereas cx_popblock() restores the state to the point just before
	2055	* cx_pushblock() was called, cx_topblock() restores it to the point just
	2056	* after cx_pushblock() was called. */
	2057
	2058	PERL_STATIC_INLINE void
	2059	S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
	2060	{
	2061	PERL_ARGS_ASSERT_CX_TOPBLOCK;
	2062
	2063	CX_DEBUG(cx, "TOP");
	2064	/* these 3 are common to cx_popblock and cx_topblock */
	2065	PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
	2066	PL_scopestack_ix = cx->blk_oldscopesp;
	2067	PL_curpm = cx->blk_oldpm;
	2068
	2069	PL_stack_sp = PL_stack_base + cx->blk_oldsp;
	2070	}
	2071
	2072
	2073	PERL_STATIC_INLINE void
	2074	S_cx_pushsub(pTHX_ PERL_CONTEXT cx, CV cv, OP *retop, bool hasargs)
	2075	{
	2076	U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
	2077
	2078	PERL_ARGS_ASSERT_CX_PUSHSUB;
	2079
	2080	PERL_DTRACE_PROBE_ENTRY(cv);
	2081	cx->blk_sub.cv = cv;
	2082	cx->blk_sub.olddepth = CvDEPTH(cv);
	2083	cx->blk_sub.prevcomppad = PL_comppad;
	2084	cx->cx_type \|= (hasargs) ? CXp_HASARGS : 0;
	2085	cx->blk_sub.retop = retop;
	2086	SvREFCNT_inc_simple_void_NN(cv);
	2087	cx->blk_u16 = PL_op->op_private & (phlags\|OPpDEREF);
	2088	}
	2089
	2090
	2091	/* subsets of cx_popsub() */
	2092
	2093	PERL_STATIC_INLINE void
	2094	S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
	2095	{
	2096	CV *cv;
	2097
	2098	PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
	2099	assert(CxTYPE(cx) == CXt_SUB);
	2100
	2101	PL_comppad = cx->blk_sub.prevcomppad;
	2102	PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
	2103	cv = cx->blk_sub.cv;
	2104	CvDEPTH(cv) = cx->blk_sub.olddepth;
	2105	cx->blk_sub.cv = NULL;
	2106	SvREFCNT_dec(cv);
	2107	}
	2108
	2109
	2110	/* handle the @_ part of leaving a sub */
	2111
	2112	PERL_STATIC_INLINE void
	2113	S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
	2114	{
	2115	AV *av;
	2116
	2117	PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
	2118	assert(CxTYPE(cx) == CXt_SUB);
	2119	assert(AvARRAY(MUTABLE_AV(
	2120	PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
	2121	CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
	2122
	2123	CX_POP_SAVEARRAY(cx);
	2124	av = MUTABLE_AV(PAD_SVl(0));
	2125	if (UNLIKELY(AvREAL(av)))
	2126	/* abandon @_ if it got reified */
	2127	clear_defarray(av, 0);
	2128	else {
	2129	CLEAR_ARGARRAY(av);
	2130	}
	2131	}
	2132
	2133
	2134	PERL_STATIC_INLINE void
	2135	S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
	2136	{
	2137	PERL_ARGS_ASSERT_CX_POPSUB;
	2138	assert(CxTYPE(cx) == CXt_SUB);
	2139
	2140	PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
	2141
	2142	if (CxHASARGS(cx))
	2143	cx_popsub_args(cx);
	2144	cx_popsub_common(cx);
	2145	}
	2146
	2147
	2148	PERL_STATIC_INLINE void
	2149	S_cx_pushformat(pTHX_ PERL_CONTEXT cx, CV cv, OP retop, GV gv)
	2150	{
	2151	PERL_ARGS_ASSERT_CX_PUSHFORMAT;
	2152
	2153	cx->blk_format.cv = cv;
	2154	cx->blk_format.retop = retop;
	2155	cx->blk_format.gv = gv;
	2156	cx->blk_format.dfoutgv = PL_defoutgv;
	2157	cx->blk_format.prevcomppad = PL_comppad;
	2158	cx->blk_u16 = 0;
	2159
	2160	SvREFCNT_inc_simple_void_NN(cv);
	2161	CvDEPTH(cv)++;
	2162	SvREFCNT_inc_void(cx->blk_format.dfoutgv);
	2163	}
	2164
	2165
	2166	PERL_STATIC_INLINE void
	2167	S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
	2168	{
	2169	CV *cv;
	2170	GV *dfout;
	2171
	2172	PERL_ARGS_ASSERT_CX_POPFORMAT;
	2173	assert(CxTYPE(cx) == CXt_FORMAT);
	2174
	2175	dfout = cx->blk_format.dfoutgv;
	2176	setdefout(dfout);
	2177	cx->blk_format.dfoutgv = NULL;
	2178	SvREFCNT_dec_NN(dfout);
	2179
	2180	PL_comppad = cx->blk_format.prevcomppad;
	2181	PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
	2182	cv = cx->blk_format.cv;
	2183	cx->blk_format.cv = NULL;
	2184	--CvDEPTH(cv);
	2185	SvREFCNT_dec_NN(cv);
	2186	}
	2187
	2188
	2189	PERL_STATIC_INLINE void
	2190	S_cx_pusheval(pTHX_ PERL_CONTEXT cx, OP retop, SV *namesv)
	2191	{
	2192	PERL_ARGS_ASSERT_CX_PUSHEVAL;
	2193
	2194	cx->blk_eval.retop = retop;
	2195	cx->blk_eval.old_namesv = namesv;
	2196	cx->blk_eval.old_eval_root = PL_eval_root;
	2197	cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
	2198	cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
	2199	cx->blk_eval.cur_top_env = PL_top_env;
	2200
	2201	assert(!(PL_in_eval & ~ 0x3F));
	2202	assert(!(PL_op->op_type & ~0x1FF));
	2203	cx->blk_u16 = (PL_in_eval & 0x3F) \| ((U16)PL_op->op_type << 7);
	2204	}
	2205
	2206
	2207	PERL_STATIC_INLINE void
	2208	S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
	2209	{
	2210	SV *sv;
	2211
	2212	PERL_ARGS_ASSERT_CX_POPEVAL;
	2213	assert(CxTYPE(cx) == CXt_EVAL);
	2214
	2215	PL_in_eval = CxOLD_IN_EVAL(cx);
	2216	assert(!(PL_in_eval & 0xc0));
	2217	PL_eval_root = cx->blk_eval.old_eval_root;
	2218	sv = cx->blk_eval.cur_text;
	2219	if (sv && CxEVAL_TXT_REFCNTED(cx)) {
	2220	cx->blk_eval.cur_text = NULL;
	2221	SvREFCNT_dec_NN(sv);
	2222	}
	2223
	2224	sv = cx->blk_eval.old_namesv;
	2225	if (sv) {
	2226	cx->blk_eval.old_namesv = NULL;
	2227	SvREFCNT_dec_NN(sv);
	2228	}
	2229	}
	2230
	2231
	2232	/* push a plain loop, i.e.
	2233	* { block }
	2234	* while (cond) { block }
	2235	* for (init;cond;continue) { block }
	2236	* This loop can be last/redo'ed etc.
	2237	*/
	2238
	2239	PERL_STATIC_INLINE void
	2240	S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
	2241	{
	2242	PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
	2243	cx->blk_loop.my_op = cLOOP;
	2244	}
	2245
	2246
	2247	/* push a true for loop, i.e.
	2248	* for var (list) { block }
	2249	*/
	2250
	2251	PERL_STATIC_INLINE void
	2252	S_cx_pushloop_for(pTHX_ PERL_CONTEXT cx, void itervarp, SV* itersave)
	2253	{
	2254	PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
	2255
	2256	/* this one line is common with cx_pushloop_plain */
	2257	cx->blk_loop.my_op = cLOOP;
	2258
	2259	cx->blk_loop.itervar_u.svp = (SV**)itervarp;
	2260	cx->blk_loop.itersave = itersave;
	2261	#ifdef USE_ITHREADS
	2262	cx->blk_loop.oldcomppad = PL_comppad;
	2263	#endif
	2264	}
	2265
	2266
	2267	/* pop all loop types, including plain */
	2268
	2269	PERL_STATIC_INLINE void
	2270	S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
	2271	{
	2272	PERL_ARGS_ASSERT_CX_POPLOOP;
	2273
	2274	assert(CxTYPE_is_LOOP(cx));
	2275	if ( CxTYPE(cx) == CXt_LOOP_ARY
	2276	\|\| CxTYPE(cx) == CXt_LOOP_LAZYSV)
	2277	{
	2278	/* Free ary or cur. This assumes that state_u.ary.ary
	2279	* aligns with state_u.lazysv.cur. See cx_dup() */
	2280	SV *sv = cx->blk_loop.state_u.lazysv.cur;
	2281	cx->blk_loop.state_u.lazysv.cur = NULL;
	2282	SvREFCNT_dec_NN(sv);
	2283	if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
	2284	sv = cx->blk_loop.state_u.lazysv.end;
	2285	cx->blk_loop.state_u.lazysv.end = NULL;
	2286	SvREFCNT_dec_NN(sv);
	2287	}
	2288	}
	2289	if (cx->cx_type & (CXp_FOR_PAD\|CXp_FOR_GV)) {
	2290	SV *cursv;
	2291	SV **svp = (cx)->blk_loop.itervar_u.svp;
	2292	if ((cx->cx_type & CXp_FOR_GV))
	2293	svp = &GvSV((GV*)svp);
	2294	cursv = *svp;
	2295	*svp = cx->blk_loop.itersave;
	2296	cx->blk_loop.itersave = NULL;
	2297	SvREFCNT_dec(cursv);
	2298	}
	2299	}
	2300
	2301
	2302	PERL_STATIC_INLINE void
	2303	S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
	2304	{
	2305	PERL_ARGS_ASSERT_CX_PUSHWHEN;
	2306
	2307	cx->blk_givwhen.leave_op = cLOGOP->op_other;
	2308	}
	2309
	2310
	2311	PERL_STATIC_INLINE void
	2312	S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
	2313	{
	2314	PERL_ARGS_ASSERT_CX_POPWHEN;
	2315	assert(CxTYPE(cx) == CXt_WHEN);
	2316
	2317	PERL_UNUSED_ARG(cx);
	2318	PERL_UNUSED_CONTEXT;
	2319	/* currently NOOP */
	2320	}
	2321
	2322
	2323	PERL_STATIC_INLINE void
	2324	S_cx_pushgiven(pTHX_ PERL_CONTEXT cx, SV orig_defsv)
	2325	{
	2326	PERL_ARGS_ASSERT_CX_PUSHGIVEN;
	2327
	2328	cx->blk_givwhen.leave_op = cLOGOP->op_other;
	2329	cx->blk_givwhen.defsv_save = orig_defsv;
	2330	}
	2331
	2332
	2333	PERL_STATIC_INLINE void
	2334	S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
	2335	{
	2336	SV *sv;
	2337
	2338	PERL_ARGS_ASSERT_CX_POPGIVEN;
	2339	assert(CxTYPE(cx) == CXt_GIVEN);
	2340
	2341	sv = GvSV(PL_defgv);
	2342	GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
	2343	cx->blk_givwhen.defsv_save = NULL;
	2344	SvREFCNT_dec(sv);
	2345	}
	2346
	2347	/* ------------------ util.h ------------------------------------------- */
	2348
	2349	/*
	2350	=head1 Miscellaneous Functions
	2351
	2352	=for apidoc foldEQ
	2353
	2354	Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
	2355	same
	2356	case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
	2357	match themselves and their opposite case counterparts. Non-cased and non-ASCII
	2358	range bytes match only themselves.
	2359
	2360	=cut
	2361	*/
	2362
	2363	PERL_STATIC_INLINE I32
	2364	Perl_foldEQ(const char s1, const char s2, I32 len)
	2365	{
	2366	const U8 a = (const U8 )s1;
	2367	const U8 b = (const U8 )s2;
	2368
	2369	PERL_ARGS_ASSERT_FOLDEQ;
	2370
	2371	assert(len >= 0);
	2372
	2373	while (len--) {
	2374	if (a != b && a != PL_fold[b])
	2375	return 0;
	2376	a++,b++;
	2377	}
	2378	return 1;
	2379	}
	2380
	2381	PERL_STATIC_INLINE I32
	2382	Perl_foldEQ_latin1(const char s1, const char s2, I32 len)
	2383	{
	2384	/* Compare non-UTF-8 using Unicode (Latin1) semantics. Works on all folds
	2385	* representable without UTF-8, except for LATIN_SMALL_LETTER_SHARP_S, and
	2386	* does not check for this. Nor does it check that the strings each have
	2387	* at least 'len' characters. */
	2388
	2389	const U8 a = (const U8 )s1;
	2390	const U8 b = (const U8 )s2;
	2391
	2392	PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
	2393
	2394	assert(len >= 0);
	2395
	2396	while (len--) {
	2397	if (a != b && a != PL_fold_latin1[b]) {
	2398	return 0;
	2399	}
	2400	a++, b++;
	2401	}
	2402	return 1;
	2403	}
	2404
	2405	/*
	2406	=for apidoc foldEQ_locale
	2407
	2408	Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
	2409	same case-insensitively in the current locale; false otherwise.
	2410
	2411	=cut
	2412	*/
	2413
	2414	PERL_STATIC_INLINE I32
	2415	Perl_foldEQ_locale(const char s1, const char s2, I32 len)
	2416	{
	2417	dVAR;
	2418	const U8 a = (const U8 )s1;
	2419	const U8 b = (const U8 )s2;
	2420
	2421	PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
	2422
	2423	assert(len >= 0);
	2424
	2425	while (len--) {
	2426	if (a != b && a != PL_fold_locale[b])
	2427	return 0;
	2428	a++,b++;
	2429	}
	2430	return 1;
	2431	}
	2432
	2433	#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) \|\| defined(PERL_EXT))
	2434
	2435	PERL_STATIC_INLINE void *
	2436	S_my_memrchr(const char * s, const char c, const STRLEN len)
	2437	{
	2438	/* memrchr(), since many platforms lack it */
	2439
	2440	const char * t = s + len - 1;
	2441
	2442	PERL_ARGS_ASSERT_MY_MEMRCHR;
	2443
	2444	while (t >= s) {
	2445	if (*t == c) {
	2446	return (void *) t;
	2447	}
	2448	t--;
	2449	}
	2450
	2451	return NULL;
	2452	}
	2453
	2454	#endif
	2455
	2456	/*
	2457	* ex: set ts=8 sts=4 sw=4 et:
	2458	*/