perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* inline.h
	2	*
	3	* Copyright (C) 2012 by Larry Wall and others
	4	*
	5	* You may distribute under the terms of either the GNU General Public
	6	* License or the Artistic License, as specified in the README file.
	7	*
	8	* This file is a home for static inline functions that cannot go in other
	9	* header files, because they depend on proto.h (included after most other
	10	* headers) or struct definitions.
	11	*
	12	* Each section names the header file that the functions "belong" to.
	13	*/
	14
	15	/* ------------------------------- av.h ------------------------------- */
	16
	17	PERL_STATIC_INLINE SSize_t
	18	S_av_top_index(pTHX_ AV *av)
	19	{
	20	PERL_ARGS_ASSERT_AV_TOP_INDEX;
	21	assert(SvTYPE(av) == SVt_PVAV);
	22
	23	return AvFILL(av);
	24	}
	25
	26	/* ------------------------------- cv.h ------------------------------- */
	27
	28	PERL_STATIC_INLINE GV *
	29	S_CvGV(pTHX_ CV *sv)
	30	{
	31	return CvNAMED(sv)
	32	? Perl_cvgv_from_hek(aTHX_ sv)
	33	: ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
	34	}
	35
	36	PERL_STATIC_INLINE I32 *
	37	S_CvDEPTHp(const CV * const sv)
	38	{
	39	assert(SvTYPE(sv) == SVt_PVCV \|\| SvTYPE(sv) == SVt_PVFM);
	40	return &((XPVCV*)SvANY(sv))->xcv_depth;
	41	}
	42
	43	/*
	44	CvPROTO returns the prototype as stored, which is not necessarily what
	45	the interpreter should be using. Specifically, the interpreter assumes
	46	that spaces have been stripped, which has been the case if the prototype
	47	was added by toke.c, but is generally not the case if it was added elsewhere.
	48	Since we can't enforce the spacelessness at assignment time, this routine
	49	provides a temporary copy at parse time with spaces removed.
	50	I<orig> is the start of the original buffer, I<len> is the length of the
	51	prototype and will be updated when this returns.
	52	*/
	53
	#ifdef PERL_CORE
	/* Build a copy of the 'len'-byte buffer at 'orig' with every byte for which
	 * isSPACE() is true removed.
	 *
	 * The copy lives in the string buffer of a new SV created with SVs_TEMP.
	 * On return *len holds the length of the stripped copy, and the returned
	 * pointer is that SV's buffer, NUL-terminated. */
	PERL_STATIC_INLINE char *
	S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
	{
	    SV * tmpsv;
	    char * tmps;
	    tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
	    tmps = SvPVX(tmpsv);
	    /* *len doubles as the loop counter; it is overwritten with the real
	     * result once the loop is done */
	    while ((*len)--) {
	        if (!isSPACE(*orig))
	            *tmps++ = *orig;    /* keep only the non-space bytes */
	        orig++;
	    }
	    *tmps = '\0';
	    *len = tmps - SvPVX(tmpsv);
	    return SvPVX(tmpsv);
	}
	#endif
	72
	73	/* ------------------------------- mg.h ------------------------------- */
	74
	#if defined(PERL_CORE) || defined(PERL_EXT)
	/* assumes get-magic and stringification have already occurred */
	/* Return the position stored in 'mg' (which must be PERL_MAGIC_regex_global
	 * magic with mg_len != -1) as a byte offset into the string 's' / 'len'
	 * belonging to 'sv'.
	 *
	 * When MGf_BYTES is set on the magic, or DO_UTF8(sv) is false, mg_len is
	 * returned as is.  Otherwise mg_len is converted with sv_or_pv_pos_u2b();
	 * if it exceeds what sv_or_pv_len_utf8() reports, len+1 is returned
	 * instead of attempting the conversion. */
	PERL_STATIC_INLINE STRLEN
	S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
	{
	    assert(mg->mg_type == PERL_MAGIC_regex_global);
	    assert(mg->mg_len != -1);
	    if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
	        return (STRLEN)mg->mg_len;
	    else {
	        const STRLEN pos = (STRLEN)mg->mg_len;
	        /* Without this check, we may read past the end of the buffer: */
	        if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
	        return sv_or_pv_pos_u2b(sv, s, pos, NULL);
	    }
	}
	#endif
	92
	93	/* ------------------------------- pad.h ------------------------------ */
	94
	#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
	/* Return TRUE if 'seq' falls inside the COP sequence range recorded on the
	 * pad name 'pn', FALSE otherwise.
	 *
	 * A low bound of PERL_PADSEQ_INTRO always gives FALSE.  A high bound of
	 * PERL_PADSEQ_INTRO selects the "in compiling scope" test, which compares
	 * the distance between 'seq' and the low bound against U32_MAX >> 1.  In
	 * the remaining case the range is (low, high], and a range whose low bound
	 * is greater than its high bound is treated as wrapped around. */
	PERL_STATIC_INLINE bool
	PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
	{
	    /* is seq within the range _LOW to _HIGH ?
	     * This is complicated by the fact that PL_cop_seqmax
	     * may have wrapped around at some point */
	    if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
	        return FALSE; /* not yet introduced */

	    if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
	        /* in compiling scope */
	        if (
	            (seq > COP_SEQ_RANGE_LOW(pn))
	            ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
	            : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
	        )
	            return TRUE;
	    }
	    else if (
	        (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
	         ?
	            (  seq >  COP_SEQ_RANGE_LOW(pn)
	            || seq <= COP_SEQ_RANGE_HIGH(pn))

	         : (   seq >  COP_SEQ_RANGE_LOW(pn)
	            && seq <= COP_SEQ_RANGE_HIGH(pn))
	    )
	        return TRUE;
	    return FALSE;
	}
	#endif
	127
	128	/* ------------------------------- pp.h ------------------------------- */
	129
	130	PERL_STATIC_INLINE I32
	131	S_TOPMARK(pTHX)
	132	{
	133	DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
	134	"MARK top %p %" IVdf "\n",
	135	PL_markstack_ptr,
	136	(IV)*PL_markstack_ptr)));
	137	return *PL_markstack_ptr;
	138	}
	139
	140	PERL_STATIC_INLINE I32
	141	S_POPMARK(pTHX)
	142	{
	143	DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
	144	"MARK pop %p %" IVdf "\n",
	145	(PL_markstack_ptr-1),
	146	(IV)*(PL_markstack_ptr-1))));
	147	assert((PL_markstack_ptr > PL_markstack) \|\| !"MARK underflow");
	148	return *PL_markstack_ptr--;
	149	}
	150
	151	/* ----------------------------- regexp.h ----------------------------- */
	152
	153	PERL_STATIC_INLINE struct regexp *
	154	S_ReANY(const REGEXP * const re)
	155	{
	156	assert(isREGEXP(re));
	157	return re->sv_u.svu_rx;
	158	}
	159
	160	/* ------------------------------- sv.h ------------------------------- */
	161
	162	PERL_STATIC_INLINE SV *
	163	S_SvREFCNT_inc(SV *sv)
	164	{
	165	if (LIKELY(sv != NULL))
	166	SvREFCNT(sv)++;
	167	return sv;
	168	}
	169	PERL_STATIC_INLINE SV *
	170	S_SvREFCNT_inc_NN(SV *sv)
	171	{
	172	SvREFCNT(sv)++;
	173	return sv;
	174	}
	175	PERL_STATIC_INLINE void
	176	S_SvREFCNT_inc_void(SV *sv)
	177	{
	178	if (LIKELY(sv != NULL))
	179	SvREFCNT(sv)++;
	180	}
	181	PERL_STATIC_INLINE void
	182	S_SvREFCNT_dec(pTHX_ SV *sv)
	183	{
	184	if (LIKELY(sv != NULL)) {
	185	U32 rc = SvREFCNT(sv);
	186	if (LIKELY(rc > 1))
	187	SvREFCNT(sv) = rc - 1;
	188	else
	189	Perl_sv_free2(aTHX_ sv, rc);
	190	}
	191	}
	192
	193	PERL_STATIC_INLINE void
	194	S_SvREFCNT_dec_NN(pTHX_ SV *sv)
	195	{
	196	U32 rc = SvREFCNT(sv);
	197	if (LIKELY(rc > 1))
	198	SvREFCNT(sv) = rc - 1;
	199	else
	200	Perl_sv_free2(aTHX_ sv, rc);
	201	}
	202
	203	PERL_STATIC_INLINE void
	204	SvAMAGIC_on(SV *sv)
	205	{
	206	assert(SvROK(sv));
	207	if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
	208	}
	209	PERL_STATIC_INLINE void
	210	SvAMAGIC_off(SV *sv)
	211	{
	212	if (SvROK(sv) && SvOBJECT(SvRV(sv)))
	213	HvAMAGIC_off(SvSTASH(SvRV(sv)));
	214	}
	215
	216	PERL_STATIC_INLINE U32
	217	S_SvPADSTALE_on(SV *sv)
	218	{
	219	assert(!(SvFLAGS(sv) & SVs_PADTMP));
	220	return SvFLAGS(sv) \|= SVs_PADSTALE;
	221	}
	222	PERL_STATIC_INLINE U32
	223	S_SvPADSTALE_off(SV *sv)
	224	{
	225	assert(!(SvFLAGS(sv) & SVs_PADTMP));
	226	return SvFLAGS(sv) &= ~SVs_PADSTALE;
	227	}
	#if defined(PERL_CORE) || defined (PERL_EXT)
	/* Convert the character offset 'pos' into a byte offset.
	 *
	 * If 'sv' has get-magic (SvGAMAGIC), the conversion is done on the buffer
	 * 'pv' with utf8_hop(): the result is the distance from 'pv' to the hopped
	 * position, and when 'lenp' is non-NULL the character count in *lenp is
	 * replaced by the byte length of that many characters starting there.
	 * Otherwise the work is delegated to sv_pos_u2b_flags() on 'sv' with
	 * SV_CONST_RETURN, and 'pv' is not used. */
	PERL_STATIC_INLINE STRLEN
	S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
	{
	    PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
	    if (SvGAMAGIC(sv)) {
	        U8 *hopped = utf8_hop((U8 *)pv, pos);
	        if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
	        return (STRLEN)(hopped - (U8 *)pv);
	    }
	    return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
	}
	#endif
	241
	242	/* ------------------------------- handy.h ------------------------------- */
	243
	244	/* saves machine code for a common noreturn idiom typically used in Newx() /
	245	#ifdef GCC_DIAG_PRAGMA
	246	GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
	247	#endif
	248	static void
	249	S_croak_memory_wrap(void)
	250	{
	251	Perl_croak_nocontext("%s",PL_memory_wrap);
	252	}
	253	#ifdef GCC_DIAG_PRAGMA
	254	GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
	255	#endif
	256
	257	/* ------------------------------- utf8.h ------------------------------- */
	258
	259	/*
	260	=head1 Unicode Support
	261	*/
	262
	263	PERL_STATIC_INLINE void
	264	S_append_utf8_from_native_byte(const U8 byte, U8** dest)
	265	{
	266	/* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
	267	* encoded string at 'dest', updating 'dest' to include it */
	268
	269	PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
	270
	271	if (NATIVE_BYTE_IS_INVARIANT(byte))
	272	((dest)++) = byte;
	273	else {
	274	((dest)++) = UTF8_EIGHT_BIT_HI(byte);
	275	((dest)++) = UTF8_EIGHT_BIT_LO(byte);
	276	}
	277	}
	278
	279	/*
	280	=for apidoc valid_utf8_to_uvchr
	281	Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
	282	the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
	283	it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
	284	non-Unicode code points are allowed.
	285
	286	=cut
	287
	288	*/
	289
	290	PERL_STATIC_INLINE UV
	291	Perl_valid_utf8_to_uvchr(const U8 s, STRLEN retlen)
	292	{
	293	const UV expectlen = UTF8SKIP(s);
	294	const U8* send = s + expectlen;
	295	UV uv = *s;
	296
	297	PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
	298
	299	if (retlen) {
	300	*retlen = expectlen;
	301	}
	302
	303	/* An invariant is trivially returned */
	304	if (expectlen == 1) {
	305	return uv;
	306	}
	307
	308	/* Remove the leading bits that indicate the number of bytes, leaving just
	309	* the bits that are part of the value */
	310	uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
	311
	312	/* Now, loop through the remaining bytes, accumulating each into the
	313	* working total as we go. (I khw tried unrolling the loop for up to 4
	314	* bytes, but there was no performance improvement) */
	315	for (++s; s < send; s++) {
	316	uv = UTF8_ACCUMULATE(uv, *s);
	317	}
	318
	319	return UNI_TO_NATIVE(uv);
	320
	321	}
	322
	323	/*
	324	=for apidoc is_utf8_invariant_string
	325
	326	Returns TRUE if the first C<len> bytes of the string C<s> are the same
	327	regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
	328	EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
	329	are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
	330	the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
	331	characters are invariant, but so also are the C1 controls.
	332
	333	If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
	334	use this option, that C<s> can't have embedded C<NUL> characters and has to
	335	have a terminating C<NUL> byte).
	336
	337	See also
	338	C<L</is_utf8_string>>,
	339	C<L</is_utf8_string_flags>>,
	340	C<L</is_utf8_string_loc>>,
	341	C<L</is_utf8_string_loc_flags>>,
	342	C<L</is_utf8_string_loclen>>,
	343	C<L</is_utf8_string_loclen_flags>>,
	344	C<L</is_utf8_fixed_width_buf_flags>>,
	345	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	346	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	347	C<L</is_strict_utf8_string>>,
	348	C<L</is_strict_utf8_string_loc>>,
	349	C<L</is_strict_utf8_string_loclen>>,
	350	C<L</is_c9strict_utf8_string>>,
	351	C<L</is_c9strict_utf8_string_loc>>,
	352	and
	353	C<L</is_c9strict_utf8_string_loclen>>.
	354
	355	=cut
	356	*/
	357
	358	PERL_STATIC_INLINE bool
	359	S_is_utf8_invariant_string(const U8* const s, const STRLEN len)
	360	{
	361	const U8* const send = s + (len ? len : strlen((const char *)s));
	362	const U8* x = s;
	363
	364	PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING;
	365
	366	for (; x < send; ++x) {
	367	if (!UTF8_IS_INVARIANT(*x))
	368	return FALSE;
	369	}
	370
	371	return TRUE;
	372	}
	373
	374	/*
	375	=for apidoc is_utf8_string
	376
	377	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	378	Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
	379	be calculated using C<strlen(s)> (which means if you use this option, that C<s>
	380	can't have embedded C<NUL> characters and has to have a terminating C<NUL>
	381	byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
	382
	383	This function considers Perl's extended UTF-8 to be valid. That means that
	384	code points above Unicode, surrogates, and non-character code points are
	385	considered valid by this function. Use C<L</is_strict_utf8_string>>,
	386	C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
	387	code points are considered valid.
	388
	389	See also
	390	C<L</is_utf8_invariant_string>>,
	391	C<L</is_utf8_string_loc>>,
	392	C<L</is_utf8_string_loclen>>,
	393	C<L</is_utf8_fixed_width_buf_flags>>,
	394	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	395	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	396
	397	=cut
	398	*/
	399
	400	PERL_STATIC_INLINE bool
	401	Perl_is_utf8_string(const U8 *s, const STRLEN len)
	402	{
	403	/* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
	404	* Be aware of possible changes to that */
	405
	406	const U8* const send = s + (len ? len : strlen((const char *)s));
	407	const U8* x = s;
	408
	409	PERL_ARGS_ASSERT_IS_UTF8_STRING;
	410
	411	while (x < send) {
	412	const STRLEN cur_len = isUTF8_CHAR(x, send);
	413	if (UNLIKELY(! cur_len)) {
	414	return FALSE;
	415	}
	416	x += cur_len;
	417	}
	418
	419	return TRUE;
	420	}
	421
	422	/*
	423	=for apidoc is_strict_utf8_string
	424
	425	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	426	UTF-8-encoded string that is fully interchangeable by any application using
	427	Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
	428	calculated using C<strlen(s)> (which means if you use this option, that C<s>
	429	can't have embedded C<NUL> characters and has to have a terminating C<NUL>
	430	byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
	431
	432	This function returns FALSE for strings containing any
	433	code points above the Unicode max of 0x10FFFF, surrogate code points, or
	434	non-character code points.
	435
	436	See also
	437	C<L</is_utf8_invariant_string>>,
	438	C<L</is_utf8_string>>,
	439	C<L</is_utf8_string_flags>>,
	440	C<L</is_utf8_string_loc>>,
	441	C<L</is_utf8_string_loc_flags>>,
	442	C<L</is_utf8_string_loclen>>,
	443	C<L</is_utf8_string_loclen_flags>>,
	444	C<L</is_utf8_fixed_width_buf_flags>>,
	445	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	446	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	447	C<L</is_strict_utf8_string_loc>>,
	448	C<L</is_strict_utf8_string_loclen>>,
	449	C<L</is_c9strict_utf8_string>>,
	450	C<L</is_c9strict_utf8_string_loc>>,
	451	and
	452	C<L</is_c9strict_utf8_string_loclen>>.
	453
	454	=cut
	455	*/
	456
	457	PERL_STATIC_INLINE bool
	458	S_is_strict_utf8_string(const U8 *s, const STRLEN len)
	459	{
	460	const U8* const send = s + (len ? len : strlen((const char *)s));
	461	const U8* x = s;
	462
	463	PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING;
	464
	465	while (x < send) {
	466	const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
	467	if (UNLIKELY(! cur_len)) {
	468	return FALSE;
	469	}
	470	x += cur_len;
	471	}
	472
	473	return TRUE;
	474	}
	475
	476	/*
	477	=for apidoc is_c9strict_utf8_string
	478
	479	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	480	UTF-8-encoded string that conforms to
	481	L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
	482	otherwise it returns FALSE. If C<len> is 0, it will be calculated using
	483	C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
	484	C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
	485	characters being ASCII constitute 'a valid UTF-8 string'.
	486
	487	This function returns FALSE for strings containing any code points above the
	488	Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
	489	code points per
	490	L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
	491
	492	See also
	493	C<L</is_utf8_invariant_string>>,
	494	C<L</is_utf8_string>>,
	495	C<L</is_utf8_string_flags>>,
	496	C<L</is_utf8_string_loc>>,
	497	C<L</is_utf8_string_loc_flags>>,
	498	C<L</is_utf8_string_loclen>>,
	499	C<L</is_utf8_string_loclen_flags>>,
	500	C<L</is_utf8_fixed_width_buf_flags>>,
	501	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	502	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	503	C<L</is_strict_utf8_string>>,
	504	C<L</is_strict_utf8_string_loc>>,
	505	C<L</is_strict_utf8_string_loclen>>,
	506	C<L</is_c9strict_utf8_string_loc>>,
	507	and
	508	C<L</is_c9strict_utf8_string_loclen>>.
	509
	510	=cut
	511	*/
	512
	513	PERL_STATIC_INLINE bool
	514	S_is_c9strict_utf8_string(const U8 *s, const STRLEN len)
	515	{
	516	const U8* const send = s + (len ? len : strlen((const char *)s));
	517	const U8* x = s;
	518
	519	PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING;
	520
	521	while (x < send) {
	522	const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
	523	if (UNLIKELY(! cur_len)) {
	524	return FALSE;
	525	}
	526	x += cur_len;
	527	}
	528
	529	return TRUE;
	530	}
	531
	532	/* The above 3 functions could have been moved into the more general one just
	533	* below, and made #defines that call it with the right 'flags'. They are
	534	* currently kept separate to increase their chances of getting inlined */
	535
	536	/*
	537	=for apidoc is_utf8_string_flags
	538
	539	Returns TRUE if the first C<len> bytes of string C<s> form a valid
	540	UTF-8 string, subject to the restrictions imposed by C<flags>;
	541	returns FALSE otherwise. If C<len> is 0, it will be calculated
	542	using C<strlen(s)> (which means if you use this option, that C<s> can't have
	543	embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
	544	that all characters being ASCII constitute 'a valid UTF-8 string'.
	545
	546	If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
	547	C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
	548	as C<L</is_strict_utf8_string>>; and if C<flags> is
	549	C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
	550	C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
	551	combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
	552	C<L</utf8n_to_uvchr>>, with the same meanings.
	553
	554	See also
	555	C<L</is_utf8_invariant_string>>,
	556	C<L</is_utf8_string>>,
	557	C<L</is_utf8_string_loc>>,
	558	C<L</is_utf8_string_loc_flags>>,
	559	C<L</is_utf8_string_loclen>>,
	560	C<L</is_utf8_string_loclen_flags>>,
	561	C<L</is_utf8_fixed_width_buf_flags>>,
	562	C<L</is_utf8_fixed_width_buf_loc_flags>>,
	563	C<L</is_utf8_fixed_width_buf_loclen_flags>>,
	564	C<L</is_strict_utf8_string>>,
	565	C<L</is_strict_utf8_string_loc>>,
	566	C<L</is_strict_utf8_string_loclen>>,
	567	C<L</is_c9strict_utf8_string>>,
	568	C<L</is_c9strict_utf8_string_loc>>,
	569	and
	570	C<L</is_c9strict_utf8_string_loclen>>.
	571
	572	=cut
	573	*/
	574
	575	PERL_STATIC_INLINE bool
	576	S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
	577	{
	578	const U8* const send = s + (len ? len : strlen((const char *)s));
	579	const U8* x = s;
	580
	581	PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
	582	assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
	583	\|UTF8_DISALLOW_ABOVE_31_BIT)));
	584
	585	if (flags == 0) {
	586	return is_utf8_string(s, len);
	587	}
	588
	589	if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
	590	== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
	591	{
	592	return is_strict_utf8_string(s, len);
	593	}
	594
	595	if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
	596	== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
	597	{
	598	return is_c9strict_utf8_string(s, len);
	599	}
	600
	601	while (x < send) {
	602	STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
	603	if (UNLIKELY(! cur_len)) {
	604	return FALSE;
	605	}
	606	x += cur_len;
	607	}
	608
	609	return TRUE;
	610	}
	611
	612	/*
	613
	614	=for apidoc is_utf8_string_loc
	615
	616	Like C<L</is_utf8_string>> but stores the location of the failure (in the
	617	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	618	"utf8ness success") in the C<ep> pointer.
	619
	620	See also C<L</is_utf8_string_loclen>>.
	621
	622	=cut
	623	*/
	624
	/* is_utf8_string_loclen() with 0 passed as its final ('el') argument. */
	#define is_utf8_string_loc(s, len, ep)                                      \
	                                    is_utf8_string_loclen(s, len, ep, 0)
	626
	627	/*
	628
	629	=for apidoc is_utf8_string_loclen
	630
	631	Like C<L</is_utf8_string>> but stores the location of the failure (in the
	632	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	633	"utf8ness success") in the C<ep> pointer, and the number of UTF-8
	634	encoded characters in the C<el> pointer.
	635
	636	See also C<L</is_utf8_string_loc>>.
	637
	638	=cut
	639	*/
	640
	641	PERL_STATIC_INLINE bool
	642	Perl_is_utf8_string_loclen(const U8 s, const STRLEN len, const U8 ep, STRLEN el)
	643	{
	644	const U8* const send = s + (len ? len : strlen((const char *)s));
	645	const U8* x = s;
	646	STRLEN outlen = 0;
	647
	648	PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
	649
	650	while (x < send) {
	651	const STRLEN cur_len = isUTF8_CHAR(x, send);
	652	if (UNLIKELY(! cur_len)) {
	653	break;
	654	}
	655	x += cur_len;
	656	outlen++;
	657	}
	658
	659	if (el)
	660	*el = outlen;
	661
	662	if (ep) {
	663	*ep = x;
	664	}
	665
	666	return (x == send);
	667	}
	668
	669	/*
	670
	671	=for apidoc is_strict_utf8_string_loc
	672
	673	Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
	674	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	675	"utf8ness success") in the C<ep> pointer.
	676
	677	See also C<L</is_strict_utf8_string_loclen>>.
	678
	679	=cut
	680	*/
	681
	/* is_strict_utf8_string_loclen() with 0 passed as its final ('el')
	 * argument. */
	#define is_strict_utf8_string_loc(s, len, ep)                               \
	                            is_strict_utf8_string_loclen(s, len, ep, 0)
	684
	685	/*
	686
	687	=for apidoc is_strict_utf8_string_loclen
	688
	689	Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
	690	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	691	"utf8ness success") in the C<ep> pointer, and the number of UTF-8
	692	encoded characters in the C<el> pointer.
	693
	694	See also C<L</is_strict_utf8_string_loc>>.
	695
	696	=cut
	697	*/
	698
	699	PERL_STATIC_INLINE bool
	700	S_is_strict_utf8_string_loclen(const U8 s, const STRLEN len, const U8 ep, STRLEN el)
	701	{
	702	const U8* const send = s + (len ? len : strlen((const char *)s));
	703	const U8* x = s;
	704	STRLEN outlen = 0;
	705
	706	PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
	707
	708	while (x < send) {
	709	const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
	710	if (UNLIKELY(! cur_len)) {
	711	break;
	712	}
	713	x += cur_len;
	714	outlen++;
	715	}
	716
	717	if (el)
	718	*el = outlen;
	719
	720	if (ep) {
	721	*ep = x;
	722	}
	723
	724	return (x == send);
	725	}
	726
	727	/*
	728
	729	=for apidoc is_c9strict_utf8_string_loc
	730
	731	Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
	732	the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	733	"utf8ness success") in the C<ep> pointer.
	734
	735	See also C<L</is_c9strict_utf8_string_loclen>>.
	736
	737	=cut
	738	*/
	739
	/* is_c9strict_utf8_string_loclen() with 0 passed as its final ('el')
	 * argument. */
	#define is_c9strict_utf8_string_loc(s, len, ep)                             \
	                            is_c9strict_utf8_string_loclen(s, len, ep, 0)
	742
	743	/*
	744
	745	=for apidoc is_c9strict_utf8_string_loclen
	746
	747	Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
	748	the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	749	"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
	750	characters in the C<el> pointer.
	751
	752	See also C<L</is_c9strict_utf8_string_loc>>.
	753
	754	=cut
	755	*/
	756
	757	PERL_STATIC_INLINE bool
	758	S_is_c9strict_utf8_string_loclen(const U8 s, const STRLEN len, const U8 ep, STRLEN el)
	759	{
	760	const U8* const send = s + (len ? len : strlen((const char *)s));
	761	const U8* x = s;
	762	STRLEN outlen = 0;
	763
	764	PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
	765
	766	while (x < send) {
	767	const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
	768	if (UNLIKELY(! cur_len)) {
	769	break;
	770	}
	771	x += cur_len;
	772	outlen++;
	773	}
	774
	775	if (el)
	776	*el = outlen;
	777
	778	if (ep) {
	779	*ep = x;
	780	}
	781
	782	return (x == send);
	783	}
	784
	785	/*
	786
	787	=for apidoc is_utf8_string_loc_flags
	788
	789	Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
	790	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	791	"utf8ness success") in the C<ep> pointer.
	792
	793	See also C<L</is_utf8_string_loclen_flags>>.
	794
	795	=cut
	796	*/
	797
	/* is_utf8_string_loclen_flags() with 0 passed as its 'el' argument; 'flags'
	 * is forwarded unchanged. */
	#define is_utf8_string_loc_flags(s, len, ep, flags)                         \
	                    is_utf8_string_loclen_flags(s, len, ep, 0, flags)
	800
	801
	802	/* The above 3 actual functions could have been moved into the more general one
	803	* just below, and made #defines that call it with the right 'flags'. They are
	804	* currently kept separate to increase their chances of getting inlined */
	805
	806	/*
	807
	808	=for apidoc is_utf8_string_loclen_flags
	809
	810	Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
	811	case of "utf8ness failure") or the location C<s>+C<len> (in the case of
	812	"utf8ness success") in the C<ep> pointer, and the number of UTF-8
	813	encoded characters in the C<el> pointer.
	814
	815	See also C<L</is_utf8_string_loc_flags>>.
	816
	817	=cut
	818	*/
	819
	820	PERL_STATIC_INLINE bool
	821	S_is_utf8_string_loclen_flags(const U8 s, const STRLEN len, const U8 ep, STRLEN el, const U32 flags)
	822	{
	823	const U8* const send = s + (len ? len : strlen((const char *)s));
	824	const U8* x = s;
	825	STRLEN outlen = 0;
	826
	827	PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
	828	assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
	829	\|UTF8_DISALLOW_ABOVE_31_BIT)));
	830
	831	if (flags == 0) {
	832	return is_utf8_string_loclen(s, len, ep, el);
	833	}
	834
	835	if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
	836	== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
	837	{
	838	return is_strict_utf8_string_loclen(s, len, ep, el);
	839	}
	840
	841	if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
	842	== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
	843	{
	844	return is_c9strict_utf8_string_loclen(s, len, ep, el);
	845	}
	846
	847	while (x < send) {
	848	const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
	849	if (UNLIKELY(! cur_len)) {
	850	break;
	851	}
	852	x += cur_len;
	853	outlen++;
	854	}
	855
	856	if (el)
	857	*el = outlen;
	858
	859	if (ep) {
	860	*ep = x;
	861	}
	862
	863	return (x == send);
	864	}
	865
	866	/*
	867	=for apidoc utf8_distance
	868
	869	Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
	870	and C<b>.
	871
	872	WARNING: use only if you know that the pointers point inside the
	873	same UTF-8 buffer.
	874
	875	=cut
	876	*/
	877
	878	PERL_STATIC_INLINE IV
	879	Perl_utf8_distance(pTHX_ const U8 a, const U8 b)
	880	{
	881	PERL_ARGS_ASSERT_UTF8_DISTANCE;
	882
	883	return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
	884	}
	885
	886	/*
	887	=for apidoc utf8_hop
	888
	889	Return the UTF-8 pointer C<s> displaced by C<off> characters, either
	890	forward or backward.
	891
	892	WARNING: do not use the following unless you know C<off> is within
	893	the UTF-8 data pointed to by C<s> and that on entry C<s> is aligned
	894	on the first byte of character or just after the last byte of a character.
	895
	896	=cut
	897	*/
	898
	899	PERL_STATIC_INLINE U8 *
	900	Perl_utf8_hop(const U8 *s, SSize_t off)
	901	{
	902	PERL_ARGS_ASSERT_UTF8_HOP;
	903
	904	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	905	* the bitops (especially ~) can create illegal UTF-8.
	906	* In other words: in Perl UTF-8 is not just for Unicode. */
	907
	908	if (off >= 0) {
	909	while (off--)
	910	s += UTF8SKIP(s);
	911	}
	912	else {
	913	while (off++) {
	914	s--;
	915	while (UTF8_IS_CONTINUATION(*s))
	916	s--;
	917	}
	918	}
	919	GCC_DIAG_IGNORE(-Wcast-qual);
	920	return (U8 *)s;
	921	GCC_DIAG_RESTORE;
	922	}
	923
	924	/*
	925	=for apidoc utf8_hop_forward
	926
	927	Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
	928	forward.
	929
	930	C<off> must be non-negative.
	931
	932	C<s> must be before or equal to C<end>.
	933
	934	When moving forward it will not move beyond C<end>.
	935
	936	Will not exceed this limit even if the string is not valid "UTF-8".
	937
	938	=cut
	939	*/
	940
	941	PERL_STATIC_INLINE U8 *
	942	Perl_utf8_hop_forward(const U8 s, SSize_t off, const U8 end)
	943	{
	944	PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
	945
	946	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	947	* the bitops (especially ~) can create illegal UTF-8.
	948	* In other words: in Perl UTF-8 is not just for Unicode. */
	949
	950	assert(s <= end);
	951	assert(off >= 0);
	952
	953	while (off--) {
	954	STRLEN skip = UTF8SKIP(s);
	955	if ((STRLEN)(end - s) <= skip) {
	956	GCC_DIAG_IGNORE(-Wcast-qual);
	957	return (U8 *)end;
	958	GCC_DIAG_RESTORE;
	959	}
	960	s += skip;
	961	}
	962
	963	GCC_DIAG_IGNORE(-Wcast-qual);
	964	return (U8 *)s;
	965	GCC_DIAG_RESTORE;
	966	}
	967
	968	/*
	969	=for apidoc utf8_hop_back
	970
	971	Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
	972	backward.
	973
	974	C<off> must be non-positive.
	975
	976	C<s> must be after or equal to C<start>.
	977
	978	When moving backward it will not move before C<start>.
	979
	980	Will not exceed this limit even if the string is not valid "UTF-8".
	981
	982	=cut
	983	*/
	984
	985	PERL_STATIC_INLINE U8 *
	986	Perl_utf8_hop_back(const U8 s, SSize_t off, const U8 start)
	987	{
	988	PERL_ARGS_ASSERT_UTF8_HOP_BACK;
	989
	990	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	991	* the bitops (especially ~) can create illegal UTF-8.
	992	* In other words: in Perl UTF-8 is not just for Unicode. */
	993
	994	assert(start <= s);
	995	assert(off <= 0);
	996
	997	while (off++ && s > start) {
	998	s--;
	999	while (UTF8_IS_CONTINUATION(*s) && s > start)
	1000	s--;
	1001	}
	1002
	1003	GCC_DIAG_IGNORE(-Wcast-qual);
	1004	return (U8 *)s;
	1005	GCC_DIAG_RESTORE;
	1006	}
	1007
	1008	/*
	1009	=for apidoc utf8_hop_safe
	1010
	1011	Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
	1012	either forward or backward.
	1013
	1014	When moving backward it will not move before C<start>.
	1015
	1016	When moving forward it will not move beyond C<end>.
	1017
	1018	Will not exceed those limits even if the string is not valid "UTF-8".
	1019
	1020	=cut
	1021	*/
	1022
	1023	PERL_STATIC_INLINE U8 *
	1024	Perl_utf8_hop_safe(const U8 s, SSize_t off, const U8 start, const U8 *end)
	1025	{
	1026	PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
	1027
	1028	/* Note: cannot use UTF8_IS_...() too eagerly here since e.g
	1029	* the bitops (especially ~) can create illegal UTF-8.
	1030	* In other words: in Perl UTF-8 is not just for Unicode. */
	1031
	1032	assert(start <= s && s <= end);
	1033
	1034	if (off >= 0) {
	1035	return utf8_hop_forward(s, off, end);
	1036	}
	1037	else {
	1038	return utf8_hop_back(s, off, start);
	1039	}
	1040	}
	1041
	1042	/*
	1043
	1044	=for apidoc is_utf8_valid_partial_char
	1045
	1046	Returns 0 if the sequence of bytes starting at C<s> and looking no further than
	1047	S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
	1048	points. Otherwise, it returns 1 if there exists at least one non-empty
	1049	sequence of bytes that when appended to sequence C<s>, starting at position
	1050	C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
	1051	otherwise returns 0.
	1052
	1053	In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
	1054	point.
	1055
	1056	This is useful when a fixed-length buffer is being tested for being well-formed
	1057	UTF-8, but the final few bytes in it don't comprise a full character; that is,
	1058	it is split somewhere in the middle of the final code point's UTF-8
	1059	representation. (Presumably when the buffer is refreshed with the next chunk
	1060	of data, the new first bytes will complete the partial code point.) This
	1061	function is used to verify that the final bytes in the current buffer are in
	1062	fact the legal beginning of some code point, so that if they aren't, the
	1063	failure can be signalled without having to wait for the next read.
	1064
	1065	=cut
	1066	*/
	/* is_utf8_valid_partial_char_flags() with a 'flags' argument of 0. */
	#define is_utf8_valid_partial_char(s, e)                                    \
	                            is_utf8_valid_partial_char_flags(s, e, 0)
	1069
	1070	/*
	1071
	1072	=for apidoc is_utf8_valid_partial_char_flags
	1073
	1074	Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
	1075	or not the input is a valid UTF-8 encoded partial character, but it takes an
	1076	extra parameter, C<flags>, which can further restrict which code points are
	1077	considered valid.
	1078
	1079	If C<flags> is 0, this behaves identically to
	1080	C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
	1081	of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
	1082	there is any sequence of bytes that can complete the input partial character in
	1083	such a way that a non-prohibited character is formed, the function returns
	1084	TRUE; otherwise FALSE. Non character code points cannot be determined based on
	1085	partial character input. But many of the other possible excluded types can be
	1086	determined from just the first one or two bytes.
	1087
	1088	=cut
	1089	*/
	1090
	1091	PERL_STATIC_INLINE bool
	1092	S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
	1093	{   /* s: first byte to examine; e: one past the last byte; flags: UTF8_DISALLOW_* bits */
	1094	    PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
	1095	    /* Only the two UTF8_DISALLOW_* bits named below may be set in flags */
	1096	    assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
	1097	                          |UTF8_DISALLOW_ABOVE_31_BIT)));
	1098	    /* Not partial if there are no bytes, or if s + UTF8SKIP(s) does not pass e */
	1099	    if (s >= e || s + UTF8SKIP(s) <= e) {
	1100	        return FALSE;
	1101	    }
	1102	    /* The helper decides whether the truncated bytes could still be completed */
	1103	    return cBOOL(_is_utf8_char_helper(s, e, flags));
	1104	}
	1105
	1106	/*
	1107
	1108	=for apidoc is_utf8_fixed_width_buf_flags
	1109
	1110	Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
	1111	is entirely valid UTF-8, subject to the restrictions given by C<flags>;
	1112	otherwise it returns FALSE.
	1113
	1114	If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
	1115	without restriction. If the final few bytes of the buffer do not form a
	1116	complete code point, this will return TRUE anyway, provided that
	1117	C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
	1118
	1119	If C<flags> is non-zero, it can be any combination of the
	1120	C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
	1121	same meanings.
	1122
	1123	This function differs from C<L</is_utf8_string_flags>> only in that the latter
	1124	returns FALSE if the final few bytes of the string don't form a complete code
	1125	point.
	1126
	1127	=cut
	1128	*/
	1129	#define is_utf8_fixed_width_buf_flags(s, len, flags) \
	1130	is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags) /* the ep and el outputs are not requested */
	1131
	1132	/*
	1133
	1134	=for apidoc is_utf8_fixed_width_buf_loc_flags
	1135
	1136	Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
	1137	failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
	1138	to the beginning of any partial character at the end of the buffer; if there is
	1139	no partial character C<*ep> will contain C<s>+C<len>.
	1140
	1141	See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
	1142
	1143	=cut
	1144	*/
	1145
	1146	#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
	1147	is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags) /* loc is passed as ep; el is not requested */
	1148
	1149	/*
	1150
	1151	=for apidoc is_utf8_fixed_width_buf_loclen_flags
	1152
	1153	Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
	1154	complete, valid characters found in the C<el> pointer.
	1155
	1156	=cut
	1157	*/
	1158
	1159	PERL_STATIC_INLINE bool
	1160	S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
	1161	                                       const STRLEN len,
	1162	                                       const U8 **ep,
	1163	                                       STRLEN *el,
	1164	                                       const U32 flags)
	1165	{
	1166	    const U8 * maybe_partial;   /* stands in for *ep when the caller passes NULL */
	1167
	1168	    PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
	1169	    /* The second check below reads *ep, so ep must always point somewhere */
	1170	    if (! ep) {
	1171	        ep = &maybe_partial;
	1172	    }
	1173
	1174	    /* If it's entirely valid, return that; otherwise see if the only error is
	1175	     * that the final few bytes are for a partial character */
	1176	    return    is_utf8_string_loclen_flags(s, len, ep, el, flags)
	1177	           || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
	1178	}
	1179
	1180	/* ------------------------------- perl.h ----------------------------- */
	1181
	1182	/*
	1183	=head1 Miscellaneous Functions
	1184
	1185	=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
	1186
	1187	Test that the given C<pv> doesn't contain any internal C<NUL> characters.
	1188	If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
	1189
	1190	Return TRUE if the name is safe.
	1191
	1192	Used by the C<IS_SAFE_SYSCALL()> macro.
	1193
	1194	=cut
	1195	*/
	1196
	1197	PERL_STATIC_INLINE bool
	1198	S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
	1199	    /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
	1200	     * perl itself uses xce*() functions which accept 8-bit strings.
	1201	     */
	1202
	1203	    PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
	1204	    /* Only the first len-1 bytes are scanned, so a NUL at pv[len-1] is accepted */
	1205	    if (len > 1) {
	1206	        char *null_at;
	1207	        if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
	1208	            SETERRNO(ENOENT, LIB_INVARG);
	1209	            Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
	1210	                           "Invalid \\0 character in %s for %s: %s\\0%s",
	1211	                           what, op_name, pv, null_at+1); /* text before and after the NUL */
	1212	            return FALSE;
	1213	        }
	1214	    }
	1215
	1216	    return TRUE;
	1217	}
	1218
	1219	/*
	1220
	1221	Return true if the supplied filename has a newline character
	1222	immediately before the first (hopefully only) NUL.
	1223
	1224	My original look at this incorrectly used the len from SvPV(), but
	1225	that's incorrect, since we allow for a NUL in pv[len-1].
	1226
	1227	So instead, strlen() and work from there.
	1228
	1229	This allows for the user reading a filename, forgetting to chomp it,
	1230	then calling:
	1231
	1232	open my $foo, "$file\0";
	1233
	1234	*/
	1235
	1236	#ifdef PERL_CORE
	1237
	1238	PERL_STATIC_INLINE bool
	1239	S_should_warn_nl(const char *pv) {
	1240	    STRLEN name_len;    /* number of bytes before the first NUL in pv */
	1241
	1242	    PERL_ARGS_ASSERT_SHOULD_WARN_NL;
	1243
	1244	    name_len = strlen(pv);
	1245	    if (name_len == 0) return FALSE;    /* empty name: no byte precedes the NUL */
	1246	    return pv[name_len - 1] == '\n';    /* is the last byte before the NUL a newline? */
	1247	}
	1248
	1249	#endif
	1250
	1251	/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
	1252
	1253	#define MAX_CHARSET_NAME_LENGTH 2
	1254
	1255	PERL_STATIC_INLINE const char *
	1256	get_regex_charset_name(const U32 flags, STRLEN* const lenp)
	1257	{
	1258	    /* Maps the regex character set encoded in 'flags' to its pattern-modifier
	1259	     * string.  The string's length, never more than MAX_CHARSET_NAME_LENGTH,
	1260	     * is stored in *lenp: 2 for one charset below, 1 in every other case */
	1261
	1262	    *lenp = 1;
	1263	    switch (get_regex_charset(flags)) {
	1264	        case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
	1265	            *lenp = 2;
	1266	            return ASCII_MORE_RESTRICT_PAT_MODS;
	1267	        case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
	1268	        case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
	1269	        case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
	1270	        case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
	1271	    }
	1272	    /* NOT_REACHED conceals an assert() that has a fairly involved
	1273	     * definition in perl.h. */
	1274	    NOT_REACHED; /* NOTREACHED */
	1275	    return "?"; /* Unknown */
	1276	}
	1277
	1278	/*
	1279
	1280	Return false if any get magic is on the SV other than taint magic.
	1281
	1282	*/
	1283
	1284	PERL_STATIC_INLINE bool
	1285	S_sv_only_taint_gmagic(SV *sv) {
	1286	    MAGIC *entry = SvMAGIC(sv);    /* head of the SV's magic chain */
	1287
	1288	    PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
	1289
	1290	    for (; entry; entry = entry->mg_moremagic) {
	1291	        if (entry->mg_type == PERL_MAGIC_taint)
	1292	            continue;        /* taint magic never disqualifies the SV */
	1293	        if (entry->mg_flags & MGf_GSKIP)
	1294	            continue;        /* entries carrying MGf_GSKIP are ignored */
	1295	        if (entry->mg_virtual->svt_get)
	1296	            return FALSE;    /* some other magic has a get handler */
	1297	    }
	1298
	1299	    return TRUE;
	1300	}
	1301
	1302	/* ------------------ cop.h ------------------------------------------- */
	1303
	1304
	1305	/* Enter a block. Push a new base context and return its address. */
	1306
	1307	PERL_STATIC_INLINE PERL_CONTEXT *
	1308	S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
	1309	{
	1310	PERL_CONTEXT * cx;
	1311	/* type, gimme and saveix are stored as given; sp is stored as an offset */
	1312	PERL_ARGS_ASSERT_CX_PUSHBLOCK;
	1313
	1314	CXINC; /* NOTE(review): presumably makes room for a new context entry -- confirm in cop.h */
	1315	cx = CX_CUR(); /* the entry that is filled in below and returned */
	1316	cx->cx_type = type;
	1317	cx->blk_gimme = gimme;
	1318	cx->blk_oldsaveix = saveix;
	1319	cx->blk_oldsp = (I32)(sp - PL_stack_base); /* kept as an offset from the stack base */
	1320	cx->blk_oldcop = PL_curcop;
	1321	cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack); /* also kept as an offset */
	1322	cx->blk_oldscopesp = PL_scopestack_ix;
	1323	cx->blk_oldpm = PL_curpm;
	1324	cx->blk_old_tmpsfloor = PL_tmps_floor; /* remembered before it is overwritten below */
	1325
	1326	PL_tmps_floor = PL_tmps_ix;
	1327	CX_DEBUG(cx, "PUSH");
	1328	return cx;
	1329	}
	1330
	1331
	1332	/* Exit a block (RETURN and LAST). */
	1333
	1334	PERL_STATIC_INLINE void
	1335	S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
	1336	{
	1337	    PERL_ARGS_ASSERT_CX_POPBLOCK;
	1338
	1339	    CX_DEBUG(cx, "POP");
	1340	    /* these 3 are common to cx_popblock and cx_topblock */
	1341	    PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
	1342	    PL_scopestack_ix = cx->blk_oldscopesp;
	1343	    PL_curpm         = cx->blk_oldpm;
	1344
	1345	    /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
	1346	     * and leaves a CX entry lying around for repeated use, so
	1347	     * skip for multicall */
	1348	    assert(   (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
	1349	           || PL_savestack_ix == cx->blk_oldsaveix);
	1350	    PL_curcop     = cx->blk_oldcop;        /* value saved by cx_pushblock() */
	1351	    PL_tmps_floor = cx->blk_old_tmpsfloor; /* value saved by cx_pushblock() */
	1352	}
	1353
	1354	/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
	1355	* Whereas cx_popblock() restores the state to the point just before
	1356	* cx_pushblock() was called, cx_topblock() restores it to the point just
	1357	* after cx_pushblock() was called. */
	1358
	1359	PERL_STATIC_INLINE void
	1360	S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
	1361	{
	1362	PERL_ARGS_ASSERT_CX_TOPBLOCK;
	1363
	1364	CX_DEBUG(cx, "TOP");
	1365	/* these 3 are common to cx_popblock and cx_topblock */
	1366	PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
	1367	PL_scopestack_ix = cx->blk_oldscopesp;
	1368	PL_curpm = cx->blk_oldpm;
	1369	/* unlike cx_popblock(), PL_curcop and PL_tmps_floor are left untouched here */
	1370	PL_stack_sp = PL_stack_base + cx->blk_oldsp; /* rebuild the pointer from the saved offset */
	1371	}
	1372
	1373
	1374	PERL_STATIC_INLINE void
	1375	S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
	1376	{   /* Fill in the sub part of cx for a call to cv; retop is stored as given */
	1377	    U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
	1378
	1379	    PERL_ARGS_ASSERT_CX_PUSHSUB;
	1380
	1381	    PERL_DTRACE_PROBE_ENTRY(cv);
	1382	    cx->blk_sub.cv = cv;
	1383	    cx->blk_sub.olddepth = CvDEPTH(cv);      /* cx_popsub_common() restores this */
	1384	    cx->blk_sub.prevcomppad = PL_comppad;    /* cx_popsub_common() restores this too */
	1385	    cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
	1386	    cx->blk_sub.retop = retop;
	1387	    SvREFCNT_inc_simple_void_NN(cv);         /* released again in cx_popsub_common() */
	1388	    cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
	1389	}
	1390
	1391
	1392	/* subsets of cx_popsub() */
	1393
	1394	PERL_STATIC_INLINE void
	1395	S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
	1396	{
	1397	CV *cv;
	1398	/* Undoes what cx_pushsub() recorded: the pad, the call depth and the CV reference */
	1399	PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
	1400	assert(CxTYPE(cx) == CXt_SUB);
	1401
	1402	PL_comppad = cx->blk_sub.prevcomppad;
	1403	PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL; /* NULL when there is no previous pad */
	1404	cv = cx->blk_sub.cv;
	1405	CvDEPTH(cv) = cx->blk_sub.olddepth;
	1406	cx->blk_sub.cv = NULL; /* cleared before the reference is dropped */
	1407	SvREFCNT_dec(cv);
	1408	}
	1409
	1410
	1411	/* handle the @_ part of leaving a sub */
	1412
	1413	PERL_STATIC_INLINE void
	1414	S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
	1415	{
	1416	AV *av;
	1417	/* The second assert checks that the sub's pad at its current depth is PL_curpad */
	1418	PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
	1419	assert(CxTYPE(cx) == CXt_SUB);
	1420	assert(AvARRAY(MUTABLE_AV(
	1421	PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
	1422	CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
	1423
	1424	CX_POP_SAVEARRAY(cx); /* NOTE(review): presumably puts back the saved @_ -- confirm in cop.h */
	1425	av = MUTABLE_AV(PAD_SVl(0)); /* pad slot 0, handled below as this sub's @_ */
	1426	if (UNLIKELY(AvREAL(av)))
	1427	/* abandon @_ if it got reified */
	1428	clear_defarray(av, 0);
	1429	else {
	1430	CLEAR_ARGARRAY(av);
	1431	}
	1432	}
	1433
	1434
	1435	PERL_STATIC_INLINE void
	1436	S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
	1437	{
	1438	PERL_ARGS_ASSERT_CX_POPSUB;
	1439	assert(CxTYPE(cx) == CXt_SUB);
	1440	/* Leave a sub: run the return probe macro, then the two subset helpers */
	1441	PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
	1442
	1443	if (CxHASARGS(cx)) /* the @_ part runs only when the context has args */
	1444	cx_popsub_args(cx);
	1445	cx_popsub_common(cx);
	1446	}
	1447
	1448
	1449	PERL_STATIC_INLINE void
	1450	S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
	1451	{
	1452	    PERL_ARGS_ASSERT_CX_PUSHFORMAT;
	1453	    /* Fill in the format part of cx; cx_popformat() undoes the steps below */
	1454	    cx->blk_format.cv          = cv;
	1455	    cx->blk_format.retop       = retop;
	1456	    cx->blk_format.gv          = gv;
	1457	    cx->blk_format.dfoutgv     = PL_defoutgv;
	1458	    cx->blk_format.prevcomppad = PL_comppad;
	1459	    cx->blk_u16                = 0;
	1460
	1461	    SvREFCNT_inc_simple_void_NN(cv);            /* reference held by the context */
	1462	    CvDEPTH(cv)++;                              /* decremented in cx_popformat() */
	1463	    SvREFCNT_inc_void(cx->blk_format.dfoutgv);  /* released in cx_popformat() */
	1464	}
	1465
	1466
	1467	PERL_STATIC_INLINE void
	1468	S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
	1469	{
	1470	CV *cv;
	1471	GV *dfout;
	1472	/* Reverses cx_pushformat(): saved output GV, pad, depth and both references */
	1473	PERL_ARGS_ASSERT_CX_POPFORMAT;
	1474	assert(CxTYPE(cx) == CXt_FORMAT);
	1475
	1476	dfout = cx->blk_format.dfoutgv;
	1477	setdefout(dfout); /* NOTE(review): presumably re-selects the saved GV as default output -- confirm */
	1478	cx->blk_format.dfoutgv = NULL;
	1479	SvREFCNT_dec_NN(dfout); /* drops the reference taken in cx_pushformat() */
	1480
	1481	PL_comppad = cx->blk_format.prevcomppad;
	1482	PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
	1483	cv = cx->blk_format.cv;
	1484	cx->blk_format.cv = NULL;
	1485	--CvDEPTH(cv); /* pairs with the CvDEPTH(cv)++ in cx_pushformat() */
	1486	SvREFCNT_dec_NN(cv);
	1487	}
	1488
	1489
	1490	PERL_STATIC_INLINE void
	1491	S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
	1492	{
	1493	    PERL_ARGS_ASSERT_CX_PUSHEVAL;
	1494	    /* Fill in the eval part of cx from the arguments and the current globals */
	1495	    cx->blk_eval.retop         = retop;
	1496	    cx->blk_eval.old_namesv    = namesv;
	1497	    cx->blk_eval.old_eval_root = PL_eval_root;
	1498	    cx->blk_eval.cur_text      = PL_parser ? PL_parser->linestr : NULL;
	1499	    cx->blk_eval.cv            = NULL; /* later set by doeval_compile() */
	1500	    cx->blk_eval.cur_top_env   = PL_top_env;
	1501	    /* blk_u16 packs PL_in_eval (masked with 0x3F) with the op type shifted up 7 bits */
	1502	    assert(!(PL_in_eval     & ~ 0x3F));
	1503	    assert(!(PL_op->op_type & ~0x1FF));
	1504	    cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
	1505	}
	1506
	1507
	1508	PERL_STATIC_INLINE void
	1509	S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
	1510	{
	1511	SV *sv;
	1512	/* Restores PL_in_eval and PL_eval_root, then releases the SVs the context holds */
	1513	PERL_ARGS_ASSERT_CX_POPEVAL;
	1514	assert(CxTYPE(cx) == CXt_EVAL);
	1515
	1516	PL_in_eval = CxOLD_IN_EVAL(cx);
	1517	assert(!(PL_in_eval & 0xc0));
	1518	PL_eval_root = cx->blk_eval.old_eval_root;
	1519	sv = cx->blk_eval.cur_text;
	1520	if (sv && CxEVAL_TXT_REFCNTED(cx)) { /* released only when CxEVAL_TXT_REFCNTED(cx) holds */
	1521	cx->blk_eval.cur_text = NULL;
	1522	SvREFCNT_dec_NN(sv);
	1523	}
	1524
	1525	sv = cx->blk_eval.old_namesv;
	1526	if (sv) { /* old_namesv may be NULL; release it only when set */
	1527	cx->blk_eval.old_namesv = NULL;
	1528	SvREFCNT_dec_NN(sv);
	1529	}
	1530	}
	1531
	1532
	1533	/* push a plain loop, i.e.
	1534	* { block }
	1535	* while (cond) { block }
	1536	* for (init;cond;continue) { block }
	1537	* This loop can be last/redo'ed etc.
	1538	*/
	1539
	1540	PERL_STATIC_INLINE void
	1541	S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
	1542	{
	1543	PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
	1544	cx->blk_loop.my_op = cLOOP; /* the only loop field this function sets */
	1545	}
	1546
	1547
	1548	/* push a true for loop, i.e.
	1549	* for var (list) { block }
	1550	*/
	1551
	1552	PERL_STATIC_INLINE void
	1553	S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
	1554	{
	1555	    PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
	1556
	1557	    /* this one line is common with cx_pushloop_plain */
	1558	    cx->blk_loop.my_op = cLOOP;
	1559	    /* itervarp is stored as an SV**; cx_poploop() writes itersave back via it */
	1560	    cx->blk_loop.itervar_u.svp = (SV**)itervarp;
	1561	    cx->blk_loop.itersave = itersave;
	1562	#ifdef USE_ITHREADS
	1563	    cx->blk_loop.oldcomppad = PL_comppad;
	1564	#endif
	1565	}
	1566
	1567
	1568	/* pop all loop types, including plain */
	1569
	1570	PERL_STATIC_INLINE void
	1571	S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
	1572	{
	1573	    PERL_ARGS_ASSERT_CX_POPLOOP;
	1574
	1575	    assert(CxTYPE_is_LOOP(cx));
	1576	    if (   CxTYPE(cx) == CXt_LOOP_ARY
	1577	        || CxTYPE(cx) == CXt_LOOP_LAZYSV)
	1578	    {
	1579	        /* Free ary or cur. This assumes that state_u.ary.ary
	1580	         * aligns with state_u.lazysv.cur. See cx_dup() */
	1581	        SV *sv = cx->blk_loop.state_u.lazysv.cur;
	1582	        cx->blk_loop.state_u.lazysv.cur = NULL;
	1583	        SvREFCNT_dec_NN(sv);
	1584	        if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {   /* lazysv loops release 'end' as well */
	1585	            sv = cx->blk_loop.state_u.lazysv.end;
	1586	            cx->blk_loop.state_u.lazysv.end = NULL;
	1587	            SvREFCNT_dec_NN(sv);
	1588	        }
	1589	    }
	1590	    if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
	1591	        SV *cursv;
	1592	        SV **svp = (cx)->blk_loop.itervar_u.svp;
	1593	        if ((cx->cx_type & CXp_FOR_GV))
	1594	            svp = &GvSV((GV*)svp);      /* the stored pointer is a GV: use its scalar slot */
	1595	        cursv = *svp;
	1596	        *svp = cx->blk_loop.itersave;   /* put the saved value back in the slot */
	1597	        cx->blk_loop.itersave = NULL;
	1598	        SvREFCNT_dec(cursv);            /* release what the slot held during the loop */
	1599	    }
	1600	}
	1601
	1602
	1603	PERL_STATIC_INLINE void
	1604	S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
	1605	{
	1606	PERL_ARGS_ASSERT_CX_PUSHWHEN;
	1607	/* Store cLOGOP->op_other as this context's leave op */
	1608	cx->blk_givwhen.leave_op = cLOGOP->op_other;
	1609	}
	1610
	1611
	1612	PERL_STATIC_INLINE void
	1613	S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
	1614	{
	1615	PERL_ARGS_ASSERT_CX_POPWHEN;
	1616	assert(CxTYPE(cx) == CXt_WHEN);
	1617	/* Nothing to undo; the two macros below only mark cx and the context as unused */
	1618	PERL_UNUSED_ARG(cx);
	1619	PERL_UNUSED_CONTEXT;
	1620	/* currently NOOP */
	1621	}
	1622
	1623
	1624	PERL_STATIC_INLINE void
	1625	S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
	1626	{
	1627	    PERL_ARGS_ASSERT_CX_PUSHGIVEN;
	1628	    /* orig_defsv is kept in defsv_save; cx_popgiven() puts it back into GvSV(PL_defgv) */
	1629	    cx->blk_givwhen.leave_op = cLOGOP->op_other;
	1630	    cx->blk_givwhen.defsv_save = orig_defsv;
	1631	}
	1632
	1633
	1634	PERL_STATIC_INLINE void
	1635	S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
	1636	{
	1637	SV *sv;
	1638	/* Put back the SV saved in defsv_save and release the one it replaces */
	1639	PERL_ARGS_ASSERT_CX_POPGIVEN;
	1640	assert(CxTYPE(cx) == CXt_GIVEN);
	1641
	1642	sv = GvSV(PL_defgv); /* the SV currently in PL_defgv's scalar slot */
	1643	GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
	1644	cx->blk_givwhen.defsv_save = NULL;
	1645	SvREFCNT_dec(sv);
	1646	}
	1647
	1648	/* ------------------ util.h ------------------------------------------- */
	1649
	1650	/*
	1651	=head1 Miscellaneous Functions
	1652
	1653	=for apidoc foldEQ
	1654
	1655	Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
	1656	same
	1657	case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
	1658	match themselves and their opposite case counterparts. Non-cased and non-ASCII
	1659	range bytes match only themselves.
	1660
	1661	=cut
	1662	*/
	1663
	1664	PERL_STATIC_INLINE I32
	1665	Perl_foldEQ(const char *s1, const char *s2, I32 len)
	1666	{
	1667	    const U8 *a = (const U8 *)s1;   /* both inputs are walked as unsigned bytes */
	1668	    const U8 *b = (const U8 *)s2;
	1669
	1670	    PERL_ARGS_ASSERT_FOLDEQ;
	1671
	1672	    assert(len >= 0);
	1673	    /* Returns 1 if all len byte pairs match directly or through PL_fold, else 0 */
	1674	    while (len--) {
	1675	        if (*a != *b && *a != PL_fold[*b])   /* neither equal nor the PL_fold entry for *b */
	1676	            return 0;
	1677	        a++,b++;
	1678	    }
	1679	    return 1;
	1680	}
	1681
	1682	PERL_STATIC_INLINE I32
	1683	Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
	1684	{
	1685	    /* Compare non-utf8 using Unicode (Latin1) semantics.  Does not work on
	1686	     * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
	1687	     * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these.  Nor
	1688	     * does it check that the strings each have at least 'len' characters */
	1689
	1690	    const U8 *a = (const U8 *)s1;   /* both inputs are walked as unsigned bytes */
	1691	    const U8 *b = (const U8 *)s2;
	1692
	1693	    PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
	1694
	1695	    assert(len >= 0);
	1696	    /* Returns 1 if all len byte pairs match directly or through PL_fold_latin1, else 0 */
	1697	    while (len--) {
	1698	        if (*a != *b && *a != PL_fold_latin1[*b]) {
	1699	            return 0;
	1700	        }
	1701	        a++, b++;
	1702	    }
	1703	    return 1;
	1704	}
	1705
	1706	/*
	1707	=for apidoc foldEQ_locale
	1708
	1709	Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
	1710	same case-insensitively in the current locale; false otherwise.
	1711
	1712	=cut
	1713	*/
	1714
	1715	PERL_STATIC_INLINE I32
	1716	Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
	1717	{
	1718	    dVAR;
	1719	    const U8 *a = (const U8 *)s1;   /* both inputs are walked as unsigned bytes */
	1720	    const U8 *b = (const U8 *)s2;
	1721
	1722	    PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
	1723
	1724	    assert(len >= 0);
	1725	    /* Returns 1 if all len byte pairs match directly or through PL_fold_locale, else 0 */
	1726	    while (len--) {
	1727	        if (*a != *b && *a != PL_fold_locale[*b])
	1728	            return 0;
	1729	        a++,b++;
	1730	    }
	1731	    return 1;
	1732	}
	1733
	1734	/*
	1735	* ex: set ts=8 sts=4 sw=4 et:
	1736	*/