perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regexec.c
	2	*/
	3
	4	/*
	5	* One Ring to rule them all, One Ring to find them
	6	&
	7	* [p.v of _The Lord of the Rings_, opening poem]
	8	* [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
	9	* [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
	10	*/
	11
	12	/* This file contains functions for executing a regular expression. See
	13	* also regcomp.c which funnily enough, contains functions for compiling
	14	* a regular expression.
	15	*
	16	* This file is also copied at build time to ext/re/re_exec.c, where
	17	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	18	* This causes the main functions to be compiled under new names and with
	19	* debugging support added, which makes "use re 'debug'" work.
	20	*/
	21
	22	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	23	* be confused with the original package (see point 3 below). Thanks, Henry!
	24	*/
	25
	26	/* Additional note: this code is very heavily munged from Henry's version
	27	* in places. In some spots I've traded clarity for efficiency, so don't
	28	* blame Henry for some of the lack of readability.
	29	*/
	30
	31	/* The names of the functions have been changed from regcomp and
	32	* regexec to pregcomp and pregexec in order to avoid conflicts
	33	* with the POSIX routines of the same names.
	34	*/
	35
	36	#ifdef PERL_EXT_RE_BUILD
	37	#include "re_top.h"
	38	#endif
	39
	/* Debug message used when at least one character the pattern requires
	 * is expressible only in UTF-8, so the match cannot succeed. */
	static const char * const non_utf8_target_but_utf8_required =
	    "Can't match, because target string needs to be in UTF-8\n";

	/* Print the message above under DEBUG_EXECUTE_r, then jump to the label
	 * named by 'target'. */
	#define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target)                         \
	    STMT_START {                                                          \
	        DEBUG_EXECUTE_r(                                                  \
	            PerlIO_printf(Perl_debug_log, "%s",                           \
	                          non_utf8_target_but_utf8_required));            \
	        goto target;                                                      \
	    } STMT_END
	49
	50	/*
	51	* pregcomp and pregexec -- regsub and regerror are not used in perl
	52	*
	53	* Copyright (c) 1986 by University of Toronto.
	54	* Written by Henry Spencer. Not derived from licensed software.
	55	*
	56	* Permission is granted to anyone to use this software for any
	57	* purpose on any computer system, and to redistribute it freely,
	58	* subject to the following restrictions:
	59	*
	60	* 1. The author is not responsible for the consequences of use of
	61	* this software, no matter how awful, even if they arise
	62	* from defects in it.
	63	*
	64	* 2. The origin of this software must not be misrepresented, either
	65	* by explicit claim or by omission.
	66	*
	67	* 3. Altered versions must be plainly marked as such, and must not
	68	* be misrepresented as being the original software.
	69	*
	70	**** Alterations to Henry's code are...
	71	****
	72	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	73	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
	74	**** by Larry Wall and others
	75	****
	76	**** You may distribute under the terms of either the GNU General Public
	77	**** License or the Artistic License, as specified in the README file.
	78	*
	79	* Beware that some of this code is subtly aware of the way operator
	80	* precedence is structured in regular expressions. Serious changes in
	81	* regular-expression syntax might require a total rethink.
	82	*/
	83	#include "EXTERN.h"
	84	#define PERL_IN_REGEXEC_C
	85	#include "perl.h"
	86
	87	#ifdef PERL_IN_XSUB_RE
	88	# include "re_comp.h"
	89	#else
	90	# include "regcomp.h"
	91	#endif
	92
	93	#include "inline_invlist.c"
	94	#include "unicode_constants.h"
	95
	96	#define RF_tainted 1 /* tainted information used? e.g. locale */
	97	#define RF_warned 2 /* warned about big count? */
	98
	99	#define RF_utf8 8 /* Pattern contains multibyte chars? */
	100
	101	#define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
	102
	103	#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
	104
	105	#ifndef STATIC
	106	#define STATIC static
	107	#endif
	108
	109	/* Valid for non-utf8 strings: avoids the reginclass
	110	* call if there are no complications: i.e., if everything matchable is
	111	* straight forward in the bitmap */
	112	#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0) \
	113	: ANYOF_BITMAP_TEST(p,*(c)))
	114
	115	/*
	116	* Forwards.
	117	*/
	118
	/* Length of 'sv': sv_len_utf8() when the local 'utf8_target' is true,
	 * otherwise SvCUR().  Requires 'utf8_target' in scope at the use site. */
	#define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))

	/* Distance from b to a: utf8_distance() when PL_reg_match_utf8 is set,
	 * plain pointer subtraction otherwise. */
	#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)

	/* Move 'off' positions from 'pos' and yield a char*.  When
	 * PL_reg_match_utf8 is set the hop is delegated to reghop3(), limited by
	 * PL_regeol for non-negative offsets and by PL_bostr for negative ones;
	 * otherwise it is plain pointer arithmetic.  The arguments must be cast
	 * to U8* (not U8) so both arms of the conditional have pointer type. */
	#define HOPc(pos,off) \
	    (char *)(PL_reg_match_utf8 \
	        ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
	        : (U8*)(pos + off))

	/* Move 'off' positions backwards from 'pos' and yield a char*.  The
	 * UTF-8 case is delegated to reghopmaybe3() with PL_bostr as the limit;
	 * the byte case yields NULL when the result would precede PL_bostr. */
	#define HOPBACKc(pos, off) \
	    (char*)(PL_reg_match_utf8\
	        ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
	        : (pos - off >= PL_bostr) \
	            ? (U8*)pos - off \
	            : NULL)

	/* Like HOPc, but the caller supplies the limit 'lim' explicitly.  HOP3
	 * yields a U8*, HOP3c the same value as a char*. */
	#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
	#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
	135
	136
	137	#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
	138	#define NEXTCHR_IS_EOS (nextchr < 0)
	139
	140	#define SET_nextchr \
	141	nextchr = ((locinput < PL_regeol) ? UCHARAT(locinput) : NEXTCHR_EOS)
	142
	143	#define SET_locinput(p) \
	144	locinput = (p); \
	145	SET_nextchr
	146
	147
	/* These are unrolled below in the CCC_TRY_XXX defines. */

	/* If PL_utf8_<class> is not yet set, call is_utf8_<class>() on 'str'
	 * between ENTER/save_re_context() and LEAVE, then assert both that the
	 * call returned true and that PL_utf8_<class> is now set. */
	#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
	    if (!CAT2(PL_utf8_,class)) { \
	        bool ok; \
	        ENTER; save_re_context(); \
	        ok=CAT2(is_utf8_,class)((const U8*)str); \
	        PERL_UNUSED_VAR(ok); \
	        assert(ok); assert(CAT2(PL_utf8_,class)); LEAVE; } } STMT_END

	/* Same, but called on " " and without asserting that the result is
	 * correct.  'throw_away' is marked unused only after it has been
	 * assigned, so it is never referenced while uninitialized. */
	#define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
	    if (!CAT2(PL_utf8_,class)) { \
	        bool throw_away; \
	        ENTER; save_re_context(); \
	        throw_away = CAT2(is_utf8_,class)((const U8*)" "); \
	        PERL_UNUSED_VAR(throw_away); \
	        LEAVE; } } STMT_END
	165
	166	#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
	167	#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
	168
	169	#define LOAD_UTF8_CHARCLASS_GCB() /* Grapheme cluster boundaries */ \
	170	/* No asserts are done for some of these, in case called on a */ \
	171	/* Unicode version in which they map to nothing */ \
	172	LOAD_UTF8_CHARCLASS(X_regular_begin, HYPHEN_UTF8); \
	173	LOAD_UTF8_CHARCLASS(X_extend, COMBINING_GRAVE_ACCENT_UTF8); \
	174
	175	#define PLACEHOLDER /* Something for the preprocessor to grab onto */
	176
	177	/* The actual code for CCC_TRY, which uses several variables from the routine
	178	* it's callable from. It is designed to be the bulk of a case statement.
	179	* FUNC is the macro or function to call on non-utf8 targets that indicate if
	180	* nextchr matches the class.
	181	* UTF8_TEST is the whole test string to use for utf8 targets
	182	* LOAD is what to use to test, and if not present to load in the swash for the
	183	* class
	184	* POS_OR_NEG is either empty or ! to complement the results of FUNC or
	185	* UTF8_TEST test.
	186	* The logic is: Fail if we're at the end-of-string; otherwise if the target is
	187	* utf8 and a variant, load the swash if necessary and test using the utf8
	188	* test. Advance to the next character if test is ok, otherwise fail; If not
	189	* utf8 or an invariant under utf8, use the non-utf8 test, and fail if it
	190	* fails, or advance to the next character */
	191
	/* Body of one character-class case: fail at end-of-string; for a UTF-8
	 * target whose next byte satisfies UTF8_IS_CONTINUED, make sure the class
	 * is loaded and apply UTF8_TEST; otherwise apply FUNC to nextchr.  On
	 * success control jumps to increment_locinput. */
	#define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR) \
	    if (NEXTCHR_IS_EOS) { \
	        sayNO; \
	    } \
	    if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \
	        LOAD_UTF8_CHARCLASS(CLASS, STR); \
	        if (POS_OR_NEG (UTF8_TEST)) { \
	            sayNO; \
	        } \
	    } \
	    else if (POS_OR_NEG (FUNC(nextchr))) { \
	        sayNO; \
	    } \
	    goto increment_locinput;

	/* Handle the non-locale cases for a character class and its complement. It
	 * calls _CCC_TRY_CODE with a ! to complement the test for the character class.
	 * This is because that code fails when the test succeeds, so we want to have
	 * the test fail so that the code succeeds. The swash is stored in a
	 * predictable PL_ place */
	#define _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, \
	                           CLASS, STR) \
	    case NAME: \
	        _CCC_TRY_CODE( !, FUNC, \
	            cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	                (U8*)locinput, TRUE)), \
	            CLASS, STR) \
	    case NNAME: \
	        _CCC_TRY_CODE( PLACEHOLDER , FUNC, \
	            cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	                (U8*)locinput, TRUE)), \
	            CLASS, STR)
	/* Generate the case statements for both locale and non-locale character
	 * classes in regmatch for classes that don't have special unicode semantics.
	 * Locales don't use an immediate swash, but an intermediary special locale
	 * function that is called on the pointer to the current place in the input
	 * string. That function will resolve to needing the same swash. One might
	 * think that because we don't know what the locale will match, we shouldn't
	 * check with the swash loading function that it loaded properly; ie, that we
	 * should use LOAD_UTF8_CHARCLASS_NO_CHECK for those, but what is passed to the
	 * regular LOAD_UTF8_CHARCLASS is in non-locale terms, and so locale is
	 * irrelevant here */
	#define CCC_TRY(NAME, NNAME, FUNC, \
	                NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	                NAMEA, NNAMEA, FUNCA, \
	                CLASS, STR) \
	    case NAMEL: \
	        PL_reg_flags |= RF_tainted; \
	        _CCC_TRY_CODE( !, LCFUNC, LCFUNC_utf8((U8*)locinput), CLASS, STR) \
	    case NNAMEL: \
	        PL_reg_flags |= RF_tainted; \
	        _CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput), \
	            CLASS, STR) \
	    case NAMEA: \
	        if (NEXTCHR_IS_EOS || ! FUNCA(nextchr)) { \
	            sayNO; \
	        } \
	        /* Matched a utf8-invariant, so don't have to worry about utf8 */ \
	        locinput++; \
	        break; \
	    case NNAMEA: \
	        if (NEXTCHR_IS_EOS || FUNCA(nextchr)) { \
	            sayNO; \
	        } \
	        goto increment_locinput; \
	    /* Generate the non-locale cases */ \
	    _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)

	/* This is like CCC_TRY, but has an extra set of parameters for generating case
	 * statements to handle separate Unicode semantics nodes */
	#define CCC_TRY_U(NAME, NNAME, FUNC, \
	                  NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	                  NAMEU, NNAMEU, FUNCU, \
	                  NAMEA, NNAMEA, FUNCA, \
	                  CLASS, STR) \
	    CCC_TRY(NAME, NNAME, FUNC, \
	            NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	            NAMEA, NNAMEA, FUNCA, \
	            CLASS, STR) \
	    _CCC_TRY_NONLOCALE(NAMEU, NNAMEU, FUNCU, CLASS, STR)
	272
	273	/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
	274
	275	/* for use after a quantifier and before an EXACT-like node -- japhy */
	276	/* it would be nice to rework regcomp.sym to generate this stuff. sigh
	277	*
	278	* NOTE that nothing that affects backtracking should be in here, specifically
	279	* VERBS must NOT be included. JUMPABLE is used to determine if we can ignore a
	280	* node that is in between two EXACT like nodes when ascertaining what the required
	281	* "follow" character is. This should probably be moved to regex compile time
	282	* although it may be done at run time because of the REF possibility - more
	283	* investigation required. -- demerphq
	284	*/
	/* True when node 'rn' is one of the node types listed below, i.e. one
	 * that may be skipped over when looking for the following text node.
	 * Uses 'cur_eval' from the enclosing scope for the CLOSE test. */
	#define JUMPABLE(rn) ( \
	    OP(rn) == OPEN || \
	    (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
	    OP(rn) == EVAL || \
	    OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
	    OP(rn) == PLUS || OP(rn) == MINMOD || \
	    OP(rn) == KEEPS || \
	    (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
	)
	/* Node kind is EXACT. */
	#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)

	/* Node kind is EXACT or REF. */
	#define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )

	#if 0
	/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
	   we don't need this definition. */
	#define IS_TEXT(rn) ( OP(rn)==EXACT || OP(rn)==REF || OP(rn)==NREF )
	#define IS_TEXTF(rn) ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_TRICKYFOLD || OP(rn)==EXACTFA || OP(rn)==EXACTF || OP(rn)==REFF || OP(rn)==NREFF )
	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )

	#else
	/* ... so we use this as it's faster. */
	#define IS_TEXT(rn) ( OP(rn)==EXACT )
	#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_TRICKYFOLD || OP(rn) == EXACTFA)
	#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )

	#endif
	313
	/*
	  Search for mandatory following text node; for lookahead, the text must
	  follow but for lookbehind (rn->flags != 0) we skip to the next step.

	  'rn' is advanced in place for as long as JUMPABLE(rn) holds:
	    - SUSPEND and CURLY-kind nodes: step over two node headers;
	    - PLUS: step over one node header;
	    - IFMATCH: step inside when rn->flags == 0, else jump by ARG(rn);
	    - anything else: follow NEXT_OFF(rn).
	*/
	#define FIND_NEXT_IMPT(rn) STMT_START { \
	    while (JUMPABLE(rn)) { \
	        const OPCODE type = OP(rn); \
	        if (type == SUSPEND || PL_regkind[type] == CURLY) \
	            rn = NEXTOPER(NEXTOPER(rn)); \
	        else if (type == PLUS) \
	            rn = NEXTOPER(rn); \
	        else if (type == IFMATCH) \
	            rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
	        else rn += NEXT_OFF(rn); \
	    } \
	} STMT_END
	330
	331	/* These constants are for finding GCB=LV and GCB=LVT in the CLUMP regnode.
	332	* These are for the pre-composed Hangul syllables, which are all in a
	333	* contiguous block and arranged there in such a way so as to facilitate
	334	* algorithmic determination of their characteristics. As such, they don't need
	335	* a swash, but can be determined by simple arithmetic. Almost all are
	336	* GCB=LVT, but every 28th one is a GCB=LV */
	337	#define SBASE 0xAC00 /* Start of block */
	338	#define SCount 11172 /* Length of block */
	339	#define TCount 28
	340
	341	static void restore_pos(pTHX_ void *arg);
	342
	343	#define REGCP_PAREN_ELEMS 3
	344	#define REGCP_OTHER_ELEMS 3
	345	#define REGCP_FRAME_ELEMS 1
	346	/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
	347	* are needed for the regexp context stack bookkeeping. */
	348
	349	STATIC CHECKPOINT
	350	S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen)
	351	{
	352	dVAR;
	353	const int retval = PL_savestack_ix;
	354	const int paren_elems_to_push =
	355	(maxopenparen - parenfloor) * REGCP_PAREN_ELEMS;
	356	const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
	357	const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
	358	I32 p;
	359	GET_RE_DEBUG_FLAGS_DECL;
	360
	361	PERL_ARGS_ASSERT_REGCPPUSH;
	362
	363	if (paren_elems_to_push < 0)
	364	Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0",
	365	paren_elems_to_push);
	366
	367	if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
	368	Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
	369	" out of range (%lu-%ld)",
	370	total_elems,
	371	(unsigned long)maxopenparen,
	372	(long)parenfloor);
	373
	374	SSGROW(total_elems + REGCP_FRAME_ELEMS);
	375
	376	DEBUG_BUFFERS_r(
	377	if ((int)maxopenparen > (int)parenfloor)
	378	PerlIO_printf(Perl_debug_log,
	379	"rex=0x%"UVxf" offs=0x%"UVxf": saving capture indices:\n",
	380	PTR2UV(rex),
	381	PTR2UV(rex->offs)
	382	);
	383	);
	384	for (p = parenfloor+1; p <= (I32)maxopenparen; p++) {
	385	/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
	386	SSPUSHINT(rex->offs[p].end);
	387	SSPUSHINT(rex->offs[p].start);
	388	SSPUSHINT(rex->offs[p].start_tmp);
	389	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	390	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"\n",
	391	(UV)p,
	392	(IV)rex->offs[p].start,
	393	(IV)rex->offs[p].start_tmp,
	394	(IV)rex->offs[p].end
	395	));
	396	}
	397	/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
	398	SSPUSHINT(maxopenparen);
	399	SSPUSHINT(rex->lastparen);
	400	SSPUSHINT(rex->lastcloseparen);
	401	SSPUSHUV(SAVEt_REGCONTEXT \| elems_shifted); /* Magic cookie. */
	402
	403	return retval;
	404	}
	405
	/* These are needed since we do not localize EVAL nodes. */

	/* Store the current savestack index in 'cp' (logged under
	 * DEBUG_STATE_r). */
	#define REGCP_SET(cp)                                               \
	    DEBUG_STATE_r(                                                  \
	        PerlIO_printf(Perl_debug_log,                               \
	            " Setting an EVAL scope, savestack=%"IVdf"\n",          \
	            (IV)PL_savestack_ix));                                  \
	    cp = PL_savestack_ix

	/* Unwind the savestack to 'cp' via regcpblow(); logs only when 'cp'
	 * differs from the current index. */
	#define REGCP_UNWIND(cp)                                            \
	    DEBUG_STATE_r(                                                  \
	        if (cp != PL_savestack_ix)                                  \
	            PerlIO_printf(Perl_debug_log,                           \
	                " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
	                (IV)(cp), (IV)PL_savestack_ix));                    \
	    regcpblow(cp)

	/* Set offs[].end to -1 for every paren from rex->lastparen down to
	 * (but not including) 'lp', then store the resulting paren number in
	 * rex->lastparen and 'lcp' in rex->lastcloseparen.  Uses 'n' and 'rex'
	 * from the enclosing scope. */
	#define UNWIND_PAREN(lp, lcp)                                       \
	    for (n = rex->lastparen; n > lp; n--)                           \
	        rex->offs[n].end = -1;                                      \
	    rex->lastparen = n;                                             \
	    rex->lastcloseparen = lcp;
	427
	428
	429	STATIC void
	430	S_regcppop(pTHX_ regexp rex, U32 maxopenparen_p)
	431	{
	432	dVAR;
	433	UV i;
	434	U32 paren;
	435	GET_RE_DEBUG_FLAGS_DECL;
	436
	437	PERL_ARGS_ASSERT_REGCPPOP;
	438
	439	/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
	440	i = SSPOPUV;
	441	assert((i & SAVE_MASK) == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
	442	i >>= SAVE_TIGHT_SHIFT; /* Parentheses elements to pop. */
	443	rex->lastcloseparen = SSPOPINT;
	444	rex->lastparen = SSPOPINT;
	445	*maxopenparen_p = SSPOPINT;
	446
	447	i -= REGCP_OTHER_ELEMS;
	448	/* Now restore the parentheses context. */
	449	DEBUG_BUFFERS_r(
	450	if (i \|\| rex->lastparen + 1 <= rex->nparens)
	451	PerlIO_printf(Perl_debug_log,
	452	"rex=0x%"UVxf" offs=0x%"UVxf": restoring capture indices to:\n",
	453	PTR2UV(rex),
	454	PTR2UV(rex->offs)
	455	);
	456	);
	457	paren = *maxopenparen_p;
	458	for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
	459	I32 tmps;
	460	rex->offs[paren].start_tmp = SSPOPINT;
	461	rex->offs[paren].start = SSPOPINT;
	462	tmps = SSPOPINT;
	463	if (paren <= rex->lastparen)
	464	rex->offs[paren].end = tmps;
	465	DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
	466	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"%s\n",
	467	(UV)paren,
	468	(IV)rex->offs[paren].start,
	469	(IV)rex->offs[paren].start_tmp,
	470	(IV)rex->offs[paren].end,
	471	(paren > rex->lastparen ? "(skipped)" : ""));
	472	);
	473	paren--;
	474	}
	475	#if 1
	476	/* It would seem that the similar code in regtry()
	477	* already takes care of this, and in fact it is in
	478	* a better location to since this code can #if 0-ed out
	479	* but the code in regtry() is needed or otherwise tests
	480	* requiring null fields (pat.t#187 and split.t#{13,14}
	481	* (as of patchlevel 7877) will fail. Then again,
	482	* this code seems to be necessary or otherwise
	483	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	484	* --jhi updated by dapm */
	485	for (i = rex->lastparen + 1; i <= rex->nparens; i++) {
	486	if (i > *maxopenparen_p)
	487	rex->offs[i].start = -1;
	488	rex->offs[i].end = -1;
	489	DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
	490	" \\%"UVuf": %s ..-1 undeffing\n",
	491	(UV)i,
	492	(i > *maxopenparen_p) ? "-1" : " "
	493	));
	494	}
	495	#endif
	496	}
	497
	498	/* restore the parens and associated vars at savestack position ix,
	499	* but without popping the stack */
	500
	501	STATIC void
	502	S_regcp_restore(pTHX_ regexp rex, I32 ix, U32 maxopenparen_p)
	503	{
	504	I32 tmpix = PL_savestack_ix;
	505	PL_savestack_ix = ix;
	506	regcppop(rex, maxopenparen_p);
	507	PL_savestack_ix = tmpix;
	508	}
	509
	510	#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
	511
	512	STATIC bool
	513	S_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
	514	{
	515	/* Returns a boolean as to whether or not 'character' is a member of the
	516	* Posix character class given by 'classnum' that should be equivalent to a
	517	* value in the typedef '_char_class_number'.
	518	*
	519	* Ideally this could be replaced by a just an array of function pointers
	520	* to the C library functions that implement the macros this calls.
	521	* However, to compile, the precise function signatures are required, and
	522	* these may vary from platform to to platform. To avoid having to figure
	523	* out what those all are on each platform, I (khw) am using this method,
	524	* which adds an extra layer of function call overhead. But we don't
	525	* particularly care about performance with locales anyway. */
	526
	527	switch ((_char_class_number) classnum) {
	528	case _CC_ENUM_ALPHANUMERIC: return isALPHANUMERIC_LC(character);
	529	case _CC_ENUM_ALPHA: return isALPHA_LC(character);
	530	case _CC_ENUM_DIGIT: return isDIGIT_LC(character);
	531	case _CC_ENUM_GRAPH: return isGRAPH_LC(character);
	532	case _CC_ENUM_LOWER: return isLOWER_LC(character);
	533	case _CC_ENUM_PRINT: return isPRINT_LC(character);
	534	case _CC_ENUM_PUNCT: return isPUNCT_LC(character);
	535	case _CC_ENUM_UPPER: return isUPPER_LC(character);
	536	case _CC_ENUM_WORDCHAR: return isWORDCHAR_LC(character);
	537	case _CC_ENUM_SPACE: return isSPACE_LC(character);
	538	case _CC_ENUM_BLANK: return isBLANK_LC(character);
	539	case _CC_ENUM_XDIGIT: return isXDIGIT_LC(character);
	540	case _CC_ENUM_CNTRL: return isCNTRL_LC(character);
	541	case _CC_ENUM_PSXSPC: return isPSXSPC_LC(character);
	542	case _CC_ENUM_ASCII: return isASCII_LC(character);
	543	default: /* VERTSPACE should never occur in locales */
	544	Perl_croak(aTHX_ "panic: isFOO_lc() has an unexpected character class '%d'", classnum);
	545	}
	546
	547	assert(0); /* NOTREACHED */
	548	return FALSE;
	549	}
	550
	551	/*
	552	* pregexec and friends
	553	*/
	554
	555	#ifndef PERL_IN_XSUB_RE
	556	/*
	557	- pregexec - match a regexp against a string
	558	*/
	559	I32
	560	Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
	561	char strbeg, I32 minend, SV screamer, U32 nosave)
	562	/* stringarg: the point in the string at which to begin matching */
	563	/* strend: pointer to null at end of string */
	564	/* strbeg: real beginning of string */
	565	/* minend: end of match must be >= minend bytes after stringarg. */
	566	/* screamer: SV being matched: only used for utf8 flag, pos() etc; string
	567	* itself is accessed via the pointers above */
	568	/* nosave: For optimizations. */
	569	{
	570	PERL_ARGS_ASSERT_PREGEXEC;
	571
	572	return
	573	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
	574	nosave ? 0 : REXEC_COPY_STR);
	575	}
	576	#endif
	577
	578	/*
	579	* Need to implement the following flags for reg_anch:
	580	*
	581	* USE_INTUIT_NOML - Useful to call re_intuit_start() first
	582	* USE_INTUIT_ML
	583	* INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
	584	* INTUIT_AUTORITATIVE_ML
	585	* INTUIT_ONCE_NOML - Intuit can match in one location only.
	586	* INTUIT_ONCE_ML
	587	*
	588	* Another flag for this function: SECOND_TIME (so that float substrs
	589	* with giant delta may be not rechecked).
	590	*/
	591
	592	/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
	593
	594	/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
	595	Otherwise, only SvCUR(sv) is used to get strbeg. */
	596
	597	/* XXXX We assume that strpos is strbeg unless sv. */
	598
	599	/* XXXX Some places assume that there is a fixed substring.
	600	An update may be needed if optimizer marks as "INTUITable"
	601	RExen without fixed substrings. Similarly, it is assumed that
	602	lengths of all the strings are no more than minlen, thus they
	603	cannot come from lookahead.
	604	(Or minlen should take into account lookahead.)
	605	NOTE: Some of this comment is not correct. minlen does now take account
	606	of lookahead/behind. Further research is required. -- demerphq
	607
	608	*/
	609
	610	/* A failure to find a constant substring means that there is no need to make
	611	an expensive call to REx engine, thus we celebrate a failure. Similarly,
	612	finding a substring too deep into the string means that fewer calls to
	613	regtry() should be needed.
	614
	615	REx compiler's optimizer found 4 possible hints:
	616	a) Anchored substring;
	617	b) Fixed substring;
	618	c) Whether we are anchored (beginning-of-line or \G);
	619	d) First node (of those at offset 0) which may distinguish positions;
	620	We use a)b)d) and multiline-part of c), and try to find a position in the
	621	string which does not contradict any of them.
	622	*/
	623
	624	/* Most of the decisions we make here should have been made at compile time.
	625	The nodes of the REx which we used for the search should have been
	626	deleted from the finite automaton. */
	627
	628	char *
	629	Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV sv, char strpos,
	630	char strend, const U32 flags, re_scream_pos_data data)
	631	{
	632	dVAR;
	633	struct regexp *const prog = ReANY(rx);
	634	I32 start_shift = 0;
	635	/* Should be nonnegative! */
	636	I32 end_shift = 0;
	637	char *s;
	638	SV *check;
	639	char *strbeg;
	640	char *t;
	641	const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
	642	I32 ml_anch;
	643	char other_last = NULL; / other substr checked before this */
	644	char check_at = NULL; / check substr found at this pos */
	645	char checked_upto = NULL; / how far into the string we have already checked using find_byclass*/
	646	const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
	647	RXi_GET_DECL(prog,progi);
	648	#ifdef DEBUGGING
	649	const char * const i_strpos = strpos;
	650	#endif
	651	GET_RE_DEBUG_FLAGS_DECL;
	652
	653	PERL_ARGS_ASSERT_RE_INTUIT_START;
	654	PERL_UNUSED_ARG(flags);
	655	PERL_UNUSED_ARG(data);
	656
	657	RX_MATCH_UTF8_set(rx,utf8_target);
	658
	659	if (RX_UTF8(rx)) {
	660	PL_reg_flags \|= RF_utf8;
	661	}
	662	DEBUG_EXECUTE_r(
	663	debug_start_match(rx, utf8_target, strpos, strend,
	664	sv ? "Guessing start of match in sv for"
	665	: "Guessing start of match in string for");
	666	);
	667
	668	/* CHR_DIST() would be more correct here but it makes things slow. */
	669	if (prog->minlen > strend - strpos) {
	670	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	671	"String too short... [re_intuit_start]\n"));
	672	goto fail;
	673	}
	674
	675	/* XXX we need to pass strbeg as a separate arg: the following is
	676	* guesswork and can be wrong... */
	677	if (sv && SvPOK(sv)) {
	678	char * p = SvPVX(sv);
	679	STRLEN cur = SvCUR(sv);
	680	if (p <= strpos && strpos < p + cur) {
	681	strbeg = p;
	682	assert(p <= strend && strend <= p + cur);
	683	}
	684	else
	685	strbeg = strend - cur;
	686	}
	687	else
	688	strbeg = strpos;
	689
	690	PL_regeol = strend;
	691	if (utf8_target) {
	692	if (!prog->check_utf8 && prog->check_substr)
	693	to_utf8_substr(prog);
	694	check = prog->check_utf8;
	695	} else {
	696	if (!prog->check_substr && prog->check_utf8) {
	697	if (! to_byte_substr(prog)) {
	698	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
	699	}
	700	}
	701	check = prog->check_substr;
	702	}
	703	if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
	704	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
	705	\|\| ( (prog->extflags & RXf_ANCH_BOL)
	706	&& !multiline ) ); /* Check after \n? */
	707
	708	if (!ml_anch) {
	709	if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
	710	&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
	711	/* SvCUR is not set on references: SvRV and SvPVX_const overlap */
	712	&& sv && !SvROK(sv)
	713	&& (strpos != strbeg)) {
	714	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
	715	goto fail;
	716	}
	717	if (prog->check_offset_min == prog->check_offset_max
	718	&& !(prog->extflags & RXf_CANY_SEEN)
	719	&& ! multiline) /* /m can cause \n's to match that aren't
	720	accounted for in the string max length.
	721	See [perl #115242] */
	722	{
	723	/* Substring at constant offset from beg-of-str... */
	724	I32 slen;
	725
	726	s = HOP3c(strpos, prog->check_offset_min, strend);
	727
	728	if (SvTAIL(check)) {
	729	slen = SvCUR(check); /* >= 1 */
	730
	731	if ( strend - s > slen \|\| strend - s < slen - 1
	732	\|\| (strend - s == slen && strend[-1] != '\n')) {
	733	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
	734	goto fail_finish;
	735	}
	736	/* Now should match s[0..slen-2] */
	737	slen--;
	738	if (slen && (SvPVX_const(check) != s
	739	\|\| (slen > 1
	740	&& memNE(SvPVX_const(check), s, slen)))) {
	741	report_neq:
	742	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
	743	goto fail_finish;
	744	}
	745	}
	746	else if (SvPVX_const(check) != s
	747	\|\| ((slen = SvCUR(check)) > 1
	748	&& memNE(SvPVX_const(check), s, slen)))
	749	goto report_neq;
	750	check_at = s;
	751	goto success_at_start;
	752	}
	753	}
	754	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
	755	s = strpos;
	756	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	757	end_shift = prog->check_end_shift;
	758
	759	if (!ml_anch) {
	760	const I32 end = prog->check_offset_max + CHR_SVLEN(check)
	761	- (SvTAIL(check) != 0);
	762	const I32 eshift = CHR_DIST((U8)strend, (U8)s) - end;
	763
	764	if (end_shift < eshift)
	765	end_shift = eshift;
	766	}
	767	}
	768	else { /* Can match at random position */
	769	ml_anch = 0;
	770	s = strpos;
	771	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	772	end_shift = prog->check_end_shift;
	773
	774	/* end shift should be non negative here */
	775	}
	776
	777	#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
	778	if (end_shift < 0)
	779	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
	780	(IV)end_shift, RX_PRECOMP(prog));
	781	#endif
	782
	783	restart:
	784	/* Find a possible match in the region s..strend by looking for
	785	the "check" substring in the region corrected by start/end_shift. */
	786
	787	{
	788	I32 srch_start_shift = start_shift;
	789	I32 srch_end_shift = end_shift;
	790	U8* start_point;
	791	U8* end_point;
	792	if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
	793	srch_end_shift -= ((strbeg - s) - srch_start_shift);
	794	srch_start_shift = strbeg - s;
	795	}
	796	DEBUG_OPTIMISE_MORE_r({
	797	PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
	798	(IV)prog->check_offset_min,
	799	(IV)srch_start_shift,
	800	(IV)srch_end_shift,
	801	(IV)prog->check_end_shift);
	802	});
	803
	804	if (prog->extflags & RXf_CANY_SEEN) {
	805	start_point= (U8*)(s + srch_start_shift);
	806	end_point= (U8*)(strend - srch_end_shift);
	807	} else {
	808	start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
	809	end_point= HOP3(strend, -srch_end_shift, strbeg);
	810	}
	811	DEBUG_OPTIMISE_MORE_r({
	812	PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
	813	(int)(end_point - start_point),
	814	(int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
	815	start_point);
	816	});
	817
	818	s = fbm_instr( start_point, end_point,
	819	check, multiline ? FBMrf_MULTILINE : 0);
	820	}
	821	/* Update the count-of-usability, remove useless subpatterns,
	822	unshift s. */
	823
	824	DEBUG_EXECUTE_r({
	825	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	826	SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
	827	PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
	828	(s ? "Found" : "Did not find"),
	829	(check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
	830	? "anchored" : "floating"),
	831	quoted,
	832	RE_SV_TAIL(check),
	833	(s ? " at offset " : "...\n") );
	834	});
	835
	836	if (!s)
	837	goto fail_finish;
	838	/* Finish the diagnostic message */
	839	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
	840
	841	/* XXX dmq: first branch is for positive lookbehind...
	842	Our check string is offset from the beginning of the pattern.
	843	So we need to do any stclass tests offset forward from that
	844	point. I think. :-(
	845	*/
	846
	847
	848
	849	check_at=s;
	850
	851
	852	/* Got a candidate. Check MBOL anchoring, and the other substr.
	853	Start with the other substr.
	854	XXXX no SCREAM optimization yet - and a very coarse implementation
	855	XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
	856	always match. Probably should be marked during compile...
	857	Probably it is right to do no SCREAM here...
	858	*/
	859
	860	if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
	861	: (prog->float_substr && prog->anchored_substr))
	862	{
	863	/* Take into account the "other" substring. */
	864	/* XXXX May be hopelessly wrong for UTF... */
	865	if (!other_last)
	866	other_last = strpos;
	867	if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
	868	do_other_anchored:
	869	{
	870	char * const last = HOP3c(s, -start_shift, strbeg);
	871	char last1, last2;
	872	char * const saved_s = s;
	873	SV* must;
	874
	875	t = s - prog->check_offset_max;
	876	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	877	&& (!utf8_target
	878	\|\| ((t = (char)reghopmaybe3((U8)s, -(prog->check_offset_max), (U8*)strpos))
	879	&& t > strpos)))
	880	NOOP;
	881	else
	882	t = strpos;
	883	t = HOP3c(t, prog->anchored_offset, strend);
	884	if (t < other_last) /* These positions already checked */
	885	t = other_last;
	886	last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
	887	if (last < last1)
	888	last1 = last;
	889	/* XXXX It is not documented what units *_offsets are in.
	890	We assume bytes, but this is clearly wrong.
	891	Meaning this code needs to be carefully reviewed for errors.
	892	dmq.
	893	*/
	894
	895	/* On end-of-str: see comment below. */
	896	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	897	if (must == &PL_sv_undef) {
	898	s = (char*)NULL;
	899	DEBUG_r(must = prog->anchored_utf8); /* for debug */
	900	}
	901	else
	902	s = fbm_instr(
	903	(unsigned char*)t,
	904	HOP3(HOP3(last1, prog->anchored_offset, strend)
	905	+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
	906	must,
	907	multiline ? FBMrf_MULTILINE : 0
	908	);
	909	DEBUG_EXECUTE_r({
	910	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	911	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	912	PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
	913	(s ? "Found" : "Contradicts"),
	914	quoted, RE_SV_TAIL(must));
	915	});
	916
	917
	918	if (!s) {
	919	if (last1 >= last2) {
	920	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	921	", giving up...\n"));
	922	goto fail_finish;
	923	}
	924	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	925	", trying floating at offset %ld...\n",
	926	(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
	927	other_last = HOP3c(last1, prog->anchored_offset+1, strend);
	928	s = HOP3c(last, 1, strend);
	929	goto restart;
	930	}
	931	else {
	932	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	933	(long)(s - i_strpos)));
	934	t = HOP3c(s, -prog->anchored_offset, strbeg);
	935	other_last = HOP3c(s, 1, strend);
	936	s = saved_s;
	937	if (t == strpos)
	938	goto try_at_start;
	939	goto try_at_offset;
	940	}
	941	}
	942	}
	943	else { /* Take into account the floating substring. */
	944	char last, last1;
	945	char * const saved_s = s;
	946	SV* must;
	947
	948	t = HOP3c(s, -start_shift, strbeg);
	949	last1 = last =
	950	HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
	951	if (CHR_DIST((U8)last, (U8)t) > prog->float_max_offset)
	952	last = HOP3c(t, prog->float_max_offset, strend);
	953	s = HOP3c(t, prog->float_min_offset, strend);
	954	if (s < other_last)
	955	s = other_last;
	956	/* XXXX It is not documented what units *_offsets are in. Assume bytes. */
	957	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	958	/* fbm_instr() takes into account exact value of end-of-str
	959	if the check is SvTAIL(ed). Since false positives are OK,
	960	and end-of-str is not later than strend we are OK. */
	961	if (must == &PL_sv_undef) {
	962	s = (char*)NULL;
	963	DEBUG_r(must = prog->float_utf8); /* for debug message */
	964	}
	965	else
	966	s = fbm_instr((unsigned char*)s,
	967	(unsigned char*)last + SvCUR(must)
	968	- (SvTAIL(must)!=0),
	969	must, multiline ? FBMrf_MULTILINE : 0);
	970	DEBUG_EXECUTE_r({
	971	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	972	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	973	PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
	974	(s ? "Found" : "Contradicts"),
	975	quoted, RE_SV_TAIL(must));
	976	});
	977	if (!s) {
	978	if (last1 == last) {
	979	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	980	", giving up...\n"));
	981	goto fail_finish;
	982	}
	983	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	984	", trying anchored starting at offset %ld...\n",
	985	(long)(saved_s + 1 - i_strpos)));
	986	other_last = last;
	987	s = HOP3c(t, 1, strend);
	988	goto restart;
	989	}
	990	else {
	991	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	992	(long)(s - i_strpos)));
	993	other_last = s; /* Fix this later. --Hugo */
	994	s = saved_s;
	995	if (t == strpos)
	996	goto try_at_start;
	997	goto try_at_offset;
	998	}
	999	}
	1000	}
	1001
	1002
	1003	t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
	1004
	1005	DEBUG_OPTIMISE_MORE_r(
	1006	PerlIO_printf(Perl_debug_log,
	1007	"Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
	1008	(IV)prog->check_offset_min,
	1009	(IV)prog->check_offset_max,
	1010	(IV)(s-strpos),
	1011	(IV)(t-strpos),
	1012	(IV)(t-s),
	1013	(IV)(strend-strpos)
	1014	)
	1015	);
	1016
	1017	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	1018	&& (!utf8_target
	1019	\|\| ((t = (char)reghopmaybe3((U8)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
	1020	&& t > strpos)))
	1021	{
	1022	/* Fixed substring is found far enough so that the match
	1023	cannot start at strpos. */
	1024	try_at_offset:
	1025	if (ml_anch && t[-1] != '\n') {
	1026	/* Eventually fbm_*() should handle this, but often
	1027	anchored_offset is not 0, so this check will not be wasted. */
	1028	/* XXXX In the code below we prefer to look for "^" even in
	1029	presence of anchored substrings. And we search even
	1030	beyond the found float position. These pessimizations
	1031	are historical artefacts only. */
	1032	find_anchor:
	1033	while (t < strend - prog->minlen) {
	1034	if (*t == '\n') {
	1035	if (t < check_at - prog->check_offset_min) {
	1036	if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
	1037	/* Since we moved from the found position,
	1038	we definitely contradict the found anchored
	1039	substr. Due to the above check we do not
	1040	contradict "check" substr.
	1041	Thus we can arrive here only if check substr
	1042	is float. Redo checking for "other"=="fixed".
	1043	*/
	1044	strpos = t + 1;
	1045	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
	1046	PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
	1047	goto do_other_anchored;
	1048	}
	1049	/* We don't contradict the found floating substring. */
	1050	/* XXXX Why not check for STCLASS? */
	1051	s = t + 1;
	1052	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
	1053	PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
	1054	goto set_useful;
	1055	}
	1056	/* Position contradicts check-string */
	1057	/* XXXX probably better to look for check-string
	1058	than for "\n", so one should lower the limit for t? */
	1059	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
	1060	PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
	1061	other_last = strpos = s = t + 1;
	1062	goto restart;
	1063	}
	1064	t++;
	1065	}
	1066	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
	1067	PL_colors[0], PL_colors[1]));
	1068	goto fail_finish;
	1069	}
	1070	else {
	1071	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
	1072	PL_colors[0], PL_colors[1]));
	1073	}
	1074	s = t;
	1075	set_useful:
	1076	++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
	1077	}
	1078	else {
	1079	/* The found string does not prohibit matching at strpos,
	1080	- no optimization of calling REx engine can be performed,
	1081	unless it was an MBOL and we are not after MBOL,
	1082	or a future STCLASS check will fail this. */
	1083	try_at_start:
	1084	/* Even in this situation we may use MBOL flag if strpos is offset
	1085	wrt the start of the string. */
	1086	if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
	1087	&& (strpos != strbeg) && strpos[-1] != '\n'
	1088	/* May be due to an implicit anchor of m{.*foo} */
	1089	&& !(prog->intflags & PREGf_IMPLICIT))
	1090	{
	1091	t = strpos;
	1092	goto find_anchor;
	1093	}
	1094	DEBUG_EXECUTE_r( if (ml_anch)
	1095	PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
	1096	(long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
	1097	);
	1098	success_at_start:
	1099	if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
	1100	&& (utf8_target ? (
	1101	prog->check_utf8 /* Could be deleted already */
	1102	&& --BmUSEFUL(prog->check_utf8) < 0
	1103	&& (prog->check_utf8 == prog->float_utf8)
	1104	) : (
	1105	prog->check_substr /* Could be deleted already */
	1106	&& --BmUSEFUL(prog->check_substr) < 0
	1107	&& (prog->check_substr == prog->float_substr)
	1108	)))
	1109	{
	1110	/* If flags & SOMETHING - do not do it many times on the same match */
	1111	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
	1112	/* XXX Does the destruction order have to change with utf8_target? */
	1113	SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
	1114	SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
	1115	prog->check_substr = prog->check_utf8 = NULL; /* disable */
	1116	prog->float_substr = prog->float_utf8 = NULL; /* clear */
	1117	check = NULL; /* abort */
	1118	s = strpos;
	1119	/* XXXX If the check string was an implicit check MBOL, then we need to unset the relevant flag
	1120	see http://bugs.activestate.com/show_bug.cgi?id=87173 */
	1121	if (prog->intflags & PREGf_IMPLICIT)
	1122	prog->extflags &= ~RXf_ANCH_MBOL;
	1123	/* XXXX This is a remnant of the old implementation. It
	1124	looks wasteful, since now INTUIT can use many
	1125	other heuristics. */
	1126	prog->extflags &= ~RXf_USE_INTUIT;
	1127	/* XXXX What other flags might need to be cleared in this branch? */
	1128	}
	1129	else
	1130	s = strpos;
	1131	}
	1132
	1133	/* Last resort... */
	1134	/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
	1135	/* trie stclasses are too expensive to use here, we are better off to
	1136	leave it to regmatch itself */
	1137	if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
	1138	/* minlen == 0 is possible if regstclass is \b or \B,
	1139	and the fixed substr is ''$.
	1140	Since minlen is already taken into account, s+1 is before strend;
	1141	accidentally, minlen >= 1 guarantees no false positives at s + 1
	1142	even for \b or \B. But (minlen? 1 : 0) below assumes that
	1143	regstclass does not come from lookahead... */
	1144	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
	1145	This leaves EXACTF-ish only, which are dealt with in find_byclass(). */
	1146	const U8* const str = (U8*)STRING(progi->regstclass);
	1147	const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
	1148	? CHR_DIST(str+STR_LEN(progi->regstclass), str)
	1149	: 1);
	1150	char * endpos;
	1151	if (prog->anchored_substr \|\| prog->anchored_utf8 \|\| ml_anch)
	1152	endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
	1153	else if (prog->float_substr \|\| prog->float_utf8)
	1154	endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
	1155	else
	1156	endpos= strend;
	1157
	1158	if (checked_upto < s)
	1159	checked_upto = s;
	1160	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
	1161	(IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
	1162
	1163	t = s;
	1164	s = find_byclass(prog, progi->regstclass, checked_upto, endpos, NULL);
	1165	if (s) {
	1166	checked_upto = s;
	1167	} else {
	1168	#ifdef DEBUGGING
	1169	const char *what = NULL;
	1170	#endif
	1171	if (endpos == strend) {
	1172	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1173	"Could not match STCLASS...\n") );
	1174	goto fail;
	1175	}
	1176	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1177	"This position contradicts STCLASS...\n") );
	1178	if ((prog->extflags & RXf_ANCH) && !ml_anch)
	1179	goto fail;
	1180	checked_upto = HOPBACKc(endpos, start_shift);
	1181	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
	1182	(IV)start_shift, (IV)(check_at - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
	1183	/* Contradict one of substrings */
	1184	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	1185	if ((utf8_target ? prog->anchored_utf8 : prog->anchored_substr) == check) {
	1186	DEBUG_EXECUTE_r( what = "anchored" );
	1187	hop_and_restart:
	1188	s = HOP3c(t, 1, strend);
	1189	if (s + start_shift + end_shift > strend) {
	1190	/* XXXX Should be taken into account earlier? */
	1191	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1192	"Could not match STCLASS...\n") );
	1193	goto fail;
	1194	}
	1195	if (!check)
	1196	goto giveup;
	1197	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1198	"Looking for %s substr starting at offset %ld...\n",
	1199	what, (long)(s + start_shift - i_strpos)) );
	1200	goto restart;
	1201	}
	1202	/* Have both, check_string is floating */
	1203	if (t + start_shift >= check_at) /* Contradicts floating=check */
	1204	goto retry_floating_check;
	1205	/* Recheck anchored substring, but not floating... */
	1206	s = check_at;
	1207	if (!check)
	1208	goto giveup;
	1209	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1210	"Looking for anchored substr starting at offset %ld...\n",
	1211	(long)(other_last - i_strpos)) );
	1212	goto do_other_anchored;
	1213	}
	1214	/* Another way we could have checked stclass at the
	1215	current position only: */
	1216	if (ml_anch) {
	1217	s = t = t + 1;
	1218	if (!check)
	1219	goto giveup;
	1220	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1221	"Looking for /%s^%s/m starting at offset %ld...\n",
	1222	PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
	1223	goto try_at_offset;
	1224	}
	1225	if (!(utf8_target ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
	1226	goto fail;
	1227	/* Check is floating substring. */
	1228	retry_floating_check:
	1229	t = check_at - start_shift;
	1230	DEBUG_EXECUTE_r( what = "floating" );
	1231	goto hop_and_restart;
	1232	}
	1233	if (t != s) {
	1234	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1235	"By STCLASS: moving %ld --> %ld\n",
	1236	(long)(t - i_strpos), (long)(s - i_strpos))
	1237	);
	1238	}
	1239	else {
	1240	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1241	"Does not contradict STCLASS...\n");
	1242	);
	1243	}
	1244	}
	1245	giveup:
	1246	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
	1247	PL_colors[4], (check ? "Guessed" : "Giving up"),
	1248	PL_colors[5], (long)(s - i_strpos)) );
	1249	return s;
	1250
	1251	fail_finish: /* Substring not found */
	1252	if (prog->check_substr \|\| prog->check_utf8) /* could be removed already */
	1253	BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
	1254	fail:
	1255	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
	1256	PL_colors[4], PL_colors[5]));
	1257	return NULL;
	1258	}
	1259	/* Declare a const local `trie_type`: when scan->flags is EXACT it is trie_utf8 or trie_plain, otherwise trie_utf8_fold or trie_latin_utf8_fold; the in-scope `utf8_target` picks between each pair. */
	1260	#define DECL_TRIE_TYPE(scan) \
	1261	const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
	1262	trie_type = ((scan->flags == EXACT) \
	1263	? (utf8_target ? trie_utf8 : trie_plain) \
	1264	: (utf8_target ? trie_utf8_fold : trie_latin_utf8_fold))
	1265	/* Fetch the next code point for trie matching into uvc (how depends on trie_type), set len, and look up its trie id in charid: trie->charmap[] for code points below 256, otherwise widecharmap (charid stays 0 if there is no entry). */
	1266	#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
	1267	uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
	1268	STRLEN skiplen; \
	1269	switch (trie_type) { \
	1270	case trie_utf8_fold: \
	1271	if ( foldlen>0 ) { /* fold output still pending: decode the next code point from uscan */ \
	1272	uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	1273	foldlen -= len; \
	1274	uscan += len; \
	1275	len=0; /* NOTE(review): presumably so the caller does not advance in the target; confirm at call sites */ \
	1276	} else { /* fold the character at uc; the result goes to foldbuf/foldlen */ \
	1277	uvc = to_utf8_fold( (const U8*) uc, foldbuf, &foldlen ); \
	1278	len = UTF8SKIP(uc); \
	1279	skiplen = UNISKIP( uvc ); \
	1280	foldlen -= skiplen; \
	1281	uscan = foldbuf + skiplen; /* continue after the code point just returned */ \
	1282	} \
	1283	break; \
	1284	case trie_latin_utf8_fold: \
	1285	if ( foldlen>0 ) { /* same pending-fold handling as the trie_utf8_fold case */ \
	1286	uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	1287	foldlen -= len; \
	1288	uscan += len; \
	1289	len=0; \
	1290	} else { \
	1291	len = 1; /* target is not UTF-8 here: one byte per character */ \
	1292	uvc = _to_fold_latin1( (U8) *uc, foldbuf, &foldlen, 1); \
	1293	skiplen = UNISKIP( uvc ); \
	1294	foldlen -= skiplen; \
	1295	uscan = foldbuf + skiplen; \
	1296	} \
	1297	break; \
	1298	case trie_utf8: /* no folding: decode straight from the target */ \
	1299	uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \
	1300	break; \
	1301	case trie_plain: /* no folding, single byte */ \
	1302	uvc = (UV)*uc; \
	1303	len = 1; \
	1304	} \
	1305	if (uvc < 256) { \
	1306	charid = trie->charmap[ uvc ]; \
	1307	} \
	1308	else { \
	1309	charid = 0; \
	1310	if (widecharmap) { \
	1311	SV** const svpp = hv_fetch(widecharmap, /* hash key is the raw bytes of the UV */ \
	1312	(char*)&uvc, sizeof(UV), 0); \
	1313	if (svpp) \
	1314	charid = (U16)SvIV(*svpp); \
	1315	} \
	1316	} \
	1317	} STMT_END
	1318	/* Scan s forward through e (inclusive), one step per pass. Jump to got_it at the first position where CoNd holds, the rest of the pattern text matches (skipped when ln == 1, otherwise checked with folder()), and either reginfo is NULL (dry run) or regtry() succeeds. */
	1319	#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
	1320	STMT_START { \
	1321	while (s <= e) { \
	1322	if ( (CoNd) \
	1323	&& (ln == 1 || folder(s, pat_string, ln)) \
	1324	&& (!reginfo || regtry(reginfo, &s)) ) \
	1325	goto got_it; \
	1326	s++; \
	1327	} \
	1328	} STMT_END
	1329	/* Run CoDe at each position while s < strend, advancing s by UTF8SKIP(s) after every pass. */
	1330	#define REXEC_FBC_UTF8_SCAN(CoDe) \
	1331	STMT_START { \
	1332	while (s < strend) { \
	1333	CoDe \
	1334	s += UTF8SKIP(s); \
	1335	} \
	1336	} STMT_END
	1337	/* Run CoDe at each position while s < strend, advancing s by one (s++) after every pass. */
	1338	#define REXEC_FBC_SCAN(CoDe) \
	1339	STMT_START { \
	1340	while (s < strend) { \
	1341	CoDe \
	1342	s++; \
	1343	} \
	1344	} STMT_END
	1345	/* UTF-8 class scan. Where CoNd holds and tmp is set, jump to got_it if reginfo is NULL (dry run) or regtry() succeeds; otherwise tmp becomes doevery. Where CoNd fails, tmp is set back to 1. So when doevery is 0, only the first position of each run satisfying CoNd is tried. */
	1346	#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
	1347	REXEC_FBC_UTF8_SCAN( \
	1348	if (CoNd) { \
	1349	if (tmp && (!reginfo || regtry(reginfo, &s))) \
	1350	goto got_it; \
	1351	else \
	1352	tmp = doevery; \
	1353	} \
	1354	else \
	1355	tmp = 1; \
	1356	)
	1357	/* Class scan advancing s by one per pass. Where CoNd holds and tmp is set, jump to got_it if reginfo is NULL (dry run) or regtry() succeeds; otherwise tmp becomes doevery. Where CoNd fails, tmp is set back to 1. So when doevery is 0, only the first position of each run satisfying CoNd is tried. */
	1358	#define REXEC_FBC_CLASS_SCAN(CoNd) \
	1359	REXEC_FBC_SCAN( \
	1360	if (CoNd) { \
	1361	if (tmp && (!reginfo || regtry(reginfo, &s))) \
	1362	goto got_it; \
	1363	else \
	1364	tmp = doevery; \
	1365	} \
	1366	else \
	1367	tmp = 1; \
	1368	)
	1369	/* Jump to got_it if reginfo is NULL (dry run) or regtry() succeeds at s. Expands to an `if` with no trailing semicolon; the use site supplies it. */
	1370	#define REXEC_FBC_TRYIT \
	1371	if ((!reginfo || regtry(reginfo, &s))) \
	1372	goto got_it
	1373	/* Class scan that picks its variant by utf8_target: REXEC_FBC_UTF8_CLASS_SCAN with CoNdUtF8 when set, REXEC_FBC_CLASS_SCAN with CoNd otherwise. */
	1374	#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
	1375	if (utf8_target) { \
	1376	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1377	} \
	1378	else { \
	1379	REXEC_FBC_CLASS_SCAN(CoNd); \
	1380	}
	1381	/* Like REXEC_FBC_CSCAN, but evaluates UtFpReLoAd once before the UTF-8 scan; the non-UTF-8 branch does not run it. */
	1382	#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
	1383	if (utf8_target) { \
	1384	UtFpReLoAd; \
	1385	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1386	} \
	1387	else { \
	1388	REXEC_FBC_CLASS_SCAN(CoNd); \
	1389	}
	1390	/* Like REXEC_FBC_CSCAN, but first sets RF_tainted in PL_reg_flags. Expands to two statements (the flag update, then the if/else) with no wrapper, so use it only where a statement sequence is valid. */
	1391	#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
	1392	PL_reg_flags |= RF_tainted; \
	1393	if (utf8_target) { \
	1394	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1395	} \
	1396	else { \
	1397	REXEC_FBC_CLASS_SCAN(CoNd); \
	1398	}
	1399	/* Shorthand for dump_exec_pos() that fills in PL_regeol, PL_bostr and PL_reg_starttry between the caller's (li, s) and doutf8 arguments. */
	1400	#define DUMP_EXEC_POS(li,s,doutf8) \
	1401	dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
	1402
	1403	/* Boundary scan over a UTF-8 target using only the single-byte test TEST_NON_UTF8. tmp starts as the test result for the byte before s ('\n' is assumed at PL_bostr); at each position IF_SUCCESS runs when the result for *s differs from tmp (tmp is then flipped), IF_FAIL otherwise. */
	1404	#define UTF8_NOLOAD(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1405	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1406	tmp = TEST_NON_UTF8(tmp); \
	1407	REXEC_FBC_UTF8_SCAN( \
	1408	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1409	tmp = !tmp; \
	1410	IF_SUCCESS; \
	1411	} \
	1412	else { \
	1413	IF_FAIL; \
	1414	} \
	1415	); \
	1416	
	1417	#define UTF8_LOAD(TeSt1_UtF8, TeSt2_UtF8, IF_SUCCESS, IF_FAIL) /* UTF-8 boundary scan: TeSt1_UtF8 classifies the previous character (held in tmp), TeSt2_UtF8 the current one */ \
	1418	if (s == PL_bostr) { \
	1419	tmp = '\n'; /* no previous character at the start of the string: use '\n' */ \
	1420	} \
	1421	else { \
	1422	U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr); /* hop back one from s, bounded by PL_bostr */ \
	1423	tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT); \
	1424	} \
	1425	tmp = TeSt1_UtF8; /* the test expression is expected to read tmp */ \
	1426	LOAD_UTF8_CHARCLASS_ALNUM(); \
	1427	REXEC_FBC_UTF8_SCAN( \
	1428	if (tmp == ! (TeSt2_UtF8)) { /* result changed between previous and current character */ \
	1429	tmp = !tmp; \
	1430	IF_SUCCESS; \
	1431	} \
	1432	else { \
	1433	IF_FAIL; \
	1434	} \
	1435	); \
	1436	
	1437	/* The only difference between the BOUND and NBOUND cases is that
	1438	* REXEC_FBC_TRYIT is called when matched in BOUND, and when non-matched in
	1439	* NBOUND. This is accomplished by passing it in either the if or else clause,
	1440	* with the other one being empty */
	1441	#define FBC_BOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) /* try a match where the test result flips; UTF-8 targets go through UTF8_LOAD with TEST1/TEST2 */ \
	1442	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1443	/* As FBC_BOUND, but UTF-8 targets go through UTF8_NOLOAD with TEST_NON_UTF8; TEST1_UTF8 and TEST2_UTF8 are accepted but not used in the expansion. */
	1444	#define FBC_BOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1445	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1446	/* Negated form of FBC_BOUND: REXEC_FBC_TRYIT is passed as the IF_FAIL action, so a match is tried where the test result does not flip. */
	1447	#define FBC_NBOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1448	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1449	/* Negated form of FBC_BOUND_NOLOAD; TEST1_UTF8 and TEST2_UTF8 are again accepted but not used in the expansion. */
	1450	#define FBC_NBOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1451	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1452
	1453
	1454	/* Common to the BOUND and NBOUND cases. Unfortunately the UTF8 tests need to
	1455	* be passed in completely with the variable name being tested, which isn't
	1456	* such a clean interface, but this is easier to read than it was before. We
	1457	* are looking for the boundary (or non-boundary) between a word and non-word
	1458	* character. The utf8 and non-utf8 cases have the same logic, but the details
	1459	* must be different. Find the "wordness" of the character just prior to this
	1460	* one, and compare it with the wordness of this one. If they differ, we have
	1461	* a boundary. At the beginning of the string, pretend that the previous
	1462	* character was a new-line */
	1463	#define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) /* UTF8_CODE is pasted verbatim for UTF-8 targets; the else branch is the byte-wise equivalent */ \
	1464	if (utf8_target) { \
	1465	UTF8_CODE \
	1466	} \
	1467	else { /* Not utf8 */ \
	1468	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; /* previous byte, or '\n' at the start of the string */ \
	1469	tmp = TEST_NON_UTF8(tmp); \
	1470	REXEC_FBC_SCAN( \
	1471	if (tmp == ! TEST_NON_UTF8((U8) *s)) { /* test result differs from the previous byte's */ \
	1472	tmp = !tmp; \
	1473	IF_SUCCESS; \
	1474	} \
	1475	else { \
	1476	IF_FAIL; \
	1477	} \
	1478	); \
	1479	} \
	/* After the scan: when prog->minlen is 0 and tmp is set, also try at the position where the scan stopped (reginfo NULL means dry run). */ \
	1480	if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s))) \
	1481	goto got_it;
	1482
	1483	/* We know what class REx starts with. Try to find this position... */
	1484	/* if reginfo is NULL, it's a dry run */
	1485	/* annoyingly all the vars in this routine have different names from their counterparts
	1486	in regmatch. /grrr */
	1487
	1488	STATIC char *
	1489	S_find_byclass(pTHX_ regexp * prog, const regnode c, char s,
	1490	const char strend, regmatch_info reginfo)
	1491	{
	1492	dVAR;
	1493	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
	1494	char pat_string; / The pattern's exactish string */
	1495	char pat_end; / ptr to end char of pat_string */
	1496	re_fold_t folder; /* Function for computing non-utf8 folds */
	1497	const U8 fold_array; / array for folding ords < 256 */
	1498	STRLEN ln;
	1499	STRLEN lnc;
	1500	STRLEN uskip;
	1501	U8 c1;
	1502	U8 c2;
	1503	char *e;
	1504	I32 tmp = 1; /* Scratch variable? */
	1505	const bool utf8_target = PL_reg_match_utf8;
	1506	UV utf8_fold_flags = 0;
	1507	RXi_GET_DECL(prog,progi);
	1508
	1509	PERL_ARGS_ASSERT_FIND_BYCLASS;
	1510
	1511	/* We know what class it must start with. */
	1512	switch (OP(c)) {
	1513	case ANYOF:
	1514	if (utf8_target) {
	1515	REXEC_FBC_UTF8_CLASS_SCAN(
	1516	reginclass(prog, c, (U8*)s, utf8_target));
	1517	}
	1518	else {
	1519	REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
	1520	}
	1521	break;
	1522	case CANY:
	1523	REXEC_FBC_SCAN(
	1524	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1525	goto got_it;
	1526	else
	1527	tmp = doevery;
	1528	);
	1529	break;
	1530
	1531	case EXACTFA:
	1532	if (UTF_PATTERN \|\| utf8_target) {
	1533	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	1534	goto do_exactf_utf8;
	1535	}
	1536	fold_array = PL_fold_latin1; /* Latin1 folds are not affected by */
	1537	folder = foldEQ_latin1; /* /a, except the sharp s one which */
	1538	goto do_exactf_non_utf8; /* isn't dealt with by these */
	1539
	1540	case EXACTF:
	1541	if (utf8_target) {
	1542
	1543	/* regcomp.c already folded this if pattern is in UTF-8 */
	1544	utf8_fold_flags = 0;
	1545	goto do_exactf_utf8;
	1546	}
	1547	fold_array = PL_fold;
	1548	folder = foldEQ;
	1549	goto do_exactf_non_utf8;
	1550
	1551	case EXACTFL:
	1552	if (UTF_PATTERN \|\| utf8_target) {
	1553	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	1554	goto do_exactf_utf8;
	1555	}
	1556	fold_array = PL_fold_locale;
	1557	folder = foldEQ_locale;
	1558	goto do_exactf_non_utf8;
	1559
	1560	case EXACTFU_SS:
	1561	if (UTF_PATTERN) {
	1562	utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
	1563	}
	1564	goto do_exactf_utf8;
	1565
	1566	case EXACTFU_TRICKYFOLD:
	1567	case EXACTFU:
	1568	if (UTF_PATTERN \|\| utf8_target) {
	1569	utf8_fold_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	1570	goto do_exactf_utf8;
	1571	}
	1572
	1573	/* Any 'ss' in the pattern should have been replaced by regcomp,
	1574	* so we don't have to worry here about this single special case
	1575	* in the Latin1 range */
	1576	fold_array = PL_fold_latin1;
	1577	folder = foldEQ_latin1;
	1578
	1579	/* FALL THROUGH */
	1580
	1581	do_exactf_non_utf8: /* Neither pattern nor string are UTF8, and there
	1582	are no glitches with fold-length differences
	1583	between the target string and pattern */
	1584
	1585	/* The idea in the non-utf8 EXACTF* cases is to first find the
	1586	* first character of the EXACTF* node and then, if necessary,
	1587	* case-insensitively compare the full text of the node. c1 is the
	1588	* first character. c2 is its fold. This logic will not work for
	1589	* Unicode semantics and the german sharp ss, which hence should
	1590	* not be compiled into a node that gets here. */
	1591	pat_string = STRING(c);
	1592	ln = STR_LEN(c); /* length to match in octets/bytes */
	1593
	1594	/* We know that we have to match at least 'ln' bytes (which is the
	1595	* same as characters, since not utf8). If we have to match 3
	1596	* characters, and there are only 2 available, we know without
	1597	* trying that it will fail; so don't start a match past the
	1598	* required minimum number from the far end */
	1599	e = HOP3c(strend, -((I32)ln), s);
	1600
	1601	if (!reginfo && e < s) {
	1602	e = s; /* Due to minlen logic of intuit() */
	1603	}
	1604
	1605	c1 = *pat_string;
	1606	c2 = fold_array[c1];
	1607	if (c1 == c2) { /* If char and fold are the same */
	1608	REXEC_FBC_EXACTISH_SCAN((U8)s == c1);
	1609	}
	1610	else {
	1611	REXEC_FBC_EXACTISH_SCAN((U8)s == c1 \|\| (U8)s == c2);
	1612	}
	1613	break;
	1614
	1615	do_exactf_utf8:
	1616	{
	1617	unsigned expansion;
	1618
	1619	/* If one of the operands is in utf8, we can't use the simpler folding
	1620	* above, due to the fact that many different characters can have the
	1621	* same fold, or portion of a fold, or different- length fold */
	1622	pat_string = STRING(c);
	1623	ln = STR_LEN(c); /* length to match in octets/bytes */
	1624	pat_end = pat_string + ln;
	1625	lnc = (UTF_PATTERN) /* length to match in characters */
	1626	? utf8_length((U8 ) pat_string, (U8 ) pat_end)
	1627	: ln;
	1628
	1629	/* We have 'lnc' characters to match in the pattern, but because of
	1630	* multi-character folding, each character in the target can match
	1631	* up to 3 characters (Unicode guarantees it will never exceed
	1632	* this) if it is utf8-encoded; and up to 2 if not (based on the
	1633	* fact that the Latin 1 folds are already determined, and the
	1634	* only multi-char fold in that range is the sharp-s folding to
	1635	* 'ss'. Thus, a pattern character can match as little as 1/3 of a
	1636	* string character. Adjust lnc accordingly, rounding up, so that
	1637	* if we need to match at least 4+1/3 chars, that really is 5. */
	1638	expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
	1639	lnc = (lnc + expansion - 1) / expansion;
	1640
	1641	/* As in the non-UTF8 case, if we have to match 3 characters, and
	1642	* only 2 are left, it's guaranteed to fail, so don't start a
	1643	* match that would require us to go beyond the end of the string
	1644	*/
	1645	e = HOP3c(strend, -((I32)lnc), s);
	1646
	1647	if (!reginfo && e < s) {
	1648	e = s; /* Due to minlen logic of intuit() */
	1649	}
	1650
	1651	/* XXX Note that we could recalculate e to stop the loop earlier,
	1652	* as the worst case expansion above will rarely be met, and as we
	1653	* go along we would usually find that e moves further to the left.
	1654	* This would happen only after we reached the point in the loop
	1655	* where if there were no expansion we should fail. Unclear if
	1656	* worth the expense */
	1657
	1658	while (s <= e) {
	1659	char my_strend= (char )strend;
	1660	if (foldEQ_utf8_flags(s, &my_strend, 0, utf8_target,
	1661	pat_string, NULL, ln, cBOOL(UTF_PATTERN), utf8_fold_flags)
	1662	&& (!reginfo \|\| regtry(reginfo, &s)) )
	1663	{
	1664	goto got_it;
	1665	}
	1666	s += (utf8_target) ? UTF8SKIP(s) : 1;
	1667	}
	1668	break;
	1669	}
	1670	case BOUNDL:
	1671	PL_reg_flags \|= RF_tainted;
	1672	FBC_BOUND(isALNUM_LC,
	1673	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1674	isALNUM_LC_utf8((U8*)s));
	1675	break;
	1676	case NBOUNDL:
	1677	PL_reg_flags \|= RF_tainted;
	1678	FBC_NBOUND(isALNUM_LC,
	1679	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1680	isALNUM_LC_utf8((U8*)s));
	1681	break;
	1682	case BOUND:
	1683	FBC_BOUND(isWORDCHAR,
	1684	isALNUM_uni(tmp),
	1685	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1686	break;
	1687	case BOUNDA:
	1688	FBC_BOUND_NOLOAD(isWORDCHAR_A,
	1689	isWORDCHAR_A(tmp),
	1690	isWORDCHAR_A((U8*)s));
	1691	break;
	1692	case NBOUND:
	1693	FBC_NBOUND(isWORDCHAR,
	1694	isALNUM_uni(tmp),
	1695	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1696	break;
	1697	case NBOUNDA:
	1698	FBC_NBOUND_NOLOAD(isWORDCHAR_A,
	1699	isWORDCHAR_A(tmp),
	1700	isWORDCHAR_A((U8*)s));
	1701	break;
	1702	case BOUNDU:
	1703	FBC_BOUND(isWORDCHAR_L1,
	1704	isALNUM_uni(tmp),
	1705	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1706	break;
	1707	case NBOUNDU:
	1708	FBC_NBOUND(isWORDCHAR_L1,
	1709	isALNUM_uni(tmp),
	1710	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1711	break;
	1712	case ALNUML:
	1713	REXEC_FBC_CSCAN_TAINT(
	1714	isALNUM_LC_utf8((U8*)s),
	1715	isALNUM_LC(*s)
	1716	);
	1717	break;
	1718	case ALNUMU:
	1719	REXEC_FBC_CSCAN_PRELOAD(
	1720	LOAD_UTF8_CHARCLASS_ALNUM(),
	1721	swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1722	isWORDCHAR_L1((U8) *s)
	1723	);
	1724	break;
	1725	case ALNUM:
	1726	REXEC_FBC_CSCAN_PRELOAD(
	1727	LOAD_UTF8_CHARCLASS_ALNUM(),
	1728	swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1729	isWORDCHAR((U8) *s)
	1730	);
	1731	break;
	1732	case ALNUMA:
	1733	/* Don't need to worry about utf8, as it can match only a single
	1734	* byte invariant character */
	1735	REXEC_FBC_CLASS_SCAN( isWORDCHAR_A(*s));
	1736	break;
	1737	case NALNUMU:
	1738	REXEC_FBC_CSCAN_PRELOAD(
	1739	LOAD_UTF8_CHARCLASS_ALNUM(),
	1740	!swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1741	! isWORDCHAR_L1((U8) *s)
	1742	);
	1743	break;
	1744	case NALNUM:
	1745	REXEC_FBC_CSCAN_PRELOAD(
	1746	LOAD_UTF8_CHARCLASS_ALNUM(),
	1747	!swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target),
	1748	! isALNUM(*s)
	1749	);
	1750	break;
	1751	case NALNUMA:
	1752	REXEC_FBC_CSCAN(
	1753	!isWORDCHAR_A(*s),
	1754	!isWORDCHAR_A(*s)
	1755	);
	1756	break;
	1757	case NALNUML:
	1758	REXEC_FBC_CSCAN_TAINT(
	1759	!isALNUM_LC_utf8((U8*)s),
	1760	!isALNUM_LC(*s)
	1761	);
	1762	break;
	1763	case SPACEU:
	1764	REXEC_FBC_CSCAN(
	1765	is_XPERLSPACE_utf8(s),
	1766	isSPACE_L1((U8) *s)
	1767	);
	1768	break;
	1769	case SPACE:
	1770	REXEC_FBC_CSCAN(
	1771	is_XPERLSPACE_utf8(s),
	1772	isSPACE((U8) *s)
	1773	);
	1774	break;
	1775	case SPACEA:
	1776	/* Don't need to worry about utf8, as it can match only a single
	1777	* byte invariant character */
	1778	REXEC_FBC_CLASS_SCAN( isSPACE_A(*s));
	1779	break;
	1780	case SPACEL:
	1781	REXEC_FBC_CSCAN_TAINT(
	1782	isSPACE_LC_utf8((U8*)s),
	1783	isSPACE_LC(*s)
	1784	);
	1785	break;
	1786	case NSPACEU:
	1787	REXEC_FBC_CSCAN(
	1788	! is_XPERLSPACE_utf8(s),
	1789	! isSPACE_L1((U8) *s)
	1790	);
	1791	break;
	1792	case NSPACE:
	1793	REXEC_FBC_CSCAN(
	1794	! is_XPERLSPACE_utf8(s),
	1795	! isSPACE((U8) *s)
	1796	);
	1797	break;
	1798	case NSPACEA:
	1799	REXEC_FBC_CSCAN(
	1800	!isSPACE_A(*s),
	1801	!isSPACE_A(*s)
	1802	);
	1803	break;
	1804	case NSPACEL:
	1805	REXEC_FBC_CSCAN_TAINT(
	1806	!isSPACE_LC_utf8((U8*)s),
	1807	!isSPACE_LC(*s)
	1808	);
	1809	break;
	1810	case DIGIT:
	1811	REXEC_FBC_CSCAN_PRELOAD(
	1812	LOAD_UTF8_CHARCLASS_DIGIT(),
	1813	swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
	1814	isDIGIT(*s)
	1815	);
	1816	break;
	1817	case DIGITA:
	1818	/* Don't need to worry about utf8, as it can match only a single
	1819	* byte invariant character */
	1820	REXEC_FBC_CLASS_SCAN( isDIGIT_A(*s));
	1821	break;
	1822	case DIGITL:
	1823	REXEC_FBC_CSCAN_TAINT(
	1824	isDIGIT_LC_utf8((U8*)s),
	1825	isDIGIT_LC(*s)
	1826	);
	1827	break;
	1828	case NDIGIT:
	1829	REXEC_FBC_CSCAN_PRELOAD(
	1830	LOAD_UTF8_CHARCLASS_DIGIT(),
	1831	!swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
	1832	!isDIGIT(*s)
	1833	);
	1834	break;
	1835	case NDIGITA:
	1836	REXEC_FBC_CSCAN(
	1837	!isDIGIT_A(*s),
	1838	!isDIGIT_A(*s)
	1839	);
	1840	break;
	1841	case NDIGITL:
	1842	REXEC_FBC_CSCAN_TAINT(
	1843	!isDIGIT_LC_utf8((U8*)s),
	1844	!isDIGIT_LC(*s)
	1845	);
	1846	break;
	1847	case LNBREAK:
	1848	REXEC_FBC_CSCAN(is_LNBREAK_utf8_safe(s, strend),
	1849	is_LNBREAK_latin1_safe(s, strend)
	1850	);
	1851	break;
	1852	case VERTWS:
	1853	REXEC_FBC_CSCAN(
	1854	is_VERTWS_utf8_safe(s, strend),
	1855	is_VERTWS_latin1_safe(s, strend)
	1856	);
	1857	break;
	1858	case NVERTWS:
	1859	REXEC_FBC_CSCAN(
	1860	!is_VERTWS_utf8_safe(s, strend),
	1861	!is_VERTWS_latin1_safe(s, strend)
	1862	);
	1863	break;
	1864	case HORIZWS:
	1865	REXEC_FBC_CSCAN(
	1866	is_HORIZWS_utf8_safe(s, strend),
	1867	is_HORIZWS_latin1_safe(s, strend)
	1868	);
	1869	break;
	1870	case NHORIZWS:
	1871	REXEC_FBC_CSCAN(
	1872	!is_HORIZWS_utf8_safe(s, strend),
	1873	!is_HORIZWS_latin1_safe(s, strend)
	1874	);
	1875	break;
	1876	case POSIXA:
	1877	/* Don't need to worry about utf8, as it can match only a single
	1878	* byte invariant character. The flag in this node type is the
	1879	* class number to pass to _generic_isCC() to build a mask for
	1880	* searching in PL_charclass[] */
	1881	REXEC_FBC_CLASS_SCAN( _generic_isCC_A(*s, FLAGS(c)));
	1882	break;
	1883	case NPOSIXA:
	1884	REXEC_FBC_CSCAN(
	1885	!_generic_isCC_A(*s, FLAGS(c)),
	1886	!_generic_isCC_A(*s, FLAGS(c))
	1887	);
	1888	break;
	1889
	1890	case AHOCORASICKC:
	1891	case AHOCORASICK:
	1892	{
	1893	DECL_TRIE_TYPE(c);
	1894	/* what trie are we using right now */
	1895	reg_ac_data aho = (reg_ac_data)progi->data->data[ ARG( c ) ];
	1896	reg_trie_data trie = (reg_trie_data)progi->data->data[ aho->trie ];
	1897	HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
	1898
	1899	const char *last_start = strend - trie->minlen;
	1900	#ifdef DEBUGGING
	1901	const char *real_start = s;
	1902	#endif
	1903	STRLEN maxlen = trie->maxlen;
	1904	SV *sv_points;
	1905	U8 *points; / map of where we were in the input string
	1906	when reading a given char. For ASCII this
	1907	is unnecessary overhead as the relationship
	1908	is always 1:1, but for Unicode, especially
	1909	case folded Unicode this is not true. */
	1910	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1911	U8 *bitmap=NULL;
	1912
	1913
	1914	GET_RE_DEBUG_FLAGS_DECL;
	1915
	1916	/* We can't just allocate points here. We need to wrap it in
	1917	* an SV so it gets freed properly if there is a croak while
	1918	* running the match */
	1919	ENTER;
	1920	SAVETMPS;
	1921	sv_points=newSV(maxlen * sizeof(U8 *));
	1922	SvCUR_set(sv_points,
	1923	maxlen * sizeof(U8 *));
	1924	SvPOK_on(sv_points);
	1925	sv_2mortal(sv_points);
	1926	points=(U8**)SvPV_nolen(sv_points );
	1927	if ( trie_type != trie_utf8_fold
	1928	&& (trie->bitmap \|\| OP(c)==AHOCORASICKC) )
	1929	{
	1930	if (trie->bitmap)
	1931	bitmap=(U8*)trie->bitmap;
	1932	else
	1933	bitmap=(U8*)ANYOF_BITMAP(c);
	1934	}
	1935	/* this is the Aho-Corasick algorithm modified a touch
	1936	to include special handling for long "unknown char" sequences.
	1937	The basic idea being that we use AC as long as we are dealing
	1938	with a possible matching char, when we encounter an unknown char
	1939	(and we have not encountered an accepting state) we scan forward
	1940	until we find a legal starting char.
	1941	AC matching is basically that of trie matching, except that when
	1942	we encounter a failing transition, we fall back to the current
	1943	states "fail state", and try the current char again, a process
	1944	we repeat until we reach the root state, state 1, or a legal
	1945	transition. If we fail on the root state then we can either
	1946	terminate if we have reached an accepting state previously, or
	1947	restart the entire process from the beginning if we have not.
	1948
	1949	*/
	1950	while (s <= last_start) {
	1951	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1952	U8 uc = (U8)s;
	1953	U16 charid = 0;
	1954	U32 base = 1;
	1955	U32 state = 1;
	1956	UV uvc = 0;
	1957	STRLEN len = 0;
	1958	STRLEN foldlen = 0;
	1959	U8 uscan = (U8)NULL;
	1960	U8 *leftmost = NULL;
	1961	#ifdef DEBUGGING
	1962	U32 accepted_word= 0;
	1963	#endif
	1964	U32 pointpos = 0;
	1965
	1966	while ( state && uc <= (U8*)strend ) {
	1967	int failed=0;
	1968	U32 word = aho->states[ state ].wordnum;
	1969
	1970	if( state==1 ) {
	1971	if ( bitmap ) {
	1972	DEBUG_TRIE_EXECUTE_r(
	1973	if ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1974	dump_exec_pos( (char *)uc, c, strend, real_start,
	1975	(char *)uc, utf8_target );
	1976	PerlIO_printf( Perl_debug_log,
	1977	" Scanning for legal start char...\n");
	1978	}
	1979	);
	1980	if (utf8_target) {
	1981	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1982	uc += UTF8SKIP(uc);
	1983	}
	1984	} else {
	1985	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1986	uc++;
	1987	}
	1988	}
	1989	s= (char *)uc;
	1990	}
	1991	if (uc >(U8*)last_start) break;
	1992	}
	1993
	1994	if ( word ) {
	1995	U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
	1996	if (!leftmost \|\| lpos < leftmost) {
	1997	DEBUG_r(accepted_word=word);
	1998	leftmost= lpos;
	1999	}
	2000	if (base==0) break;
	2001
	2002	}
	2003	points[pointpos++ % maxlen]= uc;
	2004	if (foldlen \|\| uc < (U8*)strend) {
	2005	REXEC_TRIE_READ_CHAR(trie_type, trie,
	2006	widecharmap, uc,
	2007	uscan, len, uvc, charid, foldlen,
	2008	foldbuf, uniflags);
	2009	DEBUG_TRIE_EXECUTE_r({
	2010	dump_exec_pos( (char *)uc, c, strend,
	2011	real_start, s, utf8_target);
	2012	PerlIO_printf(Perl_debug_log,
	2013	" Charid:%3u CP:%4"UVxf" ",
	2014	charid, uvc);
	2015	});
	2016	}
	2017	else {
	2018	len = 0;
	2019	charid = 0;
	2020	}
	2021
	2022
	2023	do {
	2024	#ifdef DEBUGGING
	2025	word = aho->states[ state ].wordnum;
	2026	#endif
	2027	base = aho->states[ state ].trans.base;
	2028
	2029	DEBUG_TRIE_EXECUTE_r({
	2030	if (failed)
	2031	dump_exec_pos( (char *)uc, c, strend, real_start,
	2032	s, utf8_target );
	2033	PerlIO_printf( Perl_debug_log,
	2034	"%sState: %4"UVxf", word=%"UVxf,
	2035	failed ? " Fail transition to " : "",
	2036	(UV)state, (UV)word);
	2037	});
	2038	if ( base ) {
	2039	U32 tmp;
	2040	I32 offset;
	2041	if (charid &&
	2042	( ((offset = base + charid
	2043	- 1 - trie->uniquecharcount)) >= 0)
	2044	&& ((U32)offset < trie->lasttrans)
	2045	&& trie->trans[offset].check == state
	2046	&& (tmp=trie->trans[offset].next))
	2047	{
	2048	DEBUG_TRIE_EXECUTE_r(
	2049	PerlIO_printf( Perl_debug_log," - legal\n"));
	2050	state = tmp;
	2051	break;
	2052	}
	2053	else {
	2054	DEBUG_TRIE_EXECUTE_r(
	2055	PerlIO_printf( Perl_debug_log," - fail\n"));
	2056	failed = 1;
	2057	state = aho->fail[state];
	2058	}
	2059	}
	2060	else {
	2061	/* we must be accepting here */
	2062	DEBUG_TRIE_EXECUTE_r(
	2063	PerlIO_printf( Perl_debug_log," - accepting\n"));
	2064	failed = 1;
	2065	break;
	2066	}
	2067	} while(state);
	2068	uc += len;
	2069	if (failed) {
	2070	if (leftmost)
	2071	break;
	2072	if (!state) state = 1;
	2073	}
	2074	}
	2075	if ( aho->states[ state ].wordnum ) {
	2076	U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
	2077	if (!leftmost \|\| lpos < leftmost) {
	2078	DEBUG_r(accepted_word=aho->states[ state ].wordnum);
	2079	leftmost = lpos;
	2080	}
	2081	}
	2082	if (leftmost) {
	2083	s = (char*)leftmost;
	2084	DEBUG_TRIE_EXECUTE_r({
	2085	PerlIO_printf(
	2086	Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
	2087	(UV)accepted_word, (IV)(s - real_start)
	2088	);
	2089	});
	2090	if (!reginfo \|\| regtry(reginfo, &s)) {
	2091	FREETMPS;
	2092	LEAVE;
	2093	goto got_it;
	2094	}
	2095	s = HOPc(s,1);
	2096	DEBUG_TRIE_EXECUTE_r({
	2097	PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
	2098	});
	2099	} else {
	2100	DEBUG_TRIE_EXECUTE_r(
	2101	PerlIO_printf( Perl_debug_log,"No match.\n"));
	2102	break;
	2103	}
	2104	}
	2105	FREETMPS;
	2106	LEAVE;
	2107	}
	2108	break;
	2109	default:
	2110	Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
	2111	break;
	2112	}
	2113	return 0;
	2114	got_it:
	2115	return s;
	2116	}
	2117
	2118
	2119	/*
	2120	- regexec_flags - match a regexp against a string
	2121	*/
	2122	I32
	2123	Perl_regexec_flags(pTHX_ REGEXP * const rx, char stringarg, char strend,
	2124	char strbeg, I32 minend, SV sv, void *data, U32 flags)
	2125	/* stringarg: the point in the string at which to begin matching */
	2126	/* strend: pointer to null at end of string */
	2127	/* strbeg: real beginning of string */
	2128	/* minend: end of match must be >= minend bytes after stringarg. */
	2129	/* sv: SV being matched: only used for utf8 flag, pos() etc; string
	2130	* itself is accessed via the pointers above */
	2131	/* data: May be used for some additional optimizations.
	2132	Currently its only used, with a U32 cast, for transmitting
	2133	the ganch offset when doing a /g match. This will change */
	2134	/* nosave: For optimizations. */
	2135
	2136	{
	2137	dVAR;
	2138	struct regexp *const prog = ReANY(rx);
	2139	char *s;
	2140	regnode *c;
	2141	char *startpos = stringarg;
	2142	I32 minlen; /* must match at least this many chars */
	2143	I32 dontbother = 0; /* how many characters not to try at end */
	2144	I32 end_shift = 0; /* Same for the end. / / CC */
	2145	I32 scream_pos = -1; /* Internal iterator of scream. */
	2146	char *scream_olds = NULL;
	2147	const bool utf8_target = cBOOL(DO_UTF8(sv));
	2148	I32 multiline;
	2149	RXi_GET_DECL(prog,progi);
	2150	regmatch_info reginfo; /* create some info to pass to regtry etc */
	2151	regexp_paren_pair *swap = NULL;
	2152	GET_RE_DEBUG_FLAGS_DECL;
	2153
	2154	PERL_ARGS_ASSERT_REGEXEC_FLAGS;
	2155	PERL_UNUSED_ARG(data);
	2156
	2157	/* Be paranoid... */
	2158	if (prog == NULL \|\| startpos == NULL) {
	2159	Perl_croak(aTHX_ "NULL regexp parameter");
	2160	return 0;
	2161	}
	2162
	2163	multiline = prog->extflags & RXf_PMf_MULTILINE;
	2164	reginfo.prog = rx; /* Yes, sorry that this is confusing. */
	2165
	2166	RX_MATCH_UTF8_set(rx, utf8_target);
	2167	DEBUG_EXECUTE_r(
	2168	debug_start_match(rx, utf8_target, startpos, strend,
	2169	"Matching");
	2170	);
	2171
	2172	minlen = prog->minlen;
	2173
	2174	if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
	2175	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	2176	"String too short [regexec_flags]...\n"));
	2177	goto phooey;
	2178	}
	2179
	2180
	2181	/* Check validity of program. */
	2182	if (UCHARAT(progi->program) != REG_MAGIC) {
	2183	Perl_croak(aTHX_ "corrupted regexp program");
	2184	}
	2185
	2186	PL_reg_flags = 0;
	2187	PL_reg_state.re_state_eval_setup_done = FALSE;
	2188	PL_reg_maxiter = 0;
	2189
	2190	if (RX_UTF8(rx))
	2191	PL_reg_flags \|= RF_utf8;
	2192
	2193	/* Mark beginning of line for ^ and lookbehind. */
	2194	reginfo.bol = startpos; /* XXX not used ??? */
	2195	PL_bostr = strbeg;
	2196	reginfo.sv = sv;
	2197
	2198	/* Mark end of line for $ (and such) */
	2199	PL_regeol = strend;
	2200
	2201	/* see how far we have to get to not match where we matched before */
	2202	reginfo.till = startpos+minend;
	2203
	2204	/* If there is a "must appear" string, look for it. */
	2205	s = startpos;
	2206
	2207	if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
	2208	MAGIC *mg;
	2209	if (flags & REXEC_IGNOREPOS){ /* Means: check only at start */
	2210	reginfo.ganch = startpos + prog->gofs;
	2211	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2212	"GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
	2213	} else if (sv && SvTYPE(sv) >= SVt_PVMG
	2214	&& SvMAGIC(sv)
	2215	&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
	2216	&& mg->mg_len >= 0) {
	2217	reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
	2218	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2219	"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
	2220
	2221	if (prog->extflags & RXf_ANCH_GPOS) {
	2222	if (s > reginfo.ganch)
	2223	goto phooey;
	2224	s = reginfo.ganch - prog->gofs;
	2225	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2226	"GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
	2227	if (s < strbeg)
	2228	goto phooey;
	2229	}
	2230	}
	2231	else if (data) {
	2232	reginfo.ganch = strbeg + PTR2UV(data);
	2233	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2234	"GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
	2235
	2236	} else { /* pos() not defined */
	2237	reginfo.ganch = strbeg;
	2238	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2239	"GPOS: reginfo.ganch = strbeg\n"));
	2240	}
	2241	}
	2242	if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
	2243	/* We have to be careful. If the previous successful match
	2244	was from this regex we don't want a subsequent partially
	2245	successful match to clobber the old results.
	2246	So when we detect this possibility we add a swap buffer
	2247	to the re, and switch the buffer each match. If we fail
	2248	we switch it back, otherwise we leave it swapped.
	2249	*/
	2250	swap = prog->offs;
	2251	/* do we need a save destructor here for eval dies? */
	2252	Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
	2253	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	2254	"rex=0x%"UVxf" saving offs: orig=0x%"UVxf" new=0x%"UVxf"\n",
	2255	PTR2UV(prog),
	2256	PTR2UV(swap),
	2257	PTR2UV(prog->offs)
	2258	));
	2259	}
	2260	if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL \|\| prog->check_utf8 != NULL)) {
	2261	re_scream_pos_data d;
	2262
	2263	d.scream_olds = &scream_olds;
	2264	d.scream_pos = &scream_pos;
	2265	s = re_intuit_start(rx, sv, s, strend, flags, &d);
	2266	if (!s) {
	2267	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
	2268	goto phooey; /* not present */
	2269	}
	2270	}
	2271
	2272
	2273
	2274	/* Simplest case: anchored match need be tried only once. */
	2275	/* [unless only anchor is BOL and multiline is set] */
	2276	if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
	2277	if (s == startpos && regtry(&reginfo, &startpos))
	2278	goto got_it;
	2279	else if (multiline \|\| (prog->intflags & PREGf_IMPLICIT)
	2280	\|\| (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
	2281	{
	2282	char *end;
	2283
	2284	if (minlen)
	2285	dontbother = minlen - 1;
	2286	end = HOP3c(strend, -dontbother, strbeg) - 1;
	2287	/* for multiline we only have to try after newlines */
	2288	if (prog->check_substr \|\| prog->check_utf8) {
	2289	/* because of the goto we can not easily reuse the macros for bifurcating the
	2290	unicode/non-unicode match modes here like we do elsewhere - demerphq */
	2291	if (utf8_target) {
	2292	if (s == startpos)
	2293	goto after_try_utf8;
	2294	while (1) {
	2295	if (regtry(&reginfo, &s)) {
	2296	goto got_it;
	2297	}
	2298	after_try_utf8:
	2299	if (s > end) {
	2300	goto phooey;
	2301	}
	2302	if (prog->extflags & RXf_USE_INTUIT) {
	2303	s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
	2304	if (!s) {
	2305	goto phooey;
	2306	}
	2307	}
	2308	else {
	2309	s += UTF8SKIP(s);
	2310	}
	2311	}
	2312	} /* end search for check string in unicode */
	2313	else {
	2314	if (s == startpos) {
	2315	goto after_try_latin;
	2316	}
	2317	while (1) {
	2318	if (regtry(&reginfo, &s)) {
	2319	goto got_it;
	2320	}
	2321	after_try_latin:
	2322	if (s > end) {
	2323	goto phooey;
	2324	}
	2325	if (prog->extflags & RXf_USE_INTUIT) {
	2326	s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
	2327	if (!s) {
	2328	goto phooey;
	2329	}
	2330	}
	2331	else {
	2332	s++;
	2333	}
	2334	}
	2335	} /* end search for check string in latin*/
	2336	} /* end search for check string */
	2337	else { /* search for newline */
	2338	if (s > startpos) {
	2339	/XXX: The s-- is almost definitely wrong here under unicode - demeprhq/
	2340	s--;
	2341	}
	2342	/* We can use a more efficient search as newlines are the same in unicode as they are in latin */
	2343	while (s <= end) { /* note it could be possible to match at the end of the string */
	2344	if (s++ == '\n') { / don't need PL_utf8skip here */
	2345	if (regtry(&reginfo, &s))
	2346	goto got_it;
	2347	}
	2348	}
	2349	} /* end search for newline */
	2350	} /* end anchored/multiline check string search */
	2351	goto phooey;
	2352	} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
	2353	{
	2354	/* the warning about reginfo.ganch being used without initialization
	2355	is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
	2356	and we only enter this block when the same bit is set. */
	2357	char *tmp_s = reginfo.ganch - prog->gofs;
	2358
	2359	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
	2360	goto got_it;
	2361	goto phooey;
	2362	}
	2363
	2364	/* Messy cases: unanchored match. */
	2365	if ((prog->anchored_substr \|\| prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
	2366	/* we have /x+whatever/ */
	2367	/* it must be a one character string (XXXX Except UTF_PATTERN?) */
	2368	char ch;
	2369	#ifdef DEBUGGING
	2370	int did_match = 0;
	2371	#endif
	2372	if (utf8_target) {
	2373	if (! prog->anchored_utf8) {
	2374	to_utf8_substr(prog);
	2375	}
	2376	ch = SvPVX_const(prog->anchored_utf8)[0];
	2377	REXEC_FBC_SCAN(
	2378	if (*s == ch) {
	2379	DEBUG_EXECUTE_r( did_match = 1 );
	2380	if (regtry(&reginfo, &s)) goto got_it;
	2381	s += UTF8SKIP(s);
	2382	while (s < strend && *s == ch)
	2383	s += UTF8SKIP(s);
	2384	}
	2385	);
	2386
	2387	}
	2388	else {
	2389	if (! prog->anchored_substr) {
	2390	if (! to_byte_substr(prog)) {
	2391	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2392	}
	2393	}
	2394	ch = SvPVX_const(prog->anchored_substr)[0];
	2395	REXEC_FBC_SCAN(
	2396	if (*s == ch) {
	2397	DEBUG_EXECUTE_r( did_match = 1 );
	2398	if (regtry(&reginfo, &s)) goto got_it;
	2399	s++;
	2400	while (s < strend && *s == ch)
	2401	s++;
	2402	}
	2403	);
	2404	}
	2405	DEBUG_EXECUTE_r(if (!did_match)
	2406	PerlIO_printf(Perl_debug_log,
	2407	"Did not find anchored character...\n")
	2408	);
	2409	}
	2410	else if (prog->anchored_substr != NULL
	2411	\|\| prog->anchored_utf8 != NULL
	2412	\|\| ((prog->float_substr != NULL \|\| prog->float_utf8 != NULL)
	2413	&& prog->float_max_offset < strend - s)) {
	2414	SV *must;
	2415	I32 back_max;
	2416	I32 back_min;
	2417	char *last;
	2418	char last1; / Last position checked before */
	2419	#ifdef DEBUGGING
	2420	int did_match = 0;
	2421	#endif
	2422	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	2423	if (utf8_target) {
	2424	if (! prog->anchored_utf8) {
	2425	to_utf8_substr(prog);
	2426	}
	2427	must = prog->anchored_utf8;
	2428	}
	2429	else {
	2430	if (! prog->anchored_substr) {
	2431	if (! to_byte_substr(prog)) {
	2432	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2433	}
	2434	}
	2435	must = prog->anchored_substr;
	2436	}
	2437	back_max = back_min = prog->anchored_offset;
	2438	} else {
	2439	if (utf8_target) {
	2440	if (! prog->float_utf8) {
	2441	to_utf8_substr(prog);
	2442	}
	2443	must = prog->float_utf8;
	2444	}
	2445	else {
	2446	if (! prog->float_substr) {
	2447	if (! to_byte_substr(prog)) {
	2448	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2449	}
	2450	}
	2451	must = prog->float_substr;
	2452	}
	2453	back_max = prog->float_max_offset;
	2454	back_min = prog->float_min_offset;
	2455	}
	2456
	2457	if (back_min<0) {
	2458	last = strend;
	2459	} else {
	2460	last = HOP3c(strend, /* Cannot start after this */
	2461	-(I32)(CHR_SVLEN(must)
	2462	- (SvTAIL(must) != 0) + back_min), strbeg);
	2463	}
	2464	if (s > PL_bostr)
	2465	last1 = HOPc(s, -1);
	2466	else
	2467	last1 = s - 1; /* bogus */
	2468
	2469	/* XXXX check_substr already used to find "s", can optimize if
	2470	check_substr==must. */
	2471	scream_pos = -1;
	2472	dontbother = end_shift;
	2473	strend = HOPc(strend, -dontbother);
	2474	while ( (s <= last) &&
	2475	(s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
	2476	(unsigned char*)strend, must,
	2477	multiline ? FBMrf_MULTILINE : 0)) ) {
	2478	DEBUG_EXECUTE_r( did_match = 1 );
	2479	if (HOPc(s, -back_max) > last1) {
	2480	last1 = HOPc(s, -back_min);
	2481	s = HOPc(s, -back_max);
	2482	}
	2483	else {
	2484	char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
	2485
	2486	last1 = HOPc(s, -back_min);
	2487	s = t;
	2488	}
	2489	if (utf8_target) {
	2490	while (s <= last1) {
	2491	if (regtry(&reginfo, &s))
	2492	goto got_it;
	2493	if (s >= last1) {
	2494	s++; /* to break out of outer loop */
	2495	break;
	2496	}
	2497	s += UTF8SKIP(s);
	2498	}
	2499	}
	2500	else {
	2501	while (s <= last1) {
	2502	if (regtry(&reginfo, &s))
	2503	goto got_it;
	2504	s++;
	2505	}
	2506	}
	2507	}
	2508	DEBUG_EXECUTE_r(if (!did_match) {
	2509	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	2510	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	2511	PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
	2512	((must == prog->anchored_substr \|\| must == prog->anchored_utf8)
	2513	? "anchored" : "floating"),
	2514	quoted, RE_SV_TAIL(must));
	2515	});
	2516	goto phooey;
	2517	}
	2518	else if ( (c = progi->regstclass) ) {
	2519	if (minlen) {
	2520	const OPCODE op = OP(progi->regstclass);
	2521	/* don't bother with what can't match */
	2522	if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
	2523	strend = HOPc(strend, -(minlen - 1));
	2524	}
	2525	DEBUG_EXECUTE_r({
	2526	SV * const prop = sv_newmortal();
	2527	regprop(prog, prop, c);
	2528	{
	2529	RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
	2530	s,strend-s,60);
	2531	PerlIO_printf(Perl_debug_log,
	2532	"Matching stclass %.*s against %s (%d bytes)\n",
	2533	(int)SvCUR(prop), SvPVX_const(prop),
	2534	quoted, (int)(strend - s));
	2535	}
	2536	});
	2537	if (find_byclass(prog, c, s, strend, &reginfo))
	2538	goto got_it;
	2539	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
	2540	}
	2541	else {
	2542	dontbother = 0;
	2543	if (prog->float_substr != NULL \|\| prog->float_utf8 != NULL) {
	2544	/* Trim the end. */
	2545	char *last= NULL;
	2546	SV* float_real;
	2547	STRLEN len;
	2548	const char *little;
	2549
	2550	if (utf8_target) {
	2551	if (! prog->float_utf8) {
	2552	to_utf8_substr(prog);
	2553	}
	2554	float_real = prog->float_utf8;
	2555	}
	2556	else {
	2557	if (! prog->float_substr) {
	2558	if (! to_byte_substr(prog)) {
	2559	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2560	}
	2561	}
	2562	float_real = prog->float_substr;
	2563	}
	2564
	2565	little = SvPV_const(float_real, len);
	2566	if (SvTAIL(float_real)) {
	2567	/* This means that float_real contains an artificial \n on
	2568	* the end due to the presence of something like this:
	2569	* /foo$/ where we can match both "foo" and "foo\n" at the
	2570	* end of the string. So we have to compare the end of the
	2571	* string first against the float_real without the \n and
	2572	* then against the full float_real with the string. We
	2573	* have to watch out for cases where the string might be
	2574	* smaller than the float_real or the float_real without
	2575	* the \n. */
	2576	char *checkpos= strend - len;
	2577	DEBUG_OPTIMISE_r(
	2578	PerlIO_printf(Perl_debug_log,
	2579	"%sChecking for float_real.%s\n",
	2580	PL_colors[4], PL_colors[5]));
	2581	if (checkpos + 1 < strbeg) {
	2582	/* can't match, even if we remove the trailing \n
	2583	* string is too short to match */
	2584	DEBUG_EXECUTE_r(
	2585	PerlIO_printf(Perl_debug_log,
	2586	"%sString shorter than required trailing substring, cannot match.%s\n",
	2587	PL_colors[4], PL_colors[5]));
	2588	goto phooey;
	2589	} else if (memEQ(checkpos + 1, little, len - 1)) {
	2590	/* can match, the end of the string matches without the
	2591	* "\n" */
	2592	last = checkpos + 1;
	2593	} else if (checkpos < strbeg) {
	2594	/* cant match, string is too short when the "\n" is
	2595	* included */
	2596	DEBUG_EXECUTE_r(
	2597	PerlIO_printf(Perl_debug_log,
	2598	"%sString does not contain required trailing substring, cannot match.%s\n",
	2599	PL_colors[4], PL_colors[5]));
	2600	goto phooey;
	2601	} else if (!multiline) {
	2602	/* non multiline match, so compare with the "\n" at the
	2603	* end of the string */
	2604	if (memEQ(checkpos, little, len)) {
	2605	last= checkpos;
	2606	} else {
	2607	DEBUG_EXECUTE_r(
	2608	PerlIO_printf(Perl_debug_log,
	2609	"%sString does not contain required trailing substring, cannot match.%s\n",
	2610	PL_colors[4], PL_colors[5]));
	2611	goto phooey;
	2612	}
	2613	} else {
	2614	/* multiline match, so we have to search for a place
	2615	* where the full string is located */
	2616	goto find_last;
	2617	}
	2618	} else {
	2619	find_last:
	2620	if (len)
	2621	last = rninstr(s, strend, little, little + len);
	2622	else
	2623	last = strend; /* matching "$" */
	2624	}
	2625	if (!last) {
	2626	/* at one point this block contained a comment which was
	2627	* probably incorrect, which said that this was a "should not
	2628	* happen" case. Even if it was true when it was written I am
	2629	* pretty sure it is not anymore, so I have removed the comment
	2630	* and replaced it with this one. Yves */
	2631	DEBUG_EXECUTE_r(
	2632	PerlIO_printf(Perl_debug_log,
	2633	"String does not contain required substring, cannot match.\n"
	2634	));
	2635	goto phooey;
	2636	}
	2637	dontbother = strend - last + prog->float_min_offset;
	2638	}
	2639	if (minlen && (dontbother < minlen))
	2640	dontbother = minlen - 1;
	2641	strend -= dontbother; /* this one's always in bytes! */
	2642	/* We don't know much -- general case. */
	2643	if (utf8_target) {
	2644	for (;;) {
	2645	if (regtry(&reginfo, &s))
	2646	goto got_it;
	2647	if (s >= strend)
	2648	break;
	2649	s += UTF8SKIP(s);
	2650	};
	2651	}
	2652	else {
	2653	do {
	2654	if (regtry(&reginfo, &s))
	2655	goto got_it;
	2656	} while (s++ < strend);
	2657	}
	2658	}
	2659
	2660	/* Failure. */
	2661	goto phooey;
	2662
	2663	got_it:
	2664	DEBUG_BUFFERS_r(
	2665	if (swap)
	2666	PerlIO_printf(Perl_debug_log,
	2667	"rex=0x%"UVxf" freeing offs: 0x%"UVxf"\n",
	2668	PTR2UV(prog),
	2669	PTR2UV(swap)
	2670	);
	2671	);
	2672	Safefree(swap);
	2673	RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
	2674
	2675	if (PL_reg_state.re_state_eval_setup_done)
	2676	restore_pos(aTHX_ prog);
	2677	if (RXp_PAREN_NAMES(prog))
	2678	(void)hv_iterinit(RXp_PAREN_NAMES(prog));
	2679
	2680	/* make sure $`, $&, $', and $digit will work later */
	2681	if ( !(flags & REXEC_NOT_FIRST) ) {
	2682	if (flags & REXEC_COPY_STR) {
	2683	#ifdef PERL_ANY_COW
	2684	if (SvCANCOW(sv)) {
	2685	if (DEBUG_C_TEST) {
	2686	PerlIO_printf(Perl_debug_log,
	2687	"Copy on write: regexp capture, type %d\n",
	2688	(int) SvTYPE(sv));
	2689	}
	2690	RX_MATCH_COPY_FREE(rx);
	2691	prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
	2692	prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
	2693	assert (SvPOKp(prog->saved_copy));
	2694	prog->sublen = PL_regeol - strbeg;
	2695	prog->suboffset = 0;
	2696	prog->subcoffset = 0;
	2697	} else
	2698	#endif
	2699	{
	2700	I32 min = 0;
	2701	I32 max = PL_regeol - strbeg;
	2702	I32 sublen;
	2703
	2704	if ( (flags & REXEC_COPY_SKIP_POST)
	2705	&& !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
	2706	&& !(PL_sawampersand & SAWAMPERSAND_RIGHT)
	2707	) { /* don't copy $' part of string */
	2708	U32 n = 0;
	2709	max = -1;
	2710	/* calculate the right-most part of the string covered
	2711	* by a capture. Due to look-ahead, this may be to
	2712	* the right of $&, so we have to scan all captures */
	2713	while (n <= prog->lastparen) {
	2714	if (prog->offs[n].end > max)
	2715	max = prog->offs[n].end;
	2716	n++;
	2717	}
	2718	if (max == -1)
	2719	max = (PL_sawampersand & SAWAMPERSAND_LEFT)
	2720	? prog->offs[0].start
	2721	: 0;
	2722	assert(max >= 0 && max <= PL_regeol - strbeg);
	2723	}
	2724
	2725	if ( (flags & REXEC_COPY_SKIP_PRE)
	2726	&& !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
	2727	&& !(PL_sawampersand & SAWAMPERSAND_LEFT)
	2728	) { /* don't copy $` part of string */
	2729	U32 n = 0;
	2730	min = max;
	2731	/* calculate the left-most part of the string covered
	2732	* by a capture. Due to look-behind, this may be to
	2733	* the left of $&, so we have to scan all captures */
	2734	while (min && n <= prog->lastparen) {
	2735	if ( prog->offs[n].start != -1
	2736	&& prog->offs[n].start < min)
	2737	{
	2738	min = prog->offs[n].start;
	2739	}
	2740	n++;
	2741	}
	2742	if ((PL_sawampersand & SAWAMPERSAND_RIGHT)
	2743	&& min > prog->offs[0].end
	2744	)
	2745	min = prog->offs[0].end;
	2746
	2747	}
	2748
	2749	assert(min >= 0 && min <= max && min <= PL_regeol - strbeg);
	2750	sublen = max - min;
	2751
	2752	if (RX_MATCH_COPIED(rx)) {
	2753	if (sublen > prog->sublen)
	2754	prog->subbeg =
	2755	(char*)saferealloc(prog->subbeg, sublen+1);
	2756	}
	2757	else
	2758	prog->subbeg = (char*)safemalloc(sublen+1);
	2759	Copy(strbeg + min, prog->subbeg, sublen, char);
	2760	prog->subbeg[sublen] = '\0';
	2761	prog->suboffset = min;
	2762	prog->sublen = sublen;
	2763	RX_MATCH_COPIED_on(rx);
	2764	}
	2765	prog->subcoffset = prog->suboffset;
	2766	if (prog->suboffset && utf8_target) {
	2767	/* Convert byte offset to chars.
	2768	* XXX ideally should only compute this if @-/@+
	2769	* has been seen, a la PL_sawampersand ??? */
	2770
	2771	/* If there's a direct correspondence between the
	2772	* string which we're matching and the original SV,
	2773	* then we can use the utf8 len cache associated with
	2774	* the SV. In particular, it means that under //g,
	2775	* sv_pos_b2u() will use the previously cached
	2776	* position to speed up working out the new length of
	2777	* subcoffset, rather than counting from the start of
	2778	* the string each time. This stops
	2779	* $x = "\x{100}" x 1E6; 1 while $x =~ /(.)/g;
	2780	* from going quadratic */
	2781	if (SvPOKp(sv) && SvPVX(sv) == strbeg)
	2782	sv_pos_b2u(sv, &(prog->subcoffset));
	2783	else
	2784	prog->subcoffset = utf8_length((U8*)strbeg,
	2785	(U8*)(strbeg+prog->suboffset));
	2786	}
	2787	}
	2788	else {
	2789	RX_MATCH_COPY_FREE(rx);
	2790	prog->subbeg = strbeg;
	2791	prog->suboffset = 0;
	2792	prog->subcoffset = 0;
	2793	prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
	2794	}
	2795	}
	2796
	2797	return 1;
	2798
	2799	phooey:
	2800	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
	2801	PL_colors[4], PL_colors[5]));
	2802	if (PL_reg_state.re_state_eval_setup_done)
	2803	restore_pos(aTHX_ prog);
	2804	if (swap) {
	2805	/* we failed :-( roll it back */
	2806	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	2807	"rex=0x%"UVxf" rolling back offs: freeing=0x%"UVxf" restoring=0x%"UVxf"\n",
	2808	PTR2UV(prog),
	2809	PTR2UV(prog->offs),
	2810	PTR2UV(swap)
	2811	));
	2812	Safefree(prog->offs);
	2813	prog->offs = swap;
	2814	}
	2815	return 0;
	2816	}
	2817
	2818
	2819	/* Set which rex is pointed to by PL_reg_state, handling ref counting.
	2820	* Do inc before dec, in case old and new rex are the same */
	2821	#define SET_reg_curpm(Re2) \
	2822	if (PL_reg_state.re_state_eval_setup_done) { \
	2823	(void)ReREFCNT_inc(Re2); \
	2824	ReREFCNT_dec(PM_GETRE(PL_reg_curpm)); \
	2825	PM_SETRE((PL_reg_curpm), (Re2)); \
	2826	}
	2827
	2828
	2829	/*
	2830	- regtry - try match at specific point
	2831	*/
	2832	STATIC I32 /* 0 failure, 1 success */
	2833	S_regtry(pTHX_ regmatch_info reginfo, char *startposp)
	2834	{
	2835	dVAR;
	2836	CHECKPOINT lastcp;
	2837	REGEXP *const rx = reginfo->prog;
	2838	regexp *const prog = ReANY(rx);
	2839	I32 result;
	2840	RXi_GET_DECL(prog,progi);
	2841	GET_RE_DEBUG_FLAGS_DECL;
	2842
	2843	PERL_ARGS_ASSERT_REGTRY;
	2844
	2845	reginfo->cutpoint=NULL;
	2846
	2847	if ((prog->extflags & RXf_EVAL_SEEN)
	2848	&& !PL_reg_state.re_state_eval_setup_done)
	2849	{
	2850	MAGIC *mg;
	2851
	2852	PL_reg_state.re_state_eval_setup_done = TRUE;
	2853	if (reginfo->sv) {
	2854	/* Make $_ available to executed code. */
	2855	if (reginfo->sv != DEFSV) {
	2856	SAVE_DEFSV;
	2857	DEFSV_set(reginfo->sv);
	2858	}
	2859
	2860	if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
	2861	&& (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
	2862	/* prepare for quick setting of pos */
	2863	#ifdef PERL_OLD_COPY_ON_WRITE
	2864	if (SvIsCOW(reginfo->sv))
	2865	sv_force_normal_flags(reginfo->sv, 0);
	2866	#endif
	2867	mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
	2868	&PL_vtbl_mglob, NULL, 0);
	2869	mg->mg_len = -1;
	2870	}
	2871	PL_reg_magic = mg;
	2872	PL_reg_oldpos = mg->mg_len;
	2873	SAVEDESTRUCTOR_X(restore_pos, prog);
	2874	}
	2875	if (!PL_reg_curpm) {
	2876	Newxz(PL_reg_curpm, 1, PMOP);
	2877	#ifdef USE_ITHREADS
	2878	{
	2879	SV* const repointer = &PL_sv_undef;
	2880	/* this regexp is also owned by the new PL_reg_curpm, which
	2881	will try to free it. */
	2882	av_push(PL_regex_padav, repointer);
	2883	PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
	2884	PL_regex_pad = AvARRAY(PL_regex_padav);
	2885	}
	2886	#endif
	2887	}
	2888	SET_reg_curpm(rx);
	2889	PL_reg_oldcurpm = PL_curpm;
	2890	PL_curpm = PL_reg_curpm;
	2891	if (RXp_MATCH_COPIED(prog)) {
	2892	/* Here is a serious problem: we cannot rewrite subbeg,
	2893	since it may be needed if this match fails. Thus
	2894	$` inside (?{}) could fail... */
	2895	PL_reg_oldsaved = prog->subbeg;
	2896	PL_reg_oldsavedlen = prog->sublen;
	2897	PL_reg_oldsavedoffset = prog->suboffset;
	2898	PL_reg_oldsavedcoffset = prog->suboffset;
	2899	#ifdef PERL_ANY_COW
	2900	PL_nrs = prog->saved_copy;
	2901	#endif
	2902	RXp_MATCH_COPIED_off(prog);
	2903	}
	2904	else
	2905	PL_reg_oldsaved = NULL;
	2906	prog->subbeg = PL_bostr;
	2907	prog->suboffset = 0;
	2908	prog->subcoffset = 0;
	2909	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
	2910	}
	2911	#ifdef DEBUGGING
	2912	PL_reg_starttry = *startposp;
	2913	#endif
	2914	prog->offs[0].start = *startposp - PL_bostr;
	2915	prog->lastparen = 0;
	2916	prog->lastcloseparen = 0;
	2917
	2918	/* XXXX What this code is doing here?!!! There should be no need
	2919	to do this again and again, prog->lastparen should take care of
	2920	this! --ilya*/
	2921
	2922	/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
	2923	* Actually, the code in regcppop() (which Ilya may be meaning by
	2924	* prog->lastparen), is not needed at all by the test suite
	2925	* (op/regexp, op/pat, op/split), but that code is needed otherwise
	2926	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	2927	* Meanwhile, this code is needed for the
	2928	* above-mentioned test suite tests to succeed. The common theme
	2929	* on those tests seems to be returning null fields from matches.
	2930	* --jhi updated by dapm */
	2931	#if 1
	2932	if (prog->nparens) {
	2933	regexp_paren_pair *pp = prog->offs;
	2934	I32 i;
	2935	for (i = prog->nparens; i > (I32)prog->lastparen; i--) {
	2936	++pp;
	2937	pp->start = -1;
	2938	pp->end = -1;
	2939	}
	2940	}
	2941	#endif
	2942	REGCP_SET(lastcp);
	2943	result = regmatch(reginfo, *startposp, progi->program + 1);
	2944	if (result != -1) {
	2945	prog->offs[0].end = result;
	2946	return 1;
	2947	}
	2948	if (reginfo->cutpoint)
	2949	*startposp= reginfo->cutpoint;
	2950	REGCP_UNWIND(lastcp);
	2951	return 0;
	2952	}
	2953
	2954
	2955	#define sayYES goto yes
	2956	#define sayNO goto no
	2957	#define sayNO_SILENT goto no_silent
	2958
	2959	/* we dont use STMT_START/END here because it leads to
	2960	"unreachable code" warnings, which are bogus, but distracting. */
	2961	#define CACHEsayNO \
	2962	if (ST.cache_mask) \
	2963	PL_reg_poscache[ST.cache_offset] \|= ST.cache_mask; \
	2964	sayNO
	2965
	2966	/* this is used to determine how far from the left messages like
	2967	'failed...' are printed. It should be set such that messages
	2968	are inline with the regop output that created them.
	2969	*/
	2970	#define REPORT_CODE_OFF 32
	2971
	2972
	2973	#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
	2974	#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
	2975	#define CHRTEST_NOT_A_CP_1 -999
	2976	#define CHRTEST_NOT_A_CP_2 -998
	2977
	2978	#define SLAB_FIRST(s) (&(s)->states[0])
	2979	#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
	2980
	2981	/* grab a new slab and return the first slot in it */
	2982
	2983	STATIC regmatch_state *
	2984	S_push_slab(pTHX)
	2985	{
	2986	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	2987	dMY_CXT;
	2988	#endif
	2989	regmatch_slab *s = PL_regmatch_slab->next;
	2990	if (!s) {
	2991	Newx(s, 1, regmatch_slab);
	2992	s->prev = PL_regmatch_slab;
	2993	s->next = NULL;
	2994	PL_regmatch_slab->next = s;
	2995	}
	2996	PL_regmatch_slab = s;
	2997	return SLAB_FIRST(s);
	2998	}
	2999
	3000
	3001	/* push a new state then goto it */
	3002
	3003	#define PUSH_STATE_GOTO(state, node, input) \
	3004	pushinput = input; \
	3005	scan = node; \
	3006	st->resume_state = state; \
	3007	goto push_state;
	3008
	3009	/* push a new state with success backtracking, then goto it */
	3010
	3011	#define PUSH_YES_STATE_GOTO(state, node, input) \
	3012	pushinput = input; \
	3013	scan = node; \
	3014	st->resume_state = state; \
	3015	goto push_yes_state;
	3016
	3017
	3018
	3019
	3020	/*
	3021
	3022	regmatch() - main matching routine
	3023
	3024	This is basically one big switch statement in a loop. We execute an op,
	3025	set 'next' to point the next op, and continue. If we come to a point which
	3026	we may need to backtrack to on failure such as (A|B|C), we push a
	3027	backtrack state onto the backtrack stack. On failure, we pop the top
	3028	state, and re-enter the loop at the state indicated. If there are no more
	3029	states to pop, we return failure.
	3030
	3031	Sometimes we also need to backtrack on success; for example /A+/, where
	3032	after successfully matching one A, we need to go back and try to
	3033	match another one; similarly for lookahead assertions: if the assertion
	3034	completes successfully, we backtrack to the state just before the assertion
	3035	and then carry on. In these cases, the pushed state is marked as
	3036	'backtrack on success too'. This marking is in fact done by a chain of
	3037	pointers, each pointing to the previous 'yes' state. On success, we pop to
	3038	the nearest yes state, discarding any intermediate failure-only states.
	3039	Sometimes a yes state is pushed just to force some cleanup code to be
	3040	called at the end of a successful match or submatch; e.g. (??{$re}) uses
	3041	it to free the inner regex.
	3042
	3043	Note that failure backtracking rewinds the cursor position, while
	3044	success backtracking leaves it alone.
	3045
	3046	A pattern is complete when the END op is executed, while a subpattern
	3047	such as (?=foo) is complete when the SUCCESS op is executed. Both of these
	3048	ops trigger the "pop to last yes state if any, otherwise return true"
	3049	behaviour.
	3050
	3051	A common convention in this function is to use A and B to refer to the two
	3052	subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
	3053	the subpattern to be matched possibly multiple times, while B is the entire
	3054	rest of the pattern. Variable and state names reflect this convention.
	3055
	3056	The states in the main switch are the union of ops and failure/success of
	3057	substates associated with that op. For example, IFMATCH is the op
	3058	that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
	3059	'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
	3060	successfully matched A and IFMATCH_A_fail is a state saying that we have
	3061	just failed to match A. Resume states always come in pairs. The backtrack
	3062	state we push is marked as 'IFMATCH_A', but when that is popped, we resume
	3063	at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
	3064	on success or failure.
	3065
	3066	The struct that holds a backtracking state is actually a big union, with
	3067	one variant for each major type of op. The variable st points to the
	3068	top-most backtrack struct. To make the code clearer, within each
	3069	block of code we #define ST to alias the relevant union.
	3070
	3071	Here's a concrete example of a (vastly oversimplified) IFMATCH
	3072	implementation:
	3073
	3074	switch (state) {
	3075	....
	3076
	3077	#define ST st->u.ifmatch
	3078
	3079	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	3080	ST.foo = ...; // some state we wish to save
	3081	...
	3082	// push a yes backtrack state with a resume value of
	3083	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
	3084	// first node of A:
	3085	PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput);
	3086	// NOTREACHED
	3087
	3088	case IFMATCH_A: // we have successfully executed A; now continue with B
	3089	next = B;
	3090	bar = ST.foo; // do something with the preserved value
	3091	break;
	3092
	3093	case IFMATCH_A_fail: // A failed, so the assertion failed
	3094	...; // do some housekeeping, then ...
	3095	sayNO; // propagate the failure
	3096
	3097	#undef ST
	3098
	3099	...
	3100	}
	3101
	3102	For any old-timers reading this who are familiar with the old recursive
	3103	approach, the code above is equivalent to:
	3104
	3105	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	3106	{
	3107	int foo = ...
	3108	...
	3109	if (regmatch(A)) {
	3110	next = B;
	3111	bar = foo;
	3112	break;
	3113	}
	3114	...; // do some housekeeping, then ...
	3115	sayNO; // propagate the failure
	3116	}
	3117
	3118	The topmost backtrack state, pointed to by st, is usually free. If you
	3119	want to claim it, populate any ST.foo fields in it with values you wish to
	3120	save, then do one of
	3121
	3122	PUSH_STATE_GOTO(resume_state, node, newinput);
	3123	PUSH_YES_STATE_GOTO(resume_state, node, newinput);
	3124
	3125	which sets that backtrack state's resume value to 'resume_state', pushes a
	3126	new free entry to the top of the backtrack stack, then goes to 'node'.
	3127	On backtracking, the free slot is popped, and the saved state becomes the
	3128	new free state. An ST.foo field in this new top state can be temporarily
	3129	accessed to retrieve values, but once the main loop is re-entered, it
	3130	becomes available for reuse.
	3131
	3132	Note that the depth of the backtrack stack constantly increases during the
	3133	left-to-right execution of the pattern, rather than going up and down with
	3134	the pattern nesting. For example the stack is at its maximum at Z at the
	3135	end of the pattern, rather than at X in the following:
	3136
	3137	/(((X)+)+)+....(Y)+....Z/
	3138
	3139	The only exceptions to this are lookahead/behind assertions and the cut,
	3140	(?>A), which pop all the backtrack states associated with A before
	3141	continuing.
	3142
	3143	Backtrack state structs are allocated in slabs of about 4K in size.
	3144	PL_regmatch_state and st always point to the currently active state,
	3145	and PL_regmatch_slab points to the slab currently containing
	3146	PL_regmatch_state. The first time regmatch() is called, the first slab is
	3147	allocated, and is never freed until interpreter destruction. When the slab
	3148	is full, a new one is allocated and chained to the end. At exit from
	3149	regmatch(), slabs allocated since entry are freed.
	3150
	3151	*/
	3152
	3153
	3154	#define DEBUG_STATE_pp(pp) \
	3155	DEBUG_STATE_r({ \
	3156	DUMP_EXEC_POS(locinput, scan, utf8_target); \
	3157	PerlIO_printf(Perl_debug_log, \
	3158	" %*s"pp" %s%s%s%s%s\n", \
	3159	depth*2, "", \
	3160	PL_reg_name[st->resume_state], \
	3161	((st==yes_state\|\|st==mark_state) ? "[" : ""), \
	3162	((st==yes_state) ? "Y" : ""), \
	3163	((st==mark_state) ? "M" : ""), \
	3164	((st==yes_state\|\|st==mark_state) ? "]" : "") \
	3165	); \
	3166	});
	3167
	3168
	3169	#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
	3170
	3171	#ifdef DEBUGGING
	3172
	3173	STATIC void
	3174	S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
	3175	const char start, const char end, const char *blurb)
	3176	{
	3177	const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
	3178
	3179	PERL_ARGS_ASSERT_DEBUG_START_MATCH;
	3180
	3181	if (!PL_colorset)
	3182	reginitcolors();
	3183	{
	3184	RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
	3185	RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
	3186
	3187	RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
	3188	start, end - start, 60);
	3189
	3190	PerlIO_printf(Perl_debug_log,
	3191	"%s%s REx%s %s against %s\n",
	3192	PL_colors[4], blurb, PL_colors[5], s0, s1);
	3193
	3194	if (utf8_target\|\|utf8_pat)
	3195	PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
	3196	utf8_pat ? "pattern" : "",
	3197	utf8_pat && utf8_target ? " and " : "",
	3198	utf8_target ? "string" : ""
	3199	);
	3200	}
	3201	}
	3202
	3203	STATIC void
	3204	S_dump_exec_pos(pTHX_ const char *locinput,
	3205	const regnode *scan,
	3206	const char *loc_regeol,
	3207	const char *loc_bostr,
	3208	const char *loc_reg_starttry,
	3209	const bool utf8_target)
	3210	{
	3211	const int docolor = PL_colors[0] \|\| PL_colors[2] \|\| *PL_colors[4];
	3212	const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
	3213	int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
	3214	/* The part of the string before starttry has one color
	3215	(pref0_len chars), between starttry and current
	3216	position another one (pref_len - pref0_len chars),
	3217	after the current position the third one.
	3218	We assume that pref0_len <= pref_len, otherwise we
	3219	decrease pref0_len. */
	3220	int pref_len = (locinput - loc_bostr) > (5 + taill) - l
	3221	? (5 + taill) - l : locinput - loc_bostr;
	3222	int pref0_len;
	3223
	3224	PERL_ARGS_ASSERT_DUMP_EXEC_POS;
	3225
	3226	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput - pref_len)))
	3227	pref_len++;
	3228	pref0_len = pref_len - (locinput - loc_reg_starttry);
	3229	if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
	3230	l = ( loc_regeol - locinput > (5 + taill) - pref_len
	3231	? (5 + taill) - pref_len : loc_regeol - locinput);
	3232	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput + l)))
	3233	l--;
	3234	if (pref0_len < 0)
	3235	pref0_len = 0;
	3236	if (pref0_len > pref_len)
	3237	pref0_len = pref_len;
	3238	{
	3239	const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0;
	3240
	3241	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
	3242	(locinput - pref_len),pref0_len, 60, 4, 5);
	3243
	3244	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
	3245	(locinput - pref_len + pref0_len),
	3246	pref_len - pref0_len, 60, 2, 3);
	3247
	3248	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
	3249	locinput, loc_regeol - locinput, 10, 0, 1);
	3250
	3251	const STRLEN tlen=len0+len1+len2;
	3252	PerlIO_printf(Perl_debug_log,
	3253	"%4"IVdf" <%.s%.s%s%.s>%s\|",
	3254	(IV)(locinput - loc_bostr),
	3255	len0, s0,
	3256	len1, s1,
	3257	(docolor ? "" : "> <"),
	3258	len2, s2,
	3259	(int)(tlen > 19 ? 0 : 19 - tlen),
	3260	"");
	3261	}
	3262	}
	3263
	3264	#endif
	3265
	3266	/* reg_check_named_buff_matched()
	3267	* Checks to see if a named buffer has matched. The data array of
	3268	* buffer numbers corresponding to the buffer is expected to reside
	3269	* in the regexp->data->data array in the slot stored in the ARG() of
	3270	* node involved. Note that this routine doesn't actually care about the
	3271	* name, that information is not preserved from compilation to execution.
	3272	* Returns the index of the leftmost defined buffer with the given name
	3273	* or 0 if non of the buffers matched.
	3274	*/
	3275	STATIC I32
	3276	S_reg_check_named_buff_matched(pTHX_ const regexp rex, const regnode scan)
	3277	{
	3278	I32 n;
	3279	RXi_GET_DECL(rex,rexi);
	3280	SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	3281	I32 nums=(I32)SvPVX(sv_dat);
	3282
	3283	PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
	3284
	3285	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	3286	if ((I32)rex->lastparen >= nums[n] &&
	3287	rex->offs[nums[n]].end != -1)
	3288	{
	3289	return nums[n];
	3290	}
	3291	}
	3292	return 0;
	3293	}
	3294
	3295
	3296	/* free all slabs above current one - called during LEAVE_SCOPE */
	3297
	3298	STATIC void
	3299	S_clear_backtrack_stack(pTHX_ void *p)
	3300	{
	3301	regmatch_slab *s = PL_regmatch_slab->next;
	3302	PERL_UNUSED_ARG(p);
	3303
	3304	if (!s)
	3305	return;
	3306	PL_regmatch_slab->next = NULL;
	3307	while (s) {
	3308	regmatch_slab * const osl = s;
	3309	s = s->next;
	3310	Safefree(osl);
	3311	}
	3312	}
	3313	static bool
	3314	S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int c1p, U8 c1_utf8, int c2p, U8 c2_utf8)
	3315	{
	3316	/* This function determines if there are one or two characters that match
	3317	* the first character of the passed-in EXACTish node <text_node>, and if
	3318	* so, returns them in the passed-in pointers.
	3319	*
	3320	* If it determines that no possible character in the target string can
	3321	* match, it returns FALSE; otherwise TRUE. (The FALSE situation occurs if
	3322	* the first character in <text_node> requires UTF-8 to represent, and the
	3323	* target string isn't in UTF-8.)
	3324	*
	3325	* If there are more than two characters that could match the beginning of
	3326	* <text_node>, or if more context is required to determine a match or not,
	3327	* it sets both <c1p> and <c2p> to CHRTEST_VOID.
	3328	*
	3329	* The motiviation behind this function is to allow the caller to set up
	3330	* tight loops for matching. If <text_node> is of type EXACT, there is
	3331	* only one possible character that can match its first character, and so
	3332	* the situation is quite simple. But things get much more complicated if
	3333	* folding is involved. It may be that the first character of an EXACTFish
	3334	* node doesn't participate in any possible fold, e.g., punctuation, so it
	3335	* can be matched only by itself. The vast majority of characters that are
	3336	* in folds match just two things, their lower and upper-case equivalents.
	3337	* But not all are like that; some have multiple possible matches, or match
	3338	* sequences of more than one character. This function sorts all that out.
	3339	*
	3340	* Consider the patterns AB or A?B where A and B are arbitrary. In a
	3341	* loop of trying to match A*, we know we can't exit where the thing
	3342	* following it isn't a B. And something can't be a B unless it is the
	3343	* beginning of B. By putting a quick test for that beginning in a tight
	3344	* loop, we can rule out things that can't possibly be B without having to
	3345	* break out of the loop, thus avoiding work. Similarly, if A is a single
	3346	* character, we can make a tight loop matching A*, using the outputs of
	3347	* this function.
	3348	*
	3349	* If the target string to match isn't in UTF-8, and there aren't
	3350	* complications which require CHRTEST_VOID, <c1p> and <c2p> are set to
	3351	* the one or two possible octets (which are characters in this situation)
	3352	* that can match. In all cases, if there is only one character that can
	3353	* match, <c1p> and <c2p> will be identical.
	3354	*
	3355	* If the target string is in UTF-8, the buffers pointed to by <c1_utf8>
	3356	* and <c2_utf8> will contain the one or two UTF-8 sequences of bytes that
	3357	* can match the beginning of <text_node>. They should be declared with at
	3358	* least length UTF8_MAXBYTES+1. (If the target string isn't in UTF-8, it is
	3359	* undefined what these contain.) If one or both of the buffers are
	3360	* invariant under UTF-8, <c1p>, and <c2p> will also be set to the
	3361	* corresponding invariant. If variant, the corresponding *<c1p> and/or
	3362	* *<c2p> will be set to a negative number(s) that shouldn't match any code
	3363	* point (unless inappropriately coerced to unsigned). *<c1p> will equal
	3364	* <c2p> if and only if <c1_utf8> and <c2_utf8> are the same. /
	3365
	3366	const bool utf8_target = PL_reg_match_utf8;
	3367
	3368	UV c1 = CHRTEST_NOT_A_CP_1;
	3369	UV c2 = CHRTEST_NOT_A_CP_2;
	3370	bool use_chrtest_void = FALSE;
	3371
	3372	/* Used when we have both utf8 input and utf8 output, to avoid converting
	3373	* to/from code points */
	3374	bool utf8_has_been_setup = FALSE;
	3375
	3376	dVAR;
	3377
	3378	U8 pat = (U8)STRING(text_node);
	3379
	3380	if (OP(text_node) == EXACT) {
	3381
	3382	/* In an exact node, only one thing can be matched, that first
	3383	* character. If both the pat and the target are UTF-8, we can just
	3384	* copy the input to the output, avoiding finding the code point of
	3385	* that character */
	3386	if (! UTF_PATTERN) {
	3387	c2 = c1 = *pat;
	3388	}
	3389	else if (utf8_target) {
	3390	Copy(pat, c1_utf8, UTF8SKIP(pat), U8);
	3391	Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
	3392	utf8_has_been_setup = TRUE;
	3393	}
	3394	else {
	3395	c2 = c1 = valid_utf8_to_uvchr(pat, NULL);
	3396	}
	3397	}
	3398	else /* an EXACTFish node */
	3399	if ((UTF_PATTERN
	3400	&& is_MULTI_CHAR_FOLD_utf8_safe(pat,
	3401	pat + STR_LEN(text_node)))
	3402	\|\| (! UTF_PATTERN
	3403	&& is_MULTI_CHAR_FOLD_latin1_safe(pat,
	3404	pat + STR_LEN(text_node))))
	3405	{
	3406	/* Multi-character folds require more context to sort out. Also
	3407	* PL_utf8_foldclosures used below doesn't handle them, so have to be
	3408	* handled outside this routine */
	3409	use_chrtest_void = TRUE;
	3410	}
	3411	else { /* an EXACTFish node which doesn't begin with a multi-char fold */
	3412	c1 = (UTF_PATTERN) ? valid_utf8_to_uvchr(pat, NULL) : *pat;
	3413	if (c1 > 256) {
	3414	/* Load the folds hash, if not already done */
	3415	SV** listp;
	3416	if (! PL_utf8_foldclosures) {
	3417	if (! PL_utf8_tofold) {
	3418	U8 dummy[UTF8_MAXBYTES+1];
	3419
	3420	/* Force loading this by folding an above-Latin1 char */
	3421	to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
	3422	assert(PL_utf8_tofold); /* Verify that worked */
	3423	}
	3424	PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
	3425	}
	3426
	3427	/* The fold closures data structure is a hash with the keys being
	3428	* the UTF-8 of every character that is folded to, like 'k', and
	3429	* the values each an array of all code points that fold to its
	3430	* key. e.g. [ 'k', 'K', KELVIN_SIGN ]. Multi-character folds are
	3431	* not included */
	3432	if ((! (listp = hv_fetch(PL_utf8_foldclosures,
	3433	(char *) pat,
	3434	UTF8SKIP(pat),
	3435	FALSE))))
	3436	{
	3437	/* Not found in the hash, therefore there are no folds
	3438	* containing it, so there is only a single character that
	3439	* could match */
	3440	c2 = c1;
	3441	}
	3442	else { /* Does participate in folds */
	3443	AV* list = (AV) listp;
	3444	if (av_len(list) != 1) {
	3445
	3446	/* If there aren't exactly two folds to this, it is outside
	3447	* the scope of this function */
	3448	use_chrtest_void = TRUE;
	3449	}
	3450	else { /* There are two. Get them */
	3451	SV** c_p = av_fetch(list, 0, FALSE);
	3452	if (c_p == NULL) {
	3453	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	3454	}
	3455	c1 = SvUV(*c_p);
	3456
	3457	c_p = av_fetch(list, 1, FALSE);
	3458	if (c_p == NULL) {
	3459	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	3460	}
	3461	c2 = SvUV(*c_p);
	3462
	3463	/* Folds that cross the 255/256 boundary are forbidden if
	3464	* EXACTFL, or EXACTFA and one is ASCIII. Since the
	3465	* pattern character is above 256, and its only other match
	3466	* is below 256, the only legal match will be to itself.
	3467	* We have thrown away the original, so have to compute
	3468	* which is the one above 255 */
	3469	if ((c1 < 256) != (c2 < 256)) {
	3470	if (OP(text_node) == EXACTFL
	3471	\|\| (OP(text_node) == EXACTFA
	3472	&& (isASCII(c1) \|\| isASCII(c2))))
	3473	{
	3474	if (c1 < 256) {
	3475	c1 = c2;
	3476	}
	3477	else {
	3478	c2 = c1;
	3479	}
	3480	}
	3481	}
	3482	}
	3483	}
	3484	}
	3485	else /* Here, c1 is < 255 */
	3486	if (utf8_target
	3487	&& HAS_NONLATIN1_FOLD_CLOSURE(c1)
	3488	&& OP(text_node) != EXACTFL
	3489	&& (OP(text_node) != EXACTFA \|\| ! isASCII(c1)))
	3490	{
	3491	/* Here, there could be something above Latin1 in the target which
	3492	* folds to this character in the pattern. All such cases except
	3493	* LATIN SMALL LETTER Y WITH DIAERESIS have more than two characters
	3494	* involved in their folds, so are outside the scope of this
	3495	* function */
	3496	if (UNLIKELY(c1 == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
	3497	c2 = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
	3498	}
	3499	else {
	3500	use_chrtest_void = TRUE;
	3501	}
	3502	}
	3503	else { /* Here nothing above Latin1 can fold to the pattern character */
	3504	switch (OP(text_node)) {
	3505
	3506	case EXACTFL: /* /l rules */
	3507	c2 = PL_fold_locale[c1];
	3508	break;
	3509
	3510	case EXACTF:
	3511	if (! utf8_target) { /* /d rules */
	3512	c2 = PL_fold[c1];
	3513	break;
	3514	}
	3515	/* FALLTHROUGH */
	3516	/* /u rules for all these. This happens to work for
	3517	* EXACTFA as nothing in Latin1 folds to ASCII */
	3518	case EXACTFA:
	3519	case EXACTFU_TRICKYFOLD:
	3520	case EXACTFU_SS:
	3521	case EXACTFU:
	3522	c2 = PL_fold_latin1[c1];
	3523	break;
	3524
	3525	default:
	3526	Perl_croak(aTHX_ "panic: Unexpected op %u", OP(text_node));
	3527	assert(0); /* NOTREACHED */
	3528	}
	3529	}
	3530	}
	3531
	3532	/* Here have figured things out. Set up the returns */
	3533	if (use_chrtest_void) {
	3534	c2p = c1p = CHRTEST_VOID;
	3535	}
	3536	else if (utf8_target) {
	3537	if (! utf8_has_been_setup) { /* Don't have the utf8; must get it */
	3538	uvchr_to_utf8(c1_utf8, c1);
	3539	uvchr_to_utf8(c2_utf8, c2);
	3540	}
	3541
	3542	/* Invariants are stored in both the utf8 and byte outputs; Use
	3543	* negative numbers otherwise for the byte ones. Make sure that the
	3544	* byte ones are the same iff the utf8 ones are the same */
	3545	c1p = (UTF8_IS_INVARIANT(c1_utf8)) ? *c1_utf8 : CHRTEST_NOT_A_CP_1;
	3546	c2p = (UTF8_IS_INVARIANT(c2_utf8))
	3547	? *c2_utf8
	3548	: (c1 == c2)
	3549	? CHRTEST_NOT_A_CP_1
	3550	: CHRTEST_NOT_A_CP_2;
	3551	}
	3552	else if (c1 > 255) {
	3553	if (c2 > 255) { /* both possibilities are above what a non-utf8 string
	3554	can represent */
	3555	return FALSE;
	3556	}
	3557
	3558	c1p = c2p = c2; /* c2 is the only representable value */
	3559	}
	3560	else { /* c1 is representable; see about c2 */
	3561	*c1p = c1;
	3562	*c2p = (c2 < 256) ? c2 : c1;
	3563	}
	3564
	3565	return TRUE;
	3566	}
	3567
	3568	/* returns -1 on failure, $+[0] on success */
	3569	STATIC I32
	3570	S_regmatch(pTHX_ regmatch_info reginfo, char startpos, regnode *prog)
	3571	{
	3572	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	3573	dMY_CXT;
	3574	#endif
	3575	dVAR;
	3576	const bool utf8_target = PL_reg_match_utf8;
	3577	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	3578	REGEXP *rex_sv = reginfo->prog;
	3579	regexp *rex = ReANY(rex_sv);
	3580	RXi_GET_DECL(rex,rexi);
	3581	I32 oldsave;
	3582	/* the current state. This is a cached copy of PL_regmatch_state */
	3583	regmatch_state *st;
	3584	/* cache heavy used fields of st in registers */
	3585	regnode *scan;
	3586	regnode *next;
	3587	U32 n = 0; /* general value; init to avoid compiler warning */
	3588	I32 ln = 0; /* len or last; init to avoid compiler warning */
	3589	char *locinput = startpos;
	3590	char pushinput; / where to continue after a PUSH */
	3591	I32 nextchr; /* is always set to UCHARAT(locinput) */
	3592
	3593	bool result = 0; /* return value of S_regmatch */
	3594	int depth = 0; /* depth of backtrack stack */
	3595	U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
	3596	const U32 max_nochange_depth =
	3597	(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
	3598	3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
	3599	regmatch_state yes_state = NULL; / state to pop to on success of
	3600	subpattern */
	3601	/* mark_state piggy backs on the yes_state logic so that when we unwind
	3602	the stack on success we can update the mark_state as we go */
	3603	regmatch_state mark_state = NULL; / last mark state we have seen */
	3604	regmatch_state cur_eval = NULL; / most recent EVAL_AB state */
	3605	struct regmatch_state cur_curlyx = NULL; / most recent curlyx */
	3606	U32 state_num;
	3607	bool no_final = 0; /* prevent failure from backtracking? */
	3608	bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
	3609	char *startpoint = locinput;
	3610	SV popmark = NULL; / are we looking for a mark? */
	3611	SV sv_commit = NULL; / last mark name seen in failure */
	3612	SV sv_yes_mark = NULL; / last mark name we have seen
	3613	during a successful match */
	3614	U32 lastopen = 0; /* last open we saw */
	3615	bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
	3616	SV* const oreplsv = GvSV(PL_replgv);
	3617	/* these three flags are set by various ops to signal information to
	3618	* the very next op. They have a useful lifetime of exactly one loop
	3619	* iteration, and are not preserved or restored by state pushes/pops
	3620	*/
	3621	bool sw = 0; /* the condition value in (?(cond)a\|b) */
	3622	bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
	3623	int logical = 0; /* the following EVAL is:
	3624	0: (?{...})
	3625	1: (?(?{...})X\|Y)
	3626	2: (??{...})
	3627	or the following IFMATCH/UNLESSM is:
	3628	false: plain (?=foo)
	3629	true: used as a condition: (?(?=foo))
	3630	*/
	3631	PAD* last_pad = NULL;
	3632	dMULTICALL;
	3633	I32 gimme = G_SCALAR;
	3634	CV caller_cv = NULL; / who called us */
	3635	CV last_pushed_cv = NULL; / most recently called (?{}) CV */
	3636	CHECKPOINT runops_cp; /* savestack position before executing EVAL */
	3637	U32 maxopenparen = 0; /* max '(' index seen so far */
	3638
	3639	#ifdef DEBUGGING
	3640	GET_RE_DEBUG_FLAGS_DECL;
	3641	#endif
	3642
	3643	/* shut up 'may be used uninitialized' compiler warnings for dMULTICALL */
	3644	multicall_oldcatch = 0;
	3645	multicall_cv = NULL;
	3646	cx = NULL;
	3647	PERL_UNUSED_VAR(multicall_cop);
	3648	PERL_UNUSED_VAR(newsp);
	3649
	3650
	3651	PERL_ARGS_ASSERT_REGMATCH;
	3652
	3653	DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
	3654	PerlIO_printf(Perl_debug_log,"regmatch start\n");
	3655	}));
	3656	/* on first ever call to regmatch, allocate first slab */
	3657	if (!PL_regmatch_slab) {
	3658	Newx(PL_regmatch_slab, 1, regmatch_slab);
	3659	PL_regmatch_slab->prev = NULL;
	3660	PL_regmatch_slab->next = NULL;
	3661	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
	3662	}
	3663
	3664	oldsave = PL_savestack_ix;
	3665	SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
	3666	SAVEVPTR(PL_regmatch_slab);
	3667	SAVEVPTR(PL_regmatch_state);
	3668
	3669	/* grab next free state slot */
	3670	st = ++PL_regmatch_state;
	3671	if (st > SLAB_LAST(PL_regmatch_slab))
	3672	st = PL_regmatch_state = S_push_slab(aTHX);
	3673
	3674	/* Note that nextchr is a byte even in UTF */
	3675	SET_nextchr;
	3676	scan = prog;
	3677	while (scan != NULL) {
	3678
	3679	DEBUG_EXECUTE_r( {
	3680	SV * const prop = sv_newmortal();
	3681	regnode *rnext=regnext(scan);
	3682	DUMP_EXEC_POS( locinput, scan, utf8_target );
	3683	regprop(rex, prop, scan);
	3684
	3685	PerlIO_printf(Perl_debug_log,
	3686	"%3"IVdf":%*s%s(%"IVdf")\n",
	3687	(IV)(scan - rexi->program), depth*2, "",
	3688	SvPVX_const(prop),
	3689	(PL_regkind[OP(scan)] == END \|\| !rnext) ?
	3690	0 : (IV)(rnext - rexi->program));
	3691	});
	3692
	3693	next = scan + NEXT_OFF(scan);
	3694	if (next == scan)
	3695	next = NULL;
	3696	state_num = OP(scan);
	3697
	3698	reenter_switch:
	3699
	3700	SET_nextchr;
	3701	assert(nextchr < 256 && (nextchr >= 0 \|\| nextchr == NEXTCHR_EOS));
	3702
	3703	switch (state_num) {
	3704	case BOL: /* /^../ */
	3705	if (locinput == PL_bostr)
	3706	{
	3707	/* reginfo->till = reginfo->bol; */
	3708	break;
	3709	}
	3710	sayNO;
	3711
	3712	case MBOL: /* /^../m */
	3713	if (locinput == PL_bostr \|\|
	3714	(!NEXTCHR_IS_EOS && locinput[-1] == '\n'))
	3715	{
	3716	break;
	3717	}
	3718	sayNO;
	3719
	3720	case SBOL: /* /^../s */
	3721	if (locinput == PL_bostr)
	3722	break;
	3723	sayNO;
	3724
	3725	case GPOS: /* \G */
	3726	if (locinput == reginfo->ganch)
	3727	break;
	3728	sayNO;
	3729
	3730	case KEEPS: /* \K */
	3731	/* update the startpoint */
	3732	st->u.keeper.val = rex->offs[0].start;
	3733	rex->offs[0].start = locinput - PL_bostr;
	3734	PUSH_STATE_GOTO(KEEPS_next, next, locinput);
	3735	assert(0); /NOTREACHED/
	3736	case KEEPS_next_fail:
	3737	/* rollback the start point change */
	3738	rex->offs[0].start = st->u.keeper.val;
	3739	sayNO_SILENT;
	3740	assert(0); /NOTREACHED/
	3741
	3742	case EOL: /* /..$/ */
	3743	goto seol;
	3744
	3745	case MEOL: /* /..$/m */
	3746	if (!NEXTCHR_IS_EOS && nextchr != '\n')
	3747	sayNO;
	3748	break;
	3749
	3750	case SEOL: /* /..$/s */
	3751	seol:
	3752	if (!NEXTCHR_IS_EOS && nextchr != '\n')
	3753	sayNO;
	3754	if (PL_regeol - locinput > 1)
	3755	sayNO;
	3756	break;
	3757
	3758	case EOS: /* \z */
	3759	if (!NEXTCHR_IS_EOS)
	3760	sayNO;
	3761	break;
	3762
	3763	case SANY: /* /./s */
	3764	if (NEXTCHR_IS_EOS)
	3765	sayNO;
	3766	goto increment_locinput;
	3767
	3768	case CANY: /* \C */
	3769	if (NEXTCHR_IS_EOS)
	3770	sayNO;
	3771	locinput++;
	3772	break;
	3773
	3774	case REG_ANY: /* /./ */
	3775	if ((NEXTCHR_IS_EOS) \|\| nextchr == '\n')
	3776	sayNO;
	3777	goto increment_locinput;
	3778
	3779
	3780	#undef ST
	3781	#define ST st->u.trie
	3782	case TRIEC: /* (ab\|cd) with known charclass */
	3783	/* In this case the charclass data is available inline so
	3784	we can fail fast without a lot of extra overhead.
	3785	*/
	3786	if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
	3787	DEBUG_EXECUTE_r(
	3788	PerlIO_printf(Perl_debug_log,
	3789	"%*s %sfailed to match trie start class...%s\n",
	3790	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3791	);
	3792	sayNO_SILENT;
	3793	assert(0); /* NOTREACHED */
	3794	}
	3795	/* FALL THROUGH */
	3796	case TRIE: /* (ab\|cd) */
	3797	/* the basic plan of execution of the trie is:
	3798	* At the beginning, run through all the states, and
	3799	* find the longest-matching word. Also remember the position
	3800	* of the shortest matching word. For example, this pattern:
	3801	* 1 2 3 4 5
	3802	* ab\|a\|x\|abcd\|abc
	3803	* when matched against the string "abcde", will generate
	3804	* accept states for all words except 3, with the longest
	3805	* matching word being 4, and the shortest being 2 (with
	3806	* the position being after char 1 of the string).
	3807	*
	3808	* Then for each matching word, in word order (i.e. 1,2,4,5),
	3809	* we run the remainder of the pattern; on each try setting
	3810	* the current position to the character following the word,
	3811	* returning to try the next word on failure.
	3812	*
	3813	* We avoid having to build a list of words at runtime by
	3814	* using a compile-time structure, wordinfo[].prev, which
	3815	* gives, for each word, the previous accepting word (if any).
	3816	* In the case above it would contain the mappings 1->2, 2->0,
	3817	* 3->0, 4->5, 5->1. We can use this table to generate, from
	3818	* the longest word (4 above), a list of all words, by
	3819	* following the list of prev pointers; this gives us the
	3820	* unordered list 4,5,1,2. Then given the current word we have
	3821	* just tried, we can go through the list and find the
	3822	* next-biggest word to try (so if we just failed on word 2,
	3823	* the next in the list is 4).
	3824	*
	3825	* Since at runtime we don't record the matching position in
	3826	* the string for each word, we have to work that out for
	3827	* each word we're about to process. The wordinfo table holds
	3828	* the character length of each word; given that we recorded
	3829	* at the start: the position of the shortest word and its
	3830	* length in chars, we just need to move the pointer the
	3831	* difference between the two char lengths. Depending on
	3832	* Unicode status and folding, that's cheap or expensive.
	3833	*
	3834	* This algorithm is optimised for the case where there are only a
	3835	* small number of accept states, i.e. 0,1, or maybe 2.
	3836	* With lots of accepts states, and having to try all of them,
	3837	* it becomes quadratic on number of accept states to find all
	3838	* the next words.
	3839	*/
	3840
	3841	{
	3842	/* what type of TRIE am I? (utf8 makes this contextual) */
	3843	DECL_TRIE_TYPE(scan);
	3844
	3845	/* what trie are we using right now */
	3846	reg_trie_data * const trie
	3847	= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
	3848	HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
	3849	U32 state = trie->startstate;
	3850
	3851	if ( trie->bitmap
	3852	&& (NEXTCHR_IS_EOS \|\| !TRIE_BITMAP_TEST(trie, nextchr)))
	3853	{
	3854	if (trie->states[ state ].wordnum) {
	3855	DEBUG_EXECUTE_r(
	3856	PerlIO_printf(Perl_debug_log,
	3857	"%*s %smatched empty string...%s\n",
	3858	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3859	);
	3860	if (!trie->jump)
	3861	break;
	3862	} else {
	3863	DEBUG_EXECUTE_r(
	3864	PerlIO_printf(Perl_debug_log,
	3865	"%*s %sfailed to match trie start class...%s\n",
	3866	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3867	);
	3868	sayNO_SILENT;
	3869	}
	3870	}
	3871
	3872	{
	3873	U8 uc = ( U8 )locinput;
	3874
	3875	STRLEN len = 0;
	3876	STRLEN foldlen = 0;
	3877	U8 uscan = (U8)NULL;
	3878	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	3879	U32 charcount = 0; /* how many input chars we have matched */
	3880	U32 accepted = 0; /* have we seen any accepting states? */
	3881
	3882	ST.jump = trie->jump;
	3883	ST.me = scan;
	3884	ST.firstpos = NULL;
	3885	ST.longfold = FALSE; /* char longer if folded => it's harder */
	3886	ST.nextword = 0;
	3887
	3888	/* fully traverse the TRIE; note the position of the
	3889	shortest accept state and the wordnum of the longest
	3890	accept state */
	3891
	3892	while ( state && uc <= (U8*)PL_regeol ) {
	3893	U32 base = trie->states[ state ].trans.base;
	3894	UV uvc = 0;
	3895	U16 charid = 0;
	3896	U16 wordnum;
	3897	wordnum = trie->states[ state ].wordnum;
	3898
	3899	if (wordnum) { /* it's an accept state */
	3900	if (!accepted) {
	3901	accepted = 1;
	3902	/* record first match position */
	3903	if (ST.longfold) {
	3904	ST.firstpos = (U8*)locinput;
	3905	ST.firstchars = 0;
	3906	}
	3907	else {
	3908	ST.firstpos = uc;
	3909	ST.firstchars = charcount;
	3910	}
	3911	}
	3912	if (!ST.nextword \|\| wordnum < ST.nextword)
	3913	ST.nextword = wordnum;
	3914	ST.topword = wordnum;
	3915	}
	3916
	3917	DEBUG_TRIE_EXECUTE_r({
	3918	DUMP_EXEC_POS( (char *)uc, scan, utf8_target );
	3919	PerlIO_printf( Perl_debug_log,
	3920	"%*s %sState: %4"UVxf" Accepted: %c ",
	3921	2+depth * 2, "", PL_colors[4],
	3922	(UV)state, (accepted ? 'Y' : 'N'));
	3923	});
	3924
	3925	/* read a char and goto next state */
	3926	if ( base && (foldlen \|\| uc < (U8*)PL_regeol)) {
	3927	I32 offset;
	3928	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	3929	uscan, len, uvc, charid, foldlen,
	3930	foldbuf, uniflags);
	3931	charcount++;
	3932	if (foldlen>0)
	3933	ST.longfold = TRUE;
	3934	if (charid &&
	3935	( ((offset =
	3936	base + charid - 1 - trie->uniquecharcount)) >= 0)
	3937
	3938	&& ((U32)offset < trie->lasttrans)
	3939	&& trie->trans[offset].check == state)
	3940	{
	3941	state = trie->trans[offset].next;
	3942	}
	3943	else {
	3944	state = 0;
	3945	}
	3946	uc += len;
	3947
	3948	}
	3949	else {
	3950	state = 0;
	3951	}
	3952	DEBUG_TRIE_EXECUTE_r(
	3953	PerlIO_printf( Perl_debug_log,
	3954	"Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
	3955	charid, uvc, (UV)state, PL_colors[5] );
	3956	);
	3957	}
	3958	if (!accepted)
	3959	sayNO;
	3960
	3961	/* calculate total number of accept states */
	3962	{
	3963	U16 w = ST.topword;
	3964	accepted = 0;
	3965	while (w) {
	3966	w = trie->wordinfo[w].prev;
	3967	accepted++;
	3968	}
	3969	ST.accepted = accepted;
	3970	}
	3971
	3972	DEBUG_EXECUTE_r(
	3973	PerlIO_printf( Perl_debug_log,
	3974	"%*s %sgot %"IVdf" possible matches%s\n",
	3975	REPORT_CODE_OFF + depth * 2, "",
	3976	PL_colors[4], (IV)ST.accepted, PL_colors[5] );
	3977	);
	3978	goto trie_first_try; /* jump into the fail handler */
	3979	}}
	3980	assert(0); /* NOTREACHED */
	3981
	3982	case TRIE_next_fail: /* we failed - try next alternative */
	3983	{
	3984	U8 *uc;
	3985	if ( ST.jump) {
	3986	REGCP_UNWIND(ST.cp);
	3987	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	3988	}
	3989	if (!--ST.accepted) {
	3990	DEBUG_EXECUTE_r({
	3991	PerlIO_printf( Perl_debug_log,
	3992	"%*s %sTRIE failed...%s\n",
	3993	REPORT_CODE_OFF+depth*2, "",
	3994	PL_colors[4],
	3995	PL_colors[5] );
	3996	});
	3997	sayNO_SILENT;
	3998	}
	3999	{
	4000	/* Find next-highest word to process. Note that this code
	4001	* is O(N^2) per trie run (O(N) per branch), so keep tight */
	4002	U16 min = 0;
	4003	U16 word;
	4004	U16 const nextword = ST.nextword;
	4005	reg_trie_wordinfo * const wordinfo
	4006	= ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
	4007	for (word=ST.topword; word; word=wordinfo[word].prev) {
	4008	if (word > nextword && (!min \|\| word < min))
	4009	min = word;
	4010	}
	4011	ST.nextword = min;
	4012	}
	4013
	4014	trie_first_try:
	4015	if (do_cutgroup) {
	4016	do_cutgroup = 0;
	4017	no_final = 0;
	4018	}
	4019
	4020	if ( ST.jump) {
	4021	ST.lastparen = rex->lastparen;
	4022	ST.lastcloseparen = rex->lastcloseparen;
	4023	REGCP_SET(ST.cp);
	4024	}
	4025
	4026	/* find start char of end of current word */
	4027	{
	4028	U32 chars; /* how many chars to skip */
	4029	reg_trie_data * const trie
	4030	= (reg_trie_data*)rexi->data->data[ARG(ST.me)];
	4031
	4032	assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
	4033	>= ST.firstchars);
	4034	chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
	4035	- ST.firstchars;
	4036	uc = ST.firstpos;
	4037
	4038	if (ST.longfold) {
	4039	/* the hard option - fold each char in turn and find
	4040	* its folded length (which may be different) */
	4041	U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
	4042	STRLEN foldlen;
	4043	STRLEN len;
	4044	UV uvc;
	4045	U8 *uscan;
	4046
	4047	while (chars) {
	4048	if (utf8_target) {
	4049	uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len,
	4050	uniflags);
	4051	uc += len;
	4052	}
	4053	else {
	4054	uvc = *uc;
	4055	uc++;
	4056	}
	4057	uvc = to_uni_fold(uvc, foldbuf, &foldlen);
	4058	uscan = foldbuf;
	4059	while (foldlen) {
	4060	if (!--chars)
	4061	break;
	4062	uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len,
	4063	uniflags);
	4064	uscan += len;
	4065	foldlen -= len;
	4066	}
	4067	}
	4068	}
	4069	else {
	4070	if (utf8_target)
	4071	while (chars--)
	4072	uc += UTF8SKIP(uc);
	4073	else
	4074	uc += chars;
	4075	}
	4076	}
	4077
	4078	scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
	4079	? ST.jump[ST.nextword]
	4080	: NEXT_OFF(ST.me));
	4081
	4082	DEBUG_EXECUTE_r({
	4083	PerlIO_printf( Perl_debug_log,
	4084	"%*s %sTRIE matched word #%d, continuing%s\n",
	4085	REPORT_CODE_OFF+depth*2, "",
	4086	PL_colors[4],
	4087	ST.nextword,
	4088	PL_colors[5]
	4089	);
	4090	});
	4091
	4092	if (ST.accepted > 1 \|\| has_cutgroup) {
	4093	PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc);
	4094	assert(0); /* NOTREACHED */
	4095	}
	4096	/* only one choice left - just continue */
	4097	DEBUG_EXECUTE_r({
	4098	AV *const trie_words
	4099	= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
	4100	SV ** const tmp = av_fetch( trie_words,
	4101	ST.nextword-1, 0 );
	4102	SV *sv= tmp ? sv_newmortal() : NULL;
	4103
	4104	PerlIO_printf( Perl_debug_log,
	4105	"%*s %sonly one match left, short-circuiting: #%d <%s>%s\n",
	4106	REPORT_CODE_OFF+depth*2, "", PL_colors[4],
	4107	ST.nextword,
	4108	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	4109	PL_colors[0], PL_colors[1],
	4110	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)\|PERL_PV_ESCAPE_NONASCII
	4111	)
	4112	: "not compiled under -Dr",
	4113	PL_colors[5] );
	4114	});
	4115
	4116	locinput = (char*)uc;
	4117	continue; /* execute rest of RE */
	4118	assert(0); /* NOTREACHED */
	4119	}
	4120	#undef ST
	4121
	4122	case EXACT: { /* /abc/ */
	4123	char *s = STRING(scan);
	4124	ln = STR_LEN(scan);
	4125	if (utf8_target != UTF_PATTERN) {
	4126	/* The target and the pattern have differing utf8ness. */
	4127	char *l = locinput;
	4128	const char * const e = s + ln;
	4129
	4130	if (utf8_target) {
	4131	/* The target is utf8, the pattern is not utf8.
	4132	* Above-Latin1 code points can't match the pattern;
	4133	* invariants match exactly, and the other Latin1 ones need
	4134	* to be downgraded to a single byte in order to do the
	4135	* comparison. (If we could be confident that the target
	4136	* is not malformed, this could be refactored to have fewer
	4137	* tests by just assuming that if the first bytes match, it
	4138	* is an invariant, but there are tests in the test suite
	4139	* dealing with (??{...}) which violate this) */
	4140	while (s < e) {
	4141	if (l >= PL_regeol)
	4142	sayNO;
	4143	if (UTF8_IS_ABOVE_LATIN1(* (U8*) l)) {
	4144	sayNO;
	4145	}
	4146	if (UTF8_IS_INVARIANT((U8)l)) {
	4147	if (l != s) {
	4148	sayNO;
	4149	}
	4150	l++;
	4151	}
	4152	else {
	4153	if (TWO_BYTE_UTF8_TO_UNI(l, (l+1)) != * (U8*) s) {
	4154	sayNO;
	4155	}
	4156	l += 2;
	4157	}
	4158	s++;
	4159	}
	4160	}
	4161	else {
	4162	/* The target is not utf8, the pattern is utf8. */
	4163	while (s < e) {
	4164	if (l >= PL_regeol \|\| UTF8_IS_ABOVE_LATIN1(* (U8*) s))
	4165	{
	4166	sayNO;
	4167	}
	4168	if (UTF8_IS_INVARIANT((U8)s)) {
	4169	if (s != l) {
	4170	sayNO;
	4171	}
	4172	s++;
	4173	}
	4174	else {
	4175	if (TWO_BYTE_UTF8_TO_UNI(s, (s+1)) != * (U8*) l) {
	4176	sayNO;
	4177	}
	4178	s += 2;
	4179	}
	4180	l++;
	4181	}
	4182	}
	4183	locinput = l;
	4184	break;
	4185	}
	4186	/* The target and the pattern have the same utf8ness. */
	4187	/* Inline the first character, for speed. */
	4188	if (UCHARAT(s) != nextchr)
	4189	sayNO;
	4190	if (PL_regeol - locinput < ln)
	4191	sayNO;
	4192	if (ln > 1 && memNE(s, locinput, ln))
	4193	sayNO;
	4194	locinput += ln;
	4195	break;
	4196	}
	4197
	4198	case EXACTFL: { /* /abc/il */
	4199	re_fold_t folder;
	4200	const U8 * fold_array;
	4201	const char * s;
	4202	U32 fold_utf8_flags;
	4203
	4204	PL_reg_flags \|= RF_tainted;
	4205	folder = foldEQ_locale;
	4206	fold_array = PL_fold_locale;
	4207	fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
	4208	goto do_exactf;
	4209
	4210	case EXACTFU_SS: /* /\x{df}/iu */
	4211	case EXACTFU_TRICKYFOLD: /* /\x{390}/iu */
	4212	case EXACTFU: /* /abc/iu */
	4213	folder = foldEQ_latin1;
	4214	fold_array = PL_fold_latin1;
	4215	fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
	4216	goto do_exactf;
	4217
	4218	case EXACTFA: /* /abc/iaa */
	4219	folder = foldEQ_latin1;
	4220	fold_array = PL_fold_latin1;
	4221	fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4222	goto do_exactf;
	4223
	4224	case EXACTF: /* /abc/i */
	4225	folder = foldEQ;
	4226	fold_array = PL_fold;
	4227	fold_utf8_flags = 0;
	4228
	4229	do_exactf:
	4230	s = STRING(scan);
	4231	ln = STR_LEN(scan);
	4232
	4233	if (utf8_target \|\| UTF_PATTERN \|\| state_num == EXACTFU_SS) {
	4234	/* Either target or the pattern are utf8, or has the issue where
	4235	* the fold lengths may differ. */
	4236	const char * const l = locinput;
	4237	char *e = PL_regeol;
	4238
	4239	if (! foldEQ_utf8_flags(s, 0, ln, cBOOL(UTF_PATTERN),
	4240	l, &e, 0, utf8_target, fold_utf8_flags))
	4241	{
	4242	sayNO;
	4243	}
	4244	locinput = e;
	4245	break;
	4246	}
	4247
	4248	/* Neither the target nor the pattern are utf8 */
	4249	if (UCHARAT(s) != nextchr
	4250	&& !NEXTCHR_IS_EOS
	4251	&& UCHARAT(s) != fold_array[nextchr])
	4252	{
	4253	sayNO;
	4254	}
	4255	if (PL_regeol - locinput < ln)
	4256	sayNO;
	4257	if (ln > 1 && ! folder(s, locinput, ln))
	4258	sayNO;
	4259	locinput += ln;
	4260	break;
	4261	}
	4262
	4263	/* XXX Could improve efficiency by separating these all out using a
	4264	* macro or in-line function. At that point regcomp.c would no longer
	4265	* have to set the FLAGS fields of these */
	4266	case BOUNDL: /* /\b/l */
	4267	case NBOUNDL: /* /\B/l */
	4268	PL_reg_flags \|= RF_tainted;
	4269	/* FALL THROUGH */
	4270	case BOUND: /* /\b/ */
	4271	case BOUNDU: /* /\b/u */
	4272	case BOUNDA: /* /\b/a */
	4273	case NBOUND: /* /\B/ */
	4274	case NBOUNDU: /* /\B/u */
	4275	case NBOUNDA: /* /\B/a */
	4276	/* was last char in word? */
	4277	if (utf8_target
	4278	&& FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET
	4279	&& FLAGS(scan) != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
	4280	{
	4281	if (locinput == PL_bostr)
	4282	ln = '\n';
	4283	else {
	4284	const U8 * const r = reghop3((U8)locinput, -1, (U8)PL_bostr);
	4285
	4286	ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
	4287	}
	4288	if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
	4289	ln = isALNUM_uni(ln);
	4290	if (NEXTCHR_IS_EOS)
	4291	n = 0;
	4292	else {
	4293	LOAD_UTF8_CHARCLASS_ALNUM();
	4294	n = swash_fetch(PL_utf8_alnum, (U8*)locinput,
	4295	utf8_target);
	4296	}
	4297	}
	4298	else {
	4299	ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
	4300	n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC_utf8((U8*)locinput);
	4301	}
	4302	}
	4303	else {
	4304
	4305	/* Here the string isn't utf8, or is utf8 and only ascii
	4306	* characters are to match \w. In the latter case looking at
	4307	* the byte just prior to the current one may be just the final
	4308	* byte of a multi-byte character. This is ok. There are two
	4309	* cases:
	4310	* 1) it is a single byte character, and then the test is doing
	4311	* just what it's supposed to.
	4312	* 2) it is a multi-byte character, in which case the final
	4313	* byte is never mistakable for ASCII, and so the test
	4314	* will say it is not a word character, which is the
	4315	* correct answer. */
	4316	ln = (locinput != PL_bostr) ?
	4317	UCHARAT(locinput - 1) : '\n';
	4318	switch (FLAGS(scan)) {
	4319	case REGEX_UNICODE_CHARSET:
	4320	ln = isWORDCHAR_L1(ln);
	4321	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_L1(nextchr);
	4322	break;
	4323	case REGEX_LOCALE_CHARSET:
	4324	ln = isALNUM_LC(ln);
	4325	n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC(nextchr);
	4326	break;
	4327	case REGEX_DEPENDS_CHARSET:
	4328	ln = isALNUM(ln);
	4329	n = NEXTCHR_IS_EOS ? 0 : isALNUM(nextchr);
	4330	break;
	4331	case REGEX_ASCII_RESTRICTED_CHARSET:
	4332	case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
	4333	ln = isWORDCHAR_A(ln);
	4334	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_A(nextchr);
	4335	break;
	4336	default:
	4337	Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
	4338	break;
	4339	}
	4340	}
	4341	/* Note requires that all BOUNDs be lower than all NBOUNDs in
	4342	* regcomp.sym */
	4343	if (((!ln) == (!n)) == (OP(scan) < NBOUND))
	4344	sayNO;
	4345	break;
	4346
	4347	case ANYOF: /* /[abc]/ */
	4348	if (NEXTCHR_IS_EOS)
	4349	sayNO;
	4350	if (utf8_target) {
	4351	if (!reginclass(rex, scan, (U8*)locinput, utf8_target))
	4352	sayNO;
	4353	locinput += UTF8SKIP(locinput);
	4354	break;
	4355	}
	4356	else {
	4357	if (!REGINCLASS(rex, scan, (U8*)locinput))
	4358	sayNO;
	4359	locinput++;
	4360	break;
	4361	}
	4362	break;
	4363
	4364	/* Special char classes: \d, \w etc.
	4365	* The defines start on line 166 or so */
	4366	CCC_TRY_U(ALNUM, NALNUM, isWORDCHAR,
	4367	ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8,
	4368	ALNUMU, NALNUMU, isWORDCHAR_L1,
	4369	ALNUMA, NALNUMA, isWORDCHAR_A,
	4370	alnum, "a");
	4371
	4372	case SPACEL:
	4373	PL_reg_flags \|= RF_tainted;
	4374	if (NEXTCHR_IS_EOS) {
	4375	sayNO;
	4376	}
	4377	if (utf8_target && UTF8_IS_CONTINUED(nextchr)) {
	4378	if (! isSPACE_LC_utf8((U8 *) locinput)) {
	4379	sayNO;
	4380	}
	4381	}
	4382	else if (! isSPACE_LC((U8) nextchr)) {
	4383	sayNO;
	4384	}
	4385	goto increment_locinput;
	4386
	4387	case NSPACEL:
	4388	PL_reg_flags \|= RF_tainted;
	4389	if (NEXTCHR_IS_EOS) {
	4390	sayNO;
	4391	}
	4392	if (utf8_target && UTF8_IS_CONTINUED(nextchr)) {
	4393	if (isSPACE_LC_utf8((U8 *) locinput)) {
	4394	sayNO;
	4395	}
	4396	}
	4397	else if (isSPACE_LC(nextchr)) {
	4398	sayNO;
	4399	}
	4400	goto increment_locinput;
	4401
	4402	case SPACE:
	4403	if (utf8_target) {
	4404	goto utf8_space;
	4405	}
	4406	/* FALL THROUGH */
	4407	case SPACEA:
	4408	if (NEXTCHR_IS_EOS \|\| ! isSPACE_A(nextchr)) {
	4409	sayNO;
	4410	}
	4411	/* Matched a utf8-invariant, so don't have to worry about utf8 */
	4412	locinput++;
	4413	break;
	4414
	4415	case NSPACE:
	4416	if (utf8_target) {
	4417	goto utf8_nspace;
	4418	}
	4419	/* FALL THROUGH */
	4420	case NSPACEA:
	4421	if (NEXTCHR_IS_EOS \|\| isSPACE_A(nextchr)) {
	4422	sayNO;
	4423	}
	4424	goto increment_locinput;
	4425
	4426	case SPACEU:
	4427	utf8_space:
	4428	if (NEXTCHR_IS_EOS \|\| ! is_XPERLSPACE(locinput, utf8_target)) {
	4429	sayNO;
	4430	}
	4431	goto increment_locinput;
	4432
	4433	case NSPACEU:
	4434	utf8_nspace:
	4435	if (NEXTCHR_IS_EOS \|\| is_XPERLSPACE(locinput, utf8_target)) {
	4436	sayNO;
	4437	}
	4438	goto increment_locinput;
	4439
	4440	CCC_TRY(DIGIT, NDIGIT, isDIGIT,
	4441	DIGITL, NDIGITL, isDIGIT_LC, isDIGIT_LC_utf8,
	4442	DIGITA, NDIGITA, isDIGIT_A,
	4443	digit, "0");
	4444
	4445	case POSIXA: /* /[[:ascii:]]/ etc */
	4446	if (NEXTCHR_IS_EOS \|\| ! _generic_isCC_A(nextchr, FLAGS(scan))) {
	4447	sayNO;
	4448	}
	4449	/* Matched a utf8-invariant, so don't have to worry about utf8 */
	4450	locinput++;
	4451	break;
	4452
	4453	case NPOSIXA: /* /[^[:ascii:]]/ etc */
	4454	if (NEXTCHR_IS_EOS \|\| _generic_isCC_A(nextchr, FLAGS(scan))) {
	4455	sayNO;
	4456	}
	4457	goto increment_locinput;
	4458
	4459	case CLUMP: /* Match \X: logical Unicode character. This is defined as
	4460	a Unicode extended Grapheme Cluster */
	4461	/* From http://www.unicode.org/reports/tr29 (5.2 version). An
	4462	extended Grapheme Cluster is:
	4463
	4464	CR LF
	4465	\| Prepend* Begin Extend*
	4466	\| .
	4467
	4468	Begin is: ( Special_Begin \| ! Control )
	4469	Special_Begin is: ( Regional-Indicator+ \| Hangul-syllable )
	4470	Extend is: ( Grapheme_Extend \| Spacing_Mark )
	4471	Control is: [ GCB_Control \| CR \| LF ]
	4472	Hangul-syllable is: ( T+ \| ( L* ( L \| ( LVT \| ( V \| LV ) V* ) T* ) ))
	4473
	4474	If we create a 'Regular_Begin' = Begin - Special_Begin, then
	4475	we can rewrite
	4476
	4477	Begin is ( Regular_Begin + Special Begin )
	4478
	4479	It turns out that 98.4% of all Unicode code points match
	4480	Regular_Begin. Doing it this way eliminates a table match in
	4481	the previous implementation for almost all Unicode code points.
	4482
	4483	There is a subtlety with Prepend* which showed up in testing.
	4484	Note that the Begin, and only the Begin is required in:
	4485	\| Prepend* Begin Extend*
	4486	Also, Begin contains '! Control'. A Prepend must be a
	4487	'! Control', which means it must also be a Begin. What it
	4488	comes down to is that if we match Prepend* and then find no
	4489	suitable Begin afterwards, that if we backtrack the last
	4490	Prepend, that one will be a suitable Begin.
	4491	*/
	4492
	4493	if (NEXTCHR_IS_EOS)
	4494	sayNO;
	4495	if (! utf8_target) {
	4496
	4497	/* Match either CR LF or '.', as all the other possibilities
	4498	* require utf8 */
	4499	locinput++; /* Match the . or CR */
	4500	if (nextchr == '\r' /* And if it was CR, and the next is LF,
	4501	match the LF */
	4502	&& locinput < PL_regeol
	4503	&& UCHARAT(locinput) == '\n')
	4504	{
	4505	locinput++;
	4506	}
	4507	}
	4508	else {
	4509
	4510	/* Utf8: See if is ( CR LF ); already know that locinput <
	4511	* PL_regeol, so locinput+1 is in bounds */
	4512	if ( nextchr == '\r' && locinput+1 < PL_regeol
	4513	&& UCHARAT(locinput + 1) == '\n')
	4514	{
	4515	locinput += 2;
	4516	}
	4517	else {
	4518	STRLEN len;
	4519
	4520	/* In case have to backtrack to beginning, then match '.' */
	4521	char *starting = locinput;
	4522
	4523	/* In case have to backtrack the last prepend */
	4524	char *previous_prepend = NULL;
	4525
	4526	LOAD_UTF8_CHARCLASS_GCB();
	4527
	4528	/* Match (prepend)* */
	4529	while (locinput < PL_regeol
	4530	&& (len = is_GCB_Prepend_utf8(locinput)))
	4531	{
	4532	previous_prepend = locinput;
	4533	locinput += len;
	4534	}
	4535
	4536	/* As noted above, if we matched a prepend character, but
	4537	* the next thing won't match, back off the last prepend we
	4538	* matched, as it is guaranteed to match the begin */
	4539	if (previous_prepend
	4540	&& (locinput >= PL_regeol
	4541	\|\| (! swash_fetch(PL_utf8_X_regular_begin,
	4542	(U8*)locinput, utf8_target)
	4543	&& ! is_GCB_SPECIAL_BEGIN_START_utf8(locinput)))
	4544	)
	4545	{
	4546	locinput = previous_prepend;
	4547	}
	4548
	4549	/* Note that here we know PL_regeol > locinput, as we
	4550	* tested that upon input to this switch case, and if we
	4551	* moved locinput forward, we tested the result just above
	4552	* and it either passed, or we backed off so that it will
	4553	* now pass */
	4554	if (swash_fetch(PL_utf8_X_regular_begin,
	4555	(U8*)locinput, utf8_target)) {
	4556	locinput += UTF8SKIP(locinput);
	4557	}
	4558	else if (! is_GCB_SPECIAL_BEGIN_START_utf8(locinput)) {
	4559
	4560	/* Here did not match the required 'Begin' in the
	4561	* second term. So just match the very first
	4562	* character, the '.' of the final term of the regex */
	4563	locinput = starting + UTF8SKIP(starting);
	4564	goto exit_utf8;
	4565	} else {
	4566
	4567	/* Here is a special begin. It can be composed of
	4568	* several individual characters. One possibility is
	4569	* RI+ */
	4570	if ((len = is_GCB_RI_utf8(locinput))) {
	4571	locinput += len;
	4572	while (locinput < PL_regeol
	4573	&& (len = is_GCB_RI_utf8(locinput)))
	4574	{
	4575	locinput += len;
	4576	}
	4577	} else if ((len = is_GCB_T_utf8(locinput))) {
	4578	/* Another possibility is T+ */
	4579	locinput += len;
	4580	while (locinput < PL_regeol
	4581	&& (len = is_GCB_T_utf8(locinput)))
	4582	{
	4583	locinput += len;
	4584	}
	4585	} else {
	4586
	4587	/* Here, neither RI+ nor T+; must be some other
	4588	* Hangul. That means it is one of the others: L,
	4589	* LV, LVT or V, and matches:
	4590	* L* (L \| LVT T* \| V * V* T* \| LV V* T*) */
	4591
	4592	/* Match L* */
	4593	while (locinput < PL_regeol
	4594	&& (len = is_GCB_L_utf8(locinput)))
	4595	{
	4596	locinput += len;
	4597	}
	4598
	4599	/* Here, have exhausted L*. If the next character
	4600	* is not an LV, LVT nor V, it means we had to have
	4601	* at least one L, so matches L+ in the original
	4602	* equation, we have a complete hangul syllable.
	4603	* Are done. */
	4604
	4605	if (locinput < PL_regeol
	4606	&& is_GCB_LV_LVT_V_utf8(locinput))
	4607	{
	4608	/* Otherwise keep going. Must be LV, LVT or V.
	4609	* See if LVT, by first ruling out V, then LV */
	4610	if (! is_GCB_V_utf8(locinput)
	4611	/* All but every TCount one is LV */
	4612	&& (valid_utf8_to_uvchr((U8 *) locinput,
	4613	NULL)
	4614	- SBASE)
	4615	% TCount != 0)
	4616	{
	4617	locinput += UTF8SKIP(locinput);
	4618	} else {
	4619
	4620	/* Must be V or LV. Take it, then match
	4621	* V* */
	4622	locinput += UTF8SKIP(locinput);
	4623	while (locinput < PL_regeol
	4624	&& (len = is_GCB_V_utf8(locinput)))
	4625	{
	4626	locinput += len;
	4627	}
	4628	}
	4629
	4630	/* And any of LV, LVT, or V can be followed
	4631	* by T* */
	4632	while (locinput < PL_regeol
	4633	&& (len = is_GCB_T_utf8(locinput)))
	4634	{
	4635	locinput += len;
	4636	}
	4637	}
	4638	}
	4639	}
	4640
	4641	/* Match any extender */
	4642	while (locinput < PL_regeol
	4643	&& swash_fetch(PL_utf8_X_extend,
	4644	(U8*)locinput, utf8_target))
	4645	{
	4646	locinput += UTF8SKIP(locinput);
	4647	}
	4648	}
	4649	exit_utf8:
	4650	if (locinput > PL_regeol) sayNO;
	4651	}
	4652	break;
	4653
	4654	case NREFFL: /* /\g{name}/il */
	4655	{ /* The capture buffer cases. The ones beginning with N for the
	4656	named buffers just convert to the equivalent numbered and
	4657	pretend they were called as the corresponding numbered buffer
	4658	op. */
	4659	/* don't initialize these in the declaration, it makes C++
	4660	unhappy */
	4661	char *s;
	4662	char type;
	4663	re_fold_t folder;
	4664	const U8 *fold_array;
	4665	UV utf8_fold_flags;
	4666
	4667	PL_reg_flags \|= RF_tainted;
	4668	folder = foldEQ_locale;
	4669	fold_array = PL_fold_locale;
	4670	type = REFFL;
	4671	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4672	goto do_nref;
	4673
	4674	case NREFFA: /* /\g{name}/iaa */
	4675	folder = foldEQ_latin1;
	4676	fold_array = PL_fold_latin1;
	4677	type = REFFA;
	4678	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4679	goto do_nref;
	4680
	4681	case NREFFU: /* /\g{name}/iu */
	4682	folder = foldEQ_latin1;
	4683	fold_array = PL_fold_latin1;
	4684	type = REFFU;
	4685	utf8_fold_flags = 0;
	4686	goto do_nref;
	4687
	4688	case NREFF: /* /\g{name}/i */
	4689	folder = foldEQ;
	4690	fold_array = PL_fold;
	4691	type = REFF;
	4692	utf8_fold_flags = 0;
	4693	goto do_nref;
	4694
	4695	case NREF: /* /\g{name}/ */
	4696	type = REF;
	4697	folder = NULL;
	4698	fold_array = NULL;
	4699	utf8_fold_flags = 0;
	4700	do_nref:
	4701
	4702	/* For the named back references, find the corresponding buffer
	4703	* number */
	4704	n = reg_check_named_buff_matched(rex,scan);
	4705
	4706	if ( ! n ) {
	4707	sayNO;
	4708	}
	4709	goto do_nref_ref_common;
	4710
	4711	case REFFL: /* /\1/il */
	4712	PL_reg_flags \|= RF_tainted;
	4713	folder = foldEQ_locale;
	4714	fold_array = PL_fold_locale;
	4715	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4716	goto do_ref;
	4717
	4718	case REFFA: /* /\1/iaa */
	4719	folder = foldEQ_latin1;
	4720	fold_array = PL_fold_latin1;
	4721	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4722	goto do_ref;
	4723
	4724	case REFFU: /* /\1/iu */
	4725	folder = foldEQ_latin1;
	4726	fold_array = PL_fold_latin1;
	4727	utf8_fold_flags = 0;
	4728	goto do_ref;
	4729
	4730	case REFF: /* /\1/i */
	4731	folder = foldEQ;
	4732	fold_array = PL_fold;
	4733	utf8_fold_flags = 0;
	4734	goto do_ref;
	4735
	4736	case REF: /* /\1/ */
	4737	folder = NULL;
	4738	fold_array = NULL;
	4739	utf8_fold_flags = 0;
	4740
	4741	do_ref:
	4742	type = OP(scan);
	4743	n = ARG(scan); /* which paren pair */
	4744
	4745	do_nref_ref_common:
	4746	ln = rex->offs[n].start;
	4747	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4748	if (rex->lastparen < n \|\| ln == -1)
	4749	sayNO; /* Do not match unless seen CLOSEn. */
	4750	if (ln == rex->offs[n].end)
	4751	break;
	4752
	4753	s = PL_bostr + ln;
	4754	if (type != REF /* REF can do byte comparison */
	4755	&& (utf8_target \|\| type == REFFU))
	4756	{ /* XXX handle REFFL better */
	4757	char * limit = PL_regeol;
	4758
	4759	/* This call case insensitively compares the entire buffer
	4760	* at s, with the current input starting at locinput, but
	4761	* not going off the end given by PL_regeol, and returns in
	4762	* <limit> upon success, how much of the current input was
	4763	* matched */
	4764	if (! foldEQ_utf8_flags(s, NULL, rex->offs[n].end - ln, utf8_target,
	4765	locinput, &limit, 0, utf8_target, utf8_fold_flags))
	4766	{
	4767	sayNO;
	4768	}
	4769	locinput = limit;
	4770	break;
	4771	}
	4772
	4773	/* Not utf8: Inline the first character, for speed. */
	4774	if (!NEXTCHR_IS_EOS &&
	4775	UCHARAT(s) != nextchr &&
	4776	(type == REF \|\|
	4777	UCHARAT(s) != fold_array[nextchr]))
	4778	sayNO;
	4779	ln = rex->offs[n].end - ln;
	4780	if (locinput + ln > PL_regeol)
	4781	sayNO;
	4782	if (ln > 1 && (type == REF
	4783	? memNE(s, locinput, ln)
	4784	: ! folder(s, locinput, ln)))
	4785	sayNO;
	4786	locinput += ln;
	4787	break;
	4788	}
	4789
	4790	case NOTHING: /* null op; e.g. the 'nothing' following
	4791	* the '*' in m{(a+|b)*}' */
	4792	break;
	4793	case TAIL: /* placeholder while compiling (A\|B\|C) */
	4794	break;
	4795
	4796	case BACK: /* ??? doesn't appear to be used ??? */
	4797	break;
	4798
	4799	#undef ST
	4800	#define ST st->u.eval
	4801	{
	4802	SV *ret;
	4803	REGEXP *re_sv;
	4804	regexp *re;
	4805	regexp_internal *rei;
	4806	regnode *startpoint;
	4807
	4808	case GOSTART: /* (?R) */
	4809	case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
	4810	if (cur_eval && cur_eval->locinput==locinput) {
	4811	if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
	4812	Perl_croak(aTHX_ "Infinite recursion in regex");
	4813	if ( ++nochange_depth > max_nochange_depth )
	4814	Perl_croak(aTHX_
	4815	"Pattern subroutine nesting without pos change"
	4816	" exceeded limit in regex");
	4817	} else {
	4818	nochange_depth = 0;
	4819	}
	4820	re_sv = rex_sv;
	4821	re = rex;
	4822	rei = rexi;
	4823	if (OP(scan)==GOSUB) {
	4824	startpoint = scan + ARG2L(scan);
	4825	ST.close_paren = ARG(scan);
	4826	} else {
	4827	startpoint = rei->program+1;
	4828	ST.close_paren = 0;
	4829	}
	4830	goto eval_recurse_doit;
	4831	assert(0); /* NOTREACHED */
	4832
	4833	case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X\|Y)B/ */
	4834	if (cur_eval && cur_eval->locinput==locinput) {
	4835	if ( ++nochange_depth > max_nochange_depth )
	4836	Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
	4837	} else {
	4838	nochange_depth = 0;
	4839	}
	4840	{
	4841	/* execute the code in the {...} */
	4842
	4843	dSP;
	4844	IV before;
	4845	OP * const oop = PL_op;
	4846	COP * const ocurcop = PL_curcop;
	4847	OP *nop;
	4848	char *saved_regeol = PL_regeol;
	4849	struct re_save_state saved_state;
	4850	CV *newcv;
	4851
	4852	/* save all paren positions */
	4853	regcppush(rex, 0, maxopenparen);
	4854	REGCP_SET(runops_cp);
	4855
	4856	/* To not corrupt the existing regex state while executing the
	4857	* eval we would normally put it on the save stack, like with
	4858	* save_re_context. However, re-evals have a weird scoping so we
	4859	* can't just add ENTER/LEAVE here. With that, things like
	4860	*
	4861	* (?{$a=2})(a(?{local$a=$a+1}))aakc(?{$b=$a})
	4862	*
	4863	* would break, as they expect the localisation to be unwound
	4864	* only when the re-engine backtracks through the bit that
	4865	* localised it.
	4866	*
	4867	* What we do instead is just saving the state in a local c
	4868	* variable.
	4869	*/
	4870	Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
	4871
	4872	PL_reg_state.re_reparsing = FALSE;
	4873
	4874	if (!caller_cv)
	4875	caller_cv = find_runcv(NULL);
	4876
	4877	n = ARG(scan);
	4878
	4879	if (rexi->data->what[n] == 'r') { /* code from an external qr */
	4880	newcv = (ReANY(
	4881	(REGEXP*)(rexi->data->data[n])
	4882	))->qr_anoncv
	4883	;
	4884	nop = (OP*)rexi->data->data[n+1];
	4885	}
	4886	else if (rexi->data->what[n] == 'l') { /* literal code */
	4887	newcv = caller_cv;
	4888	nop = (OP*)rexi->data->data[n];
	4889	assert(CvDEPTH(newcv));
	4890	}
	4891	else {
	4892	/* literal with own CV */
	4893	assert(rexi->data->what[n] == 'L');
	4894	newcv = rex->qr_anoncv;
	4895	nop = (OP*)rexi->data->data[n];
	4896	}
	4897
	4898	/* normally if we're about to execute code from the same
	4899	* CV that we used previously, we just use the existing
	4900	* CX stack entry. However, it's possible that in the
	4901	* meantime we may have backtracked, popped from the save
	4902	* stack, and undone the SAVECOMPPAD(s) associated with
	4903	* PUSH_MULTICALL; in which case PL_comppad no longer
	4904	* points to newcv's pad. */
	4905	if (newcv != last_pushed_cv \|\| PL_comppad != last_pad)
	4906	{
	4907	I32 depth = (newcv == caller_cv) ? 0 : 1;
	4908	if (last_pushed_cv) {
	4909	CHANGE_MULTICALL_WITHDEPTH(newcv, depth);
	4910	}
	4911	else {
	4912	PUSH_MULTICALL_WITHDEPTH(newcv, depth);
	4913	}
	4914	last_pushed_cv = newcv;
	4915	}
	4916	else {
	4917	/* these assignments are just to silence compiler
	4918	* warnings */
	4919	multicall_cop = NULL;
	4920	newsp = NULL;
	4921	}
	4922	last_pad = PL_comppad;
	4923
	4924	/* the initial nextstate you would normally execute
	4925	* at the start of an eval (which would cause error
	4926	* messages to come from the eval), may be optimised
	4927	* away from the execution path in the regex code blocks;
	4928	* so manually set PL_curcop to it initially */
	4929	{
	4930	OP *o = cUNOPx(nop)->op_first;
	4931	assert(o->op_type == OP_NULL);
	4932	if (o->op_targ == OP_SCOPE) {
	4933	o = cUNOPo->op_first;
	4934	}
	4935	else {
	4936	assert(o->op_targ == OP_LEAVE);
	4937	o = cUNOPo->op_first;
	4938	assert(o->op_type == OP_ENTER);
	4939	o = o->op_sibling;
	4940	}
	4941
	4942	if (o->op_type != OP_STUB) {
	4943	assert( o->op_type == OP_NEXTSTATE
	4944	\|\| o->op_type == OP_DBSTATE
	4945	\|\| (o->op_type == OP_NULL
	4946	&& ( o->op_targ == OP_NEXTSTATE
	4947	\|\| o->op_targ == OP_DBSTATE
	4948	)
	4949	)
	4950	);
	4951	PL_curcop = (COP*)o;
	4952	}
	4953	}
	4954	nop = nop->op_next;
	4955
	4956	DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
	4957	" re EVAL PL_op=0x%"UVxf"\n", PTR2UV(nop)) );
	4958
	4959	rex->offs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
	4960
	4961	if (sv_yes_mark) {
	4962	SV *sv_mrk = get_sv("REGMARK", 1);
	4963	sv_setsv(sv_mrk, sv_yes_mark);
	4964	}
	4965
	4966	/* we don't use MULTICALL here as we want to call the
	4967	* first op of the block of interest, rather than the
	4968	* first op of the sub */
	4969	before = (IV)(SP-PL_stack_base);
	4970	PL_op = nop;
	4971	CALLRUNOPS(aTHX); /* Scalar context. */
	4972	SPAGAIN;
	4973	if ((IV)(SP-PL_stack_base) == before)
	4974	ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
	4975	else {
	4976	ret = POPs;
	4977	PUTBACK;
	4978	}
	4979
	4980	/* before restoring everything, evaluate the returned
	4981	* value, so that 'uninit' warnings don't use the wrong
	4982	* PL_op or pad. Also need to process any magic vars
	4983	* (e.g. $1) before parentheses are restored */
	4984
	4985	PL_op = NULL;
	4986
	4987	re_sv = NULL;
	4988	if (logical == 0) /* (?{})/ */
	4989	sv_setsv(save_scalar(PL_replgv), ret); /* $^R */
	4990	else if (logical == 1) { /* /(?(?{...})X\|Y)/ */
	4991	sw = cBOOL(SvTRUE(ret));
	4992	logical = 0;
	4993	}
	4994	else { /* /(??{}) */
	4995	/* if it's overloaded, let the regex compiler handle
	4996	* it; otherwise extract regex, or stringify */
	4997	if (!SvAMAGIC(ret)) {
	4998	SV *sv = ret;
	4999	if (SvROK(sv))
	5000	sv = SvRV(sv);
	5001	if (SvTYPE(sv) == SVt_REGEXP)
	5002	re_sv = (REGEXP*) sv;
	5003	else if (SvSMAGICAL(sv)) {
	5004	MAGIC *mg = mg_find(sv, PERL_MAGIC_qr);
	5005	if (mg)
	5006	re_sv = (REGEXP *) mg->mg_obj;
	5007	}
	5008
	5009	/* force any magic, undef warnings here */
	5010	if (!re_sv) {
	5011	ret = sv_mortalcopy(ret);
	5012	(void) SvPV_force_nolen(ret);
	5013	}
	5014	}
	5015
	5016	}
	5017
	5018	Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
	5019
	5020	/* *** Note that at this point we don't restore
	5021	* PL_comppad, (or pop the CxSUB) on the assumption it may
	5022	* be used again soon. This is safe as long as nothing
	5023	* in the regexp code uses the pad ! */
	5024	PL_op = oop;
	5025	PL_curcop = ocurcop;
	5026	PL_regeol = saved_regeol;
	5027	S_regcp_restore(aTHX_ rex, runops_cp, &maxopenparen);
	5028
	5029	if (logical != 2)
	5030	break;
	5031	}
	5032
	5033	/* only /(??{})/ from now on */
	5034	logical = 0;
	5035	{
	5036	/* extract RE object from returned value; compiling if
	5037	* necessary */
	5038
	5039	if (re_sv) {
	5040	re_sv = reg_temp_copy(NULL, re_sv);
	5041	}
	5042	else {
	5043	U32 pm_flags = 0;
	5044
	5045	if (SvUTF8(ret) && IN_BYTES) {
	5046	/* In use 'bytes': make a copy of the octet
	5047	* sequence, but without the flag on */
	5048	STRLEN len;
	5049	const char *const p = SvPV(ret, len);
	5050	ret = newSVpvn_flags(p, len, SVs_TEMP);
	5051	}
	5052	if (rex->intflags & PREGf_USE_RE_EVAL)
	5053	pm_flags \|= PMf_USE_RE_EVAL;
	5054
	5055	/* if we got here, it should be an engine which
	5056	* supports compiling code blocks and stuff */
	5057	assert(rex->engine && rex->engine->op_comp);
	5058	assert(!(scan->flags & ~RXf_PMf_COMPILETIME));
	5059	re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL,
	5060	rex->engine, NULL, NULL,
	5061	/* copy /msix etc to inner pattern */
	5062	scan->flags,
	5063	pm_flags);
	5064
	5065	if (!(SvFLAGS(ret)
	5066	& (SVs_TEMP \| SVs_PADTMP \| SVf_READONLY
	5067	\| SVs_GMG))) {
	5068	/* This isn't a first class regexp. Instead, it's
	5069	caching a regexp onto an existing, Perl visible
	5070	scalar. */
	5071	sv_magic(ret, MUTABLE_SV(re_sv), PERL_MAGIC_qr, 0, 0);
	5072	}
	5073	/* safe to do now that any $1 etc has been
	5074	* interpolated into the new pattern string and
	5075	* compiled */
	5076	S_regcp_restore(aTHX_ rex, runops_cp, &maxopenparen);
	5077	}
	5078	SAVEFREESV(re_sv);
	5079	re = ReANY(re_sv);
	5080	}
	5081	RXp_MATCH_COPIED_off(re);
	5082	re->subbeg = rex->subbeg;
	5083	re->sublen = rex->sublen;
	5084	re->suboffset = rex->suboffset;
	5085	re->subcoffset = rex->subcoffset;
	5086	rei = RXi_GET(re);
	5087	DEBUG_EXECUTE_r(
	5088	debug_start_match(re_sv, utf8_target, locinput, PL_regeol,
	5089	"Matching embedded");
	5090	);
	5091	startpoint = rei->program + 1;
	5092	ST.close_paren = 0; /* only used for GOSUB */
	5093
	5094	eval_recurse_doit: /* Share code with GOSUB below this line */
	5095	/* run the pattern returned from (??{...}) */
	5096
	5097	/* Save all the positions. */
	5098	ST.cp = regcppush(rex, 0, maxopenparen);
	5099	REGCP_SET(ST.lastcp);
	5100
	5101	re->lastparen = 0;
	5102	re->lastcloseparen = 0;
	5103
	5104	maxopenparen = 0;
	5105
	5106	/* XXXX This is too dramatic a measure... */
	5107	PL_reg_maxiter = 0;
	5108
	5109	ST.toggle_reg_flags = PL_reg_flags;
	5110	if (RX_UTF8(re_sv))
	5111	PL_reg_flags \|= RF_utf8;
	5112	else
	5113	PL_reg_flags &= ~RF_utf8;
	5114	ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
	5115
	5116	ST.prev_rex = rex_sv;
	5117	ST.prev_curlyx = cur_curlyx;
	5118	rex_sv = re_sv;
	5119	SET_reg_curpm(rex_sv);
	5120	rex = re;
	5121	rexi = rei;
	5122	cur_curlyx = NULL;
	5123	ST.B = next;
	5124	ST.prev_eval = cur_eval;
	5125	cur_eval = st;
	5126	/* now continue from first node in postponed RE */
	5127	PUSH_YES_STATE_GOTO(EVAL_AB, startpoint, locinput);
	5128	assert(0); /* NOTREACHED */
	5129	}
	5130
	5131	case EVAL_AB: /* cleanup after a successful (??{A})B */
	5132	/* note: this is called twice; first after popping B, then A */
	5133	PL_reg_flags ^= ST.toggle_reg_flags;
	5134	rex_sv = ST.prev_rex;
	5135	SET_reg_curpm(rex_sv);
	5136	rex = ReANY(rex_sv);
	5137	rexi = RXi_GET(rex);
	5138	regcpblow(ST.cp);
	5139	cur_eval = ST.prev_eval;
	5140	cur_curlyx = ST.prev_curlyx;
	5141
	5142	/* XXXX This is too dramatic a measure... */
	5143	PL_reg_maxiter = 0;
	5144	if ( nochange_depth )
	5145	nochange_depth--;
	5146	sayYES;
	5147
	5148
	5149	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
	5150	/* note: this is called twice; first after popping B, then A */
	5151	PL_reg_flags ^= ST.toggle_reg_flags;
	5152	rex_sv = ST.prev_rex;
	5153	SET_reg_curpm(rex_sv);
	5154	rex = ReANY(rex_sv);
	5155	rexi = RXi_GET(rex);
	5156
	5157	REGCP_UNWIND(ST.lastcp);
	5158	regcppop(rex, &maxopenparen);
	5159	cur_eval = ST.prev_eval;
	5160	cur_curlyx = ST.prev_curlyx;
	5161	/* XXXX This is too dramatic a measure... */
	5162	PL_reg_maxiter = 0;
	5163	if ( nochange_depth )
	5164	nochange_depth--;
	5165	sayNO_SILENT;
	5166	#undef ST
	5167
	5168	case OPEN: /* ( */
	5169	n = ARG(scan); /* which paren pair */
	5170	rex->offs[n].start_tmp = locinput - PL_bostr;
	5171	if (n > maxopenparen)
	5172	maxopenparen = n;
	5173	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	5174	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf" tmp; maxopenparen=%"UVuf"\n",
	5175	PTR2UV(rex),
	5176	PTR2UV(rex->offs),
	5177	(UV)n,
	5178	(IV)rex->offs[n].start_tmp,
	5179	(UV)maxopenparen
	5180	));
	5181	lastopen = n;
	5182	break;
	5183
	5184	/* XXX really need to log other places start/end are set too */
	5185	#define CLOSE_CAPTURE \
	5186	rex->offs[n].start = rex->offs[n].start_tmp; \
	5187	rex->offs[n].end = locinput - PL_bostr; \
	5188	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, \
	5189	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf"..%"IVdf"\n", \
	5190	PTR2UV(rex), \
	5191	PTR2UV(rex->offs), \
	5192	(UV)n, \
	5193	(IV)rex->offs[n].start, \
	5194	(IV)rex->offs[n].end \
	5195	))
	5196
	5197	case CLOSE: /* ) */
	5198	n = ARG(scan); /* which paren pair */
	5199	CLOSE_CAPTURE;
	5200	if (n > rex->lastparen)
	5201	rex->lastparen = n;
	5202	rex->lastcloseparen = n;
	5203	if (cur_eval && cur_eval->u.eval.close_paren == n) {
	5204	goto fake_end;
	5205	}
	5206	break;
	5207
	5208	case ACCEPT: /* (*ACCEPT) */
	5209	if (ARG(scan)){
	5210	regnode *cursor;
	5211	for (cursor=scan;
	5212	cursor && OP(cursor)!=END;
	5213	cursor=regnext(cursor))
	5214	{
	5215	if ( OP(cursor)==CLOSE ){
	5216	n = ARG(cursor);
	5217	if ( n <= lastopen ) {
	5218	CLOSE_CAPTURE;
	5219	if (n > rex->lastparen)
	5220	rex->lastparen = n;
	5221	rex->lastcloseparen = n;
	5222	if ( n == ARG(scan) \|\| (cur_eval &&
	5223	cur_eval->u.eval.close_paren == n))
	5224	break;
	5225	}
	5226	}
	5227	}
	5228	}
	5229	goto fake_end;
	5230	/*NOTREACHED*/
	5231
	5232	case GROUPP: /* (?(1)) */
	5233	n = ARG(scan); /* which paren pair */
	5234	sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
	5235	break;
	5236
	5237	case NGROUPP: /* (?(<name>)) */
	5238	/* reg_check_named_buff_matched returns 0 for no match */
	5239	sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
	5240	break;
	5241
	5242	case INSUBP: /* (?(R)) */
	5243	n = ARG(scan);
	5244	sw = (cur_eval && (!n \|\| cur_eval->u.eval.close_paren == n));
	5245	break;
	5246
	5247	case DEFINEP: /* (?(DEFINE)) */
	5248	sw = 0;
	5249	break;
	5250
	5251	case IFTHEN: /* (?(cond)A\|B) */
	5252	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	5253	if (sw)
	5254	next = NEXTOPER(NEXTOPER(scan));
	5255	else {
	5256	next = scan + ARG(scan);
	5257	if (OP(next) == IFTHEN) /* Fake one. */
	5258	next = NEXTOPER(NEXTOPER(next));
	5259	}
	5260	break;
	5261
	5262	case LOGICAL: /* modifier for EVAL and IFMATCH */
	5263	logical = scan->flags;
	5264	break;
	5265
	5266	/*******************************************************************
	5267
	5268	The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
	5269	pattern, where A and B are subpatterns. (For simple A, CURLYM or
	5270	STAR/PLUS/CURLY/CURLYN are used instead.)
	5271
	5272	A*B is compiled as <CURLYX><A><WHILEM><B>
	5273
	5274	On entry to the subpattern, CURLYX is called. This pushes a CURLYX
	5275	state, which contains the current count, initialised to -1. It also sets
	5276	cur_curlyx to point to this state, with any previous value saved in the
	5277	state block.
	5278
	5279	CURLYX then jumps straight to the WHILEM op, rather than executing A,
	5280	since the pattern may possibly match zero times (i.e. it's a while {} loop
	5281	rather than a do {} while loop).
	5282
	5283	Each entry to WHILEM represents a successful match of A. The count in the
	5284	CURLYX block is incremented, another WHILEM state is pushed, and execution
	5285	passes to A or B depending on greediness and the current count.
	5286
	5287	For example, if matching against the string a1a2a3b (where the aN are
	5288	substrings that match /A/), then the match progresses as follows: (the
	5289	pushed states are interspersed with the bits of strings matched so far):
	5290
	5291	<CURLYX cnt=-1>
	5292	<CURLYX cnt=0><WHILEM>
	5293	<CURLYX cnt=1><WHILEM> a1 <WHILEM>
	5294	<CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
	5295	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
	5296	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
	5297
	5298	(Contrast this with something like CURLYM, which maintains only a single
	5299	backtrack state:
	5300
	5301	<CURLYM cnt=0> a1
	5302	a1 <CURLYM cnt=1> a2
	5303	a1 a2 <CURLYM cnt=2> a3
	5304	a1 a2 a3 <CURLYM cnt=3> b
	5305	)
	5306
	5307	Each WHILEM state block marks a point to backtrack to upon partial failure
	5308	of A or B, and also contains some minor state data related to that
	5309	iteration. The CURLYX block, pointed to by cur_curlyx, contains the
	5310	overall state, such as the count, and pointers to the A and B ops.
	5311
	5312	This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
	5313	must always point to the current CURLYX block, the rules are:
	5314
	5315	When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
	5316	and set cur_curlyx to point to the new block.
	5317
	5318	When popping the CURLYX block after a successful or unsuccessful match,
	5319	restore the previous cur_curlyx.
	5320
	5321	When WHILEM is about to execute B, save the current cur_curlyx, and set it
	5322	to the outer one saved in the CURLYX block.
	5323
	5324	When popping the WHILEM block after a successful or unsuccessful B match,
	5325	restore the previous cur_curlyx.
	5326
	5327	Here's an example for the pattern (AI* BI)*BO
	5328	I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
	5329
	5330	cur_
	5331	curlyx backtrack stack
	5332	------ ---------------
	5333	NULL
	5334	CO <CO prev=NULL> <WO>
	5335	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	5336	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	5337	NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
	5338
	5339	At this point the pattern succeeds, and we work back down the stack to
	5340	clean up, restoring as we go:
	5341
	5342	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	5343	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	5344	CO <CO prev=NULL> <WO>
	5345	NULL
	5346
	5347	*******************************************************************/
	5348
	5349	#define ST st->u.curlyx
	5350
	5351	case CURLYX: /* start of /A*B/ (for complex A) */
	5352	{
	5353	/* No need to save/restore up to this paren */
	5354	I32 parenfloor = scan->flags;
	5355
	5356	assert(next); /* keep Coverity happy */
	5357	if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
	5358	next += ARG(next);
	5359
	5360	/* XXXX Probably it is better to teach regpush to support
	5361	parenfloor > maxopenparen ... */
	5362	if (parenfloor > (I32)rex->lastparen)
	5363	parenfloor = rex->lastparen; /* Pessimization... */
	5364
	5365	ST.prev_curlyx= cur_curlyx;
	5366	cur_curlyx = st;
	5367	ST.cp = PL_savestack_ix;
	5368
	5369	/* these fields contain the state of the current curly.
	5370	* they are accessed by subsequent WHILEMs */
	5371	ST.parenfloor = parenfloor;
	5372	ST.me = scan;
	5373	ST.B = next;
	5374	ST.minmod = minmod;
	5375	minmod = 0;
	5376	ST.count = -1; /* this will be updated by WHILEM */
	5377	ST.lastloc = NULL; /* this will be updated by WHILEM */
	5378
	5379	PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput);
	5380	assert(0); /* NOTREACHED */
	5381	}
	5382
	5383	case CURLYX_end: /* just finished matching all of A*B */
	5384	cur_curlyx = ST.prev_curlyx;
	5385	sayYES;
	5386	assert(0); /* NOTREACHED */
	5387
	5388	case CURLYX_end_fail: /* just failed to match all of A*B */
	5389	regcpblow(ST.cp);
	5390	cur_curlyx = ST.prev_curlyx;
	5391	sayNO;
	5392	assert(0); /* NOTREACHED */
	5393
	5394
	5395	#undef ST
	5396	#define ST st->u.whilem
	5397
	5398	case WHILEM: /* just matched an A in /A*B/ (for complex A) */
	5399	{
	5400	/* see the discussion above about CURLYX/WHILEM */
	5401	I32 n;
	5402	int min = ARG1(cur_curlyx->u.curlyx.me);
	5403	int max = ARG2(cur_curlyx->u.curlyx.me);
	5404	regnode *A = NEXTOPER(cur_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS;
	5405
	5406	assert(cur_curlyx); /* keep Coverity happy */
	5407	n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
	5408	ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
	5409	ST.cache_offset = 0;
	5410	ST.cache_mask = 0;
	5411
	5412
	5413	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5414	"%*s whilem: matched %ld out of %d..%d\n",
	5415	REPORT_CODE_OFF+depth*2, "", (long)n, min, max)
	5416	);
	5417
	5418	/* First just match a string of min A's. */
	5419
	5420	if (n < min) {
	5421	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	5422	maxopenparen);
	5423	cur_curlyx->u.curlyx.lastloc = locinput;
	5424	REGCP_SET(ST.lastcp);
	5425
	5426	PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput);
	5427	assert(0); /* NOTREACHED */
	5428	}
	5429
	5430	/* If degenerate A matches "", assume A done. */
	5431
	5432	if (locinput == cur_curlyx->u.curlyx.lastloc) {
	5433	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5434	"%*s whilem: empty match detected, trying continuation...\n",
	5435	REPORT_CODE_OFF+depth*2, "")
	5436	);
	5437	goto do_whilem_B_max;
	5438	}
	5439
	5440	/* super-linear cache processing */
	5441
	5442	if (scan->flags) {
	5443
	5444	if (!PL_reg_maxiter) {
	5445	/* start the countdown: Postpone detection until we
	5446	* know the match is not *that* much linear. */
	5447	PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
	5448	/* possible overflow for long strings and many CURLYX's */
	5449	if (PL_reg_maxiter < 0)
	5450	PL_reg_maxiter = I32_MAX;
	5451	PL_reg_leftiter = PL_reg_maxiter;
	5452	}
	5453
	5454	if (PL_reg_leftiter-- == 0) {
	5455	/* initialise cache */
	5456	const I32 size = (PL_reg_maxiter + 7)/8;
	5457	if (PL_reg_poscache) {
	5458	if ((I32)PL_reg_poscache_size < size) {
	5459	Renew(PL_reg_poscache, size, char);
	5460	PL_reg_poscache_size = size;
	5461	}
	5462	Zero(PL_reg_poscache, size, char);
	5463	}
	5464	else {
	5465	PL_reg_poscache_size = size;
	5466	Newxz(PL_reg_poscache, size, char);
	5467	}
	5468	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5469	"%swhilem: Detected a super-linear match, switching on caching%s...\n",
	5470	PL_colors[4], PL_colors[5])
	5471	);
	5472	}
	5473
	5474	if (PL_reg_leftiter < 0) {
	5475	/* have we already failed at this position? */
	5476	I32 offset, mask;
	5477	offset = (scan->flags & 0xf) - 1
	5478	+ (locinput - PL_bostr) * (scan->flags>>4);
	5479	mask = 1 << (offset % 8);
	5480	offset /= 8;
	5481	if (PL_reg_poscache[offset] & mask) {
	5482	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5483	"%*s whilem: (cache) already tried at this position...\n",
	5484	REPORT_CODE_OFF+depth*2, "")
	5485	);
	5486	sayNO; /* cache records failure */
	5487	}
	5488	ST.cache_offset = offset;
	5489	ST.cache_mask = mask;
	5490	}
	5491	}
	5492
	5493	/* Prefer B over A for minimal matching. */
	5494
	5495	if (cur_curlyx->u.curlyx.minmod) {
	5496	ST.save_curlyx = cur_curlyx;
	5497	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	5498	ST.cp = regcppush(rex, ST.save_curlyx->u.curlyx.parenfloor,
	5499	maxopenparen);
	5500	REGCP_SET(ST.lastcp);
	5501	PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
	5502	locinput);
	5503	assert(0); /* NOTREACHED */
	5504	}
	5505
	5506	/* Prefer A over B for maximal matching. */
	5507
	5508	if (n < max) { /* More greed allowed? */
	5509	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	5510	maxopenparen);
	5511	cur_curlyx->u.curlyx.lastloc = locinput;
	5512	REGCP_SET(ST.lastcp);
	5513	PUSH_STATE_GOTO(WHILEM_A_max, A, locinput);
	5514	assert(0); /* NOTREACHED */
	5515	}
	5516	goto do_whilem_B_max;
	5517	}
	5518	assert(0); /* NOTREACHED */
	5519
	5520	case WHILEM_B_min: /* just matched B in a minimal match */
	5521	case WHILEM_B_max: /* just matched B in a maximal match */
	5522	cur_curlyx = ST.save_curlyx;
	5523	sayYES;
	5524	assert(0); /* NOTREACHED */
	5525
	5526	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
	5527	cur_curlyx = ST.save_curlyx;
	5528	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	5529	cur_curlyx->u.curlyx.count--;
	5530	CACHEsayNO;
	5531	assert(0); /* NOTREACHED */
	5532
	5533	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
	5534	/* FALL THROUGH */
	5535	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
	5536	REGCP_UNWIND(ST.lastcp);
	5537	regcppop(rex, &maxopenparen);
	5538	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	5539	cur_curlyx->u.curlyx.count--;
	5540	CACHEsayNO;
	5541	assert(0); /* NOTREACHED */
	5542
	5543	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
	5544	REGCP_UNWIND(ST.lastcp);
	5545	regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
	5546	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5547	"%*s whilem: failed, trying continuation...\n",
	5548	REPORT_CODE_OFF+depth*2, "")
	5549	);
	5550	do_whilem_B_max:
	5551	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	5552	&& ckWARN(WARN_REGEXP)
	5553	&& !(PL_reg_flags & RF_warned))
	5554	{
	5555	PL_reg_flags \|= RF_warned;
	5556	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	5557	"Complex regular subexpression recursion limit (%d) "
	5558	"exceeded",
	5559	REG_INFTY - 1);
	5560	}
	5561
	5562	/* now try B */
	5563	ST.save_curlyx = cur_curlyx;
	5564	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	5565	PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
	5566	locinput);
	5567	assert(0); /* NOTREACHED */
	5568
	5569	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
	5570	cur_curlyx = ST.save_curlyx;
	5571	REGCP_UNWIND(ST.lastcp);
	5572	regcppop(rex, &maxopenparen);
	5573
	5574	if (cur_curlyx->u.curlyx.count >= /*max*/ARG2(cur_curlyx->u.curlyx.me)) {
	5575	/* Maximum greed exceeded */
	5576	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	5577	&& ckWARN(WARN_REGEXP)
	5578	&& !(PL_reg_flags & RF_warned))
	5579	{
	5580	PL_reg_flags \|= RF_warned;
	5581	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	5582	"Complex regular subexpression recursion "
	5583	"limit (%d) exceeded",
	5584	REG_INFTY - 1);
	5585	}
	5586	cur_curlyx->u.curlyx.count--;
	5587	CACHEsayNO;
	5588	}
	5589
	5590	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5591	"%s trying longer...\n", REPORT_CODE_OFF+depth2, "")
	5592	);
	5593	/* Try grabbing another A and see if it helps. */
	5594	cur_curlyx->u.curlyx.lastloc = locinput;
	5595	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	5596	maxopenparen);
	5597	REGCP_SET(ST.lastcp);
	5598	PUSH_STATE_GOTO(WHILEM_A_min,
	5599	/*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
	5600	locinput);
	5601	assert(0); /* NOTREACHED */
	5602
	5603	#undef ST
	5604	#define ST st->u.branch
	5605
	5606	case BRANCHJ: /* /(...\|A\|...)/ with long next pointer */
	5607	next = scan + ARG(scan);
	5608	if (next == scan)
	5609	next = NULL;
	5610	scan = NEXTOPER(scan);
	5611	/* FALL THROUGH */
	5612
	5613	case BRANCH: /* /(...\|A\|...)/ */
	5614	scan = NEXTOPER(scan); /* scan now points to inner node */
	5615	ST.lastparen = rex->lastparen;
	5616	ST.lastcloseparen = rex->lastcloseparen;
	5617	ST.next_branch = next;
	5618	REGCP_SET(ST.cp);
	5619
	5620	/* Now go into the branch */
	5621	if (has_cutgroup) {
	5622	PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput);
	5623	} else {
	5624	PUSH_STATE_GOTO(BRANCH_next, scan, locinput);
	5625	}
	5626	assert(0); /* NOTREACHED */
	5627
	5628	case CUTGROUP: /* /(*THEN)/ */
	5629	sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
	5630	MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5631	PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
	5632	assert(0); /* NOTREACHED */
	5633
	5634	case CUTGROUP_next_fail:
	5635	do_cutgroup = 1;
	5636	no_final = 1;
	5637	if (st->u.mark.mark_name)
	5638	sv_commit = st->u.mark.mark_name;
	5639	sayNO;
	5640	assert(0); /* NOTREACHED */
	5641
	5642	case BRANCH_next:
	5643	sayYES;
	5644	assert(0); /* NOTREACHED */
	5645
	5646	case BRANCH_next_fail: /* that branch failed; try the next, if any */
	5647	if (do_cutgroup) {
	5648	do_cutgroup = 0;
	5649	no_final = 0;
	5650	}
	5651	REGCP_UNWIND(ST.cp);
	5652	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5653	scan = ST.next_branch;
	5654	/* no more branches? */
	5655	if (!scan \|\| (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
	5656	DEBUG_EXECUTE_r({
	5657	PerlIO_printf( Perl_debug_log,
	5658	"%*s %sBRANCH failed...%s\n",
	5659	REPORT_CODE_OFF+depth*2, "",
	5660	PL_colors[4],
	5661	PL_colors[5] );
	5662	});
	5663	sayNO_SILENT;
	5664	}
	5665	continue; /* execute next BRANCH[J] op */
	5666	assert(0); /* NOTREACHED */
	5667
	5668	case MINMOD: /* next op will be non-greedy, e.g. A*? */
	5669	minmod = 1;
	5670	break;
	5671
	5672	#undef ST
	5673	#define ST st->u.curlym
	5674
	5675	case CURLYM: /* /A{m,n}B/ where A is fixed-length */
	5676
	5677	/* This is an optimisation of CURLYX that enables us to push
	5678	* only a single backtracking state, no matter how many matches
	5679	* there are in {m,n}. It relies on the pattern being constant
	5680	* length, with no parens to influence future backrefs
	5681	*/
	5682
	5683	ST.me = scan;
	5684	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5685
	5686	ST.lastparen = rex->lastparen;
	5687	ST.lastcloseparen = rex->lastcloseparen;
	5688
	5689	/* if paren positive, emulate an OPEN/CLOSE around A */
	5690	if (ST.me->flags) {
	5691	U32 paren = ST.me->flags;
	5692	if (paren > maxopenparen)
	5693	maxopenparen = paren;
	5694	scan += NEXT_OFF(scan); /* Skip former OPEN. */
	5695	}
	5696	ST.A = scan;
	5697	ST.B = next;
	5698	ST.alen = 0;
	5699	ST.count = 0;
	5700	ST.minmod = minmod;
	5701	minmod = 0;
	5702	ST.c1 = CHRTEST_UNINIT;
	5703	REGCP_SET(ST.cp);
	5704
	5705	if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
	5706	goto curlym_do_B;
	5707
	5708	curlym_do_A: /* execute the A in /A{m,n}B/ */
	5709	PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput); /* match A */
	5710	assert(0); /* NOTREACHED */
	5711
	5712	case CURLYM_A: /* we've just matched an A */
	5713	ST.count++;
	5714	/* after first match, determine A's length: u.curlym.alen */
	5715	if (ST.count == 1) {
	5716	if (PL_reg_match_utf8) {
	5717	char *s = st->locinput;
	5718	while (s < locinput) {
	5719	ST.alen++;
	5720	s += UTF8SKIP(s);
	5721	}
	5722	}
	5723	else {
	5724	ST.alen = locinput - st->locinput;
	5725	}
	5726	if (ST.alen == 0)
	5727	ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
	5728	}
	5729	DEBUG_EXECUTE_r(
	5730	PerlIO_printf(Perl_debug_log,
	5731	"%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
	5732	(int)(REPORT_CODE_OFF+(depth*2)), "",
	5733	(IV) ST.count, (IV)ST.alen)
	5734	);
	5735
	5736	if (cur_eval && cur_eval->u.eval.close_paren &&
	5737	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5738	goto fake_end;
	5739
	5740	{
	5741	I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
	5742	if ( max == REG_INFTY \|\| ST.count < max )
	5743	goto curlym_do_A; /* try to match another A */
	5744	}
	5745	goto curlym_do_B; /* try to match B */
	5746
	5747	case CURLYM_A_fail: /* just failed to match an A */
	5748	REGCP_UNWIND(ST.cp);
	5749
	5750	if (ST.minmod \|\| ST.count < ARG1(ST.me) /* min*/
	5751	\|\| (cur_eval && cur_eval->u.eval.close_paren &&
	5752	cur_eval->u.eval.close_paren == (U32)ST.me->flags))
	5753	sayNO;
	5754
	5755	curlym_do_B: /* execute the B in /A{m,n}B/ */
	5756	if (ST.c1 == CHRTEST_UNINIT) {
	5757	/* calculate c1 and c2 for possible match of 1st char
	5758	* following curly */
	5759	ST.c1 = ST.c2 = CHRTEST_VOID;
	5760	if (HAS_TEXT(ST.B) \|\| JUMPABLE(ST.B)) {
	5761	regnode *text_node = ST.B;
	5762	if (! HAS_TEXT(text_node))
	5763	FIND_NEXT_IMPT(text_node);
	5764	/* this used to be
	5765
	5766	(HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
	5767
	5768	But the former is redundant in light of the latter.
	5769
	5770	if this changes back then the macro for
	5771	IS_TEXT and friends need to change.
	5772	*/
	5773	if (PL_regkind[OP(text_node)] == EXACT) {
	5774	if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
	5775	text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
	5776	{
	5777	sayNO;
	5778	}
	5779	}
	5780	}
	5781	}
	5782
	5783	DEBUG_EXECUTE_r(
	5784	PerlIO_printf(Perl_debug_log,
	5785	"%*s CURLYM trying tail with matches=%"IVdf"...\n",
	5786	(int)(REPORT_CODE_OFF+(depth*2)),
	5787	"", (IV)ST.count)
	5788	);
	5789	if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
	5790	if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
	5791	if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
	5792	&& memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
	5793	{
	5794	/* simulate B failing */
	5795	DEBUG_OPTIMISE_r(
	5796	PerlIO_printf(Perl_debug_log,
	5797	"%*s CURLYM Fast bail next target=U+%"UVXf" c1=U+%"UVXf" c2=U+%"UVXf"\n",
	5798	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5799	valid_utf8_to_uvchr((U8 *) locinput, NULL),
	5800	valid_utf8_to_uvchr(ST.c1_utf8, NULL),
	5801	valid_utf8_to_uvchr(ST.c2_utf8, NULL))
	5802	);
	5803	state_num = CURLYM_B_fail;
	5804	goto reenter_switch;
	5805	}
	5806	}
	5807	else if (nextchr != ST.c1 && nextchr != ST.c2) {
	5808	/* simulate B failing */
	5809	DEBUG_OPTIMISE_r(
	5810	PerlIO_printf(Perl_debug_log,
	5811	"%*s CURLYM Fast bail next target=U+%X c1=U+%X c2=U+%X\n",
	5812	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5813	(int) nextchr, ST.c1, ST.c2)
	5814	);
	5815	state_num = CURLYM_B_fail;
	5816	goto reenter_switch;
	5817	}
	5818	}
	5819
	5820	if (ST.me->flags) {
	5821	/* emulate CLOSE: mark current A as captured */
	5822	I32 paren = ST.me->flags;
	5823	if (ST.count) {
	5824	rex->offs[paren].start
	5825	= HOPc(locinput, -ST.alen) - PL_bostr;
	5826	rex->offs[paren].end = locinput - PL_bostr;
	5827	if ((U32)paren > rex->lastparen)
	5828	rex->lastparen = paren;
	5829	rex->lastcloseparen = paren;
	5830	}
	5831	else
	5832	rex->offs[paren].end = -1;
	5833	if (cur_eval && cur_eval->u.eval.close_paren &&
	5834	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5835	{
	5836	if (ST.count)
	5837	goto fake_end;
	5838	else
	5839	sayNO;
	5840	}
	5841	}
	5842
	5843	PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput); /* match B */
	5844	assert(0); /* NOTREACHED */
	5845
	5846	case CURLYM_B_fail: /* just failed to match a B */
	5847	REGCP_UNWIND(ST.cp);
	5848	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5849	if (ST.minmod) {
	5850	I32 max = ARG2(ST.me);
	5851	if (max != REG_INFTY && ST.count == max)
	5852	sayNO;
	5853	goto curlym_do_A; /* try to match a further A */
	5854	}
	5855	/* backtrack one A */
	5856	if (ST.count == ARG1(ST.me) /* min */)
	5857	sayNO;
	5858	ST.count--;
	5859	SET_locinput(HOPc(locinput, -ST.alen));
	5860	goto curlym_do_B; /* try to match B */
	5861
	5862	#undef ST
	5863	#define ST st->u.curly
	5864
	5865	#define CURLY_SETPAREN(paren, success) \
	5866	if (paren) { \
	5867	if (success) { \
	5868	rex->offs[paren].start = HOPc(locinput, -1) - PL_bostr; \
	5869	rex->offs[paren].end = locinput - PL_bostr; \
	5870	if (paren > rex->lastparen) \
	5871	rex->lastparen = paren; \
	5872	rex->lastcloseparen = paren; \
	5873	} \
	5874	else { \
	5875	rex->offs[paren].end = -1; \
	5876	rex->lastparen = ST.lastparen; \
	5877	rex->lastcloseparen = ST.lastcloseparen; \
	5878	} \
	5879	}
	5880
	5881	case STAR: /* /AB/ where A is width 1 char /
	5882	ST.paren = 0;
	5883	ST.min = 0;
	5884	ST.max = REG_INFTY;
	5885	scan = NEXTOPER(scan);
	5886	goto repeat;
	5887
	5888	case PLUS: /* /A+B/ where A is width 1 char */
	5889	ST.paren = 0;
	5890	ST.min = 1;
	5891	ST.max = REG_INFTY;
	5892	scan = NEXTOPER(scan);
	5893	goto repeat;
	5894
	5895	case CURLYN: /* /(A){m,n}B/ where A is width 1 char */
	5896	ST.paren = scan->flags; /* Which paren to set */
	5897	ST.lastparen = rex->lastparen;
	5898	ST.lastcloseparen = rex->lastcloseparen;
	5899	if (ST.paren > maxopenparen)
	5900	maxopenparen = ST.paren;
	5901	ST.min = ARG1(scan); /* min to match */
	5902	ST.max = ARG2(scan); /* max to match */
	5903	if (cur_eval && cur_eval->u.eval.close_paren &&
	5904	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5905	ST.min=1;
	5906	ST.max=1;
	5907	}
	5908	scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
	5909	goto repeat;
	5910
	5911	case CURLY: /* /A{m,n}B/ where A is width 1 char */
	5912	ST.paren = 0;
	5913	ST.min = ARG1(scan); /* min to match */
	5914	ST.max = ARG2(scan); /* max to match */
	5915	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5916	repeat:
	5917	/*
	5918	* Lookahead to avoid useless match attempts
	5919	* when we know what character comes next.
	5920	*
	5921	* Used to only do .x and .?x, but now it allows
	5922	* for )'s, ('s and (?{ ... })'s to be in the way
	5923	* of the quantifier and the EXACT-like node. -- japhy
	5924	*/
	5925
	5926	assert(ST.min <= ST.max);
	5927	if (! HAS_TEXT(next) && ! JUMPABLE(next)) {
	5928	ST.c1 = ST.c2 = CHRTEST_VOID;
	5929	}
	5930	else {
	5931	regnode *text_node = next;
	5932
	5933	if (! HAS_TEXT(text_node))
	5934	FIND_NEXT_IMPT(text_node);
	5935
	5936	if (! HAS_TEXT(text_node))
	5937	ST.c1 = ST.c2 = CHRTEST_VOID;
	5938	else {
	5939	if ( PL_regkind[OP(text_node)] != EXACT ) {
	5940	ST.c1 = ST.c2 = CHRTEST_VOID;
	5941	}
	5942	else {
	5943
	5944	/* Currently we only get here when
	5945
	5946	PL_rekind[OP(text_node)] == EXACT
	5947
	5948	if this changes back then the macro for IS_TEXT and
	5949	friends need to change. */
	5950	if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
	5951	text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
	5952	{
	5953	sayNO;
	5954	}
	5955	}
	5956	}
	5957	}
	5958
	5959	ST.A = scan;
	5960	ST.B = next;
	5961	if (minmod) {
	5962	char *li = locinput;
	5963	minmod = 0;
	5964	if (ST.min && regrepeat(rex, &li, ST.A, ST.min, depth) < ST.min)
	5965	sayNO;
	5966	SET_locinput(li);
	5967	ST.count = ST.min;
	5968	REGCP_SET(ST.cp);
	5969	if (ST.c1 == CHRTEST_VOID)
	5970	goto curly_try_B_min;
	5971
	5972	ST.oldloc = locinput;
	5973
	5974	/* set ST.maxpos to the furthest point along the
	5975	* string that could possibly match */
	5976	if (ST.max == REG_INFTY) {
	5977	ST.maxpos = PL_regeol - 1;
	5978	if (utf8_target)
	5979	while (UTF8_IS_CONTINUATION((U8)ST.maxpos))
	5980	ST.maxpos--;
	5981	}
	5982	else if (utf8_target) {
	5983	int m = ST.max - ST.min;
	5984	for (ST.maxpos = locinput;
	5985	m >0 && ST.maxpos < PL_regeol; m--)
	5986	ST.maxpos += UTF8SKIP(ST.maxpos);
	5987	}
	5988	else {
	5989	ST.maxpos = locinput + ST.max - ST.min;
	5990	if (ST.maxpos >= PL_regeol)
	5991	ST.maxpos = PL_regeol - 1;
	5992	}
	5993	goto curly_try_B_min_known;
	5994
	5995	}
	5996	else {
	5997	/* avoid taking address of locinput, so it can remain
	5998	* a register var */
	5999	char *li = locinput;
	6000	ST.count = regrepeat(rex, &li, ST.A, ST.max, depth);
	6001	if (ST.count < ST.min)
	6002	sayNO;
	6003	SET_locinput(li);
	6004	if ((ST.count > ST.min)
	6005	&& (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
	6006	{
	6007	/* A{m,n} must come at the end of the string, there's
	6008	* no point in backing off ... */
	6009	ST.min = ST.count;
	6010	/* ...except that $ and \Z can match before and after
	6011	newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
	6012	We may back off by one in this case. */
	6013	if (UCHARAT(locinput - 1) == '\n' && OP(ST.B) != EOS)
	6014	ST.min--;
	6015	}
	6016	REGCP_SET(ST.cp);
	6017	goto curly_try_B_max;
	6018	}
	6019	assert(0); /* NOTREACHED */
	6020
	6021
	6022	case CURLY_B_min_known_fail:
	6023	/* failed to find B in a non-greedy match where c1,c2 valid */
	6024
	6025	REGCP_UNWIND(ST.cp);
	6026	if (ST.paren) {
	6027	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6028	}
	6029	/* Couldn't or didn't -- move forward. */
	6030	ST.oldloc = locinput;
	6031	if (utf8_target)
	6032	locinput += UTF8SKIP(locinput);
	6033	else
	6034	locinput++;
	6035	ST.count++;
	6036	curly_try_B_min_known:
	6037	/* find the next place where 'B' could work, then call B */
	6038	{
	6039	int n;
	6040	if (utf8_target) {
	6041	n = (ST.oldloc == locinput) ? 0 : 1;
	6042	if (ST.c1 == ST.c2) {
	6043	/* set n to utf8_distance(oldloc, locinput) */
	6044	while (locinput <= ST.maxpos
	6045	&& memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput)))
	6046	{
	6047	locinput += UTF8SKIP(locinput);
	6048	n++;
	6049	}
	6050	}
	6051	else {
	6052	/* set n to utf8_distance(oldloc, locinput) */
	6053	while (locinput <= ST.maxpos
	6054	&& memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
	6055	&& memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
	6056	{
	6057	locinput += UTF8SKIP(locinput);
	6058	n++;
	6059	}
	6060	}
	6061	}
	6062	else { /* Not utf8_target */
	6063	if (ST.c1 == ST.c2) {
	6064	while (locinput <= ST.maxpos &&
	6065	UCHARAT(locinput) != ST.c1)
	6066	locinput++;
	6067	}
	6068	else {
	6069	while (locinput <= ST.maxpos
	6070	&& UCHARAT(locinput) != ST.c1
	6071	&& UCHARAT(locinput) != ST.c2)
	6072	locinput++;
	6073	}
	6074	n = locinput - ST.oldloc;
	6075	}
	6076	if (locinput > ST.maxpos)
	6077	sayNO;
	6078	if (n) {
	6079	/* In /a{m,n}b/, ST.oldloc is at "a" x m, locinput is
	6080	* at b; check that everything between oldloc and
	6081	* locinput matches */
	6082	char *li = ST.oldloc;
	6083	ST.count += n;
	6084	if (regrepeat(rex, &li, ST.A, n, depth) < n)
	6085	sayNO;
	6086	assert(n == REG_INFTY \|\| locinput == li);
	6087	}
	6088	CURLY_SETPAREN(ST.paren, ST.count);
	6089	if (cur_eval && cur_eval->u.eval.close_paren &&
	6090	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6091	goto fake_end;
	6092	}
	6093	PUSH_STATE_GOTO(CURLY_B_min_known, ST.B, locinput);
	6094	}
	6095	assert(0); /* NOTREACHED */
	6096
	6097
	6098	case CURLY_B_min_fail:
	6099	/* failed to find B in a non-greedy match where c1,c2 invalid */
	6100
	6101	REGCP_UNWIND(ST.cp);
	6102	if (ST.paren) {
	6103	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6104	}
	6105	/* failed -- move forward one */
	6106	{
	6107	char *li = locinput;
	6108	if (!regrepeat(rex, &li, ST.A, 1, depth)) {
	6109	sayNO;
	6110	}
	6111	locinput = li;
	6112	}
	6113	{
	6114	ST.count++;
	6115	if (ST.count <= ST.max \|\| (ST.max == REG_INFTY &&
	6116	ST.count > 0)) /* count overflow ? */
	6117	{
	6118	curly_try_B_min:
	6119	CURLY_SETPAREN(ST.paren, ST.count);
	6120	if (cur_eval && cur_eval->u.eval.close_paren &&
	6121	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6122	goto fake_end;
	6123	}
	6124	PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
	6125	}
	6126	}
	6127	sayNO;
	6128	assert(0); /* NOTREACHED */
	6129
	6130
	6131	curly_try_B_max:
	6132	/* a successful greedy match: now try to match B */
	6133	if (cur_eval && cur_eval->u.eval.close_paren &&
	6134	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6135	goto fake_end;
	6136	}
	6137	{
	6138	bool could_match = locinput < PL_regeol;
	6139
	6140	/* If it could work, try it. */
	6141	if (ST.c1 != CHRTEST_VOID && could_match) {
	6142	if (! UTF8_IS_INVARIANT(UCHARAT(locinput)) && utf8_target)
	6143	{
	6144	could_match = memEQ(locinput,
	6145	ST.c1_utf8,
	6146	UTF8SKIP(locinput))
	6147	\|\| memEQ(locinput,
	6148	ST.c2_utf8,
	6149	UTF8SKIP(locinput));
	6150	}
	6151	else {
	6152	could_match = UCHARAT(locinput) == ST.c1
	6153	\|\| UCHARAT(locinput) == ST.c2;
	6154	}
	6155	}
	6156	if (ST.c1 == CHRTEST_VOID \|\| could_match) {
	6157	CURLY_SETPAREN(ST.paren, ST.count);
	6158	PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput);
	6159	assert(0); /* NOTREACHED */
	6160	}
	6161	}
	6162	/* FALL THROUGH */
	6163
	6164	case CURLY_B_max_fail:
	6165	/* failed to find B in a greedy match */
	6166
	6167	REGCP_UNWIND(ST.cp);
	6168	if (ST.paren) {
	6169	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6170	}
	6171	/* back up. */
	6172	if (--ST.count < ST.min)
	6173	sayNO;
	6174	locinput = HOPc(locinput, -1);
	6175	goto curly_try_B_max;
	6176
	6177	#undef ST
	6178
	6179	case END: /* last op of main pattern */
	6180	fake_end:
	6181	if (cur_eval) {
	6182	/* we've just finished A in /(??{A})B/; now continue with B */
	6183	st->u.eval.toggle_reg_flags
	6184	= cur_eval->u.eval.toggle_reg_flags;
	6185	PL_reg_flags ^= st->u.eval.toggle_reg_flags;
	6186
	6187	st->u.eval.prev_rex = rex_sv; /* inner */
	6188
	6189	/* Save all the positions. */
	6190	st->u.eval.cp = regcppush(rex, 0, maxopenparen);
	6191	rex_sv = cur_eval->u.eval.prev_rex;
	6192	SET_reg_curpm(rex_sv);
	6193	rex = ReANY(rex_sv);
	6194	rexi = RXi_GET(rex);
	6195	cur_curlyx = cur_eval->u.eval.prev_curlyx;
	6196
	6197	REGCP_SET(st->u.eval.lastcp);
	6198
	6199	/* Restore parens of the outer rex without popping the
	6200	* savestack */
	6201	S_regcp_restore(aTHX_ rex, cur_eval->u.eval.lastcp,
	6202	&maxopenparen);
	6203
	6204	st->u.eval.prev_eval = cur_eval;
	6205	cur_eval = cur_eval->u.eval.prev_eval;
	6206	DEBUG_EXECUTE_r(
	6207	PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
	6208	REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
	6209	if ( nochange_depth )
	6210	nochange_depth--;
	6211
	6212	PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B,
	6213	locinput); /* match B */
	6214	}
	6215
	6216	if (locinput < reginfo->till) {
	6217	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	6218	"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
	6219	PL_colors[4],
	6220	(long)(locinput - PL_reg_starttry),
	6221	(long)(reginfo->till - PL_reg_starttry),
	6222	PL_colors[5]));
	6223
	6224	sayNO_SILENT; /* Cannot match: too short. */
	6225	}
	6226	sayYES; /* Success! */
	6227
	6228	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
	6229	DEBUG_EXECUTE_r(
	6230	PerlIO_printf(Perl_debug_log,
	6231	"%*s %ssubpattern success...%s\n",
	6232	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
	6233	sayYES; /* Success! */
	6234
	6235	#undef ST
	6236	#define ST st->u.ifmatch
	6237
	6238	{
	6239	char *newstart;
	6240
	6241	case SUSPEND: /* (?>A) */
	6242	ST.wanted = 1;
	6243	newstart = locinput;
	6244	goto do_ifmatch;
	6245
	6246	case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
	6247	ST.wanted = 0;
	6248	goto ifmatch_trivial_fail_test;
	6249
	6250	case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
	6251	ST.wanted = 1;
	6252	ifmatch_trivial_fail_test:
	6253	if (scan->flags) {
	6254	char * const s = HOPBACKc(locinput, scan->flags);
	6255	if (!s) {
	6256	/* trivial fail */
	6257	if (logical) {
	6258	logical = 0;
	6259	sw = 1 - cBOOL(ST.wanted);
	6260	}
	6261	else if (ST.wanted)
	6262	sayNO;
	6263	next = scan + ARG(scan);
	6264	if (next == scan)
	6265	next = NULL;
	6266	break;
	6267	}
	6268	newstart = s;
	6269	}
	6270	else
	6271	newstart = locinput;
	6272
	6273	do_ifmatch:
	6274	ST.me = scan;
	6275	ST.logical = logical;
	6276	logical = 0; /* XXX: reset state of logical once it has been saved into ST */
	6277
	6278	/* execute body of (?...A) */
	6279	PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), newstart);
	6280	assert(0); /* NOTREACHED */
	6281	}
	6282
	6283	case IFMATCH_A_fail: /* body of (?...A) failed */
	6284	ST.wanted = !ST.wanted;
	6285	/* FALL THROUGH */
	6286
	6287	case IFMATCH_A: /* body of (?...A) succeeded */
	6288	if (ST.logical) {
	6289	sw = cBOOL(ST.wanted);
	6290	}
	6291	else if (!ST.wanted)
	6292	sayNO;
	6293
	6294	if (OP(ST.me) != SUSPEND) {
	6295	/* restore old position except for (?>...) */
	6296	locinput = st->locinput;
	6297	}
	6298	scan = ST.me + ARG(ST.me);
	6299	if (scan == ST.me)
	6300	scan = NULL;
	6301	continue; /* execute B */
	6302
	6303	#undef ST
	6304
	6305	case LONGJMP: /* alternative with many branches compiles to
	6306	* (BRANCHJ; EXACT ...; LONGJMP ) x N */
	6307	next = scan + ARG(scan);
	6308	if (next == scan)
	6309	next = NULL;
	6310	break;
	6311
	6312	case COMMIT: /* (COMMIT) /
	6313	reginfo->cutpoint = PL_regeol;
	6314	/* FALLTHROUGH */
	6315
	6316	case PRUNE: /* (PRUNE) /
	6317	if (!scan->flags)
	6318	sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6319	PUSH_STATE_GOTO(COMMIT_next, next, locinput);
	6320	assert(0); /* NOTREACHED */
	6321
	6322	case COMMIT_next_fail:
	6323	no_final = 1;
	6324	/* FALLTHROUGH */
	6325
	6326	case OPFAIL: /* (FAIL) /
	6327	sayNO;
	6328	assert(0); /* NOTREACHED */
	6329
	6330	#define ST st->u.mark
	6331	case MARKPOINT: /* (MARK:foo) /
	6332	ST.prev_mark = mark_state;
	6333	ST.mark_name = sv_commit = sv_yes_mark
	6334	= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6335	mark_state = st;
	6336	ST.mark_loc = locinput;
	6337	PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput);
	6338	assert(0); /* NOTREACHED */
	6339
	6340	case MARKPOINT_next:
	6341	mark_state = ST.prev_mark;
	6342	sayYES;
	6343	assert(0); /* NOTREACHED */
	6344
	6345	case MARKPOINT_next_fail:
	6346	if (popmark && sv_eq(ST.mark_name,popmark))
	6347	{
	6348	if (ST.mark_loc > startpoint)
	6349	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	6350	popmark = NULL; /* we found our mark */
	6351	sv_commit = ST.mark_name;
	6352
	6353	DEBUG_EXECUTE_r({
	6354	PerlIO_printf(Perl_debug_log,
	6355	"%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
	6356	REPORT_CODE_OFF+depth*2, "",
	6357	PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
	6358	});
	6359	}
	6360	mark_state = ST.prev_mark;
	6361	sv_yes_mark = mark_state ?
	6362	mark_state->u.mark.mark_name : NULL;
	6363	sayNO;
	6364	assert(0); /* NOTREACHED */
	6365
	6366	case SKIP: /* (SKIP) /
	6367	if (scan->flags) {
	6368	/* (SKIP) : if we fail we cut here/
	6369	ST.mark_name = NULL;
	6370	ST.mark_loc = locinput;
	6371	PUSH_STATE_GOTO(SKIP_next,next, locinput);
	6372	} else {
	6373	/* (SKIP:NAME) : if there is a (MARK:NAME) fail where it was,
	6374	otherwise do nothing. Meaning we need to scan
	6375	*/
	6376	regmatch_state *cur = mark_state;
	6377	SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6378
	6379	while (cur) {
	6380	if ( sv_eq( cur->u.mark.mark_name,
	6381	find ) )
	6382	{
	6383	ST.mark_name = find;
	6384	PUSH_STATE_GOTO( SKIP_next, next, locinput);
	6385	}
	6386	cur = cur->u.mark.prev_mark;
	6387	}
	6388	}
	6389	/* Didn't find our (MARK:NAME) so ignore this (SKIP:NAME) */
	6390	break;
	6391
	6392	case SKIP_next_fail:
	6393	if (ST.mark_name) {
	6394	/* (*CUT:NAME) - Set up to search for the name as we
	6395	collapse the stack*/
	6396	popmark = ST.mark_name;
	6397	} else {
	6398	/* (CUT) - No name, we cut here./
	6399	if (ST.mark_loc > startpoint)
	6400	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	6401	/* but we set sv_commit to latest mark_name if there
	6402	is one so they can test to see how things lead to this
	6403	cut */
	6404	if (mark_state)
	6405	sv_commit=mark_state->u.mark.mark_name;
	6406	}
	6407	no_final = 1;
	6408	sayNO;
	6409	assert(0); /* NOTREACHED */
	6410	#undef ST
	6411
	6412	case LNBREAK: /* \R */
	6413	if ((n=is_LNBREAK_safe(locinput, PL_regeol, utf8_target))) {
	6414	locinput += n;
	6415	} else
	6416	sayNO;
	6417	break;
	6418
	6419	#define CASE_CLASS(nAmE) \
	6420	case nAmE: \
	6421	if (NEXTCHR_IS_EOS) \
	6422	sayNO; \
	6423	if ((n=is_##nAmE(locinput,utf8_target))) { \
	6424	locinput += n; \
	6425	} else \
	6426	sayNO; \
	6427	break; \
	6428	case N##nAmE: \
	6429	if (NEXTCHR_IS_EOS) \
	6430	sayNO; \
	6431	if ((n=is_##nAmE(locinput,utf8_target))) { \
	6432	sayNO; \
	6433	} else { \
	6434	locinput += UTF8SKIP(locinput); \
	6435	} \
	6436	break
	6437
	6438	CASE_CLASS(VERTWS); /* \v \V */
	6439	CASE_CLASS(HORIZWS); /* \h \H */
	6440	#undef CASE_CLASS
	6441
	6442	default:
	6443	PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
	6444	PTR2UV(scan), OP(scan));
	6445	Perl_croak(aTHX_ "regexp memory corruption");
	6446
	6447	/* this is a point to jump to in order to increment
	6448	* locinput by one character */
	6449	increment_locinput:
	6450	assert(!NEXTCHR_IS_EOS);
	6451	if (utf8_target) {
	6452	locinput += PL_utf8skip[nextchr];
	6453	/* locinput is allowed to go 1 char off the end, but not 2+ */
	6454	if (locinput > PL_regeol)
	6455	sayNO;
	6456	}
	6457	else
	6458	locinput++;
	6459	break;
	6460
	6461	} /* end switch */
	6462
	6463	/* switch break jumps here */
	6464	scan = next; /* prepare to execute the next op and ... */
	6465	continue; /* ... jump back to the top, reusing st */
	6466	assert(0); /* NOTREACHED */
	6467
	6468	push_yes_state:
	6469	/* push a state that backtracks on success */
	6470	st->u.yes.prev_yes_state = yes_state;
	6471	yes_state = st;
	6472	/* FALL THROUGH */
	6473	push_state:
	6474	/* push a new regex state, then continue at scan */
	6475	{
	6476	regmatch_state *newst;
	6477
	6478	DEBUG_STACK_r({
	6479	regmatch_state *cur = st;
	6480	regmatch_state *curyes = yes_state;
	6481	int curd = depth;
	6482	regmatch_slab *slab = PL_regmatch_slab;
	6483	for (;curd > -1;cur--,curd--) {
	6484	if (cur < SLAB_FIRST(slab)) {
	6485	slab = slab->prev;
	6486	cur = SLAB_LAST(slab);
	6487	}
	6488	PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
	6489	REPORT_CODE_OFF + 2 + depth * 2,"",
	6490	curd, PL_reg_name[cur->resume_state],
	6491	(curyes == cur) ? "yes" : ""
	6492	);
	6493	if (curyes == cur)
	6494	curyes = cur->u.yes.prev_yes_state;
	6495	}
	6496	} else
	6497	DEBUG_STATE_pp("push")
	6498	);
	6499	depth++;
	6500	st->locinput = locinput;
	6501	newst = st+1;
	6502	if (newst > SLAB_LAST(PL_regmatch_slab))
	6503	newst = S_push_slab(aTHX);
	6504	PL_regmatch_state = newst;
	6505
	6506	locinput = pushinput;
	6507	st = newst;
	6508	continue;
	6509	assert(0); /* NOTREACHED */
	6510	}
	6511	}
	6512
	6513	/*
	6514	* We get here only if there's trouble -- normally "case END" is
	6515	* the terminating point.
	6516	*/
	6517	Perl_croak(aTHX_ "corrupted regexp pointers");
	6518	/NOTREACHED/
	6519	sayNO;
	6520
	6521	yes:
	6522	if (yes_state) {
	6523	/* we have successfully completed a subexpression, but we must now
	6524	* pop to the state marked by yes_state and continue from there */
	6525	assert(st != yes_state);
	6526	#ifdef DEBUGGING
	6527	while (st != yes_state) {
	6528	st--;
	6529	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	6530	PL_regmatch_slab = PL_regmatch_slab->prev;
	6531	st = SLAB_LAST(PL_regmatch_slab);
	6532	}
	6533	DEBUG_STATE_r({
	6534	if (no_final) {
	6535	DEBUG_STATE_pp("pop (no final)");
	6536	} else {
	6537	DEBUG_STATE_pp("pop (yes)");
	6538	}
	6539	});
	6540	depth--;
	6541	}
	6542	#else
	6543	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
	6544	\|\| yes_state > SLAB_LAST(PL_regmatch_slab))
	6545	{
	6546	/* not in this slab, pop slab */
	6547	depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
	6548	PL_regmatch_slab = PL_regmatch_slab->prev;
	6549	st = SLAB_LAST(PL_regmatch_slab);
	6550	}
	6551	depth -= (st - yes_state);
	6552	#endif
	6553	st = yes_state;
	6554	yes_state = st->u.yes.prev_yes_state;
	6555	PL_regmatch_state = st;
	6556
	6557	if (no_final)
	6558	locinput= st->locinput;
	6559	state_num = st->resume_state + no_final;
	6560	goto reenter_switch;
	6561	}
	6562
	6563	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
	6564	PL_colors[4], PL_colors[5]));
	6565
	6566	if (PL_reg_state.re_state_eval_setup_done) {
	6567	/* each successfully executed (?{...}) block does the equivalent of
	6568	* local $^R = do {...}
	6569	* When popping the save stack, all these locals would be undone;
	6570	* bypass this by setting the outermost saved $^R to the latest
	6571	* value */
	6572	if (oreplsv != GvSV(PL_replgv))
	6573	sv_setsv(oreplsv, GvSV(PL_replgv));
	6574	}
	6575	result = 1;
	6576	goto final_exit;
	6577
	6578	no:
	6579	DEBUG_EXECUTE_r(
	6580	PerlIO_printf(Perl_debug_log,
	6581	"%*s %sfailed...%s\n",
	6582	REPORT_CODE_OFF+depth*2, "",
	6583	PL_colors[4], PL_colors[5])
	6584	);
	6585
	6586	no_silent:
	6587	if (no_final) {
	6588	if (yes_state) {
	6589	goto yes;
	6590	} else {
	6591	goto final_exit;
	6592	}
	6593	}
	6594	if (depth) {
	6595	/* there's a previous state to backtrack to */
	6596	st--;
	6597	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	6598	PL_regmatch_slab = PL_regmatch_slab->prev;
	6599	st = SLAB_LAST(PL_regmatch_slab);
	6600	}
	6601	PL_regmatch_state = st;
	6602	locinput= st->locinput;
	6603
	6604	DEBUG_STATE_pp("pop");
	6605	depth--;
	6606	if (yes_state == st)
	6607	yes_state = st->u.yes.prev_yes_state;
	6608
	6609	state_num = st->resume_state + 1; /* failure = success + 1 */
	6610	goto reenter_switch;
	6611	}
	6612	result = 0;
	6613
	6614	final_exit:
	6615	if (rex->intflags & PREGf_VERBARG_SEEN) {
	6616	SV *sv_err = get_sv("REGERROR", 1);
	6617	SV *sv_mrk = get_sv("REGMARK", 1);
	6618	if (result) {
	6619	sv_commit = &PL_sv_no;
	6620	if (!sv_yes_mark)
	6621	sv_yes_mark = &PL_sv_yes;
	6622	} else {
	6623	if (!sv_commit)
	6624	sv_commit = &PL_sv_yes;
	6625	sv_yes_mark = &PL_sv_no;
	6626	}
	6627	sv_setsv(sv_err, sv_commit);
	6628	sv_setsv(sv_mrk, sv_yes_mark);
	6629	}
	6630
	6631
	6632	if (last_pushed_cv) {
	6633	dSP;
	6634	POP_MULTICALL;
	6635	PERL_UNUSED_VAR(SP);
	6636	}
	6637
	6638	/* clean up; in particular, free all slabs above current one */
	6639	LEAVE_SCOPE(oldsave);
	6640
	6641	assert(!result \|\| locinput - PL_bostr >= 0);
	6642	return result ? locinput - PL_bostr : -1;
	6643	}
	6644
	6645	/*
	6646	- regrepeat - repeatedly match something simple, report how many
	6647	*
	6648	* What 'simple' means is a node which can be the operand of a quantifier like
	6649	* '+', or {1,3}
	6650	*
	6651	* startposp - pointer to a pointer to the start position. This is updated
	6652	* to point to the byte following the highest successful
	6653	* match.
	6654	* p - the regnode to be repeatedly matched against.
	6655	* max - maximum number of things to match.
	6656	* depth - (for debugging) backtracking depth.
	6657	*/
	6658	STATIC I32
	6659	S_regrepeat(pTHX_ const regexp prog, char startposp, const regnode p, I32 max, int depth)
	6660	{
	6661	dVAR;
	6662	char scan; / Pointer to current position in target string */
	6663	I32 c;
	6664	char loceol = PL_regeol; / local version */
	6665	I32 hardcount = 0; /* How many matches so far */
	6666	bool utf8_target = PL_reg_match_utf8;
	6667	UV utf8_flags;
	6668	#ifndef DEBUGGING
	6669	PERL_UNUSED_ARG(depth);
	6670	#endif
	6671
	6672	PERL_ARGS_ASSERT_REGREPEAT;
	6673
	6674	scan = *startposp;
	6675	if (max == REG_INFTY)
	6676	max = I32_MAX;
	6677	else if (! utf8_target && scan + max < loceol)
	6678	loceol = scan + max;
	6679
	6680	/* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
	6681	* to the maximum of how far we should go in it (leaving it set to the real
	6682	* end, if the maximum permissible would take us beyond that). This allows
	6683	* us to make the loop exit condition that we haven't gone past <loceol> to
	6684	* also mean that we haven't exceeded the max permissible count, saving a
	6685	* test each time through the loop. But it assumes that the OP matches a
	6686	* single byte, which is true for most of the OPs below when applied to a
	6687	* non-UTF-8 target. Those relatively few OPs that don't have this
	6688	* characteristic will have to compensate.
	6689	*
	6690	* There is no adjustment for UTF-8 targets, as the number of bytes per
	6691	* character varies. OPs will have to test both that the count is less
	6692	* than the max permissible (using <hardcount> to keep track), and that we
	6693	* are still within the bounds of the string (using <loceol>). A few OPs
	6694	* match a single byte no matter what the encoding. They can omit the max
	6695	* test if, for the UTF-8 case, they do the adjustment that was skipped
	6696	* above.
	6697	*
	6698	* Thus, the code above sets things up for the common case; and exceptional
	6699	* cases need extra work; the common case is to make sure <scan> doesn't
	6700	* go past <loceol>, and for UTF-8 to also use <hardcount> to make sure the
	6701	* count doesn't exceed the maximum permissible */
	6702
	6703	switch (OP(p)) {
	6704	case REG_ANY:
	6705	if (utf8_target) {
	6706	while (scan < loceol && hardcount < max && *scan != '\n') {
	6707	scan += UTF8SKIP(scan);
	6708	hardcount++;
	6709	}
	6710	} else {
	6711	while (scan < loceol && *scan != '\n')
	6712	scan++;
	6713	}
	6714	break;
	6715	case SANY:
	6716	if (utf8_target) {
	6717	while (scan < loceol && hardcount < max) {
	6718	scan += UTF8SKIP(scan);
	6719	hardcount++;
	6720	}
	6721	}
	6722	else
	6723	scan = loceol;
	6724	break;
	6725	case CANY: /* Move <scan> forward <max> bytes, unless goes off end */
	6726	if (utf8_target && scan + max < loceol) {
	6727
	6728	/* <loceol> hadn't been adjusted in the UTF-8 case */
	6729	scan += max;
	6730	}
	6731	else {
	6732	scan = loceol;
	6733	}
	6734	break;
	6735	case EXACT:
	6736	assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
	6737
	6738	c = (U8)*STRING(p);
	6739
	6740	/* Can use a simple loop if the pattern char to match on is invariant
	6741	* under UTF-8, or both target and pattern aren't UTF-8. Note that we
	6742	* can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
	6743	* true iff it doesn't matter if the argument is in UTF-8 or not */
	6744	if (UTF8_IS_INVARIANT(c) \|\| (! utf8_target && ! UTF_PATTERN)) {
	6745	if (utf8_target && scan + max < loceol) {
	6746	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	6747	* since here, to match at all, 1 char == 1 byte */
	6748	loceol = scan + max;
	6749	}
	6750	while (scan < loceol && UCHARAT(scan) == c) {
	6751	scan++;
	6752	}
	6753	}
	6754	else if (UTF_PATTERN) {
	6755	if (utf8_target) {
	6756	STRLEN scan_char_len;
	6757
	6758	/* When both target and pattern are UTF-8, we have to do
	6759	* string EQ */
	6760	while (hardcount < max
	6761	&& scan < loceol
	6762	&& (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
	6763	&& memEQ(scan, STRING(p), scan_char_len))
	6764	{
	6765	scan += scan_char_len;
	6766	hardcount++;
	6767	}
	6768	}
	6769	else if (! UTF8_IS_ABOVE_LATIN1(c)) {
	6770
	6771	/* Target isn't utf8; convert the character in the UTF-8
	6772	* pattern to non-UTF8, and do a simple loop */
	6773	c = TWO_BYTE_UTF8_TO_UNI(c, *(STRING(p) + 1));
	6774	while (scan < loceol && UCHARAT(scan) == c) {
	6775	scan++;
	6776	}
	6777	} /* else pattern char is above Latin1, can't possibly match the
	6778	non-UTF-8 target */
	6779	}
	6780	else {
	6781
	6782	/* Here, the string must be utf8; pattern isn't, and <c> is
	6783	* different in utf8 than not, so can't compare them directly.
	6784	* Outside the loop, find the two utf8 bytes that represent c, and
	6785	* then look for those in sequence in the utf8 string */
	6786	U8 high = UTF8_TWO_BYTE_HI(c);
	6787	U8 low = UTF8_TWO_BYTE_LO(c);
	6788
	6789	while (hardcount < max
	6790	&& scan + 1 < loceol
	6791	&& UCHARAT(scan) == high
	6792	&& UCHARAT(scan + 1) == low)
	6793	{
	6794	scan += 2;
	6795	hardcount++;
	6796	}
	6797	}
	6798	break;
	6799
	6800	case EXACTFA:
	6801	utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	6802	goto do_exactf;
	6803
	6804	case EXACTFL:
	6805	PL_reg_flags \|= RF_tainted;
	6806	utf8_flags = FOLDEQ_UTF8_LOCALE;
	6807	goto do_exactf;
	6808
	6809	case EXACTF:
	6810	utf8_flags = 0;
	6811	goto do_exactf;
	6812
	6813	case EXACTFU_SS:
	6814	case EXACTFU_TRICKYFOLD:
	6815	case EXACTFU:
	6816	utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	6817
	6818	do_exactf: {
	6819	int c1, c2;
	6820	U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
	6821
	6822	assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
	6823
	6824	if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8)) {
	6825	if (c1 == CHRTEST_VOID) {
	6826	/* Use full Unicode fold matching */
	6827	char *tmpeol = PL_regeol;
	6828	STRLEN pat_len = (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1;
	6829	while (hardcount < max
	6830	&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
	6831	STRING(p), NULL, pat_len,
	6832	cBOOL(UTF_PATTERN), utf8_flags))
	6833	{
	6834	scan = tmpeol;
	6835	tmpeol = PL_regeol;
	6836	hardcount++;
	6837	}
	6838	}
	6839	else if (utf8_target) {
	6840	if (c1 == c2) {
	6841	while (scan < loceol
	6842	&& hardcount < max
	6843	&& memEQ(scan, c1_utf8, UTF8SKIP(scan)))
	6844	{
	6845	scan += UTF8SKIP(scan);
	6846	hardcount++;
	6847	}
	6848	}
	6849	else {
	6850	while (scan < loceol
	6851	&& hardcount < max
	6852	&& (memEQ(scan, c1_utf8, UTF8SKIP(scan))
	6853	\|\| memEQ(scan, c2_utf8, UTF8SKIP(scan))))
	6854	{
	6855	scan += UTF8SKIP(scan);
	6856	hardcount++;
	6857	}
	6858	}
	6859	}
	6860	else if (c1 == c2) {
	6861	while (scan < loceol && UCHARAT(scan) == c1) {
	6862	scan++;
	6863	}
	6864	}
	6865	else {
	6866	while (scan < loceol &&
	6867	(UCHARAT(scan) == c1 \|\| UCHARAT(scan) == c2))
	6868	{
	6869	scan++;
	6870	}
	6871	}
	6872	}
	6873	break;
	6874	}
	6875	case ANYOF:
	6876	if (utf8_target) {
	6877	while (hardcount < max
	6878	&& scan < loceol
	6879	&& reginclass(prog, p, (U8*)scan, utf8_target))
	6880	{
	6881	scan += UTF8SKIP(scan);
	6882	hardcount++;
	6883	}
	6884	} else {
	6885	while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
	6886	scan++;
	6887	}
	6888	break;
	6889	case ALNUMU:
	6890	if (utf8_target) {
	6891	utf8_wordchar:
	6892	LOAD_UTF8_CHARCLASS_ALNUM();
	6893	while (hardcount < max && scan < loceol &&
	6894	swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6895	{
	6896	scan += UTF8SKIP(scan);
	6897	hardcount++;
	6898	}
	6899	} else {
	6900	while (scan < loceol && isWORDCHAR_L1((U8) *scan)) {
	6901	scan++;
	6902	}
	6903	}
	6904	break;
	6905	case ALNUM:
	6906	if (utf8_target)
	6907	goto utf8_wordchar;
	6908	while (scan < loceol && isALNUM((U8) *scan)) {
	6909	scan++;
	6910	}
	6911	break;
	6912	case ALNUMA:
	6913	if (utf8_target && scan + max < loceol) {
	6914
	6915	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	6916	* since here, to match, 1 char == 1 byte */
	6917	loceol = scan + max;
	6918	}
	6919	while (scan < loceol && isWORDCHAR_A((U8) *scan)) {
	6920	scan++;
	6921	}
	6922	break;
	6923	case ALNUML:
	6924	PL_reg_flags \|= RF_tainted;
	6925	if (utf8_target) {
	6926	while (hardcount < max && scan < loceol &&
	6927	isALNUM_LC_utf8((U8*)scan)) {
	6928	scan += UTF8SKIP(scan);
	6929	hardcount++;
	6930	}
	6931	} else {
	6932	while (scan < loceol && isALNUM_LC(*scan))
	6933	scan++;
	6934	}
	6935	break;
	6936	case NALNUMU:
	6937	if (utf8_target) {
	6938
	6939	utf8_Nwordchar:
	6940
	6941	LOAD_UTF8_CHARCLASS_ALNUM();
	6942	while (hardcount < max && scan < loceol &&
	6943	! swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6944	{
	6945	scan += UTF8SKIP(scan);
	6946	hardcount++;
	6947	}
	6948	} else {
	6949	while (scan < loceol && ! isWORDCHAR_L1((U8) *scan)) {
	6950	scan++;
	6951	}
	6952	}
	6953	break;
	6954	case NALNUM:
	6955	if (utf8_target)
	6956	goto utf8_Nwordchar;
	6957	while (scan < loceol && ! isALNUM((U8) *scan)) {
	6958	scan++;
	6959	}
	6960	break;
	6961
	6962	case POSIXA:
	6963	if (utf8_target && scan + max < loceol) {
	6964
	6965	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	6966	* since here, to match, 1 char == 1 byte */
	6967	loceol = scan + max;
	6968	}
	6969	while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
	6970	scan++;
	6971	}
	6972	break;
	6973	case NPOSIXA:
	6974	if (utf8_target) {
	6975	while (scan < loceol && hardcount < max
	6976	&& ! _generic_isCC_A((U8) *scan, FLAGS(p)))
	6977	{
	6978	scan += UTF8SKIP(scan);
	6979	hardcount++;
	6980	}
	6981	}
	6982	else {
	6983	while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
	6984	scan++;
	6985	}
	6986	}
	6987	break;
	6988	case NALNUMA:
	6989	if (utf8_target) {
	6990	while (scan < loceol && hardcount < max
	6991	&& ! isWORDCHAR_A((U8) *scan))
	6992	{
	6993	scan += UTF8SKIP(scan);
	6994	hardcount++;
	6995	}
	6996	}
	6997	else {
	6998	while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
	6999	scan++;
	7000	}
	7001	}
	7002	break;
	7003	case NALNUML:
	7004	PL_reg_flags \|= RF_tainted;
	7005	if (utf8_target) {
	7006	while (hardcount < max && scan < loceol &&
	7007	!isALNUM_LC_utf8((U8*)scan)) {
	7008	scan += UTF8SKIP(scan);
	7009	hardcount++;
	7010	}
	7011	} else {
	7012	while (scan < loceol && !isALNUM_LC(*scan))
	7013	scan++;
	7014	}
	7015	break;
	7016	case SPACEU:
	7017	if (utf8_target) {
	7018
	7019	utf8_space:
	7020
	7021	while (hardcount < max && scan < loceol
	7022	&& is_XPERLSPACE_utf8((U8*)scan))
	7023	{
	7024	scan += UTF8SKIP(scan);
	7025	hardcount++;
	7026	}
	7027	break;
	7028	}
	7029	else {
	7030	while (scan < loceol && isSPACE_L1((U8) *scan)) {
	7031	scan++;
	7032	}
	7033	break;
	7034	}
	7035	case SPACE:
	7036	if (utf8_target)
	7037	goto utf8_space;
	7038
	7039	while (scan < loceol && isSPACE((U8) *scan)) {
	7040	scan++;
	7041	}
	7042	break;
	7043	case SPACEA:
	7044	if (utf8_target && scan + max < loceol) {
	7045
	7046	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	7047	* since here, to match, 1 char == 1 byte */
	7048	loceol = scan + max;
	7049	}
	7050	while (scan < loceol && isSPACE_A((U8) *scan)) {
	7051	scan++;
	7052	}
	7053	break;
	7054	case SPACEL:
	7055	PL_reg_flags \|= RF_tainted;
	7056	if (utf8_target) {
	7057	while (hardcount < max && scan < loceol &&
	7058	isSPACE_LC_utf8((U8*)scan)) {
	7059	scan += UTF8SKIP(scan);
	7060	hardcount++;
	7061	}
	7062	} else {
	7063	while (scan < loceol && isSPACE_LC(*scan))
	7064	scan++;
	7065	}
	7066	break;
	7067	case NSPACEU:
	7068	if (utf8_target) {
	7069
	7070	utf8_Nspace:
	7071
	7072	while (hardcount < max && scan < loceol
	7073	&& ! is_XPERLSPACE_utf8((U8*)scan))
	7074	{
	7075	scan += UTF8SKIP(scan);
	7076	hardcount++;
	7077	}
	7078	break;
	7079	}
	7080	else {
	7081	while (scan < loceol && ! isSPACE_L1((U8) *scan)) {
	7082	scan++;
	7083	}
	7084	}
	7085	break;
	7086	case NSPACE:
	7087	if (utf8_target)
	7088	goto utf8_Nspace;
	7089
	7090	while (scan < loceol && ! isSPACE((U8) *scan)) {
	7091	scan++;
	7092	}
	7093	break;
	7094	case NSPACEA:
	7095	if (utf8_target) {
	7096	while (hardcount < max && scan < loceol
	7097	&& ! isSPACE_A((U8) *scan))
	7098	{
	7099	scan += UTF8SKIP(scan);
	7100	hardcount++;
	7101	}
	7102	}
	7103	else {
	7104	while (scan < loceol && ! isSPACE_A((U8) *scan)) {
	7105	scan++;
	7106	}
	7107	}
	7108	break;
	7109	case NSPACEL:
	7110	PL_reg_flags \|= RF_tainted;
	7111	if (utf8_target) {
	7112	while (hardcount < max && scan < loceol &&
	7113	!isSPACE_LC_utf8((U8*)scan)) {
	7114	scan += UTF8SKIP(scan);
	7115	hardcount++;
	7116	}
	7117	} else {
	7118	while (scan < loceol && !isSPACE_LC(*scan))
	7119	scan++;
	7120	}
	7121	break;
	7122	case DIGIT:
	7123	if (utf8_target) {
	7124	LOAD_UTF8_CHARCLASS_DIGIT();
	7125	while (hardcount < max && scan < loceol &&
	7126	swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	7127	scan += UTF8SKIP(scan);
	7128	hardcount++;
	7129	}
	7130	} else {
	7131	while (scan < loceol && isDIGIT(*scan))
	7132	scan++;
	7133	}
	7134	break;
	7135	case DIGITA:
	7136	if (utf8_target && scan + max < loceol) {
	7137
	7138	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	7139	* since here, to match, 1 char == 1 byte */
	7140	loceol = scan + max;
	7141	}
	7142	while (scan < loceol && isDIGIT_A((U8) *scan)) {
	7143	scan++;
	7144	}
	7145	break;
	7146	case DIGITL:
	7147	PL_reg_flags \|= RF_tainted;
	7148	if (utf8_target) {
	7149	while (hardcount < max && scan < loceol &&
	7150	isDIGIT_LC_utf8((U8*)scan)) {
	7151	scan += UTF8SKIP(scan);
	7152	hardcount++;
	7153	}
	7154	} else {
	7155	while (scan < loceol && isDIGIT_LC(*scan))
	7156	scan++;
	7157	}
	7158	break;
	7159	case NDIGIT:
	7160	if (utf8_target) {
	7161	LOAD_UTF8_CHARCLASS_DIGIT();
	7162	while (hardcount < max && scan < loceol &&
	7163	!swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	7164	scan += UTF8SKIP(scan);
	7165	hardcount++;
	7166	}
	7167	} else {
	7168	while (scan < loceol && !isDIGIT(*scan))
	7169	scan++;
	7170	}
	7171	break;
	7172	case NDIGITA:
	7173	if (utf8_target) {
	7174	while (hardcount < max && scan < loceol
	7175	&& ! isDIGIT_A((U8) *scan)) {
	7176	scan += UTF8SKIP(scan);
	7177	hardcount++;
	7178	}
	7179	}
	7180	else {
	7181	while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
	7182	scan++;
	7183	}
	7184	}
	7185	break;
	7186	case NDIGITL:
	7187	PL_reg_flags \|= RF_tainted;
	7188	if (utf8_target) {
	7189	while (hardcount < max && scan < loceol &&
	7190	!isDIGIT_LC_utf8((U8*)scan)) {
	7191	scan += UTF8SKIP(scan);
	7192	hardcount++;
	7193	}
	7194	} else {
	7195	while (scan < loceol && !isDIGIT_LC(*scan))
	7196	scan++;
	7197	}
	7198	break;
	7199	case LNBREAK:
	7200	if (utf8_target) {
	7201	while (hardcount < max && scan < loceol &&
	7202	(c=is_LNBREAK_utf8_safe(scan, loceol))) {
	7203	scan += c;
	7204	hardcount++;
	7205	}
	7206	} else {
	7207	/* LNBREAK can match one or two latin chars, which is ok, but we
	7208	* have to use hardcount in this situation, and throw away the
	7209	* adjustment to <loceol> done before the switch statement */
	7210	loceol = PL_regeol;
	7211	while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
	7212	scan+=c;
	7213	hardcount++;
	7214	}
	7215	}
	7216	break;
	7217	case HORIZWS:
	7218	if (utf8_target) {
	7219	while (hardcount < max && scan < loceol &&
	7220	(c=is_HORIZWS_utf8_safe(scan, loceol)))
	7221	{
	7222	scan += c;
	7223	hardcount++;
	7224	}
	7225	} else {
	7226	while (scan < loceol && is_HORIZWS_latin1_safe(scan, loceol))
	7227	scan++;
	7228	}
	7229	break;
	7230	case NHORIZWS:
	7231	if (utf8_target) {
	7232	while (hardcount < max && scan < loceol &&
	7233	!is_HORIZWS_utf8_safe(scan, loceol))
	7234	{
	7235	scan += UTF8SKIP(scan);
	7236	hardcount++;
	7237	}
	7238	} else {
	7239	while (scan < loceol && !is_HORIZWS_latin1_safe(scan, loceol))
	7240	scan++;
	7241
	7242	}
	7243	break;
	7244	case VERTWS:
	7245	if (utf8_target) {
	7246	while (hardcount < max && scan < loceol &&
	7247	(c=is_VERTWS_utf8_safe(scan, loceol)))
	7248	{
	7249	scan += c;
	7250	hardcount++;
	7251	}
	7252	} else {
	7253	while (scan < loceol && is_VERTWS_latin1_safe(scan, loceol))
	7254	scan++;
	7255
	7256	}
	7257	break;
	7258	case NVERTWS:
	7259	if (utf8_target) {
	7260	while (hardcount < max && scan < loceol &&
	7261	!is_VERTWS_utf8_safe(scan, loceol))
	7262	{
	7263	scan += UTF8SKIP(scan);
	7264	hardcount++;
	7265	}
	7266	} else {
	7267	while (scan < loceol && !is_VERTWS_latin1_safe(scan, loceol))
	7268	scan++;
	7269
	7270	}
	7271	break;
	7272
	7273	case BOUND:
	7274	case BOUNDA:
	7275	case BOUNDL:
	7276	case BOUNDU:
	7277	case EOS:
	7278	case GPOS:
	7279	case KEEPS:
	7280	case NBOUND:
	7281	case NBOUNDA:
	7282	case NBOUNDL:
	7283	case NBOUNDU:
	7284	case OPFAIL:
	7285	case SBOL:
	7286	case SEOL:
	7287	/* These are all 0 width, so match right here or not at all. */
	7288	break;
	7289
	7290	default:
	7291	Perl_croak(aTHX_ "panic: regrepeat() called with unrecognized node type %d='%s'", OP(p), PL_reg_name[OP(p)]);
	7292	assert(0); /* NOTREACHED */
	7293
	7294	}
	7295
	7296	if (hardcount)
	7297	c = hardcount;
	7298	else
	7299	c = scan - *startposp;
	7300	*startposp = scan;
	7301
	7302	DEBUG_r({
	7303	GET_RE_DEBUG_FLAGS_DECL;
	7304	DEBUG_EXECUTE_r({
	7305	SV * const prop = sv_newmortal();
	7306	regprop(prog, prop, p);
	7307	PerlIO_printf(Perl_debug_log,
	7308	"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
	7309	REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
	7310	});
	7311	});
	7312
	7313	return(c);
	7314	}
	7315
	7316
	7317	#if !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION)
	7318	/*
	7319	- regclass_swash - prepare the utf8 swash. Wraps the shared core version to
	7320	create a copy so that changes the caller makes won't change the shared one.
	7321	If <altsvp> is non-null, will return NULL in it, for back-compat.
	7322	*/
	7323	SV *
	7324	Perl_regclass_swash(pTHX_ const regexp prog, const regnode node, bool doinit, SV listsvp, SV altsvp)
	7325	{
	7326	PERL_ARGS_ASSERT_REGCLASS_SWASH;
	7327
	7328	if (altsvp) {
	7329	*altsvp = NULL;
	7330	}
	7331
	7332	return newSVsv(core_regclass_swash(prog, node, doinit, listsvp));
	7333	}
	7334	#endif
	7335
	7336	STATIC SV *
	7337	S_core_regclass_swash(pTHX_ const regexp prog, const regnode node, bool doinit, SV** listsvp)
	7338	{
	7339	/* Returns the swash for the input 'node' in the regex 'prog'.
	7340	* If <doinit> is true, will attempt to create the swash if not already
	7341	* done.
	7342	* If <listsvp> is non-null, will return the swash initialization string in
	7343	* it.
	7344	* Tied intimately to how regcomp.c sets up the data structure */
	7345
	7346	dVAR;
	7347	SV *sw = NULL;
	7348	SV *si = NULL;
	7349	SV* invlist = NULL;
	7350
	7351	RXi_GET_DECL(prog,progi);
	7352	const struct reg_data * const data = prog ? progi->data : NULL;
	7353
	7354	PERL_ARGS_ASSERT_CORE_REGCLASS_SWASH;
	7355
	7356	assert(ANYOF_NONBITMAP(node));
	7357
	7358	if (data && data->count) {
	7359	const U32 n = ARG(node);
	7360
	7361	if (data->what[n] == 's') {
	7362	SV * const rv = MUTABLE_SV(data->data[n]);
	7363	AV * const av = MUTABLE_AV(SvRV(rv));
	7364	SV **const ary = AvARRAY(av);
	7365	U8 swash_init_flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	7366
	7367	si = ary; / ary[0] = the string to initialize the swash with */
	7368
	7369	/* Elements 2 and 3 are either both present or both absent. [2] is
	7370	* any inversion list generated at compile time; [3] indicates if
	7371	* that inversion list has any user-defined properties in it. */
	7372	if (av_len(av) >= 2) {
	7373	invlist = ary[2];
	7374	if (SvUV(ary[3])) {
	7375	swash_init_flags \|= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
	7376	}
	7377	}
	7378	else {
	7379	invlist = NULL;
	7380	}
	7381
	7382	/* Element [1] is reserved for the set-up swash. If already there,
	7383	* return it; if not, create it and store it there */
	7384	if (SvROK(ary[1])) {
	7385	sw = ary[1];
	7386	}
	7387	else if (si && doinit) {
	7388
	7389	sw = _core_swash_init("utf8", /* the utf8 package */
	7390	"", /* nameless */
	7391	si,
	7392	1, /* binary */
	7393	0, /* not from tr/// */
	7394	invlist,
	7395	&swash_init_flags);
	7396	(void)av_store(av, 1, sw);
	7397	}
	7398	}
	7399	}
	7400
	7401	if (listsvp) {
	7402	SV* matches_string = newSVpvn("", 0);
	7403
	7404	/* Use the swash, if any, which has to have incorporated into it all
	7405	* possibilities */
	7406	if ((! sw \|\| (invlist = _get_swash_invlist(sw)) == NULL)
	7407	&& (si && si != &PL_sv_undef))
	7408	{
	7409
	7410	/* If no swash, use the input initialization string, if available */
	7411	sv_catsv(matches_string, si);
	7412	}
	7413
	7414	/* Add the inversion list to whatever we have. This may have come from
	7415	* the swash, or from an input parameter */
	7416	if (invlist) {
	7417	sv_catsv(matches_string, _invlist_contents(invlist));
	7418	}
	7419	*listsvp = matches_string;
	7420	}
	7421
	7422	return sw;
	7423	}
	7424
	7425	/*
	7426	- reginclass - determine if a character falls into a character class
	7427
	7428	n is the ANYOF regnode
	7429	p is the target string
	7430	utf8_target tells whether p is in UTF-8.
	7431
	7432	Returns true if matched; false otherwise.
	7433
	7434	Note that this can be a synthetic start class, a combination of various
	7435	nodes, so things you think might be mutually exclusive, such as locale,
	7436	aren't. It can match both locale and non-locale
	7437
	7438	*/
	7439
	7440	STATIC bool
	7441	S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target)
	7442	{
	7443	dVAR;
	7444	const char flags = ANYOF_FLAGS(n);
	7445	bool match = FALSE;
	7446	UV c = *p;
	7447
	7448	PERL_ARGS_ASSERT_REGINCLASS;
	7449
	7450	/* If c is not already the code point, get it. Note that
	7451	* UTF8_IS_INVARIANT() works even if not in UTF-8 */
	7452	if (! UTF8_IS_INVARIANT(c) && utf8_target) {
	7453	STRLEN c_len = 0;
	7454	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
	7455	(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
	7456	\| UTF8_ALLOW_FFFF \| UTF8_CHECK_ONLY);
	7457	/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
	7458	* UTF8_ALLOW_FFFF */
	7459	if (c_len == (STRLEN)-1)
	7460	Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
	7461	}
	7462
	7463	/* If this character is potentially in the bitmap, check it */
	7464	if (c < 256) {
	7465	if (ANYOF_BITMAP_TEST(n, c))
	7466	match = TRUE;
	7467	else if (flags & ANYOF_NON_UTF8_LATIN1_ALL
	7468	&& ! utf8_target
	7469	&& ! isASCII(c))
	7470	{
	7471	match = TRUE;
	7472	}
	7473	else if (flags & ANYOF_LOCALE) {
	7474	PL_reg_flags \|= RF_tainted;
	7475
	7476	if ((flags & ANYOF_LOC_FOLD)
	7477	&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
	7478	{
	7479	match = TRUE;
	7480	}
	7481	else if (ANYOF_CLASS_TEST_ANY_SET(n)) {
	7482
	7483	/* The data structure is arranged so bits 0, 2, 4, ... are set
	7484	* if the class includes the Posix character class given by
	7485	* bit/2; and 1, 3, 5, ... are set if the class includes the
	7486	* complemented Posix class given by int(bit/2). So we loop
	7487	* through the bits, each time changing whether we complement
	7488	* the result or not. Suppose for the sake of illustration
	7489	* that bits 0-3 mean respectively, \w, \W, \s, \S. If bit 0
	7490	* is set, it means there is a match for this ANYOF node if the
	7491	* character is in the class given by the expression (0 / 2 = 0
	7492	* = \w). If it is in that class, isFOO_lc() will return 1,
	7493	* and since 'to_complement' is 0, the result will stay TRUE,
	7494	* and we exit the loop. Suppose instead that bit 0 is 0, but
	7495	* bit 1 is 1. That means there is a match if the character
	7496	* matches \W. We won't bother to call isFOO_lc() on bit 0,
	7497	* but will on bit 1. On the second iteration 'to_complement'
	7498	* will be 1, so the exclusive or will reverse things, so we
	7499	* are testing for \W. On the third iteration, 'to_complement'
	7500	* will be 0, and we would be testing for \s; the fourth
	7501	* iteration would test for \S, etc. */
	7502
	7503	int count = 0;
	7504	int to_complement = 0;
	7505	while (count < ANYOF_MAX) {
	7506	if (ANYOF_CLASS_TEST(n, count)
	7507	&& to_complement ^ cBOOL(isFOO_lc(count/2, (U8) c)))
	7508	{
	7509	match = TRUE;
	7510	break;
	7511	}
	7512	count++;
	7513	to_complement ^= 1;
	7514	}
	7515	}
	7516	}
	7517	}
	7518
	7519	/* If the bitmap didn't (or couldn't) match, and something outside the
	7520	* bitmap could match, try that. Locale nodes specify completely the
	7521	* behavior of code points in the bit map (otherwise, a utf8 target would
	7522	* cause them to be treated as Unicode and not locale), except in
	7523	* the very unlikely event when this node is a synthetic start class, which
	7524	* could be a combination of locale and non-locale nodes. So allow locale
	7525	* to match for the synthetic start class, which will give a false
	7526	* positive that will be resolved when the match is done again as not part
	7527	* of the synthetic start class */
	7528	if (!match) {
	7529	if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
	7530	match = TRUE; /* Everything above 255 matches */
	7531	}
	7532	else if (ANYOF_NONBITMAP(n)
	7533	&& ((flags & ANYOF_NONBITMAP_NON_UTF8)
	7534	\|\| (utf8_target
	7535	&& (c >=256
	7536	\|\| (! (flags & ANYOF_LOCALE))
	7537	\|\| (flags & ANYOF_IS_SYNTHETIC)))))
	7538	{
	7539	SV * const sw = core_regclass_swash(prog, n, TRUE, 0);
	7540	if (sw) {
	7541	U8 * utf8_p;
	7542	if (utf8_target) {
	7543	utf8_p = (U8 *) p;
	7544	} else { /* Convert to utf8 */
	7545	STRLEN len = 1;
	7546	utf8_p = bytes_to_utf8(p, &len);
	7547	}
	7548
	7549	if (swash_fetch(sw, utf8_p, TRUE)) {
	7550	match = TRUE;
	7551	}
	7552
	7553	/* If we allocated a string above, free it */
	7554	if (! utf8_target) Safefree(utf8_p);
	7555	}
	7556	}
	7557
	7558	if (UNICODE_IS_SUPER(c)
	7559	&& (flags & ANYOF_WARN_SUPER)
	7560	&& ckWARN_d(WARN_NON_UNICODE))
	7561	{
	7562	Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
	7563	"Code point 0x%04"UVXf" is not Unicode, all \\p{} matches fail; all \\P{} matches succeed", c);
	7564	}
	7565	}
	7566
	7567	/* The xor complements the return if to invert: 1^1 = 0, 1^0 = 1 */
	7568	return cBOOL(flags & ANYOF_INVERT) ^ match;
	7569	}
	7570
	7571	STATIC U8 *
	7572	S_reghop3(U8 s, I32 off, const U8 lim)
	7573	{
	7574	/* return the position 'off' UTF-8 characters away from 's', forward if
	7575	* 'off' >= 0, backwards if negative. But don't go outside of position
	7576	* 'lim', which better be < s if off < 0 */
	7577
	7578	dVAR;
	7579
	7580	PERL_ARGS_ASSERT_REGHOP3;
	7581
	7582	if (off >= 0) {
	7583	while (off-- && s < lim) {
	7584	/* XXX could check well-formedness here */
	7585	s += UTF8SKIP(s);
	7586	}
	7587	}
	7588	else {
	7589	while (off++ && s > lim) {
	7590	s--;
	7591	if (UTF8_IS_CONTINUED(*s)) {
	7592	while (s > lim && UTF8_IS_CONTINUATION(*s))
	7593	s--;
	7594	}
	7595	/* XXX could check well-formedness here */
	7596	}
	7597	}
	7598	return s;
	7599	}
	7600
	7601	#ifdef XXX_dmq
	7602	/* there are a bunch of places where we use two reghop3's that should
	7603	be replaced with this routine. but since thats not done yet
	7604	we ifdef it out - dmq
	7605	*/
	7606	STATIC U8 *
	7607	S_reghop4(U8 s, I32 off, const U8 llim, const U8* rlim)
	7608	{
	7609	dVAR;
	7610
	7611	PERL_ARGS_ASSERT_REGHOP4;
	7612
	7613	if (off >= 0) {
	7614	while (off-- && s < rlim) {
	7615	/* XXX could check well-formedness here */
	7616	s += UTF8SKIP(s);
	7617	}
	7618	}
	7619	else {
	7620	while (off++ && s > llim) {
	7621	s--;
	7622	if (UTF8_IS_CONTINUED(*s)) {
	7623	while (s > llim && UTF8_IS_CONTINUATION(*s))
	7624	s--;
	7625	}
	7626	/* XXX could check well-formedness here */
	7627	}
	7628	}
	7629	return s;
	7630	}
	7631	#endif
	7632
	7633	STATIC U8 *
	7634	S_reghopmaybe3(U8* s, I32 off, const U8* lim)
	7635	{
	7636	dVAR;
	7637
	7638	PERL_ARGS_ASSERT_REGHOPMAYBE3;
	7639
	7640	if (off >= 0) {
	7641	while (off-- && s < lim) {
	7642	/* XXX could check well-formedness here */
	7643	s += UTF8SKIP(s);
	7644	}
	7645	if (off >= 0)
	7646	return NULL;
	7647	}
	7648	else {
	7649	while (off++ && s > lim) {
	7650	s--;
	7651	if (UTF8_IS_CONTINUED(*s)) {
	7652	while (s > lim && UTF8_IS_CONTINUATION(*s))
	7653	s--;
	7654	}
	7655	/* XXX could check well-formedness here */
	7656	}
	7657	if (off <= 0)
	7658	return NULL;
	7659	}
	7660	return s;
	7661	}
	7662
	7663	static void
	7664	restore_pos(pTHX_ void *arg)
	7665	{
	7666	dVAR;
	7667	regexp * const rex = (regexp *)arg;
	7668	if (PL_reg_state.re_state_eval_setup_done) {
	7669	if (PL_reg_oldsaved) {
	7670	rex->subbeg = PL_reg_oldsaved;
	7671	rex->sublen = PL_reg_oldsavedlen;
	7672	rex->suboffset = PL_reg_oldsavedoffset;
	7673	rex->subcoffset = PL_reg_oldsavedcoffset;
	7674	#ifdef PERL_ANY_COW
	7675	rex->saved_copy = PL_nrs;
	7676	#endif
	7677	RXp_MATCH_COPIED_on(rex);
	7678	}
	7679	PL_reg_magic->mg_len = PL_reg_oldpos;
	7680	PL_reg_state.re_state_eval_setup_done = FALSE;
	7681	PL_curpm = PL_reg_oldcurpm;
	7682	}
	7683	}
	7684
	7685	STATIC void
	7686	S_to_utf8_substr(pTHX_ regexp *prog)
	7687	{
	7688	/* Converts substr fields in prog from bytes to UTF-8, calling fbm_compile
	7689	* on the converted value */
	7690
	7691	int i = 1;
	7692
	7693	PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
	7694
	7695	do {
	7696	if (prog->substrs->data[i].substr
	7697	&& !prog->substrs->data[i].utf8_substr) {
	7698	SV* const sv = newSVsv(prog->substrs->data[i].substr);
	7699	prog->substrs->data[i].utf8_substr = sv;
	7700	sv_utf8_upgrade(sv);
	7701	if (SvVALID(prog->substrs->data[i].substr)) {
	7702	if (SvTAIL(prog->substrs->data[i].substr)) {
	7703	/* Trim the trailing \n that fbm_compile added last
	7704	time. */
	7705	SvCUR_set(sv, SvCUR(sv) - 1);
	7706	/* Whilst this makes the SV technically "invalid" (as its
	7707	buffer is no longer followed by "\0") when fbm_compile()
	7708	adds the "\n" back, a "\0" is restored. */
	7709	fbm_compile(sv, FBMcf_TAIL);
	7710	} else
	7711	fbm_compile(sv, 0);
	7712	}
	7713	if (prog->substrs->data[i].substr == prog->check_substr)
	7714	prog->check_utf8 = sv;
	7715	}
	7716	} while (i--);
	7717	}
	7718
	7719	STATIC bool
	7720	S_to_byte_substr(pTHX_ regexp *prog)
	7721	{
	7722	/* Converts substr fields in prog from UTF-8 to bytes, calling fbm_compile
	7723	* on the converted value; returns FALSE if can't be converted. */
	7724
	7725	dVAR;
	7726	int i = 1;
	7727
	7728	PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
	7729
	7730	do {
	7731	if (prog->substrs->data[i].utf8_substr
	7732	&& !prog->substrs->data[i].substr) {
	7733	SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
	7734	if (! sv_utf8_downgrade(sv, TRUE)) {
	7735	return FALSE;
	7736	}
	7737	if (SvVALID(prog->substrs->data[i].utf8_substr)) {
	7738	if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
	7739	/* Trim the trailing \n that fbm_compile added last
	7740	time. */
	7741	SvCUR_set(sv, SvCUR(sv) - 1);
	7742	fbm_compile(sv, FBMcf_TAIL);
	7743	} else
	7744	fbm_compile(sv, 0);
	7745	}
	7746	prog->substrs->data[i].substr = sv;
	7747	if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
	7748	prog->check_substr = sv;
	7749	}
	7750	} while (i--);
	7751
	7752	return TRUE;
	7753	}
	7754
	7755	/*
	7756	* Local variables:
	7757	* c-indentation-style: bsd
	7758	* c-basic-offset: 4
	7759	* indent-tabs-mode: nil
	7760	* End:
	7761	*
	7762	* ex: set ts=8 sts=4 sw=4 et:
	7763	*/