perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regexec.c
	2	*/
	3
	4	/*
	5	* "One Ring to rule them all, One Ring to find them..."
	6	*/
	7
	8	/* This file contains functions for executing a regular expression. See
	9	* also regcomp.c which funnily enough, contains functions for compiling
	10	* a regular expression.
	11	*
	12	* This file is also copied at build time to ext/re/re_exec.c, where
	13	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	14	* This causes the main functions to be compiled under new names and with
	15	* debugging support added, which makes "use re 'debug'" work.
	16	*/
	17
	18	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	19	* confused with the original package (see point 3 below). Thanks, Henry!
	20	*/
	21
	22	/* Additional note: this code is very heavily munged from Henry's version
	23	* in places. In some spots I've traded clarity for efficiency, so don't
	24	* blame Henry for some of the lack of readability.
	25	*/
	26
	27	/* The names of the functions have been changed from regcomp and
	28	* regexec to pregcomp and pregexec in order to avoid conflicts
	29	* with the POSIX routines of the same names.
	30	*/
	31
	32	#ifdef PERL_EXT_RE_BUILD
	33	#include "re_top.h"
	34	#endif
	35
	36	/*
	37	* pregcomp and pregexec -- regsub and regerror are not used in perl
	38	*
	39	* Copyright (c) 1986 by University of Toronto.
	40	* Written by Henry Spencer. Not derived from licensed software.
	41	*
	42	* Permission is granted to anyone to use this software for any
	43	* purpose on any computer system, and to redistribute it freely,
	44	* subject to the following restrictions:
	45	*
	46	* 1. The author is not responsible for the consequences of use of
	47	* this software, no matter how awful, even if they arise
	48	* from defects in it.
	49	*
	50	* 2. The origin of this software must not be misrepresented, either
	51	* by explicit claim or by omission.
	52	*
	53	* 3. Altered versions must be plainly marked as such, and must not
	54	* be misrepresented as being the original software.
	55	*
	56	**** Alterations to Henry's code are...
	57	****
	58	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	59	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
	60	****
	61	**** You may distribute under the terms of either the GNU General Public
	62	**** License or the Artistic License, as specified in the README file.
	63	*
	64	* Beware that some of this code is subtly aware of the way operator
	65	* precedence is structured in regular expressions. Serious changes in
	66	* regular-expression syntax might require a total rethink.
	67	*/
	68	#include "EXTERN.h"
	69	#define PERL_IN_REGEXEC_C
	70	#include "perl.h"
	71
	72	#ifdef PERL_IN_XSUB_RE
	73	# include "re_comp.h"
	74	#else
	75	# include "regcomp.h"
	76	#endif
	77
	78	#define RF_tainted 1 /* tainted information used? */
	79	#define RF_warned 2 /* warned about big count? */
	80
	81	#define RF_utf8 8 /* Pattern contains multibyte chars? */
	82
	83	#define UTF ((PL_reg_flags & RF_utf8) != 0)
	84
	85	#define RS_init 1 /* eval environment created */
	86	#define RS_set 2 /* replsv value is set */
	87
	88	#ifndef STATIC
	89	#define STATIC static
	90	#endif
	91
	92	#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
	93
	94	/*
	95	* Forwards.
	96	*/
	97
	98	#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
	99	#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
	100
	101	#define HOPc(pos,off) \
	102	(char *)(PL_reg_match_utf8 \
	103	? reghop3((U8)pos, off, (U8)(off >= 0 ? PL_regeol : PL_bostr)) \
	104	: (U8*)(pos + off))
	105	#define HOPBACKc(pos, off) \
	106	(char*)(PL_reg_match_utf8\
	107	? reghopmaybe3((U8)pos, -off, (U8)PL_bostr) \
	108	: (pos - off >= PL_bostr) \
	109	? (U8*)pos - off \
	110	: NULL)
	111
	112	#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8)(pos), off, (U8)(lim)) : (U8*)(pos + off))
	113	#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
	114
	115	#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
	116	if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
	117	#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
	118	#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
	119	#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
	120	#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
	121
	122	/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
	123
	124	/* for use after a quantifier and before an EXACT-like node -- japhy */
	125	/* it would be nice to rework regcomp.sym to generate this stuff. sigh */
	126	#define JUMPABLE(rn) ( \
	127	OP(rn) == OPEN \|\| \
	128	(OP(rn) == CLOSE && (!cur_eval \|\| cur_eval->u.eval.close_paren != ARG(rn))) \|\| \
	129	OP(rn) == EVAL \|\| \
	130	OP(rn) == SUSPEND \|\| OP(rn) == IFMATCH \|\| \
	131	OP(rn) == PLUS \|\| OP(rn) == MINMOD \|\| \
	132	OP(rn) == KEEPS \|\| (PL_regkind[OP(rn)] == VERB) \|\| \
	133	(PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
	134	)
	135	#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
	136
	137	#define HAS_TEXT(rn) ( IS_EXACT(rn) \|\| PL_regkind[OP(rn)] == REF )
	138
	139	#if 0
	140	/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
	141	we don't need this definition. */
	142	#define IS_TEXT(rn) ( OP(rn)==EXACT \|\| OP(rn)==REF \|\| OP(rn)==NREF )
	143	#define IS_TEXTF(rn) ( OP(rn)==EXACTF \|\| OP(rn)==REFF \|\| OP(rn)==NREFF )
	144	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL \|\| OP(rn)==REFFL \|\| OP(rn)==NREFFL )
	145
	146	#else
	147	/* ... so we use this as its faster. */
	148	#define IS_TEXT(rn) ( OP(rn)==EXACT )
	149	#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
	150	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
	151
	152	#endif
	153
	154	/*
	155	Search for mandatory following text node; for lookahead, the text must
	156	follow but for lookbehind (rn->flags != 0) we skip to the next step.
	157	*/
	158	#define FIND_NEXT_IMPT(rn) STMT_START { \
	159	while (JUMPABLE(rn)) { \
	160	const OPCODE type = OP(rn); \
	161	if (type == SUSPEND \|\| PL_regkind[type] == CURLY) \
	162	rn = NEXTOPER(NEXTOPER(rn)); \
	163	else if (type == PLUS) \
	164	rn = NEXTOPER(rn); \
	165	else if (type == IFMATCH) \
	166	rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
	167	else rn += NEXT_OFF(rn); \
	168	} \
	169	} STMT_END
	170
	171
	172	static void restore_pos(pTHX_ void *arg);
	173
	174	STATIC CHECKPOINT
	175	S_regcppush(pTHX_ I32 parenfloor)
	176	{
	177	dVAR;
	178	const int retval = PL_savestack_ix;
	179	#define REGCP_PAREN_ELEMS 4
	180	const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
	181	int p;
	182	GET_RE_DEBUG_FLAGS_DECL;
	183
	184	if (paren_elems_to_push < 0)
	185	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
	186
	187	#define REGCP_OTHER_ELEMS 7
	188	SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
	189
	190	for (p = PL_regsize; p > parenfloor; p--) {
	191	/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
	192	SSPUSHINT(PL_regoffs[p].end);
	193	SSPUSHINT(PL_regoffs[p].start);
	194	SSPUSHPTR(PL_reg_start_tmp[p]);
	195	SSPUSHINT(p);
	196	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	197	" saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
	198	(UV)p, (IV)PL_regoffs[p].start,
	199	(IV)(PL_reg_start_tmp[p] - PL_bostr),
	200	(IV)PL_regoffs[p].end
	201	));
	202	}
	203	/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
	204	SSPUSHPTR(PL_regoffs);
	205	SSPUSHINT(PL_regsize);
	206	SSPUSHINT(*PL_reglastparen);
	207	SSPUSHINT(*PL_reglastcloseparen);
	208	SSPUSHPTR(PL_reginput);
	209	#define REGCP_FRAME_ELEMS 2
	210	/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
	211	* are needed for the regexp context stack bookkeeping. */
	212	SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
	213	SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
	214
	215	return retval;
	216	}
	217
	218	/* These are needed since we do not localize EVAL nodes: */
	219	#define REGCP_SET(cp) \
	220	DEBUG_STATE_r( \
	221	PerlIO_printf(Perl_debug_log, \
	222	" Setting an EVAL scope, savestack=%"IVdf"\n", \
	223	(IV)PL_savestack_ix)); \
	224	cp = PL_savestack_ix
	225
	226	#define REGCP_UNWIND(cp) \
	227	DEBUG_STATE_r( \
	228	if (cp != PL_savestack_ix) \
	229	PerlIO_printf(Perl_debug_log, \
	230	" Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
	231	(IV)(cp), (IV)PL_savestack_ix)); \
	232	regcpblow(cp)
	233
	234	STATIC char *
	235	S_regcppop(pTHX_ const regexp *rex)
	236	{
	237	dVAR;
	238	U32 i;
	239	char *input;
	240
	241	GET_RE_DEBUG_FLAGS_DECL;
	242
	243	/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
	244	i = SSPOPINT;
	245	assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
	246	i = SSPOPINT; /* Parentheses elements to pop. */
	247	input = (char *) SSPOPPTR;
	248	*PL_reglastcloseparen = SSPOPINT;
	249	*PL_reglastparen = SSPOPINT;
	250	PL_regsize = SSPOPINT;
	251	PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
	252
	253
	254	/* Now restore the parentheses context. */
	255	for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
	256	i > 0; i -= REGCP_PAREN_ELEMS) {
	257	I32 tmps;
	258	U32 paren = (U32)SSPOPINT;
	259	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
	260	PL_regoffs[paren].start = SSPOPINT;
	261	tmps = SSPOPINT;
	262	if (paren <= *PL_reglastparen)
	263	PL_regoffs[paren].end = tmps;
	264	DEBUG_BUFFERS_r(
	265	PerlIO_printf(Perl_debug_log,
	266	" restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
	267	(UV)paren, (IV)PL_regoffs[paren].start,
	268	(IV)(PL_reg_start_tmp[paren] - PL_bostr),
	269	(IV)PL_regoffs[paren].end,
	270	(paren > *PL_reglastparen ? "(no)" : ""));
	271	);
	272	}
	273	DEBUG_BUFFERS_r(
	274	if (*PL_reglastparen + 1 <= rex->nparens) {
	275	PerlIO_printf(Perl_debug_log,
	276	" restoring \\%"IVdf"..\\%"IVdf" to undef\n",
	277	(IV)(*PL_reglastparen + 1), (IV)rex->nparens);
	278	}
	279	);
	280	#if 1
	281	/* It would seem that the similar code in regtry()
	282	* already takes care of this, and in fact it is in
	283	* a better location to since this code can #if 0-ed out
	284	* but the code in regtry() is needed or otherwise tests
	285	* requiring null fields (pat.t#187 and split.t#{13,14}
	286	* (as of patchlevel 7877) will fail. Then again,
	287	* this code seems to be necessary or otherwise
	288	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	289	* --jhi updated by dapm */
	290	for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
	291	if (i > PL_regsize)
	292	PL_regoffs[i].start = -1;
	293	PL_regoffs[i].end = -1;
	294	}
	295	#endif
	296	return input;
	297	}
	298
	299	#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
	300
	301	/*
	302	* pregexec and friends
	303	*/
	304
	305	#ifndef PERL_IN_XSUB_RE
	306	/*
	307	- pregexec - match a regexp against a string
	308	*/
	309	I32
	310	Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
	311	char strbeg, I32 minend, SV screamer, U32 nosave)
	312	/* strend: pointer to null at end of string */
	313	/* strbeg: real beginning of string */
	314	/* minend: end of match must be >=minend after stringarg. */
	315	/* nosave: For optimizations. */
	316	{
	317	return
	318	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
	319	nosave ? 0 : REXEC_COPY_STR);
	320	}
	321	#endif
	322
	323	/*
	324	* Need to implement the following flags for reg_anch:
	325	*
	326	* USE_INTUIT_NOML - Useful to call re_intuit_start() first
	327	* USE_INTUIT_ML
	328	* INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
	329	* INTUIT_AUTORITATIVE_ML
	330	* INTUIT_ONCE_NOML - Intuit can match in one location only.
	331	* INTUIT_ONCE_ML
	332	*
	333	* Another flag for this function: SECOND_TIME (so that float substrs
	334	* with giant delta may be not rechecked).
	335	*/
	336
	337	/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
	338
	339	/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
	340	Otherwise, only SvCUR(sv) is used to get strbeg. */
	341
	342	/* XXXX We assume that strpos is strbeg unless sv. */
	343
	344	/* XXXX Some places assume that there is a fixed substring.
	345	An update may be needed if optimizer marks as "INTUITable"
	346	RExen without fixed substrings. Similarly, it is assumed that
	347	lengths of all the strings are no more than minlen, thus they
	348	cannot come from lookahead.
	349	(Or minlen should take into account lookahead.)
	350	NOTE: Some of this comment is not correct. minlen does now take account
	351	of lookahead/behind. Further research is required. -- demerphq
	352
	353	*/
	354
	355	/* A failure to find a constant substring means that there is no need to make
	356	an expensive call to REx engine, thus we celebrate a failure. Similarly,
	357	finding a substring too deep into the string means that less calls to
	358	regtry() should be needed.
	359
	360	REx compiler's optimizer found 4 possible hints:
	361	a) Anchored substring;
	362	b) Fixed substring;
	363	c) Whether we are anchored (beginning-of-line or \G);
	364	d) First node (of those at offset 0) which may distingush positions;
	365	We use a)b)d) and multiline-part of c), and try to find a position in the
	366	string which does not contradict any of them.
	367	*/
	368
	369	/* Most of decisions we do here should have been done at compile time.
	370	The nodes of the REx which we used for the search should have been
	371	deleted from the finite automaton. */
	372
	373	char *
	374	Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV sv, char strpos,
	375	char strend, const U32 flags, re_scream_pos_data data)
	376	{
	377	dVAR;
	378	register I32 start_shift = 0;
	379	/* Should be nonnegative! */
	380	register I32 end_shift = 0;
	381	register char *s;
	382	register SV *check;
	383	char *strbeg;
	384	char *t;
	385	const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
	386	I32 ml_anch;
	387	register char other_last = NULL; / other substr checked before this */
	388	char check_at = NULL; / check substr found at this pos */
	389	const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
	390	RXi_GET_DECL(prog,progi);
	391	#ifdef DEBUGGING
	392	const char * const i_strpos = strpos;
	393	#endif
	394
	395	GET_RE_DEBUG_FLAGS_DECL;
	396
	397	RX_MATCH_UTF8_set(prog,do_utf8);
	398
	399	if (prog->extflags & RXf_UTF8) {
	400	PL_reg_flags \|= RF_utf8;
	401	}
	402	DEBUG_EXECUTE_r(
	403	debug_start_match(prog, do_utf8, strpos, strend,
	404	sv ? "Guessing start of match in sv for"
	405	: "Guessing start of match in string for");
	406	);
	407
	408	/* CHR_DIST() would be more correct here but it makes things slow. */
	409	if (prog->minlen > strend - strpos) {
	410	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	411	"String too short... [re_intuit_start]\n"));
	412	goto fail;
	413	}
	414
	415	strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
	416	PL_regeol = strend;
	417	if (do_utf8) {
	418	if (!prog->check_utf8 && prog->check_substr)
	419	to_utf8_substr(prog);
	420	check = prog->check_utf8;
	421	} else {
	422	if (!prog->check_substr && prog->check_utf8)
	423	to_byte_substr(prog);
	424	check = prog->check_substr;
	425	}
	426	if (check == &PL_sv_undef) {
	427	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	428	"Non-utf8 string cannot match utf8 check string\n"));
	429	goto fail;
	430	}
	431	if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
	432	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
	433	\|\| ( (prog->extflags & RXf_ANCH_BOL)
	434	&& !multiline ) ); /* Check after \n? */
	435
	436	if (!ml_anch) {
	437	if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
	438	&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
	439	/* SvCUR is not set on references: SvRV and SvPVX_const overlap */
	440	&& sv && !SvROK(sv)
	441	&& (strpos != strbeg)) {
	442	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
	443	goto fail;
	444	}
	445	if (prog->check_offset_min == prog->check_offset_max &&
	446	!(prog->extflags & RXf_CANY_SEEN)) {
	447	/* Substring at constant offset from beg-of-str... */
	448	I32 slen;
	449
	450	s = HOP3c(strpos, prog->check_offset_min, strend);
	451
	452	if (SvTAIL(check)) {
	453	slen = SvCUR(check); /* >= 1 */
	454
	455	if ( strend - s > slen \|\| strend - s < slen - 1
	456	\|\| (strend - s == slen && strend[-1] != '\n')) {
	457	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
	458	goto fail_finish;
	459	}
	460	/* Now should match s[0..slen-2] */
	461	slen--;
	462	if (slen && (SvPVX_const(check) != s
	463	\|\| (slen > 1
	464	&& memNE(SvPVX_const(check), s, slen)))) {
	465	report_neq:
	466	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
	467	goto fail_finish;
	468	}
	469	}
	470	else if (SvPVX_const(check) != s
	471	\|\| ((slen = SvCUR(check)) > 1
	472	&& memNE(SvPVX_const(check), s, slen)))
	473	goto report_neq;
	474	check_at = s;
	475	goto success_at_start;
	476	}
	477	}
	478	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
	479	s = strpos;
	480	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	481	end_shift = prog->check_end_shift;
	482
	483	if (!ml_anch) {
	484	const I32 end = prog->check_offset_max + CHR_SVLEN(check)
	485	- (SvTAIL(check) != 0);
	486	const I32 eshift = CHR_DIST((U8)strend, (U8)s) - end;
	487
	488	if (end_shift < eshift)
	489	end_shift = eshift;
	490	}
	491	}
	492	else { /* Can match at random position */
	493	ml_anch = 0;
	494	s = strpos;
	495	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	496	end_shift = prog->check_end_shift;
	497
	498	/* end shift should be non negative here */
	499	}
	500
	501	#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
	502	if (end_shift < 0)
	503	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
	504	(IV)end_shift, prog->precomp);
	505	#endif
	506
	507	restart:
	508	/* Find a possible match in the region s..strend by looking for
	509	the "check" substring in the region corrected by start/end_shift. */
	510
	511	{
	512	I32 srch_start_shift = start_shift;
	513	I32 srch_end_shift = end_shift;
	514	if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
	515	srch_end_shift -= ((strbeg - s) - srch_start_shift);
	516	srch_start_shift = strbeg - s;
	517	}
	518	DEBUG_OPTIMISE_MORE_r({
	519	PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
	520	(IV)prog->check_offset_min,
	521	(IV)srch_start_shift,
	522	(IV)srch_end_shift,
	523	(IV)prog->check_end_shift);
	524	});
	525
	526	if (flags & REXEC_SCREAM) {
	527	I32 p = -1; /* Internal iterator of scream. */
	528	I32 * const pp = data ? data->scream_pos : &p;
	529
	530	if (PL_screamfirst[BmRARE(check)] >= 0
	531	\|\| ( BmRARE(check) == '\n'
	532	&& (BmPREVIOUS(check) == SvCUR(check) - 1)
	533	&& SvTAIL(check) ))
	534	s = screaminstr(sv, check,
	535	srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
	536	else
	537	goto fail_finish;
	538	/* we may be pointing at the wrong string */
	539	if (s && RX_MATCH_COPIED(prog))
	540	s = strbeg + (s - SvPVX_const(sv));
	541	if (data)
	542	*data->scream_olds = s;
	543	}
	544	else {
	545	U8* start_point;
	546	U8* end_point;
	547	if (prog->extflags & RXf_CANY_SEEN) {
	548	start_point= (U8*)(s + srch_start_shift);
	549	end_point= (U8*)(strend - srch_end_shift);
	550	} else {
	551	start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
	552	end_point= HOP3(strend, -srch_end_shift, strbeg);
	553	}
	554	DEBUG_OPTIMISE_MORE_r({
	555	PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
	556	(int)(end_point - start_point),
	557	(int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
	558	start_point);
	559	});
	560
	561	s = fbm_instr( start_point, end_point,
	562	check, multiline ? FBMrf_MULTILINE : 0);
	563	}
	564	}
	565	/* Update the count-of-usability, remove useless subpatterns,
	566	unshift s. */
	567
	568	DEBUG_EXECUTE_r({
	569	RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
	570	SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
	571	PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
	572	(s ? "Found" : "Did not find"),
	573	(check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
	574	? "anchored" : "floating"),
	575	quoted,
	576	RE_SV_TAIL(check),
	577	(s ? " at offset " : "...\n") );
	578	});
	579
	580	if (!s)
	581	goto fail_finish;
	582	/* Finish the diagnostic message */
	583	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
	584
	585	/* XXX dmq: first branch is for positive lookbehind...
	586	Our check string is offset from the beginning of the pattern.
	587	So we need to do any stclass tests offset forward from that
	588	point. I think. :-(
	589	*/
	590
	591
	592
	593	check_at=s;
	594
	595
	596	/* Got a candidate. Check MBOL anchoring, and the other substr.
	597	Start with the other substr.
	598	XXXX no SCREAM optimization yet - and a very coarse implementation
	599	XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
	600	always match. Probably should be marked during compile...
	601	Probably it is right to do no SCREAM here...
	602	*/
	603
	604	if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
	605	: (prog->float_substr && prog->anchored_substr))
	606	{
	607	/* Take into account the "other" substring. */
	608	/* XXXX May be hopelessly wrong for UTF... */
	609	if (!other_last)
	610	other_last = strpos;
	611	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
	612	do_other_anchored:
	613	{
	614	char * const last = HOP3c(s, -start_shift, strbeg);
	615	char last1, last2;
	616	char * const saved_s = s;
	617	SV* must;
	618
	619	t = s - prog->check_offset_max;
	620	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	621	&& (!do_utf8
	622	\|\| ((t = (char)reghopmaybe3((U8)s, -(prog->check_offset_max), (U8*)strpos))
	623	&& t > strpos)))
	624	NOOP;
	625	else
	626	t = strpos;
	627	t = HOP3c(t, prog->anchored_offset, strend);
	628	if (t < other_last) /* These positions already checked */
	629	t = other_last;
	630	last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
	631	if (last < last1)
	632	last1 = last;
	633	/* XXXX It is not documented what units *_offsets are in.
	634	We assume bytes, but this is clearly wrong.
	635	Meaning this code needs to be carefully reviewed for errors.
	636	dmq.
	637	*/
	638
	639	/* On end-of-str: see comment below. */
	640	must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
	641	if (must == &PL_sv_undef) {
	642	s = (char*)NULL;
	643	DEBUG_r(must = prog->anchored_utf8); /* for debug */
	644	}
	645	else
	646	s = fbm_instr(
	647	(unsigned char*)t,
	648	HOP3(HOP3(last1, prog->anchored_offset, strend)
	649	+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
	650	must,
	651	multiline ? FBMrf_MULTILINE : 0
	652	);
	653	DEBUG_EXECUTE_r({
	654	RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
	655	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	656	PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
	657	(s ? "Found" : "Contradicts"),
	658	quoted, RE_SV_TAIL(must));
	659	});
	660
	661
	662	if (!s) {
	663	if (last1 >= last2) {
	664	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	665	", giving up...\n"));
	666	goto fail_finish;
	667	}
	668	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	669	", trying floating at offset %ld...\n",
	670	(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
	671	other_last = HOP3c(last1, prog->anchored_offset+1, strend);
	672	s = HOP3c(last, 1, strend);
	673	goto restart;
	674	}
	675	else {
	676	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	677	(long)(s - i_strpos)));
	678	t = HOP3c(s, -prog->anchored_offset, strbeg);
	679	other_last = HOP3c(s, 1, strend);
	680	s = saved_s;
	681	if (t == strpos)
	682	goto try_at_start;
	683	goto try_at_offset;
	684	}
	685	}
	686	}
	687	else { /* Take into account the floating substring. */
	688	char last, last1;
	689	char * const saved_s = s;
	690	SV* must;
	691
	692	t = HOP3c(s, -start_shift, strbeg);
	693	last1 = last =
	694	HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
	695	if (CHR_DIST((U8)last, (U8)t) > prog->float_max_offset)
	696	last = HOP3c(t, prog->float_max_offset, strend);
	697	s = HOP3c(t, prog->float_min_offset, strend);
	698	if (s < other_last)
	699	s = other_last;
	700	/* XXXX It is not documented what units _offsets are in. Assume bytes. /
	701	must = do_utf8 ? prog->float_utf8 : prog->float_substr;
	702	/* fbm_instr() takes into account exact value of end-of-str
	703	if the check is SvTAIL(ed). Since false positives are OK,
	704	and end-of-str is not later than strend we are OK. */
	705	if (must == &PL_sv_undef) {
	706	s = (char*)NULL;
	707	DEBUG_r(must = prog->float_utf8); /* for debug message */
	708	}
	709	else
	710	s = fbm_instr((unsigned char*)s,
	711	(unsigned char*)last + SvCUR(must)
	712	- (SvTAIL(must)!=0),
	713	must, multiline ? FBMrf_MULTILINE : 0);
	714	DEBUG_EXECUTE_r({
	715	RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
	716	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	717	PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
	718	(s ? "Found" : "Contradicts"),
	719	quoted, RE_SV_TAIL(must));
	720	});
	721	if (!s) {
	722	if (last1 == last) {
	723	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	724	", giving up...\n"));
	725	goto fail_finish;
	726	}
	727	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	728	", trying anchored starting at offset %ld...\n",
	729	(long)(saved_s + 1 - i_strpos)));
	730	other_last = last;
	731	s = HOP3c(t, 1, strend);
	732	goto restart;
	733	}
	734	else {
	735	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	736	(long)(s - i_strpos)));
	737	other_last = s; /* Fix this later. --Hugo */
	738	s = saved_s;
	739	if (t == strpos)
	740	goto try_at_start;
	741	goto try_at_offset;
	742	}
	743	}
	744	}
	745
	746
	747	t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
	748
	749	DEBUG_OPTIMISE_MORE_r(
	750	PerlIO_printf(Perl_debug_log,
	751	"Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
	752	(IV)prog->check_offset_min,
	753	(IV)prog->check_offset_max,
	754	(IV)(s-strpos),
	755	(IV)(t-strpos),
	756	(IV)(t-s),
	757	(IV)(strend-strpos)
	758	)
	759	);
	760
	761	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	762	&& (!do_utf8
	763	\|\| ((t = (char)reghopmaybe3((U8)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
	764	&& t > strpos)))
	765	{
	766	/* Fixed substring is found far enough so that the match
	767	cannot start at strpos. */
	768	try_at_offset:
	769	if (ml_anch && t[-1] != '\n') {
	770	/* Eventually fbm_*() should handle this, but often
	771	anchored_offset is not 0, so this check will not be wasted. */
	772	/* XXXX In the code below we prefer to look for "^" even in
	773	presence of anchored substrings. And we search even
	774	beyond the found float position. These pessimizations
	775	are historical artefacts only. */
	776	find_anchor:
	777	while (t < strend - prog->minlen) {
	778	if (*t == '\n') {
	779	if (t < check_at - prog->check_offset_min) {
	780	if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
	781	/* Since we moved from the found position,
	782	we definitely contradict the found anchored
	783	substr. Due to the above check we do not
	784	contradict "check" substr.
	785	Thus we can arrive here only if check substr
	786	is float. Redo checking for "other"=="fixed".
	787	*/
	788	strpos = t + 1;
	789	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
	790	PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
	791	goto do_other_anchored;
	792	}
	793	/* We don't contradict the found floating substring. */
	794	/* XXXX Why not check for STCLASS? */
	795	s = t + 1;
	796	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
	797	PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
	798	goto set_useful;
	799	}
	800	/* Position contradicts check-string */
	801	/* XXXX probably better to look for check-string
	802	than for "\n", so one should lower the limit for t? */
	803	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
	804	PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
	805	other_last = strpos = s = t + 1;
	806	goto restart;
	807	}
	808	t++;
	809	}
	810	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
	811	PL_colors[0], PL_colors[1]));
	812	goto fail_finish;
	813	}
	814	else {
	815	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
	816	PL_colors[0], PL_colors[1]));
	817	}
	818	s = t;
	819	set_useful:
	820	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
	821	}
	822	else {
	823	/* The found string does not prohibit matching at strpos,
	824	- no optimization of calling REx engine can be performed,
	825	unless it was an MBOL and we are not after MBOL,
	826	or a future STCLASS check will fail this. */
	827	try_at_start:
	828	/* Even in this situation we may use MBOL flag if strpos is offset
	829	wrt the start of the string. */
	830	if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
	831	&& (strpos != strbeg) && strpos[-1] != '\n'
	832	/* May be due to an implicit anchor of m{.foo} /
	833	&& !(prog->intflags & PREGf_IMPLICIT))
	834	{
	835	t = strpos;
	836	goto find_anchor;
	837	}
	838	DEBUG_EXECUTE_r( if (ml_anch)
	839	PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
	840	(long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
	841	);
	842	success_at_start:
	843	if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
	844	&& (do_utf8 ? (
	845	prog->check_utf8 /* Could be deleted already */
	846	&& --BmUSEFUL(prog->check_utf8) < 0
	847	&& (prog->check_utf8 == prog->float_utf8)
	848	) : (
	849	prog->check_substr /* Could be deleted already */
	850	&& --BmUSEFUL(prog->check_substr) < 0
	851	&& (prog->check_substr == prog->float_substr)
	852	)))
	853	{
	854	/* If flags & SOMETHING - do not do it many times on the same match */
	855	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
	856	SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
	857	if (do_utf8 ? prog->check_substr : prog->check_utf8)
	858	SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
	859	prog->check_substr = prog->check_utf8 = NULL; /* disable */
	860	prog->float_substr = prog->float_utf8 = NULL; /* clear */
	861	check = NULL; /* abort */
	862	s = strpos;
	863	/* XXXX This is a remnant of the old implementation. It
	864	looks wasteful, since now INTUIT can use many
	865	other heuristics. */
	866	prog->extflags &= ~RXf_USE_INTUIT;
	867	}
	868	else
	869	s = strpos;
	870	}
	871
	872	/* Last resort... */
	873	/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
	874	/* trie stclasses are too expensive to use here, we are better off to
	875	leave it to regmatch itself */
	876	if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
	877	/* minlen == 0 is possible if regstclass is \b or \B,
	878	and the fixed substr is ''$.
	879	Since minlen is already taken into account, s+1 is before strend;
	880	accidentally, minlen >= 1 guaranties no false positives at s + 1
	881	even for \b or \B. But (minlen? 1 : 0) below assumes that
	882	regstclass does not come from lookahead... */
	883	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
	884	This leaves EXACTF only, which is dealt with in find_byclass(). */
	885	const U8* const str = (U8*)STRING(progi->regstclass);
	886	const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
	887	? CHR_DIST(str+STR_LEN(progi->regstclass), str)
	888	: 1);
	889	char * endpos;
	890	if (prog->anchored_substr \|\| prog->anchored_utf8 \|\| ml_anch)
	891	endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
	892	else if (prog->float_substr \|\| prog->float_utf8)
	893	endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
	894	else
	895	endpos= strend;
	896
	897	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
	898	(IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
	899
	900	t = s;
	901	s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
	902	if (!s) {
	903	#ifdef DEBUGGING
	904	const char *what = NULL;
	905	#endif
	906	if (endpos == strend) {
	907	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	908	"Could not match STCLASS...\n") );
	909	goto fail;
	910	}
	911	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	912	"This position contradicts STCLASS...\n") );
	913	if ((prog->extflags & RXf_ANCH) && !ml_anch)
	914	goto fail;
	915	/* Contradict one of substrings */
	916	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	917	if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
	918	DEBUG_EXECUTE_r( what = "anchored" );
	919	hop_and_restart:
	920	s = HOP3c(t, 1, strend);
	921	if (s + start_shift + end_shift > strend) {
	922	/* XXXX Should be taken into account earlier? */
	923	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	924	"Could not match STCLASS...\n") );
	925	goto fail;
	926	}
	927	if (!check)
	928	goto giveup;
	929	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	930	"Looking for %s substr starting at offset %ld...\n",
	931	what, (long)(s + start_shift - i_strpos)) );
	932	goto restart;
	933	}
	934	/* Have both, check_string is floating */
	935	if (t + start_shift >= check_at) /* Contradicts floating=check */
	936	goto retry_floating_check;
	937	/* Recheck anchored substring, but not floating... */
	938	s = check_at;
	939	if (!check)
	940	goto giveup;
	941	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	942	"Looking for anchored substr starting at offset %ld...\n",
	943	(long)(other_last - i_strpos)) );
	944	goto do_other_anchored;
	945	}
	946	/* Another way we could have checked stclass at the
	947	current position only: */
	948	if (ml_anch) {
	949	s = t = t + 1;
	950	if (!check)
	951	goto giveup;
	952	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	953	"Looking for /%s^%s/m starting at offset %ld...\n",
	954	PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
	955	goto try_at_offset;
	956	}
	957	if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
	958	goto fail;
	959	/* Check is floating subtring. */
	960	retry_floating_check:
	961	t = check_at - start_shift;
	962	DEBUG_EXECUTE_r( what = "floating" );
	963	goto hop_and_restart;
	964	}
	965	if (t != s) {
	966	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	967	"By STCLASS: moving %ld --> %ld\n",
	968	(long)(t - i_strpos), (long)(s - i_strpos))
	969	);
	970	}
	971	else {
	972	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	973	"Does not contradict STCLASS...\n");
	974	);
	975	}
	976	}
	977	giveup:
	978	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
	979	PL_colors[4], (check ? "Guessed" : "Giving up"),
	980	PL_colors[5], (long)(s - i_strpos)) );
	981	return s;
	982
	983	fail_finish: /* Substring not found */
	984	if (prog->check_substr \|\| prog->check_utf8) /* could be removed already */
	985	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
	986	fail:
	987	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
	988	PL_colors[4], PL_colors[5]));
	989	return NULL;
	990	}
	991
	992	#define DECL_TRIE_TYPE(scan) \
	993	const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
	994	trie_type = (scan->flags != EXACT) \
	995	? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
	996	: (do_utf8 ? trie_utf8 : trie_plain)
	997
	998	#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
	999	uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
	1000	switch (trie_type) { \
	1001	case trie_utf8_fold: \
	1002	if ( foldlen>0 ) { \
	1003	uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
	1004	foldlen -= len; \
	1005	uscan += len; \
	1006	len=0; \
	1007	} else { \
	1008	uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
	1009	uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
	1010	foldlen -= UNISKIP( uvc ); \
	1011	uscan = foldbuf + UNISKIP( uvc ); \
	1012	} \
	1013	break; \
	1014	case trie_latin_utf8_fold: \
	1015	if ( foldlen>0 ) { \
	1016	uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
	1017	foldlen -= len; \
	1018	uscan += len; \
	1019	len=0; \
	1020	} else { \
	1021	len = 1; \
	1022	uvc = to_uni_fold( (U8)uc, foldbuf, &foldlen ); \
	1023	foldlen -= UNISKIP( uvc ); \
	1024	uscan = foldbuf + UNISKIP( uvc ); \
	1025	} \
	1026	break; \
	1027	case trie_utf8: \
	1028	uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
	1029	break; \
	1030	case trie_plain: \
	1031	uvc = (UV)*uc; \
	1032	len = 1; \
	1033	} \
	1034	\
	1035	if (uvc < 256) { \
	1036	charid = trie->charmap[ uvc ]; \
	1037	} \
	1038	else { \
	1039	charid = 0; \
	1040	if (widecharmap) { \
	1041	SV** const svpp = hv_fetch(widecharmap, \
	1042	(char*)&uvc, sizeof(UV), 0); \
	1043	if (svpp) \
	1044	charid = (U16)SvIV(*svpp); \
	1045	} \
	1046	} \
	1047	} STMT_END
	1048
	1049	#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
	1050	{ \
	1051	char my_strend= (char )strend; \
	1052	if ( (CoNd) \
	1053	&& (ln == len \|\| \
	1054	!ibcmp_utf8(s, &my_strend, 0, do_utf8, \
	1055	m, NULL, ln, (bool)UTF)) \
	1056	&& (!reginfo \|\| regtry(reginfo, &s)) ) \
	1057	goto got_it; \
	1058	else { \
	1059	U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
	1060	uvchr_to_utf8(tmpbuf, c); \
	1061	f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
	1062	if ( f != c \
	1063	&& (f == c1 \|\| f == c2) \
	1064	&& (ln == len \|\| \
	1065	!ibcmp_utf8(s, &my_strend, 0, do_utf8,\
	1066	m, NULL, ln, (bool)UTF)) \
	1067	&& (!reginfo \|\| regtry(reginfo, &s)) ) \
	1068	goto got_it; \
	1069	} \
	1070	} \
	1071	s += len
	1072
	1073	#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
	1074	STMT_START { \
	1075	while (s <= e) { \
	1076	if ( (CoNd) \
	1077	&& (ln == 1 \|\| !(OP(c) == EXACTF \
	1078	? ibcmp(s, m, ln) \
	1079	: ibcmp_locale(s, m, ln))) \
	1080	&& (!reginfo \|\| regtry(reginfo, &s)) ) \
	1081	goto got_it; \
	1082	s++; \
	1083	} \
	1084	} STMT_END
	1085
	1086	#define REXEC_FBC_UTF8_SCAN(CoDe) \
	1087	STMT_START { \
	1088	while (s + (uskip = UTF8SKIP(s)) <= strend) { \
	1089	CoDe \
	1090	s += uskip; \
	1091	} \
	1092	} STMT_END
	1093
	1094	#define REXEC_FBC_SCAN(CoDe) \
	1095	STMT_START { \
	1096	while (s < strend) { \
	1097	CoDe \
	1098	s++; \
	1099	} \
	1100	} STMT_END
	1101
	1102	#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
	1103	REXEC_FBC_UTF8_SCAN( \
	1104	if (CoNd) { \
	1105	if (tmp && (!reginfo \|\| regtry(reginfo, &s))) \
	1106	goto got_it; \
	1107	else \
	1108	tmp = doevery; \
	1109	} \
	1110	else \
	1111	tmp = 1; \
	1112	)
	1113
	1114	#define REXEC_FBC_CLASS_SCAN(CoNd) \
	1115	REXEC_FBC_SCAN( \
	1116	if (CoNd) { \
	1117	if (tmp && (!reginfo \|\| regtry(reginfo, &s))) \
	1118	goto got_it; \
	1119	else \
	1120	tmp = doevery; \
	1121	} \
	1122	else \
	1123	tmp = 1; \
	1124	)
	1125
	1126	#define REXEC_FBC_TRYIT \
	1127	if ((!reginfo \|\| regtry(reginfo, &s))) \
	1128	goto got_it
	1129
	1130	#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
	1131	if (do_utf8) { \
	1132	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1133	} \
	1134	else { \
	1135	REXEC_FBC_CLASS_SCAN(CoNd); \
	1136	} \
	1137	break
	1138
	1139	#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
	1140	if (do_utf8) { \
	1141	UtFpReLoAd; \
	1142	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1143	} \
	1144	else { \
	1145	REXEC_FBC_CLASS_SCAN(CoNd); \
	1146	} \
	1147	break
	1148
	1149	#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
	1150	PL_reg_flags \|= RF_tainted; \
	1151	if (do_utf8) { \
	1152	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1153	} \
	1154	else { \
	1155	REXEC_FBC_CLASS_SCAN(CoNd); \
	1156	} \
	1157	break
	1158
	1159	#define DUMP_EXEC_POS(li,s,doutf8) \
	1160	dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
	1161
	1162	/* We know what class REx starts with. Try to find this position... */
	1163	/* if reginfo is NULL, its a dryrun */
	1164	/* annoyingly all the vars in this routine have different names from their counterparts
	1165	in regmatch. /grrr */
	1166
	1167	STATIC char *
	1168	S_find_byclass(pTHX_ regexp * prog, const regnode c, char s,
	1169	const char strend, regmatch_info reginfo)
	1170	{
	1171	dVAR;
	1172	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
	1173	char *m;
	1174	STRLEN ln;
	1175	STRLEN lnc;
	1176	register STRLEN uskip;
	1177	unsigned int c1;
	1178	unsigned int c2;
	1179	char *e;
	1180	register I32 tmp = 1; /* Scratch variable? */
	1181	register const bool do_utf8 = PL_reg_match_utf8;
	1182	RXi_GET_DECL(prog,progi);
	1183
	1184	/* We know what class it must start with. */
	1185	switch (OP(c)) {
	1186	case ANYOF:
	1187	if (do_utf8) {
	1188	REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) \|\|
	1189	!UTF8_IS_INVARIANT((U8)s[0]) ?
	1190	reginclass(prog, c, (U8*)s, 0, do_utf8) :
	1191	REGINCLASS(prog, c, (U8*)s));
	1192	}
	1193	else {
	1194	while (s < strend) {
	1195	STRLEN skip = 1;
	1196
	1197	if (REGINCLASS(prog, c, (U8*)s) \|\|
	1198	(ANYOF_FOLD_SHARP_S(c, s, strend) &&
	1199	/* The assignment of 2 is intentional:
	1200	* for the folded sharp s, the skip is 2. */
	1201	(skip = SHARP_S_SKIP))) {
	1202	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1203	goto got_it;
	1204	else
	1205	tmp = doevery;
	1206	}
	1207	else
	1208	tmp = 1;
	1209	s += skip;
	1210	}
	1211	}
	1212	break;
	1213	case CANY:
	1214	REXEC_FBC_SCAN(
	1215	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1216	goto got_it;
	1217	else
	1218	tmp = doevery;
	1219	);
	1220	break;
	1221	case EXACTF:
	1222	m = STRING(c);
	1223	ln = STR_LEN(c); /* length to match in octets/bytes */
	1224	lnc = (I32) ln; /* length to match in characters */
	1225	if (UTF) {
	1226	STRLEN ulen1, ulen2;
	1227	U8 sm = (U8 ) m;
	1228	U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
	1229	U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
	1230	/* used by commented-out code below */
	1231	/const U32 uniflags = UTF8_ALLOW_DEFAULT;/
	1232
	1233	/* XXX: Since the node will be case folded at compile
	1234	time this logic is a little odd, although im not
	1235	sure that its actually wrong. --dmq */
	1236
	1237	c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
	1238	c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
	1239
	1240	/* XXX: This is kinda strange. to_utf8_XYZ returns the
	1241	codepoint of the first character in the converted
	1242	form, yet originally we did the extra step.
	1243	No tests fail by commenting this code out however
	1244	so Ive left it out. -- dmq.
	1245
	1246	c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
	1247	0, uniflags);
	1248	c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
	1249	0, uniflags);
	1250	*/
	1251
	1252	lnc = 0;
	1253	while (sm < ((U8 *) m + ln)) {
	1254	lnc++;
	1255	sm += UTF8SKIP(sm);
	1256	}
	1257	}
	1258	else {
	1259	c1 = (U8)m;
	1260	c2 = PL_fold[c1];
	1261	}
	1262	goto do_exactf;
	1263	case EXACTFL:
	1264	m = STRING(c);
	1265	ln = STR_LEN(c);
	1266	lnc = (I32) ln;
	1267	c1 = (U8)m;
	1268	c2 = PL_fold_locale[c1];
	1269	do_exactf:
	1270	e = HOP3c(strend, -((I32)lnc), s);
	1271
	1272	if (!reginfo && e < s)
	1273	e = s; /* Due to minlen logic of intuit() */
	1274
	1275	/* The idea in the EXACTF* cases is to first find the
	1276	* first character of the EXACTF* node and then, if
	1277	* necessary, case-insensitively compare the full
	1278	* text of the node. The c1 and c2 are the first
	1279	* characters (though in Unicode it gets a bit
	1280	* more complicated because there are more cases
	1281	* than just upper and lower: one needs to use
	1282	* the so-called folding case for case-insensitive
	1283	* matching (called "loose matching" in Unicode).
	1284	* ibcmp_utf8() will do just that. */
	1285
	1286	if (do_utf8 \|\| UTF) {
	1287	UV c, f;
	1288	U8 tmpbuf [UTF8_MAXBYTES+1];
	1289	STRLEN len = 1;
	1290	STRLEN foldlen;
	1291	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1292	if (c1 == c2) {
	1293	/* Upper and lower of 1st char are equal -
	1294	* probably not a "letter". */
	1295	while (s <= e) {
	1296	if (do_utf8) {
	1297	c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
	1298	uniflags);
	1299	} else {
	1300	c = ((U8)s);
	1301	}
	1302	REXEC_FBC_EXACTISH_CHECK(c == c1);
	1303	}
	1304	}
	1305	else {
	1306	while (s <= e) {
	1307	if (do_utf8) {
	1308	c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
	1309	uniflags);
	1310	} else {
	1311	c = ((U8)s);
	1312	}
	1313
	1314	/* Handle some of the three Greek sigmas cases.
	1315	* Note that not all the possible combinations
	1316	* are handled here: some of them are handled
	1317	* by the standard folding rules, and some of
	1318	* them (the character class or ANYOF cases)
	1319	* are handled during compiletime in
	1320	* regexec.c:S_regclass(). */
	1321	if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA \|\|
	1322	c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
	1323	c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
	1324
	1325	REXEC_FBC_EXACTISH_CHECK(c == c1 \|\| c == c2);
	1326	}
	1327	}
	1328	}
	1329	else {
	1330	/* Neither pattern nor string are UTF8 */
	1331	if (c1 == c2)
	1332	REXEC_FBC_EXACTISH_SCAN((U8)s == c1);
	1333	else
	1334	REXEC_FBC_EXACTISH_SCAN((U8)s == c1 \|\| (U8)s == c2);
	1335	}
	1336	break;
	1337	case BOUNDL:
	1338	PL_reg_flags \|= RF_tainted;
	1339	/* FALL THROUGH */
	1340	case BOUND:
	1341	if (do_utf8) {
	1342	if (s == PL_bostr)
	1343	tmp = '\n';
	1344	else {
	1345	U8 * const r = reghop3((U8)s, -1, (U8)PL_bostr);
	1346	tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
	1347	}
	1348	tmp = ((OP(c) == BOUND ?
	1349	isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
	1350	LOAD_UTF8_CHARCLASS_ALNUM();
	1351	REXEC_FBC_UTF8_SCAN(
	1352	if (tmp == !(OP(c) == BOUND ?
	1353	(bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
	1354	isALNUM_LC_utf8((U8*)s)))
	1355	{
	1356	tmp = !tmp;
	1357	REXEC_FBC_TRYIT;
	1358	}
	1359	);
	1360	}
	1361	else {
	1362	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
	1363	tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
	1364	REXEC_FBC_SCAN(
	1365	if (tmp ==
	1366	!(OP(c) == BOUND ? isALNUM(s) : isALNUM_LC(s))) {
	1367	tmp = !tmp;
	1368	REXEC_FBC_TRYIT;
	1369	}
	1370	);
	1371	}
	1372	if ((!prog->minlen && tmp) && (!reginfo \|\| regtry(reginfo, &s)))
	1373	goto got_it;
	1374	break;
	1375	case NBOUNDL:
	1376	PL_reg_flags \|= RF_tainted;
	1377	/* FALL THROUGH */
	1378	case NBOUND:
	1379	if (do_utf8) {
	1380	if (s == PL_bostr)
	1381	tmp = '\n';
	1382	else {
	1383	U8 * const r = reghop3((U8)s, -1, (U8)PL_bostr);
	1384	tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
	1385	}
	1386	tmp = ((OP(c) == NBOUND ?
	1387	isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
	1388	LOAD_UTF8_CHARCLASS_ALNUM();
	1389	REXEC_FBC_UTF8_SCAN(
	1390	if (tmp == !(OP(c) == NBOUND ?
	1391	(bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
	1392	isALNUM_LC_utf8((U8*)s)))
	1393	tmp = !tmp;
	1394	else REXEC_FBC_TRYIT;
	1395	);
	1396	}
	1397	else {
	1398	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
	1399	tmp = ((OP(c) == NBOUND ?
	1400	isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
	1401	REXEC_FBC_SCAN(
	1402	if (tmp ==
	1403	!(OP(c) == NBOUND ? isALNUM(s) : isALNUM_LC(s)))
	1404	tmp = !tmp;
	1405	else REXEC_FBC_TRYIT;
	1406	);
	1407	}
	1408	if ((!prog->minlen && !tmp) && (!reginfo \|\| regtry(reginfo, &s)))
	1409	goto got_it;
	1410	break;
	1411	case ALNUM:
	1412	REXEC_FBC_CSCAN_PRELOAD(
	1413	LOAD_UTF8_CHARCLASS_ALNUM(),
	1414	swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
	1415	isALNUM(*s)
	1416	);
	1417	case ALNUML:
	1418	REXEC_FBC_CSCAN_TAINT(
	1419	isALNUM_LC_utf8((U8*)s),
	1420	isALNUM_LC(*s)
	1421	);
	1422	case NALNUM:
	1423	REXEC_FBC_CSCAN_PRELOAD(
	1424	LOAD_UTF8_CHARCLASS_ALNUM(),
	1425	!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
	1426	!isALNUM(*s)
	1427	);
	1428	case NALNUML:
	1429	REXEC_FBC_CSCAN_TAINT(
	1430	!isALNUM_LC_utf8((U8*)s),
	1431	!isALNUM_LC(*s)
	1432	);
	1433	case SPACE:
	1434	REXEC_FBC_CSCAN_PRELOAD(
	1435	LOAD_UTF8_CHARCLASS_SPACE(),
	1436	s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, do_utf8),
	1437	isSPACE(*s)
	1438	);
	1439	case SPACEL:
	1440	REXEC_FBC_CSCAN_TAINT(
	1441	s == ' ' \|\| isSPACE_LC_utf8((U8)s),
	1442	isSPACE_LC(*s)
	1443	);
	1444	case NSPACE:
	1445	REXEC_FBC_CSCAN_PRELOAD(
	1446	LOAD_UTF8_CHARCLASS_SPACE(),
	1447	!(s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, do_utf8)),
	1448	!isSPACE(*s)
	1449	);
	1450	case NSPACEL:
	1451	REXEC_FBC_CSCAN_TAINT(
	1452	!(s == ' ' \|\| isSPACE_LC_utf8((U8)s)),
	1453	!isSPACE_LC(*s)
	1454	);
	1455	case DIGIT:
	1456	REXEC_FBC_CSCAN_PRELOAD(
	1457	LOAD_UTF8_CHARCLASS_DIGIT(),
	1458	swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
	1459	isDIGIT(*s)
	1460	);
	1461	case DIGITL:
	1462	REXEC_FBC_CSCAN_TAINT(
	1463	isDIGIT_LC_utf8((U8*)s),
	1464	isDIGIT_LC(*s)
	1465	);
	1466	case NDIGIT:
	1467	REXEC_FBC_CSCAN_PRELOAD(
	1468	LOAD_UTF8_CHARCLASS_DIGIT(),
	1469	!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
	1470	!isDIGIT(*s)
	1471	);
	1472	case NDIGITL:
	1473	REXEC_FBC_CSCAN_TAINT(
	1474	!isDIGIT_LC_utf8((U8*)s),
	1475	!isDIGIT_LC(*s)
	1476	);
	1477	case LNBREAK:
	1478	REXEC_FBC_CSCAN(
	1479	is_LNBREAK_utf8(s),
	1480	is_LNBREAK_latin1(s)
	1481	);
	1482	case VERTWS:
	1483	REXEC_FBC_CSCAN(
	1484	is_VERTWS_utf8(s),
	1485	is_VERTWS_latin1(s)
	1486	);
	1487	case NVERTWS:
	1488	REXEC_FBC_CSCAN(
	1489	!is_VERTWS_utf8(s),
	1490	!is_VERTWS_latin1(s)
	1491	);
	1492	case HORIZWS:
	1493	REXEC_FBC_CSCAN(
	1494	is_HORIZWS_utf8(s),
	1495	is_HORIZWS_latin1(s)
	1496	);
	1497	case NHORIZWS:
	1498	REXEC_FBC_CSCAN(
	1499	!is_HORIZWS_utf8(s),
	1500	!is_HORIZWS_latin1(s)
	1501	);
	1502	case AHOCORASICKC:
	1503	case AHOCORASICK:
	1504	{
	1505	DECL_TRIE_TYPE(c);
	1506	/* what trie are we using right now */
	1507	reg_ac_data *aho
	1508	= (reg_ac_data*)progi->data->data[ ARG( c ) ];
	1509	reg_trie_data *trie
	1510	= (reg_trie_data*)progi->data->data[ aho->trie ];
	1511	HV widecharmap = (HV) progi->data->data[ aho->trie + 1 ];
	1512
	1513	const char *last_start = strend - trie->minlen;
	1514	#ifdef DEBUGGING
	1515	const char *real_start = s;
	1516	#endif
	1517	STRLEN maxlen = trie->maxlen;
	1518	SV *sv_points;
	1519	U8 *points; / map of where we were in the input string
	1520	when reading a given char. For ASCII this
	1521	is unnecessary overhead as the relationship
	1522	is always 1:1, but for Unicode, especially
	1523	case folded Unicode this is not true. */
	1524	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1525	U8 *bitmap=NULL;
	1526
	1527
	1528	GET_RE_DEBUG_FLAGS_DECL;
	1529
	1530	/* We can't just allocate points here. We need to wrap it in
	1531	* an SV so it gets freed properly if there is a croak while
	1532	* running the match */
	1533	ENTER;
	1534	SAVETMPS;
	1535	sv_points=newSV(maxlen * sizeof(U8 *));
	1536	SvCUR_set(sv_points,
	1537	maxlen * sizeof(U8 *));
	1538	SvPOK_on(sv_points);
	1539	sv_2mortal(sv_points);
	1540	points=(U8**)SvPV_nolen(sv_points );
	1541	if ( trie_type != trie_utf8_fold
	1542	&& (trie->bitmap \|\| OP(c)==AHOCORASICKC) )
	1543	{
	1544	if (trie->bitmap)
	1545	bitmap=(U8*)trie->bitmap;
	1546	else
	1547	bitmap=(U8*)ANYOF_BITMAP(c);
	1548	}
	1549	/* this is the Aho-Corasick algorithm modified a touch
	1550	to include special handling for long "unknown char"
	1551	sequences. The basic idea being that we use AC as long
	1552	as we are dealing with a possible matching char, when
	1553	we encounter an unknown char (and we have not encountered
	1554	an accepting state) we scan forward until we find a legal
	1555	starting char.
	1556	AC matching is basically that of trie matching, except
	1557	that when we encounter a failing transition, we fall back
	1558	to the current states "fail state", and try the current char
	1559	again, a process we repeat until we reach the root state,
	1560	state 1, or a legal transition. If we fail on the root state
	1561	then we can either terminate if we have reached an accepting
	1562	state previously, or restart the entire process from the beginning
	1563	if we have not.
	1564
	1565	*/
	1566	while (s <= last_start) {
	1567	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1568	U8 uc = (U8)s;
	1569	U16 charid = 0;
	1570	U32 base = 1;
	1571	U32 state = 1;
	1572	UV uvc = 0;
	1573	STRLEN len = 0;
	1574	STRLEN foldlen = 0;
	1575	U8 uscan = (U8)NULL;
	1576	U8 *leftmost = NULL;
	1577	#ifdef DEBUGGING
	1578	U32 accepted_word= 0;
	1579	#endif
	1580	U32 pointpos = 0;
	1581
	1582	while ( state && uc <= (U8*)strend ) {
	1583	int failed=0;
	1584	U32 word = aho->states[ state ].wordnum;
	1585
	1586	if( state==1 ) {
	1587	if ( bitmap ) {
	1588	DEBUG_TRIE_EXECUTE_r(
	1589	if ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1590	dump_exec_pos( (char *)uc, c, strend, real_start,
	1591	(char *)uc, do_utf8 );
	1592	PerlIO_printf( Perl_debug_log,
	1593	" Scanning for legal start char...\n");
	1594	}
	1595	);
	1596	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1597	uc++;
	1598	}
	1599	s= (char *)uc;
	1600	}
	1601	if (uc >(U8*)last_start) break;
	1602	}
	1603
	1604	if ( word ) {
	1605	U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
	1606	if (!leftmost \|\| lpos < leftmost) {
	1607	DEBUG_r(accepted_word=word);
	1608	leftmost= lpos;
	1609	}
	1610	if (base==0) break;
	1611
	1612	}
	1613	points[pointpos++ % maxlen]= uc;
	1614	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	1615	uscan, len, uvc, charid, foldlen,
	1616	foldbuf, uniflags);
	1617	DEBUG_TRIE_EXECUTE_r({
	1618	dump_exec_pos( (char *)uc, c, strend, real_start,
	1619	s, do_utf8 );
	1620	PerlIO_printf(Perl_debug_log,
	1621	" Charid:%3u CP:%4"UVxf" ",
	1622	charid, uvc);
	1623	});
	1624
	1625	do {
	1626	#ifdef DEBUGGING
	1627	word = aho->states[ state ].wordnum;
	1628	#endif
	1629	base = aho->states[ state ].trans.base;
	1630
	1631	DEBUG_TRIE_EXECUTE_r({
	1632	if (failed)
	1633	dump_exec_pos( (char *)uc, c, strend, real_start,
	1634	s, do_utf8 );
	1635	PerlIO_printf( Perl_debug_log,
	1636	"%sState: %4"UVxf", word=%"UVxf,
	1637	failed ? " Fail transition to " : "",
	1638	(UV)state, (UV)word);
	1639	});
	1640	if ( base ) {
	1641	U32 tmp;
	1642	if (charid &&
	1643	(base + charid > trie->uniquecharcount )
	1644	&& (base + charid - 1 - trie->uniquecharcount
	1645	< trie->lasttrans)
	1646	&& trie->trans[base + charid - 1 -
	1647	trie->uniquecharcount].check == state
	1648	&& (tmp=trie->trans[base + charid - 1 -
	1649	trie->uniquecharcount ].next))
	1650	{
	1651	DEBUG_TRIE_EXECUTE_r(
	1652	PerlIO_printf( Perl_debug_log," - legal\n"));
	1653	state = tmp;
	1654	break;
	1655	}
	1656	else {
	1657	DEBUG_TRIE_EXECUTE_r(
	1658	PerlIO_printf( Perl_debug_log," - fail\n"));
	1659	failed = 1;
	1660	state = aho->fail[state];
	1661	}
	1662	}
	1663	else {
	1664	/* we must be accepting here */
	1665	DEBUG_TRIE_EXECUTE_r(
	1666	PerlIO_printf( Perl_debug_log," - accepting\n"));
	1667	failed = 1;
	1668	break;
	1669	}
	1670	} while(state);
	1671	uc += len;
	1672	if (failed) {
	1673	if (leftmost)
	1674	break;
	1675	if (!state) state = 1;
	1676	}
	1677	}
	1678	if ( aho->states[ state ].wordnum ) {
	1679	U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
	1680	if (!leftmost \|\| lpos < leftmost) {
	1681	DEBUG_r(accepted_word=aho->states[ state ].wordnum);
	1682	leftmost = lpos;
	1683	}
	1684	}
	1685	if (leftmost) {
	1686	s = (char*)leftmost;
	1687	DEBUG_TRIE_EXECUTE_r({
	1688	PerlIO_printf(
	1689	Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
	1690	(UV)accepted_word, (IV)(s - real_start)
	1691	);
	1692	});
	1693	if (!reginfo \|\| regtry(reginfo, &s)) {
	1694	FREETMPS;
	1695	LEAVE;
	1696	goto got_it;
	1697	}
	1698	s = HOPc(s,1);
	1699	DEBUG_TRIE_EXECUTE_r({
	1700	PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
	1701	});
	1702	} else {
	1703	DEBUG_TRIE_EXECUTE_r(
	1704	PerlIO_printf( Perl_debug_log,"No match.\n"));
	1705	break;
	1706	}
	1707	}
	1708	FREETMPS;
	1709	LEAVE;
	1710	}
	1711	break;
	1712	default:
	1713	Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
	1714	break;
	1715	}
	1716	return 0;
	1717	got_it:
	1718	return s;
	1719	}
	1720
	1721	static void
	1722	S_swap_match_buff (pTHX_ regexp *prog) {
	1723	regexp_paren_pair *t;
	1724
	1725	if (!prog->swap) {
	1726	/* We have to be careful. If the previous successful match
	1727	was from this regex we don't want a subsequent paritally
	1728	successful match to clobber the old results.
	1729	So when we detect this possibility we add a swap buffer
	1730	to the re, and switch the buffer each match. If we fail
	1731	we switch it back, otherwise we leave it swapped.
	1732	*/
	1733	Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
	1734	}
	1735	t = prog->swap;
	1736	prog->swap = prog->offs;
	1737	prog->offs = t;
	1738	}
	1739
	1740
	1741	/*
	1742	- regexec_flags - match a regexp against a string
	1743	*/
	1744	I32
	1745	Perl_regexec_flags(pTHX_ REGEXP * const prog, char stringarg, register char strend,
	1746	char strbeg, I32 minend, SV sv, void *data, U32 flags)
	1747	/* strend: pointer to null at end of string */
	1748	/* strbeg: real beginning of string */
	1749	/* minend: end of match must be >=minend after stringarg. */
	1750	/* data: May be used for some additional optimizations.
	1751	Currently its only used, with a U32 cast, for transmitting
	1752	the ganch offset when doing a /g match. This will change */
	1753	/* nosave: For optimizations. */
	1754	{
	1755	dVAR;
	1756	/register/ char *s;
	1757	register regnode *c;
	1758	/register/ char *startpos = stringarg;
	1759	I32 minlen; /* must match at least this many chars */
	1760	I32 dontbother = 0; /* how many characters not to try at end */
	1761	I32 end_shift = 0; /* Same for the end. / / CC */
	1762	I32 scream_pos = -1; /* Internal iterator of scream. */
	1763	char *scream_olds = NULL;
	1764	const bool do_utf8 = (bool)DO_UTF8(sv);
	1765	I32 multiline;
	1766	RXi_GET_DECL(prog,progi);
	1767	regmatch_info reginfo; /* create some info to pass to regtry etc */
	1768	bool swap_on_fail = 0;
	1769
	1770	GET_RE_DEBUG_FLAGS_DECL;
	1771
	1772	PERL_UNUSED_ARG(data);
	1773
	1774	/* Be paranoid... */
	1775	if (prog == NULL \|\| startpos == NULL) {
	1776	Perl_croak(aTHX_ "NULL regexp parameter");
	1777	return 0;
	1778	}
	1779
	1780	multiline = prog->extflags & RXf_PMf_MULTILINE;
	1781	reginfo.prog = prog;
	1782
	1783	RX_MATCH_UTF8_set(prog, do_utf8);
	1784	DEBUG_EXECUTE_r(
	1785	debug_start_match(prog, do_utf8, startpos, strend,
	1786	"Matching");
	1787	);
	1788
	1789	minlen = prog->minlen;
	1790
	1791	if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
	1792	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1793	"String too short [regexec_flags]...\n"));
	1794	goto phooey;
	1795	}
	1796
	1797
	1798	/* Check validity of program. */
	1799	if (UCHARAT(progi->program) != REG_MAGIC) {
	1800	Perl_croak(aTHX_ "corrupted regexp program");
	1801	}
	1802
	1803	PL_reg_flags = 0;
	1804	PL_reg_eval_set = 0;
	1805	PL_reg_maxiter = 0;
	1806
	1807	if (prog->extflags & RXf_UTF8)
	1808	PL_reg_flags \|= RF_utf8;
	1809
	1810	/* Mark beginning of line for ^ and lookbehind. */
	1811	reginfo.bol = startpos; /* XXX not used ??? */
	1812	PL_bostr = strbeg;
	1813	reginfo.sv = sv;
	1814
	1815	/* Mark end of line for $ (and such) */
	1816	PL_regeol = strend;
	1817
	1818	/* see how far we have to get to not match where we matched before */
	1819	reginfo.till = startpos+minend;
	1820
	1821	/* If there is a "must appear" string, look for it. */
	1822	s = startpos;
	1823
	1824	if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
	1825	MAGIC *mg;
	1826
	1827	if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
	1828	reginfo.ganch = startpos + prog->gofs;
	1829	else if (sv && SvTYPE(sv) >= SVt_PVMG
	1830	&& SvMAGIC(sv)
	1831	&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
	1832	&& mg->mg_len >= 0) {
	1833	reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
	1834	if (prog->extflags & RXf_ANCH_GPOS) {
	1835	if (s > reginfo.ganch)
	1836	goto phooey;
	1837	s = reginfo.ganch - prog->gofs;
	1838	}
	1839	}
	1840	else if (data) {
	1841	reginfo.ganch = strbeg + PTR2UV(data);
	1842	} else /* pos() not defined */
	1843	reginfo.ganch = strbeg;
	1844	}
	1845	if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
	1846	swap_on_fail = 1;
	1847	swap_match_buff(prog); /* do we need a save destructor here for
	1848	eval dies? */
	1849	}
	1850	if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL \|\| prog->check_utf8 != NULL)) {
	1851	re_scream_pos_data d;
	1852
	1853	d.scream_olds = &scream_olds;
	1854	d.scream_pos = &scream_pos;
	1855	s = re_intuit_start(prog, sv, s, strend, flags, &d);
	1856	if (!s) {
	1857	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
	1858	goto phooey; /* not present */
	1859	}
	1860	}
	1861
	1862
	1863
	1864	/* Simplest case: anchored match need be tried only once. */
	1865	/* [unless only anchor is BOL and multiline is set] */
	1866	if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
	1867	if (s == startpos && regtry(&reginfo, &startpos))
	1868	goto got_it;
	1869	else if (multiline \|\| (prog->intflags & PREGf_IMPLICIT)
	1870	\|\| (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
	1871	{
	1872	char *end;
	1873
	1874	if (minlen)
	1875	dontbother = minlen - 1;
	1876	end = HOP3c(strend, -dontbother, strbeg) - 1;
	1877	/* for multiline we only have to try after newlines */
	1878	if (prog->check_substr \|\| prog->check_utf8) {
	1879	if (s == startpos)
	1880	goto after_try;
	1881	while (1) {
	1882	if (regtry(&reginfo, &s))
	1883	goto got_it;
	1884	after_try:
	1885	if (s > end)
	1886	goto phooey;
	1887	if (prog->extflags & RXf_USE_INTUIT) {
	1888	s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
	1889	if (!s)
	1890	goto phooey;
	1891	}
	1892	else
	1893	s++;
	1894	}
	1895	} else {
	1896	if (s > startpos)
	1897	s--;
	1898	while (s < end) {
	1899	if (s++ == '\n') { / don't need PL_utf8skip here */
	1900	if (regtry(&reginfo, &s))
	1901	goto got_it;
	1902	}
	1903	}
	1904	}
	1905	}
	1906	goto phooey;
	1907	} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
	1908	{
	1909	/* the warning about reginfo.ganch being used without intialization
	1910	is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
	1911	and we only enter this block when the same bit is set. */
	1912	char *tmp_s = reginfo.ganch - prog->gofs;
	1913	if (regtry(&reginfo, &tmp_s))
	1914	goto got_it;
	1915	goto phooey;
	1916	}
	1917
	1918	/* Messy cases: unanchored match. */
	1919	if ((prog->anchored_substr \|\| prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
	1920	/* we have /x+whatever/ */
	1921	/* it must be a one character string (XXXX Except UTF?) */
	1922	char ch;
	1923	#ifdef DEBUGGING
	1924	int did_match = 0;
	1925	#endif
	1926	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
	1927	do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
	1928	ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
	1929
	1930	if (do_utf8) {
	1931	REXEC_FBC_SCAN(
	1932	if (*s == ch) {
	1933	DEBUG_EXECUTE_r( did_match = 1 );
	1934	if (regtry(&reginfo, &s)) goto got_it;
	1935	s += UTF8SKIP(s);
	1936	while (s < strend && *s == ch)
	1937	s += UTF8SKIP(s);
	1938	}
	1939	);
	1940	}
	1941	else {
	1942	REXEC_FBC_SCAN(
	1943	if (*s == ch) {
	1944	DEBUG_EXECUTE_r( did_match = 1 );
	1945	if (regtry(&reginfo, &s)) goto got_it;
	1946	s++;
	1947	while (s < strend && *s == ch)
	1948	s++;
	1949	}
	1950	);
	1951	}
	1952	DEBUG_EXECUTE_r(if (!did_match)
	1953	PerlIO_printf(Perl_debug_log,
	1954	"Did not find anchored character...\n")
	1955	);
	1956	}
	1957	else if (prog->anchored_substr != NULL
	1958	\|\| prog->anchored_utf8 != NULL
	1959	\|\| ((prog->float_substr != NULL \|\| prog->float_utf8 != NULL)
	1960	&& prog->float_max_offset < strend - s)) {
	1961	SV *must;
	1962	I32 back_max;
	1963	I32 back_min;
	1964	char *last;
	1965	char last1; / Last position checked before */
	1966	#ifdef DEBUGGING
	1967	int did_match = 0;
	1968	#endif
	1969	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	1970	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
	1971	do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
	1972	must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
	1973	back_max = back_min = prog->anchored_offset;
	1974	} else {
	1975	if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
	1976	do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
	1977	must = do_utf8 ? prog->float_utf8 : prog->float_substr;
	1978	back_max = prog->float_max_offset;
	1979	back_min = prog->float_min_offset;
	1980	}
	1981
	1982
	1983	if (must == &PL_sv_undef)
	1984	/* could not downgrade utf8 check substring, so must fail */
	1985	goto phooey;
	1986
	1987	if (back_min<0) {
	1988	last = strend;
	1989	} else {
	1990	last = HOP3c(strend, /* Cannot start after this */
	1991	-(I32)(CHR_SVLEN(must)
	1992	- (SvTAIL(must) != 0) + back_min), strbeg);
	1993	}
	1994	if (s > PL_bostr)
	1995	last1 = HOPc(s, -1);
	1996	else
	1997	last1 = s - 1; /* bogus */
	1998
	1999	/* XXXX check_substr already used to find "s", can optimize if
	2000	check_substr==must. */
	2001	scream_pos = -1;
	2002	dontbother = end_shift;
	2003	strend = HOPc(strend, -dontbother);
	2004	while ( (s <= last) &&
	2005	((flags & REXEC_SCREAM)
	2006	? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
	2007	end_shift, &scream_pos, 0))
	2008	: (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
	2009	(unsigned char*)strend, must,
	2010	multiline ? FBMrf_MULTILINE : 0))) ) {
	2011	/* we may be pointing at the wrong string */
	2012	if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
	2013	s = strbeg + (s - SvPVX_const(sv));
	2014	DEBUG_EXECUTE_r( did_match = 1 );
	2015	if (HOPc(s, -back_max) > last1) {
	2016	last1 = HOPc(s, -back_min);
	2017	s = HOPc(s, -back_max);
	2018	}
	2019	else {
	2020	char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
	2021
	2022	last1 = HOPc(s, -back_min);
	2023	s = t;
	2024	}
	2025	if (do_utf8) {
	2026	while (s <= last1) {
	2027	if (regtry(&reginfo, &s))
	2028	goto got_it;
	2029	s += UTF8SKIP(s);
	2030	}
	2031	}
	2032	else {
	2033	while (s <= last1) {
	2034	if (regtry(&reginfo, &s))
	2035	goto got_it;
	2036	s++;
	2037	}
	2038	}
	2039	}
	2040	DEBUG_EXECUTE_r(if (!did_match) {
	2041	RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
	2042	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	2043	PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
	2044	((must == prog->anchored_substr \|\| must == prog->anchored_utf8)
	2045	? "anchored" : "floating"),
	2046	quoted, RE_SV_TAIL(must));
	2047	});
	2048	goto phooey;
	2049	}
	2050	else if ( (c = progi->regstclass) ) {
	2051	if (minlen) {
	2052	const OPCODE op = OP(progi->regstclass);
	2053	/* don't bother with what can't match */
	2054	if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
	2055	strend = HOPc(strend, -(minlen - 1));
	2056	}
	2057	DEBUG_EXECUTE_r({
	2058	SV * const prop = sv_newmortal();
	2059	regprop(prog, prop, c);
	2060	{
	2061	RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
	2062	s,strend-s,60);
	2063	PerlIO_printf(Perl_debug_log,
	2064	"Matching stclass %.*s against %s (%d chars)\n",
	2065	(int)SvCUR(prop), SvPVX_const(prop),
	2066	quoted, (int)(strend - s));
	2067	}
	2068	});
	2069	if (find_byclass(prog, c, s, strend, &reginfo))
	2070	goto got_it;
	2071	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
	2072	}
	2073	else {
	2074	dontbother = 0;
	2075	if (prog->float_substr != NULL \|\| prog->float_utf8 != NULL) {
	2076	/* Trim the end. */
	2077	char *last;
	2078	SV* float_real;
	2079
	2080	if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
	2081	do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
	2082	float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
	2083
	2084	if (flags & REXEC_SCREAM) {
	2085	last = screaminstr(sv, float_real, s - strbeg,
	2086	end_shift, &scream_pos, 1); /* last one */
	2087	if (!last)
	2088	last = scream_olds; /* Only one occurrence. */
	2089	/* we may be pointing at the wrong string */
	2090	else if (RX_MATCH_COPIED(prog))
	2091	s = strbeg + (s - SvPVX_const(sv));
	2092	}
	2093	else {
	2094	STRLEN len;
	2095	const char * const little = SvPV_const(float_real, len);
	2096
	2097	if (SvTAIL(float_real)) {
	2098	if (memEQ(strend - len + 1, little, len - 1))
	2099	last = strend - len + 1;
	2100	else if (!multiline)
	2101	last = memEQ(strend - len, little, len)
	2102	? strend - len : NULL;
	2103	else
	2104	goto find_last;
	2105	} else {
	2106	find_last:
	2107	if (len)
	2108	last = rninstr(s, strend, little, little + len);
	2109	else
	2110	last = strend; /* matching "$" */
	2111	}
	2112	}
	2113	if (last == NULL) {
	2114	DEBUG_EXECUTE_r(
	2115	PerlIO_printf(Perl_debug_log,
	2116	"%sCan't trim the tail, match fails (should not happen)%s\n",
	2117	PL_colors[4], PL_colors[5]));
	2118	goto phooey; /* Should not happen! */
	2119	}
	2120	dontbother = strend - last + prog->float_min_offset;
	2121	}
	2122	if (minlen && (dontbother < minlen))
	2123	dontbother = minlen - 1;
	2124	strend -= dontbother; /* this one's always in bytes! */
	2125	/* We don't know much -- general case. */
	2126	if (do_utf8) {
	2127	for (;;) {
	2128	if (regtry(&reginfo, &s))
	2129	goto got_it;
	2130	if (s >= strend)
	2131	break;
	2132	s += UTF8SKIP(s);
	2133	};
	2134	}
	2135	else {
	2136	do {
	2137	if (regtry(&reginfo, &s))
	2138	goto got_it;
	2139	} while (s++ < strend);
	2140	}
	2141	}
	2142
	2143	/* Failure. */
	2144	goto phooey;
	2145
	2146	got_it:
	2147	RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
	2148
	2149	if (PL_reg_eval_set)
	2150	restore_pos(aTHX_ prog);
	2151	if (prog->paren_names)
	2152	(void)hv_iterinit(prog->paren_names);
	2153
	2154	/* make sure $`, $&, $', and $digit will work later */
	2155	if ( !(flags & REXEC_NOT_FIRST) ) {
	2156	RX_MATCH_COPY_FREE(prog);
	2157	if (flags & REXEC_COPY_STR) {
	2158	const I32 i = PL_regeol - startpos + (stringarg - strbeg);
	2159	#ifdef PERL_OLD_COPY_ON_WRITE
	2160	if ((SvIsCOW(sv)
	2161	\|\| (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
	2162	if (DEBUG_C_TEST) {
	2163	PerlIO_printf(Perl_debug_log,
	2164	"Copy on write: regexp capture, type %d\n",
	2165	(int) SvTYPE(sv));
	2166	}
	2167	prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
	2168	prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
	2169	assert (SvPOKp(prog->saved_copy));
	2170	} else
	2171	#endif
	2172	{
	2173	RX_MATCH_COPIED_on(prog);
	2174	s = savepvn(strbeg, i);
	2175	prog->subbeg = s;
	2176	}
	2177	prog->sublen = i;
	2178	}
	2179	else {
	2180	prog->subbeg = strbeg;
	2181	prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
	2182	}
	2183	}
	2184
	2185	return 1;
	2186
	2187	phooey:
	2188	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
	2189	PL_colors[4], PL_colors[5]));
	2190	if (PL_reg_eval_set)
	2191	restore_pos(aTHX_ prog);
	2192	if (swap_on_fail)
	2193	/* we failed :-( roll it back */
	2194	swap_match_buff(prog);
	2195
	2196	return 0;
	2197	}
	2198
	2199
	2200	/*
	2201	- regtry - try match at specific point
	2202	*/
	2203	STATIC I32 /* 0 failure, 1 success */
	2204	S_regtry(pTHX_ regmatch_info reginfo, char *startpos)
	2205	{
	2206	dVAR;
	2207	CHECKPOINT lastcp;
	2208	regexp *prog = reginfo->prog;
	2209	RXi_GET_DECL(prog,progi);
	2210	GET_RE_DEBUG_FLAGS_DECL;
	2211	reginfo->cutpoint=NULL;
	2212
	2213	if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
	2214	MAGIC *mg;
	2215
	2216	PL_reg_eval_set = RS_init;
	2217	DEBUG_EXECUTE_r(DEBUG_s(
	2218	PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
	2219	(IV)(PL_stack_sp - PL_stack_base));
	2220	));
	2221	SAVESTACK_CXPOS();
	2222	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
	2223	/* Otherwise OP_NEXTSTATE will free whatever on stack now. */
	2224	SAVETMPS;
	2225	/* Apparently this is not needed, judging by wantarray. */
	2226	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
	2227	cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
	2228
	2229	if (reginfo->sv) {
	2230	/* Make $_ available to executed code. */
	2231	if (reginfo->sv != DEFSV) {
	2232	SAVE_DEFSV;
	2233	DEFSV = reginfo->sv;
	2234	}
	2235
	2236	if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
	2237	&& (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
	2238	/* prepare for quick setting of pos */
	2239	#ifdef PERL_OLD_COPY_ON_WRITE
	2240	if (SvIsCOW(reginfo->sv))
	2241	sv_force_normal_flags(reginfo->sv, 0);
	2242	#endif
	2243	mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
	2244	&PL_vtbl_mglob, NULL, 0);
	2245	mg->mg_len = -1;
	2246	}
	2247	PL_reg_magic = mg;
	2248	PL_reg_oldpos = mg->mg_len;
	2249	SAVEDESTRUCTOR_X(restore_pos, prog);
	2250	}
	2251	if (!PL_reg_curpm) {
	2252	Newxz(PL_reg_curpm, 1, PMOP);
	2253	#ifdef USE_ITHREADS
	2254	{
	2255	SV* const repointer = newSViv(0);
	2256	/* so we know which PL_regex_padav element is PL_reg_curpm */
	2257	SvFLAGS(repointer) \|= SVf_BREAK;
	2258	av_push(PL_regex_padav,repointer);
	2259	PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
	2260	PL_regex_pad = AvARRAY(PL_regex_padav);
	2261	}
	2262	#endif
	2263	}
	2264	PM_SETRE(PL_reg_curpm, prog);
	2265	PL_reg_oldcurpm = PL_curpm;
	2266	PL_curpm = PL_reg_curpm;
	2267	if (RX_MATCH_COPIED(prog)) {
	2268	/* Here is a serious problem: we cannot rewrite subbeg,
	2269	since it may be needed if this match fails. Thus
	2270	$` inside (?{}) could fail... */
	2271	PL_reg_oldsaved = prog->subbeg;
	2272	PL_reg_oldsavedlen = prog->sublen;
	2273	#ifdef PERL_OLD_COPY_ON_WRITE
	2274	PL_nrs = prog->saved_copy;
	2275	#endif
	2276	RX_MATCH_COPIED_off(prog);
	2277	}
	2278	else
	2279	PL_reg_oldsaved = NULL;
	2280	prog->subbeg = PL_bostr;
	2281	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
	2282	}
	2283	DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
	2284	prog->offs[0].start = *startpos - PL_bostr;
	2285	PL_reginput = *startpos;
	2286	PL_reglastparen = &prog->lastparen;
	2287	PL_reglastcloseparen = &prog->lastcloseparen;
	2288	prog->lastparen = 0;
	2289	prog->lastcloseparen = 0;
	2290	PL_regsize = 0;
	2291	PL_regoffs = prog->offs;
	2292	if (PL_reg_start_tmpl <= prog->nparens) {
	2293	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
	2294	if(PL_reg_start_tmp)
	2295	Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	2296	else
	2297	Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	2298	}
	2299
	2300	/* XXXX What this code is doing here?!!! There should be no need
	2301	to do this again and again, PL_reglastparen should take care of
	2302	this! --ilya*/
	2303
	2304	/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
	2305	* Actually, the code in regcppop() (which Ilya may be meaning by
	2306	* PL_reglastparen), is not needed at all by the test suite
	2307	* (op/regexp, op/pat, op/split), but that code is needed otherwise
	2308	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	2309	* Meanwhile, this code is needed for the
	2310	* above-mentioned test suite tests to succeed. The common theme
	2311	* on those tests seems to be returning null fields from matches.
	2312	* --jhi updated by dapm */
	2313	#if 1
	2314	if (prog->nparens) {
	2315	regexp_paren_pair *pp = PL_regoffs;
	2316	register I32 i;
	2317	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
	2318	++pp;
	2319	pp->start = -1;
	2320	pp->end = -1;
	2321	}
	2322	}
	2323	#endif
	2324	REGCP_SET(lastcp);
	2325	if (regmatch(reginfo, progi->program + 1)) {
	2326	PL_regoffs[0].end = PL_reginput - PL_bostr;
	2327	return 1;
	2328	}
	2329	if (reginfo->cutpoint)
	2330	*startpos= reginfo->cutpoint;
	2331	REGCP_UNWIND(lastcp);
	2332	return 0;
	2333	}
	2334
	2335
	2336	#define sayYES goto yes
	2337	#define sayNO goto no
	2338	#define sayNO_SILENT goto no_silent
	2339
	2340	/* we dont use STMT_START/END here because it leads to
	2341	"unreachable code" warnings, which are bogus, but distracting. */
	2342	#define CACHEsayNO \
	2343	if (ST.cache_mask) \
	2344	PL_reg_poscache[ST.cache_offset] \|= ST.cache_mask; \
	2345	sayNO
	2346
	2347	/* this is used to determine how far from the left messages like
	2348	'failed...' are printed. It should be set such that messages
	2349	are inline with the regop output that created them.
	2350	*/
	2351	#define REPORT_CODE_OFF 32
	2352
	2353
	2354	/* Make sure there is a test for this +1 options in re_tests */
	2355	#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
	2356
	2357	#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
	2358	#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
	2359
	2360	#define SLAB_FIRST(s) (&(s)->states[0])
	2361	#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
	2362
	2363	/* grab a new slab and return the first slot in it */
	2364
	2365	STATIC regmatch_state *
	2366	S_push_slab(pTHX)
	2367	{
	2368	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	2369	dMY_CXT;
	2370	#endif
	2371	regmatch_slab *s = PL_regmatch_slab->next;
	2372	if (!s) {
	2373	Newx(s, 1, regmatch_slab);
	2374	s->prev = PL_regmatch_slab;
	2375	s->next = NULL;
	2376	PL_regmatch_slab->next = s;
	2377	}
	2378	PL_regmatch_slab = s;
	2379	return SLAB_FIRST(s);
	2380	}
	2381
	2382
	2383	/* push a new state then goto it */
	2384
	2385	#define PUSH_STATE_GOTO(state, node) \
	2386	scan = node; \
	2387	st->resume_state = state; \
	2388	goto push_state;
	2389
	2390	/* push a new state with success backtracking, then goto it */
	2391
	2392	#define PUSH_YES_STATE_GOTO(state, node) \
	2393	scan = node; \
	2394	st->resume_state = state; \
	2395	goto push_yes_state;
	2396
	2397
	2398
	2399	/*
	2400
	2401	regmatch() - main matching routine
	2402
	2403	This is basically one big switch statement in a loop. We execute an op,
	2404	set 'next' to point the next op, and continue. If we come to a point which
	2405	we may need to backtrack to on failure such as (A\|B\|C), we push a
	2406	backtrack state onto the backtrack stack. On failure, we pop the top
	2407	state, and re-enter the loop at the state indicated. If there are no more
	2408	states to pop, we return failure.
	2409
	2410	Sometimes we also need to backtrack on success; for example /A+/, where
	2411	after successfully matching one A, we need to go back and try to
	2412	match another one; similarly for lookahead assertions: if the assertion
	2413	completes successfully, we backtrack to the state just before the assertion
	2414	and then carry on. In these cases, the pushed state is marked as
	2415	'backtrack on success too'. This marking is in fact done by a chain of
	2416	pointers, each pointing to the previous 'yes' state. On success, we pop to
	2417	the nearest yes state, discarding any intermediate failure-only states.
	2418	Sometimes a yes state is pushed just to force some cleanup code to be
	2419	called at the end of a successful match or submatch; e.g. (??{$re}) uses
	2420	it to free the inner regex.
	2421
	2422	Note that failure backtracking rewinds the cursor position, while
	2423	success backtracking leaves it alone.
	2424
	2425	A pattern is complete when the END op is executed, while a subpattern
	2426	such as (?=foo) is complete when the SUCCESS op is executed. Both of these
	2427	ops trigger the "pop to last yes state if any, otherwise return true"
	2428	behaviour.
	2429
	2430	A common convention in this function is to use A and B to refer to the two
	2431	subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
	2432	the subpattern to be matched possibly multiple times, while B is the entire
	2433	rest of the pattern. Variable and state names reflect this convention.
	2434
	2435	The states in the main switch are the union of ops and failure/success of
	2436	substates associated with with that op. For example, IFMATCH is the op
	2437	that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
	2438	'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
	2439	successfully matched A and IFMATCH_A_fail is a state saying that we have
	2440	just failed to match A. Resume states always come in pairs. The backtrack
	2441	state we push is marked as 'IFMATCH_A', but when that is popped, we resume
	2442	at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
	2443	on success or failure.
	2444
	2445	The struct that holds a backtracking state is actually a big union, with
	2446	one variant for each major type of op. The variable st points to the
	2447	top-most backtrack struct. To make the code clearer, within each
	2448	block of code we #define ST to alias the relevant union.
	2449
	2450	Here's a concrete example of a (vastly oversimplified) IFMATCH
	2451	implementation:
	2452
	2453	switch (state) {
	2454	....
	2455
	2456	#define ST st->u.ifmatch
	2457
	2458	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	2459	ST.foo = ...; // some state we wish to save
	2460	...
	2461	// push a yes backtrack state with a resume value of
	2462	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
	2463	// first node of A:
	2464	PUSH_YES_STATE_GOTO(IFMATCH_A, A);
	2465	// NOTREACHED
	2466
	2467	case IFMATCH_A: // we have successfully executed A; now continue with B
	2468	next = B;
	2469	bar = ST.foo; // do something with the preserved value
	2470	break;
	2471
	2472	case IFMATCH_A_fail: // A failed, so the assertion failed
	2473	...; // do some housekeeping, then ...
	2474	sayNO; // propagate the failure
	2475
	2476	#undef ST
	2477
	2478	...
	2479	}
	2480
	2481	For any old-timers reading this who are familiar with the old recursive
	2482	approach, the code above is equivalent to:
	2483
	2484	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	2485	{
	2486	int foo = ...
	2487	...
	2488	if (regmatch(A)) {
	2489	next = B;
	2490	bar = foo;
	2491	break;
	2492	}
	2493	...; // do some housekeeping, then ...
	2494	sayNO; // propagate the failure
	2495	}
	2496
	2497	The topmost backtrack state, pointed to by st, is usually free. If you
	2498	want to claim it, populate any ST.foo fields in it with values you wish to
	2499	save, then do one of
	2500
	2501	PUSH_STATE_GOTO(resume_state, node);
	2502	PUSH_YES_STATE_GOTO(resume_state, node);
	2503
	2504	which sets that backtrack state's resume value to 'resume_state', pushes a
	2505	new free entry to the top of the backtrack stack, then goes to 'node'.
	2506	On backtracking, the free slot is popped, and the saved state becomes the
	2507	new free state. An ST.foo field in this new top state can be temporarily
	2508	accessed to retrieve values, but once the main loop is re-entered, it
	2509	becomes available for reuse.
	2510
	2511	Note that the depth of the backtrack stack constantly increases during the
	2512	left-to-right execution of the pattern, rather than going up and down with
	2513	the pattern nesting. For example the stack is at its maximum at Z at the
	2514	end of the pattern, rather than at X in the following:
	2515
	2516	/(((X)+)+)+....(Y)+....Z/
	2517
	2518	The only exceptions to this are lookahead/behind assertions and the cut,
	2519	(?>A), which pop all the backtrack states associated with A before
	2520	continuing.
	2521
	2522	Bascktrack state structs are allocated in slabs of about 4K in size.
	2523	PL_regmatch_state and st always point to the currently active state,
	2524	and PL_regmatch_slab points to the slab currently containing
	2525	PL_regmatch_state. The first time regmatch() is called, the first slab is
	2526	allocated, and is never freed until interpreter destruction. When the slab
	2527	is full, a new one is allocated and chained to the end. At exit from
	2528	regmatch(), slabs allocated since entry are freed.
	2529
	2530	*/
	2531
	2532
	2533	#define DEBUG_STATE_pp(pp) \
	2534	DEBUG_STATE_r({ \
	2535	DUMP_EXEC_POS(locinput, scan, do_utf8); \
	2536	PerlIO_printf(Perl_debug_log, \
	2537	" %*s"pp" %s%s%s%s%s\n", \
	2538	depth*2, "", \
	2539	PL_reg_name[st->resume_state], \
	2540	((st==yes_state\|\|st==mark_state) ? "[" : ""), \
	2541	((st==yes_state) ? "Y" : ""), \
	2542	((st==mark_state) ? "M" : ""), \
	2543	((st==yes_state\|\|st==mark_state) ? "]" : "") \
	2544	); \
	2545	});
	2546
	2547
	2548	#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
	2549
	2550	#ifdef DEBUGGING
	2551
	2552	STATIC void
	2553	S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
	2554	const char start, const char end, const char *blurb)
	2555	{
	2556	const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
	2557	if (!PL_colorset)
	2558	reginitcolors();
	2559	{
	2560	RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
	2561	prog->precomp, prog->prelen, 60);
	2562
	2563	RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
	2564	start, end - start, 60);
	2565
	2566	PerlIO_printf(Perl_debug_log,
	2567	"%s%s REx%s %s against %s\n",
	2568	PL_colors[4], blurb, PL_colors[5], s0, s1);
	2569
	2570	if (do_utf8\|\|utf8_pat)
	2571	PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
	2572	utf8_pat ? "pattern" : "",
	2573	utf8_pat && do_utf8 ? " and " : "",
	2574	do_utf8 ? "string" : ""
	2575	);
	2576	}
	2577	}
	2578
	2579	STATIC void
	2580	S_dump_exec_pos(pTHX_ const char *locinput,
	2581	const regnode *scan,
	2582	const char *loc_regeol,
	2583	const char *loc_bostr,
	2584	const char *loc_reg_starttry,
	2585	const bool do_utf8)
	2586	{
	2587	const int docolor = PL_colors[0] \|\| PL_colors[2] \|\| *PL_colors[4];
	2588	const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
	2589	int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
	2590	/* The part of the string before starttry has one color
	2591	(pref0_len chars), between starttry and current
	2592	position another one (pref_len - pref0_len chars),
	2593	after the current position the third one.
	2594	We assume that pref0_len <= pref_len, otherwise we
	2595	decrease pref0_len. */
	2596	int pref_len = (locinput - loc_bostr) > (5 + taill) - l
	2597	? (5 + taill) - l : locinput - loc_bostr;
	2598	int pref0_len;
	2599
	2600	while (do_utf8 && UTF8_IS_CONTINUATION((U8)(locinput - pref_len)))
	2601	pref_len++;
	2602	pref0_len = pref_len - (locinput - loc_reg_starttry);
	2603	if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
	2604	l = ( loc_regeol - locinput > (5 + taill) - pref_len
	2605	? (5 + taill) - pref_len : loc_regeol - locinput);
	2606	while (do_utf8 && UTF8_IS_CONTINUATION((U8)(locinput + l)))
	2607	l--;
	2608	if (pref0_len < 0)
	2609	pref0_len = 0;
	2610	if (pref0_len > pref_len)
	2611	pref0_len = pref_len;
	2612	{
	2613	const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
	2614
	2615	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
	2616	(locinput - pref_len),pref0_len, 60, 4, 5);
	2617
	2618	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
	2619	(locinput - pref_len + pref0_len),
	2620	pref_len - pref0_len, 60, 2, 3);
	2621
	2622	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
	2623	locinput, loc_regeol - locinput, 10, 0, 1);
	2624
	2625	const STRLEN tlen=len0+len1+len2;
	2626	PerlIO_printf(Perl_debug_log,
	2627	"%4"IVdf" <%.s%.s%s%.s>%s\|",
	2628	(IV)(locinput - loc_bostr),
	2629	len0, s0,
	2630	len1, s1,
	2631	(docolor ? "" : "> <"),
	2632	len2, s2,
	2633	(int)(tlen > 19 ? 0 : 19 - tlen),
	2634	"");
	2635	}
	2636	}
	2637
	2638	#endif
	2639
	2640	/* reg_check_named_buff_matched()
	2641	* Checks to see if a named buffer has matched. The data array of
	2642	* buffer numbers corresponding to the buffer is expected to reside
	2643	* in the regexp->data->data array in the slot stored in the ARG() of
	2644	* node involved. Note that this routine doesn't actually care about the
	2645	* name, that information is not preserved from compilation to execution.
	2646	* Returns the index of the leftmost defined buffer with the given name
	2647	* or 0 if non of the buffers matched.
	2648	*/
	2649	STATIC I32
	2650	S_reg_check_named_buff_matched(pTHX_ const regexp rex, const regnode scan) {
	2651	I32 n;
	2652	RXi_GET_DECL(rex,rexi);
	2653	SV sv_dat=(SV)rexi->data->data[ ARG( scan ) ];
	2654	I32 nums=(I32)SvPVX(sv_dat);
	2655	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	2656	if ((I32)*PL_reglastparen >= nums[n] &&
	2657	PL_regoffs[nums[n]].end != -1)
	2658	{
	2659	return nums[n];
	2660	}
	2661	}
	2662	return 0;
	2663	}
	2664
	2665
	2666	/* free all slabs above current one - called during LEAVE_SCOPE */
	2667
	2668	STATIC void
	2669	S_clear_backtrack_stack(pTHX_ void *p)
	2670	{
	2671	regmatch_slab *s = PL_regmatch_slab->next;
	2672	PERL_UNUSED_ARG(p);
	2673
	2674	if (!s)
	2675	return;
	2676	PL_regmatch_slab->next = NULL;
	2677	while (s) {
	2678	regmatch_slab * const osl = s;
	2679	s = s->next;
	2680	Safefree(osl);
	2681	}
	2682	}
	2683
	2684
	2685	#define SETREX(Re1,Re2) \
	2686	if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
	2687	Re1 = (Re2)
	2688
	2689	STATIC I32 /* 0 failure, 1 success */
	2690	S_regmatch(pTHX_ regmatch_info reginfo, regnode prog)
	2691	{
	2692	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	2693	dMY_CXT;
	2694	#endif
	2695	dVAR;
	2696	register const bool do_utf8 = PL_reg_match_utf8;
	2697	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	2698
	2699	regexp *rex = reginfo->prog;
	2700	RXi_GET_DECL(rex,rexi);
	2701
	2702	I32 oldsave;
	2703
	2704	/* the current state. This is a cached copy of PL_regmatch_state */
	2705	register regmatch_state *st;
	2706
	2707	/* cache heavy used fields of st in registers */
	2708	register regnode *scan;
	2709	register regnode *next;
	2710	register U32 n = 0; /* general value; init to avoid compiler warning */
	2711	register I32 ln = 0; /* len or last; init to avoid compiler warning */
	2712	register char *locinput = PL_reginput;
	2713	register I32 nextchr; /* is always set to UCHARAT(locinput) */
	2714
	2715	bool result = 0; /* return value of S_regmatch */
	2716	int depth = 0; /* depth of backtrack stack */
	2717	U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
	2718	const U32 max_nochange_depth =
	2719	(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
	2720	3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
	2721
	2722	regmatch_state yes_state = NULL; / state to pop to on success of
	2723	subpattern */
	2724	/* mark_state piggy backs on the yes_state logic so that when we unwind
	2725	the stack on success we can update the mark_state as we go */
	2726	regmatch_state mark_state = NULL; / last mark state we have seen */
	2727
	2728	regmatch_state cur_eval = NULL; / most recent EVAL_AB state */
	2729	struct regmatch_state cur_curlyx = NULL; / most recent curlyx */
	2730	U32 state_num;
	2731	bool no_final = 0; /* prevent failure from backtracking? */
	2732	bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
	2733	char *startpoint = PL_reginput;
	2734	SV popmark = NULL; / are we looking for a mark? */
	2735	SV sv_commit = NULL; / last mark name seen in failure */
	2736	SV sv_yes_mark = NULL; / last mark name we have seen
	2737	during a successfull match */
	2738	U32 lastopen = 0; /* last open we saw */
	2739	bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
	2740
	2741	SV* const oreplsv = GvSV(PL_replgv);
	2742
	2743
	2744	/* these three flags are set by various ops to signal information to
	2745	* the very next op. They have a useful lifetime of exactly one loop
	2746	* iteration, and are not preserved or restored by state pushes/pops
	2747	*/
	2748	bool sw = 0; /* the condition value in (?(cond)a\|b) */
	2749	bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
	2750	int logical = 0; /* the following EVAL is:
	2751	0: (?{...})
	2752	1: (?(?{...})X\|Y)
	2753	2: (??{...})
	2754	or the following IFMATCH/UNLESSM is:
	2755	false: plain (?=foo)
	2756	true: used as a condition: (?(?=foo))
	2757	*/
	2758
	2759	#ifdef DEBUGGING
	2760	GET_RE_DEBUG_FLAGS_DECL;
	2761	#endif
	2762
	2763	DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
	2764	PerlIO_printf(Perl_debug_log,"regmatch start\n");
	2765	}));
	2766	/* on first ever call to regmatch, allocate first slab */
	2767	if (!PL_regmatch_slab) {
	2768	Newx(PL_regmatch_slab, 1, regmatch_slab);
	2769	PL_regmatch_slab->prev = NULL;
	2770	PL_regmatch_slab->next = NULL;
	2771	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
	2772	}
	2773
	2774	oldsave = PL_savestack_ix;
	2775	SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
	2776	SAVEVPTR(PL_regmatch_slab);
	2777	SAVEVPTR(PL_regmatch_state);
	2778
	2779	/* grab next free state slot */
	2780	st = ++PL_regmatch_state;
	2781	if (st > SLAB_LAST(PL_regmatch_slab))
	2782	st = PL_regmatch_state = S_push_slab(aTHX);
	2783
	2784	/* Note that nextchr is a byte even in UTF */
	2785	nextchr = UCHARAT(locinput);
	2786	scan = prog;
	2787	while (scan != NULL) {
	2788
	2789	DEBUG_EXECUTE_r( {
	2790	SV * const prop = sv_newmortal();
	2791	regnode *rnext=regnext(scan);
	2792	DUMP_EXEC_POS( locinput, scan, do_utf8 );
	2793	regprop(rex, prop, scan);
	2794
	2795	PerlIO_printf(Perl_debug_log,
	2796	"%3"IVdf":%*s%s(%"IVdf")\n",
	2797	(IV)(scan - rexi->program), depth*2, "",
	2798	SvPVX_const(prop),
	2799	(PL_regkind[OP(scan)] == END \|\| !rnext) ?
	2800	0 : (IV)(rnext - rexi->program));
	2801	});
	2802
	2803	next = scan + NEXT_OFF(scan);
	2804	if (next == scan)
	2805	next = NULL;
	2806	state_num = OP(scan);
	2807
	2808	reenter_switch:
	2809	switch (state_num) {
	2810	case BOL:
	2811	if (locinput == PL_bostr)
	2812	{
	2813	/* reginfo->till = reginfo->bol; */
	2814	break;
	2815	}
	2816	sayNO;
	2817	case MBOL:
	2818	if (locinput == PL_bostr \|\|
	2819	((nextchr \|\| locinput < PL_regeol) && locinput[-1] == '\n'))
	2820	{
	2821	break;
	2822	}
	2823	sayNO;
	2824	case SBOL:
	2825	if (locinput == PL_bostr)
	2826	break;
	2827	sayNO;
	2828	case GPOS:
	2829	if (locinput == reginfo->ganch)
	2830	break;
	2831	sayNO;
	2832
	2833	case KEEPS:
	2834	/* update the startpoint */
	2835	st->u.keeper.val = PL_regoffs[0].start;
	2836	PL_reginput = locinput;
	2837	PL_regoffs[0].start = locinput - PL_bostr;
	2838	PUSH_STATE_GOTO(KEEPS_next, next);
	2839	/NOT-REACHED/
	2840	case KEEPS_next_fail:
	2841	/* rollback the start point change */
	2842	PL_regoffs[0].start = st->u.keeper.val;
	2843	sayNO_SILENT;
	2844	/NOT-REACHED/
	2845	case EOL:
	2846	goto seol;
	2847	case MEOL:
	2848	if ((nextchr \|\| locinput < PL_regeol) && nextchr != '\n')
	2849	sayNO;
	2850	break;
	2851	case SEOL:
	2852	seol:
	2853	if ((nextchr \|\| locinput < PL_regeol) && nextchr != '\n')
	2854	sayNO;
	2855	if (PL_regeol - locinput > 1)
	2856	sayNO;
	2857	break;
	2858	case EOS:
	2859	if (PL_regeol != locinput)
	2860	sayNO;
	2861	break;
	2862	case SANY:
	2863	if (!nextchr && locinput >= PL_regeol)
	2864	sayNO;
	2865	if (do_utf8) {
	2866	locinput += PL_utf8skip[nextchr];
	2867	if (locinput > PL_regeol)
	2868	sayNO;
	2869	nextchr = UCHARAT(locinput);
	2870	}
	2871	else
	2872	nextchr = UCHARAT(++locinput);
	2873	break;
	2874	case CANY:
	2875	if (!nextchr && locinput >= PL_regeol)
	2876	sayNO;
	2877	nextchr = UCHARAT(++locinput);
	2878	break;
	2879	case REG_ANY:
	2880	if ((!nextchr && locinput >= PL_regeol) \|\| nextchr == '\n')
	2881	sayNO;
	2882	if (do_utf8) {
	2883	locinput += PL_utf8skip[nextchr];
	2884	if (locinput > PL_regeol)
	2885	sayNO;
	2886	nextchr = UCHARAT(locinput);
	2887	}
	2888	else
	2889	nextchr = UCHARAT(++locinput);
	2890	break;
	2891
	2892	#undef ST
	2893	#define ST st->u.trie
	2894	case TRIEC:
	2895	/* In this case the charclass data is available inline so
	2896	we can fail fast without a lot of extra overhead.
	2897	*/
	2898	if (scan->flags == EXACT \|\| !do_utf8) {
	2899	if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
	2900	DEBUG_EXECUTE_r(
	2901	PerlIO_printf(Perl_debug_log,
	2902	"%*s %sfailed to match trie start class...%s\n",
	2903	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	2904	);
	2905	sayNO_SILENT;
	2906	/* NOTREACHED */
	2907	}
	2908	}
	2909	/* FALL THROUGH */
	2910	case TRIE:
	2911	{
	2912	/* what type of TRIE am I? (utf8 makes this contextual) */
	2913	DECL_TRIE_TYPE(scan);
	2914
	2915	/* what trie are we using right now */
	2916	reg_trie_data * const trie
	2917	= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
	2918	HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
	2919	U32 state = trie->startstate;
	2920
	2921	if (trie->bitmap && trie_type != trie_utf8_fold &&
	2922	!TRIE_BITMAP_TEST(trie,*locinput)
	2923	) {
	2924	if (trie->states[ state ].wordnum) {
	2925	DEBUG_EXECUTE_r(
	2926	PerlIO_printf(Perl_debug_log,
	2927	"%*s %smatched empty string...%s\n",
	2928	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	2929	);
	2930	break;
	2931	} else {
	2932	DEBUG_EXECUTE_r(
	2933	PerlIO_printf(Perl_debug_log,
	2934	"%*s %sfailed to match trie start class...%s\n",
	2935	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	2936	);
	2937	sayNO_SILENT;
	2938	}
	2939	}
	2940
	2941	{
	2942	U8 uc = ( U8 )locinput;
	2943
	2944	STRLEN len = 0;
	2945	STRLEN foldlen = 0;
	2946	U8 uscan = (U8)NULL;
	2947	STRLEN bufflen=0;
	2948	SV *sv_accept_buff = NULL;
	2949	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	2950
	2951	ST.accepted = 0; /* how many accepting states we have seen */
	2952	ST.B = next;
	2953	ST.jump = trie->jump;
	2954	ST.me = scan;
	2955	/*
	2956	traverse the TRIE keeping track of all accepting states
	2957	we transition through until we get to a failing node.
	2958	*/
	2959
	2960	while ( state && uc <= (U8*)PL_regeol ) {
	2961	U32 base = trie->states[ state ].trans.base;
	2962	UV uvc = 0;
	2963	U16 charid;
	2964	/* We use charid to hold the wordnum as we don't use it
	2965	for charid until after we have done the wordnum logic.
	2966	We define an alias just so that the wordnum logic reads
	2967	more naturally. */
	2968
	2969	#define got_wordnum charid
	2970	got_wordnum = trie->states[ state ].wordnum;
	2971
	2972	if ( got_wordnum ) {
	2973	if ( ! ST.accepted ) {
	2974	ENTER;
	2975	/* SAVETMPS; / / XXX is this necessary? dmq */
	2976	bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
	2977	sv_accept_buff=newSV(bufflen *
	2978	sizeof(reg_trie_accepted) - 1);
	2979	SvCUR_set(sv_accept_buff, 0);
	2980	SvPOK_on(sv_accept_buff);
	2981	sv_2mortal(sv_accept_buff);
	2982	SAVETMPS;
	2983	ST.accept_buff =
	2984	(reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
	2985	}
	2986	do {
	2987	if (ST.accepted >= bufflen) {
	2988	bufflen *= 2;
	2989	ST.accept_buff =(reg_trie_accepted*)
	2990	SvGROW(sv_accept_buff,
	2991	bufflen * sizeof(reg_trie_accepted));
	2992	}
	2993	SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
	2994	+ sizeof(reg_trie_accepted));
	2995
	2996
	2997	ST.accept_buff[ST.accepted].wordnum = got_wordnum;
	2998	ST.accept_buff[ST.accepted].endpos = uc;
	2999	++ST.accepted;
	3000	} while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
	3001	}
	3002	#undef got_wordnum
	3003
	3004	DEBUG_TRIE_EXECUTE_r({
	3005	DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
	3006	PerlIO_printf( Perl_debug_log,
	3007	"%*s %sState: %4"UVxf" Accepted: %4"UVxf" ",
	3008	2+depth * 2, "", PL_colors[4],
	3009	(UV)state, (UV)ST.accepted );
	3010	});
	3011
	3012	if ( base ) {
	3013	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	3014	uscan, len, uvc, charid, foldlen,
	3015	foldbuf, uniflags);
	3016
	3017	if (charid &&
	3018	(base + charid > trie->uniquecharcount )
	3019	&& (base + charid - 1 - trie->uniquecharcount
	3020	< trie->lasttrans)
	3021	&& trie->trans[base + charid - 1 -
	3022	trie->uniquecharcount].check == state)
	3023	{
	3024	state = trie->trans[base + charid - 1 -
	3025	trie->uniquecharcount ].next;
	3026	}
	3027	else {
	3028	state = 0;
	3029	}
	3030	uc += len;
	3031
	3032	}
	3033	else {
	3034	state = 0;
	3035	}
	3036	DEBUG_TRIE_EXECUTE_r(
	3037	PerlIO_printf( Perl_debug_log,
	3038	"Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
	3039	charid, uvc, (UV)state, PL_colors[5] );
	3040	);
	3041	}
	3042	if (!ST.accepted )
	3043	sayNO;
	3044
	3045	DEBUG_EXECUTE_r(
	3046	PerlIO_printf( Perl_debug_log,
	3047	"%*s %sgot %"IVdf" possible matches%s\n",
	3048	REPORT_CODE_OFF + depth * 2, "",
	3049	PL_colors[4], (IV)ST.accepted, PL_colors[5] );
	3050	);
	3051	}}
	3052	goto trie_first_try; /* jump into the fail handler */
	3053	/* NOTREACHED */
	3054	case TRIE_next_fail: /* we failed - try next alterative */
	3055	if ( ST.jump) {
	3056	REGCP_UNWIND(ST.cp);
	3057	for (n = *PL_reglastparen; n > ST.lastparen; n--)
	3058	PL_regoffs[n].end = -1;
	3059	*PL_reglastparen = n;
	3060	}
	3061	trie_first_try:
	3062	if (do_cutgroup) {
	3063	do_cutgroup = 0;
	3064	no_final = 0;
	3065	}
	3066
	3067	if ( ST.jump) {
	3068	ST.lastparen = *PL_reglastparen;
	3069	REGCP_SET(ST.cp);
	3070	}
	3071	if ( ST.accepted == 1 ) {
	3072	/* only one choice left - just continue */
	3073	DEBUG_EXECUTE_r({
	3074	AV *const trie_words
	3075	= (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
	3076	SV ** const tmp = av_fetch( trie_words,
	3077	ST.accept_buff[ 0 ].wordnum-1, 0 );
	3078	SV *sv= tmp ? sv_newmortal() : NULL;
	3079
	3080	PerlIO_printf( Perl_debug_log,
	3081	"%*s %sonly one match left: #%d <%s>%s\n",
	3082	REPORT_CODE_OFF+depth*2, "", PL_colors[4],
	3083	ST.accept_buff[ 0 ].wordnum,
	3084	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	3085	PL_colors[0], PL_colors[1],
	3086	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
	3087	)
	3088	: "not compiled under -Dr",
	3089	PL_colors[5] );
	3090	});
	3091	PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
	3092	/* in this case we free tmps/leave before we call regmatch
	3093	as we wont be using accept_buff again. */
	3094
	3095	locinput = PL_reginput;
	3096	nextchr = UCHARAT(locinput);
	3097	if ( !ST.jump \|\| !ST.jump[ST.accept_buff[0].wordnum])
	3098	scan = ST.B;
	3099	else
	3100	scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
	3101	if (!has_cutgroup) {
	3102	FREETMPS;
	3103	LEAVE;
	3104	} else {
	3105	ST.accepted--;
	3106	PUSH_YES_STATE_GOTO(TRIE_next, scan);
	3107	}
	3108
	3109	continue; /* execute rest of RE */
	3110	}
	3111
	3112	if ( !ST.accepted-- ) {
	3113	DEBUG_EXECUTE_r({
	3114	PerlIO_printf( Perl_debug_log,
	3115	"%*s %sTRIE failed...%s\n",
	3116	REPORT_CODE_OFF+depth*2, "",
	3117	PL_colors[4],
	3118	PL_colors[5] );
	3119	});
	3120	FREETMPS;
	3121	LEAVE;
	3122	sayNO_SILENT;
	3123	/NOTREACHED/
	3124	}
	3125
	3126	/*
	3127	There are at least two accepting states left. Presumably
	3128	the number of accepting states is going to be low,
	3129	typically two. So we simply scan through to find the one
	3130	with lowest wordnum. Once we find it, we swap the last
	3131	state into its place and decrement the size. We then try to
	3132	match the rest of the pattern at the point where the word
	3133	ends. If we succeed, control just continues along the
	3134	regex; if we fail we return here to try the next accepting
	3135	state
	3136	*/
	3137
	3138	{
	3139	U32 best = 0;
	3140	U32 cur;
	3141	for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
	3142	DEBUG_TRIE_EXECUTE_r(
	3143	PerlIO_printf( Perl_debug_log,
	3144	"%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
	3145	REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
	3146	(IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
	3147	ST.accept_buff[ cur ].wordnum, PL_colors[5] );
	3148	);
	3149
	3150	if (ST.accept_buff[cur].wordnum <
	3151	ST.accept_buff[best].wordnum)
	3152	best = cur;
	3153	}
	3154
	3155	DEBUG_EXECUTE_r({
	3156	AV *const trie_words
	3157	= (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
	3158	SV ** const tmp = av_fetch( trie_words,
	3159	ST.accept_buff[ best ].wordnum - 1, 0 );
	3160	regnode *nextop=(!ST.jump \|\| !ST.jump[ST.accept_buff[best].wordnum]) ?
	3161	ST.B :
	3162	ST.me + ST.jump[ST.accept_buff[best].wordnum];
	3163	SV *sv= tmp ? sv_newmortal() : NULL;
	3164
	3165	PerlIO_printf( Perl_debug_log,
	3166	"%*s %strying alternation #%d <%s> at node #%d %s\n",
	3167	REPORT_CODE_OFF+depth*2, "", PL_colors[4],
	3168	ST.accept_buff[best].wordnum,
	3169	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	3170	PL_colors[0], PL_colors[1],
	3171	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
	3172	) : "not compiled under -Dr",
	3173	REG_NODE_NUM(nextop),
	3174	PL_colors[5] );
	3175	});
	3176
	3177	if ( best<ST.accepted ) {
	3178	reg_trie_accepted tmp = ST.accept_buff[ best ];
	3179	ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
	3180	ST.accept_buff[ ST.accepted ] = tmp;
	3181	best = ST.accepted;
	3182	}
	3183	PL_reginput = (char *)ST.accept_buff[ best ].endpos;
	3184	if ( !ST.jump \|\| !ST.jump[ST.accept_buff[best].wordnum]) {
	3185	scan = ST.B;
	3186	} else {
	3187	scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
	3188	}
	3189	PUSH_YES_STATE_GOTO(TRIE_next, scan);
	3190	/* NOTREACHED */
	3191	}
	3192	/* NOTREACHED */
	3193	case TRIE_next:
	3194	FREETMPS;
	3195	LEAVE;
	3196	sayYES;
	3197	#undef ST
	3198
	3199	case EXACT: {
	3200	char *s = STRING(scan);
	3201	ln = STR_LEN(scan);
	3202	if (do_utf8 != UTF) {
	3203	/* The target and the pattern have differing utf8ness. */
	3204	char *l = locinput;
	3205	const char * const e = s + ln;
	3206
	3207	if (do_utf8) {
	3208	/* The target is utf8, the pattern is not utf8. */
	3209	while (s < e) {
	3210	STRLEN ulen;
	3211	if (l >= PL_regeol)
	3212	sayNO;
	3213	if (NATIVE_TO_UNI((U8)s) !=
	3214	utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
	3215	uniflags))
	3216	sayNO;
	3217	l += ulen;
	3218	s ++;
	3219	}
	3220	}
	3221	else {
	3222	/* The target is not utf8, the pattern is utf8. */
	3223	while (s < e) {
	3224	STRLEN ulen;
	3225	if (l >= PL_regeol)
	3226	sayNO;
	3227	if (NATIVE_TO_UNI(((U8)l)) !=
	3228	utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
	3229	uniflags))
	3230	sayNO;
	3231	s += ulen;
	3232	l ++;
	3233	}
	3234	}
	3235	locinput = l;
	3236	nextchr = UCHARAT(locinput);
	3237	break;
	3238	}
	3239	/* The target and the pattern have the same utf8ness. */
	3240	/* Inline the first character, for speed. */
	3241	if (UCHARAT(s) != nextchr)
	3242	sayNO;
	3243	if (PL_regeol - locinput < ln)
	3244	sayNO;
	3245	if (ln > 1 && memNE(s, locinput, ln))
	3246	sayNO;
	3247	locinput += ln;
	3248	nextchr = UCHARAT(locinput);
	3249	break;
	3250	}
	3251	case EXACTFL:
	3252	PL_reg_flags \|= RF_tainted;
	3253	/* FALL THROUGH */
	3254	case EXACTF: {
	3255	char * const s = STRING(scan);
	3256	ln = STR_LEN(scan);
	3257
	3258	if (do_utf8 \|\| UTF) {
	3259	/* Either target or the pattern are utf8. */
	3260	const char * const l = locinput;
	3261	char *e = PL_regeol;
	3262
	3263	if (ibcmp_utf8(s, 0, ln, (bool)UTF,
	3264	l, &e, 0, do_utf8)) {
	3265	/* One more case for the sharp s:
	3266	* pack("U0U*", 0xDF) =~ /ss/i,
	3267	* the 0xC3 0x9F are the UTF-8
	3268	* byte sequence for the U+00DF. */
	3269
	3270	if (!(do_utf8 &&
	3271	toLOWER(s[0]) == 's' &&
	3272	ln >= 2 &&
	3273	toLOWER(s[1]) == 's' &&
	3274	(U8)l[0] == 0xC3 &&
	3275	e - l >= 2 &&
	3276	(U8)l[1] == 0x9F))
	3277	sayNO;
	3278	}
	3279	locinput = e;
	3280	nextchr = UCHARAT(locinput);
	3281	break;
	3282	}
	3283
	3284	/* Neither the target and the pattern are utf8. */
	3285
	3286	/* Inline the first character, for speed. */
	3287	if (UCHARAT(s) != nextchr &&
	3288	UCHARAT(s) != ((OP(scan) == EXACTF)
	3289	? PL_fold : PL_fold_locale)[nextchr])
	3290	sayNO;
	3291	if (PL_regeol - locinput < ln)
	3292	sayNO;
	3293	if (ln > 1 && (OP(scan) == EXACTF
	3294	? ibcmp(s, locinput, ln)
	3295	: ibcmp_locale(s, locinput, ln)))
	3296	sayNO;
	3297	locinput += ln;
	3298	nextchr = UCHARAT(locinput);
	3299	break;
	3300	}
	3301	case ANYOF:
	3302	if (do_utf8) {
	3303	STRLEN inclasslen = PL_regeol - locinput;
	3304
	3305	if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
	3306	goto anyof_fail;
	3307	if (locinput >= PL_regeol)
	3308	sayNO;
	3309	locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
	3310	nextchr = UCHARAT(locinput);
	3311	break;
	3312	}
	3313	else {
	3314	if (nextchr < 0)
	3315	nextchr = UCHARAT(locinput);
	3316	if (!REGINCLASS(rex, scan, (U8*)locinput))
	3317	goto anyof_fail;
	3318	if (!nextchr && locinput >= PL_regeol)
	3319	sayNO;
	3320	nextchr = UCHARAT(++locinput);
	3321	break;
	3322	}
	3323	anyof_fail:
	3324	/* If we might have the case of the German sharp s
	3325	* in a casefolding Unicode character class. */
	3326
	3327	if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
	3328	locinput += SHARP_S_SKIP;
	3329	nextchr = UCHARAT(locinput);
	3330	}
	3331	else
	3332	sayNO;
	3333	break;
	3334	case ALNUML:
	3335	PL_reg_flags \|= RF_tainted;
	3336	/* FALL THROUGH */
	3337	case ALNUM:
	3338	if (!nextchr)
	3339	sayNO;
	3340	if (do_utf8) {
	3341	LOAD_UTF8_CHARCLASS_ALNUM();
	3342	if (!(OP(scan) == ALNUM
	3343	? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
	3344	: isALNUM_LC_utf8((U8*)locinput)))
	3345	{
	3346	sayNO;
	3347	}
	3348	locinput += PL_utf8skip[nextchr];
	3349	nextchr = UCHARAT(locinput);
	3350	break;
	3351	}
	3352	if (!(OP(scan) == ALNUM
	3353	? isALNUM(nextchr) : isALNUM_LC(nextchr)))
	3354	sayNO;
	3355	nextchr = UCHARAT(++locinput);
	3356	break;
	3357	case NALNUML:
	3358	PL_reg_flags \|= RF_tainted;
	3359	/* FALL THROUGH */
	3360	case NALNUM:
	3361	if (!nextchr && locinput >= PL_regeol)
	3362	sayNO;
	3363	if (do_utf8) {
	3364	LOAD_UTF8_CHARCLASS_ALNUM();
	3365	if (OP(scan) == NALNUM
	3366	? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
	3367	: isALNUM_LC_utf8((U8*)locinput))
	3368	{
	3369	sayNO;
	3370	}
	3371	locinput += PL_utf8skip[nextchr];
	3372	nextchr = UCHARAT(locinput);
	3373	break;
	3374	}
	3375	if (OP(scan) == NALNUM
	3376	? isALNUM(nextchr) : isALNUM_LC(nextchr))
	3377	sayNO;
	3378	nextchr = UCHARAT(++locinput);
	3379	break;
	3380	case BOUNDL:
	3381	case NBOUNDL:
	3382	PL_reg_flags \|= RF_tainted;
	3383	/* FALL THROUGH */
	3384	case BOUND:
	3385	case NBOUND:
	3386	/* was last char in word? */
	3387	if (do_utf8) {
	3388	if (locinput == PL_bostr)
	3389	ln = '\n';
	3390	else {
	3391	const U8 * const r = reghop3((U8)locinput, -1, (U8)PL_bostr);
	3392
	3393	ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
	3394	}
	3395	if (OP(scan) == BOUND \|\| OP(scan) == NBOUND) {
	3396	ln = isALNUM_uni(ln);
	3397	LOAD_UTF8_CHARCLASS_ALNUM();
	3398	n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
	3399	}
	3400	else {
	3401	ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
	3402	n = isALNUM_LC_utf8((U8*)locinput);
	3403	}
	3404	}
	3405	else {
	3406	ln = (locinput != PL_bostr) ?
	3407	UCHARAT(locinput - 1) : '\n';
	3408	if (OP(scan) == BOUND \|\| OP(scan) == NBOUND) {
	3409	ln = isALNUM(ln);
	3410	n = isALNUM(nextchr);
	3411	}
	3412	else {
	3413	ln = isALNUM_LC(ln);
	3414	n = isALNUM_LC(nextchr);
	3415	}
	3416	}
	3417	if (((!ln) == (!n)) == (OP(scan) == BOUND \|\|
	3418	OP(scan) == BOUNDL))
	3419	sayNO;
	3420	break;
	3421	case SPACEL:
	3422	PL_reg_flags \|= RF_tainted;
	3423	/* FALL THROUGH */
	3424	case SPACE:
	3425	if (!nextchr)
	3426	sayNO;
	3427	if (do_utf8) {
	3428	if (UTF8_IS_CONTINUED(nextchr)) {
	3429	LOAD_UTF8_CHARCLASS_SPACE();
	3430	if (!(OP(scan) == SPACE
	3431	? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
	3432	: isSPACE_LC_utf8((U8*)locinput)))
	3433	{
	3434	sayNO;
	3435	}
	3436	locinput += PL_utf8skip[nextchr];
	3437	nextchr = UCHARAT(locinput);
	3438	break;
	3439	}
	3440	if (!(OP(scan) == SPACE
	3441	? isSPACE(nextchr) : isSPACE_LC(nextchr)))
	3442	sayNO;
	3443	nextchr = UCHARAT(++locinput);
	3444	}
	3445	else {
	3446	if (!(OP(scan) == SPACE
	3447	? isSPACE(nextchr) : isSPACE_LC(nextchr)))
	3448	sayNO;
	3449	nextchr = UCHARAT(++locinput);
	3450	}
	3451	break;
	3452	case NSPACEL:
	3453	PL_reg_flags \|= RF_tainted;
	3454	/* FALL THROUGH */
	3455	case NSPACE:
	3456	if (!nextchr && locinput >= PL_regeol)
	3457	sayNO;
	3458	if (do_utf8) {
	3459	LOAD_UTF8_CHARCLASS_SPACE();
	3460	if (OP(scan) == NSPACE
	3461	? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
	3462	: isSPACE_LC_utf8((U8*)locinput))
	3463	{
	3464	sayNO;
	3465	}
	3466	locinput += PL_utf8skip[nextchr];
	3467	nextchr = UCHARAT(locinput);
	3468	break;
	3469	}
	3470	if (OP(scan) == NSPACE
	3471	? isSPACE(nextchr) : isSPACE_LC(nextchr))
	3472	sayNO;
	3473	nextchr = UCHARAT(++locinput);
	3474	break;
	3475	case DIGITL:
	3476	PL_reg_flags \|= RF_tainted;
	3477	/* FALL THROUGH */
	3478	case DIGIT:
	3479	if (!nextchr)
	3480	sayNO;
	3481	if (do_utf8) {
	3482	LOAD_UTF8_CHARCLASS_DIGIT();
	3483	if (!(OP(scan) == DIGIT
	3484	? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
	3485	: isDIGIT_LC_utf8((U8*)locinput)))
	3486	{
	3487	sayNO;
	3488	}
	3489	locinput += PL_utf8skip[nextchr];
	3490	nextchr = UCHARAT(locinput);
	3491	break;
	3492	}
	3493	if (!(OP(scan) == DIGIT
	3494	? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
	3495	sayNO;
	3496	nextchr = UCHARAT(++locinput);
	3497	break;
	3498	case NDIGITL:
	3499	PL_reg_flags \|= RF_tainted;
	3500	/* FALL THROUGH */
	3501	case NDIGIT:
	3502	if (!nextchr && locinput >= PL_regeol)
	3503	sayNO;
	3504	if (do_utf8) {
	3505	LOAD_UTF8_CHARCLASS_DIGIT();
	3506	if (OP(scan) == NDIGIT
	3507	? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
	3508	: isDIGIT_LC_utf8((U8*)locinput))
	3509	{
	3510	sayNO;
	3511	}
	3512	locinput += PL_utf8skip[nextchr];
	3513	nextchr = UCHARAT(locinput);
	3514	break;
	3515	}
	3516	if (OP(scan) == NDIGIT
	3517	? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
	3518	sayNO;
	3519	nextchr = UCHARAT(++locinput);
	3520	break;
	3521	case CLUMP:
	3522	if (locinput >= PL_regeol)
	3523	sayNO;
	3524	if (do_utf8) {
	3525	LOAD_UTF8_CHARCLASS_MARK();
	3526	if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
	3527	sayNO;
	3528	locinput += PL_utf8skip[nextchr];
	3529	while (locinput < PL_regeol &&
	3530	swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
	3531	locinput += UTF8SKIP(locinput);
	3532	if (locinput > PL_regeol)
	3533	sayNO;
	3534	}
	3535	else
	3536	locinput++;
	3537	nextchr = UCHARAT(locinput);
	3538	break;
	3539
	3540	case NREFFL:
	3541	{
	3542	char *s;
	3543	char type;
	3544	PL_reg_flags \|= RF_tainted;
	3545	/* FALL THROUGH */
	3546	case NREF:
	3547	case NREFF:
	3548	type = OP(scan);
	3549	n = reg_check_named_buff_matched(rex,scan);
	3550
	3551	if ( n ) {
	3552	type = REF + ( type - NREF );
	3553	goto do_ref;
	3554	} else {
	3555	sayNO;
	3556	}
	3557	/* unreached */
	3558	case REFFL:
	3559	PL_reg_flags \|= RF_tainted;
	3560	/* FALL THROUGH */
	3561	case REF:
	3562	case REFF:
	3563	n = ARG(scan); /* which paren pair */
	3564	type = OP(scan);
	3565	do_ref:
	3566	ln = PL_regoffs[n].start;
	3567	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	3568	if (*PL_reglastparen < n \|\| ln == -1)
	3569	sayNO; /* Do not match unless seen CLOSEn. */
	3570	if (ln == PL_regoffs[n].end)
	3571	break;
	3572
	3573	s = PL_bostr + ln;
	3574	if (do_utf8 && type != REF) { /* REF can do byte comparison */
	3575	char *l = locinput;
	3576	const char *e = PL_bostr + PL_regoffs[n].end;
	3577	/*
	3578	* Note that we can't do the "other character" lookup trick as
	3579	* in the 8-bit case (no pun intended) because in Unicode we
	3580	* have to map both upper and title case to lower case.
	3581	*/
	3582	if (type == REFF) {
	3583	while (s < e) {
	3584	STRLEN ulen1, ulen2;
	3585	U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
	3586	U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
	3587
	3588	if (l >= PL_regeol)
	3589	sayNO;
	3590	toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
	3591	toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
	3592	if (ulen1 != ulen2 \|\| memNE((char )tmpbuf1, (char )tmpbuf2, ulen1))
	3593	sayNO;
	3594	s += ulen1;
	3595	l += ulen2;
	3596	}
	3597	}
	3598	locinput = l;
	3599	nextchr = UCHARAT(locinput);
	3600	break;
	3601	}
	3602
	3603	/* Inline the first character, for speed. */
	3604	if (UCHARAT(s) != nextchr &&
	3605	(type == REF \|\|
	3606	(UCHARAT(s) != (type == REFF
	3607	? PL_fold : PL_fold_locale)[nextchr])))
	3608	sayNO;
	3609	ln = PL_regoffs[n].end - ln;
	3610	if (locinput + ln > PL_regeol)
	3611	sayNO;
	3612	if (ln > 1 && (type == REF
	3613	? memNE(s, locinput, ln)
	3614	: (type == REFF
	3615	? ibcmp(s, locinput, ln)
	3616	: ibcmp_locale(s, locinput, ln))))
	3617	sayNO;
	3618	locinput += ln;
	3619	nextchr = UCHARAT(locinput);
	3620	break;
	3621	}
	3622	case NOTHING:
	3623	case TAIL:
	3624	break;
	3625	case BACK:
	3626	break;
	3627
	3628	#undef ST
	3629	#define ST st->u.eval
	3630	{
	3631	SV *ret;
	3632	regexp *re;
	3633	regexp_internal *rei;
	3634	regnode *startpoint;
	3635
	3636	case GOSTART:
	3637	case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
	3638	if (cur_eval && cur_eval->locinput==locinput) {
	3639	if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
	3640	Perl_croak(aTHX_ "Infinite recursion in regex");
	3641	if ( ++nochange_depth > max_nochange_depth )
	3642	Perl_croak(aTHX_
	3643	"Pattern subroutine nesting without pos change"
	3644	" exceeded limit in regex");
	3645	} else {
	3646	nochange_depth = 0;
	3647	}
	3648	re = rex;
	3649	rei = rexi;
	3650	(void)ReREFCNT_inc(rex);
	3651	if (OP(scan)==GOSUB) {
	3652	startpoint = scan + ARG2L(scan);
	3653	ST.close_paren = ARG(scan);
	3654	} else {
	3655	startpoint = rei->program+1;
	3656	ST.close_paren = 0;
	3657	}
	3658	goto eval_recurse_doit;
	3659	/* NOTREACHED */
	3660	case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X\|Y)B/ */
	3661	if (cur_eval && cur_eval->locinput==locinput) {
	3662	if ( ++nochange_depth > max_nochange_depth )
	3663	Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
	3664	} else {
	3665	nochange_depth = 0;
	3666	}
	3667	{
	3668	/* execute the code in the {...} */
	3669	dSP;
	3670	SV ** const before = SP;
	3671	OP_4tree * const oop = PL_op;
	3672	COP * const ocurcop = PL_curcop;
	3673	PAD *old_comppad;
	3674
	3675	n = ARG(scan);
	3676	PL_op = (OP_4tree*)rexi->data->data[n];
	3677	DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
	3678	" re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
	3679	PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
	3680	PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
	3681
	3682	if (sv_yes_mark) {
	3683	SV *sv_mrk = get_sv("REGMARK", 1);
	3684	sv_setsv(sv_mrk, sv_yes_mark);
	3685	}
	3686
	3687	CALLRUNOPS(aTHX); /* Scalar context. */
	3688	SPAGAIN;
	3689	if (SP == before)
	3690	ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
	3691	else {
	3692	ret = POPs;
	3693	PUTBACK;
	3694	}
	3695
	3696	PL_op = oop;
	3697	PAD_RESTORE_LOCAL(old_comppad);
	3698	PL_curcop = ocurcop;
	3699	if (!logical) {
	3700	/* /(?{...})/ */
	3701	sv_setsv(save_scalar(PL_replgv), ret);
	3702	break;
	3703	}
	3704	}
	3705	if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
	3706	logical = 0;
	3707	{
	3708	/* extract RE object from returned value; compiling if
	3709	* necessary */
	3710
	3711	MAGIC *mg = NULL;
	3712	const SV *sv;
	3713	if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
	3714	mg = mg_find(sv, PERL_MAGIC_qr);
	3715	else if (SvSMAGICAL(ret)) {
	3716	if (SvGMAGICAL(ret))
	3717	sv_unmagic(ret, PERL_MAGIC_qr);
	3718	else
	3719	mg = mg_find(ret, PERL_MAGIC_qr);
	3720	}
	3721
	3722	if (mg) {
	3723	re = reg_temp_copy((regexp )mg->mg_obj); /XXX:dmq*/
	3724	}
	3725	else {
	3726	U32 pm_flags = 0;
	3727	const I32 osize = PL_regsize;
	3728
	3729	if (DO_UTF8(ret)) pm_flags \|= RXf_UTF8;
	3730	re = CALLREGCOMP(ret, pm_flags);
	3731	if (!(SvFLAGS(ret)
	3732	& (SVs_TEMP \| SVs_PADTMP \| SVf_READONLY
	3733	\| SVs_GMG)))
	3734	sv_magic(ret,(SV*)ReREFCNT_inc(re),
	3735	PERL_MAGIC_qr,0,0);
	3736	PL_regsize = osize;
	3737	}
	3738	}
	3739	RX_MATCH_COPIED_off(re);
	3740	re->subbeg = rex->subbeg;
	3741	re->sublen = rex->sublen;
	3742	rei = RXi_GET(re);
	3743	DEBUG_EXECUTE_r(
	3744	debug_start_match(re, do_utf8, locinput, PL_regeol,
	3745	"Matching embedded");
	3746	);
	3747	startpoint = rei->program + 1;
	3748	ST.close_paren = 0; /* only used for GOSUB */
	3749	/* borrowed from regtry */
	3750	if (PL_reg_start_tmpl <= re->nparens) {
	3751	PL_reg_start_tmpl = re->nparens*3/2 + 3;
	3752	if(PL_reg_start_tmp)
	3753	Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	3754	else
	3755	Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	3756	}
	3757
	3758	eval_recurse_doit: /* Share code with GOSUB below this line */
	3759	/* run the pattern returned from (??{...}) */
	3760	ST.cp = regcppush(0); /* Save all the positions. */
	3761	REGCP_SET(ST.lastcp);
	3762
	3763	PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
	3764
	3765	*PL_reglastparen = 0;
	3766	*PL_reglastcloseparen = 0;
	3767	PL_reginput = locinput;
	3768	PL_regsize = 0;
	3769
	3770	/* XXXX This is too dramatic a measure... */
	3771	PL_reg_maxiter = 0;
	3772
	3773	ST.toggle_reg_flags = PL_reg_flags;
	3774	if (re->extflags & RXf_UTF8)
	3775	PL_reg_flags \|= RF_utf8;
	3776	else
	3777	PL_reg_flags &= ~RF_utf8;
	3778	ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
	3779
	3780	ST.prev_rex = rex;
	3781	ST.prev_curlyx = cur_curlyx;
	3782	SETREX(rex,re);
	3783	rexi = rei;
	3784	cur_curlyx = NULL;
	3785	ST.B = next;
	3786	ST.prev_eval = cur_eval;
	3787	cur_eval = st;
	3788	/* now continue from first node in postoned RE */
	3789	PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
	3790	/* NOTREACHED */
	3791	}
	3792	/* logical is 1, /(?(?{...})X\|Y)/ */
	3793	sw = (bool)SvTRUE(ret);
	3794	logical = 0;
	3795	break;
	3796	}
	3797
	3798	case EVAL_AB: /* cleanup after a successful (??{A})B */
	3799	/* note: this is called twice; first after popping B, then A */
	3800	PL_reg_flags ^= ST.toggle_reg_flags;
	3801	ReREFCNT_dec(rex);
	3802	SETREX(rex,ST.prev_rex);
	3803	rexi = RXi_GET(rex);
	3804	regcpblow(ST.cp);
	3805	cur_eval = ST.prev_eval;
	3806	cur_curlyx = ST.prev_curlyx;
	3807	/* XXXX This is too dramatic a measure... */
	3808	PL_reg_maxiter = 0;
	3809	if ( nochange_depth )
	3810	nochange_depth--;
	3811	sayYES;
	3812
	3813
	3814	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
	3815	/* note: this is called twice; first after popping B, then A */
	3816	PL_reg_flags ^= ST.toggle_reg_flags;
	3817	ReREFCNT_dec(rex);
	3818	SETREX(rex,ST.prev_rex);
	3819	rexi = RXi_GET(rex);
	3820	PL_reginput = locinput;
	3821	REGCP_UNWIND(ST.lastcp);
	3822	regcppop(rex);
	3823	cur_eval = ST.prev_eval;
	3824	cur_curlyx = ST.prev_curlyx;
	3825	/* XXXX This is too dramatic a measure... */
	3826	PL_reg_maxiter = 0;
	3827	if ( nochange_depth )
	3828	nochange_depth--;
	3829	sayNO_SILENT;
	3830	#undef ST
	3831
	3832	case OPEN:
	3833	n = ARG(scan); /* which paren pair */
	3834	PL_reg_start_tmp[n] = locinput;
	3835	if (n > PL_regsize)
	3836	PL_regsize = n;
	3837	lastopen = n;
	3838	break;
	3839	case CLOSE:
	3840	n = ARG(scan); /* which paren pair */
	3841	PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
	3842	PL_regoffs[n].end = locinput - PL_bostr;
	3843	/*if (n > PL_regsize)
	3844	PL_regsize = n;*/
	3845	if (n > *PL_reglastparen)
	3846	*PL_reglastparen = n;
	3847	*PL_reglastcloseparen = n;
	3848	if (cur_eval && cur_eval->u.eval.close_paren == n) {
	3849	goto fake_end;
	3850	}
	3851	break;
	3852	case ACCEPT:
	3853	if (ARG(scan)){
	3854	regnode *cursor;
	3855	for (cursor=scan;
	3856	cursor && OP(cursor)!=END;
	3857	cursor=regnext(cursor))
	3858	{
	3859	if ( OP(cursor)==CLOSE ){
	3860	n = ARG(cursor);
	3861	if ( n <= lastopen ) {
	3862	PL_regoffs[n].start
	3863	= PL_reg_start_tmp[n] - PL_bostr;
	3864	PL_regoffs[n].end = locinput - PL_bostr;
	3865	/*if (n > PL_regsize)
	3866	PL_regsize = n;*/
	3867	if (n > *PL_reglastparen)
	3868	*PL_reglastparen = n;
	3869	*PL_reglastcloseparen = n;
	3870	if ( n == ARG(scan) \|\| (cur_eval &&
	3871	cur_eval->u.eval.close_paren == n))
	3872	break;
	3873	}
	3874	}
	3875	}
	3876	}
	3877	goto fake_end;
	3878	/NOTREACHED/
	3879	case GROUPP:
	3880	n = ARG(scan); /* which paren pair */
	3881	sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
	3882	break;
	3883	case NGROUPP:
	3884	/* reg_check_named_buff_matched returns 0 for no match */
	3885	sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
	3886	break;
	3887	case INSUBP:
	3888	n = ARG(scan);
	3889	sw = (cur_eval && (!n \|\| cur_eval->u.eval.close_paren == n));
	3890	break;
	3891	case DEFINEP:
	3892	sw = 0;
	3893	break;
	3894	case IFTHEN:
	3895	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	3896	if (sw)
	3897	next = NEXTOPER(NEXTOPER(scan));
	3898	else {
	3899	next = scan + ARG(scan);
	3900	if (OP(next) == IFTHEN) /* Fake one. */
	3901	next = NEXTOPER(NEXTOPER(next));
	3902	}
	3903	break;
	3904	case LOGICAL:
	3905	logical = scan->flags;
	3906	break;
	3907
	3908	/*******************************************************************
	3909
	3910	The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
	3911	pattern, where A and B are subpatterns. (For simple A, CURLYM or
	3912	STAR/PLUS/CURLY/CURLYN are used instead.)
	3913
	3914	A*B is compiled as <CURLYX><A><WHILEM><B>
	3915
	3916	On entry to the subpattern, CURLYX is called. This pushes a CURLYX
	3917	state, which contains the current count, initialised to -1. It also sets
	3918	cur_curlyx to point to this state, with any previous value saved in the
	3919	state block.
	3920
	3921	CURLYX then jumps straight to the WHILEM op, rather than executing A,
	3922	since the pattern may possibly match zero times (i.e. it's a while {} loop
	3923	rather than a do {} while loop).
	3924
	3925	Each entry to WHILEM represents a successful match of A. The count in the
	3926	CURLYX block is incremented, another WHILEM state is pushed, and execution
	3927	passes to A or B depending on greediness and the current count.
	3928
	3929	For example, if matching against the string a1a2a3b (where the aN are
	3930	substrings that match /A/), then the match progresses as follows: (the
	3931	pushed states are interspersed with the bits of strings matched so far):
	3932
	3933	<CURLYX cnt=-1>
	3934	<CURLYX cnt=0><WHILEM>
	3935	<CURLYX cnt=1><WHILEM> a1 <WHILEM>
	3936	<CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
	3937	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
	3938	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
	3939
	3940	(Contrast this with something like CURLYM, which maintains only a single
	3941	backtrack state:
	3942
	3943	<CURLYM cnt=0> a1
	3944	a1 <CURLYM cnt=1> a2
	3945	a1 a2 <CURLYM cnt=2> a3
	3946	a1 a2 a3 <CURLYM cnt=3> b
	3947	)
	3948
	3949	Each WHILEM state block marks a point to backtrack to upon partial failure
	3950	of A or B, and also contains some minor state data related to that
	3951	iteration. The CURLYX block, pointed to by cur_curlyx, contains the
	3952	overall state, such as the count, and pointers to the A and B ops.
	3953
	3954	This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
	3955	must always point to the current CURLYX block, the rules are:
	3956
	3957	When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
	3958	and set cur_curlyx to point the new block.
	3959
	3960	When popping the CURLYX block after a successful or unsuccessful match,
	3961	restore the previous cur_curlyx.
	3962
	3963	When WHILEM is about to execute B, save the current cur_curlyx, and set it
	3964	to the outer one saved in the CURLYX block.
	3965
	3966	When popping the WHILEM block after a successful or unsuccessful B match,
	3967	restore the previous cur_curlyx.
	3968
	3969	Here's an example for the pattern (AI* BI)*BO
	3970	I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
	3971
	3972	cur_
	3973	curlyx backtrack stack
	3974	------ ---------------
	3975	NULL
	3976	CO <CO prev=NULL> <WO>
	3977	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	3978	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	3979	NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
	3980
	3981	At this point the pattern succeeds, and we work back down the stack to
	3982	clean up, restoring as we go:
	3983
	3984	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	3985	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	3986	CO <CO prev=NULL> <WO>
	3987	NULL
	3988
	3989	*******************************************************************/
	3990
	3991	#define ST st->u.curlyx
	3992
	3993	case CURLYX: /* start of /AB/ (for complex A) /
	3994	{
	3995	/* No need to save/restore up to this paren */
	3996	I32 parenfloor = scan->flags;
	3997
	3998	assert(next); /* keep Coverity happy */
	3999	if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
	4000	next += ARG(next);
	4001
	4002	/* XXXX Probably it is better to teach regpush to support
	4003	parenfloor > PL_regsize... */
	4004	if (parenfloor > (I32)*PL_reglastparen)
	4005	parenfloor = PL_reglastparen; / Pessimization... */
	4006
	4007	ST.prev_curlyx= cur_curlyx;
	4008	cur_curlyx = st;
	4009	ST.cp = PL_savestack_ix;
	4010
	4011	/* these fields contain the state of the current curly.
	4012	* they are accessed by subsequent WHILEMs */
	4013	ST.parenfloor = parenfloor;
	4014	ST.min = ARG1(scan);
	4015	ST.max = ARG2(scan);
	4016	ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
	4017	ST.B = next;
	4018	ST.minmod = minmod;
	4019	minmod = 0;
	4020	ST.count = -1; /* this will be updated by WHILEM */
	4021	ST.lastloc = NULL; /* this will be updated by WHILEM */
	4022
	4023	PL_reginput = locinput;
	4024	PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
	4025	/* NOTREACHED */
	4026	}
	4027
	4028	case CURLYX_end: /* just finished matching all of AB /
	4029	cur_curlyx = ST.prev_curlyx;
	4030	sayYES;
	4031	/* NOTREACHED */
	4032
	4033	case CURLYX_end_fail: /* just failed to match all of AB /
	4034	regcpblow(ST.cp);
	4035	cur_curlyx = ST.prev_curlyx;
	4036	sayNO;
	4037	/* NOTREACHED */
	4038
	4039
	4040	#undef ST
	4041	#define ST st->u.whilem
	4042
	4043	case WHILEM: /* just matched an A in /AB/ (for complex A) /
	4044	{
	4045	/* see the discussion above about CURLYX/WHILEM */
	4046	I32 n;
	4047	assert(cur_curlyx); /* keep Coverity happy */
	4048	n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
	4049	ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
	4050	ST.cache_offset = 0;
	4051	ST.cache_mask = 0;
	4052
	4053	PL_reginput = locinput;
	4054
	4055	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4056	"%*s whilem: matched %ld out of %ld..%ld\n",
	4057	REPORT_CODE_OFF+depth*2, "", (long)n,
	4058	(long)cur_curlyx->u.curlyx.min,
	4059	(long)cur_curlyx->u.curlyx.max)
	4060	);
	4061
	4062	/* First just match a string of min A's. */
	4063
	4064	if (n < cur_curlyx->u.curlyx.min) {
	4065	cur_curlyx->u.curlyx.lastloc = locinput;
	4066	PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
	4067	/* NOTREACHED */
	4068	}
	4069
	4070	/* If degenerate A matches "", assume A done. */
	4071
	4072	if (locinput == cur_curlyx->u.curlyx.lastloc) {
	4073	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4074	"%*s whilem: empty match detected, trying continuation...\n",
	4075	REPORT_CODE_OFF+depth*2, "")
	4076	);
	4077	goto do_whilem_B_max;
	4078	}
	4079
	4080	/* super-linear cache processing */
	4081
	4082	if (scan->flags) {
	4083
	4084	if (!PL_reg_maxiter) {
	4085	/* start the countdown: Postpone detection until we
	4086	* know the match is not that much linear. */
	4087	PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
	4088	/* possible overflow for long strings and many CURLYX's */
	4089	if (PL_reg_maxiter < 0)
	4090	PL_reg_maxiter = I32_MAX;
	4091	PL_reg_leftiter = PL_reg_maxiter;
	4092	}
	4093
	4094	if (PL_reg_leftiter-- == 0) {
	4095	/* initialise cache */
	4096	const I32 size = (PL_reg_maxiter + 7)/8;
	4097	if (PL_reg_poscache) {
	4098	if ((I32)PL_reg_poscache_size < size) {
	4099	Renew(PL_reg_poscache, size, char);
	4100	PL_reg_poscache_size = size;
	4101	}
	4102	Zero(PL_reg_poscache, size, char);
	4103	}
	4104	else {
	4105	PL_reg_poscache_size = size;
	4106	Newxz(PL_reg_poscache, size, char);
	4107	}
	4108	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4109	"%swhilem: Detected a super-linear match, switching on caching%s...\n",
	4110	PL_colors[4], PL_colors[5])
	4111	);
	4112	}
	4113
	4114	if (PL_reg_leftiter < 0) {
	4115	/* have we already failed at this position? */
	4116	I32 offset, mask;
	4117	offset = (scan->flags & 0xf) - 1
	4118	+ (locinput - PL_bostr) * (scan->flags>>4);
	4119	mask = 1 << (offset % 8);
	4120	offset /= 8;
	4121	if (PL_reg_poscache[offset] & mask) {
	4122	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4123	"%*s whilem: (cache) already tried at this position...\n",
	4124	REPORT_CODE_OFF+depth*2, "")
	4125	);
	4126	sayNO; /* cache records failure */
	4127	}
	4128	ST.cache_offset = offset;
	4129	ST.cache_mask = mask;
	4130	}
	4131	}
	4132
	4133	/* Prefer B over A for minimal matching. */
	4134
	4135	if (cur_curlyx->u.curlyx.minmod) {
	4136	ST.save_curlyx = cur_curlyx;
	4137	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	4138	ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
	4139	REGCP_SET(ST.lastcp);
	4140	PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
	4141	/* NOTREACHED */
	4142	}
	4143
	4144	/* Prefer A over B for maximal matching. */
	4145
	4146	if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
	4147	ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
	4148	cur_curlyx->u.curlyx.lastloc = locinput;
	4149	REGCP_SET(ST.lastcp);
	4150	PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
	4151	/* NOTREACHED */
	4152	}
	4153	goto do_whilem_B_max;
	4154	}
	4155	/* NOTREACHED */
	4156
	4157	case WHILEM_B_min: /* just matched B in a minimal match */
	4158	case WHILEM_B_max: /* just matched B in a maximal match */
	4159	cur_curlyx = ST.save_curlyx;
	4160	sayYES;
	4161	/* NOTREACHED */
	4162
	4163	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
	4164	cur_curlyx = ST.save_curlyx;
	4165	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	4166	cur_curlyx->u.curlyx.count--;
	4167	CACHEsayNO;
	4168	/* NOTREACHED */
	4169
	4170	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
	4171	REGCP_UNWIND(ST.lastcp);
	4172	regcppop(rex);
	4173	/* FALL THROUGH */
	4174	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
	4175	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	4176	cur_curlyx->u.curlyx.count--;
	4177	CACHEsayNO;
	4178	/* NOTREACHED */
	4179
	4180	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
	4181	REGCP_UNWIND(ST.lastcp);
	4182	regcppop(rex); /* Restore some previous $<digit>s? */
	4183	PL_reginput = locinput;
	4184	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	4185	"%*s whilem: failed, trying continuation...\n",
	4186	REPORT_CODE_OFF+depth*2, "")
	4187	);
	4188	do_whilem_B_max:
	4189	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	4190	&& ckWARN(WARN_REGEXP)
	4191	&& !(PL_reg_flags & RF_warned))
	4192	{
	4193	PL_reg_flags \|= RF_warned;
	4194	Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
	4195	"Complex regular subexpression recursion",
	4196	REG_INFTY - 1);
	4197	}
	4198
	4199	/* now try B */
	4200	ST.save_curlyx = cur_curlyx;
	4201	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	4202	PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
	4203	/* NOTREACHED */
	4204
	4205	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
	4206	cur_curlyx = ST.save_curlyx;
	4207	REGCP_UNWIND(ST.lastcp);
	4208	regcppop(rex);
	4209
	4210	if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
	4211	/* Maximum greed exceeded */
	4212	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	4213	&& ckWARN(WARN_REGEXP)
	4214	&& !(PL_reg_flags & RF_warned))
	4215	{
	4216	PL_reg_flags \|= RF_warned;
	4217	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	4218	"%s limit (%d) exceeded",
	4219	"Complex regular subexpression recursion",
	4220	REG_INFTY - 1);
	4221	}
	4222	cur_curlyx->u.curlyx.count--;
	4223	CACHEsayNO;
	4224	}
	4225
	4226	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	4227	"%s trying longer...\n", REPORT_CODE_OFF+depth2, "")
	4228	);
	4229	/* Try grabbing another A and see if it helps. */
	4230	PL_reginput = locinput;
	4231	cur_curlyx->u.curlyx.lastloc = locinput;
	4232	ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
	4233	REGCP_SET(ST.lastcp);
	4234	PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
	4235	/* NOTREACHED */
	4236
	4237	#undef ST
	4238	#define ST st->u.branch
	4239
	4240	case BRANCHJ: /* /(...\|A\|...)/ with long next pointer */
	4241	next = scan + ARG(scan);
	4242	if (next == scan)
	4243	next = NULL;
	4244	scan = NEXTOPER(scan);
	4245	/* FALL THROUGH */
	4246
	4247	case BRANCH: /* /(...\|A\|...)/ */
	4248	scan = NEXTOPER(scan); /* scan now points to inner node */
	4249	ST.lastparen = *PL_reglastparen;
	4250	ST.next_branch = next;
	4251	REGCP_SET(ST.cp);
	4252	PL_reginput = locinput;
	4253
	4254	/* Now go into the branch */
	4255	if (has_cutgroup) {
	4256	PUSH_YES_STATE_GOTO(BRANCH_next, scan);
	4257	} else {
	4258	PUSH_STATE_GOTO(BRANCH_next, scan);
	4259	}
	4260	/* NOTREACHED */
	4261	case CUTGROUP:
	4262	PL_reginput = locinput;
	4263	sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
	4264	(SV*)rexi->data->data[ ARG( scan ) ];
	4265	PUSH_STATE_GOTO(CUTGROUP_next,next);
	4266	/* NOTREACHED */
	4267	case CUTGROUP_next_fail:
	4268	do_cutgroup = 1;
	4269	no_final = 1;
	4270	if (st->u.mark.mark_name)
	4271	sv_commit = st->u.mark.mark_name;
	4272	sayNO;
	4273	/* NOTREACHED */
	4274	case BRANCH_next:
	4275	sayYES;
	4276	/* NOTREACHED */
	4277	case BRANCH_next_fail: /* that branch failed; try the next, if any */
	4278	if (do_cutgroup) {
	4279	do_cutgroup = 0;
	4280	no_final = 0;
	4281	}
	4282	REGCP_UNWIND(ST.cp);
	4283	for (n = *PL_reglastparen; n > ST.lastparen; n--)
	4284	PL_regoffs[n].end = -1;
	4285	*PL_reglastparen = n;
	4286	/dmq: PL_reglastcloseparen = n; */
	4287	scan = ST.next_branch;
	4288	/* no more branches? */
	4289	if (!scan \|\| (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
	4290	DEBUG_EXECUTE_r({
	4291	PerlIO_printf( Perl_debug_log,
	4292	"%*s %sBRANCH failed...%s\n",
	4293	REPORT_CODE_OFF+depth*2, "",
	4294	PL_colors[4],
	4295	PL_colors[5] );
	4296	});
	4297	sayNO_SILENT;
	4298	}
	4299	continue; /* execute next BRANCH[J] op */
	4300	/* NOTREACHED */
	4301
	4302	case MINMOD:
	4303	minmod = 1;
	4304	break;
	4305
	4306	#undef ST
	4307	#define ST st->u.curlym
	4308
	4309	case CURLYM: /* /A{m,n}B/ where A is fixed-length */
	4310
	4311	/* This is an optimisation of CURLYX that enables us to push
	4312	* only a single backtracking state, no matter now many matches
	4313	* there are in {m,n}. It relies on the pattern being constant
	4314	* length, with no parens to influence future backrefs
	4315	*/
	4316
	4317	ST.me = scan;
	4318	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	4319
	4320	/* if paren positive, emulate an OPEN/CLOSE around A */
	4321	if (ST.me->flags) {
	4322	U32 paren = ST.me->flags;
	4323	if (paren > PL_regsize)
	4324	PL_regsize = paren;
	4325	if (paren > *PL_reglastparen)
	4326	*PL_reglastparen = paren;
	4327	scan += NEXT_OFF(scan); /* Skip former OPEN. */
	4328	}
	4329	ST.A = scan;
	4330	ST.B = next;
	4331	ST.alen = 0;
	4332	ST.count = 0;
	4333	ST.minmod = minmod;
	4334	minmod = 0;
	4335	ST.c1 = CHRTEST_UNINIT;
	4336	REGCP_SET(ST.cp);
	4337
	4338	if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
	4339	goto curlym_do_B;
	4340
	4341	curlym_do_A: /* execute the A in /A{m,n}B/ */
	4342	PL_reginput = locinput;
	4343	PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
	4344	/* NOTREACHED */
	4345
	4346	case CURLYM_A: /* we've just matched an A */
	4347	locinput = st->locinput;
	4348	nextchr = UCHARAT(locinput);
	4349
	4350	ST.count++;
	4351	/* after first match, determine A's length: u.curlym.alen */
	4352	if (ST.count == 1) {
	4353	if (PL_reg_match_utf8) {
	4354	char *s = locinput;
	4355	while (s < PL_reginput) {
	4356	ST.alen++;
	4357	s += UTF8SKIP(s);
	4358	}
	4359	}
	4360	else {
	4361	ST.alen = PL_reginput - locinput;
	4362	}
	4363	if (ST.alen == 0)
	4364	ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
	4365	}
	4366	DEBUG_EXECUTE_r(
	4367	PerlIO_printf(Perl_debug_log,
	4368	"%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
	4369	(int)(REPORT_CODE_OFF+(depth*2)), "",
	4370	(IV) ST.count, (IV)ST.alen)
	4371	);
	4372
	4373	locinput = PL_reginput;
	4374
	4375	if (cur_eval && cur_eval->u.eval.close_paren &&
	4376	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	4377	goto fake_end;
	4378
	4379	if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
	4380	goto curlym_do_A; /* try to match another A */
	4381	goto curlym_do_B; /* try to match B */
	4382
	4383	case CURLYM_A_fail: /* just failed to match an A */
	4384	REGCP_UNWIND(ST.cp);
	4385
	4386	if (ST.minmod \|\| ST.count < ARG1(ST.me) /* min*/
	4387	\|\| (cur_eval && cur_eval->u.eval.close_paren &&
	4388	cur_eval->u.eval.close_paren == (U32)ST.me->flags))
	4389	sayNO;
	4390
	4391	curlym_do_B: /* execute the B in /A{m,n}B/ */
	4392	PL_reginput = locinput;
	4393	if (ST.c1 == CHRTEST_UNINIT) {
	4394	/* calculate c1 and c2 for possible match of 1st char
	4395	* following curly */
	4396	ST.c1 = ST.c2 = CHRTEST_VOID;
	4397	if (HAS_TEXT(ST.B) \|\| JUMPABLE(ST.B)) {
	4398	regnode *text_node = ST.B;
	4399	if (! HAS_TEXT(text_node))
	4400	FIND_NEXT_IMPT(text_node);
	4401	/* this used to be
	4402
	4403	(HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
	4404
	4405	But the former is redundant in light of the latter.
	4406
	4407	if this changes back then the macro for
	4408	IS_TEXT and friends need to change.
	4409	*/
	4410	if (PL_regkind[OP(text_node)] == EXACT)
	4411	{
	4412
	4413	ST.c1 = (U8)*STRING(text_node);
	4414	ST.c2 =
	4415	(IS_TEXTF(text_node))
	4416	? PL_fold[ST.c1]
	4417	: (IS_TEXTFL(text_node))
	4418	? PL_fold_locale[ST.c1]
	4419	: ST.c1;
	4420	}
	4421	}
	4422	}
	4423
	4424	DEBUG_EXECUTE_r(
	4425	PerlIO_printf(Perl_debug_log,
	4426	"%*s CURLYM trying tail with matches=%"IVdf"...\n",
	4427	(int)(REPORT_CODE_OFF+(depth*2)),
	4428	"", (IV)ST.count)
	4429	);
	4430	if (ST.c1 != CHRTEST_VOID
	4431	&& UCHARAT(PL_reginput) != ST.c1
	4432	&& UCHARAT(PL_reginput) != ST.c2)
	4433	{
	4434	/* simulate B failing */
	4435	DEBUG_OPTIMISE_r(
	4436	PerlIO_printf(Perl_debug_log,
	4437	"%*s CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
	4438	(int)(REPORT_CODE_OFF+(depth*2)),"",
	4439	(IV)ST.c1,(IV)ST.c2
	4440	));
	4441	state_num = CURLYM_B_fail;
	4442	goto reenter_switch;
	4443	}
	4444
	4445	if (ST.me->flags) {
	4446	/* mark current A as captured */
	4447	I32 paren = ST.me->flags;
	4448	if (ST.count) {
	4449	PL_regoffs[paren].start
	4450	= HOPc(PL_reginput, -ST.alen) - PL_bostr;
	4451	PL_regoffs[paren].end = PL_reginput - PL_bostr;
	4452	/dmq: PL_reglastcloseparen = paren; */
	4453	}
	4454	else
	4455	PL_regoffs[paren].end = -1;
	4456	if (cur_eval && cur_eval->u.eval.close_paren &&
	4457	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	4458	{
	4459	if (ST.count)
	4460	goto fake_end;
	4461	else
	4462	sayNO;
	4463	}
	4464	}
	4465
	4466	PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
	4467	/* NOTREACHED */
	4468
	4469	case CURLYM_B_fail: /* just failed to match a B */
	4470	REGCP_UNWIND(ST.cp);
	4471	if (ST.minmod) {
	4472	if (ST.count == ARG2(ST.me) /* max */)
	4473	sayNO;
	4474	goto curlym_do_A; /* try to match a further A */
	4475	}
	4476	/* backtrack one A */
	4477	if (ST.count == ARG1(ST.me) /* min */)
	4478	sayNO;
	4479	ST.count--;
	4480	locinput = HOPc(locinput, -ST.alen);
	4481	goto curlym_do_B; /* try to match B */
	4482
	4483	#undef ST
	4484	#define ST st->u.curly
	4485
	4486	#define CURLY_SETPAREN(paren, success) \
	4487	if (paren) { \
	4488	if (success) { \
	4489	PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
	4490	PL_regoffs[paren].end = locinput - PL_bostr; \
	4491	*PL_reglastcloseparen = paren; \
	4492	} \
	4493	else \
	4494	PL_regoffs[paren].end = -1; \
	4495	}
	4496
	4497	case STAR: /* /AB/ where A is width 1 /
	4498	ST.paren = 0;
	4499	ST.min = 0;
	4500	ST.max = REG_INFTY;
	4501	scan = NEXTOPER(scan);
	4502	goto repeat;
	4503	case PLUS: /* /A+B/ where A is width 1 */
	4504	ST.paren = 0;
	4505	ST.min = 1;
	4506	ST.max = REG_INFTY;
	4507	scan = NEXTOPER(scan);
	4508	goto repeat;
	4509	case CURLYN: /* /(A){m,n}B/ where A is width 1 */
	4510	ST.paren = scan->flags; /* Which paren to set */
	4511	if (ST.paren > PL_regsize)
	4512	PL_regsize = ST.paren;
	4513	if (ST.paren > *PL_reglastparen)
	4514	*PL_reglastparen = ST.paren;
	4515	ST.min = ARG1(scan); /* min to match */
	4516	ST.max = ARG2(scan); /* max to match */
	4517	if (cur_eval && cur_eval->u.eval.close_paren &&
	4518	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	4519	ST.min=1;
	4520	ST.max=1;
	4521	}
	4522	scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
	4523	goto repeat;
	4524	case CURLY: /* /A{m,n}B/ where A is width 1 */
	4525	ST.paren = 0;
	4526	ST.min = ARG1(scan); /* min to match */
	4527	ST.max = ARG2(scan); /* max to match */
	4528	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	4529	repeat:
	4530	/*
	4531	* Lookahead to avoid useless match attempts
	4532	* when we know what character comes next.
	4533	*
	4534	* Used to only do .x and .?x, but now it allows
	4535	* for )'s, ('s and (?{ ... })'s to be in the way
	4536	* of the quantifier and the EXACT-like node. -- japhy
	4537	*/
	4538
	4539	if (ST.min > ST.max) /* XXX make this a compile-time check? */
	4540	sayNO;
	4541	if (HAS_TEXT(next) \|\| JUMPABLE(next)) {
	4542	U8 *s;
	4543	regnode *text_node = next;
	4544
	4545	if (! HAS_TEXT(text_node))
	4546	FIND_NEXT_IMPT(text_node);
	4547
	4548	if (! HAS_TEXT(text_node))
	4549	ST.c1 = ST.c2 = CHRTEST_VOID;
	4550	else {
	4551	if ( PL_regkind[OP(text_node)] != EXACT ) {
	4552	ST.c1 = ST.c2 = CHRTEST_VOID;
	4553	goto assume_ok_easy;
	4554	}
	4555	else
	4556	s = (U8*)STRING(text_node);
	4557
	4558	/* Currently we only get here when
	4559
	4560	PL_rekind[OP(text_node)] == EXACT
	4561
	4562	if this changes back then the macro for IS_TEXT and
	4563	friends need to change. */
	4564	if (!UTF) {
	4565	ST.c2 = ST.c1 = *s;
	4566	if (IS_TEXTF(text_node))
	4567	ST.c2 = PL_fold[ST.c1];
	4568	else if (IS_TEXTFL(text_node))
	4569	ST.c2 = PL_fold_locale[ST.c1];
	4570	}
	4571	else { /* UTF */
	4572	if (IS_TEXTF(text_node)) {
	4573	STRLEN ulen1, ulen2;
	4574	U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
	4575	U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
	4576
	4577	to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
	4578	to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
	4579	#ifdef EBCDIC
	4580	ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
	4581	ckWARN(WARN_UTF8) ?
	4582	0 : UTF8_ALLOW_ANY);
	4583	ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
	4584	ckWARN(WARN_UTF8) ?
	4585	0 : UTF8_ALLOW_ANY);
	4586	#else
	4587	ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
	4588	uniflags);
	4589	ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
	4590	uniflags);
	4591	#endif
	4592	}
	4593	else {
	4594	ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
	4595	uniflags);
	4596	}
	4597	}
	4598	}
	4599	}
	4600	else
	4601	ST.c1 = ST.c2 = CHRTEST_VOID;
	4602	assume_ok_easy:
	4603
	4604	ST.A = scan;
	4605	ST.B = next;
	4606	PL_reginput = locinput;
	4607	if (minmod) {
	4608	minmod = 0;
	4609	if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
	4610	sayNO;
	4611	ST.count = ST.min;
	4612	locinput = PL_reginput;
	4613	REGCP_SET(ST.cp);
	4614	if (ST.c1 == CHRTEST_VOID)
	4615	goto curly_try_B_min;
	4616
	4617	ST.oldloc = locinput;
	4618
	4619	/* set ST.maxpos to the furthest point along the
	4620	* string that could possibly match */
	4621	if (ST.max == REG_INFTY) {
	4622	ST.maxpos = PL_regeol - 1;
	4623	if (do_utf8)
	4624	while (UTF8_IS_CONTINUATION((U8)ST.maxpos))
	4625	ST.maxpos--;
	4626	}
	4627	else if (do_utf8) {
	4628	int m = ST.max - ST.min;
	4629	for (ST.maxpos = locinput;
	4630	m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
	4631	ST.maxpos += UTF8SKIP(ST.maxpos);
	4632	}
	4633	else {
	4634	ST.maxpos = locinput + ST.max - ST.min;
	4635	if (ST.maxpos >= PL_regeol)
	4636	ST.maxpos = PL_regeol - 1;
	4637	}
	4638	goto curly_try_B_min_known;
	4639
	4640	}
	4641	else {
	4642	ST.count = regrepeat(rex, ST.A, ST.max, depth);
	4643	locinput = PL_reginput;
	4644	if (ST.count < ST.min)
	4645	sayNO;
	4646	if ((ST.count > ST.min)
	4647	&& (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
	4648	{
	4649	/* A{m,n} must come at the end of the string, there's
	4650	* no point in backing off ... */
	4651	ST.min = ST.count;
	4652	/* ...except that $ and \Z can match before and after
	4653	newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
	4654	We may back off by one in this case. */
	4655	if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
	4656	ST.min--;
	4657	}
	4658	REGCP_SET(ST.cp);
	4659	goto curly_try_B_max;
	4660	}
	4661	/* NOTREACHED */
	4662
	4663
	4664	case CURLY_B_min_known_fail:
	4665	/* failed to find B in a non-greedy match where c1,c2 valid */
	4666	if (ST.paren && ST.count)
	4667	PL_regoffs[ST.paren].end = -1;
	4668
	4669	PL_reginput = locinput; /* Could be reset... */
	4670	REGCP_UNWIND(ST.cp);
	4671	/* Couldn't or didn't -- move forward. */
	4672	ST.oldloc = locinput;
	4673	if (do_utf8)
	4674	locinput += UTF8SKIP(locinput);
	4675	else
	4676	locinput++;
	4677	ST.count++;
	4678	curly_try_B_min_known:
	4679	/* find the next place where 'B' could work, then call B */
	4680	{
	4681	int n;
	4682	if (do_utf8) {
	4683	n = (ST.oldloc == locinput) ? 0 : 1;
	4684	if (ST.c1 == ST.c2) {
	4685	STRLEN len;
	4686	/* set n to utf8_distance(oldloc, locinput) */
	4687	while (locinput <= ST.maxpos &&
	4688	utf8n_to_uvchr((U8*)locinput,
	4689	UTF8_MAXBYTES, &len,
	4690	uniflags) != (UV)ST.c1) {
	4691	locinput += len;
	4692	n++;
	4693	}
	4694	}
	4695	else {
	4696	/* set n to utf8_distance(oldloc, locinput) */
	4697	while (locinput <= ST.maxpos) {
	4698	STRLEN len;
	4699	const UV c = utf8n_to_uvchr((U8*)locinput,
	4700	UTF8_MAXBYTES, &len,
	4701	uniflags);
	4702	if (c == (UV)ST.c1 \|\| c == (UV)ST.c2)
	4703	break;
	4704	locinput += len;
	4705	n++;
	4706	}
	4707	}
	4708	}
	4709	else {
	4710	if (ST.c1 == ST.c2) {
	4711	while (locinput <= ST.maxpos &&
	4712	UCHARAT(locinput) != ST.c1)
	4713	locinput++;
	4714	}
	4715	else {
	4716	while (locinput <= ST.maxpos
	4717	&& UCHARAT(locinput) != ST.c1
	4718	&& UCHARAT(locinput) != ST.c2)
	4719	locinput++;
	4720	}
	4721	n = locinput - ST.oldloc;
	4722	}
	4723	if (locinput > ST.maxpos)
	4724	sayNO;
	4725	/* PL_reginput == oldloc now */
	4726	if (n) {
	4727	ST.count += n;
	4728	if (regrepeat(rex, ST.A, n, depth) < n)
	4729	sayNO;
	4730	}
	4731	PL_reginput = locinput;
	4732	CURLY_SETPAREN(ST.paren, ST.count);
	4733	if (cur_eval && cur_eval->u.eval.close_paren &&
	4734	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	4735	goto fake_end;
	4736	}
	4737	PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
	4738	}
	4739	/* NOTREACHED */
	4740
	4741
	4742	case CURLY_B_min_fail:
	4743	/* failed to find B in a non-greedy match where c1,c2 invalid */
	4744	if (ST.paren && ST.count)
	4745	PL_regoffs[ST.paren].end = -1;
	4746
	4747	REGCP_UNWIND(ST.cp);
	4748	/* failed -- move forward one */
	4749	PL_reginput = locinput;
	4750	if (regrepeat(rex, ST.A, 1, depth)) {
	4751	ST.count++;
	4752	locinput = PL_reginput;
	4753	if (ST.count <= ST.max \|\| (ST.max == REG_INFTY &&
	4754	ST.count > 0)) /* count overflow ? */
	4755	{
	4756	curly_try_B_min:
	4757	CURLY_SETPAREN(ST.paren, ST.count);
	4758	if (cur_eval && cur_eval->u.eval.close_paren &&
	4759	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	4760	goto fake_end;
	4761	}
	4762	PUSH_STATE_GOTO(CURLY_B_min, ST.B);
	4763	}
	4764	}
	4765	sayNO;
	4766	/* NOTREACHED */
	4767
	4768
	4769	curly_try_B_max:
	4770	/* a successful greedy match: now try to match B */
	4771	if (cur_eval && cur_eval->u.eval.close_paren &&
	4772	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	4773	goto fake_end;
	4774	}
	4775	{
	4776	UV c = 0;
	4777	if (ST.c1 != CHRTEST_VOID)
	4778	c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
	4779	UTF8_MAXBYTES, 0, uniflags)
	4780	: (UV) UCHARAT(PL_reginput);
	4781	/* If it could work, try it. */
	4782	if (ST.c1 == CHRTEST_VOID \|\| c == (UV)ST.c1 \|\| c == (UV)ST.c2) {
	4783	CURLY_SETPAREN(ST.paren, ST.count);
	4784	PUSH_STATE_GOTO(CURLY_B_max, ST.B);
	4785	/* NOTREACHED */
	4786	}
	4787	}
	4788	/* FALL THROUGH */
	4789	case CURLY_B_max_fail:
	4790	/* failed to find B in a greedy match */
	4791	if (ST.paren && ST.count)
	4792	PL_regoffs[ST.paren].end = -1;
	4793
	4794	REGCP_UNWIND(ST.cp);
	4795	/* back up. */
	4796	if (--ST.count < ST.min)
	4797	sayNO;
	4798	PL_reginput = locinput = HOPc(locinput, -1);
	4799	goto curly_try_B_max;
	4800
	4801	#undef ST
	4802
	4803	case END:
	4804	fake_end:
	4805	if (cur_eval) {
	4806	/* we've just finished A in /(??{A})B/; now continue with B */
	4807	I32 tmpix;
	4808	st->u.eval.toggle_reg_flags
	4809	= cur_eval->u.eval.toggle_reg_flags;
	4810	PL_reg_flags ^= st->u.eval.toggle_reg_flags;
	4811
	4812	st->u.eval.prev_rex = rex; /* inner */
	4813	SETREX(rex,cur_eval->u.eval.prev_rex);
	4814	rexi = RXi_GET(rex);
	4815	cur_curlyx = cur_eval->u.eval.prev_curlyx;
	4816	ReREFCNT_inc(rex);
	4817	st->u.eval.cp = regcppush(0); /* Save all the positions. */
	4818	REGCP_SET(st->u.eval.lastcp);
	4819	PL_reginput = locinput;
	4820
	4821	/* Restore parens of the outer rex without popping the
	4822	* savestack */
	4823	tmpix = PL_savestack_ix;
	4824	PL_savestack_ix = cur_eval->u.eval.lastcp;
	4825	regcppop(rex);
	4826	PL_savestack_ix = tmpix;
	4827
	4828	st->u.eval.prev_eval = cur_eval;
	4829	cur_eval = cur_eval->u.eval.prev_eval;
	4830	DEBUG_EXECUTE_r(
	4831	PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
	4832	REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
	4833	if ( nochange_depth )
	4834	nochange_depth--;
	4835
	4836	PUSH_YES_STATE_GOTO(EVAL_AB,
	4837	st->u.eval.prev_eval->u.eval.B); /* match B */
	4838	}
	4839
	4840	if (locinput < reginfo->till) {
	4841	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	4842	"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
	4843	PL_colors[4],
	4844	(long)(locinput - PL_reg_starttry),
	4845	(long)(reginfo->till - PL_reg_starttry),
	4846	PL_colors[5]));
	4847
	4848	sayNO_SILENT; /* Cannot match: too short. */
	4849	}
	4850	PL_reginput = locinput; /* put where regtry can find it */
	4851	sayYES; /* Success! */
	4852
	4853	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
	4854	DEBUG_EXECUTE_r(
	4855	PerlIO_printf(Perl_debug_log,
	4856	"%*s %ssubpattern success...%s\n",
	4857	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
	4858	PL_reginput = locinput; /* put where regtry can find it */
	4859	sayYES; /* Success! */
	4860
	4861	#undef ST
	4862	#define ST st->u.ifmatch
	4863
	4864	case SUSPEND: /* (?>A) */
	4865	ST.wanted = 1;
	4866	PL_reginput = locinput;
	4867	goto do_ifmatch;
	4868
	4869	case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
	4870	ST.wanted = 0;
	4871	goto ifmatch_trivial_fail_test;
	4872
	4873	case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
	4874	ST.wanted = 1;
	4875	ifmatch_trivial_fail_test:
	4876	if (scan->flags) {
	4877	char * const s = HOPBACKc(locinput, scan->flags);
	4878	if (!s) {
	4879	/* trivial fail */
	4880	if (logical) {
	4881	logical = 0;
	4882	sw = 1 - (bool)ST.wanted;
	4883	}
	4884	else if (ST.wanted)
	4885	sayNO;
	4886	next = scan + ARG(scan);
	4887	if (next == scan)
	4888	next = NULL;
	4889	break;
	4890	}
	4891	PL_reginput = s;
	4892	}
	4893	else
	4894	PL_reginput = locinput;
	4895
	4896	do_ifmatch:
	4897	ST.me = scan;
	4898	ST.logical = logical;
	4899	/* execute body of (?...A) */
	4900	PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
	4901	/* NOTREACHED */
	4902
	4903	case IFMATCH_A_fail: /* body of (?...A) failed */
	4904	ST.wanted = !ST.wanted;
	4905	/* FALL THROUGH */
	4906
	4907	case IFMATCH_A: /* body of (?...A) succeeded */
	4908	if (ST.logical) {
	4909	sw = (bool)ST.wanted;
	4910	}
	4911	else if (!ST.wanted)
	4912	sayNO;
	4913
	4914	if (OP(ST.me) == SUSPEND)
	4915	locinput = PL_reginput;
	4916	else {
	4917	locinput = PL_reginput = st->locinput;
	4918	nextchr = UCHARAT(locinput);
	4919	}
	4920	scan = ST.me + ARG(ST.me);
	4921	if (scan == ST.me)
	4922	scan = NULL;
	4923	continue; /* execute B */
	4924
	4925	#undef ST
	4926
	4927	case LONGJMP:
	4928	next = scan + ARG(scan);
	4929	if (next == scan)
	4930	next = NULL;
	4931	break;
	4932	case COMMIT:
	4933	reginfo->cutpoint = PL_regeol;
	4934	/* FALLTHROUGH */
	4935	case PRUNE:
	4936	PL_reginput = locinput;
	4937	if (!scan->flags)
	4938	sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
	4939	PUSH_STATE_GOTO(COMMIT_next,next);
	4940	/* NOTREACHED */
	4941	case COMMIT_next_fail:
	4942	no_final = 1;
	4943	/* FALLTHROUGH */
	4944	case OPFAIL:
	4945	sayNO;
	4946	/* NOTREACHED */
	4947
	4948	#define ST st->u.mark
	4949	case MARKPOINT:
	4950	ST.prev_mark = mark_state;
	4951	ST.mark_name = sv_commit = sv_yes_mark
	4952	= (SV*)rexi->data->data[ ARG( scan ) ];
	4953	mark_state = st;
	4954	ST.mark_loc = PL_reginput = locinput;
	4955	PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
	4956	/* NOTREACHED */
	4957	case MARKPOINT_next:
	4958	mark_state = ST.prev_mark;
	4959	sayYES;
	4960	/* NOTREACHED */
	4961	case MARKPOINT_next_fail:
	4962	if (popmark && sv_eq(ST.mark_name,popmark))
	4963	{
	4964	if (ST.mark_loc > startpoint)
	4965	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	4966	popmark = NULL; /* we found our mark */
	4967	sv_commit = ST.mark_name;
	4968
	4969	DEBUG_EXECUTE_r({
	4970	PerlIO_printf(Perl_debug_log,
	4971	"%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
	4972	REPORT_CODE_OFF+depth*2, "",
	4973	PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
	4974	});
	4975	}
	4976	mark_state = ST.prev_mark;
	4977	sv_yes_mark = mark_state ?
	4978	mark_state->u.mark.mark_name : NULL;
	4979	sayNO;
	4980	/* NOTREACHED */
	4981	case SKIP:
	4982	PL_reginput = locinput;
	4983	if (scan->flags) {
	4984	/* (SKIP) : if we fail we cut here/
	4985	ST.mark_name = NULL;
	4986	ST.mark_loc = locinput;
	4987	PUSH_STATE_GOTO(SKIP_next,next);
	4988	} else {
	4989	/* (SKIP:NAME) : if there is a (MARK:NAME) fail where it was,
	4990	otherwise do nothing. Meaning we need to scan
	4991	*/
	4992	regmatch_state *cur = mark_state;
	4993	SV find = (SV)rexi->data->data[ ARG( scan ) ];
	4994
	4995	while (cur) {
	4996	if ( sv_eq( cur->u.mark.mark_name,
	4997	find ) )
	4998	{
	4999	ST.mark_name = find;
	5000	PUSH_STATE_GOTO( SKIP_next, next );
	5001	}
	5002	cur = cur->u.mark.prev_mark;
	5003	}
	5004	}
	5005	/* Didn't find our (MARK:NAME) so ignore this (SKIP:NAME) */
	5006	break;
	5007	case SKIP_next_fail:
	5008	if (ST.mark_name) {
	5009	/* (*CUT:NAME) - Set up to search for the name as we
	5010	collapse the stack*/
	5011	popmark = ST.mark_name;
	5012	} else {
	5013	/* (CUT) - No name, we cut here./
	5014	if (ST.mark_loc > startpoint)
	5015	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	5016	/* but we set sv_commit to latest mark_name if there
	5017	is one so they can test to see how things lead to this
	5018	cut */
	5019	if (mark_state)
	5020	sv_commit=mark_state->u.mark.mark_name;
	5021	}
	5022	no_final = 1;
	5023	sayNO;
	5024	/* NOTREACHED */
	5025	#undef ST
	5026	case FOLDCHAR:
	5027	n = ARG(scan);
	5028	if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
	5029	locinput += ln;
	5030	} else if ( 0xDF == n && !do_utf8 && !UTF ) {
	5031	sayNO;
	5032	} else {
	5033	U8 folded[UTF8_MAXBYTES_CASE+1];
	5034	STRLEN foldlen;
	5035	const char * const l = locinput;
	5036	char *e = PL_regeol;
	5037	to_uni_fold(n, folded, &foldlen);
	5038
	5039	if (ibcmp_utf8((const char*) folded, 0, foldlen, 1,
	5040	l, &e, 0, do_utf8)) {
	5041	sayNO;
	5042	}
	5043	locinput = e;
	5044	}
	5045	nextchr = UCHARAT(locinput);
	5046	break;
	5047	case LNBREAK:
	5048	if ((n=is_LNBREAK(locinput,do_utf8))) {
	5049	locinput += n;
	5050	nextchr = UCHARAT(locinput);
	5051	} else
	5052	sayNO;
	5053	break;
	5054
	5055	#define CASE_CLASS(nAmE) \
	5056	case nAmE: \
	5057	if ((n=is_##nAmE(locinput,do_utf8))) { \
	5058	locinput += n; \
	5059	nextchr = UCHARAT(locinput); \
	5060	} else \
	5061	sayNO; \
	5062	break; \
	5063	case N##nAmE: \
	5064	if ((n=is_##nAmE(locinput,do_utf8))) { \
	5065	sayNO; \
	5066	} else { \
	5067	locinput += UTF8SKIP(locinput); \
	5068	nextchr = UCHARAT(locinput); \
	5069	} \
	5070	break
	5071
	5072	CASE_CLASS(VERTWS);
	5073	CASE_CLASS(HORIZWS);
	5074	#undef CASE_CLASS
	5075
	5076	default:
	5077	PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
	5078	PTR2UV(scan), OP(scan));
	5079	Perl_croak(aTHX_ "regexp memory corruption");
	5080
	5081	} /* end switch */
	5082
	5083	/* switch break jumps here */
	5084	scan = next; /* prepare to execute the next op and ... */
	5085	continue; /* ... jump back to the top, reusing st */
	5086	/* NOTREACHED */
	5087
	5088	push_yes_state:
	5089	/* push a state that backtracks on success */
	5090	st->u.yes.prev_yes_state = yes_state;
	5091	yes_state = st;
	5092	/* FALL THROUGH */
	5093	push_state:
	5094	/* push a new regex state, then continue at scan */
	5095	{
	5096	regmatch_state *newst;
	5097
	5098	DEBUG_STACK_r({
	5099	regmatch_state *cur = st;
	5100	regmatch_state *curyes = yes_state;
	5101	int curd = depth;
	5102	regmatch_slab *slab = PL_regmatch_slab;
	5103	for (;curd > -1;cur--,curd--) {
	5104	if (cur < SLAB_FIRST(slab)) {
	5105	slab = slab->prev;
	5106	cur = SLAB_LAST(slab);
	5107	}
	5108	PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
	5109	REPORT_CODE_OFF + 2 + depth * 2,"",
	5110	curd, PL_reg_name[cur->resume_state],
	5111	(curyes == cur) ? "yes" : ""
	5112	);
	5113	if (curyes == cur)
	5114	curyes = cur->u.yes.prev_yes_state;
	5115	}
	5116	} else
	5117	DEBUG_STATE_pp("push")
	5118	);
	5119	depth++;
	5120	st->locinput = locinput;
	5121	newst = st+1;
	5122	if (newst > SLAB_LAST(PL_regmatch_slab))
	5123	newst = S_push_slab(aTHX);
	5124	PL_regmatch_state = newst;
	5125
	5126	locinput = PL_reginput;
	5127	nextchr = UCHARAT(locinput);
	5128	st = newst;
	5129	continue;
	5130	/* NOTREACHED */
	5131	}
	5132	}
	5133
	5134	/*
	5135	* We get here only if there's trouble -- normally "case END" is
	5136	* the terminating point.
	5137	*/
	5138	Perl_croak(aTHX_ "corrupted regexp pointers");
	5139	/NOTREACHED/
	5140	sayNO;
	5141
	5142	yes:
	5143	if (yes_state) {
	5144	/* we have successfully completed a subexpression, but we must now
	5145	* pop to the state marked by yes_state and continue from there */
	5146	assert(st != yes_state);
	5147	#ifdef DEBUGGING
	5148	while (st != yes_state) {
	5149	st--;
	5150	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	5151	PL_regmatch_slab = PL_regmatch_slab->prev;
	5152	st = SLAB_LAST(PL_regmatch_slab);
	5153	}
	5154	DEBUG_STATE_r({
	5155	if (no_final) {
	5156	DEBUG_STATE_pp("pop (no final)");
	5157	} else {
	5158	DEBUG_STATE_pp("pop (yes)");
	5159	}
	5160	});
	5161	depth--;
	5162	}
	5163	#else
	5164	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
	5165	\|\| yes_state > SLAB_LAST(PL_regmatch_slab))
	5166	{
	5167	/* not in this slab, pop slab */
	5168	depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
	5169	PL_regmatch_slab = PL_regmatch_slab->prev;
	5170	st = SLAB_LAST(PL_regmatch_slab);
	5171	}
	5172	depth -= (st - yes_state);
	5173	#endif
	5174	st = yes_state;
	5175	yes_state = st->u.yes.prev_yes_state;
	5176	PL_regmatch_state = st;
	5177
	5178	if (no_final) {
	5179	locinput= st->locinput;
	5180	nextchr = UCHARAT(locinput);
	5181	}
	5182	state_num = st->resume_state + no_final;
	5183	goto reenter_switch;
	5184	}
	5185
	5186	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
	5187	PL_colors[4], PL_colors[5]));
	5188
	5189	if (PL_reg_eval_set) {
	5190	/* each successfully executed (?{...}) block does the equivalent of
	5191	* local $^R = do {...}
	5192	* When popping the save stack, all these locals would be undone;
	5193	* bypass this by setting the outermost saved $^R to the latest
	5194	* value */
	5195	if (oreplsv != GvSV(PL_replgv))
	5196	sv_setsv(oreplsv, GvSV(PL_replgv));
	5197	}
	5198	result = 1;
	5199	goto final_exit;
	5200
	5201	no:
	5202	DEBUG_EXECUTE_r(
	5203	PerlIO_printf(Perl_debug_log,
	5204	"%*s %sfailed...%s\n",
	5205	REPORT_CODE_OFF+depth*2, "",
	5206	PL_colors[4], PL_colors[5])
	5207	);
	5208
	5209	no_silent:
	5210	if (no_final) {
	5211	if (yes_state) {
	5212	goto yes;
	5213	} else {
	5214	goto final_exit;
	5215	}
	5216	}
	5217	if (depth) {
	5218	/* there's a previous state to backtrack to */
	5219	st--;
	5220	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	5221	PL_regmatch_slab = PL_regmatch_slab->prev;
	5222	st = SLAB_LAST(PL_regmatch_slab);
	5223	}
	5224	PL_regmatch_state = st;
	5225	locinput= st->locinput;
	5226	nextchr = UCHARAT(locinput);
	5227
	5228	DEBUG_STATE_pp("pop");
	5229	depth--;
	5230	if (yes_state == st)
	5231	yes_state = st->u.yes.prev_yes_state;
	5232
	5233	state_num = st->resume_state + 1; /* failure = success + 1 */
	5234	goto reenter_switch;
	5235	}
	5236	result = 0;
	5237
	5238	final_exit:
	5239	if (rex->intflags & PREGf_VERBARG_SEEN) {
	5240	SV *sv_err = get_sv("REGERROR", 1);
	5241	SV *sv_mrk = get_sv("REGMARK", 1);
	5242	if (result) {
	5243	sv_commit = &PL_sv_no;
	5244	if (!sv_yes_mark)
	5245	sv_yes_mark = &PL_sv_yes;
	5246	} else {
	5247	if (!sv_commit)
	5248	sv_commit = &PL_sv_yes;
	5249	sv_yes_mark = &PL_sv_no;
	5250	}
	5251	sv_setsv(sv_err, sv_commit);
	5252	sv_setsv(sv_mrk, sv_yes_mark);
	5253	}
	5254
	5255	/* clean up; in particular, free all slabs above current one */
	5256	LEAVE_SCOPE(oldsave);
	5257
	5258	return result;
	5259	}
	5260
	5261	/*
	5262	- regrepeat - repeatedly match something simple, report how many
	5263	*/
	5264	/*
	5265	* [This routine now assumes that it will only match on things of length 1.
	5266	* That was true before, but now we assume scan - reginput is the count,
	5267	* rather than incrementing count on every character. [Er, except utf8.]]
	5268	*/
	5269	STATIC I32
	5270	S_regrepeat(pTHX_ const regexp prog, const regnode p, I32 max, int depth)
	5271	{
	5272	dVAR;
	5273	register char *scan;
	5274	register I32 c;
	5275	register char *loceol = PL_regeol;
	5276	register I32 hardcount = 0;
	5277	register bool do_utf8 = PL_reg_match_utf8;
	5278	#ifndef DEBUGGING
	5279	PERL_UNUSED_ARG(depth);
	5280	#endif
	5281
	5282	scan = PL_reginput;
	5283	if (max == REG_INFTY)
	5284	max = I32_MAX;
	5285	else if (max < loceol - scan)
	5286	loceol = scan + max;
	5287	switch (OP(p)) {
	5288	case REG_ANY:
	5289	if (do_utf8) {
	5290	loceol = PL_regeol;
	5291	while (scan < loceol && hardcount < max && *scan != '\n') {
	5292	scan += UTF8SKIP(scan);
	5293	hardcount++;
	5294	}
	5295	} else {
	5296	while (scan < loceol && *scan != '\n')
	5297	scan++;
	5298	}
	5299	break;
	5300	case SANY:
	5301	if (do_utf8) {
	5302	loceol = PL_regeol;
	5303	while (scan < loceol && hardcount < max) {
	5304	scan += UTF8SKIP(scan);
	5305	hardcount++;
	5306	}
	5307	}
	5308	else
	5309	scan = loceol;
	5310	break;
	5311	case CANY:
	5312	scan = loceol;
	5313	break;
	5314	case EXACT: /* length of string is 1 */
	5315	c = (U8)*STRING(p);
	5316	while (scan < loceol && UCHARAT(scan) == c)
	5317	scan++;
	5318	break;
	5319	case EXACTF: /* length of string is 1 */
	5320	c = (U8)*STRING(p);
	5321	while (scan < loceol &&
	5322	(UCHARAT(scan) == c \|\| UCHARAT(scan) == PL_fold[c]))
	5323	scan++;
	5324	break;
	5325	case EXACTFL: /* length of string is 1 */
	5326	PL_reg_flags \|= RF_tainted;
	5327	c = (U8)*STRING(p);
	5328	while (scan < loceol &&
	5329	(UCHARAT(scan) == c \|\| UCHARAT(scan) == PL_fold_locale[c]))
	5330	scan++;
	5331	break;
	5332	case ANYOF:
	5333	if (do_utf8) {
	5334	loceol = PL_regeol;
	5335	while (hardcount < max && scan < loceol &&
	5336	reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
	5337	scan += UTF8SKIP(scan);
	5338	hardcount++;
	5339	}
	5340	} else {
	5341	while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
	5342	scan++;
	5343	}
	5344	break;
	5345	case ALNUM:
	5346	if (do_utf8) {
	5347	loceol = PL_regeol;
	5348	LOAD_UTF8_CHARCLASS_ALNUM();
	5349	while (hardcount < max && scan < loceol &&
	5350	swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
	5351	scan += UTF8SKIP(scan);
	5352	hardcount++;
	5353	}
	5354	} else {
	5355	while (scan < loceol && isALNUM(*scan))
	5356	scan++;
	5357	}
	5358	break;
	5359	case ALNUML:
	5360	PL_reg_flags \|= RF_tainted;
	5361	if (do_utf8) {
	5362	loceol = PL_regeol;
	5363	while (hardcount < max && scan < loceol &&
	5364	isALNUM_LC_utf8((U8*)scan)) {
	5365	scan += UTF8SKIP(scan);
	5366	hardcount++;
	5367	}
	5368	} else {
	5369	while (scan < loceol && isALNUM_LC(*scan))
	5370	scan++;
	5371	}
	5372	break;
	5373	case NALNUM:
	5374	if (do_utf8) {
	5375	loceol = PL_regeol;
	5376	LOAD_UTF8_CHARCLASS_ALNUM();
	5377	while (hardcount < max && scan < loceol &&
	5378	!swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
	5379	scan += UTF8SKIP(scan);
	5380	hardcount++;
	5381	}
	5382	} else {
	5383	while (scan < loceol && !isALNUM(*scan))
	5384	scan++;
	5385	}
	5386	break;
	5387	case NALNUML:
	5388	PL_reg_flags \|= RF_tainted;
	5389	if (do_utf8) {
	5390	loceol = PL_regeol;
	5391	while (hardcount < max && scan < loceol &&
	5392	!isALNUM_LC_utf8((U8*)scan)) {
	5393	scan += UTF8SKIP(scan);
	5394	hardcount++;
	5395	}
	5396	} else {
	5397	while (scan < loceol && !isALNUM_LC(*scan))
	5398	scan++;
	5399	}
	5400	break;
	5401	case SPACE:
	5402	if (do_utf8) {
	5403	loceol = PL_regeol;
	5404	LOAD_UTF8_CHARCLASS_SPACE();
	5405	while (hardcount < max && scan < loceol &&
	5406	(*scan == ' ' \|\|
	5407	swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
	5408	scan += UTF8SKIP(scan);
	5409	hardcount++;
	5410	}
	5411	} else {
	5412	while (scan < loceol && isSPACE(*scan))
	5413	scan++;
	5414	}
	5415	break;
	5416	case SPACEL:
	5417	PL_reg_flags \|= RF_tainted;
	5418	if (do_utf8) {
	5419	loceol = PL_regeol;
	5420	while (hardcount < max && scan < loceol &&
	5421	(scan == ' ' \|\| isSPACE_LC_utf8((U8)scan))) {
	5422	scan += UTF8SKIP(scan);
	5423	hardcount++;
	5424	}
	5425	} else {
	5426	while (scan < loceol && isSPACE_LC(*scan))
	5427	scan++;
	5428	}
	5429	break;
	5430	case NSPACE:
	5431	if (do_utf8) {
	5432	loceol = PL_regeol;
	5433	LOAD_UTF8_CHARCLASS_SPACE();
	5434	while (hardcount < max && scan < loceol &&
	5435	!(*scan == ' ' \|\|
	5436	swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
	5437	scan += UTF8SKIP(scan);
	5438	hardcount++;
	5439	}
	5440	} else {
	5441	while (scan < loceol && !isSPACE(*scan))
	5442	scan++;
	5443	}
	5444	break;
	5445	case NSPACEL:
	5446	PL_reg_flags \|= RF_tainted;
	5447	if (do_utf8) {
	5448	loceol = PL_regeol;
	5449	while (hardcount < max && scan < loceol &&
	5450	!(scan == ' ' \|\| isSPACE_LC_utf8((U8)scan))) {
	5451	scan += UTF8SKIP(scan);
	5452	hardcount++;
	5453	}
	5454	} else {
	5455	while (scan < loceol && !isSPACE_LC(*scan))
	5456	scan++;
	5457	}
	5458	break;
	5459	case DIGIT:
	5460	if (do_utf8) {
	5461	loceol = PL_regeol;
	5462	LOAD_UTF8_CHARCLASS_DIGIT();
	5463	while (hardcount < max && scan < loceol &&
	5464	swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
	5465	scan += UTF8SKIP(scan);
	5466	hardcount++;
	5467	}
	5468	} else {
	5469	while (scan < loceol && isDIGIT(*scan))
	5470	scan++;
	5471	}
	5472	break;
	5473	case NDIGIT:
	5474	if (do_utf8) {
	5475	loceol = PL_regeol;
	5476	LOAD_UTF8_CHARCLASS_DIGIT();
	5477	while (hardcount < max && scan < loceol &&
	5478	!swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
	5479	scan += UTF8SKIP(scan);
	5480	hardcount++;
	5481	}
	5482	} else {
	5483	while (scan < loceol && !isDIGIT(*scan))
	5484	scan++;
	5485	}
	5486	case LNBREAK:
	5487	if (do_utf8) {
	5488	loceol = PL_regeol;
	5489	while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
	5490	scan += c;
	5491	hardcount++;
	5492	}
	5493	} else {
	5494	/*
	5495	LNBREAK can match two latin chars, which is ok,
	5496	because we have a null terminated string, but we
	5497	have to use hardcount in this situation
	5498	*/
	5499	while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
	5500	scan+=c;
	5501	hardcount++;
	5502	}
	5503	}
	5504	break;
	5505	case HORIZWS:
	5506	if (do_utf8) {
	5507	loceol = PL_regeol;
	5508	while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
	5509	scan += c;
	5510	hardcount++;
	5511	}
	5512	} else {
	5513	while (scan < loceol && is_HORIZWS_latin1(scan))
	5514	scan++;
	5515	}
	5516	break;
	5517	case NHORIZWS:
	5518	if (do_utf8) {
	5519	loceol = PL_regeol;
	5520	while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
	5521	scan += UTF8SKIP(scan);
	5522	hardcount++;
	5523	}
	5524	} else {
	5525	while (scan < loceol && !is_HORIZWS_latin1(scan))
	5526	scan++;
	5527
	5528	}
	5529	break;
	5530	case VERTWS:
	5531	if (do_utf8) {
	5532	loceol = PL_regeol;
	5533	while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
	5534	scan += c;
	5535	hardcount++;
	5536	}
	5537	} else {
	5538	while (scan < loceol && is_VERTWS_latin1(scan))
	5539	scan++;
	5540
	5541	}
	5542	break;
	5543	case NVERTWS:
	5544	if (do_utf8) {
	5545	loceol = PL_regeol;
	5546	while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
	5547	scan += UTF8SKIP(scan);
	5548	hardcount++;
	5549	}
	5550	} else {
	5551	while (scan < loceol && !is_VERTWS_latin1(scan))
	5552	scan++;
	5553
	5554	}
	5555	break;
	5556
	5557	default: /* Called on something of 0 width. */
	5558	break; /* So match right here or not at all. */
	5559	}
	5560
	5561	if (hardcount)
	5562	c = hardcount;
	5563	else
	5564	c = scan - PL_reginput;
	5565	PL_reginput = scan;
	5566
	5567	DEBUG_r({
	5568	GET_RE_DEBUG_FLAGS_DECL;
	5569	DEBUG_EXECUTE_r({
	5570	SV * const prop = sv_newmortal();
	5571	regprop(prog, prop, p);
	5572	PerlIO_printf(Perl_debug_log,
	5573	"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
	5574	REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
	5575	});
	5576	});
	5577
	5578	return(c);
	5579	}
	5580
	5581
	5582	#if !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION)
	5583	/*
	5584	- regclass_swash - prepare the utf8 swash
	5585	*/
	5586
	5587	SV *
	5588	Perl_regclass_swash(pTHX_ const regexp prog, register const regnode node, bool doinit, SV listsvp, SV altsvp)
	5589	{
	5590	dVAR;
	5591	SV *sw = NULL;
	5592	SV *si = NULL;
	5593	SV *alt = NULL;
	5594	RXi_GET_DECL(prog,progi);
	5595	const struct reg_data * const data = prog ? progi->data : NULL;
	5596
	5597	if (data && data->count) {
	5598	const U32 n = ARG(node);
	5599
	5600	if (data->what[n] == 's') {
	5601	SV * const rv = (SV*)data->data[n];
	5602	AV * const av = (AV)SvRV((SV)rv);
	5603	SV **const ary = AvARRAY(av);
	5604	SV a, b;
	5605
	5606	/* See the end of regcomp.c:S_regclass() for
	5607	* documentation of these array elements. */
	5608
	5609	si = *ary;
	5610	a = SvROK(ary[1]) ? &ary[1] : NULL;
	5611	b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
	5612
	5613	if (a)
	5614	sw = *a;
	5615	else if (si && doinit) {
	5616	sw = swash_init("utf8", "", si, 1, 0);
	5617	(void)av_store(av, 1, sw);
	5618	}
	5619	if (b)
	5620	alt = *b;
	5621	}
	5622	}
	5623
	5624	if (listsvp)
	5625	*listsvp = si;
	5626	if (altsvp)
	5627	*altsvp = alt;
	5628
	5629	return sw;
	5630	}
	5631	#endif
	5632
	5633	/*
	5634	- reginclass - determine if a character falls into a character class
	5635
	5636	The n is the ANYOF regnode, the p is the target string, lenp
	5637	is pointer to the maximum length of how far to go in the p
	5638	(if the lenp is zero, UTF8SKIP(p) is used),
	5639	do_utf8 tells whether the target string is in UTF-8.
	5640
	5641	*/
	5642
	5643	STATIC bool
	5644	S_reginclass(pTHX_ const regexp prog, register const regnode n, register const U8* p, STRLEN* lenp, register bool do_utf8)
	5645	{
	5646	dVAR;
	5647	const char flags = ANYOF_FLAGS(n);
	5648	bool match = FALSE;
	5649	UV c = *p;
	5650	STRLEN len = 0;
	5651	STRLEN plen;
	5652
	5653	if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
	5654	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
	5655	(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) \| UTF8_CHECK_ONLY);
	5656	/* see [perl #37836] for UTF8_ALLOW_ANYUV */
	5657	if (len == (STRLEN)-1)
	5658	Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
	5659	}
	5660
	5661	plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
	5662	if (do_utf8 \|\| (flags & ANYOF_UNICODE)) {
	5663	if (lenp)
	5664	*lenp = 0;
	5665	if (do_utf8 && !ANYOF_RUNTIME(n)) {
	5666	if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
	5667	match = TRUE;
	5668	}
	5669	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
	5670	match = TRUE;
	5671	if (!match) {
	5672	AV *av;
	5673	SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
	5674
	5675	if (sw) {
	5676	if (swash_fetch(sw, p, do_utf8))
	5677	match = TRUE;
	5678	else if (flags & ANYOF_FOLD) {
	5679	if (!match && lenp && av) {
	5680	I32 i;
	5681	for (i = 0; i <= av_len(av); i++) {
	5682	SV* const sv = *av_fetch(av, i, FALSE);
	5683	STRLEN len;
	5684	const char * const s = SvPV_const(sv, len);
	5685
	5686	if (len <= plen && memEQ(s, (char*)p, len)) {
	5687	*lenp = len;
	5688	match = TRUE;
	5689	break;
	5690	}
	5691	}
	5692	}
	5693	if (!match) {
	5694	U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
	5695	STRLEN tmplen;
	5696
	5697	to_utf8_fold(p, tmpbuf, &tmplen);
	5698	if (swash_fetch(sw, tmpbuf, do_utf8))
	5699	match = TRUE;
	5700	}
	5701	}
	5702	}
	5703	}
	5704	if (match && lenp && *lenp == 0)
	5705	*lenp = UNISKIP(NATIVE_TO_UNI(c));
	5706	}
	5707	if (!match && c < 256) {
	5708	if (ANYOF_BITMAP_TEST(n, c))
	5709	match = TRUE;
	5710	else if (flags & ANYOF_FOLD) {
	5711	U8 f;
	5712
	5713	if (flags & ANYOF_LOCALE) {
	5714	PL_reg_flags \|= RF_tainted;
	5715	f = PL_fold_locale[c];
	5716	}
	5717	else
	5718	f = PL_fold[c];
	5719	if (f != c && ANYOF_BITMAP_TEST(n, f))
	5720	match = TRUE;
	5721	}
	5722
	5723	if (!match && (flags & ANYOF_CLASS)) {
	5724	PL_reg_flags \|= RF_tainted;
	5725	if (
	5726	(ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) \|\|
	5727	(ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) \|\|
	5728	(ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) \|\|
	5729	(ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) \|\|
	5730	(ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) \|\|
	5731	(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) \|\|
	5732	(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) \|\|
	5733	(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) \|\|
	5734	(ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) \|\|
	5735	(ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) \|\|
	5736	(ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) \|\|
	5737	(ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) \|\|
	5738	(ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) \|\|
	5739	(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) \|\|
	5740	(ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) \|\|
	5741	(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) \|\|
	5742	(ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) \|\|
	5743	(ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) \|\|
	5744	(ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) \|\|
	5745	(ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) \|\|
	5746	(ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) \|\|
	5747	(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) \|\|
	5748	(ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) \|\|
	5749	(ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) \|\|
	5750	(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) \|\|
	5751	(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) \|\|
	5752	(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) \|\|
	5753	(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) \|\|
	5754	(ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) \|\|
	5755	(ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
	5756	) /* How's that for a conditional? */
	5757	{
	5758	match = TRUE;
	5759	}
	5760	}
	5761	}
	5762
	5763	return (flags & ANYOF_INVERT) ? !match : match;
	5764	}
	5765
	5766	STATIC U8 *
	5767	S_reghop3(U8 s, I32 off, const U8 lim)
	5768	{
	5769	dVAR;
	5770	if (off >= 0) {
	5771	while (off-- && s < lim) {
	5772	/* XXX could check well-formedness here */
	5773	s += UTF8SKIP(s);
	5774	}
	5775	}
	5776	else {
	5777	while (off++ && s > lim) {
	5778	s--;
	5779	if (UTF8_IS_CONTINUED(*s)) {
	5780	while (s > lim && UTF8_IS_CONTINUATION(*s))
	5781	s--;
	5782	}
	5783	/* XXX could check well-formedness here */
	5784	}
	5785	}
	5786	return s;
	5787	}
	5788
	5789	#ifdef XXX_dmq
	5790	/* there are a bunch of places where we use two reghop3's that should
	5791	be replaced with this routine. but since thats not done yet
	5792	we ifdef it out - dmq
	5793	*/
	5794	STATIC U8 *
	5795	S_reghop4(U8 s, I32 off, const U8 llim, const U8* rlim)
	5796	{
	5797	dVAR;
	5798	if (off >= 0) {
	5799	while (off-- && s < rlim) {
	5800	/* XXX could check well-formedness here */
	5801	s += UTF8SKIP(s);
	5802	}
	5803	}
	5804	else {
	5805	while (off++ && s > llim) {
	5806	s--;
	5807	if (UTF8_IS_CONTINUED(*s)) {
	5808	while (s > llim && UTF8_IS_CONTINUATION(*s))
	5809	s--;
	5810	}
	5811	/* XXX could check well-formedness here */
	5812	}
	5813	}
	5814	return s;
	5815	}
	5816	#endif
	5817
	5818	STATIC U8 *
	5819	S_reghopmaybe3(U8* s, I32 off, const U8* lim)
	5820	{
	5821	dVAR;
	5822	if (off >= 0) {
	5823	while (off-- && s < lim) {
	5824	/* XXX could check well-formedness here */
	5825	s += UTF8SKIP(s);
	5826	}
	5827	if (off >= 0)
	5828	return NULL;
	5829	}
	5830	else {
	5831	while (off++ && s > lim) {
	5832	s--;
	5833	if (UTF8_IS_CONTINUED(*s)) {
	5834	while (s > lim && UTF8_IS_CONTINUATION(*s))
	5835	s--;
	5836	}
	5837	/* XXX could check well-formedness here */
	5838	}
	5839	if (off <= 0)
	5840	return NULL;
	5841	}
	5842	return s;
	5843	}
	5844
	5845	static void
	5846	restore_pos(pTHX_ void *arg)
	5847	{
	5848	dVAR;
	5849	regexp * const rex = (regexp *)arg;
	5850	if (PL_reg_eval_set) {
	5851	if (PL_reg_oldsaved) {
	5852	rex->subbeg = PL_reg_oldsaved;
	5853	rex->sublen = PL_reg_oldsavedlen;
	5854	#ifdef PERL_OLD_COPY_ON_WRITE
	5855	rex->saved_copy = PL_nrs;
	5856	#endif
	5857	RX_MATCH_COPIED_on(rex);
	5858	}
	5859	PL_reg_magic->mg_len = PL_reg_oldpos;
	5860	PL_reg_eval_set = 0;
	5861	PL_curpm = PL_reg_oldcurpm;
	5862	}
	5863	}
	5864
	5865	STATIC void
	5866	S_to_utf8_substr(pTHX_ register regexp *prog)
	5867	{
	5868	int i = 1;
	5869	do {
	5870	if (prog->substrs->data[i].substr
	5871	&& !prog->substrs->data[i].utf8_substr) {
	5872	SV* const sv = newSVsv(prog->substrs->data[i].substr);
	5873	prog->substrs->data[i].utf8_substr = sv;
	5874	sv_utf8_upgrade(sv);
	5875	if (SvVALID(prog->substrs->data[i].substr)) {
	5876	const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
	5877	if (flags & FBMcf_TAIL) {
	5878	/* Trim the trailing \n that fbm_compile added last
	5879	time. */
	5880	SvCUR_set(sv, SvCUR(sv) - 1);
	5881	/* Whilst this makes the SV technically "invalid" (as its
	5882	buffer is no longer followed by "\0") when fbm_compile()
	5883	adds the "\n" back, a "\0" is restored. */
	5884	}
	5885	fbm_compile(sv, flags);
	5886	}
	5887	if (prog->substrs->data[i].substr == prog->check_substr)
	5888	prog->check_utf8 = sv;
	5889	}
	5890	} while (i--);
	5891	}
	5892
	5893	STATIC void
	5894	S_to_byte_substr(pTHX_ register regexp *prog)
	5895	{
	5896	dVAR;
	5897	int i = 1;
	5898	do {
	5899	if (prog->substrs->data[i].utf8_substr
	5900	&& !prog->substrs->data[i].substr) {
	5901	SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
	5902	if (sv_utf8_downgrade(sv, TRUE)) {
	5903	if (SvVALID(prog->substrs->data[i].utf8_substr)) {
	5904	const U8 flags
	5905	= BmFLAGS(prog->substrs->data[i].utf8_substr);
	5906	if (flags & FBMcf_TAIL) {
	5907	/* Trim the trailing \n that fbm_compile added last
	5908	time. */
	5909	SvCUR_set(sv, SvCUR(sv) - 1);
	5910	}
	5911	fbm_compile(sv, flags);
	5912	}
	5913	} else {
	5914	SvREFCNT_dec(sv);
	5915	sv = &PL_sv_undef;
	5916	}
	5917	prog->substrs->data[i].substr = sv;
	5918	if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
	5919	prog->check_substr = sv;
	5920	}
	5921	} while (i--);
	5922	}
	5923
	5924	/*
	5925	* Local variables:
	5926	* c-indentation-style: bsd
	5927	* c-basic-offset: 4
	5928	* indent-tabs-mode: t
	5929	* End:
	5930	*
	5931	* ex: set ts=8 sts=4 sw=4 noet:
	5932	*/