perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regexec.c
	2	*/
	3
	4	/*
	5	* One Ring to rule them all, One Ring to find them
	6	&
	7	* [p.v of _The Lord of the Rings_, opening poem]
	8	* [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
	9	* [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
	10	*/
	11
	12	/* This file contains functions for executing a regular expression. See
	13	* also regcomp.c which funnily enough, contains functions for compiling
	14	* a regular expression.
	15	*
	16	* This file is also copied at build time to ext/re/re_exec.c, where
	17	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	18	* This causes the main functions to be compiled under new names and with
	19	* debugging support added, which makes "use re 'debug'" work.
	20	*/
	21
	22	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	23	* be confused with the original package (see point 3 below). Thanks, Henry!
	24	*/
	25
	26	/* Additional note: this code is very heavily munged from Henry's version
	27	* in places. In some spots I've traded clarity for efficiency, so don't
	28	* blame Henry for some of the lack of readability.
	29	*/
	30
	31	/* The names of the functions have been changed from regcomp and
	32	* regexec to pregcomp and pregexec in order to avoid conflicts
	33	* with the POSIX routines of the same names.
	34	*/
	35
	36	#ifdef PERL_EXT_RE_BUILD
	37	#include "re_top.h"
	38	#endif
	39
	40	/*
	41	* pregcomp and pregexec -- regsub and regerror are not used in perl
	42	*
	43	* Copyright (c) 1986 by University of Toronto.
	44	* Written by Henry Spencer. Not derived from licensed software.
	45	*
	46	* Permission is granted to anyone to use this software for any
	47	* purpose on any computer system, and to redistribute it freely,
	48	* subject to the following restrictions:
	49	*
	50	* 1. The author is not responsible for the consequences of use of
	51	* this software, no matter how awful, even if they arise
	52	* from defects in it.
	53	*
	54	* 2. The origin of this software must not be misrepresented, either
	55	* by explicit claim or by omission.
	56	*
	57	* 3. Altered versions must be plainly marked as such, and must not
	58	* be misrepresented as being the original software.
	59	*
	60	**** Alterations to Henry's code are...
	61	****
	62	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	63	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
	64	**** by Larry Wall and others
	65	****
	66	**** You may distribute under the terms of either the GNU General Public
	67	**** License or the Artistic License, as specified in the README file.
	68	*
	69	* Beware that some of this code is subtly aware of the way operator
	70	* precedence is structured in regular expressions. Serious changes in
	71	* regular-expression syntax might require a total rethink.
	72	*/
	73	#include "EXTERN.h"
	74	#define PERL_IN_REGEXEC_C
	75	#include "perl.h"
	76
	77	#ifdef PERL_IN_XSUB_RE
	78	# include "re_comp.h"
	79	#else
	80	# include "regcomp.h"
	81	#endif
	82
		/* RF_*: bit values.  RF_tainted and RF_utf8 are used with PL_reg_flags in
		 * this file; NOTE(review): RF_warned presumably is too -- confirm. */
	83	#define RF_tainted 1 /* tainted information used? e.g. locale */
	84	#define RF_warned 2 /* warned about big count? */
	85
	86	#define RF_utf8 8 /* Pattern contains multibyte chars? */
	87
		/* True when the RF_utf8 bit is set in PL_reg_flags. */
	88	#define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
	89
		/* RS_*: NOTE(review): not referenced in the visible part of this file;
		 * confirm where these are consumed. */
	90	#define RS_init 1 /* eval environment created */
	91	#define RS_set 2 /* replsv value is set */
	92
		/* Provide a default for STATIC if nothing has defined it yet. */
	93	#ifndef STATIC
	94	#define STATIC static
	95	#endif
	96
	97	/* Valid for non-utf8 strings, non-ANYOFV nodes only: avoids the reginclass
	98	* call if there are no complications: i.e., if everything matchable is
	99	* straight forward in the bitmap.  When ANYOF_FLAGS(p) is non-zero the
		* full reginclass() is called; otherwise only the bitmap bit for the
		* byte *(c) is tested. */
	100	#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) \
	101	: ANYOF_BITMAP_TEST(p,*(c)))
	102
	103	/*
	104	* Forwards.
	105	*/
	106
	107	#define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))
	108	#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
	109
	110	#define HOPc(pos,off) \
	111	(char *)(PL_reg_match_utf8 \
	112	? reghop3((U8)pos, off, (U8)(off >= 0 ? PL_regeol : PL_bostr)) \
	113	: (U8*)(pos + off))
	114	#define HOPBACKc(pos, off) \
	115	(char*)(PL_reg_match_utf8\
	116	? reghopmaybe3((U8)pos, -off, (U8)PL_bostr) \
	117	: (pos - off >= PL_bostr) \
	118	? (U8*)pos - off \
	119	: NULL)
	120
	121	#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8)(pos), off, (U8)(lim)) : (U8*)(pos + off))
	122	#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
	123
		/* LOAD_UTF8_CHARCLASS(class,str): when PL_utf8_<class> is not yet set,
		 * call is_utf8_<class>() on str inside an ENTER / save_re_context() /
		 * LEAVE scope and assert() that the call returned true. */
	124	/* these are unrolled below in the CCC_TRY_XXX defined */
	125	#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
	126	if (!CAT2(PL_utf8_,class)) { \
	127	bool ok; \
	128	ENTER; save_re_context(); \
	129	ok=CAT2(is_utf8_,class)((const U8*)str); \
	130	assert(ok); LEAVE; } } STMT_END
	131
	132	/* Same as LOAD_UTF8_CHARCLASS, but always probes with " " and doesn't do
		 * an assert to verify that the result is correct */
	133	#define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
	134	if (!CAT2(PL_utf8_,class)) { \
	135	bool throw_away __attribute__unused__; \
	136	ENTER; save_re_context(); \
	137	throw_away = CAT2(is_utf8_,class)((const U8*)" "); \
	138	LEAVE; } } STMT_END
	139
		/* Convenience wrappers: each passes a sample string that is expected to
		 * be a member of the class, so the assert in LOAD_UTF8_CHARCLASS holds. */
	140	#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
	141	#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
	142	#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
	143
		/* Runs the loaders for all the X_* classes listed below; the first three
		 * are checked with assert, the remainder are not. */
	144	#define LOAD_UTF8_CHARCLASS_GCB() /* Grapheme cluster boundaries */ \
	145	LOAD_UTF8_CHARCLASS(X_begin, " "); \
	146	LOAD_UTF8_CHARCLASS(X_non_hangul, "A"); \
	147	/* These are utf8 constants, and not utf-ebcdic constants, so the \
	148	* assert should likely and hopefully fail on an EBCDIC machine */ \
	149	LOAD_UTF8_CHARCLASS(X_extend, "\xcc\x80"); /* U+0300 */ \
	150	\
	151	/* No asserts are done for these, in case called on an early \
	152	* Unicode version in which they map to nothing */ \
	153	LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* U+0E40 "\xe0\xb9\x80" */ \
	154	LOAD_UTF8_CHARCLASS_NO_CHECK(X_L); /* U+1100 "\xe1\x84\x80" */ \
	155	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV); /* U+AC00 "\xea\xb0\x80" */ \
	156	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LVT); /* U+AC01 "\xea\xb0\x81" */ \
	157	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV_LVT_V);/* U+AC01 "\xea\xb0\x81" */\
	158	LOAD_UTF8_CHARCLASS_NO_CHECK(X_T); /* U+11A8 "\xe1\x86\xa8" */ \
	159	LOAD_UTF8_CHARCLASS_NO_CHECK(X_V) /* U+1160 "\xe1\x85\xa0" */
	160
		/* Expands to nothing; passed as POS_OR_NEG to _CCC_TRY_CODE where the
		 * other variant passes '!'. */
	161	#define PLACEHOLDER /* Something for the preprocessor to grab onto */
	162
	163	/* The actual code for CCC_TRY, which uses several variables from the routine
	164	* it's callable from. It is designed to be the bulk of a case statement.
	165	* FUNC is the macro or function to call on non-utf8 targets that indicate if
	166	* nextchr matches the class.
	167	* UTF8_TEST is the whole test string to use for utf8 targets
	168	* LOAD is what to use to test, and if not present to load in the swash for the
	169	* class
	170	* POS_OR_NEG is either empty or ! to complement the results of FUNC or
	171	* UTF8_TEST test.
	172	* The logic is: Fail if we're at the end-of-string; otherwise if the target is
	173	* utf8 and a variant, load the swash if necessary and test using the utf8
	174	* test. Advance to the next character if test is ok, otherwise fail; If not
	175	* utf8 or an invariant under utf8, use the non-utf8 test, and fail if it
	176	* fails, or advance to the next character */
	177
		/* Parameters as actually declared: POS_OR_NEG, FUNC and UTF8_TEST are as
		 * described above; CLASS and STR are handed to LOAD_UTF8_CHARCLASS.
		 * Uses locinput, nextchr, utf8_target, PL_regeol and sayNO from the
		 * enclosing code, and ends each path with 'break'. */
	178	#define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR) \
	179	if (locinput >= PL_regeol) { \
	180	sayNO; \
	181	} \
	182	if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \
	183	LOAD_UTF8_CHARCLASS(CLASS, STR); \
	184	if (POS_OR_NEG (UTF8_TEST)) { \
	185	sayNO; \
	186	} \
	187	locinput += PL_utf8skip[nextchr]; \
	188	nextchr = UCHARAT(locinput); \
	189	break; \
	190	} \
	191	if (POS_OR_NEG (FUNC(nextchr))) { \
	192	sayNO; \
	193	} \
	194	nextchr = UCHARAT(++locinput); \
	195	break;
	196
	197	/* Handle the non-locale cases for a character class and its complement. It
	198	* calls _CCC_TRY_CODE with a ! to complement the test for the character class.
	199	* This is because that code fails when the test succeeds, so we want to have
	200	* the test fail so that the code succeeds. The swash is stored in a
	201	* predictable PL_ place */
		/* Expands to two case labels: NAME (the class itself, test passed with
		 * '!') and NNAME (its complement, test passed with PLACEHOLDER).  Both
		 * use swash_fetch() on PL_utf8_<CLASS> as the utf8 test. */
	202	#define _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, \
	203	CLASS, STR) \
	204	case NAME: \
	205	_CCC_TRY_CODE( !, FUNC, \
	206	cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	207	(U8*)locinput, TRUE)), \
	208	CLASS, STR) \
	209	case NNAME: \
	210	_CCC_TRY_CODE( PLACEHOLDER , FUNC, \
	211	cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	212	(U8*)locinput, TRUE)), \
	213	CLASS, STR) \
	214
	215	/* Generate the case statements for both locale and non-locale character
	216	* classes in regmatch for classes that don't have special unicode semantics.
	217	* Locales don't use an immediate swash, but an intermediary special locale
	218	* function that is called on the pointer to the current place in the input
	219	* string. That function will resolve to needing the same swash. One might
	220	* think that because we don't know what the locale will match, we shouldn't
	221	* check with the swash loading function that it loaded properly; ie, that we
	222	* should use LOAD_UTF8_CHARCLASS_NO_CHECK for those, but what is passed to the
	223	* regular LOAD_UTF8_CHARCLASS is in non-locale terms, and so locale is
	224	* irrelevant here */
	225	#define CCC_TRY(NAME, NNAME, FUNC, \
	226	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	227	NAMEA, NNAMEA, FUNCA, \
	228	CLASS, STR) \
	229	case NAMEL: \
	230	PL_reg_flags \|= RF_tainted; \
	231	_CCC_TRY_CODE( !, LCFUNC, LCFUNC_utf8((U8*)locinput), CLASS, STR) \
	232	case NNAMEL: \
	233	PL_reg_flags \|= RF_tainted; \
	234	_CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput), \
	235	CLASS, STR) \
	236	case NAMEA: \
	237	if (locinput >= PL_regeol \|\| ! FUNCA(nextchr)) { \
	238	sayNO; \
	239	} \
	240	/* Matched a utf8-invariant, so don't have to worry about utf8 */ \
	241	nextchr = UCHARAT(++locinput); \
	242	break; \
	243	case NNAMEA: \
	244	if (locinput >= PL_regeol \|\| FUNCA(nextchr)) { \
	245	sayNO; \
	246	} \
	247	if (utf8_target) { \
	248	locinput += PL_utf8skip[nextchr]; \
	249	nextchr = UCHARAT(locinput); \
	250	} \
	251	else { \
	252	nextchr = UCHARAT(++locinput); \
	253	} \
	254	break; \
	255	/* Generate the non-locale cases */ \
	256	_CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)
	257
	258	/* This is like CCC_TRY, but has an extra set of parameters for generating case
	259	* statements to handle separate Unicode semantics nodes */
		/* Expands to everything CCC_TRY generates, followed by a second
		 * _CCC_TRY_NONLOCALE pair for NAMEU / NNAMEU that tests with FUNCU. */
	260	#define CCC_TRY_U(NAME, NNAME, FUNC, \
	261	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	262	NAMEU, NNAMEU, FUNCU, \
	263	NAMEA, NNAMEA, FUNCA, \
	264	CLASS, STR) \
	265	CCC_TRY(NAME, NNAME, FUNC, \
	266	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	267	NAMEA, NNAMEA, FUNCA, \
	268	CLASS, STR) \
	269	_CCC_TRY_NONLOCALE(NAMEU, NNAMEU, FUNCU, CLASS, STR)
	270
	271	/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
	272
	273	/* for use after a quantifier and before an EXACT-like node -- japhy */
	274	/* it would be nice to rework regcomp.sym to generate this stuff. sigh
	275	*
	276	* NOTE that nothing that affects backtracking should be in here, specifically
	277	* VERBS must NOT be included. JUMPABLE is used to determine if we can ignore a
	278	* node that is in between two EXACT like nodes when ascertaining what the required
	279	* "follow" character is. This should probably be moved to regex compile time
	280	* although it may be done at run time because of the REF possibility - more
	281	* investigation required. -- demerphq
	282	*/
	283	#define JUMPABLE(rn) ( \
	284	OP(rn) == OPEN \|\| \
	285	(OP(rn) == CLOSE && (!cur_eval \|\| cur_eval->u.eval.close_paren != ARG(rn))) \|\| \
	286	OP(rn) == EVAL \|\| \
	287	OP(rn) == SUSPEND \|\| OP(rn) == IFMATCH \|\| \
	288	OP(rn) == PLUS \|\| OP(rn) == MINMOD \|\| \
	289	OP(rn) == KEEPS \|\| \
	290	(PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
	291	)
	292	#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
	293
	294	#define HAS_TEXT(rn) ( IS_EXACT(rn) \|\| PL_regkind[OP(rn)] == REF )
	295
	296	#if 0
	297	/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
	298	we don't need this definition. */
	299	#define IS_TEXT(rn) ( OP(rn)==EXACT \|\| OP(rn)==REF \|\| OP(rn)==NREF )
	300	#define IS_TEXTF(rn) ( (OP(rn)==EXACTFU \|\| OP(rn)==EXACTFA \|\| OP(rn)==EXACTF) \|\| OP(rn)==REFF \|\| OP(rn)==NREFF )
	301	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL \|\| OP(rn)==REFFL \|\| OP(rn)==NREFFL )
	302
	303	#else
	304	/* ... so we use this as its faster. */
	305	#define IS_TEXT(rn) ( OP(rn)==EXACT )
	306	#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU \|\| OP(rn) == EXACTFA)
	307	#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
	308	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
	309
	310	#endif
	311
	312	/*
	313	Search for mandatory following text node; for lookahead, the text must
	314	follow but for lookbehind (rn->flags != 0) we skip to the next step.
	315	*/
	316	#define FIND_NEXT_IMPT(rn) STMT_START { \
	317	while (JUMPABLE(rn)) { \
	318	const OPCODE type = OP(rn); \
	319	if (type == SUSPEND \|\| PL_regkind[type] == CURLY) \
	320	rn = NEXTOPER(NEXTOPER(rn)); \
	321	else if (type == PLUS) \
	322	rn = NEXTOPER(rn); \
	323	else if (type == IFMATCH) \
	324	rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
	325	else rn += NEXT_OFF(rn); \
	326	} \
	327	} STMT_END
	328
	329
		/* Forward declaration; the definition is not in this part of the file. */
	330	static void restore_pos(pTHX_ void *arg);
	331
		/* Savestack slot counts used by S_regcppush(): REGCP_PAREN_ELEMS per
		 * saved paren pair (end, start, start_tmp, paren number) and
		 * REGCP_OTHER_ELEMS for the fixed trailer (PL_regoffs, PL_regsize,
		 * *PL_reglastparen, *PL_reglastcloseparen, PL_reginput). */
	332	#define REGCP_PAREN_ELEMS 4
	333	#define REGCP_OTHER_ELEMS 5
	334	#define REGCP_FRAME_ELEMS 1
	335	/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
	336	* are needed for the regexp context stack bookkeeping. */
	337
	338	STATIC CHECKPOINT
	339	S_regcppush(pTHX_ I32 parenfloor)
	340	{
	341	dVAR;
	342	const int retval = PL_savestack_ix;
	343	const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
	344	const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
	345	const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
	346	int p;
	347	GET_RE_DEBUG_FLAGS_DECL;
	348
	349	if (paren_elems_to_push < 0)
	350	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
	351
	352	if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
	353	Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
	354	" out of range (%lu-%ld)",
	355	total_elems, (unsigned long)PL_regsize, (long)parenfloor);
	356
	357	SSGROW(total_elems + REGCP_FRAME_ELEMS);
	358
	359	for (p = PL_regsize; p > parenfloor; p--) {
	360	/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
	361	SSPUSHINT(PL_regoffs[p].end);
	362	SSPUSHINT(PL_regoffs[p].start);
	363	SSPUSHPTR(PL_reg_start_tmp[p]);
	364	SSPUSHINT(p);
	365	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	366	" saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
	367	(UV)p, (IV)PL_regoffs[p].start,
	368	(IV)(PL_reg_start_tmp[p] - PL_bostr),
	369	(IV)PL_regoffs[p].end
	370	));
	371	}
	372	/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
	373	SSPUSHPTR(PL_regoffs);
	374	SSPUSHINT(PL_regsize);
	375	SSPUSHINT(*PL_reglastparen);
	376	SSPUSHINT(*PL_reglastcloseparen);
	377	SSPUSHPTR(PL_reginput);
	378	SSPUSHUV(SAVEt_REGCONTEXT \| elems_shifted); /* Magic cookie. */
	379
	380	return retval;
	381	}
	382
	383	/* These are needed since we do not localize EVAL nodes: */
		/* REGCP_SET(cp): record the current PL_savestack_ix in cp (with an
		 * optional debug trace). */
	384	#define REGCP_SET(cp) \
	385	DEBUG_STATE_r( \
	386	PerlIO_printf(Perl_debug_log, \
	387	" Setting an EVAL scope, savestack=%"IVdf"\n", \
	388	(IV)PL_savestack_ix)); \
	389	cp = PL_savestack_ix
	390
		/* REGCP_UNWIND(cp): hand cp to regcpblow() (with an optional debug
		 * trace when cp differs from the current PL_savestack_ix). */
	391	#define REGCP_UNWIND(cp) \
	392	DEBUG_STATE_r( \
	393	if (cp != PL_savestack_ix) \
	394	PerlIO_printf(Perl_debug_log, \
	395	" Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
	396	(IV)(cp), (IV)PL_savestack_ix)); \
	397	regcpblow(cp)
	398
		/* S_regcppop - restore the capture state saved by S_regcppush().
		 *
		 * Pops the cookie, the fixed trailer and then each saved paren group,
		 * in the reverse of the order S_regcppush() pushed them.  Afterwards,
		 * parens above *PL_reglastparen (up to rex->nparens) have their end
		 * offset reset to -1.
		 *
		 * Returns the input pointer that was saved from PL_reginput. */
	399	STATIC char *
	400	S_regcppop(pTHX_ const regexp *rex)
	401	{
	402	dVAR;
	403	UV i;
	404	char *input;
	405	GET_RE_DEBUG_FLAGS_DECL;
	406
	407	PERL_ARGS_ASSERT_REGCPPOP;
	408
	409	/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
	410	i = SSPOPUV;
	411	assert((i & SAVE_MASK) == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
	412	i >>= SAVE_TIGHT_SHIFT; /* Total elements pushed (paren groups plus the trailer). */
	413	input = (char *) SSPOPPTR;
	414	*PL_reglastcloseparen = SSPOPINT;
	415	*PL_reglastparen = SSPOPINT;
	416	PL_regsize = SSPOPINT;
	417	PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
	418
		/* What remains in i is the number of per-paren elements. */
	419	i -= REGCP_OTHER_ELEMS;
	420	/* Now restore the parentheses context. */
	421	for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
	422	I32 tmps;
	423	U32 paren = (U32)SSPOPINT;
	424	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
	425	PL_regoffs[paren].start = SSPOPINT;
	426	tmps = SSPOPINT;
		/* The saved end is only written back for parens at or below the
		 * restored *PL_reglastparen. */
	427	if (paren <= *PL_reglastparen)
	428	PL_regoffs[paren].end = tmps;
	429	DEBUG_BUFFERS_r(
	430	PerlIO_printf(Perl_debug_log,
	431	" restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
	432	(UV)paren, (IV)PL_regoffs[paren].start,
	433	(IV)(PL_reg_start_tmp[paren] - PL_bostr),
	434	(IV)PL_regoffs[paren].end,
	435	(paren > *PL_reglastparen ? "(no)" : ""));
	436	);
	437	}
	438	DEBUG_BUFFERS_r(
	439	if (*PL_reglastparen + 1 <= rex->nparens) {
	440	PerlIO_printf(Perl_debug_log,
	441	" restoring \\%"IVdf"..\\%"IVdf" to undef\n",
	442	(IV)(*PL_reglastparen + 1), (IV)rex->nparens);
	443	}
	444	);
	445	#if 1
	446	/* It would seem that the similar code in regtry()
	447	* already takes care of this, and in fact it is in
	448	* a better location, since this code can be #if 0-ed out
	449	* but the code in regtry() is needed or otherwise tests
	450	* requiring null fields (pat.t#187 and split.t#{13,14}
	451	* (as of patchlevel 7877) will fail. Then again,
	452	* this code seems to be necessary or otherwise
	453	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	454	* --jhi updated by dapm */
		/* end is always reset; start only for parens beyond PL_regsize. */
	455	for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
	456	if (i > PL_regsize)
	457	PL_regoffs[i].start = -1;
	458	PL_regoffs[i].end = -1;
	459	}
	460	#endif
	461	return input;
	462	}
	463
		/* regcpblow(cp): unwind the savestack to cp via LEAVE_SCOPE. */
	464	#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
	465
	466	/*
	467	* pregexec and friends
	468	*/
	469
	470	#ifndef PERL_IN_XSUB_RE
	471	/*
	472	- pregexec - match a regexp against a string
	473	*/
	474	I32
	475	Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
	476	char strbeg, I32 minend, SV screamer, U32 nosave)
	477	/* strend: pointer to null at end of string */
	478	/* strbeg: real beginning of string */
	479	/* minend: end of match must be >=minend after stringarg. */
	480	/* nosave: For optimizations. */
	481	{
	482	PERL_ARGS_ASSERT_PREGEXEC;
	483
	484	return
	485	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
	486	nosave ? 0 : REXEC_COPY_STR);
	487	}
	488	#endif
	489
	490	/*
	491	* Need to implement the following flags for reg_anch:
	492	*
	493	* USE_INTUIT_NOML - Useful to call re_intuit_start() first
	494	* USE_INTUIT_ML
	495	* INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
	496	* INTUIT_AUTORITATIVE_ML
	497	* INTUIT_ONCE_NOML - Intuit can match in one location only.
	498	* INTUIT_ONCE_ML
	499	*
	500	* Another flag for this function: SECOND_TIME (so that float substrs
	501	* with giant delta may be not rechecked).
	502	*/
	503
	504	/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
	505
	506	/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
	507	Otherwise, only SvCUR(sv) is used to get strbeg. */
	508
	509	/* XXXX We assume that strpos is strbeg unless sv. */
	510
	511	/* XXXX Some places assume that there is a fixed substring.
	512	An update may be needed if optimizer marks as "INTUITable"
	513	RExen without fixed substrings. Similarly, it is assumed that
	514	lengths of all the strings are no more than minlen, thus they
	515	cannot come from lookahead.
	516	(Or minlen should take into account lookahead.)
	517	NOTE: Some of this comment is not correct. minlen does now take account
	518	of lookahead/behind. Further research is required. -- demerphq
	519
	520	*/
	521
	522	/* A failure to find a constant substring means that there is no need to make
	523	an expensive call to REx engine, thus we celebrate a failure. Similarly,
	524	finding a substring too deep into the string means that less calls to
	525	regtry() should be needed.
	526
	527	REx compiler's optimizer found 4 possible hints:
	528	a) Anchored substring;
	529	b) Fixed substring;
	530	c) Whether we are anchored (beginning-of-line or \G);
	531	d) First node (of those at offset 0) which may distinguish positions;
	532	We use a)b)d) and multiline-part of c), and try to find a position in the
	533	string which does not contradict any of them.
	534	*/
	535
	536	/* Most of decisions we do here should have been done at compile time.
	537	The nodes of the REx which we used for the search should have been
	538	deleted from the finite automaton. */
	539
	540	char *
	541	Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV sv, char strpos,
	542	char strend, const U32 flags, re_scream_pos_data data)
	543	{
	544	dVAR;
	545	struct regexp const prog = (struct regexp )SvANY(rx);
	546	register I32 start_shift = 0;
	547	/* Should be nonnegative! */
	548	register I32 end_shift = 0;
	549	register char *s;
	550	register SV *check;
	551	char *strbeg;
	552	char *t;
	553	const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
	554	I32 ml_anch;
	555	register char other_last = NULL; / other substr checked before this */
	556	char check_at = NULL; / check substr found at this pos */
	557	const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
	558	RXi_GET_DECL(prog,progi);
	559	#ifdef DEBUGGING
	560	const char * const i_strpos = strpos;
	561	#endif
	562	GET_RE_DEBUG_FLAGS_DECL;
	563
	564	PERL_ARGS_ASSERT_RE_INTUIT_START;
	565
	566	RX_MATCH_UTF8_set(rx,utf8_target);
	567
	568	if (RX_UTF8(rx)) {
	569	PL_reg_flags \|= RF_utf8;
	570	}
	571	DEBUG_EXECUTE_r(
	572	debug_start_match(rx, utf8_target, strpos, strend,
	573	sv ? "Guessing start of match in sv for"
	574	: "Guessing start of match in string for");
	575	);
	576
	577	/* CHR_DIST() would be more correct here but it makes things slow. */
	578	if (prog->minlen > strend - strpos) {
	579	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	580	"String too short... [re_intuit_start]\n"));
	581	goto fail;
	582	}
	583
	584	strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
	585	PL_regeol = strend;
	586	if (utf8_target) {
	587	if (!prog->check_utf8 && prog->check_substr)
	588	to_utf8_substr(prog);
	589	check = prog->check_utf8;
	590	} else {
	591	if (!prog->check_substr && prog->check_utf8)
	592	to_byte_substr(prog);
	593	check = prog->check_substr;
	594	}
	595	if (check == &PL_sv_undef) {
	596	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	597	"Non-utf8 string cannot match utf8 check string\n"));
	598	goto fail;
	599	}
	600	if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
	601	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
	602	\|\| ( (prog->extflags & RXf_ANCH_BOL)
	603	&& !multiline ) ); /* Check after \n? */
	604
	605	if (!ml_anch) {
	606	if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
	607	&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
	608	/* SvCUR is not set on references: SvRV and SvPVX_const overlap */
	609	&& sv && !SvROK(sv)
	610	&& (strpos != strbeg)) {
	611	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
	612	goto fail;
	613	}
	614	if (prog->check_offset_min == prog->check_offset_max &&
	615	!(prog->extflags & RXf_CANY_SEEN)) {
	616	/* Substring at constant offset from beg-of-str... */
	617	I32 slen;
	618
	619	s = HOP3c(strpos, prog->check_offset_min, strend);
	620
	621	if (SvTAIL(check)) {
	622	slen = SvCUR(check); /* >= 1 */
	623
	624	if ( strend - s > slen \|\| strend - s < slen - 1
	625	\|\| (strend - s == slen && strend[-1] != '\n')) {
	626	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
	627	goto fail_finish;
	628	}
	629	/* Now should match s[0..slen-2] */
	630	slen--;
	631	if (slen && (SvPVX_const(check) != s
	632	\|\| (slen > 1
	633	&& memNE(SvPVX_const(check), s, slen)))) {
	634	report_neq:
	635	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
	636	goto fail_finish;
	637	}
	638	}
	639	else if (SvPVX_const(check) != s
	640	\|\| ((slen = SvCUR(check)) > 1
	641	&& memNE(SvPVX_const(check), s, slen)))
	642	goto report_neq;
	643	check_at = s;
	644	goto success_at_start;
	645	}
	646	}
	647	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
	648	s = strpos;
	649	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	650	end_shift = prog->check_end_shift;
	651
	652	if (!ml_anch) {
	653	const I32 end = prog->check_offset_max + CHR_SVLEN(check)
	654	- (SvTAIL(check) != 0);
	655	const I32 eshift = CHR_DIST((U8)strend, (U8)s) - end;
	656
	657	if (end_shift < eshift)
	658	end_shift = eshift;
	659	}
	660	}
	661	else { /* Can match at random position */
	662	ml_anch = 0;
	663	s = strpos;
	664	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	665	end_shift = prog->check_end_shift;
	666
	667	/* end shift should be non negative here */
	668	}
	669
	670	#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
	671	if (end_shift < 0)
	672	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
	673	(IV)end_shift, RX_PRECOMP(prog));
	674	#endif
	675
	676	restart:
	677	/* Find a possible match in the region s..strend by looking for
	678	the "check" substring in the region corrected by start/end_shift. */
	679
	680	{
	681	I32 srch_start_shift = start_shift;
	682	I32 srch_end_shift = end_shift;
	683	if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
	684	srch_end_shift -= ((strbeg - s) - srch_start_shift);
	685	srch_start_shift = strbeg - s;
	686	}
	687	DEBUG_OPTIMISE_MORE_r({
	688	PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
	689	(IV)prog->check_offset_min,
	690	(IV)srch_start_shift,
	691	(IV)srch_end_shift,
	692	(IV)prog->check_end_shift);
	693	});
	694
	695	if ((flags & REXEC_SCREAM) && SvSCREAM(sv)) {
	696	I32 p = -1; /* Internal iterator of scream. */
	697	I32 * const pp = data ? data->scream_pos : &p;
	698	const MAGIC *mg;
	699	bool found = FALSE;
	700
	701	assert(SvMAGICAL(sv));
	702	mg = mg_find(sv, PERL_MAGIC_study);
	703	assert(mg);
	704
	705	if (mg->mg_private == 1) {
	706	found = ((U8 *)mg->mg_ptr)[BmRARE(check)] != (U8)~0;
	707	} else if (mg->mg_private == 2) {
	708	found = ((U16 *)mg->mg_ptr)[BmRARE(check)] != (U16)~0;
	709	} else {
	710	assert (mg->mg_private == 4);
	711	found = ((U32 *)mg->mg_ptr)[BmRARE(check)] != (U32)~0;
	712	}
	713
	714	if (found
	715	\|\| ( BmRARE(check) == '\n'
	716	&& (BmPREVIOUS(check) == SvCUR(check) - 1)
	717	&& SvTAIL(check) ))
	718	s = screaminstr(sv, check,
	719	srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
	720	else
	721	goto fail_finish;
	722	/* we may be pointing at the wrong string */
	723	if (s && RXp_MATCH_COPIED(prog))
	724	s = strbeg + (s - SvPVX_const(sv));
	725	if (data)
	726	*data->scream_olds = s;
	727	}
	728	else {
	729	U8* start_point;
	730	U8* end_point;
	731	if (prog->extflags & RXf_CANY_SEEN) {
	732	start_point= (U8*)(s + srch_start_shift);
	733	end_point= (U8*)(strend - srch_end_shift);
	734	} else {
	735	start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
	736	end_point= HOP3(strend, -srch_end_shift, strbeg);
	737	}
	738	DEBUG_OPTIMISE_MORE_r({
	739	PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
	740	(int)(end_point - start_point),
	741	(int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
	742	start_point);
	743	});
	744
	745	s = fbm_instr( start_point, end_point,
	746	check, multiline ? FBMrf_MULTILINE : 0);
	747	}
	748	}
	749	/* Update the count-of-usability, remove useless subpatterns,
	750	unshift s. */
	751
	752	DEBUG_EXECUTE_r({
	753	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	754	SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
	755	PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
	756	(s ? "Found" : "Did not find"),
	757	(check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
	758	? "anchored" : "floating"),
	759	quoted,
	760	RE_SV_TAIL(check),
	761	(s ? " at offset " : "...\n") );
	762	});
	763
	764	if (!s)
	765	goto fail_finish;
	766	/* Finish the diagnostic message */
	767	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
	768
	769	/* XXX dmq: first branch is for positive lookbehind...
	770	Our check string is offset from the beginning of the pattern.
	771	So we need to do any stclass tests offset forward from that
	772	point. I think. :-(
	773	*/
	774
	775
	776
	777	check_at=s;
	778
	779
	780	/* Got a candidate. Check MBOL anchoring, and the other substr.
	781	Start with the other substr.
	782	XXXX no SCREAM optimization yet - and a very coarse implementation
	783	XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
	784	always match. Probably should be marked during compile...
	785	Probably it is right to do no SCREAM here...
	786	*/
	787
	788	if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
	789	: (prog->float_substr && prog->anchored_substr))
	790	{
	791	/* Take into account the "other" substring. */
	792	/* XXXX May be hopelessly wrong for UTF... */
	793	if (!other_last)
	794	other_last = strpos;
	795	if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
	796	do_other_anchored:
	797	{
	798	char * const last = HOP3c(s, -start_shift, strbeg);
	799	char last1, last2;
	800	char * const saved_s = s;
	801	SV* must;
	802
	803	t = s - prog->check_offset_max;
	804	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	805	&& (!utf8_target
	806	\|\| ((t = (char)reghopmaybe3((U8)s, -(prog->check_offset_max), (U8*)strpos))
	807	&& t > strpos)))
	808	NOOP;
	809	else
	810	t = strpos;
	811	t = HOP3c(t, prog->anchored_offset, strend);
	812	if (t < other_last) /* These positions already checked */
	813	t = other_last;
	814	last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
	815	if (last < last1)
	816	last1 = last;
	817	/* XXXX It is not documented what units *_offsets are in.
	818	We assume bytes, but this is clearly wrong.
	819	Meaning this code needs to be carefully reviewed for errors.
	820	dmq.
	821	*/
	822
	823	/* On end-of-str: see comment below. */
	824	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	825	if (must == &PL_sv_undef) {
	826	s = (char*)NULL;
	827	DEBUG_r(must = prog->anchored_utf8); /* for debug */
	828	}
	829	else
	830	s = fbm_instr(
	831	(unsigned char*)t,
	832	HOP3(HOP3(last1, prog->anchored_offset, strend)
	833	+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
	834	must,
	835	multiline ? FBMrf_MULTILINE : 0
	836	);
	837	DEBUG_EXECUTE_r({
	838	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	839	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	840	PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
	841	(s ? "Found" : "Contradicts"),
	842	quoted, RE_SV_TAIL(must));
	843	});
	844
	845
	846	if (!s) {
	847	if (last1 >= last2) {
	848	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	849	", giving up...\n"));
	850	goto fail_finish;
	851	}
	852	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	853	", trying floating at offset %ld...\n",
	854	(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
	855	other_last = HOP3c(last1, prog->anchored_offset+1, strend);
	856	s = HOP3c(last, 1, strend);
	857	goto restart;
	858	}
	859	else {
	860	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	861	(long)(s - i_strpos)));
	862	t = HOP3c(s, -prog->anchored_offset, strbeg);
	863	other_last = HOP3c(s, 1, strend);
	864	s = saved_s;
	865	if (t == strpos)
	866	goto try_at_start;
	867	goto try_at_offset;
	868	}
	869	}
	870	}
	871	else { /* Take into account the floating substring. */
	872	char last, last1;
	873	char * const saved_s = s;
	874	SV* must;
	875
	876	t = HOP3c(s, -start_shift, strbeg);
	877	last1 = last =
	878	HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
	879	if (CHR_DIST((U8)last, (U8)t) > prog->float_max_offset)
	880	last = HOP3c(t, prog->float_max_offset, strend);
	881	s = HOP3c(t, prog->float_min_offset, strend);
	882	if (s < other_last)
	883	s = other_last;
	884	/* XXXX It is not documented what units *_offsets are in. Assume bytes. */
	885	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	886	/* fbm_instr() takes into account exact value of end-of-str
	887	if the check is SvTAIL(ed). Since false positives are OK,
	888	and end-of-str is not later than strend we are OK. */
	889	if (must == &PL_sv_undef) {
	890	s = (char*)NULL;
	891	DEBUG_r(must = prog->float_utf8); /* for debug message */
	892	}
	893	else
	894	s = fbm_instr((unsigned char*)s,
	895	(unsigned char*)last + SvCUR(must)
	896	- (SvTAIL(must)!=0),
	897	must, multiline ? FBMrf_MULTILINE : 0);
	898	DEBUG_EXECUTE_r({
	899	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	900	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	901	PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
	902	(s ? "Found" : "Contradicts"),
	903	quoted, RE_SV_TAIL(must));
	904	});
	905	if (!s) {
	906	if (last1 == last) {
	907	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	908	", giving up...\n"));
	909	goto fail_finish;
	910	}
	911	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	912	", trying anchored starting at offset %ld...\n",
	913	(long)(saved_s + 1 - i_strpos)));
	914	other_last = last;
	915	s = HOP3c(t, 1, strend);
	916	goto restart;
	917	}
	918	else {
	919	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	920	(long)(s - i_strpos)));
	921	other_last = s; /* Fix this later. --Hugo */
	922	s = saved_s;
	923	if (t == strpos)
	924	goto try_at_start;
	925	goto try_at_offset;
	926	}
	927	}
	928	}
	929
	930
	931	t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
	932
	933	DEBUG_OPTIMISE_MORE_r(
	934	PerlIO_printf(Perl_debug_log,
	935	"Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
	936	(IV)prog->check_offset_min,
	937	(IV)prog->check_offset_max,
	938	(IV)(s-strpos),
	939	(IV)(t-strpos),
	940	(IV)(t-s),
	941	(IV)(strend-strpos)
	942	)
	943	);
	944
	945	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	946	&& (!utf8_target
	947	\|\| ((t = (char)reghopmaybe3((U8)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
	948	&& t > strpos)))
	949	{
	950	/* Fixed substring is found far enough so that the match
	951	cannot start at strpos. */
	952	try_at_offset:
	953	if (ml_anch && t[-1] != '\n') {
	954	/* Eventually fbm_*() should handle this, but often
	955	anchored_offset is not 0, so this check will not be wasted. */
	956	/* XXXX In the code below we prefer to look for "^" even in
	957	presence of anchored substrings. And we search even
	958	beyond the found float position. These pessimizations
	959	are historical artefacts only. */
	960	find_anchor:
	961	while (t < strend - prog->minlen) {
	962	if (*t == '\n') {
	963	if (t < check_at - prog->check_offset_min) {
	964	if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
	965	/* Since we moved from the found position,
	966	we definitely contradict the found anchored
	967	substr. Due to the above check we do not
	968	contradict "check" substr.
	969	Thus we can arrive here only if check substr
	970	is float. Redo checking for "other"=="fixed".
	971	*/
	972	strpos = t + 1;
	973	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
	974	PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
	975	goto do_other_anchored;
	976	}
	977	/* We don't contradict the found floating substring. */
	978	/* XXXX Why not check for STCLASS? */
	979	s = t + 1;
	980	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
	981	PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
	982	goto set_useful;
	983	}
	984	/* Position contradicts check-string */
	985	/* XXXX probably better to look for check-string
	986	than for "\n", so one should lower the limit for t? */
	987	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
	988	PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
	989	other_last = strpos = s = t + 1;
	990	goto restart;
	991	}
	992	t++;
	993	}
	994	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
	995	PL_colors[0], PL_colors[1]));
	996	goto fail_finish;
	997	}
	998	else {
	999	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
	1000	PL_colors[0], PL_colors[1]));
	1001	}
	1002	s = t;
	1003	set_useful:
	1004	++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
	1005	}
	1006	else {
	1007	/* The found string does not prohibit matching at strpos,
	1008	- no optimization of calling REx engine can be performed,
	1009	unless it was an MBOL and we are not after MBOL,
	1010	or a future STCLASS check will fail this. */
	1011	try_at_start:
	1012	/* Even in this situation we may use MBOL flag if strpos is offset
	1013	wrt the start of the string. */
	1014	if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
	1015	&& (strpos != strbeg) && strpos[-1] != '\n'
	1016	/* May be due to an implicit anchor of m{.*foo} */
	1017	&& !(prog->intflags & PREGf_IMPLICIT))
	1018	{
	1019	t = strpos;
	1020	goto find_anchor;
	1021	}
	1022	DEBUG_EXECUTE_r( if (ml_anch)
	1023	PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
	1024	(long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
	1025	);
	1026	success_at_start:
	1027	if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
	1028	&& (utf8_target ? (
	1029	prog->check_utf8 /* Could be deleted already */
	1030	&& --BmUSEFUL(prog->check_utf8) < 0
	1031	&& (prog->check_utf8 == prog->float_utf8)
	1032	) : (
	1033	prog->check_substr /* Could be deleted already */
	1034	&& --BmUSEFUL(prog->check_substr) < 0
	1035	&& (prog->check_substr == prog->float_substr)
	1036	)))
	1037	{
	1038	/* If flags & SOMETHING - do not do it many times on the same match */
	1039	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
	1040	/* XXX Does the destruction order has to change with utf8_target? */
	1041	SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
	1042	SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
	1043	prog->check_substr = prog->check_utf8 = NULL; /* disable */
	1044	prog->float_substr = prog->float_utf8 = NULL; /* clear */
	1045	check = NULL; /* abort */
	1046	s = strpos;
	1047	/* XXXX If the check string was an implicit check MBOL, then we need to unset the relevant flag
	1048	see http://bugs.activestate.com/show_bug.cgi?id=87173 */
	1049	if (prog->intflags & PREGf_IMPLICIT)
	1050	prog->extflags &= ~RXf_ANCH_MBOL;
	1051	/* XXXX This is a remnant of the old implementation. It
	1052	looks wasteful, since now INTUIT can use many
	1053	other heuristics. */
	1054	prog->extflags &= ~RXf_USE_INTUIT;
	1055	/* XXXX What other flags might need to be cleared in this branch? */
	1056	}
	1057	else
	1058	s = strpos;
	1059	}
	1060
	1061	/* Last resort... */
	1062	/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
	1063	/* trie stclasses are too expensive to use here, we are better off to
	1064	leave it to regmatch itself */
	1065	if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
	1066	/* minlen == 0 is possible if regstclass is \b or \B,
	1067	and the fixed substr is ''$.
	1068	Since minlen is already taken into account, s+1 is before strend;
	1069	accidentally, minlen >= 1 guaranties no false positives at s + 1
	1070	even for \b or \B. But (minlen? 1 : 0) below assumes that
	1071	regstclass does not come from lookahead... */
	1072	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
	1073	This leaves EXACTF-ish only, which are dealt with in find_byclass(). */
	1074	const U8* const str = (U8*)STRING(progi->regstclass);
	1075	const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
	1076	? CHR_DIST(str+STR_LEN(progi->regstclass), str)
	1077	: 1);
	1078	char * endpos;
	1079	if (prog->anchored_substr \|\| prog->anchored_utf8 \|\| ml_anch)
	1080	endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
	1081	else if (prog->float_substr \|\| prog->float_utf8)
	1082	endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
	1083	else
	1084	endpos= strend;
	1085
	1086	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
	1087	(IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
	1088
	1089	t = s;
	1090	s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
	1091	if (!s) {
	1092	#ifdef DEBUGGING
	1093	const char *what = NULL;
	1094	#endif
	1095	if (endpos == strend) {
	1096	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1097	"Could not match STCLASS...\n") );
	1098	goto fail;
	1099	}
	1100	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1101	"This position contradicts STCLASS...\n") );
	1102	if ((prog->extflags & RXf_ANCH) && !ml_anch)
	1103	goto fail;
	1104	/* Contradict one of substrings */
	1105	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	1106	if ((utf8_target ? prog->anchored_utf8 : prog->anchored_substr) == check) {
	1107	DEBUG_EXECUTE_r( what = "anchored" );
	1108	hop_and_restart:
	1109	s = HOP3c(t, 1, strend);
	1110	if (s + start_shift + end_shift > strend) {
	1111	/* XXXX Should be taken into account earlier? */
	1112	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1113	"Could not match STCLASS...\n") );
	1114	goto fail;
	1115	}
	1116	if (!check)
	1117	goto giveup;
	1118	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1119	"Looking for %s substr starting at offset %ld...\n",
	1120	what, (long)(s + start_shift - i_strpos)) );
	1121	goto restart;
	1122	}
	1123	/* Have both, check_string is floating */
	1124	if (t + start_shift >= check_at) /* Contradicts floating=check */
	1125	goto retry_floating_check;
	1126	/* Recheck anchored substring, but not floating... */
	1127	s = check_at;
	1128	if (!check)
	1129	goto giveup;
	1130	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1131	"Looking for anchored substr starting at offset %ld...\n",
	1132	(long)(other_last - i_strpos)) );
	1133	goto do_other_anchored;
	1134	}
	1135	/* Another way we could have checked stclass at the
	1136	current position only: */
	1137	if (ml_anch) {
	1138	s = t = t + 1;
	1139	if (!check)
	1140	goto giveup;
	1141	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1142	"Looking for /%s^%s/m starting at offset %ld...\n",
	1143	PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
	1144	goto try_at_offset;
	1145	}
	1146	if (!(utf8_target ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
	1147	goto fail;
	1148	/* Check is floating substring. */
	1149	retry_floating_check:
	1150	t = check_at - start_shift;
	1151	DEBUG_EXECUTE_r( what = "floating" );
	1152	goto hop_and_restart;
	1153	}
	1154	if (t != s) {
	1155	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1156	"By STCLASS: moving %ld --> %ld\n",
	1157	(long)(t - i_strpos), (long)(s - i_strpos))
	1158	);
	1159	}
	1160	else {
	1161	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1162	"Does not contradict STCLASS...\n");
	1163	);
	1164	}
	1165	}
	1166	giveup:
	1167	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
	1168	PL_colors[4], (check ? "Guessed" : "Giving up"),
	1169	PL_colors[5], (long)(s - i_strpos)) );
	1170	return s;
	1171
	1172	fail_finish: /* Substring not found */
	1173	if (prog->check_substr \|\| prog->check_utf8) /* could be removed already */
	1174	BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
	1175	fail:
	1176	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
	1177	PL_colors[4], PL_colors[5]));
	1178	return NULL;
	1179	}
	1180
	/* Declare a const local named `trie_type` describing how input characters
	 * must be read for the trie node `scan`:
	 *   - node flags != EXACT (a folding trie): trie_utf8_fold when the target
	 *     is UTF-8, else trie_latin_utf8_fold when the pattern is UTF-8
	 *     (UTF_PATTERN), else trie_plain;
	 *   - node flags == EXACT: trie_utf8 when the target is UTF-8, else
	 *     trie_plain.
	 * Expects `utf8_target` to be visible at the point of use.  The argument is
	 * parenthesized so that any pointer-valued expression may be passed. */
	1181	#define DECL_TRIE_TYPE(scan) \
	1182	    const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
	1183	        trie_type = ((scan)->flags != EXACT) \
	1184	            ? (utf8_target ? trie_utf8_fold : (UTF_PATTERN ? trie_latin_utf8_fold : trie_plain)) \
	1185	            : (utf8_target ? trie_utf8 : trie_plain)
	1186
	/* Read the next character for trie matching and translate it to a trie
	 * character id.
	 *
	 * The code point (uvc) is obtained according to trie_type:
	 *   trie_utf8_fold       - decode UTF-8 at uc, then fold with to_uni_fold();
	 *   trie_latin_utf8_fold - take the single byte at uc, then fold with
	 *                          to_uni_fold();
	 *   trie_utf8            - decode UTF-8 at uc, no folding;
	 *   trie_plain           - take the single byte at uc.
	 * In the two folding modes to_uni_fold() is handed foldbuf/foldlen; the
	 * UNISKIP(uvc) bytes of the character returned now are skipped and uscan is
	 * left pointing at what remains.  While foldlen > 0 later invocations decode
	 * from uscan instead of uc and set len to 0.
	 *
	 * Outputs:
	 *   uvc    - the code point read;
	 *   len    - number of bytes taken from uc (0 when served from foldbuf);
	 *   charid - trie->charmap[uvc] when uvc < 256; otherwise the value stored
	 *            in widecharmap under the raw bytes of uvc, or 0 if widecharmap
	 *            is NULL or has no such entry.
	 */
	1187	#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
	1188	uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
	1189	    switch (trie_type) { \
	1190	    case trie_utf8_fold: \
	1191	        if ( foldlen>0 ) { \
	1192	            uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
	1193	            foldlen -= len; \
	1194	            uscan += len; \
	1195	            len=0; \
	1196	        } else { \
	1197	            uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
	1198	            uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
	1199	            foldlen -= UNISKIP( uvc ); \
	1200	            uscan = foldbuf + UNISKIP( uvc ); \
	1201	        } \
	1202	        break; \
	1203	    case trie_latin_utf8_fold: \
	1204	        if ( foldlen>0 ) { \
	1205	            uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
	1206	            foldlen -= len; \
	1207	            uscan += len; \
	1208	            len=0; \
	1209	        } else { \
	1210	            len = 1; \
	1211	            uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen ); /* fold the byte at uc, not the pointer value */ \
	1212	            foldlen -= UNISKIP( uvc ); \
	1213	            uscan = foldbuf + UNISKIP( uvc ); \
	1214	        } \
	1215	        break; \
	1216	    case trie_utf8: \
	1217	        uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
	1218	        break; \
	1219	    case trie_plain: \
	1220	        uvc = (UV)*uc; \
	1221	        len = 1; \
	1222	    } \
	1223	    if (uvc < 256) { \
	1224	        charid = trie->charmap[ uvc ]; \
	1225	    } \
	1226	    else { \
	1227	        charid = 0; \
	1228	        if (widecharmap) { \
	1229	            SV** const svpp = hv_fetch(widecharmap, \
	1230	                (char*)&uvc, sizeof(UV), 0); \
	1231	            if (svpp) \
	1232	                charid = (U16)SvIV(*svpp); \
	1233	        } \
	1234	    } \
	1235	} STMT_END
	1236
	/* Scan one byte at a time from s up to and including e, looking for a
	 * position where an EXACTF-style node can start.  A position is accepted,
	 * by jumping to the enclosing function's got_it label, when all of these
	 * hold:
	 *   - CoNd is true (the caller's test on the byte at s);
	 *   - ln == 1, or folder(s, pat_string, ln) reports a match;
	 *   - reginfo is NULL, or regtry(reginfo, &s) succeeds.
	 * Uses s, e, ln, folder, pat_string and reginfo from the enclosing scope;
	 * s is advanced as a side effect. */
	1237	#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
	1238	STMT_START { \
	1239	    while (s <= e) { \
	1240	        if ( (CoNd) \
	1241	             && (ln == 1 || folder(s, pat_string, ln)) \
	1242	             && (!reginfo || regtry(reginfo, &s)) ) \
	1243	            goto got_it; \
	1244	        s++; \
	1245	    } \
	1246	} STMT_END
	1247
	/* Run CoDe at successive positions of s, advancing by UTF8SKIP(s) bytes
	 * each time.  The loop continues while the position just past the current
	 * step (s + uskip) is <= strend.
	 * CoDe is pasted verbatim into the loop body ahead of the `s += uskip`
	 * step, so it may leave the loop with goto, but a `continue` inside it
	 * would skip the advance.  Uses s, uskip and strend from the enclosing
	 * scope. */
	1248	#define REXEC_FBC_UTF8_SCAN(CoDe) \
	1249	STMT_START { \
	1250	while (s + (uskip = UTF8SKIP(s)) <= strend) { \
	1251	CoDe \
	1252	s += uskip; \
	1253	} \
	1254	} STMT_END
	1255
	/* Run CoDe at every byte position from s up to, but not including, strend.
	 * CoDe is pasted verbatim into the loop body ahead of the `s++` step, so a
	 * `continue` inside it would skip the advance.  Uses s and strend from the
	 * enclosing scope. */
	1256	#define REXEC_FBC_SCAN(CoDe) \
	1257	STMT_START { \
	1258	while (s < strend) { \
	1259	CoDe \
	1260	s++; \
	1261	} \
	1262	} STMT_END
	1263
	/* Scan a UTF-8 target for a position whose character satisfies CoNd.
	 * `tmp` gates whether a satisfying position is actually tried:
	 *   - CoNd true and tmp non-zero: jump to got_it if reginfo is NULL or
	 *     regtry() succeeds at s; otherwise set tmp to doevery;
	 *   - CoNd true and tmp zero: set tmp to doevery;
	 *   - CoNd false: reset tmp to 1.
	 * So with doevery == 0 only the first position of each run of consecutive
	 * satisfying characters is tried.  Uses tmp, doevery, reginfo and s from
	 * the enclosing scope. */
	1264	#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
	1265	REXEC_FBC_UTF8_SCAN( \
	1266	    if (CoNd) { \
	1267	        if (tmp && (!reginfo || regtry(reginfo, &s))) \
	1268	            goto got_it; \
	1269	        else \
	1270	            tmp = doevery; \
	1271	    } \
	1272	    else \
	1273	        tmp = 1; \
	1274	)
	1275
	/* Byte-at-a-time scan for a position whose byte satisfies CoNd.
	 * `tmp` gates whether a satisfying position is actually tried:
	 *   - CoNd true and tmp non-zero: jump to got_it if reginfo is NULL or
	 *     regtry() succeeds at s; otherwise set tmp to doevery;
	 *   - CoNd true and tmp zero: set tmp to doevery;
	 *   - CoNd false: reset tmp to 1.
	 * So with doevery == 0 only the first position of each run of consecutive
	 * satisfying bytes is tried.  Uses tmp, doevery, reginfo and s from the
	 * enclosing scope. */
	1276	#define REXEC_FBC_CLASS_SCAN(CoNd) \
	1277	REXEC_FBC_SCAN( \
	1278	    if (CoNd) { \
	1279	        if (tmp && (!reginfo || regtry(reginfo, &s))) \
	1280	            goto got_it; \
	1281	        else \
	1282	            tmp = doevery; \
	1283	    } \
	1284	    else \
	1285	        tmp = 1; \
	1286	)
	1287
	/* Accept the current position: jump to got_it when reginfo is NULL or when
	 * regtry(reginfo, &s) succeeds.  Expands to an unbraced `if` with no
	 * trailing semicolon; the user supplies the `;`. */
	1288	#define REXEC_FBC_TRYIT \
	1289	    if ((!reginfo || regtry(reginfo, &s))) \
	1290	        goto got_it
	1291
	/* Class scan that picks its flavour from utf8_target: the UTF-8 scan with
	 * CoNdUtF8 when the target is UTF-8, otherwise the byte scan with CoNd.
	 * Expands to a bare if/else statement. */
	1292	#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
	1293	if (utf8_target) { \
	1294	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1295	} \
	1296	else { \
	1297	REXEC_FBC_CLASS_SCAN(CoNd); \
	1298	}
	1299
	/* Same selection as REXEC_FBC_CSCAN, except that UtFpReLoAd is evaluated
	 * first on the UTF-8 branch (callers pass a LOAD_UTF8_CHARCLASS_*() call
	 * here).  The non-UTF-8 branch does not evaluate it. */
	1300	#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
	1301	if (utf8_target) { \
	1302	UtFpReLoAd; \
	1303	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1304	} \
	1305	else { \
	1306	REXEC_FBC_CLASS_SCAN(CoNd); \
	1307	}
	1308
	/* Same selection as REXEC_FBC_CSCAN, but first sets RF_tainted in
	 * PL_reg_flags (used by the locale-dependent node types).
	 * Note: this expands to two statements (the flag assignment followed by an
	 * if/else), so it must not be used as the body of an unbraced `if`. */
	1309	#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
	1310	    PL_reg_flags |= RF_tainted; \
	1311	    if (utf8_target) { \
	1312	        REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1313	    } \
	1314	    else { \
	1315	        REXEC_FBC_CLASS_SCAN(CoNd); \
	1316	    }
	1317
	/* Convenience wrapper around dump_exec_pos() that fills in the globals
	 * PL_regeol, PL_bostr and PL_reg_starttry for the middle arguments. */
	1318	#define DUMP_EXEC_POS(li,s,doutf8) \
	1319	dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
	1320
	1321
	/* Boundary scan over a UTF-8 target that classifies characters using only
	 * the byte-level test TEST_NON_UTF8.
	 * tmp starts as TEST_NON_UTF8 applied to the byte before s, or to '\n' when
	 * s is at PL_bostr.  At each character position the test is applied to the
	 * first byte at s; when `tmp == !test` holds, tmp is flipped and IF_SUCCESS
	 * runs, otherwise IF_FAIL runs.
	 * The definition ends with a line continuation, so the line following the
	 * macro is part of it. */
	1322	#define UTF8_NOLOAD(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1323	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1324	tmp = TEST_NON_UTF8(tmp); \
	1325	REXEC_FBC_UTF8_SCAN( \
	1326	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1327	tmp = !tmp; \
	1328	IF_SUCCESS; \
	1329	} \
	1330	else { \
	1331	IF_FAIL; \
	1332	} \
	1333	); \
	1334
	/* Boundary scan over a UTF-8 target using full-character tests.
	 * tmp is first set to the character before s: '\n' when s is at PL_bostr,
	 * otherwise the code point decoded after stepping back one character with
	 * reghop3().  TeSt1_UtF8 (an expression written in terms of tmp) then
	 * replaces tmp with the classification of that previous character.
	 * LOAD_UTF8_CHARCLASS_ALNUM() is invoked before scanning.  At each
	 * character position, when `tmp == !(TeSt2_UtF8)` holds, tmp is flipped and
	 * IF_SUCCESS runs, otherwise IF_FAIL runs.
	 * The definition ends with a line continuation, so the line following the
	 * macro is part of it. */
	1335	#define UTF8_LOAD(TeSt1_UtF8, TeSt2_UtF8, IF_SUCCESS, IF_FAIL) \
	1336	    if (s == PL_bostr) { \
	1337	        tmp = '\n'; \
	1338	    } \
	1339	    else { \
	1340	        U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr); /* pointer casts: r is a U8 pointer */ \
	1341	        tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT); \
	1342	    } \
	1343	    tmp = TeSt1_UtF8; \
	1344	    LOAD_UTF8_CHARCLASS_ALNUM(); \
	1345	    REXEC_FBC_UTF8_SCAN( \
	1346	        if (tmp == ! (TeSt2_UtF8)) { \
	1347	            tmp = !tmp; \
	1348	            IF_SUCCESS; \
	1349	        } \
	1350	        else { \
	1351	            IF_FAIL; \
	1352	        } \
	1353	    ); \
	1354
	1355	/* The only difference between the BOUND and NBOUND cases is that
	1356	* REXEC_FBC_TRYIT is called when matched in BOUND, and when non-matched in
	1357	* NBOUND. This is accomplished by passing it in either the if or else clause,
	1358	* with the other one being empty */
	/* BOUND: REXEC_FBC_TRYIT goes in the success slot; the UTF-8 side uses
	 * UTF8_LOAD with TEST1_UTF8/TEST2_UTF8.  PLACEHOLDER is defined outside this
	 * section (presumably empty -- confirm). */
	1359	#define FBC_BOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1360	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1361
	/* BOUND, UTF-8 side handled by UTF8_NOLOAD; TEST1_UTF8 and TEST2_UTF8 are
	 * accepted but not used in the expansion. */
	1362	#define FBC_BOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1363	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1364
	/* NBOUND: REXEC_FBC_TRYIT goes in the failure slot instead. */
	1365	#define FBC_NBOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1366	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1367
	/* NBOUND, UTF-8 side handled by UTF8_NOLOAD; TEST1_UTF8 and TEST2_UTF8 are
	 * accepted but not used in the expansion. */
	1368	#define FBC_NBOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1369	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1370
	1371
	1372	/* Common to the BOUND and NBOUND cases. Unfortunately the UTF8 tests need to
	1373	* be passed in completely with the variable name being tested, which isn't
	1374	* such a clean interface, but this is easier to read than it was before. We
	1375	* are looking for the boundary (or non-boundary) between a word and non-word
	1376	* character. The utf8 and non-utf8 cases have the same logic, but the details
	1377	* must be different. Find the "wordness" of the character just prior to this
	1378	* one, and compare it with the wordness of this one. If they differ, we have
	1379	* a boundary. At the beginning of the string, pretend that the previous
	1380	* character was a new-line */
	/* Parameters:
	 *   UTF8_CODE      - complete code for the UTF-8 target case (a UTF8_LOAD
	 *                    or UTF8_NOLOAD invocation);
	 *   TEST_NON_UTF8  - byte classifier used for the non-UTF-8 case;
	 *   IF_SUCCESS     - code run at a position where the classification of
	 *                    the current byte differs from tmp (tmp is flipped
	 *                    first);
	 *   IF_FAIL        - code run at every other position.
	 * Once the scan is over, one more attempt is made at the position s was
	 * left at: if prog->minlen is 0 and tmp is non-zero, jump to got_it when
	 * reginfo is NULL or regtry() succeeds.
	 * Uses utf8_target, tmp, s, prog and reginfo from the enclosing scope. */
	1381	#define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1382	    if (utf8_target) { \
	1383	        UTF8_CODE \
	1384	    } \
	1385	    else { /* Not utf8 */ \
	1386	        tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1387	        tmp = TEST_NON_UTF8(tmp); \
	1388	        REXEC_FBC_SCAN( \
	1389	            if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1390	                tmp = !tmp; \
	1391	                IF_SUCCESS; \
	1392	            } \
	1393	            else { \
	1394	                IF_FAIL; \
	1395	            } \
	1396	        ); \
	1397	    } \
	1398	    if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s))) \
	1399	        goto got_it;
	1400
	1401	/* We know what class REx starts with. Try to find this position... */
	1402	/* if reginfo is NULL, it's a dry run */
	1403	/* annoyingly all the vars in this routine have different names from their counterparts
	1404	in regmatch. /grrr */
	1405
	1406	STATIC char *
	1407	S_find_byclass(pTHX_ regexp * prog, const regnode c, char s,
	1408	const char strend, regmatch_info reginfo)
	1409	{
	1410	dVAR;
	1411	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
	1412	char pat_string; / The pattern's exactish string */
	1413	char pat_end; / ptr to end char of pat_string */
	1414	re_fold_t folder; /* Function for computing non-utf8 folds */
	1415	const U8 fold_array; / array for folding ords < 256 */
	1416	STRLEN ln;
	1417	STRLEN lnc;
	1418	register STRLEN uskip;
	1419	U8 c1;
	1420	U8 c2;
	1421	char *e;
	1422	register I32 tmp = 1; /* Scratch variable? */
	1423	register const bool utf8_target = PL_reg_match_utf8;
	1424	UV utf8_fold_flags = 0;
	1425	RXi_GET_DECL(prog,progi);
	1426
	1427	PERL_ARGS_ASSERT_FIND_BYCLASS;
	1428
	1429	/* We know what class it must start with. */
	1430	switch (OP(c)) {
	1431	case ANYOFV:
	1432	case ANYOF:
	1433	if (utf8_target \|\| OP(c) == ANYOFV) {
	1434	STRLEN inclasslen = strend - s;
	1435	REXEC_FBC_UTF8_CLASS_SCAN(
	1436	reginclass(prog, c, (U8*)s, &inclasslen, utf8_target));
	1437	}
	1438	else {
	1439	REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
	1440	}
	1441	break;
	1442	case CANY:
	1443	REXEC_FBC_SCAN(
	1444	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1445	goto got_it;
	1446	else
	1447	tmp = doevery;
	1448	);
	1449	break;
	1450
	1451	case EXACTFA:
	1452	if (UTF_PATTERN \|\| utf8_target) {
	1453	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	1454	goto do_exactf_utf8;
	1455	}
	1456	fold_array = PL_fold_latin1; /* Latin1 folds are not affected by */
	1457	folder = foldEQ_latin1; /* /a, except the sharp s one which */
	1458	goto do_exactf_non_utf8; /* isn't dealt with by these */
	1459
	1460	case EXACTFU:
	1461	if (UTF_PATTERN \|\| utf8_target) {
	1462	utf8_fold_flags = 0;
	1463	goto do_exactf_utf8;
	1464	}
	1465
	1466	/* Any 'ss' in the pattern should have been replaced by regcomp,
	1467	* so we don't have to worry here about this single special case
	1468	* in the Latin1 range */
	1469	fold_array = PL_fold_latin1;
	1470	folder = foldEQ_latin1;
	1471	goto do_exactf_non_utf8;
	1472
	1473	case EXACTF:
	1474	if (UTF_PATTERN \|\| utf8_target) {
	1475	utf8_fold_flags = 0;
	1476	goto do_exactf_utf8;
	1477	}
	1478	fold_array = PL_fold;
	1479	folder = foldEQ;
	1480	goto do_exactf_non_utf8;
	1481
	1482	case EXACTFL:
	1483	if (UTF_PATTERN \|\| utf8_target) {
	1484	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	1485	goto do_exactf_utf8;
	1486	}
	1487	fold_array = PL_fold_locale;
	1488	folder = foldEQ_locale;
	1489
	1490	/* FALL THROUGH */
	1491
	1492	do_exactf_non_utf8: /* Neither pattern nor string are UTF8 */
	1493
	1494	/* The idea in the non-utf8 EXACTF* cases is to first find the
	1495	* first character of the EXACTF* node and then, if necessary,
	1496	* case-insensitively compare the full text of the node. c1 is the
	1497	* first character. c2 is its fold. This logic will not work for
	1498	* Unicode semantics and the german sharp ss, which hence should
	1499	* not be compiled into a node that gets here. */
	1500	pat_string = STRING(c);
	1501	ln = STR_LEN(c); /* length to match in octets/bytes */
	1502
	1503	e = HOP3c(strend, -((I32)ln), s);
	1504
	1505	if (!reginfo && e < s) {
	1506	e = s; /* Due to minlen logic of intuit() */
	1507	}
	1508
	1509	c1 = *pat_string;
	1510	c2 = fold_array[c1];
	1511	if (c1 == c2) { /* If char and fold are the same */
	1512	REXEC_FBC_EXACTISH_SCAN((U8)s == c1);
	1513	}
	1514	else {
	1515	REXEC_FBC_EXACTISH_SCAN((U8)s == c1 \|\| (U8)s == c2);
	1516	}
	1517	break;
	1518
	1519	do_exactf_utf8:
	1520
	1521	/* If one of the operands is in utf8, we can't use the simpler
	1522	* folding above, due to the fact that many different characters
	1523	* can have the same fold, or portion of a fold, or different-
	1524	* length fold */
	1525	pat_string = STRING(c);
	1526	ln = STR_LEN(c); /* length to match in octets/bytes */
	1527	pat_end = pat_string + ln;
	1528	lnc = (UTF_PATTERN) /* length to match in characters */
	1529	? utf8_length((U8 ) pat_string, (U8 ) pat_end)
	1530	: ln;
	1531
	1532	e = HOP3c(strend, -((I32)lnc), s);
	1533
	1534	if (!reginfo && e < s) {
	1535	e = s; /* Due to minlen logic of intuit() */
	1536	}
	1537
	1538	while (s <= e) {
	1539	char my_strend= (char )strend;
	1540	if (foldEQ_utf8_flags(s, &my_strend, 0, utf8_target,
	1541	pat_string, NULL, ln, cBOOL(UTF_PATTERN), utf8_fold_flags)
	1542	&& (!reginfo \|\| regtry(reginfo, &s)) )
	1543	{
	1544	goto got_it;
	1545	}
	1546	s += UTF8SKIP(s);
	1547	}
	1548	break;
	1549	case BOUNDL:
	1550	PL_reg_flags \|= RF_tainted;
	1551	FBC_BOUND(isALNUM_LC,
	1552	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1553	isALNUM_LC_utf8((U8*)s));
	1554	break;
	1555	case NBOUNDL:
	1556	PL_reg_flags \|= RF_tainted;
	1557	FBC_NBOUND(isALNUM_LC,
	1558	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1559	isALNUM_LC_utf8((U8*)s));
	1560	break;
	1561	case BOUND:
	1562	FBC_BOUND(isWORDCHAR,
	1563	isALNUM_uni(tmp),
	1564	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1565	break;
	1566	case BOUNDA:
	1567	FBC_BOUND_NOLOAD(isWORDCHAR_A,
	1568	isWORDCHAR_A(tmp),
	1569	isWORDCHAR_A((U8*)s));
	1570	break;
	1571	case NBOUND:
	1572	FBC_NBOUND(isWORDCHAR,
	1573	isALNUM_uni(tmp),
	1574	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1575	break;
	1576	case NBOUNDA:
	1577	FBC_NBOUND_NOLOAD(isWORDCHAR_A,
	1578	isWORDCHAR_A(tmp),
	1579	isWORDCHAR_A((U8*)s));
	1580	break;
	1581	case BOUNDU:
	1582	FBC_BOUND(isWORDCHAR_L1,
	1583	isALNUM_uni(tmp),
	1584	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1585	break;
	1586	case NBOUNDU:
	1587	FBC_NBOUND(isWORDCHAR_L1,
	1588	isALNUM_uni(tmp),
	1589	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1590	break;
	1591	case ALNUML:
	1592	REXEC_FBC_CSCAN_TAINT(
	1593	isALNUM_LC_utf8((U8*)s),
	1594	isALNUM_LC(*s)
	1595	);
	1596	break;
	1597	case ALNUMU:
	1598	REXEC_FBC_CSCAN_PRELOAD(
	1599	LOAD_UTF8_CHARCLASS_ALNUM(),
	1600	swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1601	isWORDCHAR_L1((U8) *s)
	1602	);
	1603	break;
	1604	case ALNUM:
	1605	REXEC_FBC_CSCAN_PRELOAD(
	1606	LOAD_UTF8_CHARCLASS_ALNUM(),
	1607	swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1608	isWORDCHAR((U8) *s)
	1609	);
	1610	break;
	1611	case ALNUMA:
	1612	/* Don't need to worry about utf8, as it can match only a single
	1613	* byte invariant character */
	1614	REXEC_FBC_CLASS_SCAN( isWORDCHAR_A(*s));
	1615	break;
	1616	case NALNUMU:
	1617	REXEC_FBC_CSCAN_PRELOAD(
	1618	LOAD_UTF8_CHARCLASS_ALNUM(),
	1619	!swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1620	! isWORDCHAR_L1((U8) *s)
	1621	);
	1622	break;
	1623	case NALNUM:
	1624	REXEC_FBC_CSCAN_PRELOAD(
	1625	LOAD_UTF8_CHARCLASS_ALNUM(),
	1626	!swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target),
	1627	! isALNUM(*s)
	1628	);
	1629	break;
	1630	case NALNUMA:
	1631	REXEC_FBC_CSCAN(
	1632	!isWORDCHAR_A(*s),
	1633	!isWORDCHAR_A(*s)
	1634	);
	1635	break;
	1636	case NALNUML:
	1637	REXEC_FBC_CSCAN_TAINT(
	1638	!isALNUM_LC_utf8((U8*)s),
	1639	!isALNUM_LC(*s)
	1640	);
	1641	break;
	1642	case SPACEU:
	1643	REXEC_FBC_CSCAN_PRELOAD(
	1644	LOAD_UTF8_CHARCLASS_SPACE(),
	1645	s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target),
	1646	isSPACE_L1((U8) *s)
	1647	);
	1648	break;
	1649	case SPACE:
	1650	REXEC_FBC_CSCAN_PRELOAD(
	1651	LOAD_UTF8_CHARCLASS_SPACE(),
	1652	s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target),
	1653	isSPACE((U8) *s)
	1654	);
	1655	break;
	1656	case SPACEA:
	1657	/* Don't need to worry about utf8, as it can match only a single
	1658	* byte invariant character */
	1659	REXEC_FBC_CLASS_SCAN( isSPACE_A(*s));
	1660	break;
	1661	case SPACEL:
	1662	REXEC_FBC_CSCAN_TAINT(
	1663	isSPACE_LC_utf8((U8*)s),
	1664	isSPACE_LC(*s)
	1665	);
	1666	break;
	1667	case NSPACEU:
	1668	REXEC_FBC_CSCAN_PRELOAD(
	1669	LOAD_UTF8_CHARCLASS_SPACE(),
	1670	!( s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target)),
	1671	! isSPACE_L1((U8) *s)
	1672	);
	1673	break;
	1674	case NSPACE:
	1675	REXEC_FBC_CSCAN_PRELOAD(
	1676	LOAD_UTF8_CHARCLASS_SPACE(),
	1677	!(s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target)),
	1678	! isSPACE((U8) *s)
	1679	);
	1680	break;
	1681	case NSPACEA:
	1682	REXEC_FBC_CSCAN(
	1683	!isSPACE_A(*s),
	1684	!isSPACE_A(*s)
	1685	);
	1686	break;
	1687	case NSPACEL:
	1688	REXEC_FBC_CSCAN_TAINT(
	1689	!isSPACE_LC_utf8((U8*)s),
	1690	!isSPACE_LC(*s)
	1691	);
	1692	break;
	1693	case DIGIT:
	1694	REXEC_FBC_CSCAN_PRELOAD(
	1695	LOAD_UTF8_CHARCLASS_DIGIT(),
	1696	swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
	1697	isDIGIT(*s)
	1698	);
	1699	break;
	1700	case DIGITA:
	1701	/* Don't need to worry about utf8, as it can match only a single
	1702	* byte invariant character */
	1703	REXEC_FBC_CLASS_SCAN( isDIGIT_A(*s));
	1704	break;
	1705	case DIGITL:
	1706	REXEC_FBC_CSCAN_TAINT(
	1707	isDIGIT_LC_utf8((U8*)s),
	1708	isDIGIT_LC(*s)
	1709	);
	1710	break;
	1711	case NDIGIT:
	1712	REXEC_FBC_CSCAN_PRELOAD(
	1713	LOAD_UTF8_CHARCLASS_DIGIT(),
	1714	!swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
	1715	!isDIGIT(*s)
	1716	);
	1717	break;
	1718	case NDIGITA:
	1719	REXEC_FBC_CSCAN(
	1720	!isDIGIT_A(*s),
	1721	!isDIGIT_A(*s)
	1722	);
	1723	break;
	1724	case NDIGITL:
	1725	REXEC_FBC_CSCAN_TAINT(
	1726	!isDIGIT_LC_utf8((U8*)s),
	1727	!isDIGIT_LC(*s)
	1728	);
	1729	break;
	1730	case LNBREAK:
	1731	REXEC_FBC_CSCAN(
	1732	is_LNBREAK_utf8(s),
	1733	is_LNBREAK_latin1(s)
	1734	);
	1735	break;
	1736	case VERTWS:
	1737	REXEC_FBC_CSCAN(
	1738	is_VERTWS_utf8(s),
	1739	is_VERTWS_latin1(s)
	1740	);
	1741	break;
	1742	case NVERTWS:
	1743	REXEC_FBC_CSCAN(
	1744	!is_VERTWS_utf8(s),
	1745	!is_VERTWS_latin1(s)
	1746	);
	1747	break;
	1748	case HORIZWS:
	1749	REXEC_FBC_CSCAN(
	1750	is_HORIZWS_utf8(s),
	1751	is_HORIZWS_latin1(s)
	1752	);
	1753	break;
	1754	case NHORIZWS:
	1755	REXEC_FBC_CSCAN(
	1756	!is_HORIZWS_utf8(s),
	1757	!is_HORIZWS_latin1(s)
	1758	);
	1759	break;
	1760	case AHOCORASICKC:
	1761	case AHOCORASICK:
	1762	{
	1763	DECL_TRIE_TYPE(c);
	1764	/* what trie are we using right now */
	1765	reg_ac_data *aho
	1766	= (reg_ac_data*)progi->data->data[ ARG( c ) ];
	1767	reg_trie_data *trie
	1768	= (reg_trie_data*)progi->data->data[ aho->trie ];
	1769	HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
	1770
	1771	const char *last_start = strend - trie->minlen;
	1772	#ifdef DEBUGGING
	1773	const char *real_start = s;
	1774	#endif
	1775	STRLEN maxlen = trie->maxlen;
	1776	SV *sv_points;
	1777	U8 *points; / map of where we were in the input string
	1778	when reading a given char. For ASCII this
	1779	is unnecessary overhead as the relationship
	1780	is always 1:1, but for Unicode, especially
	1781	case folded Unicode this is not true. */
	1782	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1783	U8 *bitmap=NULL;
	1784
	1785
	1786	GET_RE_DEBUG_FLAGS_DECL;
	1787
	1788	/* We can't just allocate points here. We need to wrap it in
	1789	* an SV so it gets freed properly if there is a croak while
	1790	* running the match */
	1791	ENTER;
	1792	SAVETMPS;
	1793	sv_points=newSV(maxlen * sizeof(U8 *));
	1794	SvCUR_set(sv_points,
	1795	maxlen * sizeof(U8 *));
	1796	SvPOK_on(sv_points);
	1797	sv_2mortal(sv_points);
	1798	points=(U8**)SvPV_nolen(sv_points );
	1799	if ( trie_type != trie_utf8_fold
	1800	&& (trie->bitmap \|\| OP(c)==AHOCORASICKC) )
	1801	{
	1802	if (trie->bitmap)
	1803	bitmap=(U8*)trie->bitmap;
	1804	else
	1805	bitmap=(U8*)ANYOF_BITMAP(c);
	1806	}
	1807	/* this is the Aho-Corasick algorithm modified a touch
	1808	to include special handling for long "unknown char"
	1809	sequences. The basic idea being that we use AC as long
	1810	as we are dealing with a possible matching char, when
	1811	we encounter an unknown char (and we have not encountered
	1812	an accepting state) we scan forward until we find a legal
	1813	starting char.
	1814	AC matching is basically that of trie matching, except
	1815	that when we encounter a failing transition, we fall back
	1816	to the current states "fail state", and try the current char
	1817	again, a process we repeat until we reach the root state,
	1818	state 1, or a legal transition. If we fail on the root state
	1819	then we can either terminate if we have reached an accepting
	1820	state previously, or restart the entire process from the beginning
	1821	if we have not.
	1822
	1823	*/
	1824	while (s <= last_start) {
	1825	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1826	U8 uc = (U8)s;
	1827	U16 charid = 0;
	1828	U32 base = 1;
	1829	U32 state = 1;
	1830	UV uvc = 0;
	1831	STRLEN len = 0;
	1832	STRLEN foldlen = 0;
	1833	U8 uscan = (U8)NULL;
	1834	U8 *leftmost = NULL;
	1835	#ifdef DEBUGGING
	1836	U32 accepted_word= 0;
	1837	#endif
	1838	U32 pointpos = 0;
	1839
	1840	while ( state && uc <= (U8*)strend ) {
	1841	int failed=0;
	1842	U32 word = aho->states[ state ].wordnum;
	1843
	1844	if( state==1 ) {
	1845	if ( bitmap ) {
	1846	DEBUG_TRIE_EXECUTE_r(
	1847	if ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1848	dump_exec_pos( (char *)uc, c, strend, real_start,
	1849	(char *)uc, utf8_target );
	1850	PerlIO_printf( Perl_debug_log,
	1851	" Scanning for legal start char...\n");
	1852	}
	1853	);
	1854	if (utf8_target) {
	1855	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1856	uc += UTF8SKIP(uc);
	1857	}
	1858	} else {
	1859	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1860	uc++;
	1861	}
	1862	}
	1863	s= (char *)uc;
	1864	}
	1865	if (uc >(U8*)last_start) break;
	1866	}
	1867
	1868	if ( word ) {
	1869	U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
	1870	if (!leftmost \|\| lpos < leftmost) {
	1871	DEBUG_r(accepted_word=word);
	1872	leftmost= lpos;
	1873	}
	1874	if (base==0) break;
	1875
	1876	}
	1877	points[pointpos++ % maxlen]= uc;
	1878	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	1879	uscan, len, uvc, charid, foldlen,
	1880	foldbuf, uniflags);
	1881	DEBUG_TRIE_EXECUTE_r({
	1882	dump_exec_pos( (char *)uc, c, strend, real_start,
	1883	s, utf8_target );
	1884	PerlIO_printf(Perl_debug_log,
	1885	" Charid:%3u CP:%4"UVxf" ",
	1886	charid, uvc);
	1887	});
	1888
	1889	do {
	1890	#ifdef DEBUGGING
	1891	word = aho->states[ state ].wordnum;
	1892	#endif
	1893	base = aho->states[ state ].trans.base;
	1894
	1895	DEBUG_TRIE_EXECUTE_r({
	1896	if (failed)
	1897	dump_exec_pos( (char *)uc, c, strend, real_start,
	1898	s, utf8_target );
	1899	PerlIO_printf( Perl_debug_log,
	1900	"%sState: %4"UVxf", word=%"UVxf,
	1901	failed ? " Fail transition to " : "",
	1902	(UV)state, (UV)word);
	1903	});
	1904	if ( base ) {
	1905	U32 tmp;
	1906	I32 offset;
	1907	if (charid &&
	1908	( ((offset = base + charid
	1909	- 1 - trie->uniquecharcount)) >= 0)
	1910	&& ((U32)offset < trie->lasttrans)
	1911	&& trie->trans[offset].check == state
	1912	&& (tmp=trie->trans[offset].next))
	1913	{
	1914	DEBUG_TRIE_EXECUTE_r(
	1915	PerlIO_printf( Perl_debug_log," - legal\n"));
	1916	state = tmp;
	1917	break;
	1918	}
	1919	else {
	1920	DEBUG_TRIE_EXECUTE_r(
	1921	PerlIO_printf( Perl_debug_log," - fail\n"));
	1922	failed = 1;
	1923	state = aho->fail[state];
	1924	}
	1925	}
	1926	else {
	1927	/* we must be accepting here */
	1928	DEBUG_TRIE_EXECUTE_r(
	1929	PerlIO_printf( Perl_debug_log," - accepting\n"));
	1930	failed = 1;
	1931	break;
	1932	}
	1933	} while(state);
	1934	uc += len;
	1935	if (failed) {
	1936	if (leftmost)
	1937	break;
	1938	if (!state) state = 1;
	1939	}
	1940	}
	1941	if ( aho->states[ state ].wordnum ) {
	1942	U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
	1943	if (!leftmost \|\| lpos < leftmost) {
	1944	DEBUG_r(accepted_word=aho->states[ state ].wordnum);
	1945	leftmost = lpos;
	1946	}
	1947	}
	1948	if (leftmost) {
	1949	s = (char*)leftmost;
	1950	DEBUG_TRIE_EXECUTE_r({
	1951	PerlIO_printf(
	1952	Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
	1953	(UV)accepted_word, (IV)(s - real_start)
	1954	);
	1955	});
	1956	if (!reginfo \|\| regtry(reginfo, &s)) {
	1957	FREETMPS;
	1958	LEAVE;
	1959	goto got_it;
	1960	}
	1961	s = HOPc(s,1);
	1962	DEBUG_TRIE_EXECUTE_r({
	1963	PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
	1964	});
	1965	} else {
	1966	DEBUG_TRIE_EXECUTE_r(
	1967	PerlIO_printf( Perl_debug_log,"No match.\n"));
	1968	break;
	1969	}
	1970	}
	1971	FREETMPS;
	1972	LEAVE;
	1973	}
	1974	break;
	1975	default:
	1976	Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
	1977	break;
	1978	}
	1979	return 0;
	1980	got_it:
	1981	return s;
	1982	}
	1983
	1984
	1985	/*
	1986	- regexec_flags - match a regexp against a string
	1987	*/
	1988	I32
	1989	Perl_regexec_flags(pTHX_ REGEXP * const rx, char stringarg, register char strend,
	1990	char strbeg, I32 minend, SV sv, void *data, U32 flags)
	1991	/* strend: pointer to null at end of string */
	1992	/* strbeg: real beginning of string */
	1993	/* minend: end of match must be >=minend after stringarg. */
	1994	/* data: May be used for some additional optimizations.
	1995	Currently its only used, with a U32 cast, for transmitting
	1996	the ganch offset when doing a /g match. This will change */
	1997	/* nosave: For optimizations. */
	1998	{
	1999	dVAR;
	2000	struct regexp const prog = (struct regexp )SvANY(rx);
	2001	/register/ char *s;
	2002	register regnode *c;
	2003	/register/ char *startpos = stringarg;
	2004	I32 minlen; /* must match at least this many chars */
	2005	I32 dontbother = 0; /* how many characters not to try at end */
	2006	I32 end_shift = 0; /* Same for the end. / / CC */
	2007	I32 scream_pos = -1; /* Internal iterator of scream. */
	2008	char *scream_olds = NULL;
	2009	const bool utf8_target = cBOOL(DO_UTF8(sv));
	2010	I32 multiline;
	2011	RXi_GET_DECL(prog,progi);
	2012	regmatch_info reginfo; /* create some info to pass to regtry etc */
	2013	regexp_paren_pair *swap = NULL;
	2014	GET_RE_DEBUG_FLAGS_DECL;
	2015
	2016	PERL_ARGS_ASSERT_REGEXEC_FLAGS;
	2017	PERL_UNUSED_ARG(data);
	2018
	2019	/* Be paranoid... */
	2020	if (prog == NULL \|\| startpos == NULL) {
	2021	Perl_croak(aTHX_ "NULL regexp parameter");
	2022	return 0;
	2023	}
	2024
	2025	multiline = prog->extflags & RXf_PMf_MULTILINE;
	2026	reginfo.prog = rx; /* Yes, sorry that this is confusing. */
	2027
	2028	RX_MATCH_UTF8_set(rx, utf8_target);
	2029	DEBUG_EXECUTE_r(
	2030	debug_start_match(rx, utf8_target, startpos, strend,
	2031	"Matching");
	2032	);
	2033
	2034	minlen = prog->minlen;
	2035
	2036	if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
	2037	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	2038	"String too short [regexec_flags]...\n"));
	2039	goto phooey;
	2040	}
	2041
	2042
	2043	/* Check validity of program. */
	2044	if (UCHARAT(progi->program) != REG_MAGIC) {
	2045	Perl_croak(aTHX_ "corrupted regexp program");
	2046	}
	2047
	2048	PL_reg_flags = 0;
	2049	PL_reg_eval_set = 0;
	2050	PL_reg_maxiter = 0;
	2051
	2052	if (RX_UTF8(rx))
	2053	PL_reg_flags \|= RF_utf8;
	2054
	2055	/* Mark beginning of line for ^ and lookbehind. */
	2056	reginfo.bol = startpos; /* XXX not used ??? */
	2057	PL_bostr = strbeg;
	2058	reginfo.sv = sv;
	2059
	2060	/* Mark end of line for $ (and such) */
	2061	PL_regeol = strend;
	2062
	2063	/* see how far we have to get to not match where we matched before */
	2064	reginfo.till = startpos+minend;
	2065
	2066	/* If there is a "must appear" string, look for it. */
	2067	s = startpos;
	2068
	2069	if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
	2070	MAGIC *mg;
	2071	if (flags & REXEC_IGNOREPOS){ /* Means: check only at start */
	2072	reginfo.ganch = startpos + prog->gofs;
	2073	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2074	"GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
	2075	} else if (sv && SvTYPE(sv) >= SVt_PVMG
	2076	&& SvMAGIC(sv)
	2077	&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
	2078	&& mg->mg_len >= 0) {
	2079	reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
	2080	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2081	"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
	2082
	2083	if (prog->extflags & RXf_ANCH_GPOS) {
	2084	if (s > reginfo.ganch)
	2085	goto phooey;
	2086	s = reginfo.ganch - prog->gofs;
	2087	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2088	"GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
	2089	if (s < strbeg)
	2090	goto phooey;
	2091	}
	2092	}
	2093	else if (data) {
	2094	reginfo.ganch = strbeg + PTR2UV(data);
	2095	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2096	"GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
	2097
	2098	} else { /* pos() not defined */
	2099	reginfo.ganch = strbeg;
	2100	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2101	"GPOS: reginfo.ganch = strbeg\n"));
	2102	}
	2103	}
	2104	if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
	2105	/* We have to be careful. If the previous successful match
	2106	was from this regex we don't want a subsequent partially
	2107	successful match to clobber the old results.
	2108	So when we detect this possibility we add a swap buffer
	2109	to the re, and switch the buffer each match. If we fail
	2110	we switch it back, otherwise we leave it swapped.
	2111	*/
	2112	swap = prog->offs;
	2113	/* do we need a save destructor here for eval dies? */
	2114	Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
	2115	}
	2116	if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL \|\| prog->check_utf8 != NULL)) {
	2117	re_scream_pos_data d;
	2118
	2119	d.scream_olds = &scream_olds;
	2120	d.scream_pos = &scream_pos;
	2121	s = re_intuit_start(rx, sv, s, strend, flags, &d);
	2122	if (!s) {
	2123	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
	2124	goto phooey; /* not present */
	2125	}
	2126	}
	2127
	2128
	2129
	2130	/* Simplest case: anchored match need be tried only once. */
	2131	/* [unless only anchor is BOL and multiline is set] */
	2132	if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
	2133	if (s == startpos && regtry(&reginfo, &startpos))
	2134	goto got_it;
	2135	else if (multiline \|\| (prog->intflags & PREGf_IMPLICIT)
	2136	\|\| (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
	2137	{
	2138	char *end;
	2139
	2140	if (minlen)
	2141	dontbother = minlen - 1;
	2142	end = HOP3c(strend, -dontbother, strbeg) - 1;
	2143	/* for multiline we only have to try after newlines */
	2144	if (prog->check_substr \|\| prog->check_utf8) {
	2145	/* because of the goto we can not easily reuse the macros for bifurcating the
	2146	unicode/non-unicode match modes here like we do elsewhere - demerphq */
	2147	if (utf8_target) {
	2148	if (s == startpos)
	2149	goto after_try_utf8;
	2150	while (1) {
	2151	if (regtry(&reginfo, &s)) {
	2152	goto got_it;
	2153	}
	2154	after_try_utf8:
	2155	if (s > end) {
	2156	goto phooey;
	2157	}
	2158	if (prog->extflags & RXf_USE_INTUIT) {
	2159	s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
	2160	if (!s) {
	2161	goto phooey;
	2162	}
	2163	}
	2164	else {
	2165	s += UTF8SKIP(s);
	2166	}
	2167	}
	2168	} /* end search for check string in unicode */
	2169	else {
	2170	if (s == startpos) {
	2171	goto after_try_latin;
	2172	}
	2173	while (1) {
	2174	if (regtry(&reginfo, &s)) {
	2175	goto got_it;
	2176	}
	2177	after_try_latin:
	2178	if (s > end) {
	2179	goto phooey;
	2180	}
	2181	if (prog->extflags & RXf_USE_INTUIT) {
	2182	s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
	2183	if (!s) {
	2184	goto phooey;
	2185	}
	2186	}
	2187	else {
	2188	s++;
	2189	}
	2190	}
	2191	} /* end search for check string in latin*/
	2192	} /* end search for check string */
	2193	else { /* search for newline */
	2194	if (s > startpos) {
	2195	/XXX: The s-- is almost definitely wrong here under unicode - demeprhq/
	2196	s--;
	2197	}
	2198	/* We can use a more efficient search as newlines are the same in unicode as they are in latin */
	2199	while (s < end) {
	2200	if (s++ == '\n') { / don't need PL_utf8skip here */
	2201	if (regtry(&reginfo, &s))
	2202	goto got_it;
	2203	}
	2204	}
	2205	} /* end search for newline */
	2206	} /* end anchored/multiline check string search */
	2207	goto phooey;
	2208	} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
	2209	{
	2210	/* the warning about reginfo.ganch being used without initialization
	2211	is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
	2212	and we only enter this block when the same bit is set. */
	2213	char *tmp_s = reginfo.ganch - prog->gofs;
	2214
	2215	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
	2216	goto got_it;
	2217	goto phooey;
	2218	}
	2219
	2220	/* Messy cases: unanchored match. */
	2221	if ((prog->anchored_substr \|\| prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
	2222	/* we have /x+whatever/ */
	2223	/* it must be a one character string (XXXX Except UTF_PATTERN?) */
	2224	char ch;
	2225	#ifdef DEBUGGING
	2226	int did_match = 0;
	2227	#endif
	2228	if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
	2229	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2230	ch = SvPVX_const(utf8_target ? prog->anchored_utf8 : prog->anchored_substr)[0];
	2231
	2232	if (utf8_target) {
	2233	REXEC_FBC_SCAN(
	2234	if (*s == ch) {
	2235	DEBUG_EXECUTE_r( did_match = 1 );
	2236	if (regtry(&reginfo, &s)) goto got_it;
	2237	s += UTF8SKIP(s);
	2238	while (s < strend && *s == ch)
	2239	s += UTF8SKIP(s);
	2240	}
	2241	);
	2242	}
	2243	else {
	2244	REXEC_FBC_SCAN(
	2245	if (*s == ch) {
	2246	DEBUG_EXECUTE_r( did_match = 1 );
	2247	if (regtry(&reginfo, &s)) goto got_it;
	2248	s++;
	2249	while (s < strend && *s == ch)
	2250	s++;
	2251	}
	2252	);
	2253	}
	2254	DEBUG_EXECUTE_r(if (!did_match)
	2255	PerlIO_printf(Perl_debug_log,
	2256	"Did not find anchored character...\n")
	2257	);
	2258	}
	2259	else if (prog->anchored_substr != NULL
	2260	\|\| prog->anchored_utf8 != NULL
	2261	\|\| ((prog->float_substr != NULL \|\| prog->float_utf8 != NULL)
	2262	&& prog->float_max_offset < strend - s)) {
	2263	SV *must;
	2264	I32 back_max;
	2265	I32 back_min;
	2266	char *last;
	2267	char last1; / Last position checked before */
	2268	#ifdef DEBUGGING
	2269	int did_match = 0;
	2270	#endif
	2271	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	2272	if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
	2273	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2274	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	2275	back_max = back_min = prog->anchored_offset;
	2276	} else {
	2277	if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
	2278	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2279	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	2280	back_max = prog->float_max_offset;
	2281	back_min = prog->float_min_offset;
	2282	}
	2283
	2284
	2285	if (must == &PL_sv_undef)
	2286	/* could not downgrade utf8 check substring, so must fail */
	2287	goto phooey;
	2288
	2289	if (back_min<0) {
	2290	last = strend;
	2291	} else {
	2292	last = HOP3c(strend, /* Cannot start after this */
	2293	-(I32)(CHR_SVLEN(must)
	2294	- (SvTAIL(must) != 0) + back_min), strbeg);
	2295	}
	2296	if (s > PL_bostr)
	2297	last1 = HOPc(s, -1);
	2298	else
	2299	last1 = s - 1; /* bogus */
	2300
	2301	/* XXXX check_substr already used to find "s", can optimize if
	2302	check_substr==must. */
	2303	scream_pos = -1;
	2304	dontbother = end_shift;
	2305	strend = HOPc(strend, -dontbother);
	2306	while ( (s <= last) &&
	2307	((flags & REXEC_SCREAM) && SvSCREAM(sv)
	2308	? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
	2309	end_shift, &scream_pos, 0))
	2310	: (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
	2311	(unsigned char*)strend, must,
	2312	multiline ? FBMrf_MULTILINE : 0))) ) {
	2313	/* we may be pointing at the wrong string */
	2314	if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
	2315	s = strbeg + (s - SvPVX_const(sv));
	2316	DEBUG_EXECUTE_r( did_match = 1 );
	2317	if (HOPc(s, -back_max) > last1) {
	2318	last1 = HOPc(s, -back_min);
	2319	s = HOPc(s, -back_max);
	2320	}
	2321	else {
	2322	char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
	2323
	2324	last1 = HOPc(s, -back_min);
	2325	s = t;
	2326	}
	2327	if (utf8_target) {
	2328	while (s <= last1) {
	2329	if (regtry(&reginfo, &s))
	2330	goto got_it;
	2331	s += UTF8SKIP(s);
	2332	}
	2333	}
	2334	else {
	2335	while (s <= last1) {
	2336	if (regtry(&reginfo, &s))
	2337	goto got_it;
	2338	s++;
	2339	}
	2340	}
	2341	}
	2342	DEBUG_EXECUTE_r(if (!did_match) {
	2343	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	2344	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	2345	PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
	2346	((must == prog->anchored_substr \|\| must == prog->anchored_utf8)
	2347	? "anchored" : "floating"),
	2348	quoted, RE_SV_TAIL(must));
	2349	});
	2350	goto phooey;
	2351	}
	2352	else if ( (c = progi->regstclass) ) {
	2353	if (minlen) {
	2354	const OPCODE op = OP(progi->regstclass);
	2355	/* don't bother with what can't match */
	2356	if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
	2357	strend = HOPc(strend, -(minlen - 1));
	2358	}
	2359	DEBUG_EXECUTE_r({
	2360	SV * const prop = sv_newmortal();
	2361	regprop(prog, prop, c);
	2362	{
	2363	RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
	2364	s,strend-s,60);
	2365	PerlIO_printf(Perl_debug_log,
	2366	"Matching stclass %.*s against %s (%d bytes)\n",
	2367	(int)SvCUR(prop), SvPVX_const(prop),
	2368	quoted, (int)(strend - s));
	2369	}
	2370	});
	2371	if (find_byclass(prog, c, s, strend, &reginfo))
	2372	goto got_it;
	2373	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
	2374	}
	2375	else {
	2376	dontbother = 0;
	2377	if (prog->float_substr != NULL \|\| prog->float_utf8 != NULL) {
	2378	/* Trim the end. */
	2379	char *last;
	2380	SV* float_real;
	2381
	2382	if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
	2383	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2384	float_real = utf8_target ? prog->float_utf8 : prog->float_substr;
	2385
	2386	if ((flags & REXEC_SCREAM) && SvSCREAM(sv)) {
	2387	last = screaminstr(sv, float_real, s - strbeg,
	2388	end_shift, &scream_pos, 1); /* last one */
	2389	if (!last)
	2390	last = scream_olds; /* Only one occurrence. */
	2391	/* we may be pointing at the wrong string */
	2392	else if (RXp_MATCH_COPIED(prog))
	2393	s = strbeg + (s - SvPVX_const(sv));
	2394	}
	2395	else {
	2396	STRLEN len;
	2397	const char * const little = SvPV_const(float_real, len);
	2398
	2399	if (SvTAIL(float_real)) {
	2400	if (memEQ(strend - len + 1, little, len - 1))
	2401	last = strend - len + 1;
	2402	else if (!multiline)
	2403	last = memEQ(strend - len, little, len)
	2404	? strend - len : NULL;
	2405	else
	2406	goto find_last;
	2407	} else {
	2408	find_last:
	2409	if (len)
	2410	last = rninstr(s, strend, little, little + len);
	2411	else
	2412	last = strend; /* matching "$" */
	2413	}
	2414	}
	2415	if (last == NULL) {
	2416	DEBUG_EXECUTE_r(
	2417	PerlIO_printf(Perl_debug_log,
	2418	"%sCan't trim the tail, match fails (should not happen)%s\n",
	2419	PL_colors[4], PL_colors[5]));
	2420	goto phooey; /* Should not happen! */
	2421	}
	2422	dontbother = strend - last + prog->float_min_offset;
	2423	}
	2424	if (minlen && (dontbother < minlen))
	2425	dontbother = minlen - 1;
	2426	strend -= dontbother; /* this one's always in bytes! */
	2427	/* We don't know much -- general case. */
	2428	if (utf8_target) {
	2429	for (;;) {
	2430	if (regtry(&reginfo, &s))
	2431	goto got_it;
	2432	if (s >= strend)
	2433	break;
	2434	s += UTF8SKIP(s);
	2435	};
	2436	}
	2437	else {
	2438	do {
	2439	if (regtry(&reginfo, &s))
	2440	goto got_it;
	2441	} while (s++ < strend);
	2442	}
	2443	}
	2444
	2445	/* Failure. */
	2446	goto phooey;
	2447
	2448	got_it:
	2449	Safefree(swap);
	2450	RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
	2451
	2452	if (PL_reg_eval_set)
	2453	restore_pos(aTHX_ prog);
	2454	if (RXp_PAREN_NAMES(prog))
	2455	(void)hv_iterinit(RXp_PAREN_NAMES(prog));
	2456
	2457	/* make sure $`, $&, $', and $digit will work later */
	2458	if ( !(flags & REXEC_NOT_FIRST) ) {
	2459	RX_MATCH_COPY_FREE(rx);
	2460	if (flags & REXEC_COPY_STR) {
	2461	const I32 i = PL_regeol - startpos + (stringarg - strbeg);
	2462	#ifdef PERL_OLD_COPY_ON_WRITE
	2463	if ((SvIsCOW(sv)
	2464	\|\| (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
	2465	if (DEBUG_C_TEST) {
	2466	PerlIO_printf(Perl_debug_log,
	2467	"Copy on write: regexp capture, type %d\n",
	2468	(int) SvTYPE(sv));
	2469	}
	2470	prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
	2471	prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
	2472	assert (SvPOKp(prog->saved_copy));
	2473	} else
	2474	#endif
	2475	{
	2476	RX_MATCH_COPIED_on(rx);
	2477	s = savepvn(strbeg, i);
	2478	prog->subbeg = s;
	2479	}
	2480	prog->sublen = i;
	2481	}
	2482	else {
	2483	prog->subbeg = strbeg;
	2484	prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
	2485	}
	2486	}
	2487
	2488	return 1;
	2489
	2490	phooey:
	2491	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
	2492	PL_colors[4], PL_colors[5]));
	2493	if (PL_reg_eval_set)
	2494	restore_pos(aTHX_ prog);
	2495	if (swap) {
	2496	/* we failed :-( roll it back */
	2497	Safefree(prog->offs);
	2498	prog->offs = swap;
	2499	}
	2500
	2501	return 0;
	2502	}
	2503
	2504
	2505	/*
	2506	- regtry - try match at specific point
	2507	*/
	2508	STATIC I32 /* 0 failure, 1 success */
	2509	S_regtry(pTHX_ regmatch_info reginfo, char *startpos)
	2510	{
	2511	dVAR;
	2512	CHECKPOINT lastcp;
	2513	REGEXP *const rx = reginfo->prog;
	2514	regexp const prog = (struct regexp )SvANY(rx);
	2515	RXi_GET_DECL(prog,progi);
	2516	GET_RE_DEBUG_FLAGS_DECL;
	2517
	2518	PERL_ARGS_ASSERT_REGTRY;
	2519
	2520	reginfo->cutpoint=NULL;
	2521
	2522	if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
	2523	MAGIC *mg;
	2524
	2525	PL_reg_eval_set = RS_init;
	2526	DEBUG_EXECUTE_r(DEBUG_s(
	2527	PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
	2528	(IV)(PL_stack_sp - PL_stack_base));
	2529	));
	2530	SAVESTACK_CXPOS();
	2531	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
	2532	/* Otherwise OP_NEXTSTATE will free whatever on stack now. */
	2533	SAVETMPS;
	2534	/* Apparently this is not needed, judging by wantarray. */
	2535	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
	2536	cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
	2537
	2538	if (reginfo->sv) {
	2539	/* Make $_ available to executed code. */
	2540	if (reginfo->sv != DEFSV) {
	2541	SAVE_DEFSV;
	2542	DEFSV_set(reginfo->sv);
	2543	}
	2544
	2545	if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
	2546	&& (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
	2547	/* prepare for quick setting of pos */
	2548	#ifdef PERL_OLD_COPY_ON_WRITE
	2549	if (SvIsCOW(reginfo->sv))
	2550	sv_force_normal_flags(reginfo->sv, 0);
	2551	#endif
	2552	mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
	2553	&PL_vtbl_mglob, NULL, 0);
	2554	mg->mg_len = -1;
	2555	}
	2556	PL_reg_magic = mg;
	2557	PL_reg_oldpos = mg->mg_len;
	2558	SAVEDESTRUCTOR_X(restore_pos, prog);
	2559	}
	2560	if (!PL_reg_curpm) {
	2561	Newxz(PL_reg_curpm, 1, PMOP);
	2562	#ifdef USE_ITHREADS
	2563	{
	2564	SV* const repointer = &PL_sv_undef;
	2565	/* this regexp is also owned by the new PL_reg_curpm, which
	2566	will try to free it. */
	2567	av_push(PL_regex_padav, repointer);
	2568	PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
	2569	PL_regex_pad = AvARRAY(PL_regex_padav);
	2570	}
	2571	#endif
	2572	}
	2573	#ifdef USE_ITHREADS
	2574	/* It seems that non-ithreads works both with and without this code.
	2575	So for efficiency reasons it seems best not to have the code
	2576	compiled when it is not needed. */
	2577	/* This is safe against NULLs: */
	2578	ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
	2579	/* PM_reg_curpm owns a reference to this regexp. */
	2580	(void)ReREFCNT_inc(rx);
	2581	#endif
	2582	PM_SETRE(PL_reg_curpm, rx);
	2583	PL_reg_oldcurpm = PL_curpm;
	2584	PL_curpm = PL_reg_curpm;
	2585	if (RXp_MATCH_COPIED(prog)) {
	2586	/* Here is a serious problem: we cannot rewrite subbeg,
	2587	since it may be needed if this match fails. Thus
	2588	$` inside (?{}) could fail... */
	2589	PL_reg_oldsaved = prog->subbeg;
	2590	PL_reg_oldsavedlen = prog->sublen;
	2591	#ifdef PERL_OLD_COPY_ON_WRITE
	2592	PL_nrs = prog->saved_copy;
	2593	#endif
	2594	RXp_MATCH_COPIED_off(prog);
	2595	}
	2596	else
	2597	PL_reg_oldsaved = NULL;
	2598	prog->subbeg = PL_bostr;
	2599	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
	2600	}
	2601	DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
	2602	prog->offs[0].start = *startpos - PL_bostr;
	2603	PL_reginput = *startpos;
	2604	PL_reglastparen = &prog->lastparen;
	2605	PL_reglastcloseparen = &prog->lastcloseparen;
	2606	prog->lastparen = 0;
	2607	prog->lastcloseparen = 0;
	2608	PL_regsize = 0;
	2609	PL_regoffs = prog->offs;
	2610	if (PL_reg_start_tmpl <= prog->nparens) {
	2611	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
	2612	if(PL_reg_start_tmp)
	2613	Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	2614	else
	2615	Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	2616	}
	2617
	2618	/* XXXX What this code is doing here?!!! There should be no need
	2619	to do this again and again, PL_reglastparen should take care of
	2620	this! --ilya*/
	2621
	2622	/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
	2623	* Actually, the code in regcppop() (which Ilya may be meaning by
	2624	* PL_reglastparen), is not needed at all by the test suite
	2625	* (op/regexp, op/pat, op/split), but that code is needed otherwise
	2626	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	2627	* Meanwhile, this code is needed for the
	2628	* above-mentioned test suite tests to succeed. The common theme
	2629	* on those tests seems to be returning null fields from matches.
	2630	* --jhi updated by dapm */
	2631	#if 1
	2632	if (prog->nparens) {
	2633	regexp_paren_pair *pp = PL_regoffs;
	2634	register I32 i;
	2635	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
	2636	++pp;
	2637	pp->start = -1;
	2638	pp->end = -1;
	2639	}
	2640	}
	2641	#endif
	2642	REGCP_SET(lastcp);
	2643	if (regmatch(reginfo, progi->program + 1)) {
	2644	PL_regoffs[0].end = PL_reginput - PL_bostr;
	2645	return 1;
	2646	}
	2647	if (reginfo->cutpoint)
	2648	*startpos= reginfo->cutpoint;
	2649	REGCP_UNWIND(lastcp);
	2650	return 0;
	2651	}
	2652
	2653
	2654	#define sayYES goto yes
	2655	#define sayNO goto no
	2656	#define sayNO_SILENT goto no_silent
	2657
	2658	/* we dont use STMT_START/END here because it leads to
	2659	"unreachable code" warnings, which are bogus, but distracting. */
	2660	#define CACHEsayNO \
	2661	if (ST.cache_mask) \
	2662	PL_reg_poscache[ST.cache_offset] \|= ST.cache_mask; \
	2663	sayNO
	2664
	2665	/* this is used to determine how far from the left messages like
	2666	'failed...' are printed. It should be set such that messages
	2667	are inline with the regop output that created them.
	2668	*/
	2669	#define REPORT_CODE_OFF 32
	2670
	2671
	2672	#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
	2673	#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
	2674
	2675	#define SLAB_FIRST(s) (&(s)->states[0])
	2676	#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
	2677
	2678	/* grab a new slab and return the first slot in it */
	2679
	2680	STATIC regmatch_state *
	2681	S_push_slab(pTHX)
	2682	{
	2683	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	2684	dMY_CXT;
	2685	#endif
	2686	regmatch_slab *s = PL_regmatch_slab->next;
	2687	if (!s) {
	2688	Newx(s, 1, regmatch_slab);
	2689	s->prev = PL_regmatch_slab;
	2690	s->next = NULL;
	2691	PL_regmatch_slab->next = s;
	2692	}
	2693	PL_regmatch_slab = s;
	2694	return SLAB_FIRST(s);
	2695	}
	2696
	2697
	2698	/* push a new state then goto it */
	2699
	2700	#define PUSH_STATE_GOTO(state, node) \
	2701	scan = node; \
	2702	st->resume_state = state; \
	2703	goto push_state;
	2704
	2705	/* push a new state with success backtracking, then goto it */
	2706
	2707	#define PUSH_YES_STATE_GOTO(state, node) \
	2708	scan = node; \
	2709	st->resume_state = state; \
	2710	goto push_yes_state;
	2711
	2712
	2713
	2714	/*
	2715
	2716	regmatch() - main matching routine
	2717
	2718	This is basically one big switch statement in a loop. We execute an op,
	2719	set 'next' to point the next op, and continue. If we come to a point which
	2720	we may need to backtrack to on failure such as (A\|B\|C), we push a
	2721	backtrack state onto the backtrack stack. On failure, we pop the top
	2722	state, and re-enter the loop at the state indicated. If there are no more
	2723	states to pop, we return failure.
	2724
	2725	Sometimes we also need to backtrack on success; for example /A+/, where
	2726	after successfully matching one A, we need to go back and try to
	2727	match another one; similarly for lookahead assertions: if the assertion
	2728	completes successfully, we backtrack to the state just before the assertion
	2729	and then carry on. In these cases, the pushed state is marked as
	2730	'backtrack on success too'. This marking is in fact done by a chain of
	2731	pointers, each pointing to the previous 'yes' state. On success, we pop to
	2732	the nearest yes state, discarding any intermediate failure-only states.
	2733	Sometimes a yes state is pushed just to force some cleanup code to be
	2734	called at the end of a successful match or submatch; e.g. (??{$re}) uses
	2735	it to free the inner regex.
	2736
	2737	Note that failure backtracking rewinds the cursor position, while
	2738	success backtracking leaves it alone.
	2739
	2740	A pattern is complete when the END op is executed, while a subpattern
	2741	such as (?=foo) is complete when the SUCCESS op is executed. Both of these
	2742	ops trigger the "pop to last yes state if any, otherwise return true"
	2743	behaviour.
	2744
	2745	A common convention in this function is to use A and B to refer to the two
	2746	subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
	2747	the subpattern to be matched possibly multiple times, while B is the entire
	2748	rest of the pattern. Variable and state names reflect this convention.
	2749
	2750	The states in the main switch are the union of ops and failure/success of
	2751	substates associated with that op. For example, IFMATCH is the op
	2752	that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
	2753	'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
	2754	successfully matched A and IFMATCH_A_fail is a state saying that we have
	2755	just failed to match A. Resume states always come in pairs. The backtrack
	2756	state we push is marked as 'IFMATCH_A', but when that is popped, we resume
	2757	at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
	2758	on success or failure.
	2759
	2760	The struct that holds a backtracking state is actually a big union, with
	2761	one variant for each major type of op. The variable st points to the
	2762	top-most backtrack struct. To make the code clearer, within each
	2763	block of code we #define ST to alias the relevant union.
	2764
	2765	Here's a concrete example of a (vastly oversimplified) IFMATCH
	2766	implementation:
	2767
	2768	switch (state) {
	2769	....
	2770
	2771	#define ST st->u.ifmatch
	2772
	2773	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	2774	ST.foo = ...; // some state we wish to save
	2775	...
	2776	// push a yes backtrack state with a resume value of
	2777	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
	2778	// first node of A:
	2779	PUSH_YES_STATE_GOTO(IFMATCH_A, A);
	2780	// NOTREACHED
	2781
	2782	case IFMATCH_A: // we have successfully executed A; now continue with B
	2783	next = B;
	2784	bar = ST.foo; // do something with the preserved value
	2785	break;
	2786
	2787	case IFMATCH_A_fail: // A failed, so the assertion failed
	2788	...; // do some housekeeping, then ...
	2789	sayNO; // propagate the failure
	2790
	2791	#undef ST
	2792
	2793	...
	2794	}
	2795
	2796	For any old-timers reading this who are familiar with the old recursive
	2797	approach, the code above is equivalent to:
	2798
	2799	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	2800	{
	2801	int foo = ...
	2802	...
	2803	if (regmatch(A)) {
	2804	next = B;
	2805	bar = foo;
	2806	break;
	2807	}
	2808	...; // do some housekeeping, then ...
	2809	sayNO; // propagate the failure
	2810	}
	2811
	2812	The topmost backtrack state, pointed to by st, is usually free. If you
	2813	want to claim it, populate any ST.foo fields in it with values you wish to
	2814	save, then do one of
	2815
	2816	PUSH_STATE_GOTO(resume_state, node);
	2817	PUSH_YES_STATE_GOTO(resume_state, node);
	2818
	2819	which sets that backtrack state's resume value to 'resume_state', pushes a
	2820	new free entry to the top of the backtrack stack, then goes to 'node'.
	2821	On backtracking, the free slot is popped, and the saved state becomes the
	2822	new free state. An ST.foo field in this new top state can be temporarily
	2823	accessed to retrieve values, but once the main loop is re-entered, it
	2824	becomes available for reuse.
	2825
	2826	Note that the depth of the backtrack stack constantly increases during the
	2827	left-to-right execution of the pattern, rather than going up and down with
	2828	the pattern nesting. For example the stack is at its maximum at Z at the
	2829	end of the pattern, rather than at X in the following:
	2830
	2831	/(((X)+)+)+....(Y)+....Z/
	2832
	2833	The only exceptions to this are lookahead/behind assertions and the cut,
	2834	(?>A), which pop all the backtrack states associated with A before
	2835	continuing.
	2836
	2837	Backtrack state structs are allocated in slabs of about 4K in size.
	2838	PL_regmatch_state and st always point to the currently active state,
	2839	and PL_regmatch_slab points to the slab currently containing
	2840	PL_regmatch_state. The first time regmatch() is called, the first slab is
	2841	allocated, and is never freed until interpreter destruction. When the slab
	2842	is full, a new one is allocated and chained to the end. At exit from
	2843	regmatch(), slabs allocated since entry are freed.
	2844
	2845	*/
	2846
	2847
	/* DEBUG_STATE_pp(pp)
	 *
	 * Under DEBUG_STATE_r, dump the current execution position and then
	 * print one line consisting of depth*2 columns of indentation, the
	 * literal string pp, and the name of st's resume state.  The name is
	 * followed by "[Y]", "[M]" or "[YM]" when st is the current yes_state
	 * and/or mark_state.
	 *
	 * pp must be a string literal, because it is pasted directly into the
	 * format string.  The macro reads locinput, scan, utf8_target, depth,
	 * st, yes_state and mark_state from the enclosing scope.
	 *
	 * The expansion already ends in ';', so it forms a complete statement.
	 */
	#define DEBUG_STATE_pp(pp) \
	    DEBUG_STATE_r({ \
	        DUMP_EXEC_POS(locinput, scan, utf8_target); \
	        PerlIO_printf(Perl_debug_log, \
	            " %*s"pp" %s%s%s%s%s\n", \
	            depth*2, "", \
	            PL_reg_name[st->resume_state], \
	            ((st==yes_state||st==mark_state) ? "[" : ""), \
	            ((st==yes_state) ? "Y" : ""), \
	            ((st==mark_state) ? "M" : ""), \
	            ((st==yes_state||st==mark_state) ? "]" : "") \
	        ); \
	    });


	/* REG_NODE_NUM(x): the distance (x)-prog converted to int, or -1 when x
	 * is NULL.  Relies on a variable named prog in the enclosing scope. */
	#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
	2864
	2865	#ifdef DEBUGGING
	2866
	2867	STATIC void
	2868	S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
	2869	const char start, const char end, const char *blurb)
	2870	{
	2871	const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
	2872
	2873	PERL_ARGS_ASSERT_DEBUG_START_MATCH;
	2874
	2875	if (!PL_colorset)
	2876	reginitcolors();
	2877	{
	2878	RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
	2879	RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
	2880
	2881	RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
	2882	start, end - start, 60);
	2883
	2884	PerlIO_printf(Perl_debug_log,
	2885	"%s%s REx%s %s against %s\n",
	2886	PL_colors[4], blurb, PL_colors[5], s0, s1);
	2887
	2888	if (utf8_target\|\|utf8_pat)
	2889	PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
	2890	utf8_pat ? "pattern" : "",
	2891	utf8_pat && utf8_target ? " and " : "",
	2892	utf8_target ? "string" : ""
	2893	);
	2894	}
	2895	}
	2896
	2897	STATIC void
	2898	S_dump_exec_pos(pTHX_ const char *locinput,
	2899	const regnode *scan,
	2900	const char *loc_regeol,
	2901	const char *loc_bostr,
	2902	const char *loc_reg_starttry,
	2903	const bool utf8_target)
	2904	{
	2905	const int docolor = PL_colors[0] \|\| PL_colors[2] \|\| *PL_colors[4];
	2906	const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
	2907	int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
	2908	/* The part of the string before starttry has one color
	2909	(pref0_len chars), between starttry and current
	2910	position another one (pref_len - pref0_len chars),
	2911	after the current position the third one.
	2912	We assume that pref0_len <= pref_len, otherwise we
	2913	decrease pref0_len. */
	2914	int pref_len = (locinput - loc_bostr) > (5 + taill) - l
	2915	? (5 + taill) - l : locinput - loc_bostr;
	2916	int pref0_len;
	2917
	2918	PERL_ARGS_ASSERT_DUMP_EXEC_POS;
	2919
	2920	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput - pref_len)))
	2921	pref_len++;
	2922	pref0_len = pref_len - (locinput - loc_reg_starttry);
	2923	if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
	2924	l = ( loc_regeol - locinput > (5 + taill) - pref_len
	2925	? (5 + taill) - pref_len : loc_regeol - locinput);
	2926	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput + l)))
	2927	l--;
	2928	if (pref0_len < 0)
	2929	pref0_len = 0;
	2930	if (pref0_len > pref_len)
	2931	pref0_len = pref_len;
	2932	{
	2933	const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0;
	2934
	2935	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
	2936	(locinput - pref_len),pref0_len, 60, 4, 5);
	2937
	2938	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
	2939	(locinput - pref_len + pref0_len),
	2940	pref_len - pref0_len, 60, 2, 3);
	2941
	2942	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
	2943	locinput, loc_regeol - locinput, 10, 0, 1);
	2944
	2945	const STRLEN tlen=len0+len1+len2;
	2946	PerlIO_printf(Perl_debug_log,
	2947	"%4"IVdf" <%.s%.s%s%.s>%s\|",
	2948	(IV)(locinput - loc_bostr),
	2949	len0, s0,
	2950	len1, s1,
	2951	(docolor ? "" : "> <"),
	2952	len2, s2,
	2953	(int)(tlen > 19 ? 0 : 19 - tlen),
	2954	"");
	2955	}
	2956	}
	2957
	2958	#endif
	2959
	2960	/* reg_check_named_buff_matched()
	2961	* Checks to see if a named buffer has matched. The data array of
	2962	* buffer numbers corresponding to the buffer is expected to reside
	2963	* in the regexp->data->data array in the slot stored in the ARG() of
	2964	* node involved. Note that this routine doesn't actually care about the
	2965	* name, that information is not preserved from compilation to execution.
	2966	* Returns the index of the leftmost defined buffer with the given name
	2967	* or 0 if non of the buffers matched.
	2968	*/
	2969	STATIC I32
	2970	S_reg_check_named_buff_matched(pTHX_ const regexp rex, const regnode scan)
	2971	{
	2972	I32 n;
	2973	RXi_GET_DECL(rex,rexi);
	2974	SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	2975	I32 nums=(I32)SvPVX(sv_dat);
	2976
	2977	PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
	2978
	2979	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	2980	if ((I32)*PL_reglastparen >= nums[n] &&
	2981	PL_regoffs[nums[n]].end != -1)
	2982	{
	2983	return nums[n];
	2984	}
	2985	}
	2986	return 0;
	2987	}
	2988
	2989
	2990	/* free all slabs above current one - called during LEAVE_SCOPE */
	2991
	2992	STATIC void
	2993	S_clear_backtrack_stack(pTHX_ void *p)
	2994	{
	2995	regmatch_slab *s = PL_regmatch_slab->next;
	2996	PERL_UNUSED_ARG(p);
	2997
	2998	if (!s)
	2999	return;
	3000	PL_regmatch_slab->next = NULL;
	3001	while (s) {
	3002	regmatch_slab * const osl = s;
	3003	s = s->next;
	3004	Safefree(osl);
	3005	}
	3006	}
	3007
	3008
	/* SETREX(Re1, Re2)
	 *
	 * Assign Re2 to the lvalue Re1.  If PL_reg_eval_set is true, Re2 is
	 * first passed to PM_SETRE() together with PL_reg_curpm.
	 *
	 * The body is wrapped in do { } while (0) so that the macro behaves as
	 * a single statement; without the wrapper the assignment would escape
	 * an unbraced "if (cond) SETREX(a, b);".  Use it as a statement with a
	 * trailing semicolon.  Re2 may be evaluated twice, so it must not have
	 * side effects.
	 */
	#define SETREX(Re1,Re2) \
	    do { \
	        if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
	        Re1 = (Re2); \
	    } while (0)
	3012
	3013	STATIC I32 /* 0 failure, 1 success */
	3014	S_regmatch(pTHX_ regmatch_info reginfo, regnode prog)
	3015	{
	3016	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	3017	dMY_CXT;
	3018	#endif
	3019	dVAR;
	3020	register const bool utf8_target = PL_reg_match_utf8;
	3021	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	3022	REGEXP *rex_sv = reginfo->prog;
	3023	regexp rex = (struct regexp )SvANY(rex_sv);
	3024	RXi_GET_DECL(rex,rexi);
	3025	I32 oldsave;
	3026	/* the current state. This is a cached copy of PL_regmatch_state */
	3027	register regmatch_state *st;
	3028	/* cache heavy used fields of st in registers */
	3029	register regnode *scan;
	3030	register regnode *next;
	3031	register U32 n = 0; /* general value; init to avoid compiler warning */
	3032	register I32 ln = 0; /* len or last; init to avoid compiler warning */
	3033	register char *locinput = PL_reginput;
	3034	register I32 nextchr; /* is always set to UCHARAT(locinput) */
	3035
	3036	bool result = 0; /* return value of S_regmatch */
	3037	int depth = 0; /* depth of backtrack stack */
	3038	U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
	3039	const U32 max_nochange_depth =
	3040	(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
	3041	3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
	3042	regmatch_state yes_state = NULL; / state to pop to on success of
	3043	subpattern */
	3044	/* mark_state piggy backs on the yes_state logic so that when we unwind
	3045	the stack on success we can update the mark_state as we go */
	3046	regmatch_state mark_state = NULL; / last mark state we have seen */
	3047	regmatch_state cur_eval = NULL; / most recent EVAL_AB state */
	3048	struct regmatch_state cur_curlyx = NULL; / most recent curlyx */
	3049	U32 state_num;
	3050	bool no_final = 0; /* prevent failure from backtracking? */
	3051	bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
	3052	char *startpoint = PL_reginput;
	3053	SV popmark = NULL; / are we looking for a mark? */
	3054	SV sv_commit = NULL; / last mark name seen in failure */
	3055	SV sv_yes_mark = NULL; / last mark name we have seen
	3056	during a successful match */
	3057	U32 lastopen = 0; /* last open we saw */
	3058	bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
	3059	SV* const oreplsv = GvSV(PL_replgv);
	3060	/* these three flags are set by various ops to signal information to
	3061	* the very next op. They have a useful lifetime of exactly one loop
	3062	* iteration, and are not preserved or restored by state pushes/pops
	3063	*/
	3064	bool sw = 0; /* the condition value in (?(cond)a\|b) */
	3065	bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
	3066	int logical = 0; /* the following EVAL is:
	3067	0: (?{...})
	3068	1: (?(?{...})X\|Y)
	3069	2: (??{...})
	3070	or the following IFMATCH/UNLESSM is:
	3071	false: plain (?=foo)
	3072	true: used as a condition: (?(?=foo))
	3073	*/
	3074	#ifdef DEBUGGING
	3075	GET_RE_DEBUG_FLAGS_DECL;
	3076	#endif
	3077
	3078	PERL_ARGS_ASSERT_REGMATCH;
	3079
	3080	DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
	3081	PerlIO_printf(Perl_debug_log,"regmatch start\n");
	3082	}));
	3083	/* on first ever call to regmatch, allocate first slab */
	3084	if (!PL_regmatch_slab) {
	3085	Newx(PL_regmatch_slab, 1, regmatch_slab);
	3086	PL_regmatch_slab->prev = NULL;
	3087	PL_regmatch_slab->next = NULL;
	3088	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
	3089	}
	3090
	3091	oldsave = PL_savestack_ix;
	3092	SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
	3093	SAVEVPTR(PL_regmatch_slab);
	3094	SAVEVPTR(PL_regmatch_state);
	3095
	3096	/* grab next free state slot */
	3097	st = ++PL_regmatch_state;
	3098	if (st > SLAB_LAST(PL_regmatch_slab))
	3099	st = PL_regmatch_state = S_push_slab(aTHX);
	3100
	3101	/* Note that nextchr is a byte even in UTF */
	3102	nextchr = UCHARAT(locinput);
	3103	scan = prog;
	3104	while (scan != NULL) {
	3105
	3106	DEBUG_EXECUTE_r( {
	3107	SV * const prop = sv_newmortal();
	3108	regnode *rnext=regnext(scan);
	3109	DUMP_EXEC_POS( locinput, scan, utf8_target );
	3110	regprop(rex, prop, scan);
	3111
	3112	PerlIO_printf(Perl_debug_log,
	3113	"%3"IVdf":%*s%s(%"IVdf")\n",
	3114	(IV)(scan - rexi->program), depth*2, "",
	3115	SvPVX_const(prop),
	3116	(PL_regkind[OP(scan)] == END \|\| !rnext) ?
	3117	0 : (IV)(rnext - rexi->program));
	3118	});
	3119
	3120	next = scan + NEXT_OFF(scan);
	3121	if (next == scan)
	3122	next = NULL;
	3123	state_num = OP(scan);
	3124
	3125	reenter_switch:
	3126
	3127	assert(PL_reglastparen == &rex->lastparen);
	3128	assert(PL_reglastcloseparen == &rex->lastcloseparen);
	3129	assert(PL_regoffs == rex->offs);
	3130
	3131	switch (state_num) {
	3132	case BOL:
	3133	if (locinput == PL_bostr)
	3134	{
	3135	/* reginfo->till = reginfo->bol; */
	3136	break;
	3137	}
	3138	sayNO;
	3139	case MBOL:
	3140	if (locinput == PL_bostr \|\|
	3141	((nextchr \|\| locinput < PL_regeol) && locinput[-1] == '\n'))
	3142	{
	3143	break;
	3144	}
	3145	sayNO;
	3146	case SBOL:
	3147	if (locinput == PL_bostr)
	3148	break;
	3149	sayNO;
	3150	case GPOS:
	3151	if (locinput == reginfo->ganch)
	3152	break;
	3153	sayNO;
	3154
	3155	case KEEPS:
	3156	/* update the startpoint */
	3157	st->u.keeper.val = PL_regoffs[0].start;
	3158	PL_reginput = locinput;
	3159	PL_regoffs[0].start = locinput - PL_bostr;
	3160	PUSH_STATE_GOTO(KEEPS_next, next);
	3161	/NOT-REACHED/
	3162	case KEEPS_next_fail:
	3163	/* rollback the start point change */
	3164	PL_regoffs[0].start = st->u.keeper.val;
	3165	sayNO_SILENT;
	3166	/NOT-REACHED/
	3167	case EOL:
	3168	goto seol;
	3169	case MEOL:
	3170	if ((nextchr \|\| locinput < PL_regeol) && nextchr != '\n')
	3171	sayNO;
	3172	break;
	3173	case SEOL:
	3174	seol:
	3175	if ((nextchr \|\| locinput < PL_regeol) && nextchr != '\n')
	3176	sayNO;
	3177	if (PL_regeol - locinput > 1)
	3178	sayNO;
	3179	break;
	3180	case EOS:
	3181	if (PL_regeol != locinput)
	3182	sayNO;
	3183	break;
	3184	case SANY:
	3185	if (!nextchr && locinput >= PL_regeol)
	3186	sayNO;
	3187	if (utf8_target) {
	3188	locinput += PL_utf8skip[nextchr];
	3189	if (locinput > PL_regeol)
	3190	sayNO;
	3191	nextchr = UCHARAT(locinput);
	3192	}
	3193	else
	3194	nextchr = UCHARAT(++locinput);
	3195	break;
	3196	case CANY:
	3197	if (!nextchr && locinput >= PL_regeol)
	3198	sayNO;
	3199	nextchr = UCHARAT(++locinput);
	3200	break;
	3201	case REG_ANY:
	3202	if ((!nextchr && locinput >= PL_regeol) \|\| nextchr == '\n')
	3203	sayNO;
	3204	if (utf8_target) {
	3205	locinput += PL_utf8skip[nextchr];
	3206	if (locinput > PL_regeol)
	3207	sayNO;
	3208	nextchr = UCHARAT(locinput);
	3209	}
	3210	else
	3211	nextchr = UCHARAT(++locinput);
	3212	break;
	3213
	3214	#undef ST
	3215	#define ST st->u.trie
	3216	case TRIEC:
	3217	/* In this case the charclass data is available inline so
	3218	we can fail fast without a lot of extra overhead.
	3219	*/
	3220	if (scan->flags == EXACT \|\| !utf8_target) {
	3221	if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
	3222	DEBUG_EXECUTE_r(
	3223	PerlIO_printf(Perl_debug_log,
	3224	"%*s %sfailed to match trie start class...%s\n",
	3225	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3226	);
	3227	sayNO_SILENT;
	3228	/* NOTREACHED */
	3229	}
	3230	}
	3231	/* FALL THROUGH */
	3232	case TRIE:
	3233	/* the basic plan of execution of the trie is:
	3234	* At the beginning, run though all the states, and
	3235	* find the longest-matching word. Also remember the position
	3236	* of the shortest matching word. For example, this pattern:
	3237	*   1  2 3 4    5
	3238	*   ab|a|x|abcd|abc
	3239	* when matched against the string "abcde", will generate
	3240	* accept states for all words except 3, with the longest
	3241	* matching word being 4, and the shortest being 1 (with
	3242	* the position being after char 1 of the string).
	3243	*
	3244	* Then for each matching word, in word order (i.e. 1,2,4,5),
	3245	* we run the remainder of the pattern; on each try setting
	3246	* the current position to the character following the word,
	3247	* returning to try the next word on failure.
	3248	*
	3249	* We avoid having to build a list of words at runtime by
	3250	* using a compile-time structure, wordinfo[].prev, which
	3251	* gives, for each word, the previous accepting word (if any).
	3252	* In the case above it would contain the mappings 1->2, 2->0,
	3253	* 3->0, 4->5, 5->1. We can use this table to generate, from
	3254	* the longest word (4 above), a list of all words, by
	3255	* following the list of prev pointers; this gives us the
	3256	* unordered list 4,5,1,2. Then given the current word we have
	3257	* just tried, we can go through the list and find the
	3258	* next-biggest word to try (so if we just failed on word 2,
	3259	* the next in the list is 4).
	3260	*
	3261	* Since at runtime we don't record the matching position in
	3262	* the string for each word, we have to work that out for
	3263	* each word we're about to process. The wordinfo table holds
	3264	* the character length of each word; given that we recorded
	3265	* at the start: the position of the shortest word and its
	3266	* length in chars, we just need to move the pointer the
	3267	* difference between the two char lengths. Depending on
	3268	* Unicode status and folding, that's cheap or expensive.
	3269	*
	3270	* This algorithm is optimised for the case where there are only a
	3271	* small number of accept states, i.e. 0,1, or maybe 2.
	3272	* With lots of accepts states, and having to try all of them,
	3273	* it becomes quadratic on number of accept states to find all
	3274	* the next words.
	3275	*/
	3276
	3277	{
	3278	/* what type of TRIE am I? (utf8 makes this contextual) */
	3279	DECL_TRIE_TYPE(scan);
	3280
	3281	/* what trie are we using right now */
	3282	reg_trie_data * const trie
	3283	= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
	3284	HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
	3285	U32 state = trie->startstate;
	3286
	3287	if (trie->bitmap && trie_type != trie_utf8_fold &&
	3288	!TRIE_BITMAP_TEST(trie,*locinput)
	3289	) {
	3290	if (trie->states[ state ].wordnum) {
	3291	DEBUG_EXECUTE_r(
	3292	PerlIO_printf(Perl_debug_log,
	3293	"%*s %smatched empty string...%s\n",
	3294	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3295	);
	3296	if (!trie->jump)
	3297	break;
	3298	} else {
	3299	DEBUG_EXECUTE_r(
	3300	PerlIO_printf(Perl_debug_log,
	3301	"%*s %sfailed to match trie start class...%s\n",
	3302	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3303	);
	3304	sayNO_SILENT;
	3305	}
	3306	}
	3307
	3308	{
	3309	U8 uc = ( U8 )locinput;
	3310
	3311	STRLEN len = 0;
	3312	STRLEN foldlen = 0;
	3313	U8 uscan = (U8)NULL;
	3314	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	3315	U32 charcount = 0; /* how many input chars we have matched */
	3316	U32 accepted = 0; /* have we seen any accepting states? */
	3317
	3318	ST.B = next;
	3319	ST.jump = trie->jump;
	3320	ST.me = scan;
	3321	ST.firstpos = NULL;
	3322	ST.longfold = FALSE; /* char longer if folded => it's harder */
	3323	ST.nextword = 0;
	3324
	3325	/* fully traverse the TRIE; note the position of the
	3326	shortest accept state and the wordnum of the longest
	3327	accept state */
	3328
	3329	while ( state && uc <= (U8*)PL_regeol ) {
	3330	U32 base = trie->states[ state ].trans.base;
	3331	UV uvc = 0;
	3332	U16 charid = 0;
	3333	U16 wordnum;
	3334	wordnum = trie->states[ state ].wordnum;
	3335
	3336	if (wordnum) { /* it's an accept state */
	3337	if (!accepted) {
	3338	accepted = 1;
	3339	/* record first match position */
	3340	if (ST.longfold) {
	3341	ST.firstpos = (U8*)locinput;
	3342	ST.firstchars = 0;
	3343	}
	3344	else {
	3345	ST.firstpos = uc;
	3346	ST.firstchars = charcount;
	3347	}
	3348	}
	3349	if (!ST.nextword \|\| wordnum < ST.nextword)
	3350	ST.nextword = wordnum;
	3351	ST.topword = wordnum;
	3352	}
	3353
	3354	DEBUG_TRIE_EXECUTE_r({
	3355	DUMP_EXEC_POS( (char *)uc, scan, utf8_target );
	3356	PerlIO_printf( Perl_debug_log,
	3357	"%*s %sState: %4"UVxf" Accepted: %c ",
	3358	2+depth * 2, "", PL_colors[4],
	3359	(UV)state, (accepted ? 'Y' : 'N'));
	3360	});
	3361
	3362	/* read a char and goto next state */
	3363	if ( base ) {
	3364	I32 offset;
	3365	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	3366	uscan, len, uvc, charid, foldlen,
	3367	foldbuf, uniflags);
	3368	charcount++;
	3369	if (foldlen>0)
	3370	ST.longfold = TRUE;
	3371	if (charid &&
	3372	( ((offset =
	3373	base + charid - 1 - trie->uniquecharcount)) >= 0)
	3374
	3375	&& ((U32)offset < trie->lasttrans)
	3376	&& trie->trans[offset].check == state)
	3377	{
	3378	state = trie->trans[offset].next;
	3379	}
	3380	else {
	3381	state = 0;
	3382	}
	3383	uc += len;
	3384
	3385	}
	3386	else {
	3387	state = 0;
	3388	}
	3389	DEBUG_TRIE_EXECUTE_r(
	3390	PerlIO_printf( Perl_debug_log,
	3391	"Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
	3392	charid, uvc, (UV)state, PL_colors[5] );
	3393	);
	3394	}
	3395	if (!accepted)
	3396	sayNO;
	3397
	3398	/* calculate total number of accept states */
	3399	{
	3400	U16 w = ST.topword;
	3401	accepted = 0;
	3402	while (w) {
	3403	w = trie->wordinfo[w].prev;
	3404	accepted++;
	3405	}
	3406	ST.accepted = accepted;
	3407	}
	3408
	3409	DEBUG_EXECUTE_r(
	3410	PerlIO_printf( Perl_debug_log,
	3411	"%*s %sgot %"IVdf" possible matches%s\n",
	3412	REPORT_CODE_OFF + depth * 2, "",
	3413	PL_colors[4], (IV)ST.accepted, PL_colors[5] );
	3414	);
	3415	goto trie_first_try; /* jump into the fail handler */
	3416	}}
	3417	/* NOTREACHED */
	3418
	3419	case TRIE_next_fail: /* we failed - try next alternative */
	3420	if ( ST.jump) {
	3421	REGCP_UNWIND(ST.cp);
	3422	for (n = *PL_reglastparen; n > ST.lastparen; n--)
	3423	PL_regoffs[n].end = -1;
	3424	*PL_reglastparen = n;
	3425	}
	3426	if (!--ST.accepted) {
	3427	DEBUG_EXECUTE_r({
	3428	PerlIO_printf( Perl_debug_log,
	3429	"%*s %sTRIE failed...%s\n",
	3430	REPORT_CODE_OFF+depth*2, "",
	3431	PL_colors[4],
	3432	PL_colors[5] );
	3433	});
	3434	sayNO_SILENT;
	3435	}
	3436	{
	3437	/* Find next-highest word to process. Note that this code
	3438	* is O(N^2) per trie run (O(N) per branch), so keep tight */
	3439	register U16 min = 0;
	3440	register U16 word;
	3441	register U16 const nextword = ST.nextword;
	3442	register reg_trie_wordinfo * const wordinfo
	3443	= ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
	3444	for (word=ST.topword; word; word=wordinfo[word].prev) {
	3445	if (word > nextword && (!min \|\| word < min))
	3446	min = word;
	3447	}
	3448	ST.nextword = min;
	3449	}
	3450
	3451	trie_first_try:
	3452	if (do_cutgroup) {
	3453	do_cutgroup = 0;
	3454	no_final = 0;
	3455	}
	3456
	3457	if ( ST.jump) {
	3458	ST.lastparen = *PL_reglastparen;
	3459	REGCP_SET(ST.cp);
	3460	}
	3461
	3462	/* find start char of end of current word */
	3463	{
	3464	U32 chars; /* how many chars to skip */
	3465	U8 *uc = ST.firstpos;
	3466	reg_trie_data * const trie
	3467	= (reg_trie_data*)rexi->data->data[ARG(ST.me)];
	3468
	3469	assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3470	>= ST.firstchars);
	3471	chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3472	- ST.firstchars;
	3473
	3474	if (ST.longfold) {
	3475	/* the hard option - fold each char in turn and find
	3476	* its folded length (which may be different */
	3477	U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
	3478	STRLEN foldlen;
	3479	STRLEN len;
	3480	UV uvc;
	3481	U8 *uscan;
	3482
	3483	while (chars) {
	3484	if (utf8_target) {
	3485	uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len,
	3486	uniflags);
	3487	uc += len;
	3488	}
	3489	else {
	3490	uvc = *uc;
	3491	uc++;
	3492	}
	3493	uvc = to_uni_fold(uvc, foldbuf, &foldlen);
	3494	uscan = foldbuf;
	3495	while (foldlen) {
	3496	if (!--chars)
	3497	break;
	3498	uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len,
	3499	uniflags);
	3500	uscan += len;
	3501	foldlen -= len;
	3502	}
	3503	}
	3504	}
	3505	else {
	3506	if (utf8_target)
	3507	while (chars--)
	3508	uc += UTF8SKIP(uc);
	3509	else
	3510	uc += chars;
	3511	}
	3512	PL_reginput = (char *)uc;
	3513	}
	3514
	3515	scan = (ST.jump && ST.jump[ST.nextword])
	3516	? ST.me + ST.jump[ST.nextword]
	3517	: ST.B;
	3518
	3519	DEBUG_EXECUTE_r({
	3520	PerlIO_printf( Perl_debug_log,
	3521	"%*s %sTRIE matched word #%d, continuing%s\n",
	3522	REPORT_CODE_OFF+depth*2, "",
	3523	PL_colors[4],
	3524	ST.nextword,
	3525	PL_colors[5]
	3526	);
	3527	});
	3528
	3529	if (ST.accepted > 1 \|\| has_cutgroup) {
	3530	PUSH_STATE_GOTO(TRIE_next, scan);
	3531	/* NOTREACHED */
	3532	}
	3533	/* only one choice left - just continue */
	3534	DEBUG_EXECUTE_r({
	3535	AV *const trie_words
	3536	= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
	3537	SV ** const tmp = av_fetch( trie_words,
	3538	ST.nextword-1, 0 );
	3539	SV *sv= tmp ? sv_newmortal() : NULL;
	3540
	3541	PerlIO_printf( Perl_debug_log,
	3542	"%*s %sonly one match left, short-circuiting: #%d <%s>%s\n",
	3543	REPORT_CODE_OFF+depth*2, "", PL_colors[4],
	3544	ST.nextword,
	3545	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	3546	PL_colors[0], PL_colors[1],
	3547	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)\|PERL_PV_ESCAPE_NONASCII
	3548	)
	3549	: "not compiled under -Dr",
	3550	PL_colors[5] );
	3551	});
	3552
	3553	locinput = PL_reginput;
	3554	nextchr = UCHARAT(locinput);
	3555	continue; /* execute rest of RE */
	3556	/* NOTREACHED */
	3557	#undef ST
	3558
	3559	case EXACT: {
	3560	char *s = STRING(scan);
	3561	ln = STR_LEN(scan);
	3562	if (utf8_target != UTF_PATTERN) {
	3563	/* The target and the pattern have differing utf8ness. */
	3564	char *l = locinput;
	3565	const char * const e = s + ln;
	3566
	3567	if (utf8_target) {
	3568	/* The target is utf8, the pattern is not utf8. */
	3569	while (s < e) {
	3570	STRLEN ulen;
	3571	if (l >= PL_regeol)
	3572	sayNO;
	3573	if (NATIVE_TO_UNI((U8)s) !=
	3574	utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
	3575	uniflags))
	3576	sayNO;
	3577	l += ulen;
	3578	s ++;
	3579	}
	3580	}
	3581	else {
	3582	/* The target is not utf8, the pattern is utf8. */
	3583	while (s < e) {
	3584	STRLEN ulen;
	3585	if (l >= PL_regeol)
	3586	sayNO;
	3587	if (NATIVE_TO_UNI(((U8)l)) !=
	3588	utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
	3589	uniflags))
	3590	sayNO;
	3591	s += ulen;
	3592	l ++;
	3593	}
	3594	}
	3595	locinput = l;
	3596	nextchr = UCHARAT(locinput);
	3597	break;
	3598	}
	3599	/* The target and the pattern have the same utf8ness. */
	3600	/* Inline the first character, for speed. */
	3601	if (UCHARAT(s) != nextchr)
	3602	sayNO;
	3603	if (PL_regeol - locinput < ln)
	3604	sayNO;
	3605	if (ln > 1 && memNE(s, locinput, ln))
	3606	sayNO;
	3607	locinput += ln;
	3608	nextchr = UCHARAT(locinput);
	3609	break;
	3610	}
	3611	case EXACTFL: {
	3612	re_fold_t folder;
	3613	const U8 * fold_array;
	3614	const char * s;
	3615	U32 fold_utf8_flags;
	3616
	3617	PL_reg_flags \|= RF_tainted;
	3618	folder = foldEQ_locale;
	3619	fold_array = PL_fold_locale;
	3620	fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
	3621	goto do_exactf;
	3622
	3623	case EXACTFU:
	3624	folder = foldEQ_latin1;
	3625	fold_array = PL_fold_latin1;
	3626	fold_utf8_flags = 0;
	3627	goto do_exactf;
	3628
	3629	case EXACTFA:
	3630	folder = foldEQ_latin1;
	3631	fold_array = PL_fold_latin1;
	3632	fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	3633	goto do_exactf;
	3634
	3635	case EXACTF:
	3636	folder = foldEQ;
	3637	fold_array = PL_fold;
	3638	fold_utf8_flags = 0;
	3639
	3640	do_exactf:
	3641	s = STRING(scan);
	3642	ln = STR_LEN(scan);
	3643
	3644	if (utf8_target \|\| UTF_PATTERN) {
	3645	/* Either target or the pattern are utf8. */
	3646	const char * const l = locinput;
	3647	char *e = PL_regeol;
	3648
	3649	if (! foldEQ_utf8_flags(s, 0, ln, cBOOL(UTF_PATTERN),
	3650	l, &e, 0, utf8_target, fold_utf8_flags))
	3651	{
	3652	sayNO;
	3653	}
	3654	locinput = e;
	3655	nextchr = UCHARAT(locinput);
	3656	break;
	3657	}
	3658
	3659	/* Neither the target nor the pattern are utf8 */
	3660	if (UCHARAT(s) != nextchr &&
	3661	UCHARAT(s) != fold_array[nextchr])
	3662	{
	3663	sayNO;
	3664	}
	3665	if (PL_regeol - locinput < ln)
	3666	sayNO;
	3667	if (ln > 1 && ! folder(s, locinput, ln))
	3668	sayNO;
	3669	locinput += ln;
	3670	nextchr = UCHARAT(locinput);
	3671	break;
	3672	}
	3673
	3674	/* XXX Could improve efficiency by separating these all out using a
	3675	* macro or in-line function. At that point regcomp.c would no longer
	3676	* have to set the FLAGS fields of these */
	3677	case BOUNDL:
	3678	case NBOUNDL:
	3679	PL_reg_flags \|= RF_tainted;
	3680	/* FALL THROUGH */
	3681	case BOUND:
	3682	case BOUNDU:
	3683	case BOUNDA:
	3684	case NBOUND:
	3685	case NBOUNDU:
	3686	case NBOUNDA:
	3687	/* was last char in word? */
	3688	if (utf8_target && FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET) {
	3689	if (locinput == PL_bostr)
	3690	ln = '\n';
	3691	else {
	3692	const U8 * const r = reghop3((U8)locinput, -1, (U8)PL_bostr);
	3693
	3694	ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
	3695	}
	3696	if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
	3697	ln = isALNUM_uni(ln);
	3698	LOAD_UTF8_CHARCLASS_ALNUM();
	3699	n = swash_fetch(PL_utf8_alnum, (U8*)locinput, utf8_target);
	3700	}
	3701	else {
	3702	ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
	3703	n = isALNUM_LC_utf8((U8*)locinput);
	3704	}
	3705	}
	3706	else {
	3707
	3708	/* Here the string isn't utf8, or is utf8 and only ascii
	3709	* characters are to match \w. In the latter case looking at
	3710	* the byte just prior to the current one may be just the final
	3711	* byte of a multi-byte character. This is ok. There are two
	3712	* cases:
	3713	* 1) it is a single byte character, and then the test is doing
	3714	* just what it's supposed to.
	3715	* 2) it is a multi-byte character, in which case the final
	3716	* byte is never mistakable for ASCII, and so the test
	3717	* will say it is not a word character, which is the
	3718	* correct answer. */
	3719	ln = (locinput != PL_bostr) ?
	3720	UCHARAT(locinput - 1) : '\n';
	3721	switch (FLAGS(scan)) {
	3722	case REGEX_UNICODE_CHARSET:
	3723	ln = isWORDCHAR_L1(ln);
	3724	n = isWORDCHAR_L1(nextchr);
	3725	break;
	3726	case REGEX_LOCALE_CHARSET:
	3727	ln = isALNUM_LC(ln);
	3728	n = isALNUM_LC(nextchr);
	3729	break;
	3730	case REGEX_DEPENDS_CHARSET:
	3731	ln = isALNUM(ln);
	3732	n = isALNUM(nextchr);
	3733	break;
	3734	case REGEX_ASCII_RESTRICTED_CHARSET:
	3735	ln = isWORDCHAR_A(ln);
	3736	n = isWORDCHAR_A(nextchr);
	3737	break;
	3738	default:
	3739	Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
	3740	break;
	3741	}
	3742	}
	3743	/* Note requires that all BOUNDs be lower than all NBOUNDs in
	3744	* regcomp.sym */
	3745	if (((!ln) == (!n)) == (OP(scan) < NBOUND))
	3746	sayNO;
	3747	break;
	3748	case ANYOFV:
	3749	case ANYOF:
	3750	if (utf8_target \|\| state_num == ANYOFV) {
	3751	STRLEN inclasslen = PL_regeol - locinput;
	3752	if (locinput >= PL_regeol)
	3753	sayNO;
	3754
	3755	if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, utf8_target))
	3756	sayNO;
	3757	locinput += inclasslen;
	3758	nextchr = UCHARAT(locinput);
	3759	break;
	3760	}
	3761	else {
	3762	if (nextchr < 0)
	3763	nextchr = UCHARAT(locinput);
	3764	if (!nextchr && locinput >= PL_regeol)
	3765	sayNO;
	3766	if (!REGINCLASS(rex, scan, (U8*)locinput))
	3767	sayNO;
	3768	nextchr = UCHARAT(++locinput);
	3769	break;
	3770	}
	3771	break;
	3772	/* Special char classes - The defines start on line 129 or so */
	3773	CCC_TRY_U(ALNUM, NALNUM, isWORDCHAR,
	3774	ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8,
	3775	ALNUMU, NALNUMU, isWORDCHAR_L1,
	3776	ALNUMA, NALNUMA, isWORDCHAR_A,
	3777	alnum, "a");
	3778
	3779	CCC_TRY_U(SPACE, NSPACE, isSPACE,
	3780	SPACEL, NSPACEL, isSPACE_LC, isSPACE_LC_utf8,
	3781	SPACEU, NSPACEU, isSPACE_L1,
	3782	SPACEA, NSPACEA, isSPACE_A,
	3783	space, " ");
	3784
	3785	CCC_TRY(DIGIT, NDIGIT, isDIGIT,
	3786	DIGITL, NDIGITL, isDIGIT_LC, isDIGIT_LC_utf8,
	3787	DIGITA, NDIGITA, isDIGIT_A,
	3788	digit, "0");
	3789
	3790	case CLUMP: /* Match \X: logical Unicode character. This is defined as
	3791	a Unicode extended Grapheme Cluster */
	3792	/* From http://www.unicode.org/reports/tr29 (5.2 version). An
	3793	extended Grapheme Cluster is:
	3794
	3795	CR LF
	3796	| Prepend* Begin Extend*
	3797	| .
	3798
	3799	Begin is (Hangul-syllable | ! Control)
	3800	Extend is (Grapheme_Extend | Spacing_Mark)
	3801	Control is [ GCB_Control CR LF ]
	3802
	3803	The discussion below shows how the code for CLUMP is derived
	3804	from this regex. Note that most of these concepts are from
	3805	property values of the Grapheme Cluster Boundary (GCB) property.
	3806	No code point can have multiple property values for a given
	3807	property. Thus a code point in Prepend can't be in Control, but
	3808	it must be in !Control. This is why Control above includes
	3809	GCB_Control plus CR plus LF. The latter two are used in the GCB
	3810	property separately, and so can't be in GCB_Control, even though
	3811	they logically are controls. Control is not the same as gc=cc,
	3812	but includes format and other characters as well.
	3813
	3814	The Unicode definition of Hangul-syllable is:
	3815	L+
	3816	\| (L* ( ( V \| LV ) V* \| LVT ) T*)
	3817	\| T+
	3818	)
	3819	Each of these is a value for the GCB property, and hence must be
	3820	disjoint, so the order they are tested is immaterial, so the
	3821	above can safely be changed to
	3822	T+
	3823	\| L+
	3824	\| (L* ( LVT \| ( V \| LV ) V*) T*)
	3825
	3826	The last two terms can be combined like this:
	3827	L* ( L
	3828	\| (( LVT \| ( V \| LV ) V*) T*))
	3829
	3830	And refactored into this:
	3831	L* (L \| LVT T* \| V V* T* \| LV V* T*)
	3832
	3833	That means that if we have seen any L's at all we can quit
	3834	there, but if the next character is an LVT, a V, or an LV we
	3835	should keep going.
	3836
	3837	There is a subtlety with Prepend* which showed up in testing.
	3838	Note that the Begin, and only the Begin is required in:
	3839	\| Prepend* Begin Extend*
	3840	Also, Begin contains '! Control'. A Prepend must be a
	3841	'! Control', which means it must also be a Begin. What it
	3842	comes down to is that if we match Prepend* and then find no
	3843	suitable Begin afterwards, that if we backtrack the last
	3844	Prepend, that one will be a suitable Begin.
	3845	*/
	3846
	3847	if (locinput >= PL_regeol)
	3848	sayNO;
	3849	if (! utf8_target) {
	3850
	3851	/* Match either CR LF or '.', as all the other possibilities
	3852	* require utf8 */
	3853	locinput++; /* Match the . or CR */
	3854	if (nextchr == '\r' /* And if it was CR, and the next is LF,
	3855	match the LF */
	3856	&& locinput < PL_regeol
	3857	&& UCHARAT(locinput) == '\n') locinput++;
	3858	}
	3859	else {
	3860
	3861	/* Utf8: See if is ( CR LF ); already know that locinput <
	3862	* PL_regeol, so locinput+1 is in bounds */
	3863	if (nextchr == '\r' && UCHARAT(locinput + 1) == '\n') {
	3864	locinput += 2;
	3865	}
	3866	else {
	3867	/* In case have to backtrack to beginning, then match '.' */
	3868	char *starting = locinput;
	3869
	3870	/* In case have to backtrack the last prepend */
	3871	char *previous_prepend = 0;
	3872
	3873	LOAD_UTF8_CHARCLASS_GCB();
	3874
	3875	/* Match (prepend)* */
	3876	while (locinput < PL_regeol
	3877	&& swash_fetch(PL_utf8_X_prepend,
	3878	(U8*)locinput, utf8_target))
	3879	{
	3880	previous_prepend = locinput;
	3881	locinput += UTF8SKIP(locinput);
	3882	}
	3883
	3884	/* As noted above, if we matched a prepend character, but
	3885	* the next thing won't match, back off the last prepend we
	3886	* matched, as it is guaranteed to match the begin */
	3887	if (previous_prepend
	3888	&& (locinput >= PL_regeol
	3889	\|\| ! swash_fetch(PL_utf8_X_begin,
	3890	(U8*)locinput, utf8_target)))
	3891	{
	3892	locinput = previous_prepend;
	3893	}
	3894
	3895	/* Note that here we know PL_regeol > locinput, as we
	3896	* tested that upon input to this switch case, and if we
	3897	* moved locinput forward, we tested the result just above
	3898	* and it either passed, or we backed off so that it will
	3899	* now pass */
	3900	if (! swash_fetch(PL_utf8_X_begin, (U8*)locinput, utf8_target)) {
	3901
	3902	/* Here did not match the required 'Begin' in the
	3903	* second term. So just match the very first
	3904	* character, the '.' of the final term of the regex */
	3905	locinput = starting + UTF8SKIP(starting);
	3906	} else {
	3907
	3908	/* Here is the beginning of a character that can have
	3909	* an extender. It is either a hangul syllable, or a
	3910	* non-control */
	3911	if (swash_fetch(PL_utf8_X_non_hangul,
	3912	(U8*)locinput, utf8_target))
	3913	{
	3914
	3915	/* Here not a Hangul syllable, must be a
	3916	* ('! * Control') */
	3917	locinput += UTF8SKIP(locinput);
	3918	} else {
	3919
	3920	/* Here is a Hangul syllable. It can be composed
	3921	* of several individual characters. One
	3922	* possibility is T+ */
	3923	if (swash_fetch(PL_utf8_X_T,
	3924	(U8*)locinput, utf8_target))
	3925	{
	3926	while (locinput < PL_regeol
	3927	&& swash_fetch(PL_utf8_X_T,
	3928	(U8*)locinput, utf8_target))
	3929	{
	3930	locinput += UTF8SKIP(locinput);
	3931	}
	3932	} else {
	3933
	3934	/* Here, not T+, but is a Hangul. That means
	3935	* it is one of the others: L, LV, LVT or V,
	3936	* and matches:
	3937	* L* (L \| LVT T* \| V V* T* \| LV V* T*) */
	3938
	3939	/* Match L* */
	3940	while (locinput < PL_regeol
	3941	&& swash_fetch(PL_utf8_X_L,
	3942	(U8*)locinput, utf8_target))
	3943	{
	3944	locinput += UTF8SKIP(locinput);
	3945	}
	3946
	3947	/* Here, have exhausted L*. If the next
	3948	* character is not an LV, LVT nor V, it means
	3949	* we had to have at least one L, so matches L+
	3950	* in the original equation, we have a complete
	3951	* hangul syllable. Are done. */
	3952
	3953	if (locinput < PL_regeol
	3954	&& swash_fetch(PL_utf8_X_LV_LVT_V,
	3955	(U8*)locinput, utf8_target))
	3956	{
	3957
	3958	/* Otherwise keep going. Must be LV, LVT
	3959	* or V. See if LVT */
	3960	if (swash_fetch(PL_utf8_X_LVT,
	3961	(U8*)locinput, utf8_target))
	3962	{
	3963	locinput += UTF8SKIP(locinput);
	3964	} else {
	3965
	3966	/* Must be V or LV. Take it, then
	3967	* match V* */
	3968	locinput += UTF8SKIP(locinput);
	3969	while (locinput < PL_regeol
	3970	&& swash_fetch(PL_utf8_X_V,
	3971	(U8*)locinput, utf8_target))
	3972	{
	3973	locinput += UTF8SKIP(locinput);
	3974	}
	3975	}
	3976
	3977	/* And any of LV, LVT, or V can be followed
	3978	* by T* */
	3979	while (locinput < PL_regeol
	3980	&& swash_fetch(PL_utf8_X_T,
	3981	(U8*)locinput,
	3982	utf8_target))
	3983	{
	3984	locinput += UTF8SKIP(locinput);
	3985	}
	3986	}
	3987	}
	3988	}
	3989
	3990	/* Match any extender */
	3991	while (locinput < PL_regeol
	3992	&& swash_fetch(PL_utf8_X_extend,
	3993	(U8*)locinput, utf8_target))
	3994	{
	3995	locinput += UTF8SKIP(locinput);
	3996	}
	3997	}
	3998	}
	3999	if (locinput > PL_regeol) sayNO;
	4000	}
	4001	nextchr = UCHARAT(locinput);
	4002	break;
	4003
	4004	case NREFFL:
	4005	{ /* The capture buffer cases. The ones beginning with N for the
	4006	named buffers just convert to the equivalent numbered and
	4007	pretend they were called as the corresponding numbered buffer
	4008	op. */
	4009	/* don't initialize these in the declaration, it makes C++
	4010	unhappy */
	4011	char *s;
	4012	char type;
	4013	re_fold_t folder;
	4014	const U8 *fold_array;
	4015	UV utf8_fold_flags;
	4016
	4017	PL_reg_flags \|= RF_tainted;
	4018	folder = foldEQ_locale;
	4019	fold_array = PL_fold_locale;
	4020	type = REFFL;
	4021	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4022	goto do_nref;
	4023
	4024	case NREFFA:
	4025	folder = foldEQ_latin1;
	4026	fold_array = PL_fold_latin1;
	4027	type = REFFA;
	4028	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4029	goto do_nref;
	4030
	4031	case NREFFU:
	4032	folder = foldEQ_latin1;
	4033	fold_array = PL_fold_latin1;
	4034	type = REFFU;
	4035	utf8_fold_flags = 0;
	4036	goto do_nref;
	4037
	4038	case NREFF:
	4039	folder = foldEQ;
	4040	fold_array = PL_fold;
	4041	type = REFF;
	4042	utf8_fold_flags = 0;
	4043	goto do_nref;
	4044
	4045	case NREF:
	4046	type = REF;
	4047	folder = NULL;
	4048	fold_array = NULL;
	4049	utf8_fold_flags = 0;
	4050	do_nref:
	4051
	4052	/* For the named back references, find the corresponding buffer
	4053	* number */
	4054	n = reg_check_named_buff_matched(rex,scan);
	4055
	4056	if ( ! n ) {
	4057	sayNO;
	4058	}
	4059	goto do_nref_ref_common;
	4060
	4061	case REFFL:
	4062	PL_reg_flags \|= RF_tainted;
	4063	folder = foldEQ_locale;
	4064	fold_array = PL_fold_locale;
	4065	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4066	goto do_ref;
	4067
	4068	case REFFA:
	4069	folder = foldEQ_latin1;
	4070	fold_array = PL_fold_latin1;
	4071	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4072	goto do_ref;
	4073
	4074	case REFFU:
	4075	folder = foldEQ_latin1;
	4076	fold_array = PL_fold_latin1;
	4077	utf8_fold_flags = 0;
	4078	goto do_ref;
	4079
	4080	case REFF:
	4081	folder = foldEQ;
	4082	fold_array = PL_fold;
	4083	utf8_fold_flags = 0;
	4084	goto do_ref;
	4085
	4086	case REF:
	4087	folder = NULL;
	4088	fold_array = NULL;
	4089	utf8_fold_flags = 0;
	4090
	4091	do_ref:
	4092	type = OP(scan);
	4093	n = ARG(scan); /* which paren pair */
	4094
	4095	do_nref_ref_common:
	4096	ln = PL_regoffs[n].start;
	4097	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4098	if (*PL_reglastparen < n \|\| ln == -1)
	4099	sayNO; /* Do not match unless seen CLOSEn. */
	4100	if (ln == PL_regoffs[n].end)
	4101	break;
	4102
	4103	s = PL_bostr + ln;
	4104	if (type != REF /* REF can do byte comparison */
	4105	&& (utf8_target \|\| type == REFFU))
	4106	{ /* XXX handle REFFL better */
	4107	char * limit = PL_regeol;
	4108
	4109	/* This call case insensitively compares the entire buffer
	4110	* at s, with the current input starting at locinput, but
	4111	* not going off the end given by PL_regeol, and returns in
	4112	* limit upon success, how much of the current input was
	4113	* matched */
	4114	if (! foldEQ_utf8_flags(s, NULL, PL_regoffs[n].end - ln, utf8_target,
	4115	locinput, &limit, 0, utf8_target, utf8_fold_flags))
	4116	{
	4117	sayNO;
	4118	}
	4119	locinput = limit;
	4120	nextchr = UCHARAT(locinput);
	4121	break;
	4122	}
	4123
	4124	/* Not utf8: Inline the first character, for speed. */
	4125	if (UCHARAT(s) != nextchr &&
	4126	(type == REF \|\|
	4127	UCHARAT(s) != fold_array[nextchr]))
	4128	sayNO;
	4129	ln = PL_regoffs[n].end - ln;
	4130	if (locinput + ln > PL_regeol)
	4131	sayNO;
	4132	if (ln > 1 && (type == REF
	4133	? memNE(s, locinput, ln)
	4134	: ! folder(s, locinput, ln)))
	4135	sayNO;
	4136	locinput += ln;
	4137	nextchr = UCHARAT(locinput);
	4138	break;
	4139	}
	4140	case NOTHING:
	4141	case TAIL:
	4142	break;
	4143	case BACK:
	4144	break;
	4145
	4146	#undef ST
	4147	#define ST st->u.eval
	4148	{
	4149	SV *ret;
	4150	REGEXP *re_sv;
	4151	regexp *re;
	4152	regexp_internal *rei;
	4153	regnode *startpoint;
	4154
	4155	case GOSTART:
	4156	case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
	4157	if (cur_eval && cur_eval->locinput==locinput) {
	4158	if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
	4159	Perl_croak(aTHX_ "Infinite recursion in regex");
	4160	if ( ++nochange_depth > max_nochange_depth )
	4161	Perl_croak(aTHX_
	4162	"Pattern subroutine nesting without pos change"
	4163	" exceeded limit in regex");
	4164	} else {
	4165	nochange_depth = 0;
	4166	}
	4167	re_sv = rex_sv;
	4168	re = rex;
	4169	rei = rexi;
	4170	(void)ReREFCNT_inc(rex_sv);
	4171	if (OP(scan)==GOSUB) {
	4172	startpoint = scan + ARG2L(scan);
	4173	ST.close_paren = ARG(scan);
	4174	} else {
	4175	startpoint = rei->program+1;
	4176	ST.close_paren = 0;
	4177	}
	4178	goto eval_recurse_doit;
	4179	/* NOTREACHED */
	4180	case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X\|Y)B/ */
	4181	if (cur_eval && cur_eval->locinput==locinput) {
	4182	if ( ++nochange_depth > max_nochange_depth )
	4183	Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
	4184	} else {
	4185	nochange_depth = 0;
	4186	}
	4187	{
	4188	/* execute the code in the {...} */
	4189	dSP;
	4190	SV ** const before = SP;
	4191	OP_4tree * const oop = PL_op;
	4192	COP * const ocurcop = PL_curcop;
	4193	PAD *old_comppad;
	4194	char *saved_regeol = PL_regeol;
	4195	struct re_save_state saved_state;
	4196
	4197	/* To not corrupt the existing regex state while executing the
	4198	* eval we would normally put it on the save stack, like with
	4199	* save_re_context. However, re-evals have a weird scoping so we
	4200	* can't just add ENTER/LEAVE here. With that, things like
	4201	*
	4202	* (?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a})
	4203	*
	4204	* would break, as they expect the localisation to be unwound
	4205	* only when the re-engine backtracks through the bit that
	4206	* localised it.
	4207	*
	4208	* What we do instead is just saving the state in a local c
	4209	* variable.
	4210	*/
	4211	Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
	4212
	4213	n = ARG(scan);
	4214	PL_op = (OP_4tree*)rexi->data->data[n];
	4215	DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
	4216	" re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
	4217	/* wrap the call in two SAVECOMPPADs. This ensures that
	4218	* when the save stack is eventually unwound, all the
	4219	* accumulated SAVEt_CLEARSV's will be processed with
	4220	* interspersed SAVEt_COMPPAD's to ensure that lexicals
	4221	* are cleared in the right pad */
	4222	SAVECOMPPAD();
	4223	PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
	4224	PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
	4225
	4226	if (sv_yes_mark) {
	4227	SV *sv_mrk = get_sv("REGMARK", 1);
	4228	sv_setsv(sv_mrk, sv_yes_mark);
	4229	}
	4230
	4231	CALLRUNOPS(aTHX); /* Scalar context. */
	4232	SPAGAIN;
	4233	if (SP == before)
	4234	ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
	4235	else {
	4236	ret = POPs;
	4237	PUTBACK;
	4238	}
	4239
	4240	Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
	4241
	4242	PL_op = oop;
	4243	SAVECOMPPAD();
	4244	PAD_RESTORE_LOCAL(old_comppad);
	4245	PL_curcop = ocurcop;
	4246	PL_regeol = saved_regeol;
	4247	if (!logical) {
	4248	/* /(?{...})/ */
	4249	sv_setsv(save_scalar(PL_replgv), ret);
	4250	break;
	4251	}
	4252	}
	4253	if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
	4254	logical = 0;
	4255	{
	4256	/* extract RE object from returned value; compiling if
	4257	* necessary */
	4258	MAGIC *mg = NULL;
	4259	REGEXP *rx = NULL;
	4260
	4261	if (SvROK(ret)) {
	4262	SV *const sv = SvRV(ret);
	4263
	4264	if (SvTYPE(sv) == SVt_REGEXP) {
	4265	rx = (REGEXP*) sv;
	4266	} else if (SvSMAGICAL(sv)) {
	4267	mg = mg_find(sv, PERL_MAGIC_qr);
	4268	assert(mg);
	4269	}
	4270	} else if (SvTYPE(ret) == SVt_REGEXP) {
	4271	rx = (REGEXP*) ret;
	4272	} else if (SvSMAGICAL(ret)) {
	4273	if (SvGMAGICAL(ret)) {
	4274	/* I don't believe that there is ever qr magic
	4275	here. */
	4276	assert(!mg_find(ret, PERL_MAGIC_qr));
	4277	sv_unmagic(ret, PERL_MAGIC_qr);
	4278	}
	4279	else {
	4280	mg = mg_find(ret, PERL_MAGIC_qr);
	4281	/* testing suggests mg only ends up non-NULL for
	4282	scalars who were upgraded and compiled in the
	4283	else block below. In turn, this is only
	4284	triggered in the "postponed utf8 string" tests
	4285	in t/op/pat.t */
	4286	}
	4287	}
	4288
	4289	if (mg) {
	4290	rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
	4291	assert(rx);
	4292	}
	4293	if (rx) {
	4294	rx = reg_temp_copy(NULL, rx);
	4295	}
	4296	else {
	4297	U32 pm_flags = 0;
	4298	const I32 osize = PL_regsize;
	4299
	4300	if (DO_UTF8(ret)) {
	4301	assert (SvUTF8(ret));
	4302	} else if (SvUTF8(ret)) {
	4303	/* Not doing UTF-8, despite what the SV says. Is
	4304	this only if we're trapped in use 'bytes'? */
	4305	/* Make a copy of the octet sequence, but without
	4306	the flag on, as the compiler now honours the
	4307	SvUTF8 flag on ret. */
	4308	STRLEN len;
	4309	const char *const p = SvPV(ret, len);
	4310	ret = newSVpvn_flags(p, len, SVs_TEMP);
	4311	}
	4312	rx = CALLREGCOMP(ret, pm_flags);
	4313	if (!(SvFLAGS(ret)
	4314	& (SVs_TEMP \| SVs_PADTMP \| SVf_READONLY
	4315	\| SVs_GMG))) {
	4316	/* This isn't a first class regexp. Instead, it's
	4317	caching a regexp onto an existing, Perl visible
	4318	scalar. */
	4319	sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
	4320	}
	4321	PL_regsize = osize;
	4322	}
	4323	re_sv = rx;
	4324	re = (struct regexp *)SvANY(rx);
	4325	}
	4326	RXp_MATCH_COPIED_off(re);
	4327	re->subbeg = rex->subbeg;
	4328	re->sublen = rex->sublen;
	4329	rei = RXi_GET(re);
	4330	DEBUG_EXECUTE_r(
	4331	debug_start_match(re_sv, utf8_target, locinput, PL_regeol,
	4332	"Matching embedded");
	4333	);
	4334	startpoint = rei->program + 1;
	4335	ST.close_paren = 0; /* only used for GOSUB */
	4336	/* borrowed from regtry */
	4337	if (PL_reg_start_tmpl <= re->nparens) {
	4338	PL_reg_start_tmpl = re->nparens*3/2 + 3;
	4339	if(PL_reg_start_tmp)
	4340	Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	4341	else
	4342	Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	4343	}
	4344
	4345	eval_recurse_doit: /* Share code with GOSUB below this line */
	4346	/* run the pattern returned from (??{...}) */
	4347	ST.cp = regcppush(0); /* Save all the positions. */
	4348	REGCP_SET(ST.lastcp);
	4349
	4350	PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
	4351
	4352	/* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
	4353	PL_reglastparen = &re->lastparen;
	4354	PL_reglastcloseparen = &re->lastcloseparen;
	4355	re->lastparen = 0;
	4356	re->lastcloseparen = 0;
	4357
	4358	PL_reginput = locinput;
	4359	PL_regsize = 0;
	4360
	4361	/* XXXX This is too dramatic a measure... */
	4362	PL_reg_maxiter = 0;
	4363
	4364	ST.toggle_reg_flags = PL_reg_flags;
	4365	if (RX_UTF8(re_sv))
	4366	PL_reg_flags \|= RF_utf8;
	4367	else
	4368	PL_reg_flags &= ~RF_utf8;
	4369	ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
	4370
	4371	ST.prev_rex = rex_sv;
	4372	ST.prev_curlyx = cur_curlyx;
	4373	SETREX(rex_sv,re_sv);
	4374	rex = re;
	4375	rexi = rei;
	4376	cur_curlyx = NULL;
	4377	ST.B = next;
	4378	ST.prev_eval = cur_eval;
	4379	cur_eval = st;
	4380	/* now continue from first node in postponed RE */
	4381	PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
	4382	/* NOTREACHED */
	4383	}
	4384	/* logical is 1, /(?(?{...})X\|Y)/ */
	4385	sw = cBOOL(SvTRUE(ret));
	4386	logical = 0;
	4387	break;
	4388	}
	4389
	4390	case EVAL_AB: /* cleanup after a successful (??{A})B */
	4391	/* note: this is called twice; first after popping B, then A */
	4392	PL_reg_flags ^= ST.toggle_reg_flags;
	4393	ReREFCNT_dec(rex_sv);
	4394	SETREX(rex_sv,ST.prev_rex);
	4395	rex = (struct regexp *)SvANY(rex_sv);
	4396	rexi = RXi_GET(rex);
	4397	regcpblow(ST.cp);
	4398	cur_eval = ST.prev_eval;
	4399	cur_curlyx = ST.prev_curlyx;
	4400
	4401	/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
	4402	PL_reglastparen = &rex->lastparen;
	4403	PL_reglastcloseparen = &rex->lastcloseparen;
	4404	/* also update PL_regoffs */
	4405	PL_regoffs = rex->offs;
	4406
	4407	/* XXXX This is too dramatic a measure... */
	4408	PL_reg_maxiter = 0;
	4409	if ( nochange_depth )
	4410	nochange_depth--;
	4411	sayYES;
	4412
	4413
	4414	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
	4415	/* note: this is called twice; first after popping B, then A */
	4416	PL_reg_flags ^= ST.toggle_reg_flags;
	4417	ReREFCNT_dec(rex_sv);
	4418	SETREX(rex_sv,ST.prev_rex);
	4419	rex = (struct regexp *)SvANY(rex_sv);
	4420	rexi = RXi_GET(rex);
	4421	/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
	4422	PL_reglastparen = &rex->lastparen;
	4423	PL_reglastcloseparen = &rex->lastcloseparen;
	4424
	4425	PL_reginput = locinput;
	4426	REGCP_UNWIND(ST.lastcp);
	4427	regcppop(rex);
	4428	cur_eval = ST.prev_eval;
	4429	cur_curlyx = ST.prev_curlyx;
	4430	/* XXXX This is too dramatic a measure... */
	4431	PL_reg_maxiter = 0;
	4432	if ( nochange_depth )
	4433	nochange_depth--;
	4434	sayNO_SILENT;
	4435	#undef ST
	4436
	4437	case OPEN:
	4438	n = ARG(scan); /* which paren pair */
	4439	PL_reg_start_tmp[n] = locinput;
	4440	if (n > PL_regsize)
	4441	PL_regsize = n;
	4442	lastopen = n;
	4443	break;
	4444	case CLOSE:
	4445	n = ARG(scan); /* which paren pair */
	4446	PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
	4447	PL_regoffs[n].end = locinput - PL_bostr;
	4448	/*if (n > PL_regsize)
	4449	PL_regsize = n;*/
	4450	if (n > *PL_reglastparen)
	4451	*PL_reglastparen = n;
	4452	*PL_reglastcloseparen = n;
	4453	if (cur_eval && cur_eval->u.eval.close_paren == n) {
	4454	goto fake_end;
	4455	}
	4456	break;
	4457	case ACCEPT:
	4458	if (ARG(scan)){
	4459	regnode *cursor;
	4460	for (cursor=scan;
	4461	cursor && OP(cursor)!=END;
	4462	cursor=regnext(cursor))
	4463	{
	4464	if ( OP(cursor)==CLOSE ){
	4465	n = ARG(cursor);
	4466	if ( n <= lastopen ) {
	4467	PL_regoffs[n].start
	4468	= PL_reg_start_tmp[n] - PL_bostr;
	4469	PL_regoffs[n].end = locinput - PL_bostr;
	4470	/*if (n > PL_regsize)
	4471	PL_regsize = n;*/
	4472	if (n > *PL_reglastparen)
	4473	*PL_reglastparen = n;
	4474	*PL_reglastcloseparen = n;
	4475	if ( n == ARG(scan) \|\| (cur_eval &&
	4476	cur_eval->u.eval.close_paren == n))
	4477	break;
	4478	}
	4479	}
	4480	}
	4481	}
	4482	goto fake_end;
	4483	/*NOTREACHED*/
	4484	case GROUPP:
	4485	n = ARG(scan); /* which paren pair */
	4486	sw = cBOOL(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
	4487	break;
	4488	case NGROUPP:
	4489	/* reg_check_named_buff_matched returns 0 for no match */
	4490	sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
	4491	break;
	4492	case INSUBP:
	4493	n = ARG(scan);
	4494	sw = (cur_eval && (!n \|\| cur_eval->u.eval.close_paren == n));
	4495	break;
	4496	case DEFINEP:
	4497	sw = 0;
	4498	break;
	4499	case IFTHEN:
	4500	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4501	if (sw)
	4502	next = NEXTOPER(NEXTOPER(scan));
	4503	else {
	4504	next = scan + ARG(scan);
	4505	if (OP(next) == IFTHEN) /* Fake one. */
	4506	next = NEXTOPER(NEXTOPER(next));
	4507	}
	4508	break;
	4509	case LOGICAL:
	4510	logical = scan->flags;
	4511	break;
	4512
	4513	/*******************************************************************
	4514
	4515	The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
	4516	pattern, where A and B are subpatterns. (For simple A, CURLYM or
	4517	STAR/PLUS/CURLY/CURLYN are used instead.)
	4518
	4519	A*B is compiled as <CURLYX><A><WHILEM><B>
	4520
	4521	On entry to the subpattern, CURLYX is called. This pushes a CURLYX
	4522	state, which contains the current count, initialised to -1. It also sets
	4523	cur_curlyx to point to this state, with any previous value saved in the
	4524	state block.
	4525
	4526	CURLYX then jumps straight to the WHILEM op, rather than executing A,
	4527	since the pattern may possibly match zero times (i.e. it's a while {} loop
	4528	rather than a do {} while loop).
	4529
	4530	Each entry to WHILEM represents a successful match of A. The count in the
	4531	CURLYX block is incremented, another WHILEM state is pushed, and execution
	4532	passes to A or B depending on greediness and the current count.
	4533
	4534	For example, if matching against the string a1a2a3b (where the aN are
	4535	substrings that match /A/), then the match progresses as follows: (the
	4536	pushed states are interspersed with the bits of strings matched so far):
	4537
	4538	<CURLYX cnt=-1>
	4539	<CURLYX cnt=0><WHILEM>
	4540	<CURLYX cnt=1><WHILEM> a1 <WHILEM>
	4541	<CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
	4542	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
	4543	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
	4544
	4545	(Contrast this with something like CURLYM, which maintains only a single
	4546	backtrack state:
	4547
	4548	<CURLYM cnt=0> a1
	4549	a1 <CURLYM cnt=1> a2
	4550	a1 a2 <CURLYM cnt=2> a3
	4551	a1 a2 a3 <CURLYM cnt=3> b
	4552	)
	4553
	4554	Each WHILEM state block marks a point to backtrack to upon partial failure
	4555	of A or B, and also contains some minor state data related to that
	4556	iteration. The CURLYX block, pointed to by cur_curlyx, contains the
	4557	overall state, such as the count, and pointers to the A and B ops.
	4558
	4559	This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
	4560	must always point to the current CURLYX block, the rules are:
	4561
	4562	When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
	4563	and set cur_curlyx to point the new block.
	4564
	4565	When popping the CURLYX block after a successful or unsuccessful match,
	4566	restore the previous cur_curlyx.
	4567
	4568	When WHILEM is about to execute B, save the current cur_curlyx, and set it
	4569	to the outer one saved in the CURLYX block.
	4570
	4571	When popping the WHILEM block after a successful or unsuccessful B match,
	4572	restore the previous cur_curlyx.
	4573
	4574	Here's an example for the pattern (AI* BI)*BO
	4575	I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
	4576
	4577	cur_
	4578	curlyx backtrack stack
	4579	------ ---------------
	4580	NULL
	4581	CO <CO prev=NULL> <WO>
	4582	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	4583	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	4584	NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
	4585
	4586	At this point the pattern succeeds, and we work back down the stack to
	4587	clean up, restoring as we go:
	4588
	4589	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	4590	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	4591	CO <CO prev=NULL> <WO>
	4592	NULL
	4593
	4594	*******************************************************************/
	4595
	4596	#define ST st->u.curlyx
	4597
	4598	case CURLYX: /* start of /A*B/ (for complex A) */
	4599	{
	4600	/* No need to save/restore up to this paren */
	4601	I32 parenfloor = scan->flags;
	4602
	4603	assert(next); /* keep Coverity happy */
	4604	if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
	4605	next += ARG(next);
	4606
	4607	/* XXXX Probably it is better to teach regpush to support
	4608	parenfloor > PL_regsize... */
	4609	if (parenfloor > (I32)*PL_reglastparen)
	4610	parenfloor = *PL_reglastparen; /* Pessimization... */
	4611
	4612	ST.prev_curlyx= cur_curlyx;
	4613	cur_curlyx = st;
	4614	ST.cp = PL_savestack_ix;
	4615
	4616	/* these fields contain the state of the current curly.
	4617	* they are accessed by subsequent WHILEMs */
	4618	ST.parenfloor = parenfloor;
	4619	ST.me = scan;
	4620	ST.B = next;
	4621	ST.minmod = minmod;
	4622	minmod = 0;
	4623	ST.count = -1; /* this will be updated by WHILEM */
	4624	ST.lastloc = NULL; /* this will be updated by WHILEM */
	4625
	4626	PL_reginput = locinput;
	4627	PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
	4628	/* NOTREACHED */
	4629	}
	4630
	4631	case CURLYX_end: /* just finished matching all of A*B */
	4632	cur_curlyx = ST.prev_curlyx;
	4633	sayYES;
	4634	/* NOTREACHED */
	4635
	4636	case CURLYX_end_fail: /* just failed to match all of A*B */
	4637	regcpblow(ST.cp);
	4638	cur_curlyx = ST.prev_curlyx;
	4639	sayNO;
	4640	/* NOTREACHED */
	4641
	4642
	4643	#undef ST
	4644	#define ST st->u.whilem
	4645
	4646	case WHILEM: /* just matched an A in /A*B/ (for complex A) */
	4647	{
	4648	/* see the discussion above about CURLYX/WHILEM */
	4649	I32 n;
	4650	int min = ARG1(cur_curlyx->u.curlyx.me);
	4651	int max = ARG2(cur_curlyx->u.curlyx.me);
	4652	regnode *A = NEXTOPER(cur_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS;
	4653
	4654	assert(cur_curlyx); /* keep Coverity happy */
	4655	n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
	4656	ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
	4657	ST.cache_offset = 0;
	4658	ST.cache_mask = 0;
	4659
	4660	PL_reginput = locinput;
	4661
	4662	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4663	"%*s whilem: matched %ld out of %d..%d\n",
	4664	REPORT_CODE_OFF+depth*2, "", (long)n, min, max)
	4665	);
	4666
	4667	/* First just match a string of min A's. */
	4668
	4669	if (n < min) {
	4670	ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
	4671	cur_curlyx->u.curlyx.lastloc = locinput;
	4672	REGCP_SET(ST.lastcp);
	4673
	4674	PUSH_STATE_GOTO(WHILEM_A_pre, A);
	4675	/* NOTREACHED */
	4676	}
	4677
	4678	/* If degenerate A matches "", assume A done. */
	4679
	4680	if (locinput == cur_curlyx->u.curlyx.lastloc) {
	4681	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4682	"%*s whilem: empty match detected, trying continuation...\n",
	4683	REPORT_CODE_OFF+depth*2, "")
	4684	);
	4685	goto do_whilem_B_max;
	4686	}
	4687
	4688	/* super-linear cache processing */
	4689
	4690	if (scan->flags) {
	4691
	4692	if (!PL_reg_maxiter) {
	4693	/* start the countdown: Postpone detection until we
	4694	* know the match is not that much linear. */
	4695	PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
	4696	/* possible overflow for long strings and many CURLYX's */
	4697	if (PL_reg_maxiter < 0)
	4698	PL_reg_maxiter = I32_MAX;
	4699	PL_reg_leftiter = PL_reg_maxiter;
	4700	}
	4701
	4702	if (PL_reg_leftiter-- == 0) {
	4703	/* initialise cache */
	4704	const I32 size = (PL_reg_maxiter + 7)/8;
	4705	if (PL_reg_poscache) {
	4706	if ((I32)PL_reg_poscache_size < size) {
	4707	Renew(PL_reg_poscache, size, char);
	4708	PL_reg_poscache_size = size;
	4709	}
	4710	Zero(PL_reg_poscache, size, char);
	4711	}
	4712	else {
	4713	PL_reg_poscache_size = size;
	4714	Newxz(PL_reg_poscache, size, char);
	4715	}
	4716	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4717	"%swhilem: Detected a super-linear match, switching on caching%s...\n",
	4718	PL_colors[4], PL_colors[5])
	4719	);
	4720	}
	4721
	4722	if (PL_reg_leftiter < 0) {
	4723	/* have we already failed at this position? */
	4724	I32 offset, mask;
	4725	offset = (scan->flags & 0xf) - 1
	4726	+ (locinput - PL_bostr) * (scan->flags>>4);
	4727	mask = 1 << (offset % 8);
	4728	offset /= 8;
	4729	if (PL_reg_poscache[offset] & mask) {
	4730	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4731	"%*s whilem: (cache) already tried at this position...\n",
	4732	REPORT_CODE_OFF+depth*2, "")
	4733	);
	4734	sayNO; /* cache records failure */
	4735	}
	4736	ST.cache_offset = offset;
	4737	ST.cache_mask = mask;
	4738	}
	4739	}
	4740
	4741	/* Prefer B over A for minimal matching. */
	4742
	4743	if (cur_curlyx->u.curlyx.minmod) {
	4744	ST.save_curlyx = cur_curlyx;
	4745	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	4746	ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
	4747	REGCP_SET(ST.lastcp);
	4748	PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
	4749	/* NOTREACHED */
	4750	}
	4751
	4752	/* Prefer A over B for maximal matching. */
	4753
	4754	if (n < max) { /* More greed allowed? */
	4755	ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
	4756	cur_curlyx->u.curlyx.lastloc = locinput;
	4757	REGCP_SET(ST.lastcp);
	4758	PUSH_STATE_GOTO(WHILEM_A_max, A);
	4759	/* NOTREACHED */
	4760	}
	4761	goto do_whilem_B_max;
	4762	}
	4763	/* NOTREACHED */
	4764
	4765	case WHILEM_B_min: /* just matched B in a minimal match */
	4766	case WHILEM_B_max: /* just matched B in a maximal match */
	4767	cur_curlyx = ST.save_curlyx;
	4768	sayYES;
	4769	/* NOTREACHED */
	4770
	4771	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
	4772	cur_curlyx = ST.save_curlyx;
	4773	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	4774	cur_curlyx->u.curlyx.count--;
	4775	CACHEsayNO;
	4776	/* NOTREACHED */
	4777
	4778	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
	4779	/* FALL THROUGH */
	4780	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
	4781	REGCP_UNWIND(ST.lastcp);
	4782	regcppop(rex);
	4783	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	4784	cur_curlyx->u.curlyx.count--;
	4785	CACHEsayNO;
	4786	/* NOTREACHED */
	4787
	4788	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
	4789	REGCP_UNWIND(ST.lastcp);
	4790	regcppop(rex); /* Restore some previous $<digit>s? */
	4791	PL_reginput = locinput;
	4792	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	4793	"%*s whilem: failed, trying continuation...\n",
	4794	REPORT_CODE_OFF+depth*2, "")
	4795	);
	4796	do_whilem_B_max:
	4797	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	4798	&& ckWARN(WARN_REGEXP)
	4799	&& !(PL_reg_flags & RF_warned))
	4800	{
	4801	PL_reg_flags \|= RF_warned;
	4802	Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
	4803	"Complex regular subexpression recursion",
	4804	REG_INFTY - 1);
	4805	}
	4806
	4807	/* now try B */
	4808	ST.save_curlyx = cur_curlyx;
	4809	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	4810	PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
	4811	/* NOTREACHED */
	4812
	4813	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
	4814	cur_curlyx = ST.save_curlyx;
	4815	REGCP_UNWIND(ST.lastcp);
	4816	regcppop(rex);
	4817
	4818	if (cur_curlyx->u.curlyx.count >= /*max*/ARG2(cur_curlyx->u.curlyx.me)) {
	4819	/* Maximum greed exceeded */
	4820	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	4821	&& ckWARN(WARN_REGEXP)
	4822	&& !(PL_reg_flags & RF_warned))
	4823	{
	4824	PL_reg_flags \|= RF_warned;
	4825	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	4826	"%s limit (%d) exceeded",
	4827	"Complex regular subexpression recursion",
	4828	REG_INFTY - 1);
	4829	}
	4830	cur_curlyx->u.curlyx.count--;
	4831	CACHEsayNO;
	4832	}
	4833
	4834	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	4835	"%*s trying longer...\n", REPORT_CODE_OFF+depth*2, "")
	4836	);
	4837	/* Try grabbing another A and see if it helps. */
	4838	PL_reginput = locinput;
	4839	cur_curlyx->u.curlyx.lastloc = locinput;
	4840	ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
	4841	REGCP_SET(ST.lastcp);
	4842	PUSH_STATE_GOTO(WHILEM_A_min,
	4843	/*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS);
	4844	/* NOTREACHED */
	4845
	4846	#undef ST
	4847	#define ST st->u.branch
	4848
	4849	case BRANCHJ: /* /(...\|A\|...)/ with long next pointer */
	4850	next = scan + ARG(scan);
	4851	if (next == scan)
	4852	next = NULL;
	4853	scan = NEXTOPER(scan);
	4854	/* FALL THROUGH */
	4855
	4856	case BRANCH: /* /(...\|A\|...)/ */
	4857	scan = NEXTOPER(scan); /* scan now points to inner node */
	4858	ST.lastparen = *PL_reglastparen;
	4859	ST.next_branch = next;
	4860	REGCP_SET(ST.cp);
	4861	PL_reginput = locinput;
	4862
	4863	/* Now go into the branch */
	4864	if (has_cutgroup) {
	4865	PUSH_YES_STATE_GOTO(BRANCH_next, scan);
	4866	} else {
	4867	PUSH_STATE_GOTO(BRANCH_next, scan);
	4868	}
	4869	/* NOTREACHED */
	4870	case CUTGROUP:
	4871	PL_reginput = locinput;
	4872	sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
	4873	MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	4874	PUSH_STATE_GOTO(CUTGROUP_next,next);
	4875	/* NOTREACHED */
	4876	case CUTGROUP_next_fail:
	4877	do_cutgroup = 1;
	4878	no_final = 1;
	4879	if (st->u.mark.mark_name)
	4880	sv_commit = st->u.mark.mark_name;
	4881	sayNO;
	4882	/* NOTREACHED */
	4883	case BRANCH_next:
	4884	sayYES;
	4885	/* NOTREACHED */
	4886	case BRANCH_next_fail: /* that branch failed; try the next, if any */
	4887	if (do_cutgroup) {
	4888	do_cutgroup = 0;
	4889	no_final = 0;
	4890	}
	4891	REGCP_UNWIND(ST.cp);
	4892	for (n = *PL_reglastparen; n > ST.lastparen; n--)
	4893	PL_regoffs[n].end = -1;
	4894	*PL_reglastparen = n;
	4895	/*dmq: *PL_reglastcloseparen = n; */
	4896	scan = ST.next_branch;
	4897	/* no more branches? */
	4898	if (!scan \|\| (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
	4899	DEBUG_EXECUTE_r({
	4900	PerlIO_printf( Perl_debug_log,
	4901	"%*s %sBRANCH failed...%s\n",
	4902	REPORT_CODE_OFF+depth*2, "",
	4903	PL_colors[4],
	4904	PL_colors[5] );
	4905	});
	4906	sayNO_SILENT;
	4907	}
	4908	continue; /* execute next BRANCH[J] op */
	4909	/* NOTREACHED */
	4910
	4911	case MINMOD:
	4912	minmod = 1;
	4913	break;
	4914
	4915	#undef ST
	4916	#define ST st->u.curlym
	4917
	4918	case CURLYM: /* /A{m,n}B/ where A is fixed-length */
	4919
	4920	/* This is an optimisation of CURLYX that enables us to push
	4921	* only a single backtracking state, no matter how many matches
	4922	* there are in {m,n}. It relies on the pattern being constant
	4923	* length, with no parens to influence future backrefs
	4924	*/
	4925
	4926	ST.me = scan;
	4927	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	4928
	4929	/* if paren positive, emulate an OPEN/CLOSE around A */
	4930	if (ST.me->flags) {
	4931	U32 paren = ST.me->flags;
	4932	if (paren > PL_regsize)
	4933	PL_regsize = paren;
	4934	if (paren > *PL_reglastparen)
	4935	*PL_reglastparen = paren;
	4936	scan += NEXT_OFF(scan); /* Skip former OPEN. */
	4937	}
	4938	ST.A = scan;
	4939	ST.B = next;
	4940	ST.alen = 0;
	4941	ST.count = 0;
	4942	ST.minmod = minmod;
	4943	minmod = 0;
	4944	ST.c1 = CHRTEST_UNINIT;
	4945	REGCP_SET(ST.cp);
	4946
	4947	if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
	4948	goto curlym_do_B;
	4949
	4950	curlym_do_A: /* execute the A in /A{m,n}B/ */
	4951	PL_reginput = locinput;
	4952	PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
	4953	/* NOTREACHED */
	4954
	4955	case CURLYM_A: /* we've just matched an A */
	4956	locinput = st->locinput;
	4957	nextchr = UCHARAT(locinput);
	4958
	4959	ST.count++;
	4960	/* after first match, determine A's length: u.curlym.alen */
	4961	if (ST.count == 1) {
	4962	if (PL_reg_match_utf8) {
	4963	char *s = locinput;
	4964	while (s < PL_reginput) {
	4965	ST.alen++;
	4966	s += UTF8SKIP(s);
	4967	}
	4968	}
	4969	else {
	4970	ST.alen = PL_reginput - locinput;
	4971	}
	4972	if (ST.alen == 0)
	4973	ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
	4974	}
	4975	DEBUG_EXECUTE_r(
	4976	PerlIO_printf(Perl_debug_log,
	4977	"%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
	4978	(int)(REPORT_CODE_OFF+(depth*2)), "",
	4979	(IV) ST.count, (IV)ST.alen)
	4980	);
	4981
	4982	locinput = PL_reginput;
	4983
	4984	if (cur_eval && cur_eval->u.eval.close_paren &&
	4985	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	4986	goto fake_end;
	4987
	4988	{
	4989	I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
	4990	if ( max == REG_INFTY \|\| ST.count < max )
	4991	goto curlym_do_A; /* try to match another A */
	4992	}
	4993	goto curlym_do_B; /* try to match B */
	4994
	4995	case CURLYM_A_fail: /* just failed to match an A */
	4996	REGCP_UNWIND(ST.cp);
	4997
	4998	if (ST.minmod \|\| ST.count < ARG1(ST.me) /* min*/
	4999	\|\| (cur_eval && cur_eval->u.eval.close_paren &&
	5000	cur_eval->u.eval.close_paren == (U32)ST.me->flags))
	5001	sayNO;
	5002
	5003	curlym_do_B: /* execute the B in /A{m,n}B/ */
	5004	PL_reginput = locinput;
	5005	if (ST.c1 == CHRTEST_UNINIT) {
	5006	/* calculate c1 and c2 for possible match of 1st char
	5007	* following curly */
	5008	ST.c1 = ST.c2 = CHRTEST_VOID;
	5009	if (HAS_TEXT(ST.B) \|\| JUMPABLE(ST.B)) {
	5010	regnode *text_node = ST.B;
	5011	if (! HAS_TEXT(text_node))
	5012	FIND_NEXT_IMPT(text_node);
	5013	/* this used to be
	5014
	5015	(HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
	5016
	5017	But the former is redundant in light of the latter.
	5018
	5019	if this changes back then the macro for
	5020	IS_TEXT and friends need to change.
	5021	*/
	5022	if (PL_regkind[OP(text_node)] == EXACT)
	5023	{
	5024
	5025	ST.c1 = (U8)*STRING(text_node);
	5026	switch (OP(text_node)) {
	5027	case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
	5028	case EXACTFA:
	5029	case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
	5030	case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
	5031	default: ST.c2 = ST.c1;
	5032	}
	5033	}
	5034	}
	5035	}
	5036
	5037	DEBUG_EXECUTE_r(
	5038	PerlIO_printf(Perl_debug_log,
	5039	"%*s CURLYM trying tail with matches=%"IVdf"...\n",
	5040	(int)(REPORT_CODE_OFF+(depth*2)),
	5041	"", (IV)ST.count)
	5042	);
	5043	if (ST.c1 != CHRTEST_VOID
	5044	&& UCHARAT(PL_reginput) != ST.c1
	5045	&& UCHARAT(PL_reginput) != ST.c2)
	5046	{
	5047	/* simulate B failing */
	5048	DEBUG_OPTIMISE_r(
	5049	PerlIO_printf(Perl_debug_log,
	5050	"%*s CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
	5051	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5052	(IV)ST.c1,(IV)ST.c2
	5053	));
	5054	state_num = CURLYM_B_fail;
	5055	goto reenter_switch;
	5056	}
	5057
	5058	if (ST.me->flags) {
	5059	/* mark current A as captured */
	5060	I32 paren = ST.me->flags;
	5061	if (ST.count) {
	5062	PL_regoffs[paren].start
	5063	= HOPc(PL_reginput, -ST.alen) - PL_bostr;
	5064	PL_regoffs[paren].end = PL_reginput - PL_bostr;
	5065	/*dmq: *PL_reglastcloseparen = paren; */
	5066	}
	5067	else
	5068	PL_regoffs[paren].end = -1;
	5069	if (cur_eval && cur_eval->u.eval.close_paren &&
	5070	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5071	{
	5072	if (ST.count)
	5073	goto fake_end;
	5074	else
	5075	sayNO;
	5076	}
	5077	}
	5078
	5079	PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
	5080	/* NOTREACHED */
	5081
	5082	case CURLYM_B_fail: /* just failed to match a B */
	5083	REGCP_UNWIND(ST.cp);
	5084	if (ST.minmod) {
	5085	I32 max = ARG2(ST.me);
	5086	if (max != REG_INFTY && ST.count == max)
	5087	sayNO;
	5088	goto curlym_do_A; /* try to match a further A */
	5089	}
	5090	/* backtrack one A */
	5091	if (ST.count == ARG1(ST.me) /* min */)
	5092	sayNO;
	5093	ST.count--;
	5094	locinput = HOPc(locinput, -ST.alen);
	5095	goto curlym_do_B; /* try to match B */
	5096
	5097	#undef ST
	5098	#define ST st->u.curly
	5099
	5100	#define CURLY_SETPAREN(paren, success) \
	5101	if (paren) { \
	5102	if (success) { \
	5103	PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
	5104	PL_regoffs[paren].end = locinput - PL_bostr; \
	5105	*PL_reglastcloseparen = paren; \
	5106	} \
	5107	else \
	5108	PL_regoffs[paren].end = -1; \
	5109	}
	5110
	5111	case STAR: /* /A*B/ where A is width 1 */
	5112	ST.paren = 0;
	5113	ST.min = 0;
	5114	ST.max = REG_INFTY;
	5115	scan = NEXTOPER(scan);
	5116	goto repeat;
	5117	case PLUS: /* /A+B/ where A is width 1 */
	5118	ST.paren = 0;
	5119	ST.min = 1;
	5120	ST.max = REG_INFTY;
	5121	scan = NEXTOPER(scan);
	5122	goto repeat;
	5123	case CURLYN: /* /(A){m,n}B/ where A is width 1 */
	5124	ST.paren = scan->flags; /* Which paren to set */
	5125	if (ST.paren > PL_regsize)
	5126	PL_regsize = ST.paren;
	5127	if (ST.paren > *PL_reglastparen)
	5128	*PL_reglastparen = ST.paren;
	5129	ST.min = ARG1(scan); /* min to match */
	5130	ST.max = ARG2(scan); /* max to match */
	5131	if (cur_eval && cur_eval->u.eval.close_paren &&
	5132	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5133	ST.min=1;
	5134	ST.max=1;
	5135	}
	5136	scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
	5137	goto repeat;
	5138	case CURLY: /* /A{m,n}B/ where A is width 1 */
	5139	ST.paren = 0;
	5140	ST.min = ARG1(scan); /* min to match */
	5141	ST.max = ARG2(scan); /* max to match */
	5142	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5143	repeat:
	5144	/*
	5145	* Lookahead to avoid useless match attempts
	5146	* when we know what character comes next.
	5147	*
	5148	* Used to only do .x and .?x, but now it allows
	5149	* for )'s, ('s and (?{ ... })'s to be in the way
	5150	* of the quantifier and the EXACT-like node. -- japhy
	5151	*/
	5152
	5153	if (ST.min > ST.max) /* XXX make this a compile-time check? */
	5154	sayNO;
	5155	if (HAS_TEXT(next) \|\| JUMPABLE(next)) {
	5156	U8 *s;
	5157	regnode *text_node = next;
	5158
	5159	if (! HAS_TEXT(text_node))
	5160	FIND_NEXT_IMPT(text_node);
	5161
	5162	if (! HAS_TEXT(text_node))
	5163	ST.c1 = ST.c2 = CHRTEST_VOID;
	5164	else {
	5165	if ( PL_regkind[OP(text_node)] != EXACT ) {
	5166	ST.c1 = ST.c2 = CHRTEST_VOID;
	5167	goto assume_ok_easy;
	5168	}
	5169	else
	5170	s = (U8*)STRING(text_node);
	5171
	5172	/* Currently we only get here when
	5173
	5174	PL_regkind[OP(text_node)] == EXACT
	5175
	5176	if this changes back then the macro for IS_TEXT and
	5177	friends need to change. */
	5178	if (!UTF_PATTERN) {
	5179	ST.c1 = *s;
	5180	switch (OP(text_node)) {
	5181	case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
	5182	case EXACTFA:
	5183	case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
	5184	case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
	5185	default: ST.c2 = ST.c1; break;
	5186	}
	5187	}
	5188	else { /* UTF_PATTERN */
	5189	if (IS_TEXTFU(text_node) \|\| IS_TEXTF(text_node)) {
	5190	STRLEN ulen1, ulen2;
	5191	U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
	5192	U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
	5193
	5194	to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
	5195	to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
	5196	#ifdef EBCDIC
	5197	ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
	5198	ckWARN(WARN_UTF8) ?
	5199	0 : UTF8_ALLOW_ANY);
	5200	ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
	5201	ckWARN(WARN_UTF8) ?
	5202	0 : UTF8_ALLOW_ANY);
	5203	#else
	5204	ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
	5205	uniflags);
	5206	ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
	5207	uniflags);
	5208	#endif
	5209	}
	5210	else {
	5211	ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
	5212	uniflags);
	5213	}
	5214	}
	5215	}
	5216	}
	5217	else
	5218	ST.c1 = ST.c2 = CHRTEST_VOID;
	5219	assume_ok_easy:
	5220
	5221	ST.A = scan;
	5222	ST.B = next;
	5223	PL_reginput = locinput;
	5224	if (minmod) {
	5225	minmod = 0;
	5226	if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
	5227	sayNO;
	5228	ST.count = ST.min;
	5229	locinput = PL_reginput;
	5230	REGCP_SET(ST.cp);
	5231	if (ST.c1 == CHRTEST_VOID)
	5232	goto curly_try_B_min;
	5233
	5234	ST.oldloc = locinput;
	5235
	5236	/* set ST.maxpos to the furthest point along the
	5237	* string that could possibly match */
	5238	if (ST.max == REG_INFTY) {
	5239	ST.maxpos = PL_regeol - 1;
	5240	if (utf8_target)
	5241	while (UTF8_IS_CONTINUATION((U8)ST.maxpos))
	5242	ST.maxpos--;
	5243	}
	5244	else if (utf8_target) {
	5245	int m = ST.max - ST.min;
	5246	for (ST.maxpos = locinput;
	5247	m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
	5248	ST.maxpos += UTF8SKIP(ST.maxpos);
	5249	}
	5250	else {
	5251	ST.maxpos = locinput + ST.max - ST.min;
	5252	if (ST.maxpos >= PL_regeol)
	5253	ST.maxpos = PL_regeol - 1;
	5254	}
	5255	goto curly_try_B_min_known;
	5256
	5257	}
	5258	else {
	5259	ST.count = regrepeat(rex, ST.A, ST.max, depth);
	5260	locinput = PL_reginput;
	5261	if (ST.count < ST.min)
	5262	sayNO;
	5263	if ((ST.count > ST.min)
	5264	&& (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
	5265	{
	5266	/* A{m,n} must come at the end of the string, there's
	5267	* no point in backing off ... */
	5268	ST.min = ST.count;
	5269	/* ...except that $ and \Z can match before and after
	5270	newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
	5271	We may back off by one in this case. */
	5272	if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
	5273	ST.min--;
	5274	}
	5275	REGCP_SET(ST.cp);
	5276	goto curly_try_B_max;
	5277	}
	5278	/* NOTREACHED */
	5279
	5280
	5281	case CURLY_B_min_known_fail:
	5282	/* failed to find B in a non-greedy match where c1,c2 valid */
	5283	if (ST.paren && ST.count)
	5284	PL_regoffs[ST.paren].end = -1;
	5285
	5286	PL_reginput = locinput; /* Could be reset... */
	5287	REGCP_UNWIND(ST.cp);
	5288	/* Couldn't or didn't -- move forward. */
	5289	ST.oldloc = locinput;
	5290	if (utf8_target)
	5291	locinput += UTF8SKIP(locinput);
	5292	else
	5293	locinput++;
	5294	ST.count++;
	5295	curly_try_B_min_known:
	5296	/* find the next place where 'B' could work, then call B */
	5297	{
	5298	int n;
	5299	if (utf8_target) {
	5300	n = (ST.oldloc == locinput) ? 0 : 1;
	5301	if (ST.c1 == ST.c2) {
	5302	STRLEN len;
	5303	/* set n to utf8_distance(oldloc, locinput) */
	5304	while (locinput <= ST.maxpos &&
	5305	utf8n_to_uvchr((U8*)locinput,
	5306	UTF8_MAXBYTES, &len,
	5307	uniflags) != (UV)ST.c1) {
	5308	locinput += len;
	5309	n++;
	5310	}
	5311	}
	5312	else {
	5313	/* set n to utf8_distance(oldloc, locinput) */
	5314	while (locinput <= ST.maxpos) {
	5315	STRLEN len;
	5316	const UV c = utf8n_to_uvchr((U8*)locinput,
	5317	UTF8_MAXBYTES, &len,
	5318	uniflags);
	5319	if (c == (UV)ST.c1 \|\| c == (UV)ST.c2)
	5320	break;
	5321	locinput += len;
	5322	n++;
	5323	}
	5324	}
	5325	}
	5326	else {
	5327	if (ST.c1 == ST.c2) {
	5328	while (locinput <= ST.maxpos &&
	5329	UCHARAT(locinput) != ST.c1)
	5330	locinput++;
	5331	}
	5332	else {
	5333	while (locinput <= ST.maxpos
	5334	&& UCHARAT(locinput) != ST.c1
	5335	&& UCHARAT(locinput) != ST.c2)
	5336	locinput++;
	5337	}
	5338	n = locinput - ST.oldloc;
	5339	}
	5340	if (locinput > ST.maxpos)
	5341	sayNO;
	5342	/* PL_reginput == oldloc now */
	5343	if (n) {
	5344	ST.count += n;
	5345	if (regrepeat(rex, ST.A, n, depth) < n)
	5346	sayNO;
	5347	}
	5348	PL_reginput = locinput;
	5349	CURLY_SETPAREN(ST.paren, ST.count);
	5350	if (cur_eval && cur_eval->u.eval.close_paren &&
	5351	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5352	goto fake_end;
	5353	}
	5354	PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
	5355	}
	5356	/* NOTREACHED */
	5357
	5358
	5359	case CURLY_B_min_fail:
	5360	/* failed to find B in a non-greedy match where c1,c2 invalid */
	5361	if (ST.paren && ST.count)
	5362	PL_regoffs[ST.paren].end = -1;
	5363
	5364	REGCP_UNWIND(ST.cp);
	5365	/* failed -- move forward one */
	5366	PL_reginput = locinput;
	5367	if (regrepeat(rex, ST.A, 1, depth)) {
	5368	ST.count++;
	5369	locinput = PL_reginput;
	5370	if (ST.count <= ST.max \|\| (ST.max == REG_INFTY &&
	5371	ST.count > 0)) /* count overflow ? */
	5372	{
	5373	curly_try_B_min:
	5374	CURLY_SETPAREN(ST.paren, ST.count);
	5375	if (cur_eval && cur_eval->u.eval.close_paren &&
	5376	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5377	goto fake_end;
	5378	}
	5379	PUSH_STATE_GOTO(CURLY_B_min, ST.B);
	5380	}
	5381	}
	5382	sayNO;
	5383	/* NOTREACHED */
	5384
	5385
	5386	curly_try_B_max:
	5387	/* a successful greedy match: now try to match B */
	5388	if (cur_eval && cur_eval->u.eval.close_paren &&
	5389	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5390	goto fake_end;
	5391	}
	5392	{
	5393	UV c = 0;
	5394	if (ST.c1 != CHRTEST_VOID)
	5395	c = utf8_target ? utf8n_to_uvchr((U8*)PL_reginput,
	5396	UTF8_MAXBYTES, 0, uniflags)
	5397	: (UV) UCHARAT(PL_reginput);
	5398	/* If it could work, try it. */
	5399	if (ST.c1 == CHRTEST_VOID \|\| c == (UV)ST.c1 \|\| c == (UV)ST.c2) {
	5400	CURLY_SETPAREN(ST.paren, ST.count);
	5401	PUSH_STATE_GOTO(CURLY_B_max, ST.B);
	5402	/* NOTREACHED */
	5403	}
	5404	}
	5405	/* FALL THROUGH */
	5406	case CURLY_B_max_fail:
	5407	/* failed to find B in a greedy match */
	5408	if (ST.paren && ST.count)
	5409	PL_regoffs[ST.paren].end = -1;
	5410
	5411	REGCP_UNWIND(ST.cp);
	5412	/* back up. */
	5413	if (--ST.count < ST.min)
	5414	sayNO;
	5415	PL_reginput = locinput = HOPc(locinput, -1);
	5416	goto curly_try_B_max;
	5417
	5418	#undef ST
	5419
	5420	case END:
	5421	fake_end:
	5422	if (cur_eval) {
	5423	/* we've just finished A in /(??{A})B/; now continue with B */
	5424	I32 tmpix;
	5425	st->u.eval.toggle_reg_flags
	5426	= cur_eval->u.eval.toggle_reg_flags;
	5427	PL_reg_flags ^= st->u.eval.toggle_reg_flags;
	5428
	5429	st->u.eval.prev_rex = rex_sv; /* inner */
	5430	SETREX(rex_sv,cur_eval->u.eval.prev_rex);
	5431	rex = (struct regexp *)SvANY(rex_sv);
	5432	rexi = RXi_GET(rex);
	5433	cur_curlyx = cur_eval->u.eval.prev_curlyx;
	5434	(void)ReREFCNT_inc(rex_sv);
	5435	st->u.eval.cp = regcppush(0); /* Save all the positions. */
	5436
	5437	/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
	5438	PL_reglastparen = &rex->lastparen;
	5439	PL_reglastcloseparen = &rex->lastcloseparen;
	5440
	5441	REGCP_SET(st->u.eval.lastcp);
	5442	PL_reginput = locinput;
	5443
	5444	/* Restore parens of the outer rex without popping the
	5445	* savestack */
	5446	tmpix = PL_savestack_ix;
	5447	PL_savestack_ix = cur_eval->u.eval.lastcp;
	5448	regcppop(rex);
	5449	PL_savestack_ix = tmpix;
	5450
	5451	st->u.eval.prev_eval = cur_eval;
	5452	cur_eval = cur_eval->u.eval.prev_eval;
	5453	DEBUG_EXECUTE_r(
	5454	PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
	5455	REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
	5456	if ( nochange_depth )
	5457	nochange_depth--;
	5458
	5459	PUSH_YES_STATE_GOTO(EVAL_AB,
	5460	st->u.eval.prev_eval->u.eval.B); /* match B */
	5461	}
	5462
	5463	if (locinput < reginfo->till) {
	5464	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5465	"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
	5466	PL_colors[4],
	5467	(long)(locinput - PL_reg_starttry),
	5468	(long)(reginfo->till - PL_reg_starttry),
	5469	PL_colors[5]));
	5470
	5471	sayNO_SILENT; /* Cannot match: too short. */
	5472	}
	5473	PL_reginput = locinput; /* put where regtry can find it */
	5474	sayYES; /* Success! */
	5475
	5476	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
	5477	DEBUG_EXECUTE_r(
	5478	PerlIO_printf(Perl_debug_log,
	5479	"%*s %ssubpattern success...%s\n",
	5480	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
	5481	PL_reginput = locinput; /* put where regtry can find it */
	5482	sayYES; /* Success! */
	5483
	5484	#undef ST
	5485	#define ST st->u.ifmatch
	5486
	5487	case SUSPEND: /* (?>A) */
	5488	ST.wanted = 1;
	5489	PL_reginput = locinput;
	5490	goto do_ifmatch;
	5491
	5492	case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
	5493	ST.wanted = 0;
	5494	goto ifmatch_trivial_fail_test;
	5495
	5496	case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
	5497	ST.wanted = 1;
	5498	ifmatch_trivial_fail_test:
	5499	if (scan->flags) {
	5500	char * const s = HOPBACKc(locinput, scan->flags);
	5501	if (!s) {
	5502	/* trivial fail */
	5503	if (logical) {
	5504	logical = 0;
	5505	sw = 1 - cBOOL(ST.wanted);
	5506	}
	5507	else if (ST.wanted)
	5508	sayNO;
	5509	next = scan + ARG(scan);
	5510	if (next == scan)
	5511	next = NULL;
	5512	break;
	5513	}
	5514	PL_reginput = s;
	5515	}
	5516	else
	5517	PL_reginput = locinput;
	5518
	5519	do_ifmatch:
	5520	ST.me = scan;
	5521	ST.logical = logical;
	5522	logical = 0; /* XXX: reset state of logical once it has been saved into ST */
	5523
	5524	/* execute body of (?...A) */
	5525	PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
	5526	/* NOTREACHED */
	5527
	5528	case IFMATCH_A_fail: /* body of (?...A) failed */
	5529	ST.wanted = !ST.wanted;
	5530	/* FALL THROUGH */
	5531
	5532	case IFMATCH_A: /* body of (?...A) succeeded */
	5533	if (ST.logical) {
	5534	sw = cBOOL(ST.wanted);
	5535	}
	5536	else if (!ST.wanted)
	5537	sayNO;
	5538
	5539	if (OP(ST.me) == SUSPEND)
	5540	locinput = PL_reginput;
	5541	else {
	5542	locinput = PL_reginput = st->locinput;
	5543	nextchr = UCHARAT(locinput);
	5544	}
	5545	scan = ST.me + ARG(ST.me);
	5546	if (scan == ST.me)
	5547	scan = NULL;
	5548	continue; /* execute B */
	5549
	5550	#undef ST
	5551
	5552	case LONGJMP:
	5553	next = scan + ARG(scan);
	5554	if (next == scan)
	5555	next = NULL;
	5556	break;
	5557	case COMMIT:
	5558	reginfo->cutpoint = PL_regeol;
	5559	/* FALLTHROUGH */
	5560	case PRUNE:
	5561	PL_reginput = locinput;
	5562	if (!scan->flags)
	5563	sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5564	PUSH_STATE_GOTO(COMMIT_next,next);
	5565	/* NOTREACHED */
	5566	case COMMIT_next_fail:
	5567	no_final = 1;
	5568	/* FALLTHROUGH */
	5569	case OPFAIL:
	5570	sayNO;
	5571	/* NOTREACHED */
	5572
	5573	#define ST st->u.mark
	5574	case MARKPOINT:
	5575	ST.prev_mark = mark_state;
	5576	ST.mark_name = sv_commit = sv_yes_mark
	5577	= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5578	mark_state = st;
	5579	ST.mark_loc = PL_reginput = locinput;
	5580	PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
	5581	/* NOTREACHED */
	5582	case MARKPOINT_next:
	5583	mark_state = ST.prev_mark;
	5584	sayYES;
	5585	/* NOTREACHED */
	5586	case MARKPOINT_next_fail:
	5587	if (popmark && sv_eq(ST.mark_name,popmark))
	5588	{
	5589	if (ST.mark_loc > startpoint)
	5590	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	5591	popmark = NULL; /* we found our mark */
	5592	sv_commit = ST.mark_name;
	5593
	5594	DEBUG_EXECUTE_r({
	5595	PerlIO_printf(Perl_debug_log,
	5596	"%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
	5597	REPORT_CODE_OFF+depth*2, "",
	5598	PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
	5599	});
	5600	}
	5601	mark_state = ST.prev_mark;
	5602	sv_yes_mark = mark_state ?
	5603	mark_state->u.mark.mark_name : NULL;
	5604	sayNO;
	5605	/* NOTREACHED */
	5606	case SKIP:
	5607	PL_reginput = locinput;
	5608	if (scan->flags) {
	5609	/* (*SKIP) : if we fail we cut here*/
	5610	ST.mark_name = NULL;
	5611	ST.mark_loc = locinput;
	5612	PUSH_STATE_GOTO(SKIP_next,next);
	5613	} else {
	5614	/* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
	5615	otherwise do nothing. Meaning we need to scan
	5616	*/
	5617	regmatch_state *cur = mark_state;
	5618	SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5619
	5620	while (cur) {
	5621	if ( sv_eq( cur->u.mark.mark_name,
	5622	find ) )
	5623	{
	5624	ST.mark_name = find;
	5625	PUSH_STATE_GOTO( SKIP_next, next );
	5626	}
	5627	cur = cur->u.mark.prev_mark;
	5628	}
	5629	}
	5630	/* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
	5631	break;
	5632	case SKIP_next_fail:
	5633	if (ST.mark_name) {
	5634	/* (*CUT:NAME) - Set up to search for the name as we
	5635	collapse the stack*/
	5636	popmark = ST.mark_name;
	5637	} else {
	5638	/* (*CUT) - No name, we cut here.*/
	5639	if (ST.mark_loc > startpoint)
	5640	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	5641	/* but we set sv_commit to latest mark_name if there
	5642	is one so they can test to see how things lead to this
	5643	cut */
	5644	if (mark_state)
	5645	sv_commit=mark_state->u.mark.mark_name;
	5646	}
	5647	no_final = 1;
	5648	sayNO;
	5649	/* NOTREACHED */
	5650	#undef ST
	5651	case FOLDCHAR:
	5652	n = ARG(scan);
	5653	if ( n == (U32)what_len_TRICKYFOLD(locinput,utf8_target,ln) ) {
	5654	locinput += ln;
	5655	} else if ( LATIN_SMALL_LETTER_SHARP_S == n && !utf8_target && !UTF_PATTERN ) {
	5656	sayNO;
	5657	} else {
	5658	U8 folded[UTF8_MAXBYTES_CASE+1];
	5659	STRLEN foldlen;
	5660	const char * const l = locinput;
	5661	char *e = PL_regeol;
	5662	to_uni_fold(n, folded, &foldlen);
	5663
	5664	if (! foldEQ_utf8((const char*) folded, 0, foldlen, 1,
	5665	l, &e, 0, utf8_target)) {
	5666	sayNO;
	5667	}
	5668	locinput = e;
	5669	}
	5670	nextchr = UCHARAT(locinput);
	5671	break;
	5672	case LNBREAK:
	5673	if ((n=is_LNBREAK(locinput,utf8_target))) {
	5674	locinput += n;
	5675	nextchr = UCHARAT(locinput);
	5676	} else
	5677	sayNO;
	5678	break;
	5679
	5680	#define CASE_CLASS(nAmE) \
	5681	case nAmE: \
	5682	if ((n=is_##nAmE(locinput,utf8_target))) { \
	5683	locinput += n; \
	5684	nextchr = UCHARAT(locinput); \
	5685	} else \
	5686	sayNO; \
	5687	break; \
	5688	case N##nAmE: \
	5689	if ((n=is_##nAmE(locinput,utf8_target))) { \
	5690	sayNO; \
	5691	} else { \
	5692	locinput += UTF8SKIP(locinput); \
	5693	nextchr = UCHARAT(locinput); \
	5694	} \
	5695	break
	5696
	5697	CASE_CLASS(VERTWS);
	5698	CASE_CLASS(HORIZWS);
	5699	#undef CASE_CLASS
	5700
	5701	default:
	5702	PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
	5703	PTR2UV(scan), OP(scan));
	5704	Perl_croak(aTHX_ "regexp memory corruption");
	5705
	5706	} /* end switch */
	5707
	5708	/* switch break jumps here */
	5709	scan = next; /* prepare to execute the next op and ... */
	5710	continue; /* ... jump back to the top, reusing st */
	5711	/* NOTREACHED */
	5712
	5713	push_yes_state:
	5714	/* push a state that backtracks on success */
	5715	st->u.yes.prev_yes_state = yes_state;
	5716	yes_state = st;
	5717	/* FALL THROUGH */
	5718	push_state:
	5719	/* push a new regex state, then continue at scan */
	5720	{
	5721	regmatch_state *newst;
	5722
	5723	DEBUG_STACK_r({
	5724	regmatch_state *cur = st;
	5725	regmatch_state *curyes = yes_state;
	5726	int curd = depth;
	5727	regmatch_slab *slab = PL_regmatch_slab;
	5728	for (;curd > -1;cur--,curd--) {
	5729	if (cur < SLAB_FIRST(slab)) {
	5730	slab = slab->prev;
	5731	cur = SLAB_LAST(slab);
	5732	}
	5733	PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
	5734	REPORT_CODE_OFF + 2 + depth * 2,"",
	5735	curd, PL_reg_name[cur->resume_state],
	5736	(curyes == cur) ? "yes" : ""
	5737	);
	5738	if (curyes == cur)
	5739	curyes = cur->u.yes.prev_yes_state;
	5740	}
	5741	} else
	5742	DEBUG_STATE_pp("push")
	5743	);
	5744	depth++;
	5745	st->locinput = locinput;
	5746	newst = st+1;
	5747	if (newst > SLAB_LAST(PL_regmatch_slab))
	5748	newst = S_push_slab(aTHX);
	5749	PL_regmatch_state = newst;
	5750
	5751	locinput = PL_reginput;
	5752	nextchr = UCHARAT(locinput);
	5753	st = newst;
	5754	continue;
	5755	/* NOTREACHED */
	5756	}
	5757	}
	5758
	5759	/*
	5760	* We get here only if there's trouble -- normally "case END" is
	5761	* the terminating point.
	5762	*/
	5763	Perl_croak(aTHX_ "corrupted regexp pointers");
	5764	/*NOTREACHED*/
	5765	sayNO;
	5766
	5767	yes:
	5768	if (yes_state) {
	5769	/* we have successfully completed a subexpression, but we must now
	5770	* pop to the state marked by yes_state and continue from there */
	5771	assert(st != yes_state);
	5772	#ifdef DEBUGGING
	5773	while (st != yes_state) {
	5774	st--;
	5775	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	5776	PL_regmatch_slab = PL_regmatch_slab->prev;
	5777	st = SLAB_LAST(PL_regmatch_slab);
	5778	}
	5779	DEBUG_STATE_r({
	5780	if (no_final) {
	5781	DEBUG_STATE_pp("pop (no final)");
	5782	} else {
	5783	DEBUG_STATE_pp("pop (yes)");
	5784	}
	5785	});
	5786	depth--;
	5787	}
	5788	#else
	5789	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
	5790	\|\| yes_state > SLAB_LAST(PL_regmatch_slab))
	5791	{
	5792	/* not in this slab, pop slab */
	5793	depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
	5794	PL_regmatch_slab = PL_regmatch_slab->prev;
	5795	st = SLAB_LAST(PL_regmatch_slab);
	5796	}
	5797	depth -= (st - yes_state);
	5798	#endif
	5799	st = yes_state;
	5800	yes_state = st->u.yes.prev_yes_state;
	5801	PL_regmatch_state = st;
	5802
	5803	if (no_final) {
	5804	locinput= st->locinput;
	5805	nextchr = UCHARAT(locinput);
	5806	}
	5807	state_num = st->resume_state + no_final;
	5808	goto reenter_switch;
	5809	}
	5810
	5811	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
	5812	PL_colors[4], PL_colors[5]));
	5813
	5814	if (PL_reg_eval_set) {
	5815	/* each successfully executed (?{...}) block does the equivalent of
	5816	* local $^R = do {...}
	5817	* When popping the save stack, all these locals would be undone;
	5818	* bypass this by setting the outermost saved $^R to the latest
	5819	* value */
	5820	if (oreplsv != GvSV(PL_replgv))
	5821	sv_setsv(oreplsv, GvSV(PL_replgv));
	5822	}
	5823	result = 1;
	5824	goto final_exit;
	5825
	5826	no:
	5827	DEBUG_EXECUTE_r(
	5828	PerlIO_printf(Perl_debug_log,
	5829	"%*s %sfailed...%s\n",
	5830	REPORT_CODE_OFF+depth*2, "",
	5831	PL_colors[4], PL_colors[5])
	5832	);
	5833
	5834	no_silent:
	5835	if (no_final) {
	5836	if (yes_state) {
	5837	goto yes;
	5838	} else {
	5839	goto final_exit;
	5840	}
	5841	}
	5842	if (depth) {
	5843	/* there's a previous state to backtrack to */
	5844	st--;
	5845	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	5846	PL_regmatch_slab = PL_regmatch_slab->prev;
	5847	st = SLAB_LAST(PL_regmatch_slab);
	5848	}
	5849	PL_regmatch_state = st;
	5850	locinput= st->locinput;
	5851	nextchr = UCHARAT(locinput);
	5852
	5853	DEBUG_STATE_pp("pop");
	5854	depth--;
	5855	if (yes_state == st)
	5856	yes_state = st->u.yes.prev_yes_state;
	5857
	5858	state_num = st->resume_state + 1; /* failure = success + 1 */
	5859	goto reenter_switch;
	5860	}
	5861	result = 0;
	5862
	5863	final_exit:
	5864	if (rex->intflags & PREGf_VERBARG_SEEN) {
	5865	SV *sv_err = get_sv("REGERROR", 1);
	5866	SV *sv_mrk = get_sv("REGMARK", 1);
	5867	if (result) {
	5868	sv_commit = &PL_sv_no;
	5869	if (!sv_yes_mark)
	5870	sv_yes_mark = &PL_sv_yes;
	5871	} else {
	5872	if (!sv_commit)
	5873	sv_commit = &PL_sv_yes;
	5874	sv_yes_mark = &PL_sv_no;
	5875	}
	5876	sv_setsv(sv_err, sv_commit);
	5877	sv_setsv(sv_mrk, sv_yes_mark);
	5878	}
	5879
	5880	/* clean up; in particular, free all slabs above current one */
	5881	LEAVE_SCOPE(oldsave);
	5882
	5883	return result;
	5884	}
	5885
	5886	/*
	5887	- regrepeat - repeatedly match something simple, report how many
	5888	*/
	5889	/*
	5890	* [This routine now assumes that it will only match on things of length 1.
	5891	* That was true before, but now we assume scan - reginput is the count,
	5892	* rather than incrementing count on every character. [Er, except utf8.]]
	5893	*/
	5894	STATIC I32
	5895	S_regrepeat(pTHX_ const regexp prog, const regnode p, I32 max, int depth)
	5896	{
	5897	dVAR;
	5898	register char *scan;
	5899	register I32 c;
	5900	register char *loceol = PL_regeol;
	5901	register I32 hardcount = 0;
	5902	register bool utf8_target = PL_reg_match_utf8;
	5903	UV utf8_flags;
	5904	#ifndef DEBUGGING
	5905	PERL_UNUSED_ARG(depth);
	5906	#endif
	5907
	5908	PERL_ARGS_ASSERT_REGREPEAT;
	5909
	5910	scan = PL_reginput;
	5911	if (max == REG_INFTY)
	5912	max = I32_MAX;
	5913	else if (max < loceol - scan)
	5914	loceol = scan + max;
	5915	switch (OP(p)) {
	5916	case REG_ANY:
	5917	if (utf8_target) {
	5918	loceol = PL_regeol;
	5919	while (scan < loceol && hardcount < max && *scan != '\n') {
	5920	scan += UTF8SKIP(scan);
	5921	hardcount++;
	5922	}
	5923	} else {
	5924	while (scan < loceol && *scan != '\n')
	5925	scan++;
	5926	}
	5927	break;
	5928	case SANY:
	5929	if (utf8_target) {
	5930	loceol = PL_regeol;
	5931	while (scan < loceol && hardcount < max) {
	5932	scan += UTF8SKIP(scan);
	5933	hardcount++;
	5934	}
	5935	}
	5936	else
	5937	scan = loceol;
	5938	break;
	5939	case CANY:
	5940	scan = loceol;
	5941	break;
	5942	case EXACT:
	5943	/* To get here, EXACTish nodes must have byte length == 1. That
	5944	* means they match only characters in the string that can be expressed
	5945	* as a single byte. For non-utf8 strings, that means a simple match.
	5946	* For utf8 strings, the character matched must be an invariant, or
	5947	* downgradable to a single byte. The pattern's utf8ness is
	5948	* irrelevant, as since it's a single byte, it either isn't utf8, or if
	5949	* it is, it's an invariant */
	5950
	5951	c = (U8)*STRING(p);
	5952	assert(! UTF_PATTERN \|\| UNI_IS_INVARIANT(c));
	5953
	5954	if (! utf8_target \|\| UNI_IS_INVARIANT(c)) {
	5955	while (scan < loceol && UCHARAT(scan) == c) {
	5956	scan++;
	5957	}
	5958	}
	5959	else {
	5960
	5961	/* Here, the string is utf8, and the pattern char is different
	5962	* in utf8 than not, so can't compare them directly. Outside the
	5963	* loop, find find the two utf8 bytes that represent c, and then
	5964	* look for those in sequence in the utf8 string */
	5965	U8 high = UTF8_TWO_BYTE_HI(c);
	5966	U8 low = UTF8_TWO_BYTE_LO(c);
	5967	loceol = PL_regeol;
	5968
	5969	while (hardcount < max
	5970	&& scan + 1 < loceol
	5971	&& UCHARAT(scan) == high
	5972	&& UCHARAT(scan + 1) == low)
	5973	{
	5974	scan += 2;
	5975	hardcount++;
	5976	}
	5977	}
	5978	break;
	5979	case EXACTFA:
	5980	utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	5981	goto do_exactf;
	5982
	5983	case EXACTFL:
	5984	PL_reg_flags \|= RF_tainted;
	5985	utf8_flags = FOLDEQ_UTF8_LOCALE;
	5986	goto do_exactf;
	5987
	5988	case EXACTF:
	5989	case EXACTFU:
	5990	utf8_flags = 0;
	5991
	5992	/* The comments for the EXACT case above apply as well to these fold
	5993	* ones */
	5994
	5995	do_exactf:
	5996	c = (U8)*STRING(p);
	5997	assert(! UTF_PATTERN \|\| UNI_IS_INVARIANT(c));
	5998
	5999	if (utf8_target) { /* Use full Unicode fold matching */
	6000	char *tmpeol = loceol;
	6001	while (hardcount < max
	6002	&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
	6003	STRING(p), NULL, 1, cBOOL(UTF_PATTERN), utf8_flags))
	6004	{
	6005	scan = tmpeol;
	6006	tmpeol = loceol;
	6007	hardcount++;
	6008	}
	6009
	6010	/* XXX Note that the above handles properly the German sharp s in
	6011	* the pattern matching ss in the string. But it doesn't handle
	6012	* properly cases where the string contains say 'LIGATURE ff' and
	6013	* the pattern is 'f+'. This would require, say, a new function or
	6014	* revised interface to foldEQ_utf8(), in which the maximum number
	6015	* of characters to match could be passed and it would return how
	6016	* many actually did. This is just one of many cases where
	6017	* multi-char folds don't work properly, and so the fix is being
	6018	* deferred */
	6019	}
	6020	else {
	6021	U8 folded;
	6022
	6023	/* Here, the string isn't utf8 and c is a single byte; and either
	6024	* the pattern isn't utf8 or c is an invariant, so its utf8ness
	6025	* doesn't affect c. Can just do simple comparisons for exact or
	6026	* fold matching. */
	6027	switch (OP(p)) {
	6028	case EXACTF: folded = PL_fold[c]; break;
	6029	case EXACTFA:
	6030	case EXACTFU: folded = PL_fold_latin1[c]; break;
	6031	case EXACTFL: folded = PL_fold_locale[c]; break;
	6032	default: Perl_croak(aTHX_ "panic: Unexpected op %u", OP(p));
	6033	}
	6034	while (scan < loceol &&
	6035	(UCHARAT(scan) == c \|\| UCHARAT(scan) == folded))
	6036	{
	6037	scan++;
	6038	}
	6039	}
	6040	break;
	6041	case ANYOFV:
	6042	case ANYOF:
	6043	if (utf8_target \|\| OP(p) == ANYOFV) {
	6044	STRLEN inclasslen;
	6045	loceol = PL_regeol;
	6046	inclasslen = loceol - scan;
	6047	while (hardcount < max
	6048	&& ((inclasslen = loceol - scan) > 0)
	6049	&& reginclass(prog, p, (U8*)scan, &inclasslen, utf8_target))
	6050	{
	6051	scan += inclasslen;
	6052	hardcount++;
	6053	}
	6054	} else {
	6055	while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
	6056	scan++;
	6057	}
	6058	break;
	6059	case ALNUMU:
	6060	if (utf8_target) {
	6061	utf8_wordchar:
	6062	loceol = PL_regeol;
	6063	LOAD_UTF8_CHARCLASS_ALNUM();
	6064	while (hardcount < max && scan < loceol &&
	6065	swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6066	{
	6067	scan += UTF8SKIP(scan);
	6068	hardcount++;
	6069	}
	6070	} else {
	6071	while (scan < loceol && isWORDCHAR_L1((U8) *scan)) {
	6072	scan++;
	6073	}
	6074	}
	6075	break;
	6076	case ALNUM:
	6077	if (utf8_target)
	6078	goto utf8_wordchar;
	6079	while (scan < loceol && isALNUM((U8) *scan)) {
	6080	scan++;
	6081	}
	6082	break;
	6083	case ALNUMA:
	6084	while (scan < loceol && isWORDCHAR_A((U8) *scan)) {
	6085	scan++;
	6086	}
	6087	break;
	6088	case ALNUML:
	6089	PL_reg_flags \|= RF_tainted;
	6090	if (utf8_target) {
	6091	loceol = PL_regeol;
	6092	while (hardcount < max && scan < loceol &&
	6093	isALNUM_LC_utf8((U8*)scan)) {
	6094	scan += UTF8SKIP(scan);
	6095	hardcount++;
	6096	}
	6097	} else {
	6098	while (scan < loceol && isALNUM_LC(*scan))
	6099	scan++;
	6100	}
	6101	break;
	6102	case NALNUMU:
	6103	if (utf8_target) {
	6104
	6105	utf8_Nwordchar:
	6106
	6107	loceol = PL_regeol;
	6108	LOAD_UTF8_CHARCLASS_ALNUM();
	6109	while (hardcount < max && scan < loceol &&
	6110	! swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6111	{
	6112	scan += UTF8SKIP(scan);
	6113	hardcount++;
	6114	}
	6115	} else {
	6116	while (scan < loceol && ! isWORDCHAR_L1((U8) *scan)) {
	6117	scan++;
	6118	}
	6119	}
	6120	break;
	6121	case NALNUM:
	6122	if (utf8_target)
	6123	goto utf8_Nwordchar;
	6124	while (scan < loceol && ! isALNUM((U8) *scan)) {
	6125	scan++;
	6126	}
	6127	break;
	6128	case NALNUMA:
	6129	if (utf8_target) {
	6130	while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
	6131	scan += UTF8SKIP(scan);
	6132	}
	6133	}
	6134	else {
	6135	while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
	6136	scan++;
	6137	}
	6138	}
	6139	break;
	6140	case NALNUML:
	6141	PL_reg_flags \|= RF_tainted;
	6142	if (utf8_target) {
	6143	loceol = PL_regeol;
	6144	while (hardcount < max && scan < loceol &&
	6145	!isALNUM_LC_utf8((U8*)scan)) {
	6146	scan += UTF8SKIP(scan);
	6147	hardcount++;
	6148	}
	6149	} else {
	6150	while (scan < loceol && !isALNUM_LC(*scan))
	6151	scan++;
	6152	}
	6153	break;
	6154	case SPACEU:
	6155	if (utf8_target) {
	6156
	6157	utf8_space:
	6158
	6159	loceol = PL_regeol;
	6160	LOAD_UTF8_CHARCLASS_SPACE();
	6161	while (hardcount < max && scan < loceol &&
	6162	(*scan == ' ' \|\|
	6163	swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
	6164	{
	6165	scan += UTF8SKIP(scan);
	6166	hardcount++;
	6167	}
	6168	break;
	6169	}
	6170	else {
	6171	while (scan < loceol && isSPACE_L1((U8) *scan)) {
	6172	scan++;
	6173	}
	6174	break;
	6175	}
	6176	case SPACE:
	6177	if (utf8_target)
	6178	goto utf8_space;
	6179
	6180	while (scan < loceol && isSPACE((U8) *scan)) {
	6181	scan++;
	6182	}
	6183	break;
	6184	case SPACEA:
	6185	while (scan < loceol && isSPACE_A((U8) *scan)) {
	6186	scan++;
	6187	}
	6188	break;
	6189	case SPACEL:
	6190	PL_reg_flags \|= RF_tainted;
	6191	if (utf8_target) {
	6192	loceol = PL_regeol;
	6193	while (hardcount < max && scan < loceol &&
	6194	isSPACE_LC_utf8((U8*)scan)) {
	6195	scan += UTF8SKIP(scan);
	6196	hardcount++;
	6197	}
	6198	} else {
	6199	while (scan < loceol && isSPACE_LC(*scan))
	6200	scan++;
	6201	}
	6202	break;
	6203	case NSPACEU:
	6204	if (utf8_target) {
	6205
	6206	utf8_Nspace:
	6207
	6208	loceol = PL_regeol;
	6209	LOAD_UTF8_CHARCLASS_SPACE();
	6210	while (hardcount < max && scan < loceol &&
	6211	! (*scan == ' ' \|\|
	6212	swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
	6213	{
	6214	scan += UTF8SKIP(scan);
	6215	hardcount++;
	6216	}
	6217	break;
	6218	}
	6219	else {
	6220	while (scan < loceol && ! isSPACE_L1((U8) *scan)) {
	6221	scan++;
	6222	}
	6223	}
	6224	break;
	6225	case NSPACE:
	6226	if (utf8_target)
	6227	goto utf8_Nspace;
	6228
	6229	while (scan < loceol && ! isSPACE((U8) *scan)) {
	6230	scan++;
	6231	}
	6232	break;
	6233	case NSPACEA:
	6234	if (utf8_target) {
	6235	while (scan < loceol && ! isSPACE_A((U8) *scan)) {
	6236	scan += UTF8SKIP(scan);
	6237	}
	6238	}
	6239	else {
	6240	while (scan < loceol && ! isSPACE_A((U8) *scan)) {
	6241	scan++;
	6242	}
	6243	}
	6244	break;
	6245	case NSPACEL:
	6246	PL_reg_flags \|= RF_tainted;
	6247	if (utf8_target) {
	6248	loceol = PL_regeol;
	6249	while (hardcount < max && scan < loceol &&
	6250	!isSPACE_LC_utf8((U8*)scan)) {
	6251	scan += UTF8SKIP(scan);
	6252	hardcount++;
	6253	}
	6254	} else {
	6255	while (scan < loceol && !isSPACE_LC(*scan))
	6256	scan++;
	6257	}
	6258	break;
	6259	case DIGIT:
	6260	if (utf8_target) {
	6261	loceol = PL_regeol;
	6262	LOAD_UTF8_CHARCLASS_DIGIT();
	6263	while (hardcount < max && scan < loceol &&
	6264	swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	6265	scan += UTF8SKIP(scan);
	6266	hardcount++;
	6267	}
	6268	} else {
	6269	while (scan < loceol && isDIGIT(*scan))
	6270	scan++;
	6271	}
	6272	break;
	6273	case DIGITA:
	6274	while (scan < loceol && isDIGIT_A((U8) *scan)) {
	6275	scan++;
	6276	}
	6277	break;
	6278	case DIGITL:
	6279	PL_reg_flags \|= RF_tainted;
	6280	if (utf8_target) {
	6281	loceol = PL_regeol;
	6282	while (hardcount < max && scan < loceol &&
	6283	isDIGIT_LC_utf8((U8*)scan)) {
	6284	scan += UTF8SKIP(scan);
	6285	hardcount++;
	6286	}
	6287	} else {
	6288	while (scan < loceol && isDIGIT_LC(*scan))
	6289	scan++;
	6290	}
	6291	break;
	6292	case NDIGIT:
	6293	if (utf8_target) {
	6294	loceol = PL_regeol;
	6295	LOAD_UTF8_CHARCLASS_DIGIT();
	6296	while (hardcount < max && scan < loceol &&
	6297	!swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	6298	scan += UTF8SKIP(scan);
	6299	hardcount++;
	6300	}
	6301	} else {
	6302	while (scan < loceol && !isDIGIT(*scan))
	6303	scan++;
	6304	}
	6305	break;
	6306	case NDIGITA:
	6307	if (utf8_target) {
	6308	while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
	6309	scan += UTF8SKIP(scan);
	6310	}
	6311	}
	6312	else {
	6313	while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
	6314	scan++;
	6315	}
	6316	}
	6317	break;
	6318	case NDIGITL:
	6319	PL_reg_flags \|= RF_tainted;
	6320	if (utf8_target) {
	6321	loceol = PL_regeol;
	6322	while (hardcount < max && scan < loceol &&
	6323	!isDIGIT_LC_utf8((U8*)scan)) {
	6324	scan += UTF8SKIP(scan);
	6325	hardcount++;
	6326	}
	6327	} else {
	6328	while (scan < loceol && !isDIGIT_LC(*scan))
	6329	scan++;
	6330	}
	6331	break;
	6332	case LNBREAK:
	6333	if (utf8_target) {
	6334	loceol = PL_regeol;
	6335	while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
	6336	scan += c;
	6337	hardcount++;
	6338	}
	6339	} else {
	6340	/*
	6341	LNBREAK can match two latin chars, which is ok,
	6342	because we have a null terminated string, but we
	6343	have to use hardcount in this situation
	6344	*/
	6345	while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
	6346	scan+=c;
	6347	hardcount++;
	6348	}
	6349	}
	6350	break;
	6351	case HORIZWS:
	6352	if (utf8_target) {
	6353	loceol = PL_regeol;
	6354	while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
	6355	scan += c;
	6356	hardcount++;
	6357	}
	6358	} else {
	6359	while (scan < loceol && is_HORIZWS_latin1(scan))
	6360	scan++;
	6361	}
	6362	break;
	6363	case NHORIZWS:
	6364	if (utf8_target) {
	6365	loceol = PL_regeol;
	6366	while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
	6367	scan += UTF8SKIP(scan);
	6368	hardcount++;
	6369	}
	6370	} else {
	6371	while (scan < loceol && !is_HORIZWS_latin1(scan))
	6372	scan++;
	6373
	6374	}
	6375	break;
	6376	case VERTWS:
	6377	if (utf8_target) {
	6378	loceol = PL_regeol;
	6379	while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
	6380	scan += c;
	6381	hardcount++;
	6382	}
	6383	} else {
	6384	while (scan < loceol && is_VERTWS_latin1(scan))
	6385	scan++;
	6386
	6387	}
	6388	break;
	6389	case NVERTWS:
	6390	if (utf8_target) {
	6391	loceol = PL_regeol;
	6392	while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
	6393	scan += UTF8SKIP(scan);
	6394	hardcount++;
	6395	}
	6396	} else {
	6397	while (scan < loceol && !is_VERTWS_latin1(scan))
	6398	scan++;
	6399
	6400	}
	6401	break;
	6402
	6403	default: /* Called on something of 0 width. */
	6404	break; /* So match right here or not at all. */
	6405	}
	6406
	6407	if (hardcount)
	6408	c = hardcount;
	6409	else
	6410	c = scan - PL_reginput;
	6411	PL_reginput = scan;
	6412
	6413	DEBUG_r({
	6414	GET_RE_DEBUG_FLAGS_DECL;
	6415	DEBUG_EXECUTE_r({
	6416	SV * const prop = sv_newmortal();
	6417	regprop(prog, prop, p);
	6418	PerlIO_printf(Perl_debug_log,
	6419	"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
	6420	REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
	6421	});
	6422	});
	6423
	6424	return(c);
	6425	}
	6426
	6427
	6428	#if !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION)
	6429	/*
	6430	- regclass_swash - prepare the utf8 swash
	6431	*/
	6432
	6433	SV *
	6434	Perl_regclass_swash(pTHX_ const regexp prog, register const regnode node, bool doinit, SV listsvp, SV altsvp)
	6435	{
	6436	dVAR;
	6437	SV *sw = NULL;
	6438	SV *si = NULL;
	6439	SV *alt = NULL;
	6440	RXi_GET_DECL(prog,progi);
	6441	const struct reg_data * const data = prog ? progi->data : NULL;
	6442
	6443	PERL_ARGS_ASSERT_REGCLASS_SWASH;
	6444
	6445	assert(ANYOF_NONBITMAP(node));
	6446
	6447	if (data && data->count) {
	6448	const U32 n = ARG(node);
	6449
	6450	if (data->what[n] == 's') {
	6451	SV * const rv = MUTABLE_SV(data->data[n]);
	6452	AV * const av = MUTABLE_AV(SvRV(rv));
	6453	SV **const ary = AvARRAY(av);
	6454	SV a, b;
	6455
	6456	/* See the end of regcomp.c:S_regclass() for
	6457	* documentation of these array elements. */
	6458
	6459	si = *ary;
	6460	a = SvROK(ary[1]) ? &ary[1] : NULL;
	6461	b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
	6462
	6463	if (a)
	6464	sw = *a;
	6465	else if (si && doinit) {
	6466	sw = swash_init("utf8", "", si, 1, 0);
	6467	(void)av_store(av, 1, sw);
	6468	}
	6469	if (b)
	6470	alt = *b;
	6471	}
	6472	}
	6473
	6474	if (listsvp)
	6475	*listsvp = si;
	6476	if (altsvp)
	6477	*altsvp = alt;
	6478
	6479	return sw;
	6480	}
	6481	#endif
	6482
	6483	/*
	6484	- reginclass - determine if a character falls into a character class
	6485
	6486	n is the ANYOF regnode
	6487	p is the target string
	6488	lenp is pointer to the maximum number of bytes of how far to go in p
	6489	(This is assumed without checking to always be at least the current
	6490	character's size)
	6491	utf8_target tells whether p is in UTF-8.
	6492
	6493	Returns true if matched; false otherwise. If lenp is not NULL, on return
	6494	from a successful match, the value it points to will be updated to how many
	6495	bytes in p were matched. If there was no match, the value is undefined,
	6496	possibly changed from the input.
	6497
	6498	Note that this can be a synthetic start class, a combination of various
	6499	nodes, so things you think might be mutually exclusive, such as locale,
	6500	aren't. It can match both locale and non-locale
	6501
	6502	*/
	6503
	6504	STATIC bool
	6505	S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, STRLEN* lenp, register const bool utf8_target)
	6506	{
	6507	dVAR;
	6508	const char flags = ANYOF_FLAGS(n);
	6509	bool match = FALSE;
	6510	UV c = *p;
	6511	STRLEN c_len = 0;
	6512	STRLEN maxlen;
	6513
	6514	PERL_ARGS_ASSERT_REGINCLASS;
	6515
	6516	/* If c is not already the code point, get it */
	6517	if (utf8_target && !UTF8_IS_INVARIANT(c)) {
	6518	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
	6519	(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
	6520	\| UTF8_ALLOW_FFFF \| UTF8_CHECK_ONLY);
	6521	/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
	6522	* UTF8_ALLOW_FFFF */
	6523	if (c_len == (STRLEN)-1)
	6524	Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
	6525	}
	6526	else {
	6527	c_len = 1;
	6528	}
	6529
	6530	/* Use passed in max length, or one character if none passed in or less
	6531	* than one character. And assume will match just one character. This is
	6532	* overwritten later if matched more. */
	6533	if (lenp) {
	6534	maxlen = (lenp > c_len) ? lenp : c_len;
	6535	*lenp = c_len;
	6536
	6537	}
	6538	else {
	6539	maxlen = c_len;
	6540	}
	6541
	6542	/* If this character is potentially in the bitmap, check it */
	6543	if (c < 256) {
	6544	if (ANYOF_BITMAP_TEST(n, c))
	6545	match = TRUE;
	6546	else if (flags & ANYOF_NON_UTF8_LATIN1_ALL
	6547	&& ! utf8_target
	6548	&& ! isASCII(c))
	6549	{
	6550	match = TRUE;
	6551	}
	6552
	6553	else if (flags & ANYOF_LOCALE) {
	6554	PL_reg_flags \|= RF_tainted;
	6555
	6556	if ((flags & ANYOF_LOC_NONBITMAP_FOLD)
	6557	&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
	6558	{
	6559	match = TRUE;
	6560	}
	6561	else if (ANYOF_CLASS_TEST_ANY_SET(n) &&
	6562	((ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) \|\|
	6563	(ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) \|\|
	6564	(ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) \|\|
	6565	(ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) \|\|
	6566	(ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) \|\|
	6567	(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) \|\|
	6568	(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) \|\|
	6569	(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) \|\|
	6570	(ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) \|\|
	6571	(ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) \|\|
	6572	(ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) \|\|
	6573	(ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) \|\|
	6574	(ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) \|\|
	6575	(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) \|\|
	6576	(ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) \|\|
	6577	(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) \|\|
	6578	(ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) \|\|
	6579	(ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) \|\|
	6580	(ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) \|\|
	6581	(ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) \|\|
	6582	(ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) \|\|
	6583	(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) \|\|
	6584	(ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) \|\|
	6585	(ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) \|\|
	6586	(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) \|\|
	6587	(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) \|\|
	6588	(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) \|\|
	6589	(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) \|\|
	6590	(ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) \|\|
	6591	(ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
	6592	) /* How's that for a conditional? */
	6593	) {
	6594	match = TRUE;
	6595	}
	6596	}
	6597	}
	6598
	6599	/* If the bitmap didn't (or couldn't) match, and something outside the
	6600	* bitmap could match, try that. Locale nodes specifiy completely the
	6601	* behavior of code points in the bit map (otherwise, a utf8 target would
	6602	* cause them to be treated as Unicode and not locale), except in
	6603	* the very unlikely event when this node is a synthetic start class, which
	6604	* could be a combination of locale and non-locale nodes. So allow locale
	6605	* to match for the synthetic start class, which will give a false
	6606	* positive that will be resolved when the match is done again as not part
	6607	* of the synthetic start class */
	6608	if (!match) {
	6609	if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
	6610	match = TRUE; /* Everything above 255 matches */
	6611	}
	6612	else if (ANYOF_NONBITMAP(n)
	6613	&& ((flags & ANYOF_NONBITMAP_NON_UTF8)
	6614	\|\| (utf8_target
	6615	&& (c >=256
	6616	\|\| (! (flags & ANYOF_LOCALE))
	6617	\|\| (flags & ANYOF_IS_SYNTHETIC)))))
	6618	{
	6619	AV *av;
	6620	SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
	6621
	6622	if (sw) {
	6623	U8 * utf8_p;
	6624	if (utf8_target) {
	6625	utf8_p = (U8 *) p;
	6626	} else {
	6627
	6628	/* Not utf8. Convert as much of the string as available up
	6629	* to the limit of how far the (single) character in the
	6630	* pattern can possibly match (no need to go further). If
	6631	* the node is a straight ANYOF or not folding, it can't
	6632	* match more than one. Otherwise, It can match up to how
	6633	* far a single char can fold to. Since not utf8, each
	6634	* character is a single byte, so the max it can be in
	6635	* bytes is the same as the max it can be in characters */
	6636	STRLEN len = (OP(n) == ANYOF
	6637	\|\| ! (flags & ANYOF_LOC_NONBITMAP_FOLD))
	6638	? 1
	6639	: (maxlen < UTF8_MAX_FOLD_CHAR_EXPAND)
	6640	? maxlen
	6641	: UTF8_MAX_FOLD_CHAR_EXPAND;
	6642	utf8_p = bytes_to_utf8(p, &len);
	6643	}
	6644
	6645	if (swash_fetch(sw, utf8_p, TRUE))
	6646	match = TRUE;
	6647	else if (flags & ANYOF_LOC_NONBITMAP_FOLD) {
	6648
	6649	/* Here, we need to test if the fold of the target string
	6650	* matches. The non-multi char folds have all been moved to
	6651	* the compilation phase, and the multi-char folds have
	6652	* been stored by regcomp into 'av'; we linearly check to
	6653	* see if any match the target string (folded). We know
	6654	* that the originals were each one character, but we don't
	6655	* currently know how many characters/bytes each folded to,
	6656	* except we do know that there are small limits imposed by
	6657	* Unicode. XXX A performance enhancement would be to have
	6658	* regcomp.c store the max number of chars/bytes that are
	6659	* in an av entry, as, say the 0th element. Even better
	6660	* would be to have a hash of the few characters that can
	6661	* start a multi-char fold to the max number of chars of
	6662	* those folds.
	6663	*
	6664	* If there is a match, we will need to advance (if lenp is
	6665	* specified) the match pointer in the target string. But
	6666	* what we are comparing here isn't that string directly,
	6667	* but its fold, whose length may differ from the original.
	6668	* As we go along in constructing the fold, therefore, we
	6669	* create a map so that we know how many bytes in the
	6670	* source to advance given that we have matched a certain
	6671	* number of bytes in the fold. This map is stored in
	6672	* 'map_fold_len_back'. Let n mean the number of bytes in
	6673	* the fold of the first character that we are folding.
	6674	* Then map_fold_len_back[n] is set to the number of bytes
	6675	* in that first character. Similarly let m be the
	6676	* corresponding number for the second character to be
	6677	* folded. Then map_fold_len_back[n+m] is set to the
	6678	* number of bytes occupied by the first two source
	6679	* characters. ... */
	6680	U8 map_fold_len_back[UTF8_MAXBYTES_CASE+1] = { 0 };
	6681	U8 folded[UTF8_MAXBYTES_CASE+1];
	6682	STRLEN foldlen = 0; /* num bytes in fold of 1st char */
	6683	STRLEN total_foldlen = 0; /* num bytes in fold of all
	6684	chars */
	6685
	6686	if (OP(n) == ANYOF \|\| maxlen == 1 \|\| ! lenp \|\| ! av) {
	6687
	6688	/* Here, only need to fold the first char of the target
	6689	* string. It the source wasn't utf8, is 1 byte long */
	6690	to_utf8_fold(utf8_p, folded, &foldlen);
	6691	total_foldlen = foldlen;
	6692	map_fold_len_back[foldlen] = (utf8_target)
	6693	? UTF8SKIP(utf8_p)
	6694	: 1;
	6695	}
	6696	else {
	6697
	6698	/* Here, need to fold more than the first char. Do so
	6699	* up to the limits */
	6700	U8* source_ptr = utf8_p; /* The source for the fold
	6701	is the regex target
	6702	string */
	6703	U8* folded_ptr = folded;
	6704	U8* e = utf8_p + maxlen; /* Can't go beyond last
	6705	available byte in the
	6706	target string */
	6707	U8 i;
	6708	for (i = 0;
	6709	i < UTF8_MAX_FOLD_CHAR_EXPAND && source_ptr < e;
	6710	i++)
	6711	{
	6712
	6713	/* Fold the next character */
	6714	U8 this_char_folded[UTF8_MAXBYTES_CASE+1];
	6715	STRLEN this_char_foldlen;
	6716	to_utf8_fold(source_ptr,
	6717	this_char_folded,
	6718	&this_char_foldlen);
	6719
	6720	/* Bail if it would exceed the byte limit for
	6721	* folding a single char. */
	6722	if (this_char_foldlen + folded_ptr - folded >
	6723	UTF8_MAXBYTES_CASE)
	6724	{
	6725	break;
	6726	}
	6727
	6728	/* Add the fold of this character */
	6729	Copy(this_char_folded,
	6730	folded_ptr,
	6731	this_char_foldlen,
	6732	U8);
	6733	source_ptr += UTF8SKIP(source_ptr);
	6734	folded_ptr += this_char_foldlen;
	6735	total_foldlen = folded_ptr - folded;
	6736
	6737	/* Create map from the number of bytes in the fold
	6738	* back to the number of bytes in the source. If
	6739	* the source isn't utf8, the byte count is just
	6740	* the number of characters so far */
	6741	map_fold_len_back[total_foldlen]
	6742	= (utf8_target)
	6743	? source_ptr - utf8_p
	6744	: i + 1;
	6745	}
	6746	*folded_ptr = '\0';
	6747	}
	6748
	6749
	6750	/* Do the linear search to see if the fold is in the list
	6751	* of multi-char folds. */
	6752	if (av) {
	6753	I32 i;
	6754	for (i = 0; i <= av_len(av); i++) {
	6755	SV* const sv = *av_fetch(av, i, FALSE);
	6756	STRLEN len;
	6757	const char * const s = SvPV_const(sv, len);
	6758
	6759	if (len <= total_foldlen
	6760	&& memEQ(s, (char*)folded, len)
	6761
	6762	/* If 0, means matched a partial char. See
	6763	* [perl #90536] */
	6764	&& map_fold_len_back[len])
	6765	{
	6766
	6767	/* Advance the target string ptr to account for
	6768	* this fold, but have to translate from the
	6769	* folded length to the corresponding source
	6770	* length. */
	6771	if (lenp) {
	6772	*lenp = map_fold_len_back[len];
	6773	}
	6774	match = TRUE;
	6775	break;
	6776	}
	6777	}
	6778	}
	6779	}
	6780
	6781	/* If we allocated a string above, free it */
	6782	if (! utf8_target) Safefree(utf8_p);
	6783	}
	6784	}
	6785	}
	6786
	6787	return (flags & ANYOF_INVERT) ? !match : match;
	6788	}
	6789
	6790	STATIC U8 *
	6791	S_reghop3(U8 s, I32 off, const U8 lim)
	6792	{
	6793	dVAR;
	6794
	6795	PERL_ARGS_ASSERT_REGHOP3;
	6796
	6797	if (off >= 0) {
	6798	while (off-- && s < lim) {
	6799	/* XXX could check well-formedness here */
	6800	s += UTF8SKIP(s);
	6801	}
	6802	}
	6803	else {
	6804	while (off++ && s > lim) {
	6805	s--;
	6806	if (UTF8_IS_CONTINUED(*s)) {
	6807	while (s > lim && UTF8_IS_CONTINUATION(*s))
	6808	s--;
	6809	}
	6810	/* XXX could check well-formedness here */
	6811	}
	6812	}
	6813	return s;
	6814	}
	6815
	6816	#ifdef XXX_dmq
	6817	/* there are a bunch of places where we use two reghop3's that should
	6818	be replaced with this routine, but since that's not done yet
	6819	we ifdef it out - dmq
	6820	*/
	6821	STATIC U8 *
	6822	S_reghop4(U8 s, I32 off, const U8 llim, const U8* rlim)
	6823	{
	6824	dVAR;
	6825
	6826	PERL_ARGS_ASSERT_REGHOP4;
	6827
	6828	if (off >= 0) {
	6829	while (off-- && s < rlim) {
	6830	/* XXX could check well-formedness here */
	6831	s += UTF8SKIP(s);
	6832	}
	6833	}
	6834	else {
	6835	while (off++ && s > llim) {
	6836	s--;
	6837	if (UTF8_IS_CONTINUED(*s)) {
	6838	while (s > llim && UTF8_IS_CONTINUATION(*s))
	6839	s--;
	6840	}
	6841	/* XXX could check well-formedness here */
	6842	}
	6843	}
	6844	return s;
	6845	}
	6846	#endif
	6847
	6848	STATIC U8 *
	6849	S_reghopmaybe3(U8* s, I32 off, const U8* lim)
	6850	{
	6851	dVAR;
	6852
	6853	PERL_ARGS_ASSERT_REGHOPMAYBE3;
	6854
	6855	if (off >= 0) {
	6856	while (off-- && s < lim) {
	6857	/* XXX could check well-formedness here */
	6858	s += UTF8SKIP(s);
	6859	}
	6860	if (off >= 0)
	6861	return NULL;
	6862	}
	6863	else {
	6864	while (off++ && s > lim) {
	6865	s--;
	6866	if (UTF8_IS_CONTINUED(*s)) {
	6867	while (s > lim && UTF8_IS_CONTINUATION(*s))
	6868	s--;
	6869	}
	6870	/* XXX could check well-formedness here */
	6871	}
	6872	if (off <= 0)
	6873	return NULL;
	6874	}
	6875	return s;
	6876	}
	6877
	6878	static void
	6879	restore_pos(pTHX_ void *arg)
	6880	{
	6881	dVAR;
	6882	regexp * const rex = (regexp *)arg;
	6883	if (PL_reg_eval_set) {
	6884	if (PL_reg_oldsaved) {
	6885	rex->subbeg = PL_reg_oldsaved;
	6886	rex->sublen = PL_reg_oldsavedlen;
	6887	#ifdef PERL_OLD_COPY_ON_WRITE
	6888	rex->saved_copy = PL_nrs;
	6889	#endif
	6890	RXp_MATCH_COPIED_on(rex);
	6891	}
	6892	PL_reg_magic->mg_len = PL_reg_oldpos;
	6893	PL_reg_eval_set = 0;
	6894	PL_curpm = PL_reg_oldcurpm;
	6895	}
	6896	}
	6897
	6898	STATIC void
	6899	S_to_utf8_substr(pTHX_ register regexp *prog)
	6900	{
	6901	int i = 1;
	6902
	6903	PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
	6904
	6905	do {
	6906	if (prog->substrs->data[i].substr
	6907	&& !prog->substrs->data[i].utf8_substr) {
	6908	SV* const sv = newSVsv(prog->substrs->data[i].substr);
	6909	prog->substrs->data[i].utf8_substr = sv;
	6910	sv_utf8_upgrade(sv);
	6911	if (SvVALID(prog->substrs->data[i].substr)) {
	6912	if (SvTAIL(prog->substrs->data[i].substr)) {
	6913	/* Trim the trailing \n that fbm_compile added last
	6914	time. */
	6915	SvCUR_set(sv, SvCUR(sv) - 1);
	6916	/* Whilst this makes the SV technically "invalid" (as its
	6917	buffer is no longer followed by "\0") when fbm_compile()
	6918	adds the "\n" back, a "\0" is restored. */
	6919	fbm_compile(sv, FBMcf_TAIL);
	6920	} else
	6921	fbm_compile(sv, 0);
	6922	}
	6923	if (prog->substrs->data[i].substr == prog->check_substr)
	6924	prog->check_utf8 = sv;
	6925	}
	6926	} while (i--);
	6927	}
	6928
	6929	STATIC void
	6930	S_to_byte_substr(pTHX_ register regexp *prog)
	6931	{
	6932	dVAR;
	6933	int i = 1;
	6934
	6935	PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
	6936
	6937	do {
	6938	if (prog->substrs->data[i].utf8_substr
	6939	&& !prog->substrs->data[i].substr) {
	6940	SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
	6941	if (sv_utf8_downgrade(sv, TRUE)) {
	6942	if (SvVALID(prog->substrs->data[i].utf8_substr)) {
	6943	if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
	6944	/* Trim the trailing \n that fbm_compile added last
	6945	time. */
	6946	SvCUR_set(sv, SvCUR(sv) - 1);
	6947	fbm_compile(sv, FBMcf_TAIL);
	6948	} else
	6949	fbm_compile(sv, 0);
	6950	}
	6951	} else {
	6952	SvREFCNT_dec(sv);
	6953	sv = &PL_sv_undef;
	6954	}
	6955	prog->substrs->data[i].substr = sv;
	6956	if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
	6957	prog->check_substr = sv;
	6958	}
	6959	} while (i--);
	6960	}
	6961
	6962	/*
	6963	* Local variables:
	6964	* c-indentation-style: bsd
	6965	* c-basic-offset: 4
	6966	* indent-tabs-mode: t
	6967	* End:
	6968	*
	6969	* ex: set ts=8 sts=4 sw=4 noet:
	6970	*/