perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regexec.c
	2	*/
	3
	4	/*
	5	* One Ring to rule them all, One Ring to find them
	6	&
	7	* [p.v of _The Lord of the Rings_, opening poem]
	8	* [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
	9	* [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
	10	*/
	11
	12	/* This file contains functions for executing a regular expression. See
	13	* also regcomp.c which funnily enough, contains functions for compiling
	14	* a regular expression.
	15	*
	16	* This file is also copied at build time to ext/re/re_exec.c, where
	17	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	18	* This causes the main functions to be compiled under new names and with
	19	* debugging support added, which makes "use re 'debug'" work.
	20	*/
	21
	22	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	23	* be confused with the original package (see point 3 below). Thanks, Henry!
	24	*/
	25
	26	/* Additional note: this code is very heavily munged from Henry's version
	27	* in places. In some spots I've traded clarity for efficiency, so don't
	28	* blame Henry for some of the lack of readability.
	29	*/
	30
	31	/* The names of the functions have been changed from regcomp and
	32	* regexec to pregcomp and pregexec in order to avoid conflicts
	33	* with the POSIX routines of the same names.
	34	*/
	35
	36	#ifdef PERL_EXT_RE_BUILD
	37	#include "re_top.h"
	38	#endif
	39
	40	/*
	41	* pregcomp and pregexec -- regsub and regerror are not used in perl
	42	*
	43	* Copyright (c) 1986 by University of Toronto.
	44	* Written by Henry Spencer. Not derived from licensed software.
	45	*
	46	* Permission is granted to anyone to use this software for any
	47	* purpose on any computer system, and to redistribute it freely,
	48	* subject to the following restrictions:
	49	*
	50	* 1. The author is not responsible for the consequences of use of
	51	* this software, no matter how awful, even if they arise
	52	* from defects in it.
	53	*
	54	* 2. The origin of this software must not be misrepresented, either
	55	* by explicit claim or by omission.
	56	*
	57	* 3. Altered versions must be plainly marked as such, and must not
	58	* be misrepresented as being the original software.
	59	*
	60	**** Alterations to Henry's code are...
	61	****
	62	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	63	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
	64	**** by Larry Wall and others
	65	****
	66	**** You may distribute under the terms of either the GNU General Public
	67	**** License or the Artistic License, as specified in the README file.
	68	*
	69	* Beware that some of this code is subtly aware of the way operator
	70	* precedence is structured in regular expressions. Serious changes in
	71	* regular-expression syntax might require a total rethink.
	72	*/
	73	#include "EXTERN.h"
	74	#define PERL_IN_REGEXEC_C
	75	#include "perl.h"
	76
	77	#ifdef PERL_IN_XSUB_RE
	78	# include "re_comp.h"
	79	#else
	80	# include "regcomp.h"
	81	#endif
	82
	/* Bit values that are OR-ed into / tested against PL_reg_flags. */
	83	#define RF_tainted 1 /* tainted information used? e.g. locale */
	84	#define RF_warned 2 /* warned about big count? */
	85
	86	#define RF_utf8 8 /* Pattern contains multibyte chars? */
	87
	/* True when the RF_utf8 bit is currently set in PL_reg_flags. */
	88	#define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
	89
	90	#ifndef STATIC
	91	#define STATIC static
	92	#endif
	93
	/* Fast path for ANYOF matching. Only valid for non-utf8 strings and
	 * non-ANYOFV nodes. When the node carries no flags the answer comes
	 * straight from the bitmap; otherwise reginclass() is asked. */
	#define REGINCLASS(prog,p,c) \
	    (!ANYOF_FLAGS(p) ? ANYOF_BITMAP_TEST(p,*(c)) \
	                     : reginclass(prog,p,c,0,0))
	99
	100	/*
	101	* Forwards.
	102	*/
	103
	/* Length of sv: sv_len_utf8() when the expansion site's local
	 * 'utf8_target' is true, SvCUR() otherwise. */
	#define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))

	/* Distance between two string positions: utf8_distance() when
	 * PL_reg_match_utf8 is set, plain pointer difference otherwise. */
	#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : (a) - (b))

	/* Position-hopping helpers. When PL_reg_match_utf8 is set the move is
	 * delegated to reghop3()/reghopmaybe3() together with a limit pointer;
	 * otherwise it is plain pointer arithmetic and the limit is not consulted
	 * (except in HOPBACKc, which yields NULL if the result would precede
	 * PL_bostr).  The 'c' variants cast the result to char*.
	 * All pointer casts are U8* and every macro argument is parenthesized so
	 * that expression arguments expand safely. */
	#define HOPc(pos,off) \
	    (char *)(PL_reg_match_utf8 \
	        ? reghop3((U8*)(pos), off, (U8*)((off) >= 0 ? PL_regeol : PL_bostr)) \
	        : (U8*)((pos) + (off)))
	#define HOPBACKc(pos, off) \
	    (char*)(PL_reg_match_utf8 \
	        ? reghopmaybe3((U8*)(pos), -(off), (U8*)PL_bostr) \
	        : ((pos) - (off) >= PL_bostr) \
	            ? (U8*)(pos) - (off) \
	            : NULL)

	#define HOP3(pos,off,lim) \
	    (PL_reg_match_utf8 \
	        ? reghop3((U8*)(pos), off, (U8*)(lim)) \
	        : (U8*)((pos) + (off)))
	#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
	120
	121	/* these are unrolled below in the CCC_TRY_XXX defines */
	/* LOAD_UTF8_CHARCLASS(class,str): if PL_utf8_<class> is not yet set, call
	 * is_utf8_<class>(str) between ENTER/LEAVE with save_re_context(), then
	 * assert both that the call returned true and that PL_utf8_<class> is now
	 * set (presumably the call populates it as a side effect -- confirm
	 * against the is_utf8_* implementations). */
	122	#ifdef EBCDIC
	123	/* Often 'str' is a hard-coded utf8 string instead of utfebcdic. so just
	124	* skip the check on EBCDIC platforms */
	125	# define LOAD_UTF8_CHARCLASS(class,str) LOAD_UTF8_CHARCLASS_NO_CHECK(class)
	126	#else
	127	# define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
	128	if (!CAT2(PL_utf8_,class)) { \
	129	bool ok; \
	130	ENTER; save_re_context(); \
	131	ok=CAT2(is_utf8_,class)((const U8*)str); \
	132	PERL_UNUSED_VAR(ok); \
	133	assert(ok); assert(CAT2(PL_utf8_,class)); LEAVE; } } STMT_END
	134	#endif
	135
	136	/* Like LOAD_UTF8_CHARCLASS, but does no assert to verify the result: is_utf8_<class>(" ") is called and its return value is discarded */
	137	#define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
	138	if (!CAT2(PL_utf8_,class)) { \
	139	bool throw_away PERL_UNUSED_DECL; \
	140	ENTER; save_re_context(); \
	141	throw_away = CAT2(is_utf8_,class)((const U8*)" "); \
	142	LEAVE; } } STMT_END
	143
	/* Convenience loaders: each passes a sample string that is asserted (in
	 * LOAD_UTF8_CHARCLASS) to belong to the class being loaded. */
	#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
	#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
	#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")

	/* Load every class needed for grapheme cluster boundary matching.
	 * The expansion is wrapped in STMT_START/STMT_END so that it is a single
	 * statement and stays correct as the body of an unbraced if/else; use it
	 * as "LOAD_UTF8_CHARCLASS_GCB();" exactly as before. */
	#define LOAD_UTF8_CHARCLASS_GCB() /* Grapheme cluster boundaries */ \
	    STMT_START { \
	        LOAD_UTF8_CHARCLASS(X_begin, " "); \
	        LOAD_UTF8_CHARCLASS(X_non_hangul, "A"); \
	        /* These are utf8 constants, and not utf-ebcdic constants, so the \
	         * assert should likely and hopefully fail on an EBCDIC machine */ \
	        LOAD_UTF8_CHARCLASS(X_extend, "\xcc\x80"); /* U+0300 */ \
	        \
	        /* No asserts are done for these, in case called on an early \
	         * Unicode version in which they map to nothing */ \
	        LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* U+0E40 "\xe0\xb9\x80" */ \
	        LOAD_UTF8_CHARCLASS_NO_CHECK(X_L); /* U+1100 "\xe1\x84\x80" */ \
	        LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV); /* U+AC00 "\xea\xb0\x80" */ \
	        LOAD_UTF8_CHARCLASS_NO_CHECK(X_LVT); /* U+AC01 "\xea\xb0\x81" */ \
	        LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV_LVT_V);/* U+AC01 "\xea\xb0\x81" */\
	        LOAD_UTF8_CHARCLASS_NO_CHECK(X_T); /* U+11A8 "\xe1\x86\xa8" */ \
	        LOAD_UTF8_CHARCLASS_NO_CHECK(X_V); /* U+1160 "\xe1\x85\xa0" */ \
	    } STMT_END
	164
	165	#define PLACEHOLDER /* Something for the preprocessor to grab onto */
	166
	167	/* The actual code for CCC_TRY, which uses several variables from the routine
	168	* it's callable from. It is designed to be the bulk of a case statement.
	169	* FUNC is the macro or function to call on non-utf8 targets that indicate if
	170	* nextchr matches the class.
	171	* UTF8_TEST is the whole test string to use for utf8 targets
	172	* LOAD is what to use to test, and if not present to load in the swash for the
	173	* class
	174	* POS_OR_NEG is either empty or ! to complement the results of FUNC or
	175	* UTF8_TEST test.
	176	* The logic is: Fail if we're at the end-of-string; otherwise if the target is
	177	* utf8 and a variant, load the swash if necessary and test using the utf8
	178	* test. Advance to the next character if test is ok, otherwise fail; If not
	179	* utf8 or an invariant under utf8, use the non-utf8 test, and fail if it
	180	* fails, or advance to the next character */
	181
	/* NOTE: the expansion reads and updates 'locinput' and 'nextchr', reads
	 * 'utf8_target' and PL_regeol, and uses 'sayNO' and 'break'; it therefore
	 * only makes sense directly under a 'case' label inside a switch in a
	 * routine that provides all of those. Both exits end in 'break'. */
	182	#define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR) \
	183	if (locinput >= PL_regeol) { \
	184	sayNO; \
	185	} \
	186	if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \
	187	LOAD_UTF8_CHARCLASS(CLASS, STR); \
	188	if (POS_OR_NEG (UTF8_TEST)) { \
	189	sayNO; \
	190	} \
	191	locinput += PL_utf8skip[nextchr]; \
	192	nextchr = UCHARAT(locinput); \
	193	break; \
	194	} \
	195	if (POS_OR_NEG (FUNC(nextchr))) { \
	196	sayNO; \
	197	} \
	198	nextchr = UCHARAT(++locinput); \
	199	break;
	200
	201	/* Handle the non-locale cases for a character class and its complement. It
	202	* calls _CCC_TRY_CODE with a ! to complement the test for the character class.
	203	* This is because that code fails when the test succeeds, so we want to have
	204	* the test fail so that the code succeeds. The swash is stored in a
	205	* predictable PL_ place */
	/* Emits two case labels: NAME (class must match) and NNAME (class must
	 * not match); both test utf8 input with swash_fetch() on PL_utf8_<CLASS>. */
	206	#define _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, \
	207	CLASS, STR) \
	208	case NAME: \
	209	_CCC_TRY_CODE( !, FUNC, \
	210	cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	211	(U8*)locinput, TRUE)), \
	212	CLASS, STR) \
	213	case NNAME: \
	214	_CCC_TRY_CODE( PLACEHOLDER , FUNC, \
	215	cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	216	(U8*)locinput, TRUE)), \
	217	CLASS, STR) \
	218
	219	/* Generate the case statements for both locale and non-locale character
	220	* classes in regmatch for classes that don't have special unicode semantics.
	221	* Locales don't use an immediate swash, but an intermediary special locale
	222	* function that is called on the pointer to the current place in the input
	223	* string. That function will resolve to needing the same swash. One might
	224	* think that because we don't know what the locale will match, we shouldn't
	225	* check with the swash loading function that it loaded properly; ie, that we
	226	* should use LOAD_UTF8_CHARCLASS_NO_CHECK for those, but what is passed to the
	227	* regular LOAD_UTF8_CHARCLASS is in non-locale terms, and so locale is
	228	* irrelevant here */
	/* CCC_TRY emits six case labels for one character class:
	 *   NAMEL / NNAMEL  - locale variants; set RF_tainted in PL_reg_flags and
	 *                     test with LCFUNC (bytes) or LCFUNC_utf8 (utf8)
	 *   NAMEA / NNAMEA  - ASCII-restricted variants tested with FUNCA
	 *   NAME  / NNAME   - non-locale variants, via _CCC_TRY_NONLOCALE
	 * The N-prefixed label is the complemented class in each pair. */
	#define CCC_TRY(NAME, NNAME, FUNC, \
	                NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	                NAMEA, NNAMEA, FUNCA, \
	                CLASS, STR) \
	    case NAMEL: \
	        PL_reg_flags |= RF_tainted; \
	        _CCC_TRY_CODE( !, LCFUNC, LCFUNC_utf8((U8*)locinput), CLASS, STR) \
	    case NNAMEL: \
	        PL_reg_flags |= RF_tainted; \
	        _CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput), \
	                       CLASS, STR) \
	    case NAMEA: \
	        if (locinput >= PL_regeol || ! FUNCA(nextchr)) { \
	            sayNO; \
	        } \
	        /* Matched a utf8-invariant, so don't have to worry about utf8 */ \
	        nextchr = UCHARAT(++locinput); \
	        break; \
	    case NNAMEA: \
	        if (locinput >= PL_regeol || FUNCA(nextchr)) { \
	            sayNO; \
	        } \
	        /* The non-matching character may be multi-byte under utf8 */ \
	        if (utf8_target) { \
	            locinput += PL_utf8skip[nextchr]; \
	            nextchr = UCHARAT(locinput); \
	        } \
	        else { \
	            nextchr = UCHARAT(++locinput); \
	        } \
	        break; \
	    /* Generate the non-locale cases */ \
	    _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)

	/* This is like CCC_TRY, but has an extra set of parameters for generating case
	 * statements to handle separate Unicode semantics nodes (NAMEU / NNAMEU,
	 * tested with FUNCU on non-utf8 targets) */
	#define CCC_TRY_U(NAME, NNAME, FUNC, \
	                  NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	                  NAMEU, NNAMEU, FUNCU, \
	                  NAMEA, NNAMEA, FUNCA, \
	                  CLASS, STR) \
	    CCC_TRY(NAME, NNAME, FUNC, \
	            NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	            NAMEA, NNAMEA, FUNCA, \
	            CLASS, STR) \
	    _CCC_TRY_NONLOCALE(NAMEU, NNAMEU, FUNCU, CLASS, STR)
	274
	275	/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
	276
	277	/* for use after a quantifier and before an EXACT-like node -- japhy */
	278	/* it would be nice to rework regcomp.sym to generate this stuff. sigh
	279	*
	280	* NOTE that nothing that affects backtracking should be in here, specifically
	281	* VERBS must NOT be included. JUMPABLE is used to determine if we can ignore a
	282	* node that is in between two EXACT like nodes when ascertaining what the required
	283	* "follow" character is. This should probably be moved to regex compile time
	284	* although it may be done at run time because of the REF possibility - more
	285	* investigation required. -- demerphq
	286	*/
	/* True for nodes that may be stepped over when looking for the text that
	 * must follow. Relies on a 'cur_eval' variable at the expansion site: a
	 * CLOSE is only jumpable when there is no current eval or it does not
	 * close the current eval's paren. */
	#define JUMPABLE(rn) ( \
	    OP(rn) == OPEN || \
	    (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
	    OP(rn) == EVAL || \
	    OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
	    OP(rn) == PLUS || OP(rn) == MINMOD || \
	    OP(rn) == KEEPS || \
	    (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
	)
	/* Node kind (per PL_regkind) is EXACT */
	#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)

	/* Node kind is EXACT or REF */
	#define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )

	#if 0
	/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
	   we don't need this definition. */
	#define IS_TEXT(rn) ( OP(rn)==EXACT || OP(rn)==REF || OP(rn)==NREF )
	#define IS_TEXTF(rn) ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_TRICKYFOLD || OP(rn)==EXACTFA || OP(rn)==EXACTF || OP(rn)==REFF || OP(rn)==NREFF )
	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )

	#else
	/* ... so we use this as it's faster. */
	#define IS_TEXT(rn) ( OP(rn)==EXACT )
	#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_TRICKYFOLD || OP(rn) == EXACTFA)
	#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )

	#endif
	315
	316	/*
	317	Search for mandatory following text node; for lookahead, the text must
	318	follow but for lookbehind (rn->flags != 0) we skip to the next step.
	319	*/
	/* Advance 'rn' (an lvalue, modified in place) past every JUMPABLE node:
	 *   SUSPEND / CURLY-kind : skip two operator slots
	 *   PLUS                 : skip one operator slot
	 *   IFMATCH              : flags == 0 steps into the body (two slots),
	 *                          otherwise jumps by ARG(rn)
	 *   anything else        : follow NEXT_OFF(rn)
	 */
	#define FIND_NEXT_IMPT(rn) STMT_START { \
	    while (JUMPABLE(rn)) { \
	        const OPCODE type = OP(rn); \
	        if (type == SUSPEND || PL_regkind[type] == CURLY) \
	            rn = NEXTOPER(NEXTOPER(rn)); \
	        else if (type == PLUS) \
	            rn = NEXTOPER(rn); \
	        else if (type == IFMATCH) \
	            rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
	        else rn += NEXT_OFF(rn); \
	    } \
	} STMT_END
	332
	333
	334	static void restore_pos(pTHX_ void *arg);
	335
	/* Sizes, in savestack slots, of what S_regcppush() pushes:
	 *   REGCP_PAREN_ELEMS - per saved paren: offs[p].end, .start, .start_tmp
	 *   REGCP_OTHER_ELEMS - always: PL_regsize, lastparen, lastcloseparen
	 *   REGCP_FRAME_ELEMS - one extra slot reserved by SSGROW (presumably the
	 *                       SAVEt_REGCONTEXT cookie word -- confirm) */
	336	#define REGCP_PAREN_ELEMS 3
	337	#define REGCP_OTHER_ELEMS 3
	338	#define REGCP_FRAME_ELEMS 1
	339	/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
	340	* are needed for the regexp context stack bookkeeping. */
	341
	342	STATIC CHECKPOINT
	343	S_regcppush(pTHX_ const regexp *rex, I32 parenfloor)
	344	{
	345	dVAR;
	346	const int retval = PL_savestack_ix;
	347	const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
	348	const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
	349	const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
	350	I32 p;
	351	GET_RE_DEBUG_FLAGS_DECL;
	352
	353	PERL_ARGS_ASSERT_REGCPPUSH;
	354
	355	if (paren_elems_to_push < 0)
	356	Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0",
	357	paren_elems_to_push);
	358
	359	if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
	360	Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
	361	" out of range (%lu-%ld)",
	362	total_elems, (unsigned long)PL_regsize, (long)parenfloor);
	363
	364	SSGROW(total_elems + REGCP_FRAME_ELEMS);
	365
	366	DEBUG_BUFFERS_r(
	367	if ((int)PL_regsize > (int)parenfloor)
	368	PerlIO_printf(Perl_debug_log,
	369	"rex=0x%"UVxf" offs=0x%"UVxf": saving capture indices:\n",
	370	PTR2UV(rex),
	371	PTR2UV(rex->offs)
	372	);
	373	);
	374	for (p = parenfloor+1; p <= (I32)PL_regsize; p++) {
	375	/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
	376	SSPUSHINT(rex->offs[p].end);
	377	SSPUSHINT(rex->offs[p].start);
	378	SSPUSHINT(rex->offs[p].start_tmp);
	379	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	380	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"\n",
	381	(UV)p,
	382	(IV)rex->offs[p].start,
	383	(IV)rex->offs[p].start_tmp,
	384	(IV)rex->offs[p].end
	385	));
	386	}
	387	/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
	388	SSPUSHINT(PL_regsize);
	389	SSPUSHINT(rex->lastparen);
	390	SSPUSHINT(rex->lastcloseparen);
	391	SSPUSHUV(SAVEt_REGCONTEXT \| elems_shifted); /* Magic cookie. */
	392
	393	return retval;
	394	}
	395
	396	/* These are needed since we do not localize EVAL nodes: */
	/* REGCP_SET(cp): record the current PL_savestack_ix in 'cp' (with a debug
	 * trace). Expands to two statements and has no trailing semicolon. */
	397	#define REGCP_SET(cp) \
	398	DEBUG_STATE_r( \
	399	PerlIO_printf(Perl_debug_log, \
	400	" Setting an EVAL scope, savestack=%"IVdf"\n", \
	401	(IV)PL_savestack_ix)); \
	402	cp = PL_savestack_ix
	403
	/* REGCP_UNWIND(cp): trace if the savestack moved since 'cp', then
	 * regcpblow(cp). */
	404	#define REGCP_UNWIND(cp) \
	405	DEBUG_STATE_r( \
	406	if (cp != PL_savestack_ix) \
	407	PerlIO_printf(Perl_debug_log, \
	408	" Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
	409	(IV)(cp), (IV)PL_savestack_ix)); \
	410	regcpblow(cp)
	411
	/* UNWIND_PAREN(lp, lcp): set offs[n].end to -1 for every paren above 'lp',
	 * then set rex->lastparen (to the loop's final n) and rex->lastcloseparen.
	 * Uses 'n' and 'rex' from the expansion site.
	 * NOTE(review): all three macros expand to several bare statements, so
	 * none is safe as the sole body of an unbraced if/else/loop. */
	412	#define UNWIND_PAREN(lp, lcp) \
	413	for (n = rex->lastparen; n > lp; n--) \
	414	rex->offs[n].end = -1; \
	415	rex->lastparen = n; \
	416	rex->lastcloseparen = lcp;
	417
	418
	419	STATIC void
	420	S_regcppop(pTHX_ regexp *rex)
	421	{
	422	dVAR;
	423	UV i;
	424	U32 paren;
	425	GET_RE_DEBUG_FLAGS_DECL;
	426
	427	PERL_ARGS_ASSERT_REGCPPOP;
	428
	429	/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
	430	i = SSPOPUV;
	431	assert((i & SAVE_MASK) == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
	432	i >>= SAVE_TIGHT_SHIFT; /* Parentheses elements to pop. */
	433	rex->lastcloseparen = SSPOPINT;
	434	rex->lastparen = SSPOPINT;
	435	PL_regsize = SSPOPINT;
	436
	437	i -= REGCP_OTHER_ELEMS;
	438	/* Now restore the parentheses context. */
	439	DEBUG_BUFFERS_r(
	440	if (i \|\| rex->lastparen + 1 <= rex->nparens)
	441	PerlIO_printf(Perl_debug_log,
	442	"rex=0x%"UVxf" offs=0x%"UVxf": restoring capture indices to:\n",
	443	PTR2UV(rex),
	444	PTR2UV(rex->offs)
	445	);
	446	);
	447	paren = PL_regsize;
	448	for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
	449	I32 tmps;
	450	rex->offs[paren].start_tmp = SSPOPINT;
	451	rex->offs[paren].start = SSPOPINT;
	452	tmps = SSPOPINT;
	453	if (paren <= rex->lastparen)
	454	rex->offs[paren].end = tmps;
	455	DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
	456	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"%s\n",
	457	(UV)paren,
	458	(IV)rex->offs[paren].start,
	459	(IV)rex->offs[paren].start_tmp,
	460	(IV)rex->offs[paren].end,
	461	(paren > rex->lastparen ? "(skipped)" : ""));
	462	);
	463	paren--;
	464	}
	465	#if 1
	466	/* It would seem that the similar code in regtry()
	467	* already takes care of this, and in fact it is in
	468	* a better location to since this code can #if 0-ed out
	469	* but the code in regtry() is needed or otherwise tests
	470	* requiring null fields (pat.t#187 and split.t#{13,14}
	471	* (as of patchlevel 7877) will fail. Then again,
	472	* this code seems to be necessary or otherwise
	473	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	474	* --jhi updated by dapm */
	475	for (i = rex->lastparen + 1; i <= rex->nparens; i++) {
	476	if (i > PL_regsize)
	477	rex->offs[i].start = -1;
	478	rex->offs[i].end = -1;
	479	DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
	480	" \\%"UVuf": %s ..-1 undeffing\n",
	481	(UV)i,
	482	(i > PL_regsize) ? "-1" : " "
	483	));
	484	}
	485	#endif
	486	}
	487
	488	/* restore the parens and associated vars at savestack position ix,
	489	* but without popping the stack */
	490
	491	STATIC void
	492	S_regcp_restore(pTHX_ regexp *rex, I32 ix)
	493	{
	494	I32 tmpix = PL_savestack_ix;
	495	PL_savestack_ix = ix;
	496	regcppop(rex);
	497	PL_savestack_ix = tmpix;
	498	}
	499
	500	#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
	501
	502	/*
	503	* pregexec and friends
	504	*/
	505
	506	#ifndef PERL_IN_XSUB_RE
	507	/*
	508	- pregexec - match a regexp against a string
	509	*/
	510	I32
	511	Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
	512	char strbeg, I32 minend, SV screamer, U32 nosave)
	513	/* strend: pointer to null at end of string */
	514	/* strbeg: real beginning of string */
	515	/* minend: end of match must be >=minend after stringarg. */
	516	/* nosave: For optimizations. */
	517	{
	518	PERL_ARGS_ASSERT_PREGEXEC;
	519
	520	return
	521	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
	522	nosave ? 0 : REXEC_COPY_STR);
	523	}
	524	#endif
	525
	526	/*
	527	* Need to implement the following flags for reg_anch:
	528	*
	529	* USE_INTUIT_NOML - Useful to call re_intuit_start() first
	530	* USE_INTUIT_ML
	531	* INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
	532	* INTUIT_AUTORITATIVE_ML
	533	* INTUIT_ONCE_NOML - Intuit can match in one location only.
	534	* INTUIT_ONCE_ML
	535	*
	536	* Another flag for this function: SECOND_TIME (so that float substrs
	537	* with giant delta may be not rechecked).
	538	*/
	539
	540	/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
	541
	542	/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
	543	Otherwise, only SvCUR(sv) is used to get strbeg. */
	544
	545	/* XXXX We assume that strpos is strbeg unless sv. */
	546
	547	/* XXXX Some places assume that there is a fixed substring.
	548	An update may be needed if optimizer marks as "INTUITable"
	549	RExen without fixed substrings. Similarly, it is assumed that
	550	lengths of all the strings are no more than minlen, thus they
	551	cannot come from lookahead.
	552	(Or minlen should take into account lookahead.)
	553	NOTE: Some of this comment is not correct. minlen does now take account
	554	of lookahead/behind. Further research is required. -- demerphq
	555
	556	*/
	557
	558	/* A failure to find a constant substring means that there is no need to make
	559	an expensive call to REx engine, thus we celebrate a failure. Similarly,
	560	finding a substring too deep into the string means that fewer calls to
	561	regtry() should be needed.
	562
	563	REx compiler's optimizer found 4 possible hints:
	564	a) Anchored substring;
	565	b) Fixed substring;
	566	c) Whether we are anchored (beginning-of-line or \G);
	567	d) First node (of those at offset 0) which may distinguish positions;
	568	We use a)b)d) and multiline-part of c), and try to find a position in the
	569	string which does not contradict any of them.
	570	*/
	571
	572	/* Most of the decisions we make here should have been made at compile time.
	573	The nodes of the REx which we used for the search should have been
	574	deleted from the finite automaton. */
	575
	576	char *
	577	Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV sv, char strpos,
	578	char strend, const U32 flags, re_scream_pos_data data)
	579	{
	580	dVAR;
	581	struct regexp const prog = (struct regexp )SvANY(rx);
	582	register I32 start_shift = 0;
	583	/* Should be nonnegative! */
	584	register I32 end_shift = 0;
	585	register char *s;
	586	register SV *check;
	587	char *strbeg;
	588	char *t;
	589	const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
	590	I32 ml_anch;
	591	register char other_last = NULL; / other substr checked before this */
	592	char check_at = NULL; / check substr found at this pos */
	593	char checked_upto = NULL; / how far into the string we have already checked using find_byclass*/
	594	const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
	595	RXi_GET_DECL(prog,progi);
	596	#ifdef DEBUGGING
	597	const char * const i_strpos = strpos;
	598	#endif
	599	GET_RE_DEBUG_FLAGS_DECL;
	600
	601	PERL_ARGS_ASSERT_RE_INTUIT_START;
	602	PERL_UNUSED_ARG(flags);
	603	PERL_UNUSED_ARG(data);
	604
	605	RX_MATCH_UTF8_set(rx,utf8_target);
	606
	607	if (RX_UTF8(rx)) {
	608	PL_reg_flags \|= RF_utf8;
	609	}
	610	DEBUG_EXECUTE_r(
	611	debug_start_match(rx, utf8_target, strpos, strend,
	612	sv ? "Guessing start of match in sv for"
	613	: "Guessing start of match in string for");
	614	);
	615
	616	/* CHR_DIST() would be more correct here but it makes things slow. */
	617	if (prog->minlen > strend - strpos) {
	618	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	619	"String too short... [re_intuit_start]\n"));
	620	goto fail;
	621	}
	622
	623	strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
	624	PL_regeol = strend;
	625	if (utf8_target) {
	626	if (!prog->check_utf8 && prog->check_substr)
	627	to_utf8_substr(prog);
	628	check = prog->check_utf8;
	629	} else {
	630	if (!prog->check_substr && prog->check_utf8)
	631	to_byte_substr(prog);
	632	check = prog->check_substr;
	633	}
	634	if (check == &PL_sv_undef) {
	635	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	636	"Non-utf8 string cannot match utf8 check string\n"));
	637	goto fail;
	638	}
	639	if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
	640	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
	641	\|\| ( (prog->extflags & RXf_ANCH_BOL)
	642	&& !multiline ) ); /* Check after \n? */
	643
	644	if (!ml_anch) {
	645	if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
	646	&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
	647	/* SvCUR is not set on references: SvRV and SvPVX_const overlap */
	648	&& sv && !SvROK(sv)
	649	&& (strpos != strbeg)) {
	650	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
	651	goto fail;
	652	}
	653	if (prog->check_offset_min == prog->check_offset_max &&
	654	!(prog->extflags & RXf_CANY_SEEN)) {
	655	/* Substring at constant offset from beg-of-str... */
	656	I32 slen;
	657
	658	s = HOP3c(strpos, prog->check_offset_min, strend);
	659
	660	if (SvTAIL(check)) {
	661	slen = SvCUR(check); /* >= 1 */
	662
	663	if ( strend - s > slen \|\| strend - s < slen - 1
	664	\|\| (strend - s == slen && strend[-1] != '\n')) {
	665	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
	666	goto fail_finish;
	667	}
	668	/* Now should match s[0..slen-2] */
	669	slen--;
	670	if (slen && (SvPVX_const(check) != s
	671	\|\| (slen > 1
	672	&& memNE(SvPVX_const(check), s, slen)))) {
	673	report_neq:
	674	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
	675	goto fail_finish;
	676	}
	677	}
	678	else if (SvPVX_const(check) != s
	679	\|\| ((slen = SvCUR(check)) > 1
	680	&& memNE(SvPVX_const(check), s, slen)))
	681	goto report_neq;
	682	check_at = s;
	683	goto success_at_start;
	684	}
	685	}
	686	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
	687	s = strpos;
	688	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	689	end_shift = prog->check_end_shift;
	690
	691	if (!ml_anch) {
	692	const I32 end = prog->check_offset_max + CHR_SVLEN(check)
	693	- (SvTAIL(check) != 0);
	694	const I32 eshift = CHR_DIST((U8)strend, (U8)s) - end;
	695
	696	if (end_shift < eshift)
	697	end_shift = eshift;
	698	}
	699	}
	700	else { /* Can match at random position */
	701	ml_anch = 0;
	702	s = strpos;
	703	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	704	end_shift = prog->check_end_shift;
	705
	706	/* end shift should be non negative here */
	707	}
	708
	709	#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
	710	if (end_shift < 0)
	711	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
	712	(IV)end_shift, RX_PRECOMP(prog));
	713	#endif
	714
	715	restart:
	716	/* Find a possible match in the region s..strend by looking for
	717	the "check" substring in the region corrected by start/end_shift. */
	718
	719	{
	720	I32 srch_start_shift = start_shift;
	721	I32 srch_end_shift = end_shift;
	722	U8* start_point;
	723	U8* end_point;
	724	if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
	725	srch_end_shift -= ((strbeg - s) - srch_start_shift);
	726	srch_start_shift = strbeg - s;
	727	}
	728	DEBUG_OPTIMISE_MORE_r({
	729	PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
	730	(IV)prog->check_offset_min,
	731	(IV)srch_start_shift,
	732	(IV)srch_end_shift,
	733	(IV)prog->check_end_shift);
	734	});
	735
	736	if (prog->extflags & RXf_CANY_SEEN) {
	737	start_point= (U8*)(s + srch_start_shift);
	738	end_point= (U8*)(strend - srch_end_shift);
	739	} else {
	740	start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
	741	end_point= HOP3(strend, -srch_end_shift, strbeg);
	742	}
	743	DEBUG_OPTIMISE_MORE_r({
	744	PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
	745	(int)(end_point - start_point),
	746	(int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
	747	start_point);
	748	});
	749
	750	s = fbm_instr( start_point, end_point,
	751	check, multiline ? FBMrf_MULTILINE : 0);
	752	}
	753	/* Update the count-of-usability, remove useless subpatterns,
	754	unshift s. */
	755
	756	DEBUG_EXECUTE_r({
	757	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	758	SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
	759	PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
	760	(s ? "Found" : "Did not find"),
	761	(check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
	762	? "anchored" : "floating"),
	763	quoted,
	764	RE_SV_TAIL(check),
	765	(s ? " at offset " : "...\n") );
	766	});
	767
	768	if (!s)
	769	goto fail_finish;
	770	/* Finish the diagnostic message */
	771	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
	772
	773	/* XXX dmq: first branch is for positive lookbehind...
	774	Our check string is offset from the beginning of the pattern.
	775	So we need to do any stclass tests offset forward from that
	776	point. I think. :-(
	777	*/
	778
	779
	780
	781	check_at=s;
	782
	783
	784	/* Got a candidate. Check MBOL anchoring, and the other substr.
	785	Start with the other substr.
	786	XXXX no SCREAM optimization yet - and a very coarse implementation
	787	XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
	788	always match. Probably should be marked during compile...
	789	Probably it is right to do no SCREAM here...
	790	*/
	791
	792	if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
	793	: (prog->float_substr && prog->anchored_substr))
	794	{
	795	/* Take into account the "other" substring. */
	796	/* XXXX May be hopelessly wrong for UTF... */
	797	if (!other_last)
	798	other_last = strpos;
	799	if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
	800	do_other_anchored:
	801	{
	802	char * const last = HOP3c(s, -start_shift, strbeg);
	803	char last1, last2;
	804	char * const saved_s = s;
	805	SV* must;
	806
	807	t = s - prog->check_offset_max;
	808	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	809	&& (!utf8_target
	810	\|\| ((t = (char)reghopmaybe3((U8)s, -(prog->check_offset_max), (U8*)strpos))
	811	&& t > strpos)))
	812	NOOP;
	813	else
	814	t = strpos;
	815	t = HOP3c(t, prog->anchored_offset, strend);
	816	if (t < other_last) /* These positions already checked */
	817	t = other_last;
	818	last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
	819	if (last < last1)
	820	last1 = last;
	821	/* XXXX It is not documented what units *_offsets are in.
	822	We assume bytes, but this is clearly wrong.
	823	Meaning this code needs to be carefully reviewed for errors.
	824	dmq.
	825	*/
	826
	827	/* On end-of-str: see comment below. */
	828	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	829	if (must == &PL_sv_undef) {
	830	s = (char*)NULL;
	831	DEBUG_r(must = prog->anchored_utf8); /* for debug */
	832	}
	833	else
	834	s = fbm_instr(
	835	(unsigned char*)t,
	836	HOP3(HOP3(last1, prog->anchored_offset, strend)
	837	+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
	838	must,
	839	multiline ? FBMrf_MULTILINE : 0
	840	);
	841	DEBUG_EXECUTE_r({
	842	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	843	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	844	PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
	845	(s ? "Found" : "Contradicts"),
	846	quoted, RE_SV_TAIL(must));
	847	});
	848
	849
	850	if (!s) {
	851	if (last1 >= last2) {
	852	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	853	", giving up...\n"));
	854	goto fail_finish;
	855	}
	856	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	857	", trying floating at offset %ld...\n",
	858	(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
	859	other_last = HOP3c(last1, prog->anchored_offset+1, strend);
	860	s = HOP3c(last, 1, strend);
	861	goto restart;
	862	}
	863	else {
	864	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	865	(long)(s - i_strpos)));
	866	t = HOP3c(s, -prog->anchored_offset, strbeg);
	867	other_last = HOP3c(s, 1, strend);
	868	s = saved_s;
	869	if (t == strpos)
	870	goto try_at_start;
	871	goto try_at_offset;
	872	}
	873	}
	874	}
	875	else { /* Take into account the floating substring. */
	876	char last, last1;
	877	char * const saved_s = s;
	878	SV* must;
	879
	880	t = HOP3c(s, -start_shift, strbeg);
	881	last1 = last =
	882	HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
	883	if (CHR_DIST((U8)last, (U8)t) > prog->float_max_offset)
	884	last = HOP3c(t, prog->float_max_offset, strend);
	885	s = HOP3c(t, prog->float_min_offset, strend);
	886	if (s < other_last)
	887	s = other_last;
	888	/* XXXX It is not documented what units _offsets are in. Assume bytes. /
	889	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	890	/* fbm_instr() takes into account exact value of end-of-str
	891	if the check is SvTAIL(ed). Since false positives are OK,
	892	and end-of-str is not later than strend we are OK. */
	893	if (must == &PL_sv_undef) {
	894	s = (char*)NULL;
	895	DEBUG_r(must = prog->float_utf8); /* for debug message */
	896	}
	897	else
	898	s = fbm_instr((unsigned char*)s,
	899	(unsigned char*)last + SvCUR(must)
	900	- (SvTAIL(must)!=0),
	901	must, multiline ? FBMrf_MULTILINE : 0);
	902	DEBUG_EXECUTE_r({
	903	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	904	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	905	PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
	906	(s ? "Found" : "Contradicts"),
	907	quoted, RE_SV_TAIL(must));
	908	});
	909	if (!s) {
	910	if (last1 == last) {
	911	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	912	", giving up...\n"));
	913	goto fail_finish;
	914	}
	915	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	916	", trying anchored starting at offset %ld...\n",
	917	(long)(saved_s + 1 - i_strpos)));
	918	other_last = last;
	919	s = HOP3c(t, 1, strend);
	920	goto restart;
	921	}
	922	else {
	923	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	924	(long)(s - i_strpos)));
	925	other_last = s; /* Fix this later. --Hugo */
	926	s = saved_s;
	927	if (t == strpos)
	928	goto try_at_start;
	929	goto try_at_offset;
	930	}
	931	}
	932	}
	933
	934
	935	t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
	936
	937	DEBUG_OPTIMISE_MORE_r(
	938	PerlIO_printf(Perl_debug_log,
	939	"Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
	940	(IV)prog->check_offset_min,
	941	(IV)prog->check_offset_max,
	942	(IV)(s-strpos),
	943	(IV)(t-strpos),
	944	(IV)(t-s),
	945	(IV)(strend-strpos)
	946	)
	947	);
	948
	949	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	950	&& (!utf8_target
	951	\|\| ((t = (char)reghopmaybe3((U8)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
	952	&& t > strpos)))
	953	{
	954	/* Fixed substring is found far enough so that the match
	955	cannot start at strpos. */
	956	try_at_offset:
	957	if (ml_anch && t[-1] != '\n') {
	958	/* Eventually fbm_*() should handle this, but often
	959	anchored_offset is not 0, so this check will not be wasted. */
	960	/* XXXX In the code below we prefer to look for "^" even in
	961	presence of anchored substrings. And we search even
	962	beyond the found float position. These pessimizations
	963	are historical artefacts only. */
	964	find_anchor:
	965	while (t < strend - prog->minlen) {
	966	if (*t == '\n') {
	967	if (t < check_at - prog->check_offset_min) {
	968	if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
	969	/* Since we moved from the found position,
	970	we definitely contradict the found anchored
	971	substr. Due to the above check we do not
	972	contradict "check" substr.
	973	Thus we can arrive here only if check substr
	974	is float. Redo checking for "other"=="fixed".
	975	*/
	976	strpos = t + 1;
	977	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
	978	PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
	979	goto do_other_anchored;
	980	}
	981	/* We don't contradict the found floating substring. */
	982	/* XXXX Why not check for STCLASS? */
	983	s = t + 1;
	984	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
	985	PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
	986	goto set_useful;
	987	}
	988	/* Position contradicts check-string */
	989	/* XXXX probably better to look for check-string
	990	than for "\n", so one should lower the limit for t? */
	991	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
	992	PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
	993	other_last = strpos = s = t + 1;
	994	goto restart;
	995	}
	996	t++;
	997	}
	998	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
	999	PL_colors[0], PL_colors[1]));
	1000	goto fail_finish;
	1001	}
	1002	else {
	1003	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
	1004	PL_colors[0], PL_colors[1]));
	1005	}
	1006	s = t;
	1007	set_useful:
	1008	++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
	1009	}
	1010	else {
	1011	/* The found string does not prohibit matching at strpos,
	1012	- no optimization of calling REx engine can be performed,
	1013	unless it was an MBOL and we are not after MBOL,
	1014	or a future STCLASS check will fail this. */
	1015	try_at_start:
	1016	/* Even in this situation we may use MBOL flag if strpos is offset
	1017	wrt the start of the string. */
	1018	if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
	1019	&& (strpos != strbeg) && strpos[-1] != '\n'
	1020	/* May be due to an implicit anchor of m{.foo} /
	1021	&& !(prog->intflags & PREGf_IMPLICIT))
	1022	{
	1023	t = strpos;
	1024	goto find_anchor;
	1025	}
	1026	DEBUG_EXECUTE_r( if (ml_anch)
	1027	PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
	1028	(long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
	1029	);
	1030	success_at_start:
	1031	if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
	1032	&& (utf8_target ? (
	1033	prog->check_utf8 /* Could be deleted already */
	1034	&& --BmUSEFUL(prog->check_utf8) < 0
	1035	&& (prog->check_utf8 == prog->float_utf8)
	1036	) : (
	1037	prog->check_substr /* Could be deleted already */
	1038	&& --BmUSEFUL(prog->check_substr) < 0
	1039	&& (prog->check_substr == prog->float_substr)
	1040	)))
	1041	{
	1042	/* If flags & SOMETHING - do not do it many times on the same match */
	1043	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
	1044	/* XXX Does the destruction order has to change with utf8_target? */
	1045	SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
	1046	SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
	1047	prog->check_substr = prog->check_utf8 = NULL; /* disable */
	1048	prog->float_substr = prog->float_utf8 = NULL; /* clear */
	1049	check = NULL; /* abort */
	1050	s = strpos;
	1051	/* XXXX If the check string was an implicit check MBOL, then we need to unset the relevant flag
	1052	see http://bugs.activestate.com/show_bug.cgi?id=87173 */
	1053	if (prog->intflags & PREGf_IMPLICIT)
	1054	prog->extflags &= ~RXf_ANCH_MBOL;
	1055	/* XXXX This is a remnant of the old implementation. It
	1056	looks wasteful, since now INTUIT can use many
	1057	other heuristics. */
	1058	prog->extflags &= ~RXf_USE_INTUIT;
	1059	/* XXXX What other flags might need to be cleared in this branch? */
	1060	}
	1061	else
	1062	s = strpos;
	1063	}
	1064
	1065	/* Last resort... */
	1066	/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
	1067	/* trie stclasses are too expensive to use here, we are better off to
	1068	leave it to regmatch itself */
	1069	if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
	1070	/* minlen == 0 is possible if regstclass is \b or \B,
	1071	and the fixed substr is ''$.
	1072	Since minlen is already taken into account, s+1 is before strend;
	1073	accidentally, minlen >= 1 guaranties no false positives at s + 1
	1074	even for \b or \B. But (minlen? 1 : 0) below assumes that
	1075	regstclass does not come from lookahead... */
	1076	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
	1077	This leaves EXACTF-ish only, which are dealt with in find_byclass(). */
	1078	const U8* const str = (U8*)STRING(progi->regstclass);
	1079	const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
	1080	? CHR_DIST(str+STR_LEN(progi->regstclass), str)
	1081	: 1);
	1082	char * endpos;
	1083	if (prog->anchored_substr \|\| prog->anchored_utf8 \|\| ml_anch)
	1084	endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
	1085	else if (prog->float_substr \|\| prog->float_utf8)
	1086	endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
	1087	else
	1088	endpos= strend;
	1089
	1090	if (checked_upto < s)
	1091	checked_upto = s;
	1092	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
	1093	(IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
	1094
	1095	t = s;
	1096	s = find_byclass(prog, progi->regstclass, checked_upto, endpos, NULL);
	1097	if (s) {
	1098	checked_upto = s;
	1099	} else {
	1100	#ifdef DEBUGGING
	1101	const char *what = NULL;
	1102	#endif
	1103	if (endpos == strend) {
	1104	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1105	"Could not match STCLASS...\n") );
	1106	goto fail;
	1107	}
	1108	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1109	"This position contradicts STCLASS...\n") );
	1110	if ((prog->extflags & RXf_ANCH) && !ml_anch)
	1111	goto fail;
	1112	checked_upto = HOPBACKc(endpos, start_shift);
	1113	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
	1114	(IV)start_shift, (IV)(check_at - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
	1115	/* Contradict one of substrings */
	1116	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	1117	if ((utf8_target ? prog->anchored_utf8 : prog->anchored_substr) == check) {
	1118	DEBUG_EXECUTE_r( what = "anchored" );
	1119	hop_and_restart:
	1120	s = HOP3c(t, 1, strend);
	1121	if (s + start_shift + end_shift > strend) {
	1122	/* XXXX Should be taken into account earlier? */
	1123	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1124	"Could not match STCLASS...\n") );
	1125	goto fail;
	1126	}
	1127	if (!check)
	1128	goto giveup;
	1129	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1130	"Looking for %s substr starting at offset %ld...\n",
	1131	what, (long)(s + start_shift - i_strpos)) );
	1132	goto restart;
	1133	}
	1134	/* Have both, check_string is floating */
	1135	if (t + start_shift >= check_at) /* Contradicts floating=check */
	1136	goto retry_floating_check;
	1137	/* Recheck anchored substring, but not floating... */
	1138	s = check_at;
	1139	if (!check)
	1140	goto giveup;
	1141	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1142	"Looking for anchored substr starting at offset %ld...\n",
	1143	(long)(other_last - i_strpos)) );
	1144	goto do_other_anchored;
	1145	}
	1146	/* Another way we could have checked stclass at the
	1147	current position only: */
	1148	if (ml_anch) {
	1149	s = t = t + 1;
	1150	if (!check)
	1151	goto giveup;
	1152	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1153	"Looking for /%s^%s/m starting at offset %ld...\n",
	1154	PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
	1155	goto try_at_offset;
	1156	}
	1157	if (!(utf8_target ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
	1158	goto fail;
	1159	/* Check is floating substring. */
	1160	retry_floating_check:
	1161	t = check_at - start_shift;
	1162	DEBUG_EXECUTE_r( what = "floating" );
	1163	goto hop_and_restart;
	1164	}
	1165	if (t != s) {
	1166	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1167	"By STCLASS: moving %ld --> %ld\n",
	1168	(long)(t - i_strpos), (long)(s - i_strpos))
	1169	);
	1170	}
	1171	else {
	1172	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1173	"Does not contradict STCLASS...\n");
	1174	);
	1175	}
	1176	}
	1177	giveup:
	1178	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
	1179	PL_colors[4], (check ? "Guessed" : "Giving up"),
	1180	PL_colors[5], (long)(s - i_strpos)) );
	1181	return s;
	1182
	1183	fail_finish: /* Substring not found */
	1184	if (prog->check_substr \|\| prog->check_utf8) /* could be removed already */
	1185	BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
	1186	fail:
	1187	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
	1188	PL_colors[4], PL_colors[5]));
	1189	return NULL;
	1190	}
	1191
		/* Declare a const local named trie_type (of an anonymous enum type) that
		 * records how trie input is to be read.  When scan->flags == EXACT the
		 * value is trie_utf8 if utf8_target is true and trie_plain otherwise; for
		 * any other flags value it is trie_utf8_fold if utf8_target is true and
		 * trie_latin_utf8_fold otherwise.  utf8_target is read from the enclosing
		 * scope.  The expansion carries no trailing semicolon; the user of the
		 * macro supplies it. */
	1192	#define DECL_TRIE_TYPE(scan) \
	1193	const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
	1194	trie_type = ((scan->flags == EXACT) \
	1195	? (utf8_target ? trie_utf8 : trie_plain) \
	1196	: (utf8_target ? trie_utf8_fold : trie_latin_utf8_fold))
	1197
		/* Read one character for trie matching and translate it to a trie
		 * character id.  All arguments except trie_type, trie, widecharmap,
		 * foldbuf and uniflags are assigned to by the expansion.
		 *
		 * Step 1 - set uvc (the code point) and len, according to trie_type:
		 *   trie_utf8_fold, trie_latin_utf8_fold:
		 *     if foldlen > 0, decode the next character from uscan with
		 *     utf8n_to_uvuni, subtract its length from foldlen, advance uscan by
		 *     it, and set len to 0; otherwise fold the character at uc into
		 *     foldbuf (to_utf8_fold for trie_utf8_fold, _to_fold_latin1 for
		 *     trie_latin_utf8_fold), set len to UTF8SKIP(uc) or 1 respectively,
		 *     subtract skiplen = UNISKIP(uvc) from foldlen and point uscan at
		 *     foldbuf + skiplen.
		 *   trie_utf8:  decode uvc from uc with utf8n_to_uvuni, which also sets len.
		 *   trie_plain: uvc is the byte *uc and len is 1.
		 *
		 * Step 2 - set charid: for uvc < 256 it is trie->charmap[uvc]; otherwise
		 * it is 0 unless widecharmap is non-NULL and hv_fetch finds an entry
		 * keyed by the sizeof(UV) bytes of uvc, in which case it is that entry's
		 * SvIV cast to U16.
		 *
		 * NOTE(review): len == 0 after reading from uscan presumably tells the
		 * caller not to advance uc - confirm against the users of this macro. */
	1198	#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
	1199	uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
	1200	STRLEN skiplen; \
	1201	switch (trie_type) { \
	1202	case trie_utf8_fold: \
	1203	if ( foldlen>0 ) { \
	1204	uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	1205	foldlen -= len; \
	1206	uscan += len; \
	1207	len=0; \
	1208	} else { \
	1209	uvc = to_utf8_fold( (const U8*) uc, foldbuf, &foldlen ); \
	1210	len = UTF8SKIP(uc); \
	1211	skiplen = UNISKIP( uvc ); \
	1212	foldlen -= skiplen; \
	1213	uscan = foldbuf + skiplen; \
	1214	} \
	1215	break; \
	1216	case trie_latin_utf8_fold: \
	1217	if ( foldlen>0 ) { \
	1218	uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	1219	foldlen -= len; \
	1220	uscan += len; \
	1221	len=0; \
	1222	} else { \
	1223	len = 1; \
	1224	uvc = _to_fold_latin1( (U8) *uc, foldbuf, &foldlen, 1); \
	1225	skiplen = UNISKIP( uvc ); \
	1226	foldlen -= skiplen; \
	1227	uscan = foldbuf + skiplen; \
	1228	} \
	1229	break; \
	1230	case trie_utf8: \
	1231	uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \
	1232	break; \
	1233	case trie_plain: \
	1234	uvc = (UV)*uc; \
	1235	len = 1; \
	1236	} \
	1237	if (uvc < 256) { \
	1238	charid = trie->charmap[ uvc ]; \
	1239	} \
	1240	else { \
	1241	charid = 0; \
	1242	if (widecharmap) { \
	1243	SV** const svpp = hv_fetch(widecharmap, \
	1244	(char*)&uvc, sizeof(UV), 0); \
	1245	if (svpp) \
	1246	charid = (U16)SvIV(*svpp); \
	1247	} \
	1248	} \
	1249	} STMT_END
	1250
	1251	#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
	1252	STMT_START { \
	1253	while (s <= e) { \
	1254	if ( (CoNd) \
	1255	&& (ln == 1 \|\| folder(s, pat_string, ln)) \
	1256	&& (!reginfo \|\| regtry(reginfo, &s)) ) \
	1257	goto got_it; \
	1258	s++; \
	1259	} \
	1260	} STMT_END
	1261
		/* Run the statement fragment CoDe at successive positions of s, advancing
		 * s by uskip after each run.  uskip is set to UTF8SKIP(s) in the loop
		 * condition, and the loop continues only while s + uskip <= strend.
		 * Uses s, uskip and strend from the enclosing scope. */
	1262	#define REXEC_FBC_UTF8_SCAN(CoDe) \
	1263	STMT_START { \
	1264	while (s + (uskip = UTF8SKIP(s)) <= strend) { \
	1265	CoDe \
	1266	s += uskip; \
	1267	} \
	1268	} STMT_END
	1269
		/* Run the statement fragment CoDe at every byte position of s while
		 * s < strend, incrementing s by one after each run.  Uses s and strend
		 * from the enclosing scope. */
	1270	#define REXEC_FBC_SCAN(CoDe) \
	1271	STMT_START { \
	1272	while (s < strend) { \
	1273	CoDe \
	1274	s++; \
	1275	} \
	1276	} STMT_END
	1277
	1278	#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
	1279	REXEC_FBC_UTF8_SCAN( \
	1280	if (CoNd) { \
	1281	if (tmp && (!reginfo \|\| regtry(reginfo, &s))) \
	1282	goto got_it; \
	1283	else \
	1284	tmp = doevery; \
	1285	} \
	1286	else \
	1287	tmp = 1; \
	1288	)
	1289
	1290	#define REXEC_FBC_CLASS_SCAN(CoNd) \
	1291	REXEC_FBC_SCAN( \
	1292	if (CoNd) { \
	1293	if (tmp && (!reginfo \|\| regtry(reginfo, &s))) \
	1294	goto got_it; \
	1295	else \
	1296	tmp = doevery; \
	1297	} \
	1298	else \
	1299	tmp = 1; \
	1300	)
	1301
	1302	#define REXEC_FBC_TRYIT \
	1303	if ((!reginfo \|\| regtry(reginfo, &s))) \
	1304	goto got_it
	1305
		/* Class scan dispatching on the target's encoding: when utf8_target is
		 * true run REXEC_FBC_UTF8_CLASS_SCAN with CoNdUtF8, otherwise run
		 * REXEC_FBC_CLASS_SCAN with CoNd.  Expands to a bare if/else statement
		 * (not wrapped in STMT_START/STMT_END). */
	1306	#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
	1307	if (utf8_target) { \
	1308	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1309	} \
	1310	else { \
	1311	REXEC_FBC_CLASS_SCAN(CoNd); \
	1312	}
	1313
		/* Same dispatch as REXEC_FBC_CSCAN, except that in the utf8_target branch
		 * the statement UtFpReLoAd is executed once before the UTF-8 class scan
		 * starts.  The non-utf8 branch does not execute UtFpReLoAd. */
	1314	#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
	1315	if (utf8_target) { \
	1316	UtFpReLoAd; \
	1317	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1318	} \
	1319	else { \
	1320	REXEC_FBC_CLASS_SCAN(CoNd); \
	1321	}
	1322
	1323	#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
	1324	PL_reg_flags \|= RF_tainted; \
	1325	if (utf8_target) { \
	1326	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1327	} \
	1328	else { \
	1329	REXEC_FBC_CLASS_SCAN(CoNd); \
	1330	}
	1331
		/* Shorthand for dump_exec_pos(): forwards li, s and doutf8 and fills the
		 * three middle arguments with PL_regeol, PL_bostr and PL_reg_starttry. */
	1332	#define DUMP_EXEC_POS(li,s,doutf8) \
	1333	dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
	1334
	1335
		/* Boundary scan over a UTF-8 target that uses only the byte-level test
		 * TEST_NON_UTF8.  tmp is first set to TEST_NON_UTF8 of the byte before s
		 * ('\n' is used when s is at PL_bostr).  Then, stepping with
		 * REXEC_FBC_UTF8_SCAN, each position compares tmp against
		 * !TEST_NON_UTF8(*s): when they are equal tmp is flipped and IF_SUCCESS is
		 * executed, otherwise IF_FAIL is executed.
		 * The last line ends in a continuation backslash, so the definition runs
		 * on into the line that follows it. */
	1336	#define UTF8_NOLOAD(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1337	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1338	tmp = TEST_NON_UTF8(tmp); \
	1339	REXEC_FBC_UTF8_SCAN( \
	1340	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1341	tmp = !tmp; \
	1342	IF_SUCCESS; \
	1343	} \
	1344	else { \
	1345	IF_FAIL; \
	1346	} \
	1347	); \
	1348
	1349	#define UTF8_LOAD(TeSt1_UtF8, TeSt2_UtF8, IF_SUCCESS, IF_FAIL) \
	1350	if (s == PL_bostr) { \
	1351	tmp = '\n'; \
	1352	} \
	1353	else { \
	1354	U8 * const r = reghop3((U8)s, -1, (U8)PL_bostr); \
	1355	tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT); \
	1356	} \
	1357	tmp = TeSt1_UtF8; \
	1358	LOAD_UTF8_CHARCLASS_ALNUM(); \
	1359	REXEC_FBC_UTF8_SCAN( \
	1360	if (tmp == ! (TeSt2_UtF8)) { \
	1361	tmp = !tmp; \
	1362	IF_SUCCESS; \
	1363	} \
	1364	else { \
	1365	IF_FAIL; \
	1366	} \
	1367	); \
	1368
	1369	/* The only difference between the BOUND and NBOUND cases is that
	1370	* REXEC_FBC_TRYIT is called when matched in BOUND, and when non-matched in
	1371	* NBOUND. This is accomplished by passing it in either the if or else clause,
	1372	* with the other one being empty */
		/* Boundary search: FBC_BOUND_COMMON with UTF8_LOAD(TEST1_UTF8, TEST2_UTF8)
		 * as the UTF-8 path.  REXEC_FBC_TRYIT occupies the IF_SUCCESS slot and
		 * PLACEHOLDER the IF_FAIL slot, in both the UTF-8 and non-UTF-8 paths. */
	1373	#define FBC_BOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1374	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1375
		/* Boundary search whose UTF-8 path is UTF8_NOLOAD(TEST_NON_UTF8), i.e. it
		 * uses the byte-level test in both paths.  TEST1_UTF8 and TEST2_UTF8 are
		 * accepted but do not appear in the expansion.  REXEC_FBC_TRYIT occupies
		 * the IF_SUCCESS slot and PLACEHOLDER the IF_FAIL slot. */
	1376	#define FBC_BOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1377	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1378
		/* Non-boundary search: same composition as the boundary form using
		 * UTF8_LOAD(TEST1_UTF8, TEST2_UTF8), but with the slots swapped -
		 * PLACEHOLDER occupies IF_SUCCESS and REXEC_FBC_TRYIT occupies IF_FAIL,
		 * in both the UTF-8 and non-UTF-8 paths. */
	1379	#define FBC_NBOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1380	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1381
		/* Non-boundary search whose UTF-8 path is UTF8_NOLOAD(TEST_NON_UTF8).
		 * TEST1_UTF8 and TEST2_UTF8 are accepted but do not appear in the
		 * expansion.  PLACEHOLDER occupies the IF_SUCCESS slot and
		 * REXEC_FBC_TRYIT the IF_FAIL slot. */
	1382	#define FBC_NBOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1383	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1384
	1385
	1386	/* Common to the BOUND and NBOUND cases. Unfortunately the UTF8 tests need to
	1387	* be passed in completely with the variable name being tested, which isn't
	1388	* such a clean interface, but this is easier to read than it was before. We
	1389	* are looking for the boundary (or non-boundary) between a word and non-word
	1390	* character. The utf8 and non-utf8 cases have the same logic, but the details
	1391	* must be different. Find the "wordness" of the character just prior to this
	1392	* one, and compare it with the wordness of this one. If they differ, we have
	1393	* a boundary. At the beginning of the string, pretend that the previous
	1394	* character was a new-line */
	1395	#define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1396	if (utf8_target) { \
	1397	UTF8_CODE \
	1398	} \
	1399	else { /* Not utf8 */ \
	1400	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1401	tmp = TEST_NON_UTF8(tmp); \
	1402	REXEC_FBC_SCAN( \
	1403	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1404	tmp = !tmp; \
	1405	IF_SUCCESS; \
	1406	} \
	1407	else { \
	1408	IF_FAIL; \
	1409	} \
	1410	); \
	1411	} \
	1412	if ((!prog->minlen && tmp) && (!reginfo \|\| regtry(reginfo, &s))) \
	1413	goto got_it;
	1414
	1415	/* We know what class REx starts with. Try to find this position... */
	1416	/* if reginfo is NULL, it's a dry run */
	1417	/* annoyingly all the vars in this routine have different names from their counterparts
	1418	in regmatch. /grrr */
	1419
	1420	STATIC char *
	1421	S_find_byclass(pTHX_ regexp * prog, const regnode c, char s,
	1422	const char strend, regmatch_info reginfo)
	1423	{
	1424	dVAR;
	1425	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
	1426	char pat_string; / The pattern's exactish string */
	1427	char pat_end; / ptr to end char of pat_string */
	1428	re_fold_t folder; /* Function for computing non-utf8 folds */
	1429	const U8 fold_array; / array for folding ords < 256 */
	1430	STRLEN ln;
	1431	STRLEN lnc;
	1432	register STRLEN uskip;
	1433	U8 c1;
	1434	U8 c2;
	1435	char *e;
	1436	register I32 tmp = 1; /* Scratch variable? */
	1437	register const bool utf8_target = PL_reg_match_utf8;
	1438	UV utf8_fold_flags = 0;
	1439	RXi_GET_DECL(prog,progi);
	1440
	1441	PERL_ARGS_ASSERT_FIND_BYCLASS;
	1442
	1443	/* We know what class it must start with. */
	1444	switch (OP(c)) {
	1445	case ANYOFV:
	1446	case ANYOF:
	1447	if (utf8_target \|\| OP(c) == ANYOFV) {
	1448	STRLEN inclasslen = strend - s;
	1449	REXEC_FBC_UTF8_CLASS_SCAN(
	1450	reginclass(prog, c, (U8*)s, &inclasslen, utf8_target));
	1451	}
	1452	else {
	1453	REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
	1454	}
	1455	break;
	1456	case CANY:
	1457	REXEC_FBC_SCAN(
	1458	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1459	goto got_it;
	1460	else
	1461	tmp = doevery;
	1462	);
	1463	break;
	1464
	1465	case EXACTFA:
	1466	if (UTF_PATTERN \|\| utf8_target) {
	1467	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	1468	goto do_exactf_utf8;
	1469	}
	1470	fold_array = PL_fold_latin1; /* Latin1 folds are not affected by */
	1471	folder = foldEQ_latin1; /* /a, except the sharp s one which */
	1472	goto do_exactf_non_utf8; /* isn't dealt with by these */
	1473
	1474	case EXACTF:
	1475	if (utf8_target) {
	1476
	1477	/* regcomp.c already folded this if pattern is in UTF-8 */
	1478	utf8_fold_flags = 0;
	1479	goto do_exactf_utf8;
	1480	}
	1481	fold_array = PL_fold;
	1482	folder = foldEQ;
	1483	goto do_exactf_non_utf8;
	1484
	1485	case EXACTFL:
	1486	if (UTF_PATTERN \|\| utf8_target) {
	1487	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	1488	goto do_exactf_utf8;
	1489	}
	1490	fold_array = PL_fold_locale;
	1491	folder = foldEQ_locale;
	1492	goto do_exactf_non_utf8;
	1493
	1494	case EXACTFU_SS:
	1495	if (UTF_PATTERN) {
	1496	utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
	1497	}
	1498	goto do_exactf_utf8;
	1499
	1500	case EXACTFU_TRICKYFOLD:
	1501	case EXACTFU:
	1502	if (UTF_PATTERN \|\| utf8_target) {
	1503	utf8_fold_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	1504	goto do_exactf_utf8;
	1505	}
	1506
	1507	/* Any 'ss' in the pattern should have been replaced by regcomp,
	1508	* so we don't have to worry here about this single special case
	1509	* in the Latin1 range */
	1510	fold_array = PL_fold_latin1;
	1511	folder = foldEQ_latin1;
	1512
	1513	/* FALL THROUGH */
	1514
	1515	do_exactf_non_utf8: /* Neither pattern nor string are UTF8, and there
	1516	are no glitches with fold-length differences
	1517	between the target string and pattern */
	1518
	1519	/* The idea in the non-utf8 EXACTF* cases is to first find the
	1520	* first character of the EXACTF* node and then, if necessary,
	1521	* case-insensitively compare the full text of the node. c1 is the
	1522	* first character. c2 is its fold. This logic will not work for
	1523	* Unicode semantics and the german sharp ss, which hence should
	1524	* not be compiled into a node that gets here. */
	1525	pat_string = STRING(c);
	1526	ln = STR_LEN(c); /* length to match in octets/bytes */
	1527
	1528	/* We know that we have to match at least 'ln' bytes (which is the
	1529	* same as characters, since not utf8). If we have to match 3
	1530	* characters, and there are only 2 availabe, we know without
	1531	* trying that it will fail; so don't start a match past the
	1532	* required minimum number from the far end */
	1533	e = HOP3c(strend, -((I32)ln), s);
	1534
	1535	if (!reginfo && e < s) {
	1536	e = s; /* Due to minlen logic of intuit() */
	1537	}
	1538
	1539	c1 = *pat_string;
	1540	c2 = fold_array[c1];
	1541	if (c1 == c2) { /* If char and fold are the same */
	1542	REXEC_FBC_EXACTISH_SCAN((U8)s == c1);
	1543	}
	1544	else {
	1545	REXEC_FBC_EXACTISH_SCAN((U8)s == c1 \|\| (U8)s == c2);
	1546	}
	1547	break;
	1548
	1549	do_exactf_utf8:
	1550	{
	1551	unsigned expansion;
	1552
	1553
	1554	/* If one of the operands is in utf8, we can't use the simpler
	1555	* folding above, due to the fact that many different characters
	1556	* can have the same fold, or portion of a fold, or different-
	1557	* length fold */
	1558	pat_string = STRING(c);
	1559	ln = STR_LEN(c); /* length to match in octets/bytes */
	1560	pat_end = pat_string + ln;
	1561	lnc = (UTF_PATTERN) /* length to match in characters */
	1562	? utf8_length((U8 ) pat_string, (U8 ) pat_end)
	1563	: ln;
	1564
	1565	/* We have 'lnc' characters to match in the pattern, but because of
	1566	* multi-character folding, each character in the target can match
	1567	* up to 3 characters (Unicode guarantees it will never exceed
	1568	* this) if it is utf8-encoded; and up to 2 if not (based on the
	1569	* fact that the Latin 1 folds are already determined, and the
	1570	* only multi-char fold in that range is the sharp-s folding to
	1571	* 'ss'. Thus, a pattern character can match as little as 1/3 of a
	1572	* string character. Adjust lnc accordingly, rounding up, so that
	1573	* if we need to match at least 4+1/3 chars, that really is 5. */
	1574	expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
	1575	lnc = (lnc + expansion - 1) / expansion;
	1576
	1577	/* As in the non-UTF8 case, if we have to match 3 characters, and
	1578	* only 2 are left, it's guaranteed to fail, so don't start a
	1579	* match that would require us to go beyond the end of the string
	1580	*/
	1581	e = HOP3c(strend, -((I32)lnc), s);
	1582
	1583	if (!reginfo && e < s) {
	1584	e = s; /* Due to minlen logic of intuit() */
	1585	}
	1586
	1587	/* XXX Note that we could recalculate e to stop the loop earlier,
	1588	* as the worst case expansion above will rarely be met, and as we
	1589	* go along we would usually find that e moves further to the left.
	1590	* This would happen only after we reached the point in the loop
	1591	* where if there were no expansion we should fail. Unclear if
	1592	* worth the expense */
	1593
	1594	while (s <= e) {
	1595	char my_strend= (char )strend;
	1596	if (foldEQ_utf8_flags(s, &my_strend, 0, utf8_target,
	1597	pat_string, NULL, ln, cBOOL(UTF_PATTERN), utf8_fold_flags)
	1598	&& (!reginfo \|\| regtry(reginfo, &s)) )
	1599	{
	1600	goto got_it;
	1601	}
	1602	s += (utf8_target) ? UTF8SKIP(s) : 1;
	1603	}
	1604	break;
	1605	}
	1606	case BOUNDL:
	1607	PL_reg_flags \|= RF_tainted;
	1608	FBC_BOUND(isALNUM_LC,
	1609	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1610	isALNUM_LC_utf8((U8*)s));
	1611	break;
	1612	case NBOUNDL:
	1613	PL_reg_flags \|= RF_tainted;
	1614	FBC_NBOUND(isALNUM_LC,
	1615	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1616	isALNUM_LC_utf8((U8*)s));
	1617	break;
	1618	case BOUND:
	1619	FBC_BOUND(isWORDCHAR,
	1620	isALNUM_uni(tmp),
	1621	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1622	break;
	1623	case BOUNDA:
	1624	FBC_BOUND_NOLOAD(isWORDCHAR_A,
	1625	isWORDCHAR_A(tmp),
	1626	isWORDCHAR_A((U8*)s));
	1627	break;
	1628	case NBOUND:
	1629	FBC_NBOUND(isWORDCHAR,
	1630	isALNUM_uni(tmp),
	1631	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1632	break;
	1633	case NBOUNDA:
	1634	FBC_NBOUND_NOLOAD(isWORDCHAR_A,
	1635	isWORDCHAR_A(tmp),
	1636	isWORDCHAR_A((U8*)s));
	1637	break;
	1638	case BOUNDU:
	1639	FBC_BOUND(isWORDCHAR_L1,
	1640	isALNUM_uni(tmp),
	1641	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1642	break;
	1643	case NBOUNDU:
	1644	FBC_NBOUND(isWORDCHAR_L1,
	1645	isALNUM_uni(tmp),
	1646	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1647	break;
	1648	case ALNUML:
	1649	REXEC_FBC_CSCAN_TAINT(
	1650	isALNUM_LC_utf8((U8*)s),
	1651	isALNUM_LC(*s)
	1652	);
	1653	break;
	1654	case ALNUMU:
	1655	REXEC_FBC_CSCAN_PRELOAD(
	1656	LOAD_UTF8_CHARCLASS_ALNUM(),
	1657	swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1658	isWORDCHAR_L1((U8) *s)
	1659	);
	1660	break;
	1661	case ALNUM:
	1662	REXEC_FBC_CSCAN_PRELOAD(
	1663	LOAD_UTF8_CHARCLASS_ALNUM(),
	1664	swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1665	isWORDCHAR((U8) *s)
	1666	);
	1667	break;
	1668	case ALNUMA:
	1669	/* Don't need to worry about utf8, as it can match only a single
	1670	* byte invariant character */
	1671	REXEC_FBC_CLASS_SCAN( isWORDCHAR_A(*s));
	1672	break;
	1673	case NALNUMU:
	1674	REXEC_FBC_CSCAN_PRELOAD(
	1675	LOAD_UTF8_CHARCLASS_ALNUM(),
	1676	!swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1677	! isWORDCHAR_L1((U8) *s)
	1678	);
	1679	break;
	1680	case NALNUM:
	1681	REXEC_FBC_CSCAN_PRELOAD(
	1682	LOAD_UTF8_CHARCLASS_ALNUM(),
	1683	!swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target),
	1684	! isALNUM(*s)
	1685	);
	1686	break;
	1687	case NALNUMA:
	1688	REXEC_FBC_CSCAN(
	1689	!isWORDCHAR_A(*s),
	1690	!isWORDCHAR_A(*s)
	1691	);
	1692	break;
	1693	case NALNUML:
	1694	REXEC_FBC_CSCAN_TAINT(
	1695	!isALNUM_LC_utf8((U8*)s),
	1696	!isALNUM_LC(*s)
	1697	);
	1698	break;
	1699	case SPACEU:
	1700	REXEC_FBC_CSCAN_PRELOAD(
	1701	LOAD_UTF8_CHARCLASS_SPACE(),
	1702	s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target),
	1703	isSPACE_L1((U8) *s)
	1704	);
	1705	break;
	1706	case SPACE:
	1707	REXEC_FBC_CSCAN_PRELOAD(
	1708	LOAD_UTF8_CHARCLASS_SPACE(),
	1709	s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target),
	1710	isSPACE((U8) *s)
	1711	);
	1712	break;
	1713	case SPACEA:
	1714	/* Don't need to worry about utf8, as it can match only a single
	1715	* byte invariant character */
	1716	REXEC_FBC_CLASS_SCAN( isSPACE_A(*s));
	1717	break;
	1718	case SPACEL:
	1719	REXEC_FBC_CSCAN_TAINT(
	1720	isSPACE_LC_utf8((U8*)s),
	1721	isSPACE_LC(*s)
	1722	);
	1723	break;
	1724	case NSPACEU:
	1725	REXEC_FBC_CSCAN_PRELOAD(
	1726	LOAD_UTF8_CHARCLASS_SPACE(),
	1727	!( s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target)),
	1728	! isSPACE_L1((U8) *s)
	1729	);
	1730	break;
	1731	case NSPACE:
	1732	REXEC_FBC_CSCAN_PRELOAD(
	1733	LOAD_UTF8_CHARCLASS_SPACE(),
	1734	!(s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target)),
	1735	! isSPACE((U8) *s)
	1736	);
	1737	break;
	1738	case NSPACEA:
	1739	REXEC_FBC_CSCAN(
	1740	!isSPACE_A(*s),
	1741	!isSPACE_A(*s)
	1742	);
	1743	break;
	1744	case NSPACEL:
	1745	REXEC_FBC_CSCAN_TAINT(
	1746	!isSPACE_LC_utf8((U8*)s),
	1747	!isSPACE_LC(*s)
	1748	);
	1749	break;
	1750	case DIGIT:
	1751	REXEC_FBC_CSCAN_PRELOAD(
	1752	LOAD_UTF8_CHARCLASS_DIGIT(),
	1753	swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
	1754	isDIGIT(*s)
	1755	);
	1756	break;
	1757	case DIGITA:
	1758	/* Don't need to worry about utf8, as it can match only a single
	1759	* byte invariant character */
	1760	REXEC_FBC_CLASS_SCAN( isDIGIT_A(*s));
	1761	break;
	1762	case DIGITL:
	1763	REXEC_FBC_CSCAN_TAINT(
	1764	isDIGIT_LC_utf8((U8*)s),
	1765	isDIGIT_LC(*s)
	1766	);
	1767	break;
	1768	case NDIGIT:
	1769	REXEC_FBC_CSCAN_PRELOAD(
	1770	LOAD_UTF8_CHARCLASS_DIGIT(),
	1771	!swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
	1772	!isDIGIT(*s)
	1773	);
	1774	break;
	1775	case NDIGITA:
	1776	REXEC_FBC_CSCAN(
	1777	!isDIGIT_A(*s),
	1778	!isDIGIT_A(*s)
	1779	);
	1780	break;
	1781	case NDIGITL:
	1782	REXEC_FBC_CSCAN_TAINT(
	1783	!isDIGIT_LC_utf8((U8*)s),
	1784	!isDIGIT_LC(*s)
	1785	);
	1786	break;
	1787	case LNBREAK:
	1788	REXEC_FBC_CSCAN(
	1789	is_LNBREAK_utf8(s),
	1790	is_LNBREAK_latin1(s)
	1791	);
	1792	break;
	1793	case VERTWS:
	1794	REXEC_FBC_CSCAN(
	1795	is_VERTWS_utf8(s),
	1796	is_VERTWS_latin1(s)
	1797	);
	1798	break;
	1799	case NVERTWS:
	1800	REXEC_FBC_CSCAN(
	1801	!is_VERTWS_utf8(s),
	1802	!is_VERTWS_latin1(s)
	1803	);
	1804	break;
	1805	case HORIZWS:
	1806	REXEC_FBC_CSCAN(
	1807	is_HORIZWS_utf8(s),
	1808	is_HORIZWS_latin1(s)
	1809	);
	1810	break;
	1811	case NHORIZWS:
	1812	REXEC_FBC_CSCAN(
	1813	!is_HORIZWS_utf8(s),
	1814	!is_HORIZWS_latin1(s)
	1815	);
	1816	break;
	1817	case AHOCORASICKC:
	1818	case AHOCORASICK:
	1819	{
	1820	DECL_TRIE_TYPE(c);
	1821	/* what trie are we using right now */
	1822	reg_ac_data *aho
	1823	= (reg_ac_data*)progi->data->data[ ARG( c ) ];
	1824	reg_trie_data *trie
	1825	= (reg_trie_data*)progi->data->data[ aho->trie ];
	1826	HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
	1827
	1828	const char *last_start = strend - trie->minlen;
	1829	#ifdef DEBUGGING
	1830	const char *real_start = s;
	1831	#endif
	1832	STRLEN maxlen = trie->maxlen;
	1833	SV *sv_points;
	1834	U8 *points; / map of where we were in the input string
	1835	when reading a given char. For ASCII this
	1836	is unnecessary overhead as the relationship
	1837	is always 1:1, but for Unicode, especially
	1838	case folded Unicode this is not true. */
	1839	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1840	U8 *bitmap=NULL;
	1841
	1842
	1843	GET_RE_DEBUG_FLAGS_DECL;
	1844
	1845	/* We can't just allocate points here. We need to wrap it in
	1846	* an SV so it gets freed properly if there is a croak while
	1847	* running the match */
	1848	ENTER;
	1849	SAVETMPS;
	1850	sv_points=newSV(maxlen * sizeof(U8 *));
	1851	SvCUR_set(sv_points,
	1852	maxlen * sizeof(U8 *));
	1853	SvPOK_on(sv_points);
	1854	sv_2mortal(sv_points);
	1855	points=(U8**)SvPV_nolen(sv_points );
	1856	if ( trie_type != trie_utf8_fold
	1857	&& (trie->bitmap \|\| OP(c)==AHOCORASICKC) )
	1858	{
	1859	if (trie->bitmap)
	1860	bitmap=(U8*)trie->bitmap;
	1861	else
	1862	bitmap=(U8*)ANYOF_BITMAP(c);
	1863	}
	1864	/* this is the Aho-Corasick algorithm modified a touch
	1865	to include special handling for long "unknown char"
	1866	sequences. The basic idea being that we use AC as long
	1867	as we are dealing with a possible matching char, when
	1868	we encounter an unknown char (and we have not encountered
	1869	an accepting state) we scan forward until we find a legal
	1870	starting char.
	1871	AC matching is basically that of trie matching, except
	1872	that when we encounter a failing transition, we fall back
	1873	to the current states "fail state", and try the current char
	1874	again, a process we repeat until we reach the root state,
	1875	state 1, or a legal transition. If we fail on the root state
	1876	then we can either terminate if we have reached an accepting
	1877	state previously, or restart the entire process from the beginning
	1878	if we have not.
	1879
	1880	*/
	1881	while (s <= last_start) {
	1882	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1883	U8 uc = (U8)s;
	1884	U16 charid = 0;
	1885	U32 base = 1;
	1886	U32 state = 1;
	1887	UV uvc = 0;
	1888	STRLEN len = 0;
	1889	STRLEN foldlen = 0;
	1890	U8 uscan = (U8)NULL;
	1891	U8 *leftmost = NULL;
	1892	#ifdef DEBUGGING
	1893	U32 accepted_word= 0;
	1894	#endif
	1895	U32 pointpos = 0;
	1896
	1897	while ( state && uc <= (U8*)strend ) {
	1898	int failed=0;
	1899	U32 word = aho->states[ state ].wordnum;
	1900
	1901	if( state==1 ) {
	1902	if ( bitmap ) {
	1903	DEBUG_TRIE_EXECUTE_r(
	1904	if ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1905	dump_exec_pos( (char *)uc, c, strend, real_start,
	1906	(char *)uc, utf8_target );
	1907	PerlIO_printf( Perl_debug_log,
	1908	" Scanning for legal start char...\n");
	1909	}
	1910	);
	1911	if (utf8_target) {
	1912	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1913	uc += UTF8SKIP(uc);
	1914	}
	1915	} else {
	1916	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1917	uc++;
	1918	}
	1919	}
	1920	s= (char *)uc;
	1921	}
	1922	if (uc >(U8*)last_start) break;
	1923	}
	1924
	1925	if ( word ) {
	1926	U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
	1927	if (!leftmost \|\| lpos < leftmost) {
	1928	DEBUG_r(accepted_word=word);
	1929	leftmost= lpos;
	1930	}
	1931	if (base==0) break;
	1932
	1933	}
	1934	points[pointpos++ % maxlen]= uc;
	1935	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	1936	uscan, len, uvc, charid, foldlen,
	1937	foldbuf, uniflags);
	1938	DEBUG_TRIE_EXECUTE_r({
	1939	dump_exec_pos( (char *)uc, c, strend, real_start,
	1940	s, utf8_target );
	1941	PerlIO_printf(Perl_debug_log,
	1942	" Charid:%3u CP:%4"UVxf" ",
	1943	charid, uvc);
	1944	});
	1945
	1946	do {
	1947	#ifdef DEBUGGING
	1948	word = aho->states[ state ].wordnum;
	1949	#endif
	1950	base = aho->states[ state ].trans.base;
	1951
	1952	DEBUG_TRIE_EXECUTE_r({
	1953	if (failed)
	1954	dump_exec_pos( (char *)uc, c, strend, real_start,
	1955	s, utf8_target );
	1956	PerlIO_printf( Perl_debug_log,
	1957	"%sState: %4"UVxf", word=%"UVxf,
	1958	failed ? " Fail transition to " : "",
	1959	(UV)state, (UV)word);
	1960	});
	1961	if ( base ) {
	1962	U32 tmp;
	1963	I32 offset;
	1964	if (charid &&
	1965	( ((offset = base + charid
	1966	- 1 - trie->uniquecharcount)) >= 0)
	1967	&& ((U32)offset < trie->lasttrans)
	1968	&& trie->trans[offset].check == state
	1969	&& (tmp=trie->trans[offset].next))
	1970	{
	1971	DEBUG_TRIE_EXECUTE_r(
	1972	PerlIO_printf( Perl_debug_log," - legal\n"));
	1973	state = tmp;
	1974	break;
	1975	}
	1976	else {
	1977	DEBUG_TRIE_EXECUTE_r(
	1978	PerlIO_printf( Perl_debug_log," - fail\n"));
	1979	failed = 1;
	1980	state = aho->fail[state];
	1981	}
	1982	}
	1983	else {
	1984	/* we must be accepting here */
	1985	DEBUG_TRIE_EXECUTE_r(
	1986	PerlIO_printf( Perl_debug_log," - accepting\n"));
	1987	failed = 1;
	1988	break;
	1989	}
	1990	} while(state);
	1991	uc += len;
	1992	if (failed) {
	1993	if (leftmost)
	1994	break;
	1995	if (!state) state = 1;
	1996	}
	1997	}
	1998	if ( aho->states[ state ].wordnum ) {
	1999	U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
	2000	if (!leftmost \|\| lpos < leftmost) {
	2001	DEBUG_r(accepted_word=aho->states[ state ].wordnum);
	2002	leftmost = lpos;
	2003	}
	2004	}
	2005	if (leftmost) {
	2006	s = (char*)leftmost;
	2007	DEBUG_TRIE_EXECUTE_r({
	2008	PerlIO_printf(
	2009	Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
	2010	(UV)accepted_word, (IV)(s - real_start)
	2011	);
	2012	});
	2013	if (!reginfo \|\| regtry(reginfo, &s)) {
	2014	FREETMPS;
	2015	LEAVE;
	2016	goto got_it;
	2017	}
	2018	s = HOPc(s,1);
	2019	DEBUG_TRIE_EXECUTE_r({
	2020	PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
	2021	});
	2022	} else {
	2023	DEBUG_TRIE_EXECUTE_r(
	2024	PerlIO_printf( Perl_debug_log,"No match.\n"));
	2025	break;
	2026	}
	2027	}
	2028	FREETMPS;
	2029	LEAVE;
	2030	}
	2031	break;
	2032	default:
	2033	Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
	2034	break;
	2035	}
	2036	return 0;
	2037	got_it:
	2038	return s;
	2039	}
	2040
	2041
	2042	/*
	2043	- regexec_flags - match a regexp against a string
	2044	*/
	2045	I32
	2046	Perl_regexec_flags(pTHX_ REGEXP * const rx, char stringarg, register char strend,
	2047	char strbeg, I32 minend, SV sv, void *data, U32 flags)
	2048	/* strend: pointer to null at end of string */
	2049	/* strbeg: real beginning of string */
	2050	/* minend: end of match must be >=minend after stringarg. */
	2051	/* data: May be used for some additional optimizations.
	2052	Currently its only used, with a U32 cast, for transmitting
	2053	the ganch offset when doing a /g match. This will change */
	2054	/* nosave: For optimizations. */
	2055	{
	2056	dVAR;
	2057	struct regexp const prog = (struct regexp )SvANY(rx);
	2058	/register/ char *s;
	2059	register regnode *c;
	2060	/register/ char *startpos = stringarg;
	2061	I32 minlen; /* must match at least this many chars */
	2062	I32 dontbother = 0; /* how many characters not to try at end */
	2063	I32 end_shift = 0; /* Same for the end. / / CC */
	2064	I32 scream_pos = -1; /* Internal iterator of scream. */
	2065	char *scream_olds = NULL;
	2066	const bool utf8_target = cBOOL(DO_UTF8(sv));
	2067	I32 multiline;
	2068	RXi_GET_DECL(prog,progi);
	2069	regmatch_info reginfo; /* create some info to pass to regtry etc */
	2070	regexp_paren_pair *swap = NULL;
	2071	GET_RE_DEBUG_FLAGS_DECL;
	2072
	2073	PERL_ARGS_ASSERT_REGEXEC_FLAGS;
	2074	PERL_UNUSED_ARG(data);
	2075
	2076	/* Be paranoid... */
	2077	if (prog == NULL \|\| startpos == NULL) {
	2078	Perl_croak(aTHX_ "NULL regexp parameter");
	2079	return 0;
	2080	}
	2081
	2082	multiline = prog->extflags & RXf_PMf_MULTILINE;
	2083	reginfo.prog = rx; /* Yes, sorry that this is confusing. */
	2084
	2085	RX_MATCH_UTF8_set(rx, utf8_target);
	2086	DEBUG_EXECUTE_r(
	2087	debug_start_match(rx, utf8_target, startpos, strend,
	2088	"Matching");
	2089	);
	2090
	2091	minlen = prog->minlen;
	2092
	2093	if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
	2094	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	2095	"String too short [regexec_flags]...\n"));
	2096	goto phooey;
	2097	}
	2098
	2099
	2100	/* Check validity of program. */
	2101	if (UCHARAT(progi->program) != REG_MAGIC) {
	2102	Perl_croak(aTHX_ "corrupted regexp program");
	2103	}
	2104
	2105	PL_reg_flags = 0;
	2106	PL_reg_state.re_state_eval_setup_done = FALSE;
	2107	PL_reg_maxiter = 0;
	2108
	2109	if (RX_UTF8(rx))
	2110	PL_reg_flags \|= RF_utf8;
	2111
	2112	/* Mark beginning of line for ^ and lookbehind. */
	2113	reginfo.bol = startpos; /* XXX not used ??? */
	2114	PL_bostr = strbeg;
	2115	reginfo.sv = sv;
	2116
	2117	/* Mark end of line for $ (and such) */
	2118	PL_regeol = strend;
	2119
	2120	/* see how far we have to get to not match where we matched before */
	2121	reginfo.till = startpos+minend;
	2122
	2123	/* If there is a "must appear" string, look for it. */
	2124	s = startpos;
	2125
	2126	if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
	2127	MAGIC *mg;
	2128	if (flags & REXEC_IGNOREPOS){ /* Means: check only at start */
	2129	reginfo.ganch = startpos + prog->gofs;
	2130	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2131	"GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
	2132	} else if (sv && SvTYPE(sv) >= SVt_PVMG
	2133	&& SvMAGIC(sv)
	2134	&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
	2135	&& mg->mg_len >= 0) {
	2136	reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
	2137	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2138	"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
	2139
	2140	if (prog->extflags & RXf_ANCH_GPOS) {
	2141	if (s > reginfo.ganch)
	2142	goto phooey;
	2143	s = reginfo.ganch - prog->gofs;
	2144	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2145	"GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
	2146	if (s < strbeg)
	2147	goto phooey;
	2148	}
	2149	}
	2150	else if (data) {
	2151	reginfo.ganch = strbeg + PTR2UV(data);
	2152	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2153	"GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
	2154
	2155	} else { /* pos() not defined */
	2156	reginfo.ganch = strbeg;
	2157	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2158	"GPOS: reginfo.ganch = strbeg\n"));
	2159	}
	2160	}
	2161	if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
	2162	/* We have to be careful. If the previous successful match
	2163	was from this regex we don't want a subsequent partially
	2164	successful match to clobber the old results.
	2165	So when we detect this possibility we add a swap buffer
	2166	to the re, and switch the buffer each match. If we fail
	2167	we switch it back, otherwise we leave it swapped.
	2168	*/
	2169	swap = prog->offs;
	2170	/* do we need a save destructor here for eval dies? */
	2171	Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
	2172	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	2173	"rex=0x%"UVxf" saving offs: orig=0x%"UVxf" new=0x%"UVxf"\n",
	2174	PTR2UV(prog),
	2175	PTR2UV(swap),
	2176	PTR2UV(prog->offs)
	2177	));
	2178	}
	2179	if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL \|\| prog->check_utf8 != NULL)) {
	2180	re_scream_pos_data d;
	2181
	2182	d.scream_olds = &scream_olds;
	2183	d.scream_pos = &scream_pos;
	2184	s = re_intuit_start(rx, sv, s, strend, flags, &d);
	2185	if (!s) {
	2186	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
	2187	goto phooey; /* not present */
	2188	}
	2189	}
	2190
	2191
	2192
	2193	/* Simplest case: anchored match need be tried only once. */
	2194	/* [unless only anchor is BOL and multiline is set] */
	2195	if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
	2196	if (s == startpos && regtry(&reginfo, &startpos))
	2197	goto got_it;
	2198	else if (multiline \|\| (prog->intflags & PREGf_IMPLICIT)
	2199	\|\| (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
	2200	{
	2201	char *end;
	2202
	2203	if (minlen)
	2204	dontbother = minlen - 1;
	2205	end = HOP3c(strend, -dontbother, strbeg) - 1;
	2206	/* for multiline we only have to try after newlines */
	2207	if (prog->check_substr \|\| prog->check_utf8) {
	2208	/* because of the goto we can not easily reuse the macros for bifurcating the
	2209	unicode/non-unicode match modes here like we do elsewhere - demerphq */
	2210	if (utf8_target) {
	2211	if (s == startpos)
	2212	goto after_try_utf8;
	2213	while (1) {
	2214	if (regtry(&reginfo, &s)) {
	2215	goto got_it;
	2216	}
	2217	after_try_utf8:
	2218	if (s > end) {
	2219	goto phooey;
	2220	}
	2221	if (prog->extflags & RXf_USE_INTUIT) {
	2222	s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
	2223	if (!s) {
	2224	goto phooey;
	2225	}
	2226	}
	2227	else {
	2228	s += UTF8SKIP(s);
	2229	}
	2230	}
	2231	} /* end search for check string in unicode */
	2232	else {
	2233	if (s == startpos) {
	2234	goto after_try_latin;
	2235	}
	2236	while (1) {
	2237	if (regtry(&reginfo, &s)) {
	2238	goto got_it;
	2239	}
	2240	after_try_latin:
	2241	if (s > end) {
	2242	goto phooey;
	2243	}
	2244	if (prog->extflags & RXf_USE_INTUIT) {
	2245	s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
	2246	if (!s) {
	2247	goto phooey;
	2248	}
	2249	}
	2250	else {
	2251	s++;
	2252	}
	2253	}
	2254	} /* end search for check string in latin*/
	2255	} /* end search for check string */
	2256	else { /* search for newline */
	2257	if (s > startpos) {
	2258	/XXX: The s-- is almost definitely wrong here under unicode - demeprhq/
	2259	s--;
	2260	}
	2261	/* We can use a more efficient search as newlines are the same in unicode as they are in latin */
	2262	while (s <= end) { /* note it could be possible to match at the end of the string */
	2263	if (s++ == '\n') { / don't need PL_utf8skip here */
	2264	if (regtry(&reginfo, &s))
	2265	goto got_it;
	2266	}
	2267	}
	2268	} /* end search for newline */
	2269	} /* end anchored/multiline check string search */
	2270	goto phooey;
	2271	} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
	2272	{
	2273	/* the warning about reginfo.ganch being used without initialization
	2274	is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
	2275	and we only enter this block when the same bit is set. */
	2276	char *tmp_s = reginfo.ganch - prog->gofs;
	2277
	2278	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
	2279	goto got_it;
	2280	goto phooey;
	2281	}
	2282
	2283	/* Messy cases: unanchored match. */
	2284	if ((prog->anchored_substr \|\| prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
	2285	/* we have /x+whatever/ */
	2286	/* it must be a one character string (XXXX Except UTF_PATTERN?) */
	2287	char ch;
	2288	#ifdef DEBUGGING
	2289	int did_match = 0;
	2290	#endif
	2291	if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
	2292	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2293	ch = SvPVX_const(utf8_target ? prog->anchored_utf8 : prog->anchored_substr)[0];
	2294
	2295	if (utf8_target) {
	2296	REXEC_FBC_SCAN(
	2297	if (*s == ch) {
	2298	DEBUG_EXECUTE_r( did_match = 1 );
	2299	if (regtry(&reginfo, &s)) goto got_it;
	2300	s += UTF8SKIP(s);
	2301	while (s < strend && *s == ch)
	2302	s += UTF8SKIP(s);
	2303	}
	2304	);
	2305	}
	2306	else {
	2307	REXEC_FBC_SCAN(
	2308	if (*s == ch) {
	2309	DEBUG_EXECUTE_r( did_match = 1 );
	2310	if (regtry(&reginfo, &s)) goto got_it;
	2311	s++;
	2312	while (s < strend && *s == ch)
	2313	s++;
	2314	}
	2315	);
	2316	}
	2317	DEBUG_EXECUTE_r(if (!did_match)
	2318	PerlIO_printf(Perl_debug_log,
	2319	"Did not find anchored character...\n")
	2320	);
	2321	}
	2322	else if (prog->anchored_substr != NULL
	2323	\|\| prog->anchored_utf8 != NULL
	2324	\|\| ((prog->float_substr != NULL \|\| prog->float_utf8 != NULL)
	2325	&& prog->float_max_offset < strend - s)) {
	2326	SV *must;
	2327	I32 back_max;
	2328	I32 back_min;
	2329	char *last;
	2330	char last1; / Last position checked before */
	2331	#ifdef DEBUGGING
	2332	int did_match = 0;
	2333	#endif
	2334	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	2335	if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
	2336	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2337	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	2338	back_max = back_min = prog->anchored_offset;
	2339	} else {
	2340	if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
	2341	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2342	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	2343	back_max = prog->float_max_offset;
	2344	back_min = prog->float_min_offset;
	2345	}
	2346
	2347
	2348	if (must == &PL_sv_undef)
	2349	/* could not downgrade utf8 check substring, so must fail */
	2350	goto phooey;
	2351
	2352	if (back_min<0) {
	2353	last = strend;
	2354	} else {
	2355	last = HOP3c(strend, /* Cannot start after this */
	2356	-(I32)(CHR_SVLEN(must)
	2357	- (SvTAIL(must) != 0) + back_min), strbeg);
	2358	}
	2359	if (s > PL_bostr)
	2360	last1 = HOPc(s, -1);
	2361	else
	2362	last1 = s - 1; /* bogus */
	2363
	2364	/* XXXX check_substr already used to find "s", can optimize if
	2365	check_substr==must. */
	2366	scream_pos = -1;
	2367	dontbother = end_shift;
	2368	strend = HOPc(strend, -dontbother);
	2369	while ( (s <= last) &&
	2370	(s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
	2371	(unsigned char*)strend, must,
	2372	multiline ? FBMrf_MULTILINE : 0)) ) {
	2373	DEBUG_EXECUTE_r( did_match = 1 );
	2374	if (HOPc(s, -back_max) > last1) {
	2375	last1 = HOPc(s, -back_min);
	2376	s = HOPc(s, -back_max);
	2377	}
	2378	else {
	2379	char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
	2380
	2381	last1 = HOPc(s, -back_min);
	2382	s = t;
	2383	}
	2384	if (utf8_target) {
	2385	while (s <= last1) {
	2386	if (regtry(&reginfo, &s))
	2387	goto got_it;
	2388	s += UTF8SKIP(s);
	2389	}
	2390	}
	2391	else {
	2392	while (s <= last1) {
	2393	if (regtry(&reginfo, &s))
	2394	goto got_it;
	2395	s++;
	2396	}
	2397	}
	2398	}
	2399	DEBUG_EXECUTE_r(if (!did_match) {
	2400	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	2401	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	2402	PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
	2403	((must == prog->anchored_substr \|\| must == prog->anchored_utf8)
	2404	? "anchored" : "floating"),
	2405	quoted, RE_SV_TAIL(must));
	2406	});
	2407	goto phooey;
	2408	}
	2409	else if ( (c = progi->regstclass) ) {
	2410	if (minlen) {
	2411	const OPCODE op = OP(progi->regstclass);
	2412	/* don't bother with what can't match */
	2413	if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
	2414	strend = HOPc(strend, -(minlen - 1));
	2415	}
	2416	DEBUG_EXECUTE_r({
	2417	SV * const prop = sv_newmortal();
	2418	regprop(prog, prop, c);
	2419	{
	2420	RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
	2421	s,strend-s,60);
	2422	PerlIO_printf(Perl_debug_log,
	2423	"Matching stclass %.*s against %s (%d bytes)\n",
	2424	(int)SvCUR(prop), SvPVX_const(prop),
	2425	quoted, (int)(strend - s));
	2426	}
	2427	});
	2428	if (find_byclass(prog, c, s, strend, &reginfo))
	2429	goto got_it;
	2430	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
	2431	}
	2432	else {
	2433	dontbother = 0;
	2434	if (prog->float_substr != NULL \|\| prog->float_utf8 != NULL) {
	2435	/* Trim the end. */
	2436	char *last= NULL;
	2437	SV* float_real;
	2438	STRLEN len;
	2439	const char *little;
	2440
	2441	if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
	2442	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2443	float_real = utf8_target ? prog->float_utf8 : prog->float_substr;
	2444
	2445	little = SvPV_const(float_real, len);
	2446	if (SvTAIL(float_real)) {
	2447	/* This means that float_real contains an artificial \n on the end
	2448	* due to the presence of something like this: /foo$/
	2449	* where we can match both "foo" and "foo\n" at the end of the string.
	2450	* So we have to compare the end of the string first against the float_real
	2451	* without the \n and then against the full float_real with the string.
	2452	* We have to watch out for cases where the string might be smaller
	2453	* than the float_real or the float_real without the \n.
	2454	*/
	2455	char *checkpos= strend - len;
	2456	DEBUG_OPTIMISE_r(
	2457	PerlIO_printf(Perl_debug_log,
	2458	"%sChecking for float_real.%s\n",
	2459	PL_colors[4], PL_colors[5]));
	2460	if (checkpos + 1 < strbeg) {
	2461	/* can't match, even if we remove the trailing \n string is too short to match */
	2462	DEBUG_EXECUTE_r(
	2463	PerlIO_printf(Perl_debug_log,
	2464	"%sString shorter than required trailing substring, cannot match.%s\n",
	2465	PL_colors[4], PL_colors[5]));
	2466	goto phooey;
	2467	} else if (memEQ(checkpos + 1, little, len - 1)) {
	2468	/* can match, the end of the string matches without the "\n" */
	2469	last = checkpos + 1;
	2470	} else if (checkpos < strbeg) {
	2471	/* cant match, string is too short when the "\n" is included */
	2472	DEBUG_EXECUTE_r(
	2473	PerlIO_printf(Perl_debug_log,
	2474	"%sString does not contain required trailing substring, cannot match.%s\n",
	2475	PL_colors[4], PL_colors[5]));
	2476	goto phooey;
	2477	} else if (!multiline) {
	2478	/* non multiline match, so compare with the "\n" at the end of the string */
	2479	if (memEQ(checkpos, little, len)) {
	2480	last= checkpos;
	2481	} else {
	2482	DEBUG_EXECUTE_r(
	2483	PerlIO_printf(Perl_debug_log,
	2484	"%sString does not contain required trailing substring, cannot match.%s\n",
	2485	PL_colors[4], PL_colors[5]));
	2486	goto phooey;
	2487	}
	2488	} else {
	2489	/* multiline match, so we have to search for a place where the full string is located */
	2490	goto find_last;
	2491	}
	2492	} else {
	2493	find_last:
	2494	if (len)
	2495	last = rninstr(s, strend, little, little + len);
	2496	else
	2497	last = strend; /* matching "$" */
	2498	}
	2499	if (!last) {
	2500	/* at one point this block contained a comment which was probably
	2501	* incorrect, which said that this was a "should not happen" case.
	2502	* Even if it was true when it was written I am pretty sure it is
	2503	* not anymore, so I have removed the comment and replaced it with
	2504	* this one. Yves */
	2505	DEBUG_EXECUTE_r(
	2506	PerlIO_printf(Perl_debug_log,
	2507	"String does not contain required substring, cannot match.\n"
	2508	));
	2509	goto phooey;
	2510	}
	2511	dontbother = strend - last + prog->float_min_offset;
	2512	}
	2513	if (minlen && (dontbother < minlen))
	2514	dontbother = minlen - 1;
	2515	strend -= dontbother; /* this one's always in bytes! */
	2516	/* We don't know much -- general case. */
	2517	if (utf8_target) {
	2518	for (;;) {
	2519	if (regtry(&reginfo, &s))
	2520	goto got_it;
	2521	if (s >= strend)
	2522	break;
	2523	s += UTF8SKIP(s);
	2524	};
	2525	}
	2526	else {
	2527	do {
	2528	if (regtry(&reginfo, &s))
	2529	goto got_it;
	2530	} while (s++ < strend);
	2531	}
	2532	}
	2533
	2534	/* Failure. */
	2535	goto phooey;
	2536
	2537	got_it:
	2538	DEBUG_BUFFERS_r(
	2539	if (swap)
	2540	PerlIO_printf(Perl_debug_log,
	2541	"rex=0x%"UVxf" freeing offs: 0x%"UVxf"\n",
	2542	PTR2UV(prog),
	2543	PTR2UV(swap)
	2544	);
	2545	);
	2546	Safefree(swap);
	2547	RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
	2548
	2549	if (PL_reg_state.re_state_eval_setup_done)
	2550	restore_pos(aTHX_ prog);
	2551	if (RXp_PAREN_NAMES(prog))
	2552	(void)hv_iterinit(RXp_PAREN_NAMES(prog));
	2553
	2554	/* make sure $`, $&, $', and $digit will work later */
	2555	if ( !(flags & REXEC_NOT_FIRST) ) {
	2556	RX_MATCH_COPY_FREE(rx);
	2557	if (flags & REXEC_COPY_STR) {
	2558	const I32 i = PL_regeol - startpos + (stringarg - strbeg);
	2559	#ifdef PERL_OLD_COPY_ON_WRITE
	2560	if ((SvIsCOW(sv)
	2561	\|\| (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
	2562	if (DEBUG_C_TEST) {
	2563	PerlIO_printf(Perl_debug_log,
	2564	"Copy on write: regexp capture, type %d\n",
	2565	(int) SvTYPE(sv));
	2566	}
	2567	prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
	2568	prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
	2569	assert (SvPOKp(prog->saved_copy));
	2570	} else
	2571	#endif
	2572	{
	2573	RX_MATCH_COPIED_on(rx);
	2574	s = savepvn(strbeg, i);
	2575	prog->subbeg = s;
	2576	}
	2577	prog->sublen = i;
	2578	}
	2579	else {
	2580	prog->subbeg = strbeg;
	2581	prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
	2582	}
	2583	}
	2584
	2585	return 1;
	2586
	2587	phooey:
	2588	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
	2589	PL_colors[4], PL_colors[5]));
	2590	if (PL_reg_state.re_state_eval_setup_done)
	2591	restore_pos(aTHX_ prog);
	2592	if (swap) {
	2593	/* we failed :-( roll it back */
	2594	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	2595	"rex=0x%"UVxf" rolling back offs: freeing=0x%"UVxf" restoring=0x%"UVxf"\n",
	2596	PTR2UV(prog),
	2597	PTR2UV(prog->offs),
	2598	PTR2UV(swap)
	2599	));
	2600	Safefree(prog->offs);
	2601	prog->offs = swap;
	2602	}
	2603
	2604	return 0;
	2605	}
	2606
	2607
	2608	/* Set which rex is pointed to by PL_reg_state, handling ref counting.
	2609	* Do inc before dec, in case old and new rex are the same */
	2610	#define SET_reg_curpm(Re2) \
	2611	if (PL_reg_state.re_state_eval_setup_done) { \
	2612	(void)ReREFCNT_inc(Re2); \
	2613	ReREFCNT_dec(PM_GETRE(PL_reg_curpm)); \
	2614	PM_SETRE((PL_reg_curpm), (Re2)); \
	2615	}
	2616
	2617
	2618	/*
	2619	- regtry - try match at specific point
	2620	*/
	2621	STATIC I32 /* 0 failure, 1 success */
	2622	S_regtry(pTHX_ regmatch_info reginfo, char *startpos)
	2623	{
	2624	dVAR;
	2625	CHECKPOINT lastcp;
	2626	REGEXP *const rx = reginfo->prog;
	2627	regexp const prog = (struct regexp )SvANY(rx);
	2628	RXi_GET_DECL(prog,progi);
	2629	GET_RE_DEBUG_FLAGS_DECL;
	2630
	2631	PERL_ARGS_ASSERT_REGTRY;
	2632
	2633	reginfo->cutpoint=NULL;
	2634
	2635	if ((prog->extflags & RXf_EVAL_SEEN)
	2636	&& !PL_reg_state.re_state_eval_setup_done)
	2637	{
	2638	MAGIC *mg;
	2639
	2640	PL_reg_state.re_state_eval_setup_done = TRUE;
	2641	if (reginfo->sv) {
	2642	/* Make $_ available to executed code. */
	2643	if (reginfo->sv != DEFSV) {
	2644	SAVE_DEFSV;
	2645	DEFSV_set(reginfo->sv);
	2646	}
	2647
	2648	if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
	2649	&& (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
	2650	/* prepare for quick setting of pos */
	2651	#ifdef PERL_OLD_COPY_ON_WRITE
	2652	if (SvIsCOW(reginfo->sv))
	2653	sv_force_normal_flags(reginfo->sv, 0);
	2654	#endif
	2655	mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
	2656	&PL_vtbl_mglob, NULL, 0);
	2657	mg->mg_len = -1;
	2658	}
	2659	PL_reg_magic = mg;
	2660	PL_reg_oldpos = mg->mg_len;
	2661	SAVEDESTRUCTOR_X(restore_pos, prog);
	2662	}
	2663	if (!PL_reg_curpm) {
	2664	Newxz(PL_reg_curpm, 1, PMOP);
	2665	#ifdef USE_ITHREADS
	2666	{
	2667	SV* const repointer = &PL_sv_undef;
	2668	/* this regexp is also owned by the new PL_reg_curpm, which
	2669	will try to free it. */
	2670	av_push(PL_regex_padav, repointer);
	2671	PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
	2672	PL_regex_pad = AvARRAY(PL_regex_padav);
	2673	}
	2674	#endif
	2675	}
	2676	SET_reg_curpm(rx);
	2677	PL_reg_oldcurpm = PL_curpm;
	2678	PL_curpm = PL_reg_curpm;
	2679	if (RXp_MATCH_COPIED(prog)) {
	2680	/* Here is a serious problem: we cannot rewrite subbeg,
	2681	since it may be needed if this match fails. Thus
	2682	$` inside (?{}) could fail... */
	2683	PL_reg_oldsaved = prog->subbeg;
	2684	PL_reg_oldsavedlen = prog->sublen;
	2685	#ifdef PERL_OLD_COPY_ON_WRITE
	2686	PL_nrs = prog->saved_copy;
	2687	#endif
	2688	RXp_MATCH_COPIED_off(prog);
	2689	}
	2690	else
	2691	PL_reg_oldsaved = NULL;
	2692	prog->subbeg = PL_bostr;
	2693	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
	2694	}
	2695	#ifdef DEBUGGING
	2696	PL_reg_starttry = *startpos;
	2697	#endif
	2698	prog->offs[0].start = *startpos - PL_bostr;
	2699	PL_reginput = *startpos;
	2700	prog->lastparen = 0;
	2701	prog->lastcloseparen = 0;
	2702	PL_regsize = 0;
	2703
	2704	/* XXXX What this code is doing here?!!! There should be no need
	2705	to do this again and again, prog->lastparen should take care of
	2706	this! --ilya*/
	2707
	2708	/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
	2709	* Actually, the code in regcppop() (which Ilya may be meaning by
	2710	* prog->lastparen), is not needed at all by the test suite
	2711	* (op/regexp, op/pat, op/split), but that code is needed otherwise
	2712	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	2713	* Meanwhile, this code is needed for the
	2714	* above-mentioned test suite tests to succeed. The common theme
	2715	* on those tests seems to be returning null fields from matches.
	2716	* --jhi updated by dapm */
	2717	#if 1
	2718	if (prog->nparens) {
	2719	regexp_paren_pair *pp = prog->offs;
	2720	register I32 i;
	2721	for (i = prog->nparens; i > (I32)prog->lastparen; i--) {
	2722	++pp;
	2723	pp->start = -1;
	2724	pp->end = -1;
	2725	}
	2726	}
	2727	#endif
	2728	REGCP_SET(lastcp);
	2729	if (regmatch(reginfo, progi->program + 1)) {
	2730	prog->offs[0].end = PL_reginput - PL_bostr;
	2731	return 1;
	2732	}
	2733	if (reginfo->cutpoint)
	2734	*startpos= reginfo->cutpoint;
	2735	REGCP_UNWIND(lastcp);
	2736	return 0;
	2737	}
	2738
	2739
	2740	#define sayYES goto yes
	2741	#define sayNO goto no
	2742	#define sayNO_SILENT goto no_silent
	2743
	2744	/* we dont use STMT_START/END here because it leads to
	2745	"unreachable code" warnings, which are bogus, but distracting. */
	2746	#define CACHEsayNO \
	2747	if (ST.cache_mask) \
	2748	PL_reg_poscache[ST.cache_offset] \|= ST.cache_mask; \
	2749	sayNO
	2750
	2751	/* this is used to determine how far from the left messages like
	2752	'failed...' are printed. It should be set such that messages
	2753	are inline with the regop output that created them.
	2754	*/
	2755	#define REPORT_CODE_OFF 32
	2756
	2757
	2758	#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
	2759	#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
	2760
	2761	#define SLAB_FIRST(s) (&(s)->states[0])
	2762	#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
	2763
	2764	/* grab a new slab and return the first slot in it */
	2765
	2766	STATIC regmatch_state *
	2767	S_push_slab(pTHX)
	2768	{
	2769	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	2770	dMY_CXT;
	2771	#endif
	2772	regmatch_slab *s = PL_regmatch_slab->next;
	2773	if (!s) {
	2774	Newx(s, 1, regmatch_slab);
	2775	s->prev = PL_regmatch_slab;
	2776	s->next = NULL;
	2777	PL_regmatch_slab->next = s;
	2778	}
	2779	PL_regmatch_slab = s;
	2780	return SLAB_FIRST(s);
	2781	}
	2782
	2783
	2784	/* push a new state then goto it */
	2785
	2786	#define PUSH_STATE_GOTO(state, node) \
	2787	scan = node; \
	2788	st->resume_state = state; \
	2789	goto push_state;
	2790
	2791	/* push a new state with success backtracking, then goto it */
	2792
	2793	#define PUSH_YES_STATE_GOTO(state, node) \
	2794	scan = node; \
	2795	st->resume_state = state; \
	2796	goto push_yes_state;
	2797
	2798
	2799
	2800	/*
	2801
	2802	regmatch() - main matching routine
	2803
	2804	This is basically one big switch statement in a loop. We execute an op,
	2805	set 'next' to point the next op, and continue. If we come to a point which
	2806	we may need to backtrack to on failure such as (A\|B\|C), we push a
	2807	backtrack state onto the backtrack stack. On failure, we pop the top
	2808	state, and re-enter the loop at the state indicated. If there are no more
	2809	states to pop, we return failure.
	2810
	2811	Sometimes we also need to backtrack on success; for example /A+/, where
	2812	after successfully matching one A, we need to go back and try to
	2813	match another one; similarly for lookahead assertions: if the assertion
	2814	completes successfully, we backtrack to the state just before the assertion
	2815	and then carry on. In these cases, the pushed state is marked as
	2816	'backtrack on success too'. This marking is in fact done by a chain of
	2817	pointers, each pointing to the previous 'yes' state. On success, we pop to
	2818	the nearest yes state, discarding any intermediate failure-only states.
	2819	Sometimes a yes state is pushed just to force some cleanup code to be
	2820	called at the end of a successful match or submatch; e.g. (??{$re}) uses
	2821	it to free the inner regex.
	2822
	2823	Note that failure backtracking rewinds the cursor position, while
	2824	success backtracking leaves it alone.
	2825
	2826	A pattern is complete when the END op is executed, while a subpattern
	2827	such as (?=foo) is complete when the SUCCESS op is executed. Both of these
	2828	ops trigger the "pop to last yes state if any, otherwise return true"
	2829	behaviour.
	2830
	2831	A common convention in this function is to use A and B to refer to the two
	2832	subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
	2833	the subpattern to be matched possibly multiple times, while B is the entire
	2834	rest of the pattern. Variable and state names reflect this convention.
	2835
	2836	The states in the main switch are the union of ops and failure/success of
	2837	substates associated with that op. For example, IFMATCH is the op
	2838	that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
	2839	'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
	2840	successfully matched A and IFMATCH_A_fail is a state saying that we have
	2841	just failed to match A. Resume states always come in pairs. The backtrack
	2842	state we push is marked as 'IFMATCH_A', but when that is popped, we resume
	2843	at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
	2844	on success or failure.
	2845
	2846	The struct that holds a backtracking state is actually a big union, with
	2847	one variant for each major type of op. The variable st points to the
	2848	top-most backtrack struct. To make the code clearer, within each
	2849	block of code we #define ST to alias the relevant union.
	2850
	2851	Here's a concrete example of a (vastly oversimplified) IFMATCH
	2852	implementation:
	2853
	2854	switch (state) {
	2855	....
	2856
	2857	#define ST st->u.ifmatch
	2858
	2859	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	2860	ST.foo = ...; // some state we wish to save
	2861	...
	2862	// push a yes backtrack state with a resume value of
	2863	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
	2864	// first node of A:
	2865	PUSH_YES_STATE_GOTO(IFMATCH_A, A);
	2866	// NOTREACHED
	2867
	2868	case IFMATCH_A: // we have successfully executed A; now continue with B
	2869	next = B;
	2870	bar = ST.foo; // do something with the preserved value
	2871	break;
	2872
	2873	case IFMATCH_A_fail: // A failed, so the assertion failed
	2874	...; // do some housekeeping, then ...
	2875	sayNO; // propagate the failure
	2876
	2877	#undef ST
	2878
	2879	...
	2880	}
	2881
	2882	For any old-timers reading this who are familiar with the old recursive
	2883	approach, the code above is equivalent to:
	2884
	2885	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	2886	{
	2887	int foo = ...
	2888	...
	2889	if (regmatch(A)) {
	2890	next = B;
	2891	bar = foo;
	2892	break;
	2893	}
	2894	...; // do some housekeeping, then ...
	2895	sayNO; // propagate the failure
	2896	}
	2897
	2898	The topmost backtrack state, pointed to by st, is usually free. If you
	2899	want to claim it, populate any ST.foo fields in it with values you wish to
	2900	save, then do one of
	2901
	2902	PUSH_STATE_GOTO(resume_state, node);
	2903	PUSH_YES_STATE_GOTO(resume_state, node);
	2904
	2905	which sets that backtrack state's resume value to 'resume_state', pushes a
	2906	new free entry to the top of the backtrack stack, then goes to 'node'.
	2907	On backtracking, the free slot is popped, and the saved state becomes the
	2908	new free state. An ST.foo field in this new top state can be temporarily
	2909	accessed to retrieve values, but once the main loop is re-entered, it
	2910	becomes available for reuse.
	2911
	2912	Note that the depth of the backtrack stack constantly increases during the
	2913	left-to-right execution of the pattern, rather than going up and down with
	2914	the pattern nesting. For example the stack is at its maximum at Z at the
	2915	end of the pattern, rather than at X in the following:
	2916
	2917	/(((X)+)+)+....(Y)+....Z/
	2918
	2919	The only exceptions to this are lookahead/behind assertions and the cut,
	2920	(?>A), which pop all the backtrack states associated with A before
	2921	continuing.
	2922
	2923	Backtrack state structs are allocated in slabs of about 4K in size.
	2924	PL_regmatch_state and st always point to the currently active state,
	2925	and PL_regmatch_slab points to the slab currently containing
	2926	PL_regmatch_state. The first time regmatch() is called, the first slab is
	2927	allocated, and is never freed until interpreter destruction. When the slab
	2928	is full, a new one is allocated and chained to the end. At exit from
	2929	regmatch(), slabs allocated since entry are freed.
	2930
	2931	*/
	2932
	2933
		/* DEBUG_STATE_pp(pp): under DEBUG_STATE_r, dump the current execution
		 * position with DUMP_EXEC_POS() and then print one line made of:
		 * depth*2 columns of padding, the string pp, the PL_reg_name[] entry for
		 * st->resume_state, and a bracketed marker -- "[Y]" when st is yes_state,
		 * "[M]" when st is mark_state, "[YM]" when it is both, nothing otherwise.
		 *
		 * pp must be a string literal, since it is pasted into the format string.
		 * The macro reads locinput, scan, utf8_target, depth, st, yes_state and
		 * mark_state from the scope where it is expanded, and its expansion
		 * already ends in ';'. */
	2934	#define DEBUG_STATE_pp(pp) \
	2935	DEBUG_STATE_r({ \
	2936	DUMP_EXEC_POS(locinput, scan, utf8_target); \
	2937	PerlIO_printf(Perl_debug_log, \
	2938	" %*s"pp" %s%s%s%s%s\n", \
	2939	depth*2, "", \
	2940	PL_reg_name[st->resume_state], \
	2941	((st==yes_state||st==mark_state) ? "[" : ""), \
	2942	((st==yes_state) ? "Y" : ""), \
	2943	((st==mark_state) ? "M" : ""), \
	2944	((st==yes_state||st==mark_state) ? "]" : "") \
	2945	); \
	2946	});
	2947
	2948
		/* REG_NODE_NUM(x): offset of node pointer x from 'prog' as an int, or -1
		   when x is NULL. Relies on a variable named prog being in scope at the
		   point of use. */
	2949	#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
	2950
	2951	#ifdef DEBUGGING
	2952
		/* S_debug_start_match(): debugging aid, compiled only under DEBUGGING.
		 *
		 * Prints "<blurb> REx <pattern> against <string>" to Perl_debug_log, with
		 * the pattern source and the target text rendered through
		 * RE_PV_QUOTED_DECL (the trailing 60 is presumably the maximum displayed
		 * length). If the pattern and/or the target is UTF-8, a second line
		 * "UTF-8 pattern and string..." (or just one of the two words) follows.
		 *
		 *  prog         the regex whose source (RX_PRECOMP_const/RX_PRELEN) is shown
		 *  utf8_target  true if the target string is UTF-8
		 *  start, end   first byte of, and one past the last byte of, the part
		 *               of the target string to display (length is end - start)
		 *  blurb        short label printed before "REx"
		 */
	2953	STATIC void
	2954	S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
	2955	const char *start, const char *end, const char *blurb)
	2956	{
	2957	const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
	2958
	2959	PERL_ARGS_ASSERT_DEBUG_START_MATCH;
	2960
		/* initialise the PL_colors[] strings on first use */
	2961	if (!PL_colorset)
	2962	reginitcolors();
	2963	{
		/* s0: displayable form of the pattern source */
	2964	RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
	2965	RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
	2966
		/* s1: displayable form of the target bytes [start, end) */
	2967	RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
	2968	start, end - start, 60);
	2969
	2970	PerlIO_printf(Perl_debug_log,
	2971	"%s%s REx%s %s against %s\n",
	2972	PL_colors[4], blurb, PL_colors[5], s0, s1);
	2973
		/* mention which of pattern/target is UTF-8, if either is */
	2974	if (utf8_target||utf8_pat)
	2975	PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
	2976	utf8_pat ? "pattern" : "",
	2977	utf8_pat && utf8_target ? " and " : "",
	2978	utf8_target ? "string" : ""
	2979	);
	2980	}
	2981	}
	2982
		/* S_dump_exec_pos(): debugging aid, compiled only under DEBUGGING.
		 *
		 * Prints to Perl_debug_log the offset of locinput from loc_bostr, followed
		 * by a short window of the target string around locinput enclosed in
		 * "<...>", padded on the right and terminated with '|'. The window is
		 * rendered as three differently coloured pieces (see the comment on
		 * pref_len below); when colouring is off, "> <" marks the current
		 * position instead.
		 *
		 *  locinput          current position in the target string
		 *  scan              current regnode; only compared against CANY, in which
		 *                    case the text is not displayed as UTF-8
		 *  loc_regeol        end of the target string (upper bound of the window)
		 *  loc_bostr         start of the target string (lower bound of the window)
		 *  loc_reg_starttry  position the current match attempt started from
		 *  utf8_target       true if the target string is UTF-8
		 */
	2983	STATIC void
	2984	S_dump_exec_pos(pTHX_ const char *locinput,
	2985	const regnode *scan,
	2986	const char *loc_regeol,
	2987	const char *loc_bostr,
	2988	const char *loc_reg_starttry,
	2989	const bool utf8_target)
	2990	{
		/* colouring counts as enabled when any of these colour strings is
		   non-empty */
	2991	const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
	2992	const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
		/* l: number of bytes shown after the current position, at most taill */
	2993	int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
	2994	/* The part of the string before starttry has one color
	2995	(pref0_len chars), between starttry and current
	2996	position another one (pref_len - pref0_len chars),
	2997	after the current position the third one.
	2998	We assume that pref0_len <= pref_len, otherwise we
	2999	decrease pref0_len. */
	3000	int pref_len = (locinput - loc_bostr) > (5 + taill) - l
	3001	? (5 + taill) - l : locinput - loc_bostr;
	3002	int pref0_len;
	3003
	3004	PERL_ARGS_ASSERT_DUMP_EXEC_POS;
	3005
		/* don't start the prefix in the middle of a UTF-8 character: extend it
		   leftwards while its first byte is a continuation byte */
	3006	while (utf8_target && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
	3007	pref_len++;
	3008	pref0_len = pref_len - (locinput - loc_reg_starttry);
		/* if the prefix came out short, give the spare room to the tail */
	3009	if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
	3010	l = ( loc_regeol - locinput > (5 + taill) - pref_len
	3011	? (5 + taill) - pref_len : loc_regeol - locinput);
		/* likewise don't end the tail in the middle of a UTF-8 character */
	3012	while (utf8_target && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
	3013	l--;
		/* clamp pref0_len into the range 0..pref_len */
	3014	if (pref0_len < 0)
	3015	pref0_len = 0;
	3016	if (pref0_len > pref_len)
	3017	pref0_len = pref_len;
	3018	{
	3019	const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0;
	3020
		/* s0: text before the start of the current attempt */
	3021	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
	3022	(locinput - pref_len),pref0_len, 60, 4, 5);
	3023
		/* s1: text between the start of the attempt and the current position */
	3024	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
	3025	(locinput - pref_len + pref0_len),
	3026	pref_len - pref0_len, 60, 2, 3);
	3027
		/* s2: text from the current position onwards */
	3028	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
	3029	locinput, loc_regeol - locinput, 10, 0, 1);
	3030
	3031	const STRLEN tlen=len0+len1+len2;
		/* each %.*s takes a (length, string) pair; the final %*s pads the
		   output so that short windows still occupy 19 columns */
	3032	PerlIO_printf(Perl_debug_log,
	3033	"%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
	3034	(IV)(locinput - loc_bostr),
	3035	len0, s0,
	3036	len1, s1,
	3037	(docolor ? "" : "> <"),
	3038	len2, s2,
	3039	(int)(tlen > 19 ? 0 : 19 - tlen),
	3040	"");
	3041	}
	3042	}
	3043
	3044	#endif
	3045
	3046	/* reg_check_named_buff_matched()
	3047	* Checks to see if a named buffer has matched. The data array of
	3048	* buffer numbers corresponding to the buffer is expected to reside
	3049	* in the regexp->data->data array in the slot stored in the ARG() of
	3050	* node involved. Note that this routine doesn't actually care about the
	3051	* name, that information is not preserved from compilation to execution.
	3052	* Returns the index of the leftmost defined buffer with the given name
	3053	* or 0 if none of the buffers matched.
		*
		* rex:  the regexp being executed; its lastparen and offs[] are consulted
		* scan: the node whose ARG() selects the data slot holding the numbers
	3054	*/
	3055	STATIC I32
	3056	S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
	3057	{
	3058	I32 n;
	3059	RXi_GET_DECL(rex,rexi);
	3060	SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
		/* the PV buffer of sv_dat is read as an array of I32 buffer numbers;
		   its IV slot (SvIVX) gives how many entries there are */
	3061	I32 *nums=(I32*)SvPVX(sv_dat);
	3062
	3063	PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
	3064
	3065	for ( n=0; n<SvIVX(sv_dat); n++ ) {
		/* a buffer counts as matched when it is not beyond lastparen and its
		   end offset has been set (is not -1) */
	3066	if ((I32)rex->lastparen >= nums[n] &&
	3067	rex->offs[nums[n]].end != -1)
	3068	{
	3069	return nums[n];
	3070	}
	3071	}
	3072	return 0;
	3073	}
	3074
	3075
	3076	/* free all slabs above current one - called during LEAVE_SCOPE
		 *
		 * S_clear_backtrack_stack(): detaches every slab chained after
		 * PL_regmatch_slab and releases each one with Safefree(). PL_regmatch_slab
		 * itself is kept, with its next pointer reset to NULL. Does nothing when
		 * no later slab exists.
		 *
		 * p is unused; the parameter exists because the function is registered
		 * with SAVEDESTRUCTOR_X(), which passes a pointer argument.
		 */
	3077
	3078	STATIC void
	3079	S_clear_backtrack_stack(pTHX_ void *p)
	3080	{
	3081	regmatch_slab *s = PL_regmatch_slab->next;
	3082	PERL_UNUSED_ARG(p);
	3083
	3084	if (!s)
	3085	return;
		/* cut the chain after the current slab before freeing the tail */
	3086	PL_regmatch_slab->next = NULL;
	3087	while (s) {
		/* remember the slab to free, and advance before it is released */
	3088	regmatch_slab * const osl = s;
	3089	s = s->next;
	3090	Safefree(osl);
	3091	}
	3092	}
	3093
	3094
	3095	STATIC I32 /* 0 failure, 1 success */
	3096	S_regmatch(pTHX_ regmatch_info reginfo, regnode prog)
	3097	{
	3098	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	3099	dMY_CXT;
	3100	#endif
	3101	dVAR;
	3102	register const bool utf8_target = PL_reg_match_utf8;
	3103	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	3104	REGEXP *rex_sv = reginfo->prog;
	3105	regexp rex = (struct regexp )SvANY(rex_sv);
	3106	RXi_GET_DECL(rex,rexi);
	3107	I32 oldsave;
	3108	/* the current state. This is a cached copy of PL_regmatch_state */
	3109	register regmatch_state *st;
	3110	/* cache heavy used fields of st in registers */
	3111	register regnode *scan;
	3112	register regnode *next;
	3113	register U32 n = 0; /* general value; init to avoid compiler warning */
	3114	register I32 ln = 0; /* len or last; init to avoid compiler warning */
	3115	register char *locinput = PL_reginput;
	3116	register I32 nextchr; /* is always set to UCHARAT(locinput) */
	3117
	3118	bool result = 0; /* return value of S_regmatch */
	3119	int depth = 0; /* depth of backtrack stack */
	3120	U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
	3121	const U32 max_nochange_depth =
	3122	(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
	3123	3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
	3124	regmatch_state yes_state = NULL; / state to pop to on success of
	3125	subpattern */
	3126	/* mark_state piggy backs on the yes_state logic so that when we unwind
	3127	the stack on success we can update the mark_state as we go */
	3128	regmatch_state mark_state = NULL; / last mark state we have seen */
	3129	regmatch_state cur_eval = NULL; / most recent EVAL_AB state */
	3130	struct regmatch_state cur_curlyx = NULL; / most recent curlyx */
	3131	U32 state_num;
	3132	bool no_final = 0; /* prevent failure from backtracking? */
	3133	bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
	3134	char *startpoint = PL_reginput;
	3135	SV popmark = NULL; / are we looking for a mark? */
	3136	SV sv_commit = NULL; / last mark name seen in failure */
	3137	SV sv_yes_mark = NULL; / last mark name we have seen
	3138	during a successful match */
	3139	U32 lastopen = 0; /* last open we saw */
	3140	bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
	3141	SV* const oreplsv = GvSV(PL_replgv);
	3142	/* these three flags are set by various ops to signal information to
	3143	* the very next op. They have a useful lifetime of exactly one loop
	3144	* iteration, and are not preserved or restored by state pushes/pops
	3145	*/
	3146	bool sw = 0; /* the condition value in (?(cond)a\|b) */
	3147	bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
	3148	int logical = 0; /* the following EVAL is:
	3149	0: (?{...})
	3150	1: (?(?{...})X\|Y)
	3151	2: (??{...})
	3152	or the following IFMATCH/UNLESSM is:
	3153	false: plain (?=foo)
	3154	true: used as a condition: (?(?=foo))
	3155	*/
	3156	PAD* last_pad = NULL;
	3157	dMULTICALL;
	3158	I32 gimme = G_SCALAR;
	3159	CV caller_cv = NULL; / who called us */
	3160	CV last_pushed_cv = NULL; / most recently called (?{}) CV */
	3161	CHECKPOINT runops_cp; /* savestack position before executing EVAL */
	3162
	3163	#ifdef DEBUGGING
	3164	GET_RE_DEBUG_FLAGS_DECL;
	3165	#endif
	3166
	3167	/* shut up 'may be used uninitialized' compiler warnings for dMULTICALL */
	3168	multicall_oldcatch = 0;
	3169	multicall_cv = NULL;
	3170	cx = NULL;
	3171	PERL_UNUSED_VAR(multicall_cop);
	3172	PERL_UNUSED_VAR(newsp);
	3173
	3174
	3175	PERL_ARGS_ASSERT_REGMATCH;
	3176
	3177	DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
	3178	PerlIO_printf(Perl_debug_log,"regmatch start\n");
	3179	}));
	3180	/* on first ever call to regmatch, allocate first slab */
	3181	if (!PL_regmatch_slab) {
	3182	Newx(PL_regmatch_slab, 1, regmatch_slab);
	3183	PL_regmatch_slab->prev = NULL;
	3184	PL_regmatch_slab->next = NULL;
	3185	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
	3186	}
	3187
	3188	oldsave = PL_savestack_ix;
	3189	SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
	3190	SAVEVPTR(PL_regmatch_slab);
	3191	SAVEVPTR(PL_regmatch_state);
	3192
	3193	/* grab next free state slot */
	3194	st = ++PL_regmatch_state;
	3195	if (st > SLAB_LAST(PL_regmatch_slab))
	3196	st = PL_regmatch_state = S_push_slab(aTHX);
	3197
	3198	/* Note that nextchr is a byte even in UTF */
	3199	nextchr = UCHARAT(locinput);
	3200	scan = prog;
	3201	while (scan != NULL) {
	3202
	3203	DEBUG_EXECUTE_r( {
	3204	SV * const prop = sv_newmortal();
	3205	regnode *rnext=regnext(scan);
	3206	DUMP_EXEC_POS( locinput, scan, utf8_target );
	3207	regprop(rex, prop, scan);
	3208
	3209	PerlIO_printf(Perl_debug_log,
	3210	"%3"IVdf":%*s%s(%"IVdf")\n",
	3211	(IV)(scan - rexi->program), depth*2, "",
	3212	SvPVX_const(prop),
	3213	(PL_regkind[OP(scan)] == END \|\| !rnext) ?
	3214	0 : (IV)(rnext - rexi->program));
	3215	});
	3216
	3217	next = scan + NEXT_OFF(scan);
	3218	if (next == scan)
	3219	next = NULL;
	3220	state_num = OP(scan);
	3221
	3222	reenter_switch:
	3223
	3224	switch (state_num) {
	3225	case BOL:
	3226	if (locinput == PL_bostr)
	3227	{
	3228	/* reginfo->till = reginfo->bol; */
	3229	break;
	3230	}
	3231	sayNO;
	3232	case MBOL:
	3233	if (locinput == PL_bostr \|\|
	3234	((nextchr \|\| locinput < PL_regeol) && locinput[-1] == '\n'))
	3235	{
	3236	break;
	3237	}
	3238	sayNO;
	3239	case SBOL:
	3240	if (locinput == PL_bostr)
	3241	break;
	3242	sayNO;
	3243	case GPOS:
	3244	if (locinput == reginfo->ganch)
	3245	break;
	3246	sayNO;
	3247
	3248	case KEEPS:
	3249	/* update the startpoint */
	3250	st->u.keeper.val = rex->offs[0].start;
	3251	PL_reginput = locinput;
	3252	rex->offs[0].start = locinput - PL_bostr;
	3253	PUSH_STATE_GOTO(KEEPS_next, next);
	3254	/*NOT-REACHED*/
	3255	case KEEPS_next_fail:
	3256	/* rollback the start point change */
	3257	rex->offs[0].start = st->u.keeper.val;
	3258	sayNO_SILENT;
	3259	/*NOT-REACHED*/
	3260	case EOL:
	3261	goto seol;
	3262	case MEOL:
	3263	if ((nextchr \|\| locinput < PL_regeol) && nextchr != '\n')
	3264	sayNO;
	3265	break;
	3266	case SEOL:
	3267	seol:
	3268	if ((nextchr \|\| locinput < PL_regeol) && nextchr != '\n')
	3269	sayNO;
	3270	if (PL_regeol - locinput > 1)
	3271	sayNO;
	3272	break;
	3273	case EOS:
	3274	if (PL_regeol != locinput)
	3275	sayNO;
	3276	break;
	3277	case SANY:
	3278	if (!nextchr && locinput >= PL_regeol)
	3279	sayNO;
	3280	if (utf8_target) {
	3281	locinput += PL_utf8skip[nextchr];
	3282	if (locinput > PL_regeol)
	3283	sayNO;
	3284	nextchr = UCHARAT(locinput);
	3285	}
	3286	else
	3287	nextchr = UCHARAT(++locinput);
	3288	break;
	3289	case CANY:
	3290	if (!nextchr && locinput >= PL_regeol)
	3291	sayNO;
	3292	nextchr = UCHARAT(++locinput);
	3293	break;
	3294	case REG_ANY:
	3295	if ((!nextchr && locinput >= PL_regeol) \|\| nextchr == '\n')
	3296	sayNO;
	3297	if (utf8_target) {
	3298	locinput += PL_utf8skip[nextchr];
	3299	if (locinput > PL_regeol)
	3300	sayNO;
	3301	nextchr = UCHARAT(locinput);
	3302	}
	3303	else
	3304	nextchr = UCHARAT(++locinput);
	3305	break;
	3306
	3307	#undef ST
	3308	#define ST st->u.trie
	3309	case TRIEC:
	3310	/* In this case the charclass data is available inline so
	3311	we can fail fast without a lot of extra overhead.
	3312	*/
	3313	if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
	3314	DEBUG_EXECUTE_r(
	3315	PerlIO_printf(Perl_debug_log,
	3316	"%*s %sfailed to match trie start class...%s\n",
	3317	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3318	);
	3319	sayNO_SILENT;
	3320	assert(0); /* NOTREACHED */
	3321	}
	3322	/* FALL THROUGH */
	3323	case TRIE:
	3324	/* the basic plan of execution of the trie is:
	3325	* At the beginning, run though all the states, and
	3326	* find the longest-matching word. Also remember the position
	3327	* of the shortest matching word. For example, this pattern:
	3328	*    1  2 3 4    5
	3329	*    ab|a|x|abcd|abc
	3330	* when matched against the string "abcde", will generate
	3331	* accept states for all words except 3, with the longest
	3332	* matching word being 4, and the shortest being 1 (with
	3333	* the position being after char 1 of the string).
	3334	*
	3335	* Then for each matching word, in word order (i.e. 1,2,4,5),
	3336	* we run the remainder of the pattern; on each try setting
	3337	* the current position to the character following the word,
	3338	* returning to try the next word on failure.
	3339	*
	3340	* We avoid having to build a list of words at runtime by
	3341	* using a compile-time structure, wordinfo[].prev, which
	3342	* gives, for each word, the previous accepting word (if any).
	3343	* In the case above it would contain the mappings 1->2, 2->0,
	3344	* 3->0, 4->5, 5->1. We can use this table to generate, from
	3345	* the longest word (4 above), a list of all words, by
	3346	* following the list of prev pointers; this gives us the
	3347	* unordered list 4,5,1,2. Then given the current word we have
	3348	* just tried, we can go through the list and find the
	3349	* next-biggest word to try (so if we just failed on word 2,
	3350	* the next in the list is 4).
	3351	*
	3352	* Since at runtime we don't record the matching position in
	3353	* the string for each word, we have to work that out for
	3354	* each word we're about to process. The wordinfo table holds
	3355	* the character length of each word; given that we recorded
	3356	* at the start: the position of the shortest word and its
	3357	* length in chars, we just need to move the pointer the
	3358	* difference between the two char lengths. Depending on
	3359	* Unicode status and folding, that's cheap or expensive.
	3360	*
	3361	* This algorithm is optimised for the case where there are only a
	3362	* small number of accept states, i.e. 0,1, or maybe 2.
	3363	* With lots of accepts states, and having to try all of them,
	3364	* it becomes quadratic on number of accept states to find all
	3365	* the next words.
	3366	*/
	3367
	3368	{
	3369	/* what type of TRIE am I? (utf8 makes this contextual) */
	3370	DECL_TRIE_TYPE(scan);
	3371
	3372	/* what trie are we using right now */
	3373	reg_trie_data * const trie
	3374	= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
	3375	HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
	3376	U32 state = trie->startstate;
	3377
	3378	if (trie->bitmap && !TRIE_BITMAP_TEST(trie,*locinput) ) {
	3379	if (trie->states[ state ].wordnum) {
	3380	DEBUG_EXECUTE_r(
	3381	PerlIO_printf(Perl_debug_log,
	3382	"%*s %smatched empty string...%s\n",
	3383	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3384	);
	3385	if (!trie->jump)
	3386	break;
	3387	} else {
	3388	DEBUG_EXECUTE_r(
	3389	PerlIO_printf(Perl_debug_log,
	3390	"%*s %sfailed to match trie start class...%s\n",
	3391	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3392	);
	3393	sayNO_SILENT;
	3394	}
	3395	}
	3396
	3397	{
	3398	U8 uc = ( U8 )locinput;
	3399
	3400	STRLEN len = 0;
	3401	STRLEN foldlen = 0;
	3402	U8 uscan = (U8)NULL;
	3403	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	3404	U32 charcount = 0; /* how many input chars we have matched */
	3405	U32 accepted = 0; /* have we seen any accepting states? */
	3406
	3407	ST.jump = trie->jump;
	3408	ST.me = scan;
	3409	ST.firstpos = NULL;
	3410	ST.longfold = FALSE; /* char longer if folded => it's harder */
	3411	ST.nextword = 0;
	3412
	3413	/* fully traverse the TRIE; note the position of the
	3414	shortest accept state and the wordnum of the longest
	3415	accept state */
	3416
	3417	while ( state && uc <= (U8*)PL_regeol ) {
	3418	U32 base = trie->states[ state ].trans.base;
	3419	UV uvc = 0;
	3420	U16 charid = 0;
	3421	U16 wordnum;
	3422	wordnum = trie->states[ state ].wordnum;
	3423
	3424	if (wordnum) { /* it's an accept state */
	3425	if (!accepted) {
	3426	accepted = 1;
	3427	/* record first match position */
	3428	if (ST.longfold) {
	3429	ST.firstpos = (U8*)locinput;
	3430	ST.firstchars = 0;
	3431	}
	3432	else {
	3433	ST.firstpos = uc;
	3434	ST.firstchars = charcount;
	3435	}
	3436	}
	3437	if (!ST.nextword \|\| wordnum < ST.nextword)
	3438	ST.nextword = wordnum;
	3439	ST.topword = wordnum;
	3440	}
	3441
	3442	DEBUG_TRIE_EXECUTE_r({
	3443	DUMP_EXEC_POS( (char *)uc, scan, utf8_target );
	3444	PerlIO_printf( Perl_debug_log,
	3445	"%*s %sState: %4"UVxf" Accepted: %c ",
	3446	2+depth * 2, "", PL_colors[4],
	3447	(UV)state, (accepted ? 'Y' : 'N'));
	3448	});
	3449
	3450	/* read a char and goto next state */
	3451	if ( base ) {
	3452	I32 offset;
	3453	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	3454	uscan, len, uvc, charid, foldlen,
	3455	foldbuf, uniflags);
	3456	charcount++;
	3457	if (foldlen>0)
	3458	ST.longfold = TRUE;
	3459	if (charid &&
	3460	( ((offset =
	3461	base + charid - 1 - trie->uniquecharcount)) >= 0)
	3462
	3463	&& ((U32)offset < trie->lasttrans)
	3464	&& trie->trans[offset].check == state)
	3465	{
	3466	state = trie->trans[offset].next;
	3467	}
	3468	else {
	3469	state = 0;
	3470	}
	3471	uc += len;
	3472
	3473	}
	3474	else {
	3475	state = 0;
	3476	}
	3477	DEBUG_TRIE_EXECUTE_r(
	3478	PerlIO_printf( Perl_debug_log,
	3479	"Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
	3480	charid, uvc, (UV)state, PL_colors[5] );
	3481	);
	3482	}
	3483	if (!accepted)
	3484	sayNO;
	3485
	3486	/* calculate total number of accept states */
	3487	{
	3488	U16 w = ST.topword;
	3489	accepted = 0;
	3490	while (w) {
	3491	w = trie->wordinfo[w].prev;
	3492	accepted++;
	3493	}
	3494	ST.accepted = accepted;
	3495	}
	3496
	3497	DEBUG_EXECUTE_r(
	3498	PerlIO_printf( Perl_debug_log,
	3499	"%*s %sgot %"IVdf" possible matches%s\n",
	3500	REPORT_CODE_OFF + depth * 2, "",
	3501	PL_colors[4], (IV)ST.accepted, PL_colors[5] );
	3502	);
	3503	goto trie_first_try; /* jump into the fail handler */
	3504	}}
	3505	assert(0); /* NOTREACHED */
	3506
	3507	case TRIE_next_fail: /* we failed - try next alternative */
	3508	if ( ST.jump) {
	3509	REGCP_UNWIND(ST.cp);
	3510	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	3511	}
	3512	if (!--ST.accepted) {
	3513	DEBUG_EXECUTE_r({
	3514	PerlIO_printf( Perl_debug_log,
	3515	"%*s %sTRIE failed...%s\n",
	3516	REPORT_CODE_OFF+depth*2, "",
	3517	PL_colors[4],
	3518	PL_colors[5] );
	3519	});
	3520	sayNO_SILENT;
	3521	}
	3522	{
	3523	/* Find next-highest word to process. Note that this code
	3524	* is O(N^2) per trie run (O(N) per branch), so keep tight */
	3525	register U16 min = 0;
	3526	register U16 word;
	3527	register U16 const nextword = ST.nextword;
	3528	register reg_trie_wordinfo * const wordinfo
	3529	= ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
	3530	for (word=ST.topword; word; word=wordinfo[word].prev) {
	3531	if (word > nextword && (!min \|\| word < min))
	3532	min = word;
	3533	}
	3534	ST.nextword = min;
	3535	}
	3536
	3537	trie_first_try:
	3538	if (do_cutgroup) {
	3539	do_cutgroup = 0;
	3540	no_final = 0;
	3541	}
	3542
	3543	if ( ST.jump) {
	3544	ST.lastparen = rex->lastparen;
	3545	ST.lastcloseparen = rex->lastcloseparen;
	3546	REGCP_SET(ST.cp);
	3547	}
	3548
	3549	/* find start char of end of current word */
	3550	{
	3551	U32 chars; /* how many chars to skip */
	3552	U8 *uc = ST.firstpos;
	3553	reg_trie_data * const trie
	3554	= (reg_trie_data*)rexi->data->data[ARG(ST.me)];
	3555
	3556	assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3557	>= ST.firstchars);
	3558	chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3559	- ST.firstchars;
	3560
	3561	if (ST.longfold) {
	3562	/* the hard option - fold each char in turn and find
	3563	* its folded length (which may be different) */
	3564	U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
	3565	STRLEN foldlen;
	3566	STRLEN len;
	3567	UV uvc;
	3568	U8 *uscan;
	3569
	3570	while (chars) {
	3571	if (utf8_target) {
	3572	uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len,
	3573	uniflags);
	3574	uc += len;
	3575	}
	3576	else {
	3577	uvc = *uc;
	3578	uc++;
	3579	}
	3580	uvc = to_uni_fold(uvc, foldbuf, &foldlen);
	3581	uscan = foldbuf;
	3582	while (foldlen) {
	3583	if (!--chars)
	3584	break;
	3585	uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len,
	3586	uniflags);
	3587	uscan += len;
	3588	foldlen -= len;
	3589	}
	3590	}
	3591	}
	3592	else {
	3593	if (utf8_target)
	3594	while (chars--)
	3595	uc += UTF8SKIP(uc);
	3596	else
	3597	uc += chars;
	3598	}
	3599	PL_reginput = (char *)uc;
	3600	}
	3601
	3602	scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
	3603	? ST.jump[ST.nextword]
	3604	: NEXT_OFF(ST.me));
	3605
	3606	DEBUG_EXECUTE_r({
	3607	PerlIO_printf( Perl_debug_log,
	3608	"%*s %sTRIE matched word #%d, continuing%s\n",
	3609	REPORT_CODE_OFF+depth*2, "",
	3610	PL_colors[4],
	3611	ST.nextword,
	3612	PL_colors[5]
	3613	);
	3614	});
	3615
	3616	if (ST.accepted > 1 \|\| has_cutgroup) {
	3617	PUSH_STATE_GOTO(TRIE_next, scan);
	3618	assert(0); /* NOTREACHED */
	3619	}
	3620	/* only one choice left - just continue */
	3621	DEBUG_EXECUTE_r({
	3622	AV *const trie_words
	3623	= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
	3624	SV ** const tmp = av_fetch( trie_words,
	3625	ST.nextword-1, 0 );
	3626	SV *sv= tmp ? sv_newmortal() : NULL;
	3627
	3628	PerlIO_printf( Perl_debug_log,
	3629	"%*s %sonly one match left, short-circuiting: #%d <%s>%s\n",
	3630	REPORT_CODE_OFF+depth*2, "", PL_colors[4],
	3631	ST.nextword,
	3632	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	3633	PL_colors[0], PL_colors[1],
	3634	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)\|PERL_PV_ESCAPE_NONASCII
	3635	)
	3636	: "not compiled under -Dr",
	3637	PL_colors[5] );
	3638	});
	3639
	3640	locinput = PL_reginput;
	3641	nextchr = UCHARAT(locinput);
	3642	continue; /* execute rest of RE */
	3643	assert(0); /* NOTREACHED */
	3644	#undef ST
	3645
	3646	case EXACT: {
	3647	char *s = STRING(scan);
	3648	ln = STR_LEN(scan);
	3649	if (utf8_target != UTF_PATTERN) {
	3650	/* The target and the pattern have differing utf8ness. */
	3651	char *l = locinput;
	3652	const char * const e = s + ln;
	3653
	3654	if (utf8_target) {
	3655	/* The target is utf8, the pattern is not utf8. */
	3656	while (s < e) {
	3657	STRLEN ulen;
	3658	if (l >= PL_regeol)
	3659	sayNO;
	3660	if (NATIVE_TO_UNI((U8)s) !=
	3661	utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
	3662	uniflags))
	3663	sayNO;
	3664	l += ulen;
	3665	s ++;
	3666	}
	3667	}
	3668	else {
	3669	/* The target is not utf8, the pattern is utf8. */
	3670	while (s < e) {
	3671	STRLEN ulen;
	3672	if (l >= PL_regeol)
	3673	sayNO;
	3674	if (NATIVE_TO_UNI(((U8)l)) !=
	3675	utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
	3676	uniflags))
	3677	sayNO;
	3678	s += ulen;
	3679	l ++;
	3680	}
	3681	}
	3682	locinput = l;
	3683	nextchr = UCHARAT(locinput);
	3684	break;
	3685	}
	3686	/* The target and the pattern have the same utf8ness. */
	3687	/* Inline the first character, for speed. */
	3688	if (UCHARAT(s) != nextchr)
	3689	sayNO;
	3690	if (PL_regeol - locinput < ln)
	3691	sayNO;
	3692	if (ln > 1 && memNE(s, locinput, ln))
	3693	sayNO;
	3694	locinput += ln;
	3695	nextchr = UCHARAT(locinput);
	3696	break;
	3697	}
	3698	case EXACTFL: {
	3699	re_fold_t folder;
	3700	const U8 * fold_array;
	3701	const char * s;
	3702	U32 fold_utf8_flags;
	3703
	3704	PL_reg_flags \|= RF_tainted;
	3705	folder = foldEQ_locale;
	3706	fold_array = PL_fold_locale;
	3707	fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
	3708	goto do_exactf;
	3709
	3710	case EXACTFU_SS:
	3711	case EXACTFU_TRICKYFOLD:
	3712	case EXACTFU:
	3713	folder = foldEQ_latin1;
	3714	fold_array = PL_fold_latin1;
	3715	fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
	3716	goto do_exactf;
	3717
	3718	case EXACTFA:
	3719	folder = foldEQ_latin1;
	3720	fold_array = PL_fold_latin1;
	3721	fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	3722	goto do_exactf;
	3723
	3724	case EXACTF:
	3725	folder = foldEQ;
	3726	fold_array = PL_fold;
	3727	fold_utf8_flags = 0;
	3728
	3729	do_exactf:
	3730	s = STRING(scan);
	3731	ln = STR_LEN(scan);
	3732
	3733	if (utf8_target \|\| UTF_PATTERN \|\| state_num == EXACTFU_SS) {
	3734	/* Either target or the pattern are utf8, or has the issue where
	3735	* the fold lengths may differ. */
	3736	const char * const l = locinput;
	3737	char *e = PL_regeol;
	3738
	3739	if (! foldEQ_utf8_flags(s, 0, ln, cBOOL(UTF_PATTERN),
	3740	l, &e, 0, utf8_target, fold_utf8_flags))
	3741	{
	3742	sayNO;
	3743	}
	3744	locinput = e;
	3745	nextchr = UCHARAT(locinput);
	3746	break;
	3747	}
	3748
	3749	/* Neither the target nor the pattern are utf8 */
	3750	if (UCHARAT(s) != nextchr &&
	3751	UCHARAT(s) != fold_array[nextchr])
	3752	{
	3753	sayNO;
	3754	}
	3755	if (PL_regeol - locinput < ln)
	3756	sayNO;
	3757	if (ln > 1 && ! folder(s, locinput, ln))
	3758	sayNO;
	3759	locinput += ln;
	3760	nextchr = UCHARAT(locinput);
	3761	break;
	3762	}
	3763
	3764	/* XXX Could improve efficiency by separating these all out using a
	3765	* macro or in-line function. At that point regcomp.c would no longer
	3766	* have to set the FLAGS fields of these */
	3767	case BOUNDL:
	3768	case NBOUNDL:
	3769	PL_reg_flags \|= RF_tainted;
	3770	/* FALL THROUGH */
	3771	case BOUND:
	3772	case BOUNDU:
	3773	case BOUNDA:
	3774	case NBOUND:
	3775	case NBOUNDU:
	3776	case NBOUNDA:
	3777	/* was last char in word? */
	3778	if (utf8_target
	3779	&& FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET
	3780	&& FLAGS(scan) != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
	3781	{
	3782	if (locinput == PL_bostr)
	3783	ln = '\n';
	3784	else {
	3785	const U8 * const r = reghop3((U8)locinput, -1, (U8)PL_bostr);
	3786
	3787	ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
	3788	}
	3789	if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
	3790	ln = isALNUM_uni(ln);
	3791	LOAD_UTF8_CHARCLASS_ALNUM();
	3792	n = swash_fetch(PL_utf8_alnum, (U8*)locinput, utf8_target);
	3793	}
	3794	else {
	3795	ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
	3796	n = isALNUM_LC_utf8((U8*)locinput);
	3797	}
	3798	}
	3799	else {
	3800
	3801	/* Here the string isn't utf8, or is utf8 and only ascii
	3802	* characters are to match \w. In the latter case looking at
	3803	* the byte just prior to the current one may be just the final
	3804	* byte of a multi-byte character. This is ok. There are two
	3805	* cases:
	3806	* 1) it is a single byte character, and then the test is doing
	3807	* just what it's supposed to.
	3808	* 2) it is a multi-byte character, in which case the final
	3809	* byte is never mistakable for ASCII, and so the test
	3810	* will say it is not a word character, which is the
	3811	* correct answer. */
	3812	ln = (locinput != PL_bostr) ?
	3813	UCHARAT(locinput - 1) : '\n';
	3814	switch (FLAGS(scan)) {
	3815	case REGEX_UNICODE_CHARSET:
	3816	ln = isWORDCHAR_L1(ln);
	3817	n = isWORDCHAR_L1(nextchr);
	3818	break;
	3819	case REGEX_LOCALE_CHARSET:
	3820	ln = isALNUM_LC(ln);
	3821	n = isALNUM_LC(nextchr);
	3822	break;
	3823	case REGEX_DEPENDS_CHARSET:
	3824	ln = isALNUM(ln);
	3825	n = isALNUM(nextchr);
	3826	break;
	3827	case REGEX_ASCII_RESTRICTED_CHARSET:
	3828	case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
	3829	ln = isWORDCHAR_A(ln);
	3830	n = isWORDCHAR_A(nextchr);
	3831	break;
	3832	default:
	3833	Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
	3834	break;
	3835	}
	3836	}
	3837	/* Note requires that all BOUNDs be lower than all NBOUNDs in
	3838	* regcomp.sym */
	3839	if (((!ln) == (!n)) == (OP(scan) < NBOUND))
	3840	sayNO;
	3841	break;
	3842	case ANYOFV:
	3843	case ANYOF:
	3844	if (utf8_target \|\| state_num == ANYOFV) {
	3845	STRLEN inclasslen = PL_regeol - locinput;
	3846	if (locinput >= PL_regeol)
	3847	sayNO;
	3848
	3849	if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, utf8_target))
	3850	sayNO;
	3851	locinput += inclasslen;
	3852	nextchr = UCHARAT(locinput);
	3853	break;
	3854	}
	3855	else {
	3856	if (nextchr < 0)
	3857	nextchr = UCHARAT(locinput);
	3858	if (!nextchr && locinput >= PL_regeol)
	3859	sayNO;
	3860	if (!REGINCLASS(rex, scan, (U8*)locinput))
	3861	sayNO;
	3862	nextchr = UCHARAT(++locinput);
	3863	break;
	3864	}
	3865	break;
	3866	/* Special char classes - The defines start on line 129 or so */
	3867	CCC_TRY_U(ALNUM, NALNUM, isWORDCHAR,
	3868	ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8,
	3869	ALNUMU, NALNUMU, isWORDCHAR_L1,
	3870	ALNUMA, NALNUMA, isWORDCHAR_A,
	3871	alnum, "a");
	3872
	3873	CCC_TRY_U(SPACE, NSPACE, isSPACE,
	3874	SPACEL, NSPACEL, isSPACE_LC, isSPACE_LC_utf8,
	3875	SPACEU, NSPACEU, isSPACE_L1,
	3876	SPACEA, NSPACEA, isSPACE_A,
	3877	space, " ");
	3878
	3879	CCC_TRY(DIGIT, NDIGIT, isDIGIT,
	3880	DIGITL, NDIGITL, isDIGIT_LC, isDIGIT_LC_utf8,
	3881	DIGITA, NDIGITA, isDIGIT_A,
	3882	digit, "0");
	3883
	3884	case CLUMP: /* Match \X: logical Unicode character. This is defined as
	3885	a Unicode extended Grapheme Cluster */
	3886	/* From http://www.unicode.org/reports/tr29 (5.2 version). An
	3887	extended Grapheme Cluster is:
	3888
	3889	CR LF
	3890	\| Prepend* Begin Extend*
	3891	\| .
	3892
	3893	Begin is (Hangul-syllable \| ! Control)
	3894	Extend is (Grapheme_Extend \| Spacing_Mark)
	3895	Control is [ GCB_Control CR LF ]
	3896
	3897	The discussion below shows how the code for CLUMP is derived
	3898	from this regex. Note that most of these concepts are from
	3899	property values of the Grapheme Cluster Boundary (GCB) property.
	3900	No code point can have multiple property values for a given
	3901	property. Thus a code point in Prepend can't be in Control, but
	3902	it must be in !Control. This is why Control above includes
	3903	GCB_Control plus CR plus LF. The latter two are used in the GCB
	3904	property separately, and so can't be in GCB_Control, even though
	3905	they logically are controls. Control is not the same as gc=cc,
	3906	but includes format and other characters as well.
	3907
	3908	The Unicode definition of Hangul-syllable is:
	3909	L+
	3910	\| (L* ( ( V \| LV ) V* \| LVT ) T*)
	3911	\| T+
	3912	)
	3913	Each of these is a value for the GCB property, and hence must be
	3914	disjoint, so the order they are tested is immaterial, so the
	3915	above can safely be changed to
	3916	T+
	3917	| L+
	3918	| (L* ( LVT | ( V | LV ) V*) T*)
	3919
	3920	The last two terms can be combined like this:
	3921	L* ( L
	3922	| (( LVT | ( V | LV ) V*) T*))
	3923
	3924	And refactored into this:
	3925	L* (L \| LVT T* \| V V* T* \| LV V* T*)
	3926
	3927	That means that if we have seen any L's at all we can quit
	3928	there, but if the next character is an LVT, a V, or an LV we
	3929	should keep going.
	3930
	3931	There is a subtlety with Prepend* which showed up in testing.
	3932	Note that the Begin, and only the Begin is required in:
	3933	\| Prepend* Begin Extend*
	3934	Also, Begin contains '! Control'. A Prepend must be a
	3935	'! Control', which means it must also be a Begin. What it
	3936	comes down to is that if we match Prepend* and then find no
	3937	suitable Begin afterwards, then if we backtrack over the last
	3938	Prepend, that one will be a suitable Begin.
	3939	*/
	3940
	3941	if (locinput >= PL_regeol)
	3942	sayNO;
	3943	if (! utf8_target) {
	3944
	3945	/* Match either CR LF or '.', as all the other possibilities
	3946	* require utf8 */
	3947	locinput++; /* Match the . or CR */
	3948	if (nextchr == '\r' /* And if it was CR, and the next is LF,
	3949	match the LF */
	3950	&& locinput < PL_regeol
	3951	&& UCHARAT(locinput) == '\n') locinput++;
	3952	}
	3953	else {
	3954
	3955	/* Utf8: See if is ( CR LF ); already know that locinput <
	3956	* PL_regeol, so locinput+1 is in bounds */
	3957	if (nextchr == '\r' && UCHARAT(locinput + 1) == '\n') {
	3958	locinput += 2;
	3959	}
	3960	else {
	3961	/* In case have to backtrack to beginning, then match '.' */
	3962	char *starting = locinput;
	3963
	3964	/* In case have to backtrack the last prepend */
	3965	char *previous_prepend = 0;
	3966
	3967	LOAD_UTF8_CHARCLASS_GCB();
	3968
	3969	/* Match (prepend)* */
	3970	while (locinput < PL_regeol
	3971	&& swash_fetch(PL_utf8_X_prepend,
	3972	(U8*)locinput, utf8_target))
	3973	{
	3974	previous_prepend = locinput;
	3975	locinput += UTF8SKIP(locinput);
	3976	}
	3977
	3978	/* As noted above, if we matched a prepend character, but
	3979	* the next thing won't match, back off the last prepend we
	3980	* matched, as it is guaranteed to match the begin */
	3981	if (previous_prepend
	3982	&& (locinput >= PL_regeol
	3983	\|\| ! swash_fetch(PL_utf8_X_begin,
	3984	(U8*)locinput, utf8_target)))
	3985	{
	3986	locinput = previous_prepend;
	3987	}
	3988
	3989	/* Note that here we know PL_regeol > locinput, as we
	3990	* tested that upon input to this switch case, and if we
	3991	* moved locinput forward, we tested the result just above
	3992	* and it either passed, or we backed off so that it will
	3993	* now pass */
	3994	if (! swash_fetch(PL_utf8_X_begin, (U8*)locinput, utf8_target)) {
	3995
	3996	/* Here did not match the required 'Begin' in the
	3997	* second term. So just match the very first
	3998	* character, the '.' of the final term of the regex */
	3999	locinput = starting + UTF8SKIP(starting);
	4000	} else {
	4001
	4002	/* Here is the beginning of a character that can have
	4003	* an extender. It is either a hangul syllable, or a
	4004	* non-control */
	4005	if (swash_fetch(PL_utf8_X_non_hangul,
	4006	(U8*)locinput, utf8_target))
	4007	{
	4008
	4009	/* Here not a Hangul syllable, must be a
	4010	* ('! Control') */
	4011	locinput += UTF8SKIP(locinput);
	4012	} else {
	4013
	4014	/* Here is a Hangul syllable. It can be composed
	4015	* of several individual characters. One
	4016	* possibility is T+ */
	4017	if (swash_fetch(PL_utf8_X_T,
	4018	(U8*)locinput, utf8_target))
	4019	{
	4020	while (locinput < PL_regeol
	4021	&& swash_fetch(PL_utf8_X_T,
	4022	(U8*)locinput, utf8_target))
	4023	{
	4024	locinput += UTF8SKIP(locinput);
	4025	}
	4026	} else {
	4027
	4028	/* Here, not T+, but is a Hangul. That means
	4029	* it is one of the others: L, LV, LVT or V,
	4030	* and matches:
	4031	* L* (L | LVT T* | V V* T* | LV V* T*) */
	4032
	4033	/* Match L* */
	4034	while (locinput < PL_regeol
	4035	&& swash_fetch(PL_utf8_X_L,
	4036	(U8*)locinput, utf8_target))
	4037	{
	4038	locinput += UTF8SKIP(locinput);
	4039	}
	4040
	4041	/* Here, have exhausted L*. If the next
	4042	* character is not an LV, LVT nor V, it means
	4043	* we had to have at least one L, so matches L+
	4044	* in the original equation, we have a complete
	4045	* hangul syllable. Are done. */
	4046
	4047	if (locinput < PL_regeol
	4048	&& swash_fetch(PL_utf8_X_LV_LVT_V,
	4049	(U8*)locinput, utf8_target))
	4050	{
	4051
	4052	/* Otherwise keep going. Must be LV, LVT
	4053	* or V. See if LVT */
	4054	if (swash_fetch(PL_utf8_X_LVT,
	4055	(U8*)locinput, utf8_target))
	4056	{
	4057	locinput += UTF8SKIP(locinput);
	4058	} else {
	4059
	4060	/* Must be V or LV. Take it, then
	4061	* match V* */
	4062	locinput += UTF8SKIP(locinput);
	4063	while (locinput < PL_regeol
	4064	&& swash_fetch(PL_utf8_X_V,
	4065	(U8*)locinput, utf8_target))
	4066	{
	4067	locinput += UTF8SKIP(locinput);
	4068	}
	4069	}
	4070
	4071	/* And any of LV, LVT, or V can be followed
	4072	* by T* */
	4073	while (locinput < PL_regeol
	4074	&& swash_fetch(PL_utf8_X_T,
	4075	(U8*)locinput,
	4076	utf8_target))
	4077	{
	4078	locinput += UTF8SKIP(locinput);
	4079	}
	4080	}
	4081	}
	4082	}
	4083
	4084	/* Match any extender */
	4085	while (locinput < PL_regeol
	4086	&& swash_fetch(PL_utf8_X_extend,
	4087	(U8*)locinput, utf8_target))
	4088	{
	4089	locinput += UTF8SKIP(locinput);
	4090	}
	4091	}
	4092	}
	4093	if (locinput > PL_regeol) sayNO;
	4094	}
	4095	nextchr = UCHARAT(locinput);
	4096	break;
	4097
	4098	case NREFFL:
	4099	{ /* The capture buffer cases. The ones beginning with N for the
	4100	named buffers just convert to the equivalent numbered and
	4101	pretend they were called as the corresponding numbered buffer
	4102	op. */
	4103	/* don't initialize these in the declaration, it makes C++
	4104	unhappy */
	4105	char *s;
	4106	char type;
	4107	re_fold_t folder;
	4108	const U8 *fold_array;
	4109	UV utf8_fold_flags;
	4110
	4111	PL_reg_flags \|= RF_tainted;
	4112	folder = foldEQ_locale;
	4113	fold_array = PL_fold_locale;
	4114	type = REFFL;
	4115	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4116	goto do_nref;
	4117
	4118	case NREFFA:
	4119	folder = foldEQ_latin1;
	4120	fold_array = PL_fold_latin1;
	4121	type = REFFA;
	4122	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4123	goto do_nref;
	4124
	4125	case NREFFU:
	4126	folder = foldEQ_latin1;
	4127	fold_array = PL_fold_latin1;
	4128	type = REFFU;
	4129	utf8_fold_flags = 0;
	4130	goto do_nref;
	4131
	4132	case NREFF:
	4133	folder = foldEQ;
	4134	fold_array = PL_fold;
	4135	type = REFF;
	4136	utf8_fold_flags = 0;
	4137	goto do_nref;
	4138
	4139	case NREF:
	4140	type = REF;
	4141	folder = NULL;
	4142	fold_array = NULL;
	4143	utf8_fold_flags = 0;
	4144	do_nref:
	4145
	4146	/* For the named back references, find the corresponding buffer
	4147	* number */
	4148	n = reg_check_named_buff_matched(rex,scan);
	4149
	4150	if ( ! n ) {
	4151	sayNO;
	4152	}
	4153	goto do_nref_ref_common;
	4154
	4155	case REFFL:
	4156	PL_reg_flags \|= RF_tainted;
	4157	folder = foldEQ_locale;
	4158	fold_array = PL_fold_locale;
	4159	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4160	goto do_ref;
	4161
	4162	case REFFA:
	4163	folder = foldEQ_latin1;
	4164	fold_array = PL_fold_latin1;
	4165	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4166	goto do_ref;
	4167
	4168	case REFFU:
	4169	folder = foldEQ_latin1;
	4170	fold_array = PL_fold_latin1;
	4171	utf8_fold_flags = 0;
	4172	goto do_ref;
	4173
	4174	case REFF:
	4175	folder = foldEQ;
	4176	fold_array = PL_fold;
	4177	utf8_fold_flags = 0;
	4178	goto do_ref;
	4179
	4180	case REF:
	4181	folder = NULL;
	4182	fold_array = NULL;
	4183	utf8_fold_flags = 0;
	4184
	4185	do_ref:
	4186	type = OP(scan);
	4187	n = ARG(scan); /* which paren pair */
	4188
	4189	do_nref_ref_common:
	4190	ln = rex->offs[n].start;
	4191	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4192	if (rex->lastparen < n \|\| ln == -1)
	4193	sayNO; /* Do not match unless seen CLOSEn. */
	4194	if (ln == rex->offs[n].end)
	4195	break;
	4196
	4197	s = PL_bostr + ln;
	4198	if (type != REF /* REF can do byte comparison */
	4199	&& (utf8_target \|\| type == REFFU))
	4200	{ /* XXX handle REFFL better */
	4201	char * limit = PL_regeol;
	4202
	4203	/* This call case insensitively compares the entire buffer
	4204	* at s, with the current input starting at locinput, but
	4205	* not going off the end given by PL_regeol, and returns in
	4206	* limit upon success, how much of the current input was
	4207	* matched */
	4208	if (! foldEQ_utf8_flags(s, NULL, rex->offs[n].end - ln, utf8_target,
	4209	locinput, &limit, 0, utf8_target, utf8_fold_flags))
	4210	{
	4211	sayNO;
	4212	}
	4213	locinput = limit;
	4214	nextchr = UCHARAT(locinput);
	4215	break;
	4216	}
	4217
	4218	/* Not utf8: Inline the first character, for speed. */
	4219	if (UCHARAT(s) != nextchr &&
	4220	(type == REF \|\|
	4221	UCHARAT(s) != fold_array[nextchr]))
	4222	sayNO;
	4223	ln = rex->offs[n].end - ln;
	4224	if (locinput + ln > PL_regeol)
	4225	sayNO;
	4226	if (ln > 1 && (type == REF
	4227	? memNE(s, locinput, ln)
	4228	: ! folder(s, locinput, ln)))
	4229	sayNO;
	4230	locinput += ln;
	4231	nextchr = UCHARAT(locinput);
	4232	break;
	4233	}
	4234	case NOTHING:
	4235	case TAIL:
	4236	break;
	4237	case BACK:
	4238	break;
	4239
	4240	#undef ST
	4241	#define ST st->u.eval
	4242	{
	4243	SV *ret;
	4244	REGEXP *re_sv;
	4245	regexp *re;
	4246	regexp_internal *rei;
	4247	regnode *startpoint;
	4248
	4249	case GOSTART:
	4250	case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
	4251	if (cur_eval && cur_eval->locinput==locinput) {
	4252	if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
	4253	Perl_croak(aTHX_ "Infinite recursion in regex");
	4254	if ( ++nochange_depth > max_nochange_depth )
	4255	Perl_croak(aTHX_
	4256	"Pattern subroutine nesting without pos change"
	4257	" exceeded limit in regex");
	4258	} else {
	4259	nochange_depth = 0;
	4260	}
	4261	re_sv = rex_sv;
	4262	re = rex;
	4263	rei = rexi;
	4264	if (OP(scan)==GOSUB) {
	4265	startpoint = scan + ARG2L(scan);
	4266	ST.close_paren = ARG(scan);
	4267	} else {
	4268	startpoint = rei->program+1;
	4269	ST.close_paren = 0;
	4270	}
	4271	goto eval_recurse_doit;
	4272	assert(0); /* NOTREACHED */
	4273	case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X\|Y)B/ */
	4274	if (cur_eval && cur_eval->locinput==locinput) {
	4275	if ( ++nochange_depth > max_nochange_depth )
	4276	Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
	4277	} else {
	4278	nochange_depth = 0;
	4279	}
	4280	{
	4281	/* execute the code in the {...} */
	4282
	4283	dSP;
	4284	SV ** before;
	4285	OP * const oop = PL_op;
	4286	COP * const ocurcop = PL_curcop;
	4287	OP *nop;
	4288	char *saved_regeol = PL_regeol;
	4289	struct re_save_state saved_state;
	4290	CV *newcv;
	4291
	4292	/* save all paren positions */
	4293	regcppush(rex, 0);
	4294	REGCP_SET(runops_cp);
	4295
	4296	/* To not corrupt the existing regex state while executing the
	4297	* eval we would normally put it on the save stack, like with
	4298	* save_re_context. However, re-evals have a weird scoping so we
	4299	* can't just add ENTER/LEAVE here. With that, things like
	4300	*
	4301	* (?{$a=2})(a(?{local$a=$a+1}))aakc(?{$b=$a})
	4302	*
	4303	* would break, as they expect the localisation to be unwound
	4304	* only when the re-engine backtracks through the bit that
	4305	* localised it.
	4306	*
	4307	* What we do instead is just save the state in a local C
	4308	* variable.
	4309	*/
	4310	Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
	4311
	4312	PL_reg_state.re_reparsing = FALSE;
	4313
	4314	if (!caller_cv)
	4315	caller_cv = find_runcv(NULL);
	4316
	4317	n = ARG(scan);
	4318
	4319	if (rexi->data->what[n] == 'r') { /* code from an external qr */
	4320	newcv = ((struct regexp *)SvANY(
	4321	(REGEXP*)(rexi->data->data[n])
	4322	))->qr_anoncv
	4323	;
	4324	nop = (OP*)rexi->data->data[n+1];
	4325	}
	4326	else if (rexi->data->what[n] == 'l') { /* literal code */
	4327	newcv = caller_cv;
	4328	nop = (OP*)rexi->data->data[n];
	4329	assert(CvDEPTH(newcv));
	4330	}
	4331	else {
	4332	/* literal with own CV */
	4333	assert(rexi->data->what[n] == 'L');
	4334	newcv = rex->qr_anoncv;
	4335	nop = (OP*)rexi->data->data[n];
	4336	}
	4337
	4338	/* normally if we're about to execute code from the same
	4339	* CV that we used previously, we just use the existing
	4340	* CX stack entry. However, it's possible that in the
	4341	* meantime we may have backtracked, popped from the save
	4342	* stack, and undone the SAVECOMPPAD(s) associated with
	4343	* PUSH_MULTICALL; in which case PL_comppad no longer
	4344	* points to newcv's pad. */
	4345	if (newcv != last_pushed_cv \|\| PL_comppad != last_pad)
	4346	{
	4347	I32 depth = (newcv == caller_cv) ? 0 : 1;
	4348	if (last_pushed_cv) {
	4349	CHANGE_MULTICALL_WITHDEPTH(newcv, depth);
	4350	}
	4351	else {
	4352	PUSH_MULTICALL_WITHDEPTH(newcv, depth);
	4353	}
	4354	last_pushed_cv = newcv;
	4355	}
	4356	last_pad = PL_comppad;
	4357
	4358	/* the initial nextstate you would normally execute
	4359	* at the start of an eval (which would cause error
	4360	* messages to come from the eval), may be optimised
	4361	* away from the execution path in the regex code blocks;
	4362	* so manually set PL_curcop to it initially */
	4363	{
	4364	OP *o = cUNOPx(nop)->op_first;
	4365	assert(o->op_type == OP_NULL);
	4366	if (o->op_targ == OP_SCOPE) {
	4367	o = cUNOPo->op_first;
	4368	}
	4369	else {
	4370	assert(o->op_targ == OP_LEAVE);
	4371	o = cUNOPo->op_first;
	4372	assert(o->op_type == OP_ENTER);
	4373	o = o->op_sibling;
	4374	}
	4375
	4376	if (o->op_type != OP_STUB) {
	4377	assert( o->op_type == OP_NEXTSTATE
	4378	\|\| o->op_type == OP_DBSTATE
	4379	\|\| (o->op_type == OP_NULL
	4380	&& ( o->op_targ == OP_NEXTSTATE
	4381	\|\| o->op_targ == OP_DBSTATE
	4382	)
	4383	)
	4384	);
	4385	PL_curcop = (COP*)o;
	4386	}
	4387	}
	4388	nop = nop->op_next;
	4389
	4390	DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
	4391	" re EVAL PL_op=0x%"UVxf"\n", PTR2UV(nop)) );
	4392
	4393	rex->offs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
	4394
	4395	if (sv_yes_mark) {
	4396	SV *sv_mrk = get_sv("REGMARK", 1);
	4397	sv_setsv(sv_mrk, sv_yes_mark);
	4398	}
	4399
	4400	/* we don't use MULTICALL here as we want to call the
	4401	* first op of the block of interest, rather than the
	4402	* first op of the sub */
	4403	before = SP;
	4404	PL_op = nop;
	4405	CALLRUNOPS(aTHX); /* Scalar context. */
	4406	SPAGAIN;
	4407	if (SP == before)
	4408	ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
	4409	else {
	4410	ret = POPs;
	4411	PUTBACK;
	4412	}
	4413
	4414	/* before restoring everything, evaluate the returned
	4415	* value, so that 'uninit' warnings don't use the wrong
	4416	* PL_op or pad. Also need to process any magic vars
	4417	* (e.g. $1) before parentheses are restored */
	4418
	4419	PL_op = NULL;
	4420
	4421	re_sv = NULL;
	4422	if (logical == 0) /* (?{})/ */
	4423	sv_setsv(save_scalar(PL_replgv), ret); /* $^R */
	4424	else if (logical == 1) { /* /(?(?{...})X\|Y)/ */
	4425	sw = cBOOL(SvTRUE(ret));
	4426	logical = 0;
	4427	}
	4428	else { /* /(??{}) */
	4429	/* if it's overloaded, let the regex compiler handle
	4430	* it; otherwise extract regex, or stringify */
	4431	if (!SvAMAGIC(ret)) {
	4432	SV *sv = ret;
	4433	if (SvROK(sv))
	4434	sv = SvRV(sv);
	4435	if (SvTYPE(sv) == SVt_REGEXP)
	4436	re_sv = (REGEXP*) sv;
	4437	else if (SvSMAGICAL(sv)) {
	4438	MAGIC *mg = mg_find(sv, PERL_MAGIC_qr);
	4439	if (mg)
	4440	re_sv = (REGEXP *) mg->mg_obj;
	4441	}
	4442
	4443	/* force any magic, undef warnings here */
	4444	if (!re_sv) {
	4445	ret = sv_mortalcopy(ret);
	4446	(void) SvPV_force_nolen(ret);
	4447	}
	4448	}
	4449
	4450	}
	4451
	4452	Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
	4453
	4454	/* *** Note that at this point we don't restore
	4455	* PL_comppad, (or pop the CxSUB) on the assumption it may
	4456	* be used again soon. This is safe as long as nothing
	4457	* in the regexp code uses the pad ! */
	4458	PL_op = oop;
	4459	PL_curcop = ocurcop;
	4460	PL_regeol = saved_regeol;
	4461	S_regcp_restore(aTHX_ rex, runops_cp);
	4462
	4463	if (logical != 2)
	4464	break;
	4465	}
	4466
	4467	/* only /(??{})/ from now on */
	4468	logical = 0;
	4469	{
	4470	/* extract RE object from returned value; compiling if
	4471	* necessary */
	4472
	4473	if (re_sv) {
	4474	re_sv = reg_temp_copy(NULL, re_sv);
	4475	}
	4476	else {
	4477	U32 pm_flags = 0;
	4478	const I32 osize = PL_regsize;
	4479
	4480	if (SvUTF8(ret) && IN_BYTES) {
	4481	/* In use 'bytes': make a copy of the octet
	4482	* sequence, but without the flag on */
	4483	STRLEN len;
	4484	const char *const p = SvPV(ret, len);
	4485	ret = newSVpvn_flags(p, len, SVs_TEMP);
	4486	}
	4487	if (rex->intflags & PREGf_USE_RE_EVAL)
	4488	pm_flags \|= PMf_USE_RE_EVAL;
	4489
	4490	/* if we got here, it should be an engine which
	4491	* supports compiling code blocks and stuff */
	4492	assert(rex->engine && rex->engine->op_comp);
	4493	assert(!(scan->flags & ~RXf_PMf_COMPILETIME));
	4494	re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL,
	4495	rex->engine, NULL, NULL,
	4496	/* copy /msix etc to inner pattern */
	4497	scan->flags,
	4498	pm_flags);
	4499
	4500	if (!(SvFLAGS(ret)
	4501	& (SVs_TEMP \| SVs_PADTMP \| SVf_READONLY
	4502	\| SVs_GMG))) {
	4503	/* This isn't a first class regexp. Instead, it's
	4504	caching a regexp onto an existing, Perl visible
	4505	scalar. */
	4506	sv_magic(ret, MUTABLE_SV(re_sv), PERL_MAGIC_qr, 0, 0);
	4507	}
	4508	PL_regsize = osize;
	4509	/* safe to do now that any $1 etc has been
	4510	* interpolated into the new pattern string and
	4511	* compiled */
	4512	S_regcp_restore(aTHX_ rex, runops_cp);
	4513	}
	4514	re = (struct regexp *)SvANY(re_sv);
	4515	}
	4516	RXp_MATCH_COPIED_off(re);
	4517	re->subbeg = rex->subbeg;
	4518	re->sublen = rex->sublen;
	4519	rei = RXi_GET(re);
	4520	DEBUG_EXECUTE_r(
	4521	debug_start_match(re_sv, utf8_target, locinput, PL_regeol,
	4522	"Matching embedded");
	4523	);
	4524	startpoint = rei->program + 1;
	4525	ST.close_paren = 0; /* only used for GOSUB */
	4526
	4527	eval_recurse_doit: /* Share code with GOSUB below this line */
	4528	/* run the pattern returned from (??{...}) */
	4529	ST.cp = regcppush(rex, 0); /* Save all the positions. */
	4530	REGCP_SET(ST.lastcp);
	4531
	4532	re->lastparen = 0;
	4533	re->lastcloseparen = 0;
	4534
	4535	PL_reginput = locinput;
	4536	PL_regsize = 0;
	4537
	4538	/* XXXX This is too dramatic a measure... */
	4539	PL_reg_maxiter = 0;
	4540
	4541	ST.toggle_reg_flags = PL_reg_flags;
	4542	if (RX_UTF8(re_sv))
	4543	PL_reg_flags \|= RF_utf8;
	4544	else
	4545	PL_reg_flags &= ~RF_utf8;
	4546	ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
	4547
	4548	ST.prev_rex = rex_sv;
	4549	ST.prev_curlyx = cur_curlyx;
	4550	rex_sv = re_sv;
	4551	SET_reg_curpm(rex_sv);
	4552	rex = re;
	4553	rexi = rei;
	4554	cur_curlyx = NULL;
	4555	ST.B = next;
	4556	ST.prev_eval = cur_eval;
	4557	cur_eval = st;
	4558	/* now continue from first node in postponed RE */
	4559	PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
	4560	assert(0); /* NOTREACHED */
	4561	}
	4562
	4563	case EVAL_AB: /* cleanup after a successful (??{A})B */
	4564	/* note: this is called twice; first after popping B, then A */
	4565	PL_reg_flags ^= ST.toggle_reg_flags;
	4566	rex_sv = ST.prev_rex;
	4567	SET_reg_curpm(rex_sv);
	4568	rex = (struct regexp *)SvANY(rex_sv);
	4569	rexi = RXi_GET(rex);
	4570	regcpblow(ST.cp);
	4571	cur_eval = ST.prev_eval;
	4572	cur_curlyx = ST.prev_curlyx;
	4573
	4574	/* XXXX This is too dramatic a measure... */
	4575	PL_reg_maxiter = 0;
	4576	if ( nochange_depth )
	4577	nochange_depth--;
	4578	sayYES;
	4579
	4580
	4581	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
	4582	/* note: this is called twice; first after popping B, then A */
	4583	PL_reg_flags ^= ST.toggle_reg_flags;
	4584	rex_sv = ST.prev_rex;
	4585	SET_reg_curpm(rex_sv);
	4586	rex = (struct regexp *)SvANY(rex_sv);
	4587	rexi = RXi_GET(rex);
	4588
	4589	PL_reginput = locinput;
	4590	REGCP_UNWIND(ST.lastcp);
	4591	regcppop(rex);
	4592	cur_eval = ST.prev_eval;
	4593	cur_curlyx = ST.prev_curlyx;
	4594	/* XXXX This is too dramatic a measure... */
	4595	PL_reg_maxiter = 0;
	4596	if ( nochange_depth )
	4597	nochange_depth--;
	4598	sayNO_SILENT;
	4599	#undef ST
	4600
	4601	case OPEN:
	4602	n = ARG(scan); /* which paren pair */
	4603	rex->offs[n].start_tmp = locinput - PL_bostr;
	4604	if (n > PL_regsize)
	4605	PL_regsize = n;
	4606	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	4607	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf" tmp; regsize=%"UVuf"\n",
	4608	PTR2UV(rex),
	4609	PTR2UV(rex->offs),
	4610	(UV)n,
	4611	(IV)rex->offs[n].start_tmp,
	4612	(UV)PL_regsize
	4613	));
	4614	lastopen = n;
	4615	break;
	4616
	4617	/* XXX really need to log other places start/end are set too */
	4618	#define CLOSE_CAPTURE \
	4619	rex->offs[n].start = rex->offs[n].start_tmp; \
	4620	rex->offs[n].end = locinput - PL_bostr; \
	4621	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, \
	4622	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf"..%"IVdf"\n", \
	4623	PTR2UV(rex), \
	4624	PTR2UV(rex->offs), \
	4625	(UV)n, \
	4626	(IV)rex->offs[n].start, \
	4627	(IV)rex->offs[n].end \
	4628	))
	4629
	4630	case CLOSE:
	4631	n = ARG(scan); /* which paren pair */
	4632	CLOSE_CAPTURE;
	4633	/*if (n > PL_regsize)
	4634	PL_regsize = n;*/
	4635	if (n > rex->lastparen)
	4636	rex->lastparen = n;
	4637	rex->lastcloseparen = n;
	4638	if (cur_eval && cur_eval->u.eval.close_paren == n) {
	4639	goto fake_end;
	4640	}
	4641	break;
	4642	case ACCEPT:
	4643	if (ARG(scan)){
	4644	regnode *cursor;
	4645	for (cursor=scan;
	4646	cursor && OP(cursor)!=END;
	4647	cursor=regnext(cursor))
	4648	{
	4649	if ( OP(cursor)==CLOSE ){
	4650	n = ARG(cursor);
	4651	if ( n <= lastopen ) {
	4652	CLOSE_CAPTURE;
	4653	/*if (n > PL_regsize)
	4654	PL_regsize = n;*/
	4655	if (n > rex->lastparen)
	4656	rex->lastparen = n;
	4657	rex->lastcloseparen = n;
	4658	if ( n == ARG(scan) \|\| (cur_eval &&
	4659	cur_eval->u.eval.close_paren == n))
	4660	break;
	4661	}
	4662	}
	4663	}
	4664	}
	4665	goto fake_end;
	4666	/*NOTREACHED*/
	4667	case GROUPP:
	4668	n = ARG(scan); /* which paren pair */
	4669	sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
	4670	break;
	4671	case NGROUPP:
	4672	/* reg_check_named_buff_matched returns 0 for no match */
	4673	sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
	4674	break;
	4675	case INSUBP:
	4676	n = ARG(scan);
	4677	sw = (cur_eval && (!n \|\| cur_eval->u.eval.close_paren == n));
	4678	break;
	4679	case DEFINEP:
	4680	sw = 0;
	4681	break;
	4682	case IFTHEN:
	4683	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4684	if (sw)
	4685	next = NEXTOPER(NEXTOPER(scan));
	4686	else {
	4687	next = scan + ARG(scan);
	4688	if (OP(next) == IFTHEN) /* Fake one. */
	4689	next = NEXTOPER(NEXTOPER(next));
	4690	}
	4691	break;
	4692	case LOGICAL:
	4693	logical = scan->flags;
	4694	break;
	4695
	4696	/*******************************************************************
	4697
	4698	The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
	4699	pattern, where A and B are subpatterns. (For simple A, CURLYM or
	4700	STAR/PLUS/CURLY/CURLYN are used instead.)
	4701
	4702	A*B is compiled as <CURLYX><A><WHILEM><B>
	4703
	4704	On entry to the subpattern, CURLYX is called. This pushes a CURLYX
	4705	state, which contains the current count, initialised to -1. It also sets
	4706	cur_curlyx to point to this state, with any previous value saved in the
	4707	state block.
	4708
	4709	CURLYX then jumps straight to the WHILEM op, rather than executing A,
	4710	since the pattern may possibly match zero times (i.e. it's a while {} loop
	4711	rather than a do {} while loop).
	4712
	4713	Each entry to WHILEM represents a successful match of A. The count in the
	4714	CURLYX block is incremented, another WHILEM state is pushed, and execution
	4715	passes to A or B depending on greediness and the current count.
	4716
	4717	For example, if matching against the string a1a2a3b (where the aN are
	4718	substrings that match /A/), then the match progresses as follows: (the
	4719	pushed states are interspersed with the bits of strings matched so far):
	4720
	4721	<CURLYX cnt=-1>
	4722	<CURLYX cnt=0><WHILEM>
	4723	<CURLYX cnt=1><WHILEM> a1 <WHILEM>
	4724	<CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
	4725	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
	4726	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
	4727
	4728	(Contrast this with something like CURLYM, which maintains only a single
	4729	backtrack state:
	4730
	4731	<CURLYM cnt=0> a1
	4732	a1 <CURLYM cnt=1> a2
	4733	a1 a2 <CURLYM cnt=2> a3
	4734	a1 a2 a3 <CURLYM cnt=3> b
	4735	)
	4736
	4737	Each WHILEM state block marks a point to backtrack to upon partial failure
	4738	of A or B, and also contains some minor state data related to that
	4739	iteration. The CURLYX block, pointed to by cur_curlyx, contains the
	4740	overall state, such as the count, and pointers to the A and B ops.
	4741
	4742	This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
	4743	must always point to the current CURLYX block, the rules are:
	4744
	4745	When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
	4746	and set cur_curlyx to point the new block.
	4747
	4748	When popping the CURLYX block after a successful or unsuccessful match,
	4749	restore the previous cur_curlyx.
	4750
	4751	When WHILEM is about to execute B, save the current cur_curlyx, and set it
	4752	to the outer one saved in the CURLYX block.
	4753
	4754	When popping the WHILEM block after a successful or unsuccessful B match,
	4755	restore the previous cur_curlyx.
	4756
	4757	Here's an example for the pattern (AI* BI)*BO
	4758	I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
	4759
	4760	cur_
	4761	curlyx backtrack stack
	4762	------ ---------------
	4763	NULL
	4764	CO <CO prev=NULL> <WO>
	4765	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	4766	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	4767	NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
	4768
	4769	At this point the pattern succeeds, and we work back down the stack to
	4770	clean up, restoring as we go:
	4771
	4772	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	4773	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	4774	CO <CO prev=NULL> <WO>
	4775	NULL
	4776
	4777	*******************************************************************/
	4778
	4779	#define ST st->u.curlyx
	4780
	4781	case CURLYX: /* start of /A*B/ (for complex A) */
	4782	{
	4783	/* No need to save/restore up to this paren */
	4784	I32 parenfloor = scan->flags;
	4785
	4786	assert(next); /* keep Coverity happy */
	4787	if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
	4788	next += ARG(next);
	4789
	4790	/* XXXX Probably it is better to teach regpush to support
	4791	parenfloor > PL_regsize... */
	4792	if (parenfloor > (I32)rex->lastparen)
	4793	parenfloor = rex->lastparen; /* Pessimization... */
	4794
	4795	ST.prev_curlyx= cur_curlyx;
	4796	cur_curlyx = st;
	4797	ST.cp = PL_savestack_ix;
	4798
	4799	/* these fields contain the state of the current curly.
	4800	* they are accessed by subsequent WHILEMs */
	4801	ST.parenfloor = parenfloor;
	4802	ST.me = scan;
	4803	ST.B = next;
	4804	ST.minmod = minmod;
	4805	minmod = 0;
	4806	ST.count = -1; /* this will be updated by WHILEM */
	4807	ST.lastloc = NULL; /* this will be updated by WHILEM */
	4808
	4809	PL_reginput = locinput;
	4810	PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
	4811	assert(0); /* NOTREACHED */
	4812	}
	4813
	4814	case CURLYX_end: /* just finished matching all of A*B */
	4815	cur_curlyx = ST.prev_curlyx;
	4816	sayYES;
	4817	assert(0); /* NOTREACHED */
	4818
	4819	case CURLYX_end_fail: /* just failed to match all of A*B */
	4820	regcpblow(ST.cp);
	4821	cur_curlyx = ST.prev_curlyx;
	4822	sayNO;
	4823	assert(0); /* NOTREACHED */
	4824
	4825
	4826	#undef ST
	4827	#define ST st->u.whilem
	4828
	4829	case WHILEM: /* just matched an A in /A*B/ (for complex A) */
	4830	{
	4831	/* see the discussion above about CURLYX/WHILEM */
	4832	I32 n;
	4833	int min = ARG1(cur_curlyx->u.curlyx.me);
	4834	int max = ARG2(cur_curlyx->u.curlyx.me);
	4835	regnode *A = NEXTOPER(cur_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS;
	4836
	4837	assert(cur_curlyx); /* keep Coverity happy */
	4838	n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
	4839	ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
	4840	ST.cache_offset = 0;
	4841	ST.cache_mask = 0;
	4842
	4843	PL_reginput = locinput;
	4844
	4845	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4846	"%*s whilem: matched %ld out of %d..%d\n",
	4847	REPORT_CODE_OFF+depth*2, "", (long)n, min, max)
	4848	);
	4849
	4850	/* First just match a string of min A's. */
	4851
	4852	if (n < min) {
	4853	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
	4854	cur_curlyx->u.curlyx.lastloc = locinput;
	4855	REGCP_SET(ST.lastcp);
	4856
	4857	PUSH_STATE_GOTO(WHILEM_A_pre, A);
	4858	assert(0); /* NOTREACHED */
	4859	}
	4860
	4861	/* If degenerate A matches "", assume A done. */
	4862
	4863	if (locinput == cur_curlyx->u.curlyx.lastloc) {
	4864	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4865	"%*s whilem: empty match detected, trying continuation...\n",
	4866	REPORT_CODE_OFF+depth*2, "")
	4867	);
	4868	goto do_whilem_B_max;
	4869	}
	4870
	4871	/* super-linear cache processing */
	4872
	4873	if (scan->flags) {
	4874
	4875	if (!PL_reg_maxiter) {
	4876	/* start the countdown: Postpone detection until we
	4877	* know the match is not that much linear. */
	4878	PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
	4879	/* possible overflow for long strings and many CURLYX's */
	4880	if (PL_reg_maxiter < 0)
	4881	PL_reg_maxiter = I32_MAX;
	4882	PL_reg_leftiter = PL_reg_maxiter;
	4883	}
	4884
	4885	if (PL_reg_leftiter-- == 0) {
	4886	/* initialise cache */
	4887	const I32 size = (PL_reg_maxiter + 7)/8;
	4888	if (PL_reg_poscache) {
	4889	if ((I32)PL_reg_poscache_size < size) {
	4890	Renew(PL_reg_poscache, size, char);
	4891	PL_reg_poscache_size = size;
	4892	}
	4893	Zero(PL_reg_poscache, size, char);
	4894	}
	4895	else {
	4896	PL_reg_poscache_size = size;
	4897	Newxz(PL_reg_poscache, size, char);
	4898	}
	4899	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4900	"%swhilem: Detected a super-linear match, switching on caching%s...\n",
	4901	PL_colors[4], PL_colors[5])
	4902	);
	4903	}
	4904
	4905	if (PL_reg_leftiter < 0) {
	4906	/* have we already failed at this position? */
	4907	I32 offset, mask;
	4908	offset = (scan->flags & 0xf) - 1
	4909	+ (locinput - PL_bostr) * (scan->flags>>4);
	4910	mask = 1 << (offset % 8);
	4911	offset /= 8;
	4912	if (PL_reg_poscache[offset] & mask) {
	4913	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4914	"%*s whilem: (cache) already tried at this position...\n",
	4915	REPORT_CODE_OFF+depth*2, "")
	4916	);
	4917	sayNO; /* cache records failure */
	4918	}
	4919	ST.cache_offset = offset;
	4920	ST.cache_mask = mask;
	4921	}
	4922	}
	4923
	4924	/* Prefer B over A for minimal matching. */
	4925
	4926	if (cur_curlyx->u.curlyx.minmod) {
	4927	ST.save_curlyx = cur_curlyx;
	4928	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	4929	ST.cp = regcppush(rex, ST.save_curlyx->u.curlyx.parenfloor);
	4930	REGCP_SET(ST.lastcp);
	4931	PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
	4932	assert(0); /* NOTREACHED */
	4933	}
	4934
	4935	/* Prefer A over B for maximal matching. */
	4936
	4937	if (n < max) { /* More greed allowed? */
	4938	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
	4939	cur_curlyx->u.curlyx.lastloc = locinput;
	4940	REGCP_SET(ST.lastcp);
	4941	PUSH_STATE_GOTO(WHILEM_A_max, A);
	4942	assert(0); /* NOTREACHED */
	4943	}
	4944	goto do_whilem_B_max;
	4945	}
	4946	assert(0); /* NOTREACHED */
	4947
	4948	case WHILEM_B_min: /* just matched B in a minimal match */
	4949	case WHILEM_B_max: /* just matched B in a maximal match */
	4950	cur_curlyx = ST.save_curlyx;
	4951	sayYES;
	4952	assert(0); /* NOTREACHED */
	4953
	4954	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
	4955	cur_curlyx = ST.save_curlyx;
	4956	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	4957	cur_curlyx->u.curlyx.count--;
	4958	CACHEsayNO;
	4959	assert(0); /* NOTREACHED */
	4960
	4961	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
	4962	/* FALL THROUGH */
	4963	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
	4964	REGCP_UNWIND(ST.lastcp);
	4965	regcppop(rex);
	4966	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	4967	cur_curlyx->u.curlyx.count--;
	4968	CACHEsayNO;
	4969	assert(0); /* NOTREACHED */
	4970
	4971	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
	4972	REGCP_UNWIND(ST.lastcp);
	4973	regcppop(rex); /* Restore some previous $<digit>s? */
	4974	PL_reginput = locinput;
	4975	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	4976	"%*s whilem: failed, trying continuation...\n",
	4977	REPORT_CODE_OFF+depth*2, "")
	4978	);
	4979	do_whilem_B_max:
	4980	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	4981	&& ckWARN(WARN_REGEXP)
	4982	&& !(PL_reg_flags & RF_warned))
	4983	{
	4984	PL_reg_flags \|= RF_warned;
	4985	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	4986	"Complex regular subexpression recursion limit (%d) "
	4987	"exceeded",
	4988	REG_INFTY - 1);
	4989	}
	4990
	4991	/* now try B */
	4992	ST.save_curlyx = cur_curlyx;
	4993	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	4994	PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
	4995	assert(0); /* NOTREACHED */
	4996
	4997	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
	4998	cur_curlyx = ST.save_curlyx;
	4999	REGCP_UNWIND(ST.lastcp);
	5000	regcppop(rex);
	5001
	5002	if (cur_curlyx->u.curlyx.count >= /*max*/ARG2(cur_curlyx->u.curlyx.me)) {
	5003	/* Maximum greed exceeded */
	5004	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	5005	&& ckWARN(WARN_REGEXP)
	5006	&& !(PL_reg_flags & RF_warned))
	5007	{
	5008	PL_reg_flags \|= RF_warned;
	5009	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	5010	"Complex regular subexpression recursion "
	5011	"limit (%d) exceeded",
	5012	REG_INFTY - 1);
	5013	}
	5014	cur_curlyx->u.curlyx.count--;
	5015	CACHEsayNO;
	5016	}
	5017
	5018	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5019	"%s trying longer...\n", REPORT_CODE_OFF+depth2, "")
	5020	);
	5021	/* Try grabbing another A and see if it helps. */
	5022	PL_reginput = locinput;
	5023	cur_curlyx->u.curlyx.lastloc = locinput;
	5024	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
	5025	REGCP_SET(ST.lastcp);
	5026	PUSH_STATE_GOTO(WHILEM_A_min,
	5027	/*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS);
	5028	assert(0); /* NOTREACHED */
	5029
	5030	#undef ST
	5031	#define ST st->u.branch
	5032
	5033	case BRANCHJ: /* /(...|A|...)/ with long next pointer */
	5034	next = scan + ARG(scan);
	5035	if (next == scan)
	5036	next = NULL;
	5037	scan = NEXTOPER(scan);
	5038	/* FALL THROUGH */
	5039
	5040	case BRANCH: /* /(...|A|...)/ */
	5041	scan = NEXTOPER(scan); /* scan now points to inner node */
	5042	ST.lastparen = rex->lastparen;
	5043	ST.lastcloseparen = rex->lastcloseparen;
	5044	ST.next_branch = next;
	5045	REGCP_SET(ST.cp);
	5046	PL_reginput = locinput;
	5047
	5048	/* Now go into the branch */
	5049	if (has_cutgroup) {
	5050	PUSH_YES_STATE_GOTO(BRANCH_next, scan);
	5051	} else {
	5052	PUSH_STATE_GOTO(BRANCH_next, scan);
	5053	}
	5054	assert(0); /* NOTREACHED */
	5055	case CUTGROUP:
	5056	PL_reginput = locinput;
	5057	sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
	5058	MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5059	PUSH_STATE_GOTO(CUTGROUP_next,next);
	5060	assert(0); /* NOTREACHED */
	5061	case CUTGROUP_next_fail:
	5062	do_cutgroup = 1;
	5063	no_final = 1;
	5064	if (st->u.mark.mark_name)
	5065	sv_commit = st->u.mark.mark_name;
	5066	sayNO;
	5067	assert(0); /* NOTREACHED */
	5068	case BRANCH_next:
	5069	sayYES;
	5070	assert(0); /* NOTREACHED */
	5071	case BRANCH_next_fail: /* that branch failed; try the next, if any */
	5072	if (do_cutgroup) {
	5073	do_cutgroup = 0;
	5074	no_final = 0;
	5075	}
	5076	REGCP_UNWIND(ST.cp);
	5077	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5078	scan = ST.next_branch;
	5079	/* no more branches? */
	5080	if (!scan \|\| (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
	5081	DEBUG_EXECUTE_r({
	5082	PerlIO_printf( Perl_debug_log,
	5083	"%*s %sBRANCH failed...%s\n",
	5084	REPORT_CODE_OFF+depth*2, "",
	5085	PL_colors[4],
	5086	PL_colors[5] );
	5087	});
	5088	sayNO_SILENT;
	5089	}
	5090	continue; /* execute next BRANCH[J] op */
	5091	assert(0); /* NOTREACHED */
	5092
	5093	case MINMOD:
	5094	minmod = 1;
	5095	break;
	5096
	5097	#undef ST
	5098	#define ST st->u.curlym
	5099
	5100	case CURLYM: /* /A{m,n}B/ where A is fixed-length */
	5101
	5102	/* This is an optimisation of CURLYX that enables us to push
	5103	* only a single backtracking state, no matter how many matches
	5104	* there are in {m,n}. It relies on the pattern being constant
	5105	* length, with no parens to influence future backrefs
	5106	*/
	5107
	5108	ST.me = scan;
	5109	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5110
	5111	ST.lastparen = rex->lastparen;
	5112	ST.lastcloseparen = rex->lastcloseparen;
	5113
	5114	/* if paren positive, emulate an OPEN/CLOSE around A */
	5115	if (ST.me->flags) {
	5116	U32 paren = ST.me->flags;
	5117	if (paren > PL_regsize)
	5118	PL_regsize = paren;
	5119	scan += NEXT_OFF(scan); /* Skip former OPEN. */
	5120	}
	5121	ST.A = scan;
	5122	ST.B = next;
	5123	ST.alen = 0;
	5124	ST.count = 0;
	5125	ST.minmod = minmod;
	5126	minmod = 0;
	5127	ST.c1 = CHRTEST_UNINIT;
	5128	REGCP_SET(ST.cp);
	5129
	5130	if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
	5131	goto curlym_do_B;
	5132
	5133	curlym_do_A: /* execute the A in /A{m,n}B/ */
	5134	PL_reginput = locinput;
	5135	PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
	5136	assert(0); /* NOTREACHED */
	5137
	5138	case CURLYM_A: /* we've just matched an A */
	5139	locinput = st->locinput;
	5140	nextchr = UCHARAT(locinput);
	5141
	5142	ST.count++;
	5143	/* after first match, determine A's length: u.curlym.alen */
	5144	if (ST.count == 1) {
	5145	if (PL_reg_match_utf8) {
	5146	char *s = locinput;
	5147	while (s < PL_reginput) {
	5148	ST.alen++;
	5149	s += UTF8SKIP(s);
	5150	}
	5151	}
	5152	else {
	5153	ST.alen = PL_reginput - locinput;
	5154	}
	5155	if (ST.alen == 0)
	5156	ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
	5157	}
	5158	DEBUG_EXECUTE_r(
	5159	PerlIO_printf(Perl_debug_log,
	5160	"%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
	5161	(int)(REPORT_CODE_OFF+(depth*2)), "",
	5162	(IV) ST.count, (IV)ST.alen)
	5163	);
	5164
	5165	locinput = PL_reginput;
	5166
	5167	if (cur_eval && cur_eval->u.eval.close_paren &&
	5168	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5169	goto fake_end;
	5170
	5171	{
	5172	I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
	5173	if ( max == REG_INFTY \|\| ST.count < max )
	5174	goto curlym_do_A; /* try to match another A */
	5175	}
	5176	goto curlym_do_B; /* try to match B */
	5177
	5178	case CURLYM_A_fail: /* just failed to match an A */
	5179	REGCP_UNWIND(ST.cp);
	5180
	5181	if (ST.minmod \|\| ST.count < ARG1(ST.me) /* min*/
	5182	\|\| (cur_eval && cur_eval->u.eval.close_paren &&
	5183	cur_eval->u.eval.close_paren == (U32)ST.me->flags))
	5184	sayNO;
	5185
	5186	curlym_do_B: /* execute the B in /A{m,n}B/ */
	5187	PL_reginput = locinput;
	5188	if (ST.c1 == CHRTEST_UNINIT) {
	5189	/* calculate c1 and c2 for possible match of 1st char
	5190	* following curly */
	5191	ST.c1 = ST.c2 = CHRTEST_VOID;
	5192	if (HAS_TEXT(ST.B) \|\| JUMPABLE(ST.B)) {
	5193	regnode *text_node = ST.B;
	5194	if (! HAS_TEXT(text_node))
	5195	FIND_NEXT_IMPT(text_node);
	5196	/* this used to be
	5197
	5198	(HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
	5199
	5200	But the former is redundant in light of the latter.
	5201
	5202	if this changes back then the macro for
	5203	IS_TEXT and friends need to change.
	5204	*/
	5205	if (PL_regkind[OP(text_node)] == EXACT)
	5206	{
	5207
	5208	ST.c1 = (U8)*STRING(text_node);
	5209	switch (OP(text_node)) {
	5210	case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
	5211	case EXACTFA:
	5212	case EXACTFU_SS:
	5213	case EXACTFU_TRICKYFOLD:
	5214	case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
	5215	case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
	5216	default: ST.c2 = ST.c1;
	5217	}
	5218	}
	5219	}
	5220	}
	5221
	5222	DEBUG_EXECUTE_r(
	5223	PerlIO_printf(Perl_debug_log,
	5224	"%*s CURLYM trying tail with matches=%"IVdf"...\n",
	5225	(int)(REPORT_CODE_OFF+(depth*2)),
	5226	"", (IV)ST.count)
	5227	);
	5228	if (ST.c1 != CHRTEST_VOID
	5229	&& UCHARAT(PL_reginput) != ST.c1
	5230	&& UCHARAT(PL_reginput) != ST.c2)
	5231	{
	5232	/* simulate B failing */
	5233	DEBUG_OPTIMISE_r(
	5234	PerlIO_printf(Perl_debug_log,
	5235	"%*s CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
	5236	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5237	(IV)ST.c1,(IV)ST.c2
	5238	));
	5239	state_num = CURLYM_B_fail;
	5240	goto reenter_switch;
	5241	}
	5242
	5243	if (ST.me->flags) {
	5244	/* emulate CLOSE: mark current A as captured */
	5245	I32 paren = ST.me->flags;
	5246	if (ST.count) {
	5247	rex->offs[paren].start
	5248	= HOPc(PL_reginput, -ST.alen) - PL_bostr;
	5249	rex->offs[paren].end = PL_reginput - PL_bostr;
	5250	if ((U32)paren > rex->lastparen)
	5251	rex->lastparen = paren;
	5252	rex->lastcloseparen = paren;
	5253	}
	5254	else
	5255	rex->offs[paren].end = -1;
	5256	if (cur_eval && cur_eval->u.eval.close_paren &&
	5257	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5258	{
	5259	if (ST.count)
	5260	goto fake_end;
	5261	else
	5262	sayNO;
	5263	}
	5264	}
	5265
	5266	PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
	5267	assert(0); /* NOTREACHED */
	5268
	5269	case CURLYM_B_fail: /* just failed to match a B */
	5270	REGCP_UNWIND(ST.cp);
	5271	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5272	if (ST.minmod) {
	5273	I32 max = ARG2(ST.me);
	5274	if (max != REG_INFTY && ST.count == max)
	5275	sayNO;
	5276	goto curlym_do_A; /* try to match a further A */
	5277	}
	5278	/* backtrack one A */
	5279	if (ST.count == ARG1(ST.me) /* min */)
	5280	sayNO;
	5281	ST.count--;
	5282	locinput = HOPc(locinput, -ST.alen);
	5283	goto curlym_do_B; /* try to match B */
	5284
	5285	#undef ST
	5286	#define ST st->u.curly
	5287
	5288	#define CURLY_SETPAREN(paren, success) \
	5289	if (paren) { \
	5290	if (success) { \
	5291	rex->offs[paren].start = HOPc(locinput, -1) - PL_bostr; \
	5292	rex->offs[paren].end = locinput - PL_bostr; \
	5293	if (paren > rex->lastparen) \
	5294	rex->lastparen = paren; \
	5295	rex->lastcloseparen = paren; \
	5296	} \
	5297	else { \
	5298	rex->offs[paren].end = -1; \
	5299	rex->lastparen = ST.lastparen; \
	5300	rex->lastcloseparen = ST.lastcloseparen; \
	5301	} \
	5302	}
	5303
	5304	case STAR: /* /A*B/ where A is width 1 */
	5305	ST.paren = 0;
	5306	ST.min = 0;
	5307	ST.max = REG_INFTY;
	5308	scan = NEXTOPER(scan);
	5309	goto repeat;
	5310	case PLUS: /* /A+B/ where A is width 1 */
	5311	ST.paren = 0;
	5312	ST.min = 1;
	5313	ST.max = REG_INFTY;
	5314	scan = NEXTOPER(scan);
	5315	goto repeat;
	5316	case CURLYN: /* /(A){m,n}B/ where A is width 1 */
	5317	ST.paren = scan->flags; /* Which paren to set */
	5318	ST.lastparen = rex->lastparen;
	5319	ST.lastcloseparen = rex->lastcloseparen;
	5320	if (ST.paren > PL_regsize)
	5321	PL_regsize = ST.paren;
	5322	ST.min = ARG1(scan); /* min to match */
	5323	ST.max = ARG2(scan); /* max to match */
	5324	if (cur_eval && cur_eval->u.eval.close_paren &&
	5325	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5326	ST.min=1;
	5327	ST.max=1;
	5328	}
	5329	scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
	5330	goto repeat;
	5331	case CURLY: /* /A{m,n}B/ where A is width 1 */
	5332	ST.paren = 0;
	5333	ST.min = ARG1(scan); /* min to match */
	5334	ST.max = ARG2(scan); /* max to match */
	5335	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5336	repeat:
	5337	/*
	5338	* Lookahead to avoid useless match attempts
	5339	* when we know what character comes next.
	5340	*
	5341	* Used to only do .x and .?x, but now it allows
	5342	* for )'s, ('s and (?{ ... })'s to be in the way
	5343	* of the quantifier and the EXACT-like node. -- japhy
	5344	*/
	5345
	5346	if (ST.min > ST.max) /* XXX make this a compile-time check? */
	5347	sayNO;
	5348	if (HAS_TEXT(next) \|\| JUMPABLE(next)) {
	5349	U8 *s;
	5350	regnode *text_node = next;
	5351
	5352	if (! HAS_TEXT(text_node))
	5353	FIND_NEXT_IMPT(text_node);
	5354
	5355	if (! HAS_TEXT(text_node))
	5356	ST.c1 = ST.c2 = CHRTEST_VOID;
	5357	else {
	5358	if ( PL_regkind[OP(text_node)] != EXACT ) {
	5359	ST.c1 = ST.c2 = CHRTEST_VOID;
	5360	goto assume_ok_easy;
	5361	}
	5362	else
	5363	s = (U8*)STRING(text_node);
	5364
	5365	/* Currently we only get here when
	5366
	5367	PL_regkind[OP(text_node)] == EXACT
	5368
	5369	if this changes back then the macro for IS_TEXT and
	5370	friends need to change. */
	5371	if (!UTF_PATTERN) {
	5372	ST.c1 = *s;
	5373	switch (OP(text_node)) {
	5374	case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
	5375	case EXACTFA:
	5376	case EXACTFU_SS:
	5377	case EXACTFU_TRICKYFOLD:
	5378	case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
	5379	case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
	5380	default: ST.c2 = ST.c1; break;
	5381	}
	5382	}
	5383	else { /* UTF_PATTERN */
	5384	if (IS_TEXTFU(text_node) \|\| IS_TEXTF(text_node)) {
	5385	STRLEN ulen;
	5386	U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
	5387
	5388	to_utf8_fold((U8*)s, tmpbuf, &ulen);
	5389	ST.c1 = ST.c2 = utf8n_to_uvchr(tmpbuf, UTF8_MAXLEN, 0,
	5390	uniflags);
	5391	}
	5392	else {
	5393	ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
	5394	uniflags);
	5395	}
	5396	}
	5397	}
	5398	}
	5399	else
	5400	ST.c1 = ST.c2 = CHRTEST_VOID;
	5401	assume_ok_easy:
	5402
	5403	ST.A = scan;
	5404	ST.B = next;
	5405	PL_reginput = locinput;
	5406	if (minmod) {
	5407	minmod = 0;
	5408	if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
	5409	sayNO;
	5410	ST.count = ST.min;
	5411	locinput = PL_reginput;
	5412	REGCP_SET(ST.cp);
	5413	if (ST.c1 == CHRTEST_VOID)
	5414	goto curly_try_B_min;
	5415
	5416	ST.oldloc = locinput;
	5417
	5418	/* set ST.maxpos to the furthest point along the
	5419	* string that could possibly match */
	5420	if (ST.max == REG_INFTY) {
	5421	ST.maxpos = PL_regeol - 1;
	5422	if (utf8_target)
	5423	while (UTF8_IS_CONTINUATION((U8)ST.maxpos))
	5424	ST.maxpos--;
	5425	}
	5426	else if (utf8_target) {
	5427	int m = ST.max - ST.min;
	5428	for (ST.maxpos = locinput;
	5429	m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
	5430	ST.maxpos += UTF8SKIP(ST.maxpos);
	5431	}
	5432	else {
	5433	ST.maxpos = locinput + ST.max - ST.min;
	5434	if (ST.maxpos >= PL_regeol)
	5435	ST.maxpos = PL_regeol - 1;
	5436	}
	5437	goto curly_try_B_min_known;
	5438
	5439	}
	5440	else {
	5441	ST.count = regrepeat(rex, ST.A, ST.max, depth);
	5442	locinput = PL_reginput;
	5443	if (ST.count < ST.min)
	5444	sayNO;
	5445	if ((ST.count > ST.min)
	5446	&& (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
	5447	{
	5448	/* A{m,n} must come at the end of the string, there's
	5449	* no point in backing off ... */
	5450	ST.min = ST.count;
	5451	/* ...except that $ and \Z can match before and after
	5452	newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
	5453	We may back off by one in this case. */
	5454	if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
	5455	ST.min--;
	5456	}
	5457	REGCP_SET(ST.cp);
	5458	goto curly_try_B_max;
	5459	}
	5460	assert(0); /* NOTREACHED */
	5461
	5462
	5463	case CURLY_B_min_known_fail:
	5464	/* failed to find B in a non-greedy match where c1,c2 valid */
	5465
	5466	PL_reginput = locinput; /* Could be reset... */
	5467	REGCP_UNWIND(ST.cp);
	5468	if (ST.paren) {
	5469	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5470	}
	5471	/* Couldn't or didn't -- move forward. */
	5472	ST.oldloc = locinput;
	5473	if (utf8_target)
	5474	locinput += UTF8SKIP(locinput);
	5475	else
	5476	locinput++;
	5477	ST.count++;
	5478	curly_try_B_min_known:
	5479	/* find the next place where 'B' could work, then call B */
	5480	{
	5481	int n;
	5482	if (utf8_target) {
	5483	n = (ST.oldloc == locinput) ? 0 : 1;
	5484	if (ST.c1 == ST.c2) {
	5485	STRLEN len;
	5486	/* set n to utf8_distance(oldloc, locinput) */
	5487	while (locinput <= ST.maxpos &&
	5488	utf8n_to_uvchr((U8*)locinput,
	5489	UTF8_MAXBYTES, &len,
	5490	uniflags) != (UV)ST.c1) {
	5491	locinput += len;
	5492	n++;
	5493	}
	5494	}
	5495	else {
	5496	/* set n to utf8_distance(oldloc, locinput) */
	5497	while (locinput <= ST.maxpos) {
	5498	STRLEN len;
	5499	const UV c = utf8n_to_uvchr((U8*)locinput,
	5500	UTF8_MAXBYTES, &len,
	5501	uniflags);
	5502	if (c == (UV)ST.c1 \|\| c == (UV)ST.c2)
	5503	break;
	5504	locinput += len;
	5505	n++;
	5506	}
	5507	}
	5508	}
	5509	else {
	5510	if (ST.c1 == ST.c2) {
	5511	while (locinput <= ST.maxpos &&
	5512	UCHARAT(locinput) != ST.c1)
	5513	locinput++;
	5514	}
	5515	else {
	5516	while (locinput <= ST.maxpos
	5517	&& UCHARAT(locinput) != ST.c1
	5518	&& UCHARAT(locinput) != ST.c2)
	5519	locinput++;
	5520	}
	5521	n = locinput - ST.oldloc;
	5522	}
	5523	if (locinput > ST.maxpos)
	5524	sayNO;
	5525	/* PL_reginput == oldloc now */
	5526	if (n) {
	5527	ST.count += n;
	5528	if (regrepeat(rex, ST.A, n, depth) < n)
	5529	sayNO;
	5530	}
	5531	PL_reginput = locinput;
	5532	CURLY_SETPAREN(ST.paren, ST.count);
	5533	if (cur_eval && cur_eval->u.eval.close_paren &&
	5534	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5535	goto fake_end;
	5536	}
	5537	PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
	5538	}
	5539	assert(0); /* NOTREACHED */
	5540
	5541
	5542	case CURLY_B_min_fail:
	5543	/* failed to find B in a non-greedy match where c1,c2 invalid */
	5544
	5545	REGCP_UNWIND(ST.cp);
	5546	if (ST.paren) {
	5547	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5548	}
	5549	/* failed -- move forward one */
	5550	PL_reginput = locinput;
	5551	if (regrepeat(rex, ST.A, 1, depth)) {
	5552	ST.count++;
	5553	locinput = PL_reginput;
	5554	if (ST.count <= ST.max \|\| (ST.max == REG_INFTY &&
	5555	ST.count > 0)) /* count overflow ? */
	5556	{
	5557	curly_try_B_min:
	5558	CURLY_SETPAREN(ST.paren, ST.count);
	5559	if (cur_eval && cur_eval->u.eval.close_paren &&
	5560	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5561	goto fake_end;
	5562	}
	5563	PUSH_STATE_GOTO(CURLY_B_min, ST.B);
	5564	}
	5565	}
	5566	sayNO;
	5567	assert(0); /* NOTREACHED */
	5568
	5569
	5570	curly_try_B_max:
	5571	/* a successful greedy match: now try to match B */
	5572	if (cur_eval && cur_eval->u.eval.close_paren &&
	5573	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5574	goto fake_end;
	5575	}
	5576	{
	5577	UV c = 0;
	5578	if (ST.c1 != CHRTEST_VOID)
	5579	c = utf8_target ? utf8n_to_uvchr((U8*)PL_reginput,
	5580	UTF8_MAXBYTES, 0, uniflags)
	5581	: (UV) UCHARAT(PL_reginput);
	5582	/* If it could work, try it. */
	5583	if (ST.c1 == CHRTEST_VOID \|\| c == (UV)ST.c1 \|\| c == (UV)ST.c2) {
	5584	CURLY_SETPAREN(ST.paren, ST.count);
	5585	PUSH_STATE_GOTO(CURLY_B_max, ST.B);
	5586	assert(0); /* NOTREACHED */
	5587	}
	5588	}
	5589	/* FALL THROUGH */
	5590	case CURLY_B_max_fail:
	5591	/* failed to find B in a greedy match */
	5592
	5593	REGCP_UNWIND(ST.cp);
	5594	if (ST.paren) {
	5595	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5596	}
	5597	/* back up. */
	5598	if (--ST.count < ST.min)
	5599	sayNO;
	5600	PL_reginput = locinput = HOPc(locinput, -1);
	5601	goto curly_try_B_max;
	5602
	5603	#undef ST
	5604
	5605	case END:
	5606	fake_end:
	5607	if (cur_eval) {
	5608	/* we've just finished A in /(??{A})B/; now continue with B */
	5609	st->u.eval.toggle_reg_flags
	5610	= cur_eval->u.eval.toggle_reg_flags;
	5611	PL_reg_flags ^= st->u.eval.toggle_reg_flags;
	5612
	5613	st->u.eval.prev_rex = rex_sv; /* inner */
	5614	st->u.eval.cp = regcppush(rex, 0); /* Save all the positions. */
	5615	rex_sv = cur_eval->u.eval.prev_rex;
	5616	SET_reg_curpm(rex_sv);
	5617	rex = (struct regexp *)SvANY(rex_sv);
	5618	rexi = RXi_GET(rex);
	5619	cur_curlyx = cur_eval->u.eval.prev_curlyx;
	5620
	5621	REGCP_SET(st->u.eval.lastcp);
	5622	PL_reginput = locinput;
	5623
	5624	/* Restore parens of the outer rex without popping the
	5625	* savestack */
	5626	S_regcp_restore(aTHX_ rex, cur_eval->u.eval.lastcp);
	5627
	5628	st->u.eval.prev_eval = cur_eval;
	5629	cur_eval = cur_eval->u.eval.prev_eval;
	5630	DEBUG_EXECUTE_r(
	5631	PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
	5632	REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
	5633	if ( nochange_depth )
	5634	nochange_depth--;
	5635
	5636	PUSH_YES_STATE_GOTO(EVAL_AB,
	5637	st->u.eval.prev_eval->u.eval.B); /* match B */
	5638	}
	5639
	5640	if (locinput < reginfo->till) {
	5641	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5642	"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
	5643	PL_colors[4],
	5644	(long)(locinput - PL_reg_starttry),
	5645	(long)(reginfo->till - PL_reg_starttry),
	5646	PL_colors[5]));
	5647
	5648	sayNO_SILENT; /* Cannot match: too short. */
	5649	}
	5650	PL_reginput = locinput; /* put where regtry can find it */
	5651	sayYES; /* Success! */
	5652
	5653	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
	5654	DEBUG_EXECUTE_r(
	5655	PerlIO_printf(Perl_debug_log,
	5656	"%*s %ssubpattern success...%s\n",
	5657	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
	5658	PL_reginput = locinput; /* put where regtry can find it */
	5659	sayYES; /* Success! */
	5660
	5661	#undef ST
	5662	#define ST st->u.ifmatch
	5663
	5664	case SUSPEND: /* (?>A) */
	5665	ST.wanted = 1;
	5666	PL_reginput = locinput;
	5667	goto do_ifmatch;
	5668
	5669	case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
	5670	ST.wanted = 0;
	5671	goto ifmatch_trivial_fail_test;
	5672
	5673	case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
	5674	ST.wanted = 1;
	5675	ifmatch_trivial_fail_test:
	5676	if (scan->flags) {
	5677	char * const s = HOPBACKc(locinput, scan->flags);
	5678	if (!s) {
	5679	/* trivial fail */
	5680	if (logical) {
	5681	logical = 0;
	5682	sw = 1 - cBOOL(ST.wanted);
	5683	}
	5684	else if (ST.wanted)
	5685	sayNO;
	5686	next = scan + ARG(scan);
	5687	if (next == scan)
	5688	next = NULL;
	5689	break;
	5690	}
	5691	PL_reginput = s;
	5692	}
	5693	else
	5694	PL_reginput = locinput;
	5695
	5696	do_ifmatch:
	5697	ST.me = scan;
	5698	ST.logical = logical;
	5699	logical = 0; /* XXX: reset state of logical once it has been saved into ST */
	5700
	5701	/* execute body of (?...A) */
	5702	PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
	5703	assert(0); /* NOTREACHED */
	5704
	5705	case IFMATCH_A_fail: /* body of (?...A) failed */
	5706	ST.wanted = !ST.wanted;
	5707	/* FALL THROUGH */
	5708
	5709	case IFMATCH_A: /* body of (?...A) succeeded */
	5710	if (ST.logical) {
	5711	sw = cBOOL(ST.wanted);
	5712	}
	5713	else if (!ST.wanted)
	5714	sayNO;
	5715
	5716	if (OP(ST.me) == SUSPEND)
	5717	locinput = PL_reginput;
	5718	else {
	5719	locinput = PL_reginput = st->locinput;
	5720	nextchr = UCHARAT(locinput);
	5721	}
	5722	scan = ST.me + ARG(ST.me);
	5723	if (scan == ST.me)
	5724	scan = NULL;
	5725	continue; /* execute B */
	5726
	5727	#undef ST
	5728
	5729	case LONGJMP:
	5730	next = scan + ARG(scan);
	5731	if (next == scan)
	5732	next = NULL;
	5733	break;
	5734	case COMMIT:
	5735	reginfo->cutpoint = PL_regeol;
	5736	/* FALLTHROUGH */
	5737	case PRUNE:
	5738	PL_reginput = locinput;
	5739	if (!scan->flags)
	5740	sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5741	PUSH_STATE_GOTO(COMMIT_next,next);
	5742	assert(0); /* NOTREACHED */
	5743	case COMMIT_next_fail:
	5744	no_final = 1;
	5745	/* FALLTHROUGH */
	5746	case OPFAIL:
	5747	sayNO;
	5748	assert(0); /* NOTREACHED */
	5749
	5750	#define ST st->u.mark
	5751	case MARKPOINT:
	5752	ST.prev_mark = mark_state;
	5753	ST.mark_name = sv_commit = sv_yes_mark
	5754	= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5755	mark_state = st;
	5756	ST.mark_loc = PL_reginput = locinput;
	5757	PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
	5758	assert(0); /* NOTREACHED */
	5759	case MARKPOINT_next:
	5760	mark_state = ST.prev_mark;
	5761	sayYES;
	5762	assert(0); /* NOTREACHED */
	5763	case MARKPOINT_next_fail:
	5764	if (popmark && sv_eq(ST.mark_name,popmark))
	5765	{
	5766	if (ST.mark_loc > startpoint)
	5767	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	5768	popmark = NULL; /* we found our mark */
	5769	sv_commit = ST.mark_name;
	5770
	5771	DEBUG_EXECUTE_r({
	5772	PerlIO_printf(Perl_debug_log,
	5773	"%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
	5774	REPORT_CODE_OFF+depth*2, "",
	5775	PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
	5776	});
	5777	}
	5778	mark_state = ST.prev_mark;
	5779	sv_yes_mark = mark_state ?
	5780	mark_state->u.mark.mark_name : NULL;
	5781	sayNO;
	5782	assert(0); /* NOTREACHED */
	5783	case SKIP:
	5784	PL_reginput = locinput;
	5785	if (scan->flags) {
	5786	/* (*SKIP) : if we fail we cut here */
	5787	ST.mark_name = NULL;
	5788	ST.mark_loc = locinput;
	5789	PUSH_STATE_GOTO(SKIP_next,next);
	5790	} else {
	5791	/* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
	5792	otherwise do nothing. Meaning we need to scan
	5793	*/
	5794	regmatch_state *cur = mark_state;
	5795	SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5796
	5797	while (cur) {
	5798	if ( sv_eq( cur->u.mark.mark_name,
	5799	find ) )
	5800	{
	5801	ST.mark_name = find;
	5802	PUSH_STATE_GOTO( SKIP_next, next );
	5803	}
	5804	cur = cur->u.mark.prev_mark;
	5805	}
	5806	}
	5807	/* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
	5808	break;
	5809	case SKIP_next_fail:
	5810	if (ST.mark_name) {
	5811	/* (*CUT:NAME) - Set up to search for the name as we
	5812	collapse the stack*/
	5813	popmark = ST.mark_name;
	5814	} else {
	5815	/* (*CUT) - No name, we cut here. */
	5816	if (ST.mark_loc > startpoint)
	5817	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	5818	/* but we set sv_commit to latest mark_name if there
	5819	is one so they can test to see how things lead to this
	5820	cut */
	5821	if (mark_state)
	5822	sv_commit=mark_state->u.mark.mark_name;
	5823	}
	5824	no_final = 1;
	5825	sayNO;
	5826	assert(0); /* NOTREACHED */
	5827	#undef ST
	5828	case LNBREAK:
	5829	if ((n=is_LNBREAK(locinput,utf8_target))) {
	5830	locinput += n;
	5831	nextchr = UCHARAT(locinput);
	5832	} else
	5833	sayNO;
	5834	break;
	5835
	5836	#define CASE_CLASS(nAmE) \
	5837	case nAmE: \
	5838	if (locinput >= PL_regeol) \
	5839	sayNO; \
	5840	if ((n=is_##nAmE(locinput,utf8_target))) { \
	5841	locinput += n; \
	5842	nextchr = UCHARAT(locinput); \
	5843	} else \
	5844	sayNO; \
	5845	break; \
	5846	case N##nAmE: \
	5847	if (locinput >= PL_regeol) \
	5848	sayNO; \
	5849	if ((n=is_##nAmE(locinput,utf8_target))) { \
	5850	sayNO; \
	5851	} else { \
	5852	locinput += UTF8SKIP(locinput); \
	5853	nextchr = UCHARAT(locinput); \
	5854	} \
	5855	break
	5856
	5857	CASE_CLASS(VERTWS);
	5858	CASE_CLASS(HORIZWS);
	5859	#undef CASE_CLASS
	5860
	5861	default:
	5862	PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
	5863	PTR2UV(scan), OP(scan));
	5864	Perl_croak(aTHX_ "regexp memory corruption");
	5865
	5866	} /* end switch */
	5867
	5868	/* switch break jumps here */
	5869	scan = next; /* prepare to execute the next op and ... */
	5870	continue; /* ... jump back to the top, reusing st */
	5871	assert(0); /* NOTREACHED */
	5872
	5873	push_yes_state:
	5874	/* push a state that backtracks on success */
	5875	st->u.yes.prev_yes_state = yes_state;
	5876	yes_state = st;
	5877	/* FALL THROUGH */
	5878	push_state:
	5879	/* push a new regex state, then continue at scan */
	5880	{
	5881	regmatch_state *newst;
	5882
	5883	DEBUG_STACK_r({
	5884	regmatch_state *cur = st;
	5885	regmatch_state *curyes = yes_state;
	5886	int curd = depth;
	5887	regmatch_slab *slab = PL_regmatch_slab;
	5888	for (;curd > -1;cur--,curd--) {
	5889	if (cur < SLAB_FIRST(slab)) {
	5890	slab = slab->prev;
	5891	cur = SLAB_LAST(slab);
	5892	}
	5893	PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
	5894	REPORT_CODE_OFF + 2 + depth * 2,"",
	5895	curd, PL_reg_name[cur->resume_state],
	5896	(curyes == cur) ? "yes" : ""
	5897	);
	5898	if (curyes == cur)
	5899	curyes = cur->u.yes.prev_yes_state;
	5900	}
	5901	} else
	5902	DEBUG_STATE_pp("push")
	5903	);
	5904	depth++;
	5905	st->locinput = locinput;
	5906	newst = st+1;
	5907	if (newst > SLAB_LAST(PL_regmatch_slab))
	5908	newst = S_push_slab(aTHX);
	5909	PL_regmatch_state = newst;
	5910
	5911	locinput = PL_reginput;
	5912	nextchr = UCHARAT(locinput);
	5913	st = newst;
	5914	continue;
	5915	assert(0); /* NOTREACHED */
	5916	}
	5917	}
	5918
	5919	/*
	5920	* We get here only if there's trouble -- normally "case END" is
	5921	* the terminating point.
	5922	*/
	5923	Perl_croak(aTHX_ "corrupted regexp pointers");
	5924	/*NOTREACHED*/
	5925	sayNO;
	5926
	5927	yes:
	5928	if (yes_state) {
	5929	/* we have successfully completed a subexpression, but we must now
	5930	* pop to the state marked by yes_state and continue from there */
	5931	assert(st != yes_state);
	5932	#ifdef DEBUGGING
	5933	while (st != yes_state) {
	5934	st--;
	5935	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	5936	PL_regmatch_slab = PL_regmatch_slab->prev;
	5937	st = SLAB_LAST(PL_regmatch_slab);
	5938	}
	5939	DEBUG_STATE_r({
	5940	if (no_final) {
	5941	DEBUG_STATE_pp("pop (no final)");
	5942	} else {
	5943	DEBUG_STATE_pp("pop (yes)");
	5944	}
	5945	});
	5946	depth--;
	5947	}
	5948	#else
	5949	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
	5950	\|\| yes_state > SLAB_LAST(PL_regmatch_slab))
	5951	{
	5952	/* not in this slab, pop slab */
	5953	depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
	5954	PL_regmatch_slab = PL_regmatch_slab->prev;
	5955	st = SLAB_LAST(PL_regmatch_slab);
	5956	}
	5957	depth -= (st - yes_state);
	5958	#endif
	5959	st = yes_state;
	5960	yes_state = st->u.yes.prev_yes_state;
	5961	PL_regmatch_state = st;
	5962
	5963	if (no_final) {
	5964	locinput= st->locinput;
	5965	nextchr = UCHARAT(locinput);
	5966	}
	5967	state_num = st->resume_state + no_final;
	5968	goto reenter_switch;
	5969	}
	5970
	5971	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
	5972	PL_colors[4], PL_colors[5]));
	5973
	5974	if (PL_reg_state.re_state_eval_setup_done) {
	5975	/* each successfully executed (?{...}) block does the equivalent of
	5976	* local $^R = do {...}
	5977	* When popping the save stack, all these locals would be undone;
	5978	* bypass this by setting the outermost saved $^R to the latest
	5979	* value */
	5980	if (oreplsv != GvSV(PL_replgv))
	5981	sv_setsv(oreplsv, GvSV(PL_replgv));
	5982	}
	5983	result = 1;
	5984	goto final_exit;
	5985
	5986	no:
	5987	DEBUG_EXECUTE_r(
	5988	PerlIO_printf(Perl_debug_log,
	5989	"%*s %sfailed...%s\n",
	5990	REPORT_CODE_OFF+depth*2, "",
	5991	PL_colors[4], PL_colors[5])
	5992	);
	5993
	5994	no_silent:
	5995	if (no_final) {
	5996	if (yes_state) {
	5997	goto yes;
	5998	} else {
	5999	goto final_exit;
	6000	}
	6001	}
	6002	if (depth) {
	6003	/* there's a previous state to backtrack to */
	6004	st--;
	6005	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	6006	PL_regmatch_slab = PL_regmatch_slab->prev;
	6007	st = SLAB_LAST(PL_regmatch_slab);
	6008	}
	6009	PL_regmatch_state = st;
	6010	locinput= st->locinput;
	6011	nextchr = UCHARAT(locinput);
	6012
	6013	DEBUG_STATE_pp("pop");
	6014	depth--;
	6015	if (yes_state == st)
	6016	yes_state = st->u.yes.prev_yes_state;
	6017
	6018	state_num = st->resume_state + 1; /* failure = success + 1 */
	6019	goto reenter_switch;
	6020	}
	6021	result = 0;
	6022
	6023	final_exit:
	6024	if (rex->intflags & PREGf_VERBARG_SEEN) {
	6025	SV *sv_err = get_sv("REGERROR", 1);
	6026	SV *sv_mrk = get_sv("REGMARK", 1);
	6027	if (result) {
	6028	sv_commit = &PL_sv_no;
	6029	if (!sv_yes_mark)
	6030	sv_yes_mark = &PL_sv_yes;
	6031	} else {
	6032	if (!sv_commit)
	6033	sv_commit = &PL_sv_yes;
	6034	sv_yes_mark = &PL_sv_no;
	6035	}
	6036	sv_setsv(sv_err, sv_commit);
	6037	sv_setsv(sv_mrk, sv_yes_mark);
	6038	}
	6039
	6040
	6041	if (last_pushed_cv) {
	6042	dSP;
	6043	POP_MULTICALL;
	6044	PERL_UNUSED_VAR(SP);
	6045	}
	6046
	6047	/* clean up; in particular, free all slabs above current one */
	6048	LEAVE_SCOPE(oldsave);
	6049
	6050	return result;
	6051	}
	6052
	6053	/*
	6054	- regrepeat - repeatedly match something simple, report how many
	6055	*/
	6056	/*
	6057	* [This routine now assumes that it will only match on things of length 1.
	6058	* That was true before, but now we assume scan - reginput is the count,
	6059	* rather than incrementing count on every character. [Er, except utf8.]]
	6060	*/
	6061	STATIC I32
	6062	S_regrepeat(pTHX_ const regexp prog, const regnode p, I32 max, int depth)
	6063	{
	6064	dVAR;
	6065	register char *scan;
	6066	register I32 c;
	6067	register char *loceol = PL_regeol;
	6068	register I32 hardcount = 0;
	6069	register bool utf8_target = PL_reg_match_utf8;
	6070	UV utf8_flags;
	6071	#ifndef DEBUGGING
	6072	PERL_UNUSED_ARG(depth);
	6073	#endif
	6074
	6075	PERL_ARGS_ASSERT_REGREPEAT;
	6076
	6077	scan = PL_reginput;
	6078	if (max == REG_INFTY)
	6079	max = I32_MAX;
	6080	else if (max < loceol - scan)
	6081	loceol = scan + max;
	6082	switch (OP(p)) {
	6083	case REG_ANY:
	6084	if (utf8_target) {
	6085	loceol = PL_regeol;
	6086	while (scan < loceol && hardcount < max && *scan != '\n') {
	6087	scan += UTF8SKIP(scan);
	6088	hardcount++;
	6089	}
	6090	} else {
	6091	while (scan < loceol && *scan != '\n')
	6092	scan++;
	6093	}
	6094	break;
	6095	case SANY:
	6096	if (utf8_target) {
	6097	loceol = PL_regeol;
	6098	while (scan < loceol && hardcount < max) {
	6099	scan += UTF8SKIP(scan);
	6100	hardcount++;
	6101	}
	6102	}
	6103	else
	6104	scan = loceol;
	6105	break;
	6106	case CANY:
	6107	scan = loceol;
	6108	break;
	6109	case EXACT:
	6110	/* To get here, EXACTish nodes must have byte length == 1. That
	6111	* means they match only characters in the string that can be expressed
	6112	* as a single byte. For non-utf8 strings, that means a simple match.
	6113	* For utf8 strings, the character matched must be an invariant, or
	6114	* downgradable to a single byte. The pattern's utf8ness is
	6115	* irrelevant, as since it's a single byte, it either isn't utf8, or if
	6116	* it is, it's an invariant */
	6117
	6118	c = (U8)*STRING(p);
	6119	assert(! UTF_PATTERN \|\| UNI_IS_INVARIANT(c));
	6120
	6121	if (! utf8_target \|\| UNI_IS_INVARIANT(c)) {
	6122	while (scan < loceol && UCHARAT(scan) == c) {
	6123	scan++;
	6124	}
	6125	}
	6126	else {
	6127
	6128	/* Here, the string is utf8, and the pattern char is different
	6129	* in utf8 than not, so can't compare them directly. Outside the
	6130	* loop, find the two utf8 bytes that represent c, and then
	6131	* look for those in sequence in the utf8 string */
	6132	U8 high = UTF8_TWO_BYTE_HI(c);
	6133	U8 low = UTF8_TWO_BYTE_LO(c);
	6134	loceol = PL_regeol;
	6135
	6136	while (hardcount < max
	6137	&& scan + 1 < loceol
	6138	&& UCHARAT(scan) == high
	6139	&& UCHARAT(scan + 1) == low)
	6140	{
	6141	scan += 2;
	6142	hardcount++;
	6143	}
	6144	}
	6145	break;
	6146	case EXACTFA:
	6147	utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	6148	goto do_exactf;
	6149
	6150	case EXACTFL:
	6151	PL_reg_flags \|= RF_tainted;
	6152	utf8_flags = FOLDEQ_UTF8_LOCALE;
	6153	goto do_exactf;
	6154
	6155	case EXACTF:
	6156	utf8_flags = 0;
	6157	goto do_exactf;
	6158
	6159	case EXACTFU_SS:
	6160	case EXACTFU_TRICKYFOLD:
	6161	case EXACTFU:
	6162	utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	6163
	6164	/* The comments for the EXACT case above apply as well to these fold
	6165	* ones */
	6166
	6167	do_exactf:
	6168	c = (U8)*STRING(p);
	6169	assert(! UTF_PATTERN \|\| UNI_IS_INVARIANT(c));
	6170
	6171	if (utf8_target \|\| OP(p) == EXACTFU_SS) { /* Use full Unicode fold matching */
	6172	char *tmpeol = loceol;
	6173	while (hardcount < max
	6174	&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
	6175	STRING(p), NULL, 1, cBOOL(UTF_PATTERN), utf8_flags))
	6176	{
	6177	scan = tmpeol;
	6178	tmpeol = loceol;
	6179	hardcount++;
	6180	}
	6181
	6182	/* XXX Note that the above handles properly the German sharp s in
	6183	* the pattern matching ss in the string. But it doesn't handle
	6184	* properly cases where the string contains say 'LIGATURE ff' and
	6185	* the pattern is 'f+'. This would require, say, a new function or
	6186	* revised interface to foldEQ_utf8(), in which the maximum number
	6187	* of characters to match could be passed and it would return how
	6188	* many actually did. This is just one of many cases where
	6189	* multi-char folds don't work properly, and so the fix is being
	6190	* deferred */
	6191	}
	6192	else {
	6193	U8 folded;
	6194
	6195	/* Here, the string isn't utf8 and c is a single byte; and either
	6196	* the pattern isn't utf8 or c is an invariant, so its utf8ness
	6197	* doesn't affect c. Can just do simple comparisons for exact or
	6198	* fold matching. */
	6199	switch (OP(p)) {
	6200	case EXACTF: folded = PL_fold[c]; break;
	6201	case EXACTFA:
	6202	case EXACTFU_TRICKYFOLD:
	6203	case EXACTFU: folded = PL_fold_latin1[c]; break;
	6204	case EXACTFL: folded = PL_fold_locale[c]; break;
	6205	default: Perl_croak(aTHX_ "panic: Unexpected op %u", OP(p));
	6206	}
	6207	while (scan < loceol &&
	6208	(UCHARAT(scan) == c \|\| UCHARAT(scan) == folded))
	6209	{
	6210	scan++;
	6211	}
	6212	}
	6213	break;
	6214	case ANYOFV:
	6215	case ANYOF:
	6216	if (utf8_target \|\| OP(p) == ANYOFV) {
	6217	STRLEN inclasslen;
	6218	loceol = PL_regeol;
	6219	inclasslen = loceol - scan;
	6220	while (hardcount < max
	6221	&& ((inclasslen = loceol - scan) > 0)
	6222	&& reginclass(prog, p, (U8*)scan, &inclasslen, utf8_target))
	6223	{
	6224	scan += inclasslen;
	6225	hardcount++;
	6226	}
	6227	} else {
	6228	while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
	6229	scan++;
	6230	}
	6231	break;
	6232	case ALNUMU:
	6233	if (utf8_target) {
	6234	utf8_wordchar:
	6235	loceol = PL_regeol;
	6236	LOAD_UTF8_CHARCLASS_ALNUM();
	6237	while (hardcount < max && scan < loceol &&
	6238	swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6239	{
	6240	scan += UTF8SKIP(scan);
	6241	hardcount++;
	6242	}
	6243	} else {
	6244	while (scan < loceol && isWORDCHAR_L1((U8) *scan)) {
	6245	scan++;
	6246	}
	6247	}
	6248	break;
	6249	case ALNUM:
	6250	if (utf8_target)
	6251	goto utf8_wordchar;
	6252	while (scan < loceol && isALNUM((U8) *scan)) {
	6253	scan++;
	6254	}
	6255	break;
	6256	case ALNUMA:
	6257	while (scan < loceol && isWORDCHAR_A((U8) *scan)) {
	6258	scan++;
	6259	}
	6260	break;
	6261	case ALNUML:
	6262	PL_reg_flags \|= RF_tainted;
	6263	if (utf8_target) {
	6264	loceol = PL_regeol;
	6265	while (hardcount < max && scan < loceol &&
	6266	isALNUM_LC_utf8((U8*)scan)) {
	6267	scan += UTF8SKIP(scan);
	6268	hardcount++;
	6269	}
	6270	} else {
	6271	while (scan < loceol && isALNUM_LC(*scan))
	6272	scan++;
	6273	}
	6274	break;
	6275	case NALNUMU:
	6276	if (utf8_target) {
	6277
	6278	utf8_Nwordchar:
	6279
	6280	loceol = PL_regeol;
	6281	LOAD_UTF8_CHARCLASS_ALNUM();
	6282	while (hardcount < max && scan < loceol &&
	6283	! swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6284	{
	6285	scan += UTF8SKIP(scan);
	6286	hardcount++;
	6287	}
	6288	} else {
	6289	while (scan < loceol && ! isWORDCHAR_L1((U8) *scan)) {
	6290	scan++;
	6291	}
	6292	}
	6293	break;
	6294	case NALNUM:
	6295	if (utf8_target)
	6296	goto utf8_Nwordchar;
	6297	while (scan < loceol && ! isALNUM((U8) *scan)) {
	6298	scan++;
	6299	}
	6300	break;
	6301	case NALNUMA:
	6302	if (utf8_target) {
	6303	while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
	6304	scan += UTF8SKIP(scan);
	6305	}
	6306	}
	6307	else {
	6308	while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
	6309	scan++;
	6310	}
	6311	}
	6312	break;
	6313	case NALNUML:
	6314	PL_reg_flags \|= RF_tainted;
	6315	if (utf8_target) {
	6316	loceol = PL_regeol;
	6317	while (hardcount < max && scan < loceol &&
	6318	!isALNUM_LC_utf8((U8*)scan)) {
	6319	scan += UTF8SKIP(scan);
	6320	hardcount++;
	6321	}
	6322	} else {
	6323	while (scan < loceol && !isALNUM_LC(*scan))
	6324	scan++;
	6325	}
	6326	break;
	6327	case SPACEU:
	6328	if (utf8_target) {
	6329
	6330	utf8_space:
	6331
	6332	loceol = PL_regeol;
	6333	LOAD_UTF8_CHARCLASS_SPACE();
	6334	while (hardcount < max && scan < loceol &&
	6335	(*scan == ' ' \|\|
	6336	swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
	6337	{
	6338	scan += UTF8SKIP(scan);
	6339	hardcount++;
	6340	}
	6341	break;
	6342	}
	6343	else {
	6344	while (scan < loceol && isSPACE_L1((U8) *scan)) {
	6345	scan++;
	6346	}
	6347	break;
	6348	}
	6349	case SPACE:
	6350	if (utf8_target)
	6351	goto utf8_space;
	6352
	6353	while (scan < loceol && isSPACE((U8) *scan)) {
	6354	scan++;
	6355	}
	6356	break;
	6357	case SPACEA:
	6358	while (scan < loceol && isSPACE_A((U8) *scan)) {
	6359	scan++;
	6360	}
	6361	break;
	6362	case SPACEL:
	6363	PL_reg_flags \|= RF_tainted;
	6364	if (utf8_target) {
	6365	loceol = PL_regeol;
	6366	while (hardcount < max && scan < loceol &&
	6367	isSPACE_LC_utf8((U8*)scan)) {
	6368	scan += UTF8SKIP(scan);
	6369	hardcount++;
	6370	}
	6371	} else {
	6372	while (scan < loceol && isSPACE_LC(*scan))
	6373	scan++;
	6374	}
	6375	break;
	6376	case NSPACEU:
	6377	if (utf8_target) {
	6378
	6379	utf8_Nspace:
	6380
	6381	loceol = PL_regeol;
	6382	LOAD_UTF8_CHARCLASS_SPACE();
	6383	while (hardcount < max && scan < loceol &&
	6384	! (*scan == ' ' \|\|
	6385	swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
	6386	{
	6387	scan += UTF8SKIP(scan);
	6388	hardcount++;
	6389	}
	6390	break;
	6391	}
	6392	else {
	6393	while (scan < loceol && ! isSPACE_L1((U8) *scan)) {
	6394	scan++;
	6395	}
	6396	}
	6397	break;
	6398	case NSPACE:
	6399	if (utf8_target)
	6400	goto utf8_Nspace;
	6401
	6402	while (scan < loceol && ! isSPACE((U8) *scan)) {
	6403	scan++;
	6404	}
	6405	break;
	6406	case NSPACEA:
	6407	if (utf8_target) {
	6408	while (scan < loceol && ! isSPACE_A((U8) *scan)) {
	6409	scan += UTF8SKIP(scan);
	6410	}
	6411	}
	6412	else {
	6413	while (scan < loceol && ! isSPACE_A((U8) *scan)) {
	6414	scan++;
	6415	}
	6416	}
	6417	break;
	6418	case NSPACEL:
	6419	PL_reg_flags \|= RF_tainted;
	6420	if (utf8_target) {
	6421	loceol = PL_regeol;
	6422	while (hardcount < max && scan < loceol &&
	6423	!isSPACE_LC_utf8((U8*)scan)) {
	6424	scan += UTF8SKIP(scan);
	6425	hardcount++;
	6426	}
	6427	} else {
	6428	while (scan < loceol && !isSPACE_LC(*scan))
	6429	scan++;
	6430	}
	6431	break;
	6432	case DIGIT:
	6433	if (utf8_target) {
	6434	loceol = PL_regeol;
	6435	LOAD_UTF8_CHARCLASS_DIGIT();
	6436	while (hardcount < max && scan < loceol &&
	6437	swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	6438	scan += UTF8SKIP(scan);
	6439	hardcount++;
	6440	}
	6441	} else {
	6442	while (scan < loceol && isDIGIT(*scan))
	6443	scan++;
	6444	}
	6445	break;
	6446	case DIGITA:
	6447	while (scan < loceol && isDIGIT_A((U8) *scan)) {
	6448	scan++;
	6449	}
	6450	break;
	6451	case DIGITL:
	6452	PL_reg_flags \|= RF_tainted;
	6453	if (utf8_target) {
	6454	loceol = PL_regeol;
	6455	while (hardcount < max && scan < loceol &&
	6456	isDIGIT_LC_utf8((U8*)scan)) {
	6457	scan += UTF8SKIP(scan);
	6458	hardcount++;
	6459	}
	6460	} else {
	6461	while (scan < loceol && isDIGIT_LC(*scan))
	6462	scan++;
	6463	}
	6464	break;
	6465	case NDIGIT:
	6466	if (utf8_target) {
	6467	loceol = PL_regeol;
	6468	LOAD_UTF8_CHARCLASS_DIGIT();
	6469	while (hardcount < max && scan < loceol &&
	6470	!swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	6471	scan += UTF8SKIP(scan);
	6472	hardcount++;
	6473	}
	6474	} else {
	6475	while (scan < loceol && !isDIGIT(*scan))
	6476	scan++;
	6477	}
	6478	break;
	6479	case NDIGITA:
	6480	if (utf8_target) {
	6481	while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
	6482	scan += UTF8SKIP(scan);
	6483	}
	6484	}
	6485	else {
	6486	while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
	6487	scan++;
	6488	}
	6489	}
	6490	break;
	6491	case NDIGITL:
	6492	PL_reg_flags \|= RF_tainted;
	6493	if (utf8_target) {
	6494	loceol = PL_regeol;
	6495	while (hardcount < max && scan < loceol &&
	6496	!isDIGIT_LC_utf8((U8*)scan)) {
	6497	scan += UTF8SKIP(scan);
	6498	hardcount++;
	6499	}
	6500	} else {
	6501	while (scan < loceol && !isDIGIT_LC(*scan))
	6502	scan++;
	6503	}
	6504	break;
	6505	case LNBREAK:
	6506	if (utf8_target) {
	6507	loceol = PL_regeol;
	6508	while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
	6509	scan += c;
	6510	hardcount++;
	6511	}
	6512	} else {
	6513	/*
	6514	LNBREAK can match two latin chars, which is ok,
	6515	because we have a null terminated string, but we
	6516	have to use hardcount in this situation
	6517	*/
	6518	while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
	6519	scan+=c;
	6520	hardcount++;
	6521	}
	6522	}
	6523	break;
	6524	case HORIZWS:
	6525	if (utf8_target) {
	6526	loceol = PL_regeol;
	6527	while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
	6528	scan += c;
	6529	hardcount++;
	6530	}
	6531	} else {
	6532	while (scan < loceol && is_HORIZWS_latin1(scan))
	6533	scan++;
	6534	}
	6535	break;
	6536	case NHORIZWS:
	6537	if (utf8_target) {
	6538	loceol = PL_regeol;
	6539	while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
	6540	scan += UTF8SKIP(scan);
	6541	hardcount++;
	6542	}
	6543	} else {
	6544	while (scan < loceol && !is_HORIZWS_latin1(scan))
	6545	scan++;
	6546
	6547	}
	6548	break;
	6549	case VERTWS:
	6550	if (utf8_target) {
	6551	loceol = PL_regeol;
	6552	while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
	6553	scan += c;
	6554	hardcount++;
	6555	}
	6556	} else {
	6557	while (scan < loceol && is_VERTWS_latin1(scan))
	6558	scan++;
	6559
	6560	}
	6561	break;
	6562	case NVERTWS:
	6563	if (utf8_target) {
	6564	loceol = PL_regeol;
	6565	while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
	6566	scan += UTF8SKIP(scan);
	6567	hardcount++;
	6568	}
	6569	} else {
	6570	while (scan < loceol && !is_VERTWS_latin1(scan))
	6571	scan++;
	6572
	6573	}
	6574	break;
	6575
	6576	default: /* Called on something of 0 width. */
	6577	break; /* So match right here or not at all. */
	6578	}
	6579
	6580	if (hardcount)
	6581	c = hardcount;
	6582	else
	6583	c = scan - PL_reginput;
	6584	PL_reginput = scan;
	6585
	6586	DEBUG_r({
	6587	GET_RE_DEBUG_FLAGS_DECL;
	6588	DEBUG_EXECUTE_r({
	6589	SV * const prop = sv_newmortal();
	6590	regprop(prog, prop, p);
	6591	PerlIO_printf(Perl_debug_log,
	6592	"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
	6593	REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
	6594	});
	6595	});
	6596
	6597	return(c);
	6598	}
	6599
	6600
	6601	#if !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION)
	6602	/*
	6603	- regclass_swash - prepare the utf8 swash. Wraps the shared core version to
	6604	create a copy so that changes the caller makes won't change the shared one
	6605	*/
	6606	SV *
	6607	Perl_regclass_swash(pTHX_ const regexp prog, register const regnode node, bool doinit, SV listsvp, SV altsvp)
	6608	{
	6609	PERL_ARGS_ASSERT_REGCLASS_SWASH;
	6610	return newSVsv(core_regclass_swash(prog, node, doinit, listsvp, altsvp));
	6611	}
	6612	#endif
	6613
	6614	STATIC SV *
	6615	S_core_regclass_swash(pTHX_ const regexp prog, register const regnode node, bool doinit, SV listsvp, SV altsvp)
	6616	{
	6617	/* Returns the swash for the input 'node' in the regex 'prog'.
	6618	* If <doinit> is true, will attempt to create the swash if not already
	6619	* done.
	6620	* If <listsvp> is non-null, will return the swash initialization string in
	6621	* it.
	6622	* If <altsvp> is non-null, will return the alternates to the regular swash
	6623	* in it
	6624	* Tied intimately to how regcomp.c sets up the data structure */
	6625
	6626	dVAR;
	6627	SV *sw = NULL;
	6628	SV *si = NULL;
	6629	SV *alt = NULL;
	6630	SV* invlist = NULL;
	6631
	6632	RXi_GET_DECL(prog,progi);
	6633	const struct reg_data * const data = prog ? progi->data : NULL;
	6634
	6635	PERL_ARGS_ASSERT_CORE_REGCLASS_SWASH;
	6636
	6637	assert(ANYOF_NONBITMAP(node));
	6638
	6639	if (data && data->count) {
	6640	const U32 n = ARG(node);
	6641
	6642	if (data->what[n] == 's') {
	6643	SV * const rv = MUTABLE_SV(data->data[n]);
	6644	AV * const av = MUTABLE_AV(SvRV(rv));
	6645	SV **const ary = AvARRAY(av);
	6646	bool invlist_has_user_defined_property;
	6647
	6648	si = ary; / ary[0] = the string to initialize the swash with */
	6649
	6650	/* Elements 3 and 4 are either both present or both absent. [3] is
	6651	* any inversion list generated at compile time; [4] indicates if
	6652	* that inversion list has any user-defined properties in it. */
	6653	if (av_len(av) >= 3) {
	6654	invlist = ary[3];
	6655	invlist_has_user_defined_property = cBOOL(SvUV(ary[4]));
	6656	}
	6657	else {
	6658	invlist = NULL;
	6659	invlist_has_user_defined_property = FALSE;
	6660	}
	6661
	6662	/* Element [1] is reserved for the set-up swash. If already there,
	6663	* return it; if not, create it and store it there */
	6664	if (SvROK(ary[1])) {
	6665	sw = ary[1];
	6666	}
	6667	else if (si && doinit) {
	6668
	6669	sw = _core_swash_init("utf8", /* the utf8 package */
	6670	"", /* nameless */
	6671	si,
	6672	1, /* binary */
	6673	0, /* not from tr/// */
	6674	FALSE, /* is error if can't find
	6675	property */
	6676	invlist,
	6677	invlist_has_user_defined_property);
	6678	(void)av_store(av, 1, sw);
	6679	}
	6680
	6681	/* Element [2] is for any multi-char folds. Note that is a
	6682	* fundamentally flawed design, because can't backtrack and try
	6683	* again. See [perl #89774] */
	6684	if (SvTYPE(ary[2]) == SVt_PVAV) {
	6685	alt = ary[2];
	6686	}
	6687	}
	6688	}
	6689
	6690	if (listsvp) {
	6691	SV* matches_string = newSVpvn("", 0);
	6692	SV** invlistsvp;
	6693
	6694	/* Use the swash, if any, which has to have incorporated into it all
	6695	* possibilities */
	6696	if ( sw
	6697	&& SvROK(sw)
	6698	&& SvTYPE(SvRV(sw)) == SVt_PVHV
	6699	&& (invlistsvp = hv_fetchs(MUTABLE_HV(SvRV(sw)), "INVLIST", FALSE)))
	6700	{
	6701	invlist = *invlistsvp;
	6702	}
	6703	else if (si && si != &PL_sv_undef) {
	6704
	6705	/* If no swash, use the input nitialization string, if available */
	6706	sv_catsv(matches_string, si);
	6707	}
	6708
	6709	/* Add the inversion list to whatever we have. This may have come from
	6710	* the swash, or from an input parameter */
	6711	if (invlist) {
	6712	sv_catsv(matches_string, _invlist_contents(invlist));
	6713	}
	6714	*listsvp = matches_string;
	6715	}
	6716
	6717	if (altsvp)
	6718	*altsvp = alt;
	6719
	6720	return sw;
	6721	}
	6722
	6723	/*
	6724	- reginclass - determine if a character falls into a character class
	6725
	6726	n is the ANYOF regnode
	6727	p is the target string
	6728	lenp is pointer to the maximum number of bytes of how far to go in p
	6729	(This is assumed without checking to always be at least the current
	6730	character's size)
	6731	utf8_target tells whether p is in UTF-8.
	6732
	6733	Returns true if matched; false otherwise. If lenp is not NULL, on return
	6734	from a successful match, the value it points to will be updated to how many
	6735	bytes in p were matched. If there was no match, the value is undefined,
	6736	possibly changed from the input.
	6737
	6738	Note that this can be a synthetic start class, a combination of various
	6739	nodes, so things you think might be mutually exclusive, such as locale,
	6740	aren't. It can match both locale and non-locale
	6741
	6742	*/
	6743
	6744	STATIC bool
	6745	S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, STRLEN* lenp, register const bool utf8_target)
	6746	{
	6747	dVAR;
	6748	const char flags = ANYOF_FLAGS(n);
	6749	bool match = FALSE;
	6750	UV c = *p;
	6751	STRLEN c_len = 0;
	6752	STRLEN maxlen;
	6753
	6754	PERL_ARGS_ASSERT_REGINCLASS;
	6755
	6756	/* If c is not already the code point, get it */
	6757	if (utf8_target && !UTF8_IS_INVARIANT(c)) {
	6758	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
	6759	(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
	6760	\| UTF8_ALLOW_FFFF \| UTF8_CHECK_ONLY);
	6761	/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
	6762	* UTF8_ALLOW_FFFF */
	6763	if (c_len == (STRLEN)-1)
	6764	Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
	6765	}
	6766	else {
	6767	c_len = 1;
	6768	}
	6769
	6770	/* Use passed in max length, or one character if none passed in or less
	6771	* than one character. And assume will match just one character. This is
	6772	* overwritten later if matched more. */
	6773	if (lenp) {
	6774	maxlen = (lenp > c_len) ? lenp : c_len;
	6775	*lenp = c_len;
	6776
	6777	}
	6778	else {
	6779	maxlen = c_len;
	6780	}
	6781
	6782	/* If this character is potentially in the bitmap, check it */
	6783	if (c < 256) {
	6784	if (ANYOF_BITMAP_TEST(n, c))
	6785	match = TRUE;
	6786	else if (flags & ANYOF_NON_UTF8_LATIN1_ALL
	6787	&& ! utf8_target
	6788	&& ! isASCII(c))
	6789	{
	6790	match = TRUE;
	6791	}
	6792
	6793	else if (flags & ANYOF_LOCALE) {
	6794	PL_reg_flags \|= RF_tainted;
	6795
	6796	if ((flags & ANYOF_LOC_NONBITMAP_FOLD)
	6797	&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
	6798	{
	6799	match = TRUE;
	6800	}
	6801	else if (ANYOF_CLASS_TEST_ANY_SET(n) &&
	6802	((ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) \|\|
	6803	(ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) \|\|
	6804	(ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) \|\|
	6805	(ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) \|\|
	6806	(ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) \|\|
	6807	(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) \|\|
	6808	(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) \|\|
	6809	(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) \|\|
	6810	(ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) \|\|
	6811	(ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) \|\|
	6812	(ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII_LC(c)) \|\|
	6813	(ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII_LC(c)) \|\|
	6814	(ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) \|\|
	6815	(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) \|\|
	6816	(ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) \|\|
	6817	(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) \|\|
	6818	(ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) \|\|
	6819	(ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) \|\|
	6820	(ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) \|\|
	6821	(ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) \|\|
	6822	(ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) \|\|
	6823	(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) \|\|
	6824	(ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) \|\|
	6825	(ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) \|\|
	6826	(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) \|\|
	6827	(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) \|\|
	6828	(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) \|\|
	6829	(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) \|\|
	6830	(ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK_LC(c)) \|\|
	6831	(ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK_LC(c))
	6832	) /* How's that for a conditional? */
	6833	) {
	6834	match = TRUE;
	6835	}
	6836	}
	6837	}
	6838
	6839	/* If the bitmap didn't (or couldn't) match, and something outside the
	6840	* bitmap could match, try that. Locale nodes specifiy completely the
	6841	* behavior of code points in the bit map (otherwise, a utf8 target would
	6842	* cause them to be treated as Unicode and not locale), except in
	6843	* the very unlikely event when this node is a synthetic start class, which
	6844	* could be a combination of locale and non-locale nodes. So allow locale
	6845	* to match for the synthetic start class, which will give a false
	6846	* positive that will be resolved when the match is done again as not part
	6847	* of the synthetic start class */
	6848	if (!match) {
	6849	if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
	6850	match = TRUE; /* Everything above 255 matches */
	6851	}
	6852	else if (ANYOF_NONBITMAP(n)
	6853	&& ((flags & ANYOF_NONBITMAP_NON_UTF8)
	6854	\|\| (utf8_target
	6855	&& (c >=256
	6856	\|\| (! (flags & ANYOF_LOCALE))
	6857	\|\| (flags & ANYOF_IS_SYNTHETIC)))))
	6858	{
	6859	AV *av;
	6860	SV * const sw = core_regclass_swash(prog, n, TRUE, 0, (SV**)&av);
	6861
	6862	if (sw) {
	6863	U8 * utf8_p;
	6864	if (utf8_target) {
	6865	utf8_p = (U8 *) p;
	6866	} else {
	6867
	6868	/* Not utf8. Convert as much of the string as available up
	6869	* to the limit of how far the (single) character in the
	6870	* pattern can possibly match (no need to go further). If
	6871	* the node is a straight ANYOF or not folding, it can't
	6872	* match more than one. Otherwise, It can match up to how
	6873	* far a single char can fold to. Since not utf8, each
	6874	* character is a single byte, so the max it can be in
	6875	* bytes is the same as the max it can be in characters */
	6876	STRLEN len = (OP(n) == ANYOF
	6877	\|\| ! (flags & ANYOF_LOC_NONBITMAP_FOLD))
	6878	? 1
	6879	: (maxlen < UTF8_MAX_FOLD_CHAR_EXPAND)
	6880	? maxlen
	6881	: UTF8_MAX_FOLD_CHAR_EXPAND;
	6882	utf8_p = bytes_to_utf8(p, &len);
	6883	}
	6884
	6885	if (swash_fetch(sw, utf8_p, TRUE))
	6886	match = TRUE;
	6887	else if (flags & ANYOF_LOC_NONBITMAP_FOLD) {
	6888
	6889	/* Here, we need to test if the fold of the target string
	6890	* matches. The non-multi char folds have all been moved to
	6891	* the compilation phase, and the multi-char folds have
	6892	* been stored by regcomp into 'av'; we linearly check to
	6893	* see if any match the target string (folded). We know
	6894	* that the originals were each one character, but we don't
	6895	* currently know how many characters/bytes each folded to,
	6896	* except we do know that there are small limits imposed by
	6897	* Unicode. XXX A performance enhancement would be to have
	6898	* regcomp.c store the max number of chars/bytes that are
	6899	* in an av entry, as, say the 0th element. Even better
	6900	* would be to have a hash of the few characters that can
	6901	* start a multi-char fold to the max number of chars of
	6902	* those folds.
	6903	*
	6904	* If there is a match, we will need to advance (if lenp is
	6905	* specified) the match pointer in the target string. But
	6906	* what we are comparing here isn't that string directly,
	6907	* but its fold, whose length may differ from the original.
	6908	* As we go along in constructing the fold, therefore, we
	6909	* create a map so that we know how many bytes in the
	6910	* source to advance given that we have matched a certain
	6911	* number of bytes in the fold. This map is stored in
	6912	* 'map_fold_len_back'. Let n mean the number of bytes in
	6913	* the fold of the first character that we are folding.
	6914	* Then map_fold_len_back[n] is set to the number of bytes
	6915	* in that first character. Similarly let m be the
	6916	* corresponding number for the second character to be
	6917	* folded. Then map_fold_len_back[n+m] is set to the
	6918	* number of bytes occupied by the first two source
	6919	* characters. ... */
	6920	U8 map_fold_len_back[UTF8_MAXBYTES_CASE+1] = { 0 };
	6921	U8 folded[UTF8_MAXBYTES_CASE+1];
	6922	STRLEN foldlen = 0; /* num bytes in fold of 1st char */
	6923	STRLEN total_foldlen = 0; /* num bytes in fold of all
	6924	chars */
	6925
	6926	if (OP(n) == ANYOF \|\| maxlen == 1 \|\| ! lenp \|\| ! av) {
	6927
	6928	/* Here, only need to fold the first char of the target
	6929	* string. It the source wasn't utf8, is 1 byte long */
	6930	to_utf8_fold(utf8_p, folded, &foldlen);
	6931	total_foldlen = foldlen;
	6932	map_fold_len_back[foldlen] = (utf8_target)
	6933	? UTF8SKIP(utf8_p)
	6934	: 1;
	6935	}
	6936	else {
	6937
	6938	/* Here, need to fold more than the first char. Do so
	6939	* up to the limits */
	6940	U8* source_ptr = utf8_p; /* The source for the fold
	6941	is the regex target
	6942	string */
	6943	U8* folded_ptr = folded;
	6944	U8* e = utf8_p + maxlen; /* Can't go beyond last
	6945	available byte in the
	6946	target string */
	6947	U8 i;
	6948	for (i = 0;
	6949	i < UTF8_MAX_FOLD_CHAR_EXPAND && source_ptr < e;
	6950	i++)
	6951	{
	6952
	6953	/* Fold the next character */
	6954	U8 this_char_folded[UTF8_MAXBYTES_CASE+1];
	6955	STRLEN this_char_foldlen;
	6956	to_utf8_fold(source_ptr,
	6957	this_char_folded,
	6958	&this_char_foldlen);
	6959
	6960	/* Bail if it would exceed the byte limit for
	6961	* folding a single char. */
	6962	if (this_char_foldlen + folded_ptr - folded >
	6963	UTF8_MAXBYTES_CASE)
	6964	{
	6965	break;
	6966	}
	6967
	6968	/* Add the fold of this character */
	6969	Copy(this_char_folded,
	6970	folded_ptr,
	6971	this_char_foldlen,
	6972	U8);
	6973	source_ptr += UTF8SKIP(source_ptr);
	6974	folded_ptr += this_char_foldlen;
	6975	total_foldlen = folded_ptr - folded;
	6976
	6977	/* Create map from the number of bytes in the fold
	6978	* back to the number of bytes in the source. If
	6979	* the source isn't utf8, the byte count is just
	6980	* the number of characters so far */
	6981	map_fold_len_back[total_foldlen]
	6982	= (utf8_target)
	6983	? source_ptr - utf8_p
	6984	: i + 1;
	6985	}
	6986	*folded_ptr = '\0';
	6987	}
	6988
	6989
	6990	/* Do the linear search to see if the fold is in the list
	6991	* of multi-char folds. */
	6992	if (av) {
	6993	I32 i;
	6994	for (i = 0; i <= av_len(av); i++) {
	6995	SV* const sv = *av_fetch(av, i, FALSE);
	6996	STRLEN len;
	6997	const char * const s = SvPV_const(sv, len);
	6998
	6999	if (len <= total_foldlen
	7000	&& memEQ(s, (char*)folded, len)
	7001
	7002	/* If 0, means matched a partial char. See
	7003	* [perl #90536] */
	7004	&& map_fold_len_back[len])
	7005	{
	7006
	7007	/* Advance the target string ptr to account for
	7008	* this fold, but have to translate from the
	7009	* folded length to the corresponding source
	7010	* length. */
	7011	if (lenp) {
	7012	*lenp = map_fold_len_back[len];
	7013	}
	7014	match = TRUE;
	7015	break;
	7016	}
	7017	}
	7018	}
	7019	}
	7020
	7021	/* If we allocated a string above, free it */
	7022	if (! utf8_target) Safefree(utf8_p);
	7023	}
	7024	}
	7025	}
	7026
	7027	return (flags & ANYOF_INVERT) ? !match : match;
	7028	}
	7029
	7030	STATIC U8 *
	7031	S_reghop3(U8 s, I32 off, const U8 lim)
	7032	{
	7033	/* return the position 'off' UTF-8 characters away from 's', forward if
	7034	* 'off' >= 0, backwards if negative. But don't go outside of position
	7035	* 'lim', which better be < s if off < 0 */
	7036
	7037	dVAR;
	7038
	7039	PERL_ARGS_ASSERT_REGHOP3;
	7040
	7041	if (off >= 0) {
	7042	while (off-- && s < lim) {
	7043	/* XXX could check well-formedness here */
	7044	s += UTF8SKIP(s);
	7045	}
	7046	}
	7047	else {
	7048	while (off++ && s > lim) {
	7049	s--;
	7050	if (UTF8_IS_CONTINUED(*s)) {
	7051	while (s > lim && UTF8_IS_CONTINUATION(*s))
	7052	s--;
	7053	}
	7054	/* XXX could check well-formedness here */
	7055	}
	7056	}
	7057	return s;
	7058	}
	7059
	7060	#ifdef XXX_dmq
	7061	/* there are a bunch of places where we use two reghop3's that should
	7062	be replaced with this routine. but since thats not done yet
	7063	we ifdef it out - dmq
	7064	*/
	7065	STATIC U8 *
	7066	S_reghop4(U8 s, I32 off, const U8 llim, const U8* rlim)
	7067	{
	7068	dVAR;
	7069
	7070	PERL_ARGS_ASSERT_REGHOP4;
	7071
	7072	if (off >= 0) {
	7073	while (off-- && s < rlim) {
	7074	/* XXX could check well-formedness here */
	7075	s += UTF8SKIP(s);
	7076	}
	7077	}
	7078	else {
	7079	while (off++ && s > llim) {
	7080	s--;
	7081	if (UTF8_IS_CONTINUED(*s)) {
	7082	while (s > llim && UTF8_IS_CONTINUATION(*s))
	7083	s--;
	7084	}
	7085	/* XXX could check well-formedness here */
	7086	}
	7087	}
	7088	return s;
	7089	}
	7090	#endif
	7091
	7092	STATIC U8 *
	7093	S_reghopmaybe3(U8* s, I32 off, const U8* lim)
	7094	{
	7095	dVAR;
	7096
	7097	PERL_ARGS_ASSERT_REGHOPMAYBE3;
	7098
	7099	if (off >= 0) {
	7100	while (off-- && s < lim) {
	7101	/* XXX could check well-formedness here */
	7102	s += UTF8SKIP(s);
	7103	}
	7104	if (off >= 0)
	7105	return NULL;
	7106	}
	7107	else {
	7108	while (off++ && s > lim) {
	7109	s--;
	7110	if (UTF8_IS_CONTINUED(*s)) {
	7111	while (s > lim && UTF8_IS_CONTINUATION(*s))
	7112	s--;
	7113	}
	7114	/* XXX could check well-formedness here */
	7115	}
	7116	if (off <= 0)
	7117	return NULL;
	7118	}
	7119	return s;
	7120	}
	7121
	7122	static void
	7123	restore_pos(pTHX_ void *arg)
	7124	{
	7125	dVAR;
	7126	regexp * const rex = (regexp *)arg;
	7127	if (PL_reg_state.re_state_eval_setup_done) {
	7128	if (PL_reg_oldsaved) {
	7129	rex->subbeg = PL_reg_oldsaved;
	7130	rex->sublen = PL_reg_oldsavedlen;
	7131	#ifdef PERL_OLD_COPY_ON_WRITE
	7132	rex->saved_copy = PL_nrs;
	7133	#endif
	7134	RXp_MATCH_COPIED_on(rex);
	7135	}
	7136	PL_reg_magic->mg_len = PL_reg_oldpos;
	7137	PL_reg_state.re_state_eval_setup_done = FALSE;
	7138	PL_curpm = PL_reg_oldcurpm;
	7139	}
	7140	}
	7141
	7142	STATIC void
	7143	S_to_utf8_substr(pTHX_ register regexp *prog)
	7144	{
	7145	int i = 1;
	7146
	7147	PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
	7148
	7149	do {
	7150	if (prog->substrs->data[i].substr
	7151	&& !prog->substrs->data[i].utf8_substr) {
	7152	SV* const sv = newSVsv(prog->substrs->data[i].substr);
	7153	prog->substrs->data[i].utf8_substr = sv;
	7154	sv_utf8_upgrade(sv);
	7155	if (SvVALID(prog->substrs->data[i].substr)) {
	7156	if (SvTAIL(prog->substrs->data[i].substr)) {
	7157	/* Trim the trailing \n that fbm_compile added last
	7158	time. */
	7159	SvCUR_set(sv, SvCUR(sv) - 1);
	7160	/* Whilst this makes the SV technically "invalid" (as its
	7161	buffer is no longer followed by "\0") when fbm_compile()
	7162	adds the "\n" back, a "\0" is restored. */
	7163	fbm_compile(sv, FBMcf_TAIL);
	7164	} else
	7165	fbm_compile(sv, 0);
	7166	}
	7167	if (prog->substrs->data[i].substr == prog->check_substr)
	7168	prog->check_utf8 = sv;
	7169	}
	7170	} while (i--);
	7171	}
	7172
	7173	STATIC void
	7174	S_to_byte_substr(pTHX_ register regexp *prog)
	7175	{
	7176	dVAR;
	7177	int i = 1;
	7178
	7179	PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
	7180
	7181	do {
	7182	if (prog->substrs->data[i].utf8_substr
	7183	&& !prog->substrs->data[i].substr) {
	7184	SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
	7185	if (sv_utf8_downgrade(sv, TRUE)) {
	7186	if (SvVALID(prog->substrs->data[i].utf8_substr)) {
	7187	if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
	7188	/* Trim the trailing \n that fbm_compile added last
	7189	time. */
	7190	SvCUR_set(sv, SvCUR(sv) - 1);
	7191	fbm_compile(sv, FBMcf_TAIL);
	7192	} else
	7193	fbm_compile(sv, 0);
	7194	}
	7195	} else {
	7196	SvREFCNT_dec(sv);
	7197	sv = &PL_sv_undef;
	7198	}
	7199	prog->substrs->data[i].substr = sv;
	7200	if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
	7201	prog->check_substr = sv;
	7202	}
	7203	} while (i--);
	7204	}
	7205
	7206	/*
	7207	* Local variables:
	7208	* c-indentation-style: bsd
	7209	* c-basic-offset: 4
	7210	* indent-tabs-mode: nil
	7211	* End:
	7212	*
	7213	* ex: set ts=8 sts=4 sw=4 et:
	7214	*/