perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regexec.c
	2	*/
	3
	4	/*
	5	* One Ring to rule them all, One Ring to find them
	6	&
	7	* [p.v of _The Lord of the Rings_, opening poem]
	8	* [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
	9	* [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
	10	*/
	11
	12	/* This file contains functions for executing a regular expression. See
	13	* also regcomp.c which funnily enough, contains functions for compiling
	14	* a regular expression.
	15	*
	16	* This file is also copied at build time to ext/re/re_exec.c, where
	17	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	18	* This causes the main functions to be compiled under new names and with
	19	* debugging support added, which makes "use re 'debug'" work.
	20	*/
	21
	22	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	23	* be confused with the original package (see point 3 below). Thanks, Henry!
	24	*/
	25
	26	/* Additional note: this code is very heavily munged from Henry's version
	27	* in places. In some spots I've traded clarity for efficiency, so don't
	28	* blame Henry for some of the lack of readability.
	29	*/
	30
	31	/* The names of the functions have been changed from regcomp and
	32	* regexec to pregcomp and pregexec in order to avoid conflicts
	33	* with the POSIX routines of the same names.
	34	*/
	35
	36	#ifdef PERL_EXT_RE_BUILD
	37	#include "re_top.h"
	38	#endif
	39
	40	/*
	41	* pregcomp and pregexec -- regsub and regerror are not used in perl
	42	*
	43	* Copyright (c) 1986 by University of Toronto.
	44	* Written by Henry Spencer. Not derived from licensed software.
	45	*
	46	* Permission is granted to anyone to use this software for any
	47	* purpose on any computer system, and to redistribute it freely,
	48	* subject to the following restrictions:
	49	*
	50	* 1. The author is not responsible for the consequences of use of
	51	* this software, no matter how awful, even if they arise
	52	* from defects in it.
	53	*
	54	* 2. The origin of this software must not be misrepresented, either
	55	* by explicit claim or by omission.
	56	*
	57	* 3. Altered versions must be plainly marked as such, and must not
	58	* be misrepresented as being the original software.
	59	*
	60	**** Alterations to Henry's code are...
	61	****
	62	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	63	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
	64	**** by Larry Wall and others
	65	****
	66	**** You may distribute under the terms of either the GNU General Public
	67	**** License or the Artistic License, as specified in the README file.
	68	*
	69	* Beware that some of this code is subtly aware of the way operator
	70	* precedence is structured in regular expressions. Serious changes in
	71	* regular-expression syntax might require a total rethink.
	72	*/
	73	#include "EXTERN.h"
	74	#define PERL_IN_REGEXEC_C
	75	#include "perl.h"
	76
	77	#ifdef PERL_IN_XSUB_RE
	78	# include "re_comp.h"
	79	#else
	80	# include "regcomp.h"
	81	#endif
	82
	83	#define RF_tainted 1 /* bit in PL_reg_flags: tainted information used? */
	84	#define RF_warned 2 /* warned about big count? */
	85
	86	#define RF_utf8 8 /* bit in PL_reg_flags: Pattern contains multibyte chars? */
	87
		/* True when the RF_utf8 bit is currently set in PL_reg_flags. */
	88	#define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
	89
	90	#define RS_init 1 /* eval environment created */
	91	#define RS_set 2 /* replsv value is set */
	92
		/* Fall back to plain "static" when STATIC has not been defined already. */
	93	#ifndef STATIC
	94	#define STATIC static
	95	#endif
	96
	97	/* Valid for non-utf8 strings only: avoids the reginclass call if there are no
	98	* complications: i.e., if everything matchable is straightforward in the
	99	* bitmap.
		*
		* When ANYOF_FLAGS(p) is non-zero this expands to a call to
		* reginclass(prog,p,c,0,0); otherwise the byte *(c) is looked up directly
		* with ANYOF_BITMAP_TEST. */
	100	#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) \
	101	: ANYOF_BITMAP_TEST(p,*(c)))
	102
	103	/*
	104	* Forwards.
	105	*/
	106
	107	#define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))
		/* CHR_SVLEN: length of sv, taken from sv_len_utf8() when the enclosing
		 * scope's utf8_target is true and from SvCUR() otherwise.
		 * CHR_DIST: distance a-b, via utf8_distance() when PL_reg_match_utf8 is
		 * set, plain pointer subtraction otherwise. */
	108	#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
	109
		/* HOPc: move pos by off. When PL_reg_match_utf8 is set this delegates to
		 * reghop3(), bounded by PL_regeol for a non-negative off and by PL_bostr
		 * for a negative one; otherwise it is plain byte arithmetic. The pointer
		 * arguments must be cast to U8* (not U8, which would truncate the
		 * address to an integer). Result is a char*. */
	110	#define HOPc(pos,off) \
	111	(char *)(PL_reg_match_utf8 \
	112	? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
	113	: (U8*)(pos + off))
		/* HOPBACKc: move pos back by off. When PL_reg_match_utf8 is set this
		 * delegates to reghopmaybe3() with PL_bostr as the limit; in byte mode
		 * it yields NULL if pos - off would fall before PL_bostr. */
	114	#define HOPBACKc(pos, off) \
	115	(char*)(PL_reg_match_utf8\
	116	? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
	117	: (pos - off >= PL_bostr) \
	118	? (U8*)pos - off \
	119	: NULL)
	120
		/* HOP3: like HOPc but with an explicit limit pointer and a U8* result;
		 * HOP3c is the same with the result cast to char*. */
	121	#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
	122	#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
	123
	124	/* These loaders are used by the _CCC_TRY_CODE macro defined below.
		 *
		 * LOAD_UTF8_CHARCLASS(class,str): if PL_utf8_<class> is not yet set, call
		 * is_utf8_<class>() on str between ENTER/LEAVE (after save_re_context())
		 * and assert that it returned true. Presumably the call is made for its
		 * side effect of populating PL_utf8_<class> -- confirm in utf8.c. */
	125	#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
	126	if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
	127
	128	/* Same as LOAD_UTF8_CHARCLASS, but always probes with " " and doesn't do an
		 * assert to verify that the result is correct (the return value is
		 * deliberately thrown away). */
	129	#define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
	130	if (!CAT2(PL_utf8_,class)) { bool throw_away; ENTER; save_re_context(); throw_away = CAT2(is_utf8_,class)((const U8*)" "); LEAVE; } } STMT_END
	131
		/* Convenience wrappers, each probing with a character known to be in
		 * the class. */
	132	#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
	133	#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
	134	#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
	135
		/* NOTE(review): LOAD_UTF8_CHARCLASS_GCB expands to several statements
		 * that are not wrapped in STMT_START/STMT_END as a group, so it must not
		 * be used as the sole body of an unbraced if/else. */
	136	#define LOAD_UTF8_CHARCLASS_GCB() /* Grapheme cluster boundaries */ \
	137	LOAD_UTF8_CHARCLASS(X_begin, " "); \
	138	LOAD_UTF8_CHARCLASS(X_non_hangul, "A"); \
	139	/* These are utf8 constants, and not utf-ebcdic constants, so the \
	140	* assert should likely and hopefully fail on an EBCDIC machine */ \
	141	LOAD_UTF8_CHARCLASS(X_extend, "\xcc\x80"); /* U+0300 */ \
	142	\
	143	/* No asserts are done for these, in case called on an early \
	144	* Unicode version in which they map to nothing */ \
	145	LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* U+0E40 "\xe0\xb9\x80" */ \
	146	LOAD_UTF8_CHARCLASS_NO_CHECK(X_L); /* U+1100 "\xe1\x84\x80" */ \
	147	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV); /* U+AC00 "\xea\xb0\x80" */ \
	148	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LVT); /* U+AC01 "\xea\xb0\x81" */ \
	149	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV_LVT_V);/* U+AC01 "\xea\xb0\x81" */\
	150	LOAD_UTF8_CHARCLASS_NO_CHECK(X_T); /* U+11A8 "\xe1\x86\xa8" */ \
	151	LOAD_UTF8_CHARCLASS_NO_CHECK(X_V) /* U+1160 "\xe1\x85\xa0" */
	152
	153	/*
	154	We don't use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test
	155	so that it is possible to override the option here without having to
	156	rebuild the entire core, as we are required to do if we change regcomp.h
	157	which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
		
		When BROKEN_UNICODE_CHARCLASS_MAPPINGS is defined, the perl_word,
		perl_space and posix_digit classes are aliased to alnum, space and digit
		(loaders, PL_utf8_ variables and the bare class names alike); otherwise
		each of them has its own loader and PL_utf8_ variable.
	158	*/
	159	#if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
	160	#define BROKEN_UNICODE_CHARCLASS_MAPPINGS
	161	#endif
	162
	163	#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
	164	#define LOAD_UTF8_CHARCLASS_PERL_WORD() LOAD_UTF8_CHARCLASS_ALNUM()
	165	#define LOAD_UTF8_CHARCLASS_PERL_SPACE() LOAD_UTF8_CHARCLASS_SPACE()
	166	#define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS_DIGIT()
	167	#define RE_utf8_perl_word PL_utf8_alnum
	168	#define RE_utf8_perl_space PL_utf8_space
	169	#define RE_utf8_posix_digit PL_utf8_digit
		/* Bare class names, so that token pasting such as CAT2(PL_utf8_,CLASS)
		 * resolves to the legacy variables. */
	170	#define perl_word alnum
	171	#define perl_space space
	172	#define posix_digit digit
	173	#else
	174	#define LOAD_UTF8_CHARCLASS_PERL_WORD() LOAD_UTF8_CHARCLASS(perl_word,"a")
	175	#define LOAD_UTF8_CHARCLASS_PERL_SPACE() LOAD_UTF8_CHARCLASS(perl_space," ")
	176	#define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS(posix_digit,"0")
	177	#define RE_utf8_perl_word PL_utf8_perl_word
	178	#define RE_utf8_perl_space PL_utf8_perl_space
	179	#define RE_utf8_posix_digit PL_utf8_posix_digit
	180	#endif
	181
	182	#define PLACEHOLDER /* Something for the preprocessor to grab onto */
	183
	184	/* The actual code for CCC_TRY, which uses several variables from the routine
	185	* it's callable from (locinput, nextchr, utf8_target, PL_regeol) as well as
		* the sayNO macro. It is designed to be the bulk of a case statement and
		* ends with a break.
	186	* FUNC is the macro or function to call on non-utf8 targets that indicates if
	187	* nextchr matches the class.
	188	* UTF8_TEST is the whole test expression to use for utf8 targets.
	189	* CLASS and STR are handed to LOAD_UTF8_CHARCLASS, which loads the swash for
	190	* the class if it is not already present.
	191	* POS_OR_NEG is either empty or ! to complement the results of FUNC or
	192	* UTF8_TEST test.
	193	* The logic is: Fail if we're at the end-of-string; otherwise if the target is
	194	* utf8 and a variant, load the swash if necessary and test using the utf8
	195	* test. Advance to the next character if test is ok, otherwise fail; If not
	196	* utf8 or an invariant under utf8, use the non-utf8 test, and fail if it
	197	* fails, or advance to the next character */
	198
	199	#define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR) \
	200	if (locinput >= PL_regeol) { \
	201	sayNO; \
	202	} \
	203	if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \
	204	LOAD_UTF8_CHARCLASS(CLASS, STR); \
	205	if (POS_OR_NEG (UTF8_TEST)) { \
	206	sayNO; \
	207	} \
	208	locinput += PL_utf8skip[nextchr]; \
	209	nextchr = UCHARAT(locinput); \
	210	break; \
	211	} \
	212	if (POS_OR_NEG (FUNC(nextchr))) { \
	213	sayNO; \
	214	} \
	215	nextchr = UCHARAT(++locinput); \
	216	break;
	217
	218	/* Handle the non-locale cases for a character class and its complement.
		* NAME is the case label for the node that matches the class, NNAME the
		* label for its complement. It
	219	* calls _CCC_TRY_CODE with a ! to complement the test for the character class.
	220	* This is because that code fails when the test succeeds, so we want to have
	221	* the test fail so that the code succeeds. The swash is stored in a
	222	* predictable PL_ place (PL_utf8_<CLASS>), which is what swash_fetch is
		* called on for utf8 targets. */
	223	#define _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, \
	224	CLASS, STR) \
	225	case NAME: \
	226	_CCC_TRY_CODE( !, FUNC, \
	227	cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	228	(U8*)locinput, TRUE)), \
	229	CLASS, STR) \
	230	case NNAME: \
	231	_CCC_TRY_CODE( PLACEHOLDER , FUNC, \
	232	cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	233	(U8*)locinput, TRUE)), \
	234	CLASS, STR) \
	235
	236	/* Generate the case statements for both locale and non-locale character
	237	* classes in regmatch for classes that don't have special unicode semantics.
	238	* Locales don't use an immediate swash, but an intermediary special locale
	239	* function that is called on the pointer to the current place in the input
	240	* string. That function will resolve to needing the same swash. One might
	241	* think that because we don't know what the locale will match, we shouldn't
	242	* check with the swash loading function that it loaded properly; ie, that we
	243	* should use LOAD_UTF8_CHARCLASS_NO_CHECK for those, but what is passed to the
	244	* regular LOAD_UTF8_CHARCLASS is in non-locale terms, and so locale is
	245	* irrelevant here.
		*
		* Cases generated, in order:
		*   NAMEL / NNAMEL - locale variants; both set RF_tainted in PL_reg_flags
		*                    before testing with LCFUNC / LCFUNC_utf8.
		*   NAMEA / NNAMEA - variants tested with FUNCA only. A NAMEA match
		*                    always advances one byte; an NNAMEA match advances
		*                    by PL_utf8skip[nextchr] on a utf8 target.
		*   NAME  / NNAME  - the non-locale variants via _CCC_TRY_NONLOCALE. */
	246	#define CCC_TRY(NAME, NNAME, FUNC, \
	247	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	248	NAMEA, NNAMEA, FUNCA, \
	249	CLASS, STR) \
	250	case NAMEL: \
	251	PL_reg_flags |= RF_tainted; \
	252	_CCC_TRY_CODE( !, LCFUNC, LCFUNC_utf8((U8*)locinput), CLASS, STR) \
	253	case NNAMEL: \
	254	PL_reg_flags |= RF_tainted; \
	255	_CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput), \
	256	CLASS, STR) \
	257	case NAMEA: \
	258	if (locinput >= PL_regeol || ! FUNCA(nextchr)) { \
	259	sayNO; \
	260	} \
	261	/* Matched a utf8-invariant, so don't have to worry about utf8 */ \
	262	nextchr = UCHARAT(++locinput); \
	263	break; \
	264	case NNAMEA: \
	265	if (locinput >= PL_regeol || FUNCA(nextchr)) { \
	266	sayNO; \
	267	} \
	268	if (utf8_target) { \
	269	locinput += PL_utf8skip[nextchr]; \
	270	nextchr = UCHARAT(locinput); \
	271	} \
	272	else { \
	273	nextchr = UCHARAT(++locinput); \
	274	} \
	275	break; \
	276	/* Generate the non-locale cases */ \
	277	_CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)
	278
	279	/* This is like CCC_TRY, but has an extra set of parameters for generating case
	280	* statements to handle separate Unicode semantics nodes: it expands CCC_TRY
		* with the shared arguments and then adds a second _CCC_TRY_NONLOCALE pair
		* for NAMEU/NNAMEU that tests non-utf8 targets with FUNCU. */
	281	#define CCC_TRY_U(NAME, NNAME, FUNC, \
	282	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	283	NAMEU, NNAMEU, FUNCU, \
	284	NAMEA, NNAMEA, FUNCA, \
	285	CLASS, STR) \
	286	CCC_TRY(NAME, NNAME, FUNC, \
	287	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	288	NAMEA, NNAMEA, FUNCA, \
	289	CLASS, STR) \
	290	_CCC_TRY_NONLOCALE(NAMEU, NNAMEU, FUNCU, CLASS, STR)
	291
	292	/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
	293
	294	/* for use after a quantifier and before an EXACT-like node -- japhy */
	295	/* it would be nice to rework regcomp.sym to generate this stuff. sigh
	296	*
	297	* NOTE that nothing that affects backtracking should be in here, specifically
	298	* VERBS must NOT be included. JUMPABLE is used to determine if we can ignore a
	299	* node that is in between two EXACT like nodes when ascertaining what the required
	300	* "follow" character is. This should probably be moved to regex compile time
	301	* although it may be done at run time because of the REF possibility - more
	302	* investigation required. -- demerphq
		*
		* JUMPABLE reads cur_eval from the scope it is expanded in: a CLOSE node
		* is jumpable unless it is the close_paren of the current eval frame.
	303	*/
	304	#define JUMPABLE(rn) ( \
	305	OP(rn) == OPEN || \
	306	(OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
	307	OP(rn) == EVAL || \
	308	OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
	309	OP(rn) == PLUS || OP(rn) == MINMOD || \
	310	OP(rn) == KEEPS || \
	311	(PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
	312	)
		/* IS_EXACT: the node's kind (per PL_regkind) is EXACT. */
	313	#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
	314
		/* HAS_TEXT: the node's kind is EXACT or REF. */
	315	#define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
	316
	317	#if 0
	318	/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
	319	we don't need this definition. NOTE(review): this disabled variant has no
		IS_TEXTFU; add one before ever re-enabling it. */
	320	#define IS_TEXT(rn) ( OP(rn)==EXACT || OP(rn)==REF || OP(rn)==NREF )
	321	#define IS_TEXTF(rn) ( (OP(rn)==EXACTFU || OP(rn)==EXACTF) || OP(rn)==REFF || OP(rn)==NREFF )
	322	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
	323
	324	#else
	325	/* ... so we use this as it's faster: each macro is a single opcode
		comparison. */
	326	#define IS_TEXT(rn) ( OP(rn)==EXACT )
	327	#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU )
	328	#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
	329	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
	330
	331	#endif
	332
	333	/*
	334	Search for mandatory following text node; for lookahead, the text must
	335	follow but for lookbehind (rn->flags != 0) we skip to the next step.
		
		rn is modified in place: while JUMPABLE(rn) holds it is advanced
		  - past two operands for SUSPEND and CURLY-kind nodes,
		  - past one operand for PLUS,
		  - past two operands for IFMATCH with flags == 0, else by ARG(rn),
		  - by NEXT_OFF(rn) for every other jumpable node.
	336	*/
	337	#define FIND_NEXT_IMPT(rn) STMT_START { \
	338	while (JUMPABLE(rn)) { \
	339	const OPCODE type = OP(rn); \
	340	if (type == SUSPEND || PL_regkind[type] == CURLY) \
	341	rn = NEXTOPER(NEXTOPER(rn)); \
	342	else if (type == PLUS) \
	343	rn = NEXTOPER(rn); \
	344	else if (type == IFMATCH) \
	345	rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
	346	else rn += NEXT_OFF(rn); \
	347	} \
	348	} STMT_END
	349
	350
	351	static void restore_pos(pTHX_ void *arg); /* forward declaration; definition is not in this part of the file */
	352
		/* Element counts for one regcppush() frame on the savestack:
		 *   REGCP_PAREN_ELEMS - items pushed per paren pair (end, start,
		 *                       PL_reg_start_tmp entry, paren number);
		 *   REGCP_OTHER_ELEMS - items pushed once per frame (PL_regoffs,
		 *                       PL_regsize, *PL_reglastparen,
		 *                       *PL_reglastcloseparen, PL_reginput);
		 *   REGCP_FRAME_ELEMS - the trailing SAVEt_REGCONTEXT cookie. */
	353	#define REGCP_PAREN_ELEMS 4
	354	#define REGCP_OTHER_ELEMS 5
	355	#define REGCP_FRAME_ELEMS 1
	356	/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
	357	* are needed for the regexp context stack bookkeeping. */
	358
		/*
		 * S_regcppush - save the current capture state on the savestack.
		 *
		 * For every paren number p in (parenfloor, PL_regsize], pushes the
		 * group's end and start offsets, its PL_reg_start_tmp pointer and p
		 * itself; then pushes PL_regoffs, PL_regsize, *PL_reglastparen,
		 * *PL_reglastcloseparen and PL_reginput; and finally a cookie made of
		 * SAVEt_REGCONTEXT OR-ed with the element count shifted left by
		 * SAVE_TIGHT_SHIFT. S_regcppop() pops these in exactly the reverse
		 * order, so the push order here must not be changed on its own.
		 *
		 * Croaks if the paren element count is negative or if the count does
		 * not survive the shift into the cookie.
		 *
		 * Returns the value PL_savestack_ix had on entry.
		 */
	359	STATIC CHECKPOINT
	360	S_regcppush(pTHX_ I32 parenfloor)
	361	{
	362	dVAR;
	363	const int retval = PL_savestack_ix;
	364	const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
	365	const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
	366	const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
	367	int p;
	368	GET_RE_DEBUG_FLAGS_DECL;
	369
	370	if (paren_elems_to_push < 0)
	371	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
	372
		/* Shifting back must give the original count, otherwise bits were lost. */
	373	if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
	374	Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
	375	" out of range (%lu-%ld)",
	376	total_elems, (unsigned long)PL_regsize, (long)parenfloor);
	377
	378	SSGROW(total_elems + REGCP_FRAME_ELEMS);
	379
	380	for (p = PL_regsize; p > parenfloor; p--) {
	381	/* REGCP_PAREN_ELEMS are pushed per pairs of parentheses. */
	382	SSPUSHINT(PL_regoffs[p].end);
	383	SSPUSHINT(PL_regoffs[p].start);
	384	SSPUSHPTR(PL_reg_start_tmp[p]);
	385	SSPUSHINT(p);
	386	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	387	" saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
	388	(UV)p, (IV)PL_regoffs[p].start,
	389	(IV)(PL_reg_start_tmp[p] - PL_bostr),
	390	(IV)PL_regoffs[p].end
	391	));
	392	}
	393	/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
	394	SSPUSHPTR(PL_regoffs);
	395	SSPUSHINT(PL_regsize);
	396	SSPUSHINT(*PL_reglastparen);
	397	SSPUSHINT(*PL_reglastcloseparen);
	398	SSPUSHPTR(PL_reginput);
	399	SSPUSHUV(SAVEt_REGCONTEXT | elems_shifted); /* Magic cookie. */
	400
	401	return retval;
	402	}
	403
	404	/* These are needed since we do not localize EVAL nodes: */
		/* REGCP_SET(cp): record the current PL_savestack_ix in cp.
		 * REGCP_UNWIND(cp): call regcpblow(cp).
		 * Both first emit a DEBUG_STATE_r trace line (REGCP_UNWIND only when cp
		 * differs from PL_savestack_ix).
		 * NOTE(review): each expands to two statements without a
		 * STMT_START/STMT_END wrapper, so neither is safe as the sole body of
		 * an unbraced if/else. */
	405	#define REGCP_SET(cp) \
	406	DEBUG_STATE_r( \
	407	PerlIO_printf(Perl_debug_log, \
	408	" Setting an EVAL scope, savestack=%"IVdf"\n", \
	409	(IV)PL_savestack_ix)); \
	410	cp = PL_savestack_ix
	411
	412	#define REGCP_UNWIND(cp) \
	413	DEBUG_STATE_r( \
	414	if (cp != PL_savestack_ix) \
	415	PerlIO_printf(Perl_debug_log, \
	416	" Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
	417	(IV)(cp), (IV)PL_savestack_ix)); \
	418	regcpblow(cp)
	419
		/*
		 * S_regcppop - restore the capture state saved by S_regcppush().
		 *
		 * Pops the cookie, then the saved input pointer, *PL_reglastcloseparen,
		 * *PL_reglastparen, PL_regsize and PL_regoffs, then one group of
		 * REGCP_PAREN_ELEMS per saved paren. Finally marks every paren above
		 * *PL_reglastparen (up to rex->nparens) as unset.
		 *
		 * rex is only read for its nparens field.
		 * Returns the input pointer that was popped from the savestack.
		 */
	420	STATIC char *
	421	S_regcppop(pTHX_ const regexp *rex)
	422	{
	423	dVAR;
	424	UV i;
	425	char *input;
	426	GET_RE_DEBUG_FLAGS_DECL;
	427
	428	PERL_ARGS_ASSERT_REGCPPOP;
	429
	430	/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
	431	i = SSPOPUV;
	432	assert((i & SAVE_MASK) == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
	433	i >>= SAVE_TIGHT_SHIFT; /* Total number of saved elements (parens + others). */
	434	input = (char *) SSPOPPTR;
	435	*PL_reglastcloseparen = SSPOPINT;
	436	*PL_reglastparen = SSPOPINT;
	437	PL_regsize = SSPOPINT;
	438	PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
	439
	440	i -= REGCP_OTHER_ELEMS; /* What remains is the parentheses elements to pop. */
	441	/* Now restore the parentheses context. */
	442	for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
	443	I32 tmps;
	444	U32 paren = (U32)SSPOPINT;
	445	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
	446	PL_regoffs[paren].start = SSPOPINT;
	447	tmps = SSPOPINT;
		/* The saved end offset is only written back for parens that are not
		 * above the just-restored *PL_reglastparen. */
	448	if (paren <= *PL_reglastparen)
	449	PL_regoffs[paren].end = tmps;
	450	DEBUG_BUFFERS_r(
	451	PerlIO_printf(Perl_debug_log,
	452	" restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
	453	(UV)paren, (IV)PL_regoffs[paren].start,
	454	(IV)(PL_reg_start_tmp[paren] - PL_bostr),
	455	(IV)PL_regoffs[paren].end,
	456	(paren > *PL_reglastparen ? "(no)" : ""));
	457	);
	458	}
	459	DEBUG_BUFFERS_r(
	460	if (*PL_reglastparen + 1 <= rex->nparens) {
	461	PerlIO_printf(Perl_debug_log,
	462	" restoring \\%"IVdf"..\\%"IVdf" to undef\n",
	463	(IV)(*PL_reglastparen + 1), (IV)rex->nparens);
	464	}
	465	);
	466	#if 1
	467	/* It would seem that the similar code in regtry()
	468	* already takes care of this, and in fact it is in
	469	* a better location too, since this code can be #if 0-ed out
	470	* but the code in regtry() is needed or otherwise tests
	471	* requiring null fields (pat.t#187 and split.t#{13,14}
	472	* (as of patchlevel 7877) will fail. Then again,
	473	* this code seems to be necessary or otherwise
	474	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	475	* --jhi updated by dapm
		*
		* For each paren above *PL_reglastparen the end offset is always reset
		* to -1; the start offset is reset only when the paren number is also
		* above PL_regsize. */
	476	for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
	477	if (i > PL_regsize)
	478	PL_regoffs[i].start = -1;
	479	PL_regoffs[i].end = -1;
	480	}
	481	#endif
	482	return input;
	483	}
	484
	485	#define regcpblow(cp) LEAVE_SCOPE(cp) /* Expands to LEAVE_SCOPE(cp); used by REGCP_UNWIND. Ignores regcppush()ed data. */
	486
	487	/*
	488	* pregexec and friends
	489	*/
	490
	491	#ifndef PERL_IN_XSUB_RE
	492	/*
	493	- pregexec - match a regexp against a string
		*
		* Thin wrapper around regexec_flags(): prog, stringarg, strend, strbeg,
		* minend and screamer are forwarded unchanged, NULL is passed for the
		* seventh argument, and the flags argument is REXEC_COPY_STR unless
		* nosave is non-zero (in which case it is 0).
		*
		* strbeg and screamer are pointers, like the other string arguments;
		* they are handed straight to regexec_flags().
		*
		* Returns whatever regexec_flags() returns.
	494	*/
	495	I32
	496	Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
	497	char *strbeg, I32 minend, SV *screamer, U32 nosave)
	498	/* strend: pointer to null at end of string */
	499	/* strbeg: real beginning of string */
	500	/* minend: end of match must be >=minend after stringarg. */
	501	/* nosave: For optimizations. */
	502	{
	503	PERL_ARGS_ASSERT_PREGEXEC;
	504
	505	return
	506	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
	507	nosave ? 0 : REXEC_COPY_STR);
	508	}
	509	#endif
	510
	511	/*
	512	* Need to implement the following flags for reg_anch:
	513	*
	514	* USE_INTUIT_NOML - Useful to call re_intuit_start() first
	515	* USE_INTUIT_ML
	516	* INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
	517	* INTUIT_AUTORITATIVE_ML
	518	* INTUIT_ONCE_NOML - Intuit can match in one location only.
	519	* INTUIT_ONCE_ML
	520	*
	521	* Another flag for this function: SECOND_TIME (so that float substrs
	522	* with giant delta may be not rechecked).
	523	*/
	524
	525	/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
	526
	527	/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
	528	Otherwise, only SvCUR(sv) is used to get strbeg. */
	529
	530	/* XXXX We assume that strpos is strbeg unless sv. */
	531
	532	/* XXXX Some places assume that there is a fixed substring.
	533	An update may be needed if optimizer marks as "INTUITable"
	534	RExen without fixed substrings. Similarly, it is assumed that
	535	lengths of all the strings are no more than minlen, thus they
	536	cannot come from lookahead.
	537	(Or minlen should take into account lookahead.)
	538	NOTE: Some of this comment is not correct. minlen does now take account
	539	of lookahead/behind. Further research is required. -- demerphq
	540
	541	*/
	542
	543	/* A failure to find a constant substring means that there is no need to make
	544	an expensive call to the REx engine, thus we celebrate a failure. Similarly,
	545	finding a substring too deep into the string means that fewer calls to
	546	regtry() should be needed.
	547
	548	REx compiler's optimizer found 4 possible hints:
	549	a) Anchored substring;
	550	b) Fixed substring;
	551	c) Whether we are anchored (beginning-of-line or \G);
	552	d) First node (of those at offset 0) which may distinguish positions;
	553	We use a)b)d) and multiline-part of c), and try to find a position in the
	554	string which does not contradict any of them.
	555	*/
	556
	557	/* Most of the decisions we make here should have been made at compile time.
	558	The nodes of the REx which we used for the search should have been
	559	deleted from the finite automaton. */
	560
	561	char *
	562	Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV sv, char strpos,
	563	char strend, const U32 flags, re_scream_pos_data data)
	564	{
	565	dVAR;
	566	struct regexp const prog = (struct regexp )SvANY(rx);
	567	register I32 start_shift = 0;
	568	/* Should be nonnegative! */
	569	register I32 end_shift = 0;
	570	register char *s;
	571	register SV *check;
	572	char *strbeg;
	573	char *t;
	574	const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
	575	I32 ml_anch;
	576	register char other_last = NULL; / other substr checked before this */
	577	char check_at = NULL; / check substr found at this pos */
	578	const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
	579	RXi_GET_DECL(prog,progi);
	580	#ifdef DEBUGGING
	581	const char * const i_strpos = strpos;
	582	#endif
	583	GET_RE_DEBUG_FLAGS_DECL;
	584
	585	PERL_ARGS_ASSERT_RE_INTUIT_START;
	586
	587	RX_MATCH_UTF8_set(rx,utf8_target);
	588
	589	if (RX_UTF8(rx)) {
	590	PL_reg_flags \|= RF_utf8;
	591	}
	592	DEBUG_EXECUTE_r(
	593	debug_start_match(rx, utf8_target, strpos, strend,
	594	sv ? "Guessing start of match in sv for"
	595	: "Guessing start of match in string for");
	596	);
	597
	598	/* CHR_DIST() would be more correct here but it makes things slow. */
	599	if (prog->minlen > strend - strpos) {
	600	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	601	"String too short... [re_intuit_start]\n"));
	602	goto fail;
	603	}
	604
	605	strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
	606	PL_regeol = strend;
	607	if (utf8_target) {
	608	if (!prog->check_utf8 && prog->check_substr)
	609	to_utf8_substr(prog);
	610	check = prog->check_utf8;
	611	} else {
	612	if (!prog->check_substr && prog->check_utf8)
	613	to_byte_substr(prog);
	614	check = prog->check_substr;
	615	}
	616	if (check == &PL_sv_undef) {
	617	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	618	"Non-utf8 string cannot match utf8 check string\n"));
	619	goto fail;
	620	}
	621	if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
	622	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
	623	\|\| ( (prog->extflags & RXf_ANCH_BOL)
	624	&& !multiline ) ); /* Check after \n? */
	625
	626	if (!ml_anch) {
	627	if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
	628	&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
	629	/* SvCUR is not set on references: SvRV and SvPVX_const overlap */
	630	&& sv && !SvROK(sv)
	631	&& (strpos != strbeg)) {
	632	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
	633	goto fail;
	634	}
	635	if (prog->check_offset_min == prog->check_offset_max &&
	636	!(prog->extflags & RXf_CANY_SEEN)) {
	637	/* Substring at constant offset from beg-of-str... */
	638	I32 slen;
	639
	640	s = HOP3c(strpos, prog->check_offset_min, strend);
	641
	642	if (SvTAIL(check)) {
	643	slen = SvCUR(check); /* >= 1 */
	644
	645	if ( strend - s > slen \|\| strend - s < slen - 1
	646	\|\| (strend - s == slen && strend[-1] != '\n')) {
	647	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
	648	goto fail_finish;
	649	}
	650	/* Now should match s[0..slen-2] */
	651	slen--;
	652	if (slen && (SvPVX_const(check) != s
	653	\|\| (slen > 1
	654	&& memNE(SvPVX_const(check), s, slen)))) {
	655	report_neq:
	656	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
	657	goto fail_finish;
	658	}
	659	}
	660	else if (SvPVX_const(check) != s
	661	\|\| ((slen = SvCUR(check)) > 1
	662	&& memNE(SvPVX_const(check), s, slen)))
	663	goto report_neq;
	664	check_at = s;
	665	goto success_at_start;
	666	}
	667	}
	668	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
	669	s = strpos;
	670	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	671	end_shift = prog->check_end_shift;
	672
	673	if (!ml_anch) {
	674	const I32 end = prog->check_offset_max + CHR_SVLEN(check)
	675	- (SvTAIL(check) != 0);
	676	const I32 eshift = CHR_DIST((U8)strend, (U8)s) - end;
	677
	678	if (end_shift < eshift)
	679	end_shift = eshift;
	680	}
	681	}
	682	else { /* Can match at random position */
	683	ml_anch = 0;
	684	s = strpos;
	685	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	686	end_shift = prog->check_end_shift;
	687
	688	/* end shift should be non negative here */
	689	}
	690
	691	#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
	692	if (end_shift < 0)
	693	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
	694	(IV)end_shift, RX_PRECOMP(prog));
	695	#endif
	696
	697	restart:
	698	/* Find a possible match in the region s..strend by looking for
	699	the "check" substring in the region corrected by start/end_shift. */
	700
	701	{
	702	I32 srch_start_shift = start_shift;
	703	I32 srch_end_shift = end_shift;
	704	if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
	705	srch_end_shift -= ((strbeg - s) - srch_start_shift);
	706	srch_start_shift = strbeg - s;
	707	}
	708	DEBUG_OPTIMISE_MORE_r({
	709	PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
	710	(IV)prog->check_offset_min,
	711	(IV)srch_start_shift,
	712	(IV)srch_end_shift,
	713	(IV)prog->check_end_shift);
	714	});
	715
	716	if (flags & REXEC_SCREAM) {
	717	I32 p = -1; /* Internal iterator of scream. */
	718	I32 * const pp = data ? data->scream_pos : &p;
	719
	720	if (PL_screamfirst[BmRARE(check)] >= 0
	721	\|\| ( BmRARE(check) == '\n'
	722	&& (BmPREVIOUS(check) == SvCUR(check) - 1)
	723	&& SvTAIL(check) ))
	724	s = screaminstr(sv, check,
	725	srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
	726	else
	727	goto fail_finish;
	728	/* we may be pointing at the wrong string */
	729	if (s && RXp_MATCH_COPIED(prog))
	730	s = strbeg + (s - SvPVX_const(sv));
	731	if (data)
	732	*data->scream_olds = s;
	733	}
	734	else {
	735	U8* start_point;
	736	U8* end_point;
	737	if (prog->extflags & RXf_CANY_SEEN) {
	738	start_point= (U8*)(s + srch_start_shift);
	739	end_point= (U8*)(strend - srch_end_shift);
	740	} else {
	741	start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
	742	end_point= HOP3(strend, -srch_end_shift, strbeg);
	743	}
	744	DEBUG_OPTIMISE_MORE_r({
	745	PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
	746	(int)(end_point - start_point),
	747	(int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
	748	start_point);
	749	});
	750
	751	s = fbm_instr( start_point, end_point,
	752	check, multiline ? FBMrf_MULTILINE : 0);
	753	}
	754	}
	755	/* Update the count-of-usability, remove useless subpatterns,
	756	unshift s. */
	757
	758	DEBUG_EXECUTE_r({
	759	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	760	SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
	761	PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
	762	(s ? "Found" : "Did not find"),
	763	(check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
	764	? "anchored" : "floating"),
	765	quoted,
	766	RE_SV_TAIL(check),
	767	(s ? " at offset " : "...\n") );
	768	});
	769
	770	if (!s)
	771	goto fail_finish;
	772	/* Finish the diagnostic message */
	773	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
	774
	775	/* XXX dmq: first branch is for positive lookbehind...
	776	Our check string is offset from the beginning of the pattern.
	777	So we need to do any stclass tests offset forward from that
	778	point. I think. :-(
	779	*/
	780
	781
	782
	783	check_at=s;
	784
	785
	786	/* Got a candidate. Check MBOL anchoring, and the other substr.
	787	Start with the other substr.
	788	XXXX no SCREAM optimization yet - and a very coarse implementation
	789	XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
	790	always match. Probably should be marked during compile...
	791	Probably it is right to do no SCREAM here...
	792	*/
	793
	794	if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
	795	: (prog->float_substr && prog->anchored_substr))
	796	{
	797	/* Take into account the "other" substring. */
	798	/* XXXX May be hopelessly wrong for UTF... */
	799	if (!other_last)
	800	other_last = strpos;
	801	if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
	802	do_other_anchored:
	803	{
	804	char * const last = HOP3c(s, -start_shift, strbeg);
	805	char last1, last2;
	806	char * const saved_s = s;
	807	SV* must;
	808
	809	t = s - prog->check_offset_max;
	810	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	811	&& (!utf8_target
	812	\|\| ((t = (char)reghopmaybe3((U8)s, -(prog->check_offset_max), (U8*)strpos))
	813	&& t > strpos)))
	814	NOOP;
	815	else
	816	t = strpos;
	817	t = HOP3c(t, prog->anchored_offset, strend);
	818	if (t < other_last) /* These positions already checked */
	819	t = other_last;
	820	last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
	821	if (last < last1)
	822	last1 = last;
	823	/* XXXX It is not documented what units *_offsets are in.
	824	We assume bytes, but this is clearly wrong.
	825	Meaning this code needs to be carefully reviewed for errors.
	826	dmq.
	827	*/
	828
	829	/* On end-of-str: see comment below. */
	830	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	831	if (must == &PL_sv_undef) {
	832	s = (char*)NULL;
	833	DEBUG_r(must = prog->anchored_utf8); /* for debug */
	834	}
	835	else
	836	s = fbm_instr(
	837	(unsigned char*)t,
	838	HOP3(HOP3(last1, prog->anchored_offset, strend)
	839	+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
	840	must,
	841	multiline ? FBMrf_MULTILINE : 0
	842	);
	843	DEBUG_EXECUTE_r({
	844	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	845	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	846	PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
	847	(s ? "Found" : "Contradicts"),
	848	quoted, RE_SV_TAIL(must));
	849	});
	850
	851
	852	if (!s) {
	853	if (last1 >= last2) {
	854	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	855	", giving up...\n"));
	856	goto fail_finish;
	857	}
	858	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	859	", trying floating at offset %ld...\n",
	860	(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
	861	other_last = HOP3c(last1, prog->anchored_offset+1, strend);
	862	s = HOP3c(last, 1, strend);
	863	goto restart;
	864	}
	865	else {
	866	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	867	(long)(s - i_strpos)));
	868	t = HOP3c(s, -prog->anchored_offset, strbeg);
	869	other_last = HOP3c(s, 1, strend);
	870	s = saved_s;
	871	if (t == strpos)
	872	goto try_at_start;
	873	goto try_at_offset;
	874	}
	875	}
	876	}
	877	else { /* Take into account the floating substring. */
	878	char last, last1;
	879	char * const saved_s = s;
	880	SV* must;
	881
	882	t = HOP3c(s, -start_shift, strbeg);
	883	last1 = last =
	884	HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
	885	if (CHR_DIST((U8)last, (U8)t) > prog->float_max_offset)
	886	last = HOP3c(t, prog->float_max_offset, strend);
	887	s = HOP3c(t, prog->float_min_offset, strend);
	888	if (s < other_last)
	889	s = other_last;
	890	/* XXXX It is not documented what units *_offsets are in. Assume bytes. */
	891	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	892	/* fbm_instr() takes into account exact value of end-of-str
	893	if the check is SvTAIL(ed). Since false positives are OK,
	894	and end-of-str is not later than strend we are OK. */
	895	if (must == &PL_sv_undef) {
	896	s = (char*)NULL;
	897	DEBUG_r(must = prog->float_utf8); /* for debug message */
	898	}
	899	else
	900	s = fbm_instr((unsigned char*)s,
	901	(unsigned char*)last + SvCUR(must)
	902	- (SvTAIL(must)!=0),
	903	must, multiline ? FBMrf_MULTILINE : 0);
	904	DEBUG_EXECUTE_r({
	905	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	906	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	907	PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
	908	(s ? "Found" : "Contradicts"),
	909	quoted, RE_SV_TAIL(must));
	910	});
	911	if (!s) {
	912	if (last1 == last) {
	913	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	914	", giving up...\n"));
	915	goto fail_finish;
	916	}
	917	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	918	", trying anchored starting at offset %ld...\n",
	919	(long)(saved_s + 1 - i_strpos)));
	920	other_last = last;
	921	s = HOP3c(t, 1, strend);
	922	goto restart;
	923	}
	924	else {
	925	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	926	(long)(s - i_strpos)));
	927	other_last = s; /* Fix this later. --Hugo */
	928	s = saved_s;
	929	if (t == strpos)
	930	goto try_at_start;
	931	goto try_at_offset;
	932	}
	933	}
	934	}
	935
	936
	937	t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
	938
	939	DEBUG_OPTIMISE_MORE_r(
	940	PerlIO_printf(Perl_debug_log,
	941	"Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
	942	(IV)prog->check_offset_min,
	943	(IV)prog->check_offset_max,
	944	(IV)(s-strpos),
	945	(IV)(t-strpos),
	946	(IV)(t-s),
	947	(IV)(strend-strpos)
	948	)
	949	);
	950
	951	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	952	&& (!utf8_target
	953	\|\| ((t = (char)reghopmaybe3((U8)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
	954	&& t > strpos)))
	955	{
	956	/* Fixed substring is found far enough so that the match
	957	cannot start at strpos. */
	958	try_at_offset:
	959	if (ml_anch && t[-1] != '\n') {
	960	/* Eventually fbm_*() should handle this, but often
	961	anchored_offset is not 0, so this check will not be wasted. */
	962	/* XXXX In the code below we prefer to look for "^" even in
	963	presence of anchored substrings. And we search even
	964	beyond the found float position. These pessimizations
	965	are historical artefacts only. */
	966	find_anchor:
	967	while (t < strend - prog->minlen) {
	968	if (*t == '\n') {
	969	if (t < check_at - prog->check_offset_min) {
	970	if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
	971	/* Since we moved from the found position,
	972	we definitely contradict the found anchored
	973	substr. Due to the above check we do not
	974	contradict "check" substr.
	975	Thus we can arrive here only if check substr
	976	is float. Redo checking for "other"=="fixed".
	977	*/
	978	strpos = t + 1;
	979	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
	980	PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
	981	goto do_other_anchored;
	982	}
	983	/* We don't contradict the found floating substring. */
	984	/* XXXX Why not check for STCLASS? */
	985	s = t + 1;
	986	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
	987	PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
	988	goto set_useful;
	989	}
	990	/* Position contradicts check-string */
	991	/* XXXX probably better to look for check-string
	992	than for "\n", so one should lower the limit for t? */
	993	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
	994	PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
	995	other_last = strpos = s = t + 1;
	996	goto restart;
	997	}
	998	t++;
	999	}
	1000	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
	1001	PL_colors[0], PL_colors[1]));
	1002	goto fail_finish;
	1003	}
	1004	else {
	1005	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
	1006	PL_colors[0], PL_colors[1]));
	1007	}
	1008	s = t;
	1009	set_useful:
	1010	++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
	1011	}
	1012	else {
	1013	/* The found string does not prohibit matching at strpos,
	1014	- no optimization of calling REx engine can be performed,
	1015	unless it was an MBOL and we are not after MBOL,
	1016	or a future STCLASS check will fail this. */
	1017	try_at_start:
	1018	/* Even in this situation we may use MBOL flag if strpos is offset
	1019	wrt the start of the string. */
	1020	if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
	1021	&& (strpos != strbeg) && strpos[-1] != '\n'
	1022	/* May be due to an implicit anchor of m{.*foo} */
	1023	&& !(prog->intflags & PREGf_IMPLICIT))
	1024	{
	1025	t = strpos;
	1026	goto find_anchor;
	1027	}
	1028	DEBUG_EXECUTE_r( if (ml_anch)
	1029	PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
	1030	(long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
	1031	);
	1032	success_at_start:
	1033	if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
	1034	&& (utf8_target ? (
	1035	prog->check_utf8 /* Could be deleted already */
	1036	&& --BmUSEFUL(prog->check_utf8) < 0
	1037	&& (prog->check_utf8 == prog->float_utf8)
	1038	) : (
	1039	prog->check_substr /* Could be deleted already */
	1040	&& --BmUSEFUL(prog->check_substr) < 0
	1041	&& (prog->check_substr == prog->float_substr)
	1042	)))
	1043	{
	1044	/* If flags & SOMETHING - do not do it many times on the same match */
	1045	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
	1046	/* XXX Does the destruction order has to change with utf8_target? */
	1047	SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
	1048	SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
	1049	prog->check_substr = prog->check_utf8 = NULL; /* disable */
	1050	prog->float_substr = prog->float_utf8 = NULL; /* clear */
	1051	check = NULL; /* abort */
	1052	s = strpos;
	1053	/* XXXX If the check string was an implicit check MBOL, then we need to unset the relevant flag
	1054	see http://bugs.activestate.com/show_bug.cgi?id=87173 */
	1055	if (prog->intflags & PREGf_IMPLICIT)
	1056	prog->extflags &= ~RXf_ANCH_MBOL;
	1057	/* XXXX This is a remnant of the old implementation. It
	1058	looks wasteful, since now INTUIT can use many
	1059	other heuristics. */
	1060	prog->extflags &= ~RXf_USE_INTUIT;
	1061	/* XXXX What other flags might need to be cleared in this branch? */
	1062	}
	1063	else
	1064	s = strpos;
	1065	}
	1066
	1067	/* Last resort... */
	1068	/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
	1069	/* trie stclasses are too expensive to use here, we are better off to
	1070	leave it to regmatch itself */
	1071	if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
	1072	/* minlen == 0 is possible if regstclass is \b or \B,
	1073	and the fixed substr is ''$.
	1074	Since minlen is already taken into account, s+1 is before strend;
	1075	accidentally, minlen >= 1 guaranties no false positives at s + 1
	1076	even for \b or \B. But (minlen? 1 : 0) below assumes that
	1077	regstclass does not come from lookahead... */
	1078	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
	1079	This leaves EXACTF, EXACTFU only, which are dealt with in find_byclass(). */
	1080	const U8* const str = (U8*)STRING(progi->regstclass);
	1081	const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
	1082	? CHR_DIST(str+STR_LEN(progi->regstclass), str)
	1083	: 1);
	1084	char * endpos;
	1085	if (prog->anchored_substr \|\| prog->anchored_utf8 \|\| ml_anch)
	1086	endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
	1087	else if (prog->float_substr \|\| prog->float_utf8)
	1088	endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
	1089	else
	1090	endpos= strend;
	1091
	1092	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
	1093	(IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
	1094
	1095	t = s;
	1096	s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
	1097	if (!s) {
	1098	#ifdef DEBUGGING
	1099	const char *what = NULL;
	1100	#endif
	1101	if (endpos == strend) {
	1102	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1103	"Could not match STCLASS...\n") );
	1104	goto fail;
	1105	}
	1106	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1107	"This position contradicts STCLASS...\n") );
	1108	if ((prog->extflags & RXf_ANCH) && !ml_anch)
	1109	goto fail;
	1110	/* Contradict one of substrings */
	1111	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	1112	if ((utf8_target ? prog->anchored_utf8 : prog->anchored_substr) == check) {
	1113	DEBUG_EXECUTE_r( what = "anchored" );
	1114	hop_and_restart:
	1115	s = HOP3c(t, 1, strend);
	1116	if (s + start_shift + end_shift > strend) {
	1117	/* XXXX Should be taken into account earlier? */
	1118	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1119	"Could not match STCLASS...\n") );
	1120	goto fail;
	1121	}
	1122	if (!check)
	1123	goto giveup;
	1124	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1125	"Looking for %s substr starting at offset %ld...\n",
	1126	what, (long)(s + start_shift - i_strpos)) );
	1127	goto restart;
	1128	}
	1129	/* Have both, check_string is floating */
	1130	if (t + start_shift >= check_at) /* Contradicts floating=check */
	1131	goto retry_floating_check;
	1132	/* Recheck anchored substring, but not floating... */
	1133	s = check_at;
	1134	if (!check)
	1135	goto giveup;
	1136	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1137	"Looking for anchored substr starting at offset %ld...\n",
	1138	(long)(other_last - i_strpos)) );
	1139	goto do_other_anchored;
	1140	}
	1141	/* Another way we could have checked stclass at the
	1142	current position only: */
	1143	if (ml_anch) {
	1144	s = t = t + 1;
	1145	if (!check)
	1146	goto giveup;
	1147	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1148	"Looking for /%s^%s/m starting at offset %ld...\n",
	1149	PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
	1150	goto try_at_offset;
	1151	}
	1152	if (!(utf8_target ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
	1153	goto fail;
	1154	/* Check is floating substring. */
	1155	retry_floating_check:
	1156	t = check_at - start_shift;
	1157	DEBUG_EXECUTE_r( what = "floating" );
	1158	goto hop_and_restart;
	1159	}
	1160	if (t != s) {
	1161	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1162	"By STCLASS: moving %ld --> %ld\n",
	1163	(long)(t - i_strpos), (long)(s - i_strpos))
	1164	);
	1165	}
	1166	else {
	1167	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1168	"Does not contradict STCLASS...\n");
	1169	);
	1170	}
	1171	}
	1172	giveup:
	1173	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
	1174	PL_colors[4], (check ? "Guessed" : "Giving up"),
	1175	PL_colors[5], (long)(s - i_strpos)) );
	1176	return s;
	1177
	1178	fail_finish: /* Substring not found */
	1179	if (prog->check_substr \|\| prog->check_utf8) /* could be removed already */
	1180	BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
	1181	fail:
	1182	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
	1183	PL_colors[4], PL_colors[5]));
	1184	return NULL;
	1185	}
	1186
		/* Declare the local constant `trie_type`, which records how trie input
		 * is to be decoded.  When scan->flags != EXACT a folding variant is
		 * chosen (trie_utf8_fold if utf8_target, else trie_latin_utf8_fold if
		 * UTF_PATTERN, else trie_plain); otherwise trie_utf8 or trie_plain is
		 * chosen from utf8_target alone.  Expands to a declaration with no
		 * trailing semicolon and relies on `utf8_target` being in scope. */
	1187	#define DECL_TRIE_TYPE(scan) \
	1188	const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
	1189	trie_type = (scan->flags != EXACT) \
	1190	? (utf8_target ? trie_utf8_fold : (UTF_PATTERN ? trie_latin_utf8_fold : trie_plain)) \
	1191	: (utf8_target ? trie_utf8 : trie_plain)
	1192
	1193	#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
	1194	uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
	1195	switch (trie_type) { \
	1196	case trie_utf8_fold: \
	1197	if ( foldlen>0 ) { \
	1198	uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
	1199	foldlen -= len; \
	1200	uscan += len; \
	1201	len=0; \
	1202	} else { \
	1203	uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
	1204	uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
	1205	foldlen -= UNISKIP( uvc ); \
	1206	uscan = foldbuf + UNISKIP( uvc ); \
	1207	} \
	1208	break; \
	1209	case trie_latin_utf8_fold: \
	1210	if ( foldlen>0 ) { \
	1211	uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
	1212	foldlen -= len; \
	1213	uscan += len; \
	1214	len=0; \
	1215	} else { \
	1216	len = 1; \
	1217	uvc = to_uni_fold( (U8)uc, foldbuf, &foldlen ); \
	1218	foldlen -= UNISKIP( uvc ); \
	1219	uscan = foldbuf + UNISKIP( uvc ); \
	1220	} \
	1221	break; \
	1222	case trie_utf8: \
	1223	uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
	1224	break; \
	1225	case trie_plain: \
	1226	uvc = (UV)*uc; \
	1227	len = 1; \
	1228	} \
	1229	if (uvc < 256) { \
	1230	charid = trie->charmap[ uvc ]; \
	1231	} \
	1232	else { \
	1233	charid = 0; \
	1234	if (widecharmap) { \
	1235	SV** const svpp = hv_fetch(widecharmap, \
	1236	(char*)&uvc, sizeof(UV), 0); \
	1237	if (svpp) \
	1238	charid = (U16)SvIV(*svpp); \
	1239	} \
	1240	} \
	1241	} STMT_END
	1242
	1243	#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
	1244	{ \
	1245	char my_strend= (char )strend; \
	1246	if ( (CoNd) \
	1247	&& (ln == len \|\| \
	1248	foldEQ_utf8(s, &my_strend, 0, utf8_target, \
	1249	m, NULL, ln, cBOOL(UTF_PATTERN))) \
	1250	&& (!reginfo \|\| regtry(reginfo, &s)) ) \
	1251	goto got_it; \
	1252	else { \
	1253	U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
	1254	uvchr_to_utf8(tmpbuf, c); \
	1255	f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
	1256	if ( f != c \
	1257	&& (f == c1 \|\| f == c2) \
	1258	&& (ln == len \|\| \
	1259	foldEQ_utf8(s, &my_strend, 0, utf8_target,\
	1260	m, NULL, ln, cBOOL(UTF_PATTERN)))\
	1261	&& (!reginfo \|\| regtry(reginfo, &s)) ) \
	1262	goto got_it; \
	1263	} \
	1264	} \
	1265	s += len
	1266
	1267	#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
	1268	STMT_START { \
	1269	re_fold_t folder; \
	1270	switch (OP(c)) { \
	1271	case EXACTFU: folder = foldEQ_latin1; break; \
	1272	case EXACTFL: folder = foldEQ_locale; break; \
	1273	case EXACTF: folder = foldEQ; break; \
	1274	default: \
	1275	Perl_croak(aTHX_ "panic: Unexpected op %u", OP(c)); \
	1276	} \
	1277	while (s <= e) { \
	1278	if ( (CoNd) \
	1279	&& (ln == 1 \|\| folder(s, m, ln)) \
	1280	&& (!reginfo \|\| regtry(reginfo, &s)) ) \
	1281	goto got_it; \
	1282	s++; \
	1283	} \
	1284	} STMT_END
	1285
		/* Run CoDe at successive positions of a UTF-8 target.  Each iteration
		 * stores UTF8SKIP(s) in the caller's `uskip`, continues only while
		 * s + uskip <= strend, executes CoDe and then advances s by uskip.
		 * CoDe may leave the loop itself (e.g. with a goto). */
	1286	#define REXEC_FBC_UTF8_SCAN(CoDe) \
	1287	STMT_START { \
	1288	while (s + (uskip = UTF8SKIP(s)) <= strend) { \
	1289	CoDe \
	1290	s += uskip; \
	1291	} \
	1292	} STMT_END
	1293
		/* Byte-at-a-time counterpart of REXEC_FBC_UTF8_SCAN: executes CoDe for
		 * every s while s < strend, incrementing s by one after each pass. */
	1294	#define REXEC_FBC_SCAN(CoDe) \
	1295	STMT_START { \
	1296	while (s < strend) { \
	1297	CoDe \
	1298	s++; \
	1299	} \
	1300	} STMT_END
	1301
	1302	#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
	1303	REXEC_FBC_UTF8_SCAN( \
	1304	if (CoNd) { \
	1305	if (tmp && (!reginfo \|\| regtry(reginfo, &s))) \
	1306	goto got_it; \
	1307	else \
	1308	tmp = doevery; \
	1309	} \
	1310	else \
	1311	tmp = 1; \
	1312	)
	1313
	1314	#define REXEC_FBC_CLASS_SCAN(CoNd) \
	1315	REXEC_FBC_SCAN( \
	1316	if (CoNd) { \
	1317	if (tmp && (!reginfo \|\| regtry(reginfo, &s))) \
	1318	goto got_it; \
	1319	else \
	1320	tmp = doevery; \
	1321	} \
	1322	else \
	1323	tmp = 1; \
	1324	)
	1325
	1326	#define REXEC_FBC_TRYIT \
	1327	if ((!reginfo \|\| regtry(reginfo, &s))) \
	1328	goto got_it
	1329
		/* Class scan dispatcher: uses REXEC_FBC_UTF8_CLASS_SCAN with CoNdUtF8
		 * when utf8_target is set, otherwise REXEC_FBC_CLASS_SCAN with CoNd. */
	1330	#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
	1331	if (utf8_target) { \
	1332	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1333	} \
	1334	else { \
	1335	REXEC_FBC_CLASS_SCAN(CoNd); \
	1336	}
	1337
		/* Like REXEC_FBC_CSCAN, but executes UtFpReLoAd once before the UTF-8
		 * scan starts.  The preload is skipped entirely on the non-UTF-8
		 * path. */
	1338	#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
	1339	if (utf8_target) { \
	1340	UtFpReLoAd; \
	1341	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1342	} \
	1343	else { \
	1344	REXEC_FBC_CLASS_SCAN(CoNd); \
	1345	}
	1346
	1347	#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
	1348	PL_reg_flags \|= RF_tainted; \
	1349	if (utf8_target) { \
	1350	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1351	} \
	1352	else { \
	1353	REXEC_FBC_CLASS_SCAN(CoNd); \
	1354	}
	1355
		/* Convenience wrapper: calls dump_exec_pos() with li, s and doutf8 from
		 * the caller and PL_regeol, PL_bostr and PL_reg_starttry filled in for
		 * the remaining arguments. */
	1356	#define DUMP_EXEC_POS(li,s,doutf8) \
	1357	dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
	1358
	1359
		/* UTF-8 target boundary scan that needs no character-class tables.
		 * tmp is seeded with TEST_NON_UTF8 applied to the byte before s, or to
		 * '\n' when s is at PL_bostr.  Then, stepping with REXEC_FBC_UTF8_SCAN,
		 * each position whose first byte tests differently from tmp flips tmp
		 * and runs IF_SUCCESS; all other positions run IF_FAIL.
		 * The definition ends with a line continuation, so the line following
		 * it must stay blank. */
	1360	#define UTF8_NOLOAD(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1361	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1362	tmp = TEST_NON_UTF8(tmp); \
	1363	REXEC_FBC_UTF8_SCAN( \
	1364	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1365	tmp = !tmp; \
	1366	IF_SUCCESS; \
	1367	} \
	1368	else { \
	1369	IF_FAIL; \
	1370	} \
	1371	); \
	1372
	1373	#define UTF8_LOAD(TeSt1_UtF8, TeSt2_UtF8, IF_SUCCESS, IF_FAIL) \
	1374	if (s == PL_bostr) { \
	1375	tmp = '\n'; \
	1376	} \
	1377	else { \
	1378	U8 * const r = reghop3((U8)s, -1, (U8)PL_bostr); \
	1379	tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT); \
	1380	} \
	1381	tmp = TeSt1_UtF8; \
	1382	LOAD_UTF8_CHARCLASS_ALNUM(); \
	1383	REXEC_FBC_UTF8_SCAN( \
	1384	if (tmp == ! (TeSt2_UtF8)) { \
	1385	tmp = !tmp; \
	1386	IF_SUCCESS; \
	1387	} \
	1388	else { \
	1389	IF_FAIL; \
	1390	} \
	1391	); \
	1392
	1393	/* The only difference between the BOUND and NBOUND cases is that
	1394	* REXEC_FBC_TRYIT is called when matched in BOUND, and when non-matched in
	1395	* NBOUND. This is accomplished by passing it in either the if or else clause,
	1396	* with the other one being empty */
		/* Boundary scan: REXEC_FBC_TRYIT is run where the wordness changes and
		 * PLACEHOLDER where it does not.  The UTF-8 path is built with
		 * UTF8_LOAD from the two UTF-8 tests. */
	1397	#define FBC_BOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1398	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1399
		/* As FBC_BOUND, but the UTF-8 path is built with UTF8_NOLOAD and
		 * TEST_NON_UTF8.  TEST1_UTF8 and TEST2_UTF8 are accepted for signature
		 * parity but do not appear in the expansion. */
	1400	#define FBC_BOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1401	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1402
		/* Non-boundary scan: the mirror image of FBC_BOUND.  PLACEHOLDER is run
		 * where the wordness changes and REXEC_FBC_TRYIT where it does not.  The
		 * UTF-8 path is built with UTF8_LOAD from the two UTF-8 tests. */
	1403	#define FBC_NBOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1404	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1405
		/* As FBC_NBOUND, but the UTF-8 path is built with UTF8_NOLOAD and
		 * TEST_NON_UTF8.  TEST1_UTF8 and TEST2_UTF8 are accepted for signature
		 * parity but do not appear in the expansion. */
	1406	#define FBC_NBOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1407	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1408
	1409
	1410	/* Common to the BOUND and NBOUND cases. Unfortunately the UTF8 tests need to
	1411	* be passed in completely with the variable name being tested, which isn't
	1412	* such a clean interface, but this is easier to read than it was before. We
	1413	* are looking for the boundary (or non-boundary) between a word and non-word
	1414	* character. The utf8 and non-utf8 cases have the same logic, but the details
	1415	* must be different. Find the "wordness" of the character just prior to this
	1416	* one, and compare it with the wordness of this one. If they differ, we have
	1417	* a boundary. At the beginning of the string, pretend that the previous
	1418	* character was a new-line */
	1419	#define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1420	if (utf8_target) { \
	1421	UTF8_CODE \
	1422	} \
	1423	else { /* Not utf8 */ \
	1424	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1425	tmp = TEST_NON_UTF8(tmp); \
	1426	REXEC_FBC_SCAN( \
	1427	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1428	tmp = !tmp; \
	1429	IF_SUCCESS; \
	1430	} \
	1431	else { \
	1432	IF_FAIL; \
	1433	} \
	1434	); \
	1435	} \
	1436	if ((!prog->minlen && tmp) && (!reginfo \|\| regtry(reginfo, &s))) \
	1437	goto got_it;
	1438
	1439	/* We know what class REx starts with. Try to find this position... */
	1440	/* if reginfo is NULL, it's a dry run */
	1441	/* annoyingly all the vars in this routine have different names from their counterparts
	1442	in regmatch. /grrr */
	1443
	1444	STATIC char *
	1445	S_find_byclass(pTHX_ regexp * prog, const regnode c, char s,
	1446	const char strend, regmatch_info reginfo)
	1447	{
	1448	dVAR;
	1449	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
	1450	char *m;
	1451	STRLEN ln;
	1452	STRLEN lnc;
	1453	register STRLEN uskip;
	1454	unsigned int c1;
	1455	unsigned int c2;
	1456	char *e;
	1457	register I32 tmp = 1; /* Scratch variable? */
	1458	register const bool utf8_target = PL_reg_match_utf8;
	1459	RXi_GET_DECL(prog,progi);
	1460
	1461	PERL_ARGS_ASSERT_FIND_BYCLASS;
	1462
	1463	/* We know what class it must start with. */
	1464	switch (OP(c)) {
	1465	case ANYOFV:
	1466	case ANYOF:
	1467	if (utf8_target \|\| OP(c) == ANYOFV) {
	1468	REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_NONBITMAP) \|\|
	1469	!UTF8_IS_INVARIANT((U8)s[0]) ?
	1470	reginclass(prog, c, (U8*)s, 0, utf8_target) :
	1471	REGINCLASS(prog, c, (U8*)s));
	1472	}
	1473	else {
	1474	while (s < strend) {
	1475	STRLEN skip = 1;
	1476
	1477	if (REGINCLASS(prog, c, (U8*)s) \|\|
	1478	(ANYOF_FOLD_SHARP_S(c, s, strend) &&
	1479	/* The assignment of 2 is intentional:
	1480	* for the folded sharp s, the skip is 2. */
	1481	(skip = SHARP_S_SKIP))) {
	1482	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1483	goto got_it;
	1484	else
	1485	tmp = doevery;
	1486	}
	1487	else
	1488	tmp = 1;
	1489	s += skip;
	1490	}
	1491	}
	1492	break;
	1493	case CANY:
	1494	REXEC_FBC_SCAN(
	1495	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1496	goto got_it;
	1497	else
	1498	tmp = doevery;
	1499	);
	1500	break;
	1501	case EXACTFU:
	1502	case EXACTF:
	1503	m = STRING(c);
	1504	ln = STR_LEN(c); /* length to match in octets/bytes */
	1505	lnc = (I32) ln; /* length to match in characters */
	1506	if (UTF_PATTERN) {
	1507	STRLEN ulen1, ulen2;
	1508	U8 sm = (U8 ) m;
	1509	U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
	1510	U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
	1511	/* used by commented-out code below */
	1512	/*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
	1513
	1514	/* XXX: Since the node will be case folded at compile
	1515	time this logic is a little odd, although im not
	1516	sure that its actually wrong. --dmq */
	1517
	1518	c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
	1519	c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
	1520
	1521	/* XXX: This is kinda strange. to_utf8_XYZ returns the
	1522	codepoint of the first character in the converted
	1523	form, yet originally we did the extra step.
	1524	No tests fail by commenting this code out however
	1525	so Ive left it out. -- dmq.
	1526
	1527	c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
	1528	0, uniflags);
	1529	c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
	1530	0, uniflags);
	1531	*/
	1532
	1533	lnc = 0;
	1534	while (sm < ((U8 *) m + ln)) {
	1535	lnc++;
	1536	sm += UTF8SKIP(sm);
	1537	}
	1538	}
	1539	else {
	1540	c1 = (U8)m;
	1541	if (utf8_target \|\| OP(c) == EXACTFU) {
	1542
	1543	/* Micro sign folds to GREEK SMALL LETTER MU;
	1544	LATIN_SMALL_LETTER_SHARP_S folds to 'ss', and this sets
	1545	c2 to the first 's' of the pair, and the code below will
	1546	look for others */
	1547	c2 = (c1 == MICRO_SIGN)
	1548	? GREEK_SMALL_LETTER_MU
	1549	: (c1 == LATIN_SMALL_LETTER_SHARP_S)
	1550	? 's'
	1551	: PL_fold_latin1[c1];
	1552	} else c2 = PL_fold[c1];
	1553	}
	1554	goto do_exactf;
	1555	case EXACTFL:
	1556	m = STRING(c);
	1557	ln = STR_LEN(c);
	1558	lnc = (I32) ln;
	1559	c1 = (U8)m;
	1560	c2 = PL_fold_locale[c1];
	1561	do_exactf:
	1562	e = HOP3c(strend, -((I32)lnc), s);
	1563
	1564	if (!reginfo && e < s)
	1565	e = s; /* Due to minlen logic of intuit() */
	1566
	1567	/* The idea in the EXACTF* cases is to first find the
	1568	* first character of the EXACTF* node and then, if
	1569	* necessary, case-insensitively compare the full
	1570	* text of the node. The c1 and c2 are the first
	1571	* characters (though in Unicode it gets a bit
	1572	* more complicated because there are more cases
	1573	* than just upper and lower: one needs to use
	1574	* the so-called folding case for case-insensitive
	1575	* matching (called "loose matching" in Unicode).
	1576	* foldEQ_utf8() will do just that. */
	1577
	1578	if (utf8_target \|\| UTF_PATTERN) {
	1579	UV c, f;
	1580	U8 tmpbuf [UTF8_MAXBYTES+1];
	1581	STRLEN len = 1;
	1582	STRLEN foldlen;
	1583	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1584	if (c1 == c2) {
	1585	/* Upper and lower of 1st char are equal -
	1586	* probably not a "letter". */
	1587	while (s <= e) {
	1588	if (utf8_target) {
	1589	c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
	1590	uniflags);
	1591	} else {
	1592	c = ((U8)s);
	1593	}
	1594	REXEC_FBC_EXACTISH_CHECK(c == c1);
	1595	}
	1596	}
	1597	else {
	1598	while (s <= e) {
	1599	if (utf8_target) {
	1600	c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
	1601	uniflags);
	1602	} else {
	1603	c = ((U8)s);
	1604	}
	1605
	1606	/* Handle some of the three Greek sigmas cases.
	1607	* Note that not all the possible combinations
	1608	* are handled here: some of them are handled
	1609	* by the standard folding rules, and some of
	1610	* them (the character class or ANYOF cases)
	1611	* are handled during compiletime in
	1612	* regexec.c:S_regclass(). */
	1613	if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA \|\|
	1614	c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
	1615	c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
	1616
	1617	REXEC_FBC_EXACTISH_CHECK(c == c1 \|\| c == c2);
	1618	}
	1619	}
	1620	}
	1621	else {
	1622	/* Neither pattern nor string are UTF8 */
	1623	if (c1 == c2)
	1624	REXEC_FBC_EXACTISH_SCAN((U8)s == c1);
	1625	else
	1626	REXEC_FBC_EXACTISH_SCAN((U8)s == c1 \|\| (U8)s == c2);
	1627	}
	1628	break;
	1629	case BOUNDL:
	1630	PL_reg_flags \|= RF_tainted;
	1631	FBC_BOUND(isALNUM_LC,
	1632	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1633	isALNUM_LC_utf8((U8*)s));
	1634	break;
	1635	case NBOUNDL:
	1636	PL_reg_flags \|= RF_tainted;
	1637	FBC_NBOUND(isALNUM_LC,
	1638	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1639	isALNUM_LC_utf8((U8*)s));
	1640	break;
	1641	case BOUND:
	1642	FBC_BOUND(isWORDCHAR,
	1643	isALNUM_uni(tmp),
	1644	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1645	break;
	1646	case BOUNDA:
	1647	FBC_BOUND_NOLOAD(isWORDCHAR_A,
	1648	isWORDCHAR_A(tmp),
	1649	isWORDCHAR_A((U8*)s));
	1650	break;
	1651	case NBOUND:
	1652	FBC_NBOUND(isWORDCHAR,
	1653	isALNUM_uni(tmp),
	1654	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1655	break;
	1656	case NBOUNDA:
	1657	FBC_NBOUND_NOLOAD(isWORDCHAR_A,
	1658	isWORDCHAR_A(tmp),
	1659	isWORDCHAR_A((U8*)s));
	1660	break;
	1661	case BOUNDU:
	1662	FBC_BOUND(isWORDCHAR_L1,
	1663	isALNUM_uni(tmp),
	1664	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1665	break;
	1666	case NBOUNDU:
	1667	FBC_NBOUND(isWORDCHAR_L1,
	1668	isALNUM_uni(tmp),
	1669	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1670	break;
	1671	case ALNUML:
	1672	REXEC_FBC_CSCAN_TAINT(
	1673	isALNUM_LC_utf8((U8*)s),
	1674	isALNUM_LC(*s)
	1675	);
	1676	break;
	1677	case ALNUMU:
	1678	REXEC_FBC_CSCAN_PRELOAD(
	1679	LOAD_UTF8_CHARCLASS_PERL_WORD(),
	1680	swash_fetch(RE_utf8_perl_word,(U8*)s, utf8_target),
	1681	isWORDCHAR_L1((U8) *s)
	1682	);
	1683	break;
	1684	case ALNUM:
	1685	REXEC_FBC_CSCAN_PRELOAD(
	1686	LOAD_UTF8_CHARCLASS_PERL_WORD(),
	1687	swash_fetch(RE_utf8_perl_word,(U8*)s, utf8_target),
	1688	isWORDCHAR((U8) *s)
	1689	);
	1690	break;
	1691	case ALNUMA:
	1692	/* Don't need to worry about utf8, as it can match only a single
	1693	* byte invariant character */
	1694	REXEC_FBC_CLASS_SCAN( isWORDCHAR_A(*s));
	1695	break;
	1696	case NALNUMU:
	1697	REXEC_FBC_CSCAN_PRELOAD(
	1698	LOAD_UTF8_CHARCLASS_PERL_WORD(),
	1699	swash_fetch(RE_utf8_perl_word,(U8*)s, utf8_target),
	1700	! isWORDCHAR_L1((U8) *s)
	1701	);
	1702	break;
	1703	case NALNUM:
	1704	REXEC_FBC_CSCAN_PRELOAD(
	1705	LOAD_UTF8_CHARCLASS_PERL_WORD(),
	1706	!swash_fetch(RE_utf8_perl_word, (U8*)s, utf8_target),
	1707	! isALNUM(*s)
	1708	);
	1709	break;
	1710	case NALNUMA:
	1711	REXEC_FBC_CSCAN(
	1712	!isWORDCHAR_A(*s),
	1713	!isWORDCHAR_A(*s)
	1714	);
	1715	break;
	1716	case NALNUML:
	1717	REXEC_FBC_CSCAN_TAINT(
	1718	!isALNUM_LC_utf8((U8*)s),
	1719	!isALNUM_LC(*s)
	1720	);
	1721	break;
	1722	case SPACEU:
	1723	REXEC_FBC_CSCAN_PRELOAD(
	1724	LOAD_UTF8_CHARCLASS_PERL_SPACE(),
	1725	s == ' ' \|\| swash_fetch(RE_utf8_perl_space,(U8)s, utf8_target),
	1726	isSPACE_L1((U8) *s)
	1727	);
	1728	break;
	1729	case SPACE:
	1730	REXEC_FBC_CSCAN_PRELOAD(
	1731	LOAD_UTF8_CHARCLASS_PERL_SPACE(),
	1732	s == ' ' \|\| swash_fetch(RE_utf8_perl_space,(U8)s, utf8_target),
	1733	isSPACE((U8) *s)
	1734	);
	1735	break;
	1736	case SPACEA:
	1737	/* Don't need to worry about utf8, as it can match only a single
	1738	* byte invariant character */
	1739	REXEC_FBC_CLASS_SCAN( isSPACE_A(*s));
	1740	break;
	1741	case SPACEL:
	1742	REXEC_FBC_CSCAN_TAINT(
	1743	isSPACE_LC_utf8((U8*)s),
	1744	isSPACE_LC(*s)
	1745	);
	1746	break;
	1747	case NSPACEU:
	1748	REXEC_FBC_CSCAN_PRELOAD(
	1749	LOAD_UTF8_CHARCLASS_PERL_SPACE(),
	1750	!( s == ' ' \|\| swash_fetch(RE_utf8_perl_space,(U8)s, utf8_target)),
	1751	! isSPACE_L1((U8) *s)
	1752	);
	1753	break;
	1754	case NSPACE:
	1755	REXEC_FBC_CSCAN_PRELOAD(
	1756	LOAD_UTF8_CHARCLASS_PERL_SPACE(),
	1757	!(s == ' ' \|\| swash_fetch(RE_utf8_perl_space,(U8)s, utf8_target)),
	1758	! isSPACE((U8) *s)
	1759	);
	1760	break;
	1761	case NSPACEA:
	1762	REXEC_FBC_CSCAN(
	1763	!isSPACE_A(*s),
	1764	!isSPACE_A(*s)
	1765	);
	1766	break;
	1767	case NSPACEL:
	1768	REXEC_FBC_CSCAN_TAINT(
	1769	!isSPACE_LC_utf8((U8*)s),
	1770	!isSPACE_LC(*s)
	1771	);
	1772	break;
	1773	case DIGIT:
	1774	REXEC_FBC_CSCAN_PRELOAD(
	1775	LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
	1776	swash_fetch(RE_utf8_posix_digit,(U8*)s, utf8_target),
	1777	isDIGIT(*s)
	1778	);
	1779	break;
	1780	case DIGITA:
	1781	/* Don't need to worry about utf8, as it can match only a single
	1782	* byte invariant character */
	1783	REXEC_FBC_CLASS_SCAN( isDIGIT_A(*s));
	1784	break;
	1785	case DIGITL:
	1786	REXEC_FBC_CSCAN_TAINT(
	1787	isDIGIT_LC_utf8((U8*)s),
	1788	isDIGIT_LC(*s)
	1789	);
	1790	break;
	1791	case NDIGIT:
	1792	REXEC_FBC_CSCAN_PRELOAD(
	1793	LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
	1794	!swash_fetch(RE_utf8_posix_digit,(U8*)s, utf8_target),
	1795	!isDIGIT(*s)
	1796	);
	1797	break;
	1798	case NDIGITA:
	1799	REXEC_FBC_CSCAN(
	1800	!isDIGIT_A(*s),
	1801	!isDIGIT_A(*s)
	1802	);
	1803	break;
	1804	case NDIGITL:
	1805	REXEC_FBC_CSCAN_TAINT(
	1806	!isDIGIT_LC_utf8((U8*)s),
	1807	!isDIGIT_LC(*s)
	1808	);
	1809	break;
	1810	case LNBREAK:
	1811	REXEC_FBC_CSCAN(
	1812	is_LNBREAK_utf8(s),
	1813	is_LNBREAK_latin1(s)
	1814	);
	1815	break;
	1816	case VERTWS:
	1817	REXEC_FBC_CSCAN(
	1818	is_VERTWS_utf8(s),
	1819	is_VERTWS_latin1(s)
	1820	);
	1821	break;
	1822	case NVERTWS:
	1823	REXEC_FBC_CSCAN(
	1824	!is_VERTWS_utf8(s),
	1825	!is_VERTWS_latin1(s)
	1826	);
	1827	break;
	1828	case HORIZWS:
	1829	REXEC_FBC_CSCAN(
	1830	is_HORIZWS_utf8(s),
	1831	is_HORIZWS_latin1(s)
	1832	);
	1833	break;
	1834	case NHORIZWS:
	1835	REXEC_FBC_CSCAN(
	1836	!is_HORIZWS_utf8(s),
	1837	!is_HORIZWS_latin1(s)
	1838	);
	1839	break;
	1840	case AHOCORASICKC:
	1841	case AHOCORASICK:
	1842	{
	1843	DECL_TRIE_TYPE(c);
	1844	/* what trie are we using right now */
	1845	reg_ac_data *aho
	1846	= (reg_ac_data*)progi->data->data[ ARG( c ) ];
	1847	reg_trie_data *trie
	1848	= (reg_trie_data*)progi->data->data[ aho->trie ];
	1849	HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
	1850
	1851	const char *last_start = strend - trie->minlen;
	1852	#ifdef DEBUGGING
	1853	const char *real_start = s;
	1854	#endif
	1855	STRLEN maxlen = trie->maxlen;
	1856	SV *sv_points;
	1857	U8 *points; / map of where we were in the input string
	1858	when reading a given char. For ASCII this
	1859	is unnecessary overhead as the relationship
	1860	is always 1:1, but for Unicode, especially
	1861	case folded Unicode this is not true. */
	1862	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1863	U8 *bitmap=NULL;
	1864
	1865
	1866	GET_RE_DEBUG_FLAGS_DECL;
	1867
	1868	/* We can't just allocate points here. We need to wrap it in
	1869	* an SV so it gets freed properly if there is a croak while
	1870	* running the match */
	1871	ENTER;
	1872	SAVETMPS;
	1873	sv_points=newSV(maxlen * sizeof(U8 *));
	1874	SvCUR_set(sv_points,
	1875	maxlen * sizeof(U8 *));
	1876	SvPOK_on(sv_points);
	1877	sv_2mortal(sv_points);
	1878	points=(U8**)SvPV_nolen(sv_points );
	1879	if ( trie_type != trie_utf8_fold
	1880	&& (trie->bitmap \|\| OP(c)==AHOCORASICKC) )
	1881	{
	1882	if (trie->bitmap)
	1883	bitmap=(U8*)trie->bitmap;
	1884	else
	1885	bitmap=(U8*)ANYOF_BITMAP(c);
	1886	}
	1887	/* this is the Aho-Corasick algorithm modified a touch
	1888	to include special handling for long "unknown char"
	1889	sequences. The basic idea being that we use AC as long
	1890	as we are dealing with a possible matching char, when
	1891	we encounter an unknown char (and we have not encountered
	1892	an accepting state) we scan forward until we find a legal
	1893	starting char.
	1894	AC matching is basically that of trie matching, except
	1895	that when we encounter a failing transition, we fall back
	1896	to the current states "fail state", and try the current char
	1897	again, a process we repeat until we reach the root state,
	1898	state 1, or a legal transition. If we fail on the root state
	1899	then we can either terminate if we have reached an accepting
	1900	state previously, or restart the entire process from the beginning
	1901	if we have not.
	1902
	1903	*/
	1904	while (s <= last_start) {
	1905	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1906	U8 uc = (U8)s;
	1907	U16 charid = 0;
	1908	U32 base = 1;
	1909	U32 state = 1;
	1910	UV uvc = 0;
	1911	STRLEN len = 0;
	1912	STRLEN foldlen = 0;
	1913	U8 uscan = (U8)NULL;
	1914	U8 *leftmost = NULL;
	1915	#ifdef DEBUGGING
	1916	U32 accepted_word= 0;
	1917	#endif
	1918	U32 pointpos = 0;
	1919
	1920	while ( state && uc <= (U8*)strend ) {
	1921	int failed=0;
	1922	U32 word = aho->states[ state ].wordnum;
	1923
	1924	if( state==1 ) {
	1925	if ( bitmap ) {
	1926	DEBUG_TRIE_EXECUTE_r(
	1927	if ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1928	dump_exec_pos( (char *)uc, c, strend, real_start,
	1929	(char *)uc, utf8_target );
	1930	PerlIO_printf( Perl_debug_log,
	1931	" Scanning for legal start char...\n");
	1932	}
	1933	);
	1934	if (utf8_target) {
	1935	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1936	uc += UTF8SKIP(uc);
	1937	}
	1938	} else {
	1939	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1940	uc++;
	1941	}
	1942	}
	1943	s= (char *)uc;
	1944	}
	1945	if (uc >(U8*)last_start) break;
	1946	}
	1947
	1948	if ( word ) {
	1949	U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
	1950	if (!leftmost \|\| lpos < leftmost) {
	1951	DEBUG_r(accepted_word=word);
	1952	leftmost= lpos;
	1953	}
	1954	if (base==0) break;
	1955
	1956	}
	1957	points[pointpos++ % maxlen]= uc;
	1958	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	1959	uscan, len, uvc, charid, foldlen,
	1960	foldbuf, uniflags);
	1961	DEBUG_TRIE_EXECUTE_r({
	1962	dump_exec_pos( (char *)uc, c, strend, real_start,
	1963	s, utf8_target );
	1964	PerlIO_printf(Perl_debug_log,
	1965	" Charid:%3u CP:%4"UVxf" ",
	1966	charid, uvc);
	1967	});
	1968
	1969	do {
	1970	#ifdef DEBUGGING
	1971	word = aho->states[ state ].wordnum;
	1972	#endif
	1973	base = aho->states[ state ].trans.base;
	1974
	1975	DEBUG_TRIE_EXECUTE_r({
	1976	if (failed)
	1977	dump_exec_pos( (char *)uc, c, strend, real_start,
	1978	s, utf8_target );
	1979	PerlIO_printf( Perl_debug_log,
	1980	"%sState: %4"UVxf", word=%"UVxf,
	1981	failed ? " Fail transition to " : "",
	1982	(UV)state, (UV)word);
	1983	});
	1984	if ( base ) {
	1985	U32 tmp;
	1986	I32 offset;
	1987	if (charid &&
	1988	( ((offset = base + charid
	1989	- 1 - trie->uniquecharcount)) >= 0)
	1990	&& ((U32)offset < trie->lasttrans)
	1991	&& trie->trans[offset].check == state
	1992	&& (tmp=trie->trans[offset].next))
	1993	{
	1994	DEBUG_TRIE_EXECUTE_r(
	1995	PerlIO_printf( Perl_debug_log," - legal\n"));
	1996	state = tmp;
	1997	break;
	1998	}
	1999	else {
	2000	DEBUG_TRIE_EXECUTE_r(
	2001	PerlIO_printf( Perl_debug_log," - fail\n"));
	2002	failed = 1;
	2003	state = aho->fail[state];
	2004	}
	2005	}
	2006	else {
	2007	/* we must be accepting here */
	2008	DEBUG_TRIE_EXECUTE_r(
	2009	PerlIO_printf( Perl_debug_log," - accepting\n"));
	2010	failed = 1;
	2011	break;
	2012	}
	2013	} while(state);
	2014	uc += len;
	2015	if (failed) {
	2016	if (leftmost)
	2017	break;
	2018	if (!state) state = 1;
	2019	}
	2020	}
	2021	if ( aho->states[ state ].wordnum ) {
	2022	U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
	2023	if (!leftmost \|\| lpos < leftmost) {
	2024	DEBUG_r(accepted_word=aho->states[ state ].wordnum);
	2025	leftmost = lpos;
	2026	}
	2027	}
	2028	if (leftmost) {
	2029	s = (char*)leftmost;
	2030	DEBUG_TRIE_EXECUTE_r({
	2031	PerlIO_printf(
	2032	Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
	2033	(UV)accepted_word, (IV)(s - real_start)
	2034	);
	2035	});
	2036	if (!reginfo \|\| regtry(reginfo, &s)) {
	2037	FREETMPS;
	2038	LEAVE;
	2039	goto got_it;
	2040	}
	2041	s = HOPc(s,1);
	2042	DEBUG_TRIE_EXECUTE_r({
	2043	PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
	2044	});
	2045	} else {
	2046	DEBUG_TRIE_EXECUTE_r(
	2047	PerlIO_printf( Perl_debug_log,"No match.\n"));
	2048	break;
	2049	}
	2050	}
	2051	FREETMPS;
	2052	LEAVE;
	2053	}
	2054	break;
	2055	default:
	2056	Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
	2057	break;
	2058	}
	2059	return 0;
	2060	got_it:
	2061	return s;
	2062	}
	2063
	2064
	2065	/*
	2066	- regexec_flags - match a regexp against a string
	2067	*/
	2068	I32
	2069	Perl_regexec_flags(pTHX_ REGEXP * const rx, char stringarg, register char strend,
	2070	char strbeg, I32 minend, SV sv, void *data, U32 flags)
	2071	/* strend: pointer to null at end of string */
	2072	/* strbeg: real beginning of string */
	2073	/* minend: end of match must be >=minend after stringarg. */
	2074	/* data: May be used for some additional optimizations.
	2075	Currently its only used, with a U32 cast, for transmitting
	2076	the ganch offset when doing a /g match. This will change */
	2077	/* nosave: For optimizations. */
	2078	{
	2079	dVAR;
	2080	struct regexp const prog = (struct regexp )SvANY(rx);
	2081	/register/ char *s;
	2082	register regnode *c;
	2083	/register/ char *startpos = stringarg;
	2084	I32 minlen; /* must match at least this many chars */
	2085	I32 dontbother = 0; /* how many characters not to try at end */
	2086	I32 end_shift = 0; /* Same for the end. / / CC */
	2087	I32 scream_pos = -1; /* Internal iterator of scream. */
	2088	char *scream_olds = NULL;
	2089	const bool utf8_target = cBOOL(DO_UTF8(sv));
	2090	I32 multiline;
	2091	RXi_GET_DECL(prog,progi);
	2092	regmatch_info reginfo; /* create some info to pass to regtry etc */
	2093	regexp_paren_pair *swap = NULL;
	2094	GET_RE_DEBUG_FLAGS_DECL;
	2095
	2096	PERL_ARGS_ASSERT_REGEXEC_FLAGS;
	2097	PERL_UNUSED_ARG(data);
	2098
	2099	/* Be paranoid... */
	2100	if (prog == NULL \|\| startpos == NULL) {
	2101	Perl_croak(aTHX_ "NULL regexp parameter");
	2102	return 0;
	2103	}
	2104
	2105	multiline = prog->extflags & RXf_PMf_MULTILINE;
	2106	reginfo.prog = rx; /* Yes, sorry that this is confusing. */
	2107
	2108	RX_MATCH_UTF8_set(rx, utf8_target);
	2109	DEBUG_EXECUTE_r(
	2110	debug_start_match(rx, utf8_target, startpos, strend,
	2111	"Matching");
	2112	);
	2113
	2114	minlen = prog->minlen;
	2115
	2116	if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
	2117	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	2118	"String too short [regexec_flags]...\n"));
	2119	goto phooey;
	2120	}
	2121
	2122
	2123	/* Check validity of program. */
	2124	if (UCHARAT(progi->program) != REG_MAGIC) {
	2125	Perl_croak(aTHX_ "corrupted regexp program");
	2126	}
	2127
	2128	PL_reg_flags = 0;
	2129	PL_reg_eval_set = 0;
	2130	PL_reg_maxiter = 0;
	2131
	2132	if (RX_UTF8(rx))
	2133	PL_reg_flags \|= RF_utf8;
	2134
	2135	/* Mark beginning of line for ^ and lookbehind. */
	2136	reginfo.bol = startpos; /* XXX not used ??? */
	2137	PL_bostr = strbeg;
	2138	reginfo.sv = sv;
	2139
	2140	/* Mark end of line for $ (and such) */
	2141	PL_regeol = strend;
	2142
	2143	/* see how far we have to get to not match where we matched before */
	2144	reginfo.till = startpos+minend;
	2145
	2146	/* If there is a "must appear" string, look for it. */
	2147	s = startpos;
	2148
	2149	if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
	2150	MAGIC *mg;
	2151	if (flags & REXEC_IGNOREPOS){ /* Means: check only at start */
	2152	reginfo.ganch = startpos + prog->gofs;
	2153	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2154	"GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
	2155	} else if (sv && SvTYPE(sv) >= SVt_PVMG
	2156	&& SvMAGIC(sv)
	2157	&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
	2158	&& mg->mg_len >= 0) {
	2159	reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
	2160	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2161	"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
	2162
	2163	if (prog->extflags & RXf_ANCH_GPOS) {
	2164	if (s > reginfo.ganch)
	2165	goto phooey;
	2166	s = reginfo.ganch - prog->gofs;
	2167	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2168	"GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
	2169	if (s < strbeg)
	2170	goto phooey;
	2171	}
	2172	}
	2173	else if (data) {
	2174	reginfo.ganch = strbeg + PTR2UV(data);
	2175	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2176	"GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
	2177
	2178	} else { /* pos() not defined */
	2179	reginfo.ganch = strbeg;
	2180	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2181	"GPOS: reginfo.ganch = strbeg\n"));
	2182	}
	2183	}
	2184	if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
	2185	/* We have to be careful. If the previous successful match
	2186	was from this regex we don't want a subsequent partially
	2187	successful match to clobber the old results.
	2188	So when we detect this possibility we add a swap buffer
	2189	to the re, and switch the buffer each match. If we fail
	2190	we switch it back, otherwise we leave it swapped.
	2191	*/
	2192	swap = prog->offs;
	2193	/* do we need a save destructor here for eval dies? */
	2194	Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
	2195	}
	2196	if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL \|\| prog->check_utf8 != NULL)) {
	2197	re_scream_pos_data d;
	2198
	2199	d.scream_olds = &scream_olds;
	2200	d.scream_pos = &scream_pos;
	2201	s = re_intuit_start(rx, sv, s, strend, flags, &d);
	2202	if (!s) {
	2203	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
	2204	goto phooey; /* not present */
	2205	}
	2206	}
	2207
	2208
	2209
	2210	/* Simplest case: anchored match need be tried only once. */
	2211	/* [unless only anchor is BOL and multiline is set] */
	2212	if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
	2213	if (s == startpos && regtry(&reginfo, &startpos))
	2214	goto got_it;
	2215	else if (multiline \|\| (prog->intflags & PREGf_IMPLICIT)
	2216	\|\| (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
	2217	{
	2218	char *end;
	2219
	2220	if (minlen)
	2221	dontbother = minlen - 1;
	2222	end = HOP3c(strend, -dontbother, strbeg) - 1;
	2223	/* for multiline we only have to try after newlines */
	2224	if (prog->check_substr \|\| prog->check_utf8) {
	2225	/* because of the goto we can not easily reuse the macros for bifurcating the
	2226	unicode/non-unicode match modes here like we do elsewhere - demerphq */
	2227	if (utf8_target) {
	2228	if (s == startpos)
	2229	goto after_try_utf8;
	2230	while (1) {
	2231	if (regtry(&reginfo, &s)) {
	2232	goto got_it;
	2233	}
	2234	after_try_utf8:
	2235	if (s > end) {
	2236	goto phooey;
	2237	}
	2238	if (prog->extflags & RXf_USE_INTUIT) {
	2239	s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
	2240	if (!s) {
	2241	goto phooey;
	2242	}
	2243	}
	2244	else {
	2245	s += UTF8SKIP(s);
	2246	}
	2247	}
	2248	} /* end search for check string in unicode */
	2249	else {
	2250	if (s == startpos) {
	2251	goto after_try_latin;
	2252	}
	2253	while (1) {
	2254	if (regtry(&reginfo, &s)) {
	2255	goto got_it;
	2256	}
	2257	after_try_latin:
	2258	if (s > end) {
	2259	goto phooey;
	2260	}
	2261	if (prog->extflags & RXf_USE_INTUIT) {
	2262	s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
	2263	if (!s) {
	2264	goto phooey;
	2265	}
	2266	}
	2267	else {
	2268	s++;
	2269	}
	2270	}
	2271	} /* end search for check string in latin*/
	2272	} /* end search for check string */
	2273	else { /* search for newline */
	2274	if (s > startpos) {
	2275	/XXX: The s-- is almost definitely wrong here under unicode - demeprhq/
	2276	s--;
	2277	}
	2278	/* We can use a more efficient search as newlines are the same in unicode as they are in latin */
	2279	while (s < end) {
	2280	if (s++ == '\n') { / don't need PL_utf8skip here */
	2281	if (regtry(&reginfo, &s))
	2282	goto got_it;
	2283	}
	2284	}
	2285	} /* end search for newline */
	2286	} /* end anchored/multiline check string search */
	2287	goto phooey;
	2288	} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
	2289	{
	2290	/* the warning about reginfo.ganch being used without initialization
	2291	is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
	2292	and we only enter this block when the same bit is set. */
	2293	char *tmp_s = reginfo.ganch - prog->gofs;
	2294
	2295	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
	2296	goto got_it;
	2297	goto phooey;
	2298	}
	2299
	2300	/* Messy cases: unanchored match. */
	2301	if ((prog->anchored_substr \|\| prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
	2302	/* we have /x+whatever/ */
	2303	/* it must be a one character string (XXXX Except UTF_PATTERN?) */
	2304	char ch;
	2305	#ifdef DEBUGGING
	2306	int did_match = 0;
	2307	#endif
	2308	if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
	2309	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2310	ch = SvPVX_const(utf8_target ? prog->anchored_utf8 : prog->anchored_substr)[0];
	2311
	2312	if (utf8_target) {
	2313	REXEC_FBC_SCAN(
	2314	if (*s == ch) {
	2315	DEBUG_EXECUTE_r( did_match = 1 );
	2316	if (regtry(&reginfo, &s)) goto got_it;
	2317	s += UTF8SKIP(s);
	2318	while (s < strend && *s == ch)
	2319	s += UTF8SKIP(s);
	2320	}
	2321	);
	2322	}
	2323	else {
	2324	REXEC_FBC_SCAN(
	2325	if (*s == ch) {
	2326	DEBUG_EXECUTE_r( did_match = 1 );
	2327	if (regtry(&reginfo, &s)) goto got_it;
	2328	s++;
	2329	while (s < strend && *s == ch)
	2330	s++;
	2331	}
	2332	);
	2333	}
	2334	DEBUG_EXECUTE_r(if (!did_match)
	2335	PerlIO_printf(Perl_debug_log,
	2336	"Did not find anchored character...\n")
	2337	);
	2338	}
	2339	else if (prog->anchored_substr != NULL
	2340	\|\| prog->anchored_utf8 != NULL
	2341	\|\| ((prog->float_substr != NULL \|\| prog->float_utf8 != NULL)
	2342	&& prog->float_max_offset < strend - s)) {
	2343	SV *must;
	2344	I32 back_max;
	2345	I32 back_min;
	2346	char *last;
	2347	char last1; / Last position checked before */
	2348	#ifdef DEBUGGING
	2349	int did_match = 0;
	2350	#endif
	2351	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	2352	if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
	2353	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2354	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	2355	back_max = back_min = prog->anchored_offset;
	2356	} else {
	2357	if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
	2358	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2359	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	2360	back_max = prog->float_max_offset;
	2361	back_min = prog->float_min_offset;
	2362	}
	2363
	2364
	2365	if (must == &PL_sv_undef)
	2366	/* could not downgrade utf8 check substring, so must fail */
	2367	goto phooey;
	2368
	2369	if (back_min<0) {
	2370	last = strend;
	2371	} else {
	2372	last = HOP3c(strend, /* Cannot start after this */
	2373	-(I32)(CHR_SVLEN(must)
	2374	- (SvTAIL(must) != 0) + back_min), strbeg);
	2375	}
	2376	if (s > PL_bostr)
	2377	last1 = HOPc(s, -1);
	2378	else
	2379	last1 = s - 1; /* bogus */
	2380
	2381	/* XXXX check_substr already used to find "s", can optimize if
	2382	check_substr==must. */
	2383	scream_pos = -1;
	2384	dontbother = end_shift;
	2385	strend = HOPc(strend, -dontbother);
	2386	while ( (s <= last) &&
	2387	((flags & REXEC_SCREAM)
	2388	? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
	2389	end_shift, &scream_pos, 0))
	2390	: (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
	2391	(unsigned char*)strend, must,
	2392	multiline ? FBMrf_MULTILINE : 0))) ) {
	2393	/* we may be pointing at the wrong string */
	2394	if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
	2395	s = strbeg + (s - SvPVX_const(sv));
	2396	DEBUG_EXECUTE_r( did_match = 1 );
	2397	if (HOPc(s, -back_max) > last1) {
	2398	last1 = HOPc(s, -back_min);
	2399	s = HOPc(s, -back_max);
	2400	}
	2401	else {
	2402	char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
	2403
	2404	last1 = HOPc(s, -back_min);
	2405	s = t;
	2406	}
	2407	if (utf8_target) {
	2408	while (s <= last1) {
	2409	if (regtry(&reginfo, &s))
	2410	goto got_it;
	2411	s += UTF8SKIP(s);
	2412	}
	2413	}
	2414	else {
	2415	while (s <= last1) {
	2416	if (regtry(&reginfo, &s))
	2417	goto got_it;
	2418	s++;
	2419	}
	2420	}
	2421	}
	2422	DEBUG_EXECUTE_r(if (!did_match) {
	2423	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	2424	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	2425	PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
	2426	((must == prog->anchored_substr \|\| must == prog->anchored_utf8)
	2427	? "anchored" : "floating"),
	2428	quoted, RE_SV_TAIL(must));
	2429	});
	2430	goto phooey;
	2431	}
	2432	else if ( (c = progi->regstclass) ) {
	2433	if (minlen) {
	2434	const OPCODE op = OP(progi->regstclass);
	2435	/* don't bother with what can't match */
	2436	if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
	2437	strend = HOPc(strend, -(minlen - 1));
	2438	}
	2439	DEBUG_EXECUTE_r({
	2440	SV * const prop = sv_newmortal();
	2441	regprop(prog, prop, c);
	2442	{
	2443	RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
	2444	s,strend-s,60);
	2445	PerlIO_printf(Perl_debug_log,
	2446	"Matching stclass %.*s against %s (%d bytes)\n",
	2447	(int)SvCUR(prop), SvPVX_const(prop),
	2448	quoted, (int)(strend - s));
	2449	}
	2450	});
	2451	if (find_byclass(prog, c, s, strend, &reginfo))
	2452	goto got_it;
	2453	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
	2454	}
	2455	else {
	2456	dontbother = 0;
	2457	if (prog->float_substr != NULL \|\| prog->float_utf8 != NULL) {
	2458	/* Trim the end. */
	2459	char *last;
	2460	SV* float_real;
	2461
	2462	if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
	2463	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2464	float_real = utf8_target ? prog->float_utf8 : prog->float_substr;
	2465
	2466	if (flags & REXEC_SCREAM) {
	2467	last = screaminstr(sv, float_real, s - strbeg,
	2468	end_shift, &scream_pos, 1); /* last one */
	2469	if (!last)
	2470	last = scream_olds; /* Only one occurrence. */
	2471	/* we may be pointing at the wrong string */
	2472	else if (RXp_MATCH_COPIED(prog))
	2473	s = strbeg + (s - SvPVX_const(sv));
	2474	}
	2475	else {
	2476	STRLEN len;
	2477	const char * const little = SvPV_const(float_real, len);
	2478
	2479	if (SvTAIL(float_real)) {
	2480	if (memEQ(strend - len + 1, little, len - 1))
	2481	last = strend - len + 1;
	2482	else if (!multiline)
	2483	last = memEQ(strend - len, little, len)
	2484	? strend - len : NULL;
	2485	else
	2486	goto find_last;
	2487	} else {
	2488	find_last:
	2489	if (len)
	2490	last = rninstr(s, strend, little, little + len);
	2491	else
	2492	last = strend; /* matching "$" */
	2493	}
	2494	}
	2495	if (last == NULL) {
	2496	DEBUG_EXECUTE_r(
	2497	PerlIO_printf(Perl_debug_log,
	2498	"%sCan't trim the tail, match fails (should not happen)%s\n",
	2499	PL_colors[4], PL_colors[5]));
	2500	goto phooey; /* Should not happen! */
	2501	}
	2502	dontbother = strend - last + prog->float_min_offset;
	2503	}
	2504	if (minlen && (dontbother < minlen))
	2505	dontbother = minlen - 1;
	2506	strend -= dontbother; /* this one's always in bytes! */
	2507	/* We don't know much -- general case. */
	2508	if (utf8_target) {
	2509	for (;;) {
	2510	if (regtry(&reginfo, &s))
	2511	goto got_it;
	2512	if (s >= strend)
	2513	break;
	2514	s += UTF8SKIP(s);
	2515	};
	2516	}
	2517	else {
	2518	do {
	2519	if (regtry(&reginfo, &s))
	2520	goto got_it;
	2521	} while (s++ < strend);
	2522	}
	2523	}
	2524
	2525	/* Failure. */
	2526	goto phooey;
	2527
	2528	got_it:
	2529	Safefree(swap);
	2530	RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
	2531
	2532	if (PL_reg_eval_set)
	2533	restore_pos(aTHX_ prog);
	2534	if (RXp_PAREN_NAMES(prog))
	2535	(void)hv_iterinit(RXp_PAREN_NAMES(prog));
	2536
	2537	/* make sure $`, $&, $', and $digit will work later */
	2538	if ( !(flags & REXEC_NOT_FIRST) ) {
	2539	RX_MATCH_COPY_FREE(rx);
	2540	if (flags & REXEC_COPY_STR) {
	2541	const I32 i = PL_regeol - startpos + (stringarg - strbeg);
	2542	#ifdef PERL_OLD_COPY_ON_WRITE
	2543	if ((SvIsCOW(sv)
	2544	\|\| (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
	2545	if (DEBUG_C_TEST) {
	2546	PerlIO_printf(Perl_debug_log,
	2547	"Copy on write: regexp capture, type %d\n",
	2548	(int) SvTYPE(sv));
	2549	}
	2550	prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
	2551	prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
	2552	assert (SvPOKp(prog->saved_copy));
	2553	} else
	2554	#endif
	2555	{
	2556	RX_MATCH_COPIED_on(rx);
	2557	s = savepvn(strbeg, i);
	2558	prog->subbeg = s;
	2559	}
	2560	prog->sublen = i;
	2561	}
	2562	else {
	2563	prog->subbeg = strbeg;
	2564	prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
	2565	}
	2566	}
	2567
	2568	return 1;
	2569
	2570	phooey:
	2571	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
	2572	PL_colors[4], PL_colors[5]));
	2573	if (PL_reg_eval_set)
	2574	restore_pos(aTHX_ prog);
	2575	if (swap) {
	2576	/* we failed :-( roll it back */
	2577	Safefree(prog->offs);
	2578	prog->offs = swap;
	2579	}
	2580
	2581	return 0;
	2582	}
	2583
	2584
	2585	/*
	2586	- regtry - try match at specific point
	2587	*/
	2588	STATIC I32 /* 0 failure, 1 success */
	2589	S_regtry(pTHX_ regmatch_info reginfo, char *startpos)
	2590	{
	2591	dVAR;
	2592	CHECKPOINT lastcp;
	2593	REGEXP *const rx = reginfo->prog;
	2594	regexp const prog = (struct regexp )SvANY(rx);
	2595	RXi_GET_DECL(prog,progi);
	2596	GET_RE_DEBUG_FLAGS_DECL;
	2597
	2598	PERL_ARGS_ASSERT_REGTRY;
	2599
	2600	reginfo->cutpoint=NULL;
	2601
	2602	if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
	2603	MAGIC *mg;
	2604
	2605	PL_reg_eval_set = RS_init;
	2606	DEBUG_EXECUTE_r(DEBUG_s(
	2607	PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
	2608	(IV)(PL_stack_sp - PL_stack_base));
	2609	));
	2610	SAVESTACK_CXPOS();
	2611	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
	2612	/* Otherwise OP_NEXTSTATE will free whatever on stack now. */
	2613	SAVETMPS;
	2614	/* Apparently this is not needed, judging by wantarray. */
	2615	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
	2616	cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
	2617
	2618	if (reginfo->sv) {
	2619	/* Make $_ available to executed code. */
	2620	if (reginfo->sv != DEFSV) {
	2621	SAVE_DEFSV;
	2622	DEFSV_set(reginfo->sv);
	2623	}
	2624
	2625	if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
	2626	&& (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
	2627	/* prepare for quick setting of pos */
	2628	#ifdef PERL_OLD_COPY_ON_WRITE
	2629	if (SvIsCOW(reginfo->sv))
	2630	sv_force_normal_flags(reginfo->sv, 0);
	2631	#endif
	2632	mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
	2633	&PL_vtbl_mglob, NULL, 0);
	2634	mg->mg_len = -1;
	2635	}
	2636	PL_reg_magic = mg;
	2637	PL_reg_oldpos = mg->mg_len;
	2638	SAVEDESTRUCTOR_X(restore_pos, prog);
	2639	}
	2640	if (!PL_reg_curpm) {
	2641	Newxz(PL_reg_curpm, 1, PMOP);
	2642	#ifdef USE_ITHREADS
	2643	{
	2644	SV* const repointer = &PL_sv_undef;
	2645	/* this regexp is also owned by the new PL_reg_curpm, which
	2646	will try to free it. */
	2647	av_push(PL_regex_padav, repointer);
	2648	PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
	2649	PL_regex_pad = AvARRAY(PL_regex_padav);
	2650	}
	2651	#endif
	2652	}
	2653	#ifdef USE_ITHREADS
	2654	/* It seems that non-ithreads works both with and without this code.
	2655	So for efficiency reasons it seems best not to have the code
	2656	compiled when it is not needed. */
	2657	/* This is safe against NULLs: */
	2658	ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
	2659	/* PM_reg_curpm owns a reference to this regexp. */
	2660	ReREFCNT_inc(rx);
	2661	#endif
	2662	PM_SETRE(PL_reg_curpm, rx);
	2663	PL_reg_oldcurpm = PL_curpm;
	2664	PL_curpm = PL_reg_curpm;
	2665	if (RXp_MATCH_COPIED(prog)) {
	2666	/* Here is a serious problem: we cannot rewrite subbeg,
	2667	since it may be needed if this match fails. Thus
	2668	$` inside (?{}) could fail... */
	2669	PL_reg_oldsaved = prog->subbeg;
	2670	PL_reg_oldsavedlen = prog->sublen;
	2671	#ifdef PERL_OLD_COPY_ON_WRITE
	2672	PL_nrs = prog->saved_copy;
	2673	#endif
	2674	RXp_MATCH_COPIED_off(prog);
	2675	}
	2676	else
	2677	PL_reg_oldsaved = NULL;
	2678	prog->subbeg = PL_bostr;
	2679	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
	2680	}
	2681	DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
	2682	prog->offs[0].start = *startpos - PL_bostr;
	2683	PL_reginput = *startpos;
	2684	PL_reglastparen = &prog->lastparen;
	2685	PL_reglastcloseparen = &prog->lastcloseparen;
	2686	prog->lastparen = 0;
	2687	prog->lastcloseparen = 0;
	2688	PL_regsize = 0;
	2689	PL_regoffs = prog->offs;
	2690	if (PL_reg_start_tmpl <= prog->nparens) {
	2691	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
	2692	if(PL_reg_start_tmp)
	2693	Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	2694	else
	2695	Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	2696	}
	2697
	2698	/* XXXX What this code is doing here?!!! There should be no need
	2699	to do this again and again, PL_reglastparen should take care of
	2700	this! --ilya*/
	2701
	2702	/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
	2703	* Actually, the code in regcppop() (which Ilya may be meaning by
	2704	* PL_reglastparen), is not needed at all by the test suite
	2705	* (op/regexp, op/pat, op/split), but that code is needed otherwise
	2706	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	2707	* Meanwhile, this code is needed for the
	2708	* above-mentioned test suite tests to succeed. The common theme
	2709	* on those tests seems to be returning null fields from matches.
	2710	* --jhi updated by dapm */
	2711	#if 1
	2712	if (prog->nparens) {
	2713	regexp_paren_pair *pp = PL_regoffs;
	2714	register I32 i;
	2715	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
	2716	++pp;
	2717	pp->start = -1;
	2718	pp->end = -1;
	2719	}
	2720	}
	2721	#endif
	2722	REGCP_SET(lastcp);
	2723	if (regmatch(reginfo, progi->program + 1)) {
	2724	PL_regoffs[0].end = PL_reginput - PL_bostr;
	2725	return 1;
	2726	}
	2727	if (reginfo->cutpoint)
	2728	*startpos= reginfo->cutpoint;
	2729	REGCP_UNWIND(lastcp);
	2730	return 0;
	2731	}
	2732
	2733
	2734	#define sayYES goto yes
	2735	#define sayNO goto no
	2736	#define sayNO_SILENT goto no_silent
	2737
	2738	/* we dont use STMT_START/END here because it leads to
	2739	"unreachable code" warnings, which are bogus, but distracting. */
	2740	#define CACHEsayNO \
	2741	if (ST.cache_mask) \
	2742	PL_reg_poscache[ST.cache_offset] \|= ST.cache_mask; \
	2743	sayNO
	2744
	2745	/* this is used to determine how far from the left messages like
	2746	'failed...' are printed. It should be set such that messages
	2747	are inline with the regop output that created them.
	2748	*/
	2749	#define REPORT_CODE_OFF 32
	2750
	2751
	2752	#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
	2753	#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
	2754
	2755	#define SLAB_FIRST(s) (&(s)->states[0])
	2756	#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
	2757
	2758	/* grab a new slab and return the first slot in it */
	2759
	2760	STATIC regmatch_state *
	2761	S_push_slab(pTHX)
	2762	{
	2763	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	2764	dMY_CXT;
	2765	#endif
	2766	regmatch_slab *s = PL_regmatch_slab->next;
	2767	if (!s) {
	2768	Newx(s, 1, regmatch_slab);
	2769	s->prev = PL_regmatch_slab;
	2770	s->next = NULL;
	2771	PL_regmatch_slab->next = s;
	2772	}
	2773	PL_regmatch_slab = s;
	2774	return SLAB_FIRST(s);
	2775	}
	2776
	2777
	2778	/* push a new state then goto it */
	2779
	2780	#define PUSH_STATE_GOTO(state, node) \
	2781	scan = node; \
	2782	st->resume_state = state; \
	2783	goto push_state;
	2784
	2785	/* push a new state with success backtracking, then goto it */
	2786
	2787	#define PUSH_YES_STATE_GOTO(state, node) \
	2788	scan = node; \
	2789	st->resume_state = state; \
	2790	goto push_yes_state;
	2791
	2792
	2793
	2794	/*
	2795
	2796	regmatch() - main matching routine
	2797
	2798	This is basically one big switch statement in a loop. We execute an op,
	2799	set 'next' to point the next op, and continue. If we come to a point which
	2800	we may need to backtrack to on failure such as (A\|B\|C), we push a
	2801	backtrack state onto the backtrack stack. On failure, we pop the top
	2802	state, and re-enter the loop at the state indicated. If there are no more
	2803	states to pop, we return failure.
	2804
	2805	Sometimes we also need to backtrack on success; for example /A+/, where
	2806	after successfully matching one A, we need to go back and try to
	2807	match another one; similarly for lookahead assertions: if the assertion
	2808	completes successfully, we backtrack to the state just before the assertion
	2809	and then carry on. In these cases, the pushed state is marked as
	2810	'backtrack on success too'. This marking is in fact done by a chain of
	2811	pointers, each pointing to the previous 'yes' state. On success, we pop to
	2812	the nearest yes state, discarding any intermediate failure-only states.
	2813	Sometimes a yes state is pushed just to force some cleanup code to be
	2814	called at the end of a successful match or submatch; e.g. (??{$re}) uses
	2815	it to free the inner regex.
	2816
	2817	Note that failure backtracking rewinds the cursor position, while
	2818	success backtracking leaves it alone.
	2819
	2820	A pattern is complete when the END op is executed, while a subpattern
	2821	such as (?=foo) is complete when the SUCCESS op is executed. Both of these
	2822	ops trigger the "pop to last yes state if any, otherwise return true"
	2823	behaviour.
	2824
	2825	A common convention in this function is to use A and B to refer to the two
	2826	subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
	2827	the subpattern to be matched possibly multiple times, while B is the entire
	2828	rest of the pattern. Variable and state names reflect this convention.
	2829
	2830	The states in the main switch are the union of ops and failure/success of
	2831	substates associated with that op. For example, IFMATCH is the op
	2832	that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
	2833	'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
	2834	successfully matched A and IFMATCH_A_fail is a state saying that we have
	2835	just failed to match A. Resume states always come in pairs. The backtrack
	2836	state we push is marked as 'IFMATCH_A', but when that is popped, we resume
	2837	at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
	2838	on success or failure.
	2839
	2840	The struct that holds a backtracking state is actually a big union, with
	2841	one variant for each major type of op. The variable st points to the
	2842	top-most backtrack struct. To make the code clearer, within each
	2843	block of code we #define ST to alias the relevant union.
	2844
	2845	Here's a concrete example of a (vastly oversimplified) IFMATCH
	2846	implementation:
	2847
	2848	switch (state) {
	2849	....
	2850
	2851	#define ST st->u.ifmatch
	2852
	2853	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	2854	ST.foo = ...; // some state we wish to save
	2855	...
	2856	// push a yes backtrack state with a resume value of
	2857	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
	2858	// first node of A:
	2859	PUSH_YES_STATE_GOTO(IFMATCH_A, A);
	2860	// NOTREACHED
	2861
	2862	case IFMATCH_A: // we have successfully executed A; now continue with B
	2863	next = B;
	2864	bar = ST.foo; // do something with the preserved value
	2865	break;
	2866
	2867	case IFMATCH_A_fail: // A failed, so the assertion failed
	2868	...; // do some housekeeping, then ...
	2869	sayNO; // propagate the failure
	2870
	2871	#undef ST
	2872
	2873	...
	2874	}
	2875
	2876	For any old-timers reading this who are familiar with the old recursive
	2877	approach, the code above is equivalent to:
	2878
	2879	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	2880	{
	2881	int foo = ...
	2882	...
	2883	if (regmatch(A)) {
	2884	next = B;
	2885	bar = foo;
	2886	break;
	2887	}
	2888	...; // do some housekeeping, then ...
	2889	sayNO; // propagate the failure
	2890	}
	2891
	2892	The topmost backtrack state, pointed to by st, is usually free. If you
	2893	want to claim it, populate any ST.foo fields in it with values you wish to
	2894	save, then do one of
	2895
	2896	PUSH_STATE_GOTO(resume_state, node);
	2897	PUSH_YES_STATE_GOTO(resume_state, node);
	2898
	2899	which sets that backtrack state's resume value to 'resume_state', pushes a
	2900	new free entry to the top of the backtrack stack, then goes to 'node'.
	2901	On backtracking, the free slot is popped, and the saved state becomes the
	2902	new free state. An ST.foo field in this new top state can be temporarily
	2903	accessed to retrieve values, but once the main loop is re-entered, it
	2904	becomes available for reuse.
	2905
	2906	Note that the depth of the backtrack stack constantly increases during the
	2907	left-to-right execution of the pattern, rather than going up and down with
	2908	the pattern nesting. For example the stack is at its maximum at Z at the
	2909	end of the pattern, rather than at X in the following:
	2910
	2911	/(((X)+)+)+....(Y)+....Z/
	2912
	2913	The only exceptions to this are lookahead/behind assertions and the cut,
	2914	(?>A), which pop all the backtrack states associated with A before
	2915	continuing.
	2916
	2917	Backtrack state structs are allocated in slabs of about 4K in size.
	2918	PL_regmatch_state and st always point to the currently active state,
	2919	and PL_regmatch_slab points to the slab currently containing
	2920	PL_regmatch_state. The first time regmatch() is called, the first slab is
	2921	allocated, and is never freed until interpreter destruction. When the slab
	2922	is full, a new one is allocated and chained to the end. At exit from
	2923	regmatch(), slabs allocated since entry are freed.
	2924
	2925	*/
	2926
	2927
	2928	#define DEBUG_STATE_pp(pp) \
	2929	DEBUG_STATE_r({ \
	2930	DUMP_EXEC_POS(locinput, scan, utf8_target); \
	2931	PerlIO_printf(Perl_debug_log, \
	2932	" %*s"pp" %s%s%s%s%s\n", \
	2933	depth*2, "", \
	2934	PL_reg_name[st->resume_state], \
	2935	((st==yes_state\|\|st==mark_state) ? "[" : ""), \
	2936	((st==yes_state) ? "Y" : ""), \
	2937	((st==mark_state) ? "M" : ""), \
	2938	((st==yes_state\|\|st==mark_state) ? "]" : "") \
	2939	); \
	2940	});
	2941
	2942
	2943	#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
	2944
	2945	#ifdef DEBUGGING
	2946
	2947	STATIC void
	2948	S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
	2949	const char start, const char end, const char *blurb)
	2950	{
	2951	const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
	2952
	2953	PERL_ARGS_ASSERT_DEBUG_START_MATCH;
	2954
	2955	if (!PL_colorset)
	2956	reginitcolors();
	2957	{
	2958	RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
	2959	RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
	2960
	2961	RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
	2962	start, end - start, 60);
	2963
	2964	PerlIO_printf(Perl_debug_log,
	2965	"%s%s REx%s %s against %s\n",
	2966	PL_colors[4], blurb, PL_colors[5], s0, s1);
	2967
	2968	if (utf8_target\|\|utf8_pat)
	2969	PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
	2970	utf8_pat ? "pattern" : "",
	2971	utf8_pat && utf8_target ? " and " : "",
	2972	utf8_target ? "string" : ""
	2973	);
	2974	}
	2975	}
	2976
	2977	STATIC void
	2978	S_dump_exec_pos(pTHX_ const char *locinput,
	2979	const regnode *scan,
	2980	const char *loc_regeol,
	2981	const char *loc_bostr,
	2982	const char *loc_reg_starttry,
	2983	const bool utf8_target)
	2984	{
	2985	const int docolor = PL_colors[0] \|\| PL_colors[2] \|\| *PL_colors[4];
	2986	const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
	2987	int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
	2988	/* The part of the string before starttry has one color
	2989	(pref0_len chars), between starttry and current
	2990	position another one (pref_len - pref0_len chars),
	2991	after the current position the third one.
	2992	We assume that pref0_len <= pref_len, otherwise we
	2993	decrease pref0_len. */
	2994	int pref_len = (locinput - loc_bostr) > (5 + taill) - l
	2995	? (5 + taill) - l : locinput - loc_bostr;
	2996	int pref0_len;
	2997
	2998	PERL_ARGS_ASSERT_DUMP_EXEC_POS;
	2999
	3000	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput - pref_len)))
	3001	pref_len++;
	3002	pref0_len = pref_len - (locinput - loc_reg_starttry);
	3003	if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
	3004	l = ( loc_regeol - locinput > (5 + taill) - pref_len
	3005	? (5 + taill) - pref_len : loc_regeol - locinput);
	3006	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput + l)))
	3007	l--;
	3008	if (pref0_len < 0)
	3009	pref0_len = 0;
	3010	if (pref0_len > pref_len)
	3011	pref0_len = pref_len;
	3012	{
	3013	const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0;
	3014
	3015	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
	3016	(locinput - pref_len),pref0_len, 60, 4, 5);
	3017
	3018	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
	3019	(locinput - pref_len + pref0_len),
	3020	pref_len - pref0_len, 60, 2, 3);
	3021
	3022	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
	3023	locinput, loc_regeol - locinput, 10, 0, 1);
	3024
	3025	const STRLEN tlen=len0+len1+len2;
	3026	PerlIO_printf(Perl_debug_log,
	3027	"%4"IVdf" <%.s%.s%s%.s>%s\|",
	3028	(IV)(locinput - loc_bostr),
	3029	len0, s0,
	3030	len1, s1,
	3031	(docolor ? "" : "> <"),
	3032	len2, s2,
	3033	(int)(tlen > 19 ? 0 : 19 - tlen),
	3034	"");
	3035	}
	3036	}
	3037
	3038	#endif
	3039
	3040	/* reg_check_named_buff_matched()
	3041	* Checks to see if a named buffer has matched. The data array of
	3042	* buffer numbers corresponding to the buffer is expected to reside
	3043	* in the regexp->data->data array in the slot stored in the ARG() of
	3044	* node involved. Note that this routine doesn't actually care about the
	3045	* name, that information is not preserved from compilation to execution.
	3046	* Returns the index of the leftmost defined buffer with the given name
	3047	* or 0 if non of the buffers matched.
	3048	*/
	3049	STATIC I32
	3050	S_reg_check_named_buff_matched(pTHX_ const regexp rex, const regnode scan)
	3051	{
	3052	I32 n;
	3053	RXi_GET_DECL(rex,rexi);
	3054	SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	3055	I32 nums=(I32)SvPVX(sv_dat);
	3056
	3057	PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
	3058
	3059	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	3060	if ((I32)*PL_reglastparen >= nums[n] &&
	3061	PL_regoffs[nums[n]].end != -1)
	3062	{
	3063	return nums[n];
	3064	}
	3065	}
	3066	return 0;
	3067	}
	3068
	3069
	3070	/* free all slabs above current one - called during LEAVE_SCOPE */
	3071
	3072	STATIC void
	3073	S_clear_backtrack_stack(pTHX_ void *p)
	3074	{
	3075	regmatch_slab *s = PL_regmatch_slab->next;
	3076	PERL_UNUSED_ARG(p);
	3077
	3078	if (!s)
	3079	return;
	3080	PL_regmatch_slab->next = NULL;
	3081	while (s) {
	3082	regmatch_slab * const osl = s;
	3083	s = s->next;
	3084	Safefree(osl);
	3085	}
	3086	}
	3087
	3088
	3089	#define SETREX(Re1,Re2) \
	3090	if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
	3091	Re1 = (Re2)
	3092
	3093	STATIC I32 /* 0 failure, 1 success */
	3094	S_regmatch(pTHX_ regmatch_info reginfo, regnode prog)
	3095	{
	3096	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	3097	dMY_CXT;
	3098	#endif
	3099	dVAR;
	3100	register const bool utf8_target = PL_reg_match_utf8;
	3101	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	3102	REGEXP *rex_sv = reginfo->prog;
	3103	regexp rex = (struct regexp )SvANY(rex_sv);
	3104	RXi_GET_DECL(rex,rexi);
	3105	I32 oldsave;
	3106	/* the current state. This is a cached copy of PL_regmatch_state */
	3107	register regmatch_state *st;
	3108	/* cache heavy used fields of st in registers */
	3109	register regnode *scan;
	3110	register regnode *next;
	3111	register U32 n = 0; /* general value; init to avoid compiler warning */
	3112	register I32 ln = 0; /* len or last; init to avoid compiler warning */
	3113	register char *locinput = PL_reginput;
	3114	register I32 nextchr; /* is always set to UCHARAT(locinput) */
	3115
	3116	bool result = 0; /* return value of S_regmatch */
	3117	int depth = 0; /* depth of backtrack stack */
	3118	U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
	3119	const U32 max_nochange_depth =
	3120	(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
	3121	3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
	3122	regmatch_state yes_state = NULL; / state to pop to on success of
	3123	subpattern */
	3124	/* mark_state piggy backs on the yes_state logic so that when we unwind
	3125	the stack on success we can update the mark_state as we go */
	3126	regmatch_state mark_state = NULL; / last mark state we have seen */
	3127	regmatch_state cur_eval = NULL; / most recent EVAL_AB state */
	3128	struct regmatch_state cur_curlyx = NULL; / most recent curlyx */
	3129	U32 state_num;
	3130	bool no_final = 0; /* prevent failure from backtracking? */
	3131	bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
	3132	char *startpoint = PL_reginput;
	3133	SV popmark = NULL; / are we looking for a mark? */
	3134	SV sv_commit = NULL; / last mark name seen in failure */
	3135	SV sv_yes_mark = NULL; / last mark name we have seen
	3136	during a successful match */
	3137	U32 lastopen = 0; /* last open we saw */
	3138	bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
	3139	SV* const oreplsv = GvSV(PL_replgv);
	3140	/* these three flags are set by various ops to signal information to
	3141	* the very next op. They have a useful lifetime of exactly one loop
	3142	* iteration, and are not preserved or restored by state pushes/pops
	3143	*/
	3144	bool sw = 0; /* the condition value in (?(cond)a\|b) */
	3145	bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
	3146	int logical = 0; /* the following EVAL is:
	3147	0: (?{...})
	3148	1: (?(?{...})X\|Y)
	3149	2: (??{...})
	3150	or the following IFMATCH/UNLESSM is:
	3151	false: plain (?=foo)
	3152	true: used as a condition: (?(?=foo))
	3153	*/
	3154	#ifdef DEBUGGING
	3155	GET_RE_DEBUG_FLAGS_DECL;
	3156	#endif
	3157
	3158	PERL_ARGS_ASSERT_REGMATCH;
	3159
	3160	DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
	3161	PerlIO_printf(Perl_debug_log,"regmatch start\n");
	3162	}));
	3163	/* on first ever call to regmatch, allocate first slab */
	3164	if (!PL_regmatch_slab) {
	3165	Newx(PL_regmatch_slab, 1, regmatch_slab);
	3166	PL_regmatch_slab->prev = NULL;
	3167	PL_regmatch_slab->next = NULL;
	3168	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
	3169	}
	3170
	3171	oldsave = PL_savestack_ix;
	3172	SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
	3173	SAVEVPTR(PL_regmatch_slab);
	3174	SAVEVPTR(PL_regmatch_state);
	3175
	3176	/* grab next free state slot */
	3177	st = ++PL_regmatch_state;
	3178	if (st > SLAB_LAST(PL_regmatch_slab))
	3179	st = PL_regmatch_state = S_push_slab(aTHX);
	3180
	3181	/* Note that nextchr is a byte even in UTF */
	3182	nextchr = UCHARAT(locinput);
	3183	scan = prog;
	3184	while (scan != NULL) {
	3185
	3186	DEBUG_EXECUTE_r( {
	3187	SV * const prop = sv_newmortal();
	3188	regnode *rnext=regnext(scan);
	3189	DUMP_EXEC_POS( locinput, scan, utf8_target );
	3190	regprop(rex, prop, scan);
	3191
	3192	PerlIO_printf(Perl_debug_log,
	3193	"%3"IVdf":%*s%s(%"IVdf")\n",
	3194	(IV)(scan - rexi->program), depth*2, "",
	3195	SvPVX_const(prop),
	3196	(PL_regkind[OP(scan)] == END \|\| !rnext) ?
	3197	0 : (IV)(rnext - rexi->program));
	3198	});
	3199
	3200	next = scan + NEXT_OFF(scan);
	3201	if (next == scan)
	3202	next = NULL;
	3203	state_num = OP(scan);
	3204
	3205	reenter_switch:
	3206
	3207	assert(PL_reglastparen == &rex->lastparen);
	3208	assert(PL_reglastcloseparen == &rex->lastcloseparen);
	3209	assert(PL_regoffs == rex->offs);
	3210
	3211	switch (state_num) {
	3212	case BOL:
	3213	if (locinput == PL_bostr)
	3214	{
	3215	/* reginfo->till = reginfo->bol; */
	3216	break;
	3217	}
	3218	sayNO;
	3219	case MBOL:
	3220	if (locinput == PL_bostr \|\|
	3221	((nextchr \|\| locinput < PL_regeol) && locinput[-1] == '\n'))
	3222	{
	3223	break;
	3224	}
	3225	sayNO;
	3226	case SBOL:
	3227	if (locinput == PL_bostr)
	3228	break;
	3229	sayNO;
	3230	case GPOS:
	3231	if (locinput == reginfo->ganch)
	3232	break;
	3233	sayNO;
	3234
	3235	case KEEPS:
	3236	/* update the startpoint */
	3237	st->u.keeper.val = PL_regoffs[0].start;
	3238	PL_reginput = locinput;
	3239	PL_regoffs[0].start = locinput - PL_bostr;
	3240	PUSH_STATE_GOTO(KEEPS_next, next);
	3241	/NOT-REACHED/
	3242	case KEEPS_next_fail:
	3243	/* rollback the start point change */
	3244	PL_regoffs[0].start = st->u.keeper.val;
	3245	sayNO_SILENT;
	3246	/NOT-REACHED/
	3247	case EOL:
	3248	goto seol;
	3249	case MEOL:
	3250	if ((nextchr \|\| locinput < PL_regeol) && nextchr != '\n')
	3251	sayNO;
	3252	break;
	3253	case SEOL:
	3254	seol:
	3255	if ((nextchr \|\| locinput < PL_regeol) && nextchr != '\n')
	3256	sayNO;
	3257	if (PL_regeol - locinput > 1)
	3258	sayNO;
	3259	break;
	3260	case EOS:
	3261	if (PL_regeol != locinput)
	3262	sayNO;
	3263	break;
	3264	case SANY:
	3265	if (!nextchr && locinput >= PL_regeol)
	3266	sayNO;
	3267	if (utf8_target) {
	3268	locinput += PL_utf8skip[nextchr];
	3269	if (locinput > PL_regeol)
	3270	sayNO;
	3271	nextchr = UCHARAT(locinput);
	3272	}
	3273	else
	3274	nextchr = UCHARAT(++locinput);
	3275	break;
	3276	case CANY:
	3277	if (!nextchr && locinput >= PL_regeol)
	3278	sayNO;
	3279	nextchr = UCHARAT(++locinput);
	3280	break;
	3281	case REG_ANY:
	3282	if ((!nextchr && locinput >= PL_regeol) \|\| nextchr == '\n')
	3283	sayNO;
	3284	if (utf8_target) {
	3285	locinput += PL_utf8skip[nextchr];
	3286	if (locinput > PL_regeol)
	3287	sayNO;
	3288	nextchr = UCHARAT(locinput);
	3289	}
	3290	else
	3291	nextchr = UCHARAT(++locinput);
	3292	break;
	3293
	3294	#undef ST
	3295	#define ST st->u.trie
	3296	case TRIEC:
	3297	/* In this case the charclass data is available inline so
	3298	we can fail fast without a lot of extra overhead.
	3299	*/
	3300	if (scan->flags == EXACT \|\| !utf8_target) {
	3301	if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
	3302	DEBUG_EXECUTE_r(
	3303	PerlIO_printf(Perl_debug_log,
	3304	"%*s %sfailed to match trie start class...%s\n",
	3305	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3306	);
	3307	sayNO_SILENT;
	3308	/* NOTREACHED */
	3309	}
	3310	}
	3311	/* FALL THROUGH */
	3312	case TRIE:
	3313	/* the basic plan of execution of the trie is:
	3314	* At the beginning, run though all the states, and
	3315	* find the longest-matching word. Also remember the position
	3316	* of the shortest matching word. For example, this pattern:
	3317	* 1 2 3 4 5
	3318	* ab\|a\|x\|abcd\|abc
	3319	* when matched against the string "abcde", will generate
	3320	* accept states for all words except 3, with the longest
	3321	* matching word being 4, and the shortest being 1 (with
	3322	* the position being after char 1 of the string).
	3323	*
	3324	* Then for each matching word, in word order (i.e. 1,2,4,5),
	3325	* we run the remainder of the pattern; on each try setting
	3326	* the current position to the character following the word,
	3327	* returning to try the next word on failure.
	3328	*
	3329	* We avoid having to build a list of words at runtime by
	3330	* using a compile-time structure, wordinfo[].prev, which
	3331	* gives, for each word, the previous accepting word (if any).
	3332	* In the case above it would contain the mappings 1->2, 2->0,
	3333	* 3->0, 4->5, 5->1. We can use this table to generate, from
	3334	* the longest word (4 above), a list of all words, by
	3335	* following the list of prev pointers; this gives us the
	3336	* unordered list 4,5,1,2. Then given the current word we have
	3337	* just tried, we can go through the list and find the
	3338	* next-biggest word to try (so if we just failed on word 2,
	3339	* the next in the list is 4).
	3340	*
	3341	* Since at runtime we don't record the matching position in
	3342	* the string for each word, we have to work that out for
	3343	* each word we're about to process. The wordinfo table holds
	3344	* the character length of each word; given that we recorded
	3345	* at the start: the position of the shortest word and its
	3346	* length in chars, we just need to move the pointer the
	3347	* difference between the two char lengths. Depending on
	3348	* Unicode status and folding, that's cheap or expensive.
	3349	*
	3350	* This algorithm is optimised for the case where are only a
	3351	* small number of accept states, i.e. 0,1, or maybe 2.
	3352	* With lots of accepts states, and having to try all of them,
	3353	* it becomes quadratic on number of accept states to find all
	3354	* the next words.
	3355	*/
	3356
	3357	{
	3358	/* what type of TRIE am I? (utf8 makes this contextual) */
	3359	DECL_TRIE_TYPE(scan);
	3360
	3361	/* what trie are we using right now */
	3362	reg_trie_data * const trie
	3363	= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
	3364	HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
	3365	U32 state = trie->startstate;
	3366
	3367	if (trie->bitmap && trie_type != trie_utf8_fold &&
	3368	!TRIE_BITMAP_TEST(trie,*locinput)
	3369	) {
	3370	if (trie->states[ state ].wordnum) {
	3371	DEBUG_EXECUTE_r(
	3372	PerlIO_printf(Perl_debug_log,
	3373	"%*s %smatched empty string...%s\n",
	3374	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3375	);
	3376	if (!trie->jump)
	3377	break;
	3378	} else {
	3379	DEBUG_EXECUTE_r(
	3380	PerlIO_printf(Perl_debug_log,
	3381	"%*s %sfailed to match trie start class...%s\n",
	3382	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3383	);
	3384	sayNO_SILENT;
	3385	}
	3386	}
	3387
	3388	{
	3389	U8 uc = ( U8 )locinput;
	3390
	3391	STRLEN len = 0;
	3392	STRLEN foldlen = 0;
	3393	U8 uscan = (U8)NULL;
	3394	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	3395	U32 charcount = 0; /* how many input chars we have matched */
	3396	U32 accepted = 0; /* have we seen any accepting states? */
	3397
	3398	ST.B = next;
	3399	ST.jump = trie->jump;
	3400	ST.me = scan;
	3401	ST.firstpos = NULL;
	3402	ST.longfold = FALSE; /* char longer if folded => it's harder */
	3403	ST.nextword = 0;
	3404
	3405	/* fully traverse the TRIE; note the position of the
	3406	shortest accept state and the wordnum of the longest
	3407	accept state */
	3408
	3409	while ( state && uc <= (U8*)PL_regeol ) {
	3410	U32 base = trie->states[ state ].trans.base;
	3411	UV uvc = 0;
	3412	U16 charid = 0;
	3413	U16 wordnum;
	3414	wordnum = trie->states[ state ].wordnum;
	3415
	3416	if (wordnum) { /* it's an accept state */
	3417	if (!accepted) {
	3418	accepted = 1;
	3419	/* record first match position */
	3420	if (ST.longfold) {
	3421	ST.firstpos = (U8*)locinput;
	3422	ST.firstchars = 0;
	3423	}
	3424	else {
	3425	ST.firstpos = uc;
	3426	ST.firstchars = charcount;
	3427	}
	3428	}
	3429	if (!ST.nextword \|\| wordnum < ST.nextword)
	3430	ST.nextword = wordnum;
	3431	ST.topword = wordnum;
	3432	}
	3433
	3434	DEBUG_TRIE_EXECUTE_r({
	3435	DUMP_EXEC_POS( (char *)uc, scan, utf8_target );
	3436	PerlIO_printf( Perl_debug_log,
	3437	"%*s %sState: %4"UVxf" Accepted: %c ",
	3438	2+depth * 2, "", PL_colors[4],
	3439	(UV)state, (accepted ? 'Y' : 'N'));
	3440	});
	3441
	3442	/* read a char and goto next state */
	3443	if ( base ) {
	3444	I32 offset;
	3445	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	3446	uscan, len, uvc, charid, foldlen,
	3447	foldbuf, uniflags);
	3448	charcount++;
	3449	if (foldlen>0)
	3450	ST.longfold = TRUE;
	3451	if (charid &&
	3452	( ((offset =
	3453	base + charid - 1 - trie->uniquecharcount)) >= 0)
	3454
	3455	&& ((U32)offset < trie->lasttrans)
	3456	&& trie->trans[offset].check == state)
	3457	{
	3458	state = trie->trans[offset].next;
	3459	}
	3460	else {
	3461	state = 0;
	3462	}
	3463	uc += len;
	3464
	3465	}
	3466	else {
	3467	state = 0;
	3468	}
	3469	DEBUG_TRIE_EXECUTE_r(
	3470	PerlIO_printf( Perl_debug_log,
	3471	"Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
	3472	charid, uvc, (UV)state, PL_colors[5] );
	3473	);
	3474	}
	3475	if (!accepted)
	3476	sayNO;
	3477
	3478	/* calculate total number of accept states */
	3479	{
	3480	U16 w = ST.topword;
	3481	accepted = 0;
	3482	while (w) {
	3483	w = trie->wordinfo[w].prev;
	3484	accepted++;
	3485	}
	3486	ST.accepted = accepted;
	3487	}
	3488
	3489	DEBUG_EXECUTE_r(
	3490	PerlIO_printf( Perl_debug_log,
	3491	"%*s %sgot %"IVdf" possible matches%s\n",
	3492	REPORT_CODE_OFF + depth * 2, "",
	3493	PL_colors[4], (IV)ST.accepted, PL_colors[5] );
	3494	);
	3495	goto trie_first_try; /* jump into the fail handler */
	3496	}}
	3497	/* NOTREACHED */
	3498
	3499	case TRIE_next_fail: /* we failed - try next alternative */
	3500	if ( ST.jump) {
	3501	REGCP_UNWIND(ST.cp);
	3502	for (n = *PL_reglastparen; n > ST.lastparen; n--)
	3503	PL_regoffs[n].end = -1;
	3504	*PL_reglastparen = n;
	3505	}
	3506	if (!--ST.accepted) {
	3507	DEBUG_EXECUTE_r({
	3508	PerlIO_printf( Perl_debug_log,
	3509	"%*s %sTRIE failed...%s\n",
	3510	REPORT_CODE_OFF+depth*2, "",
	3511	PL_colors[4],
	3512	PL_colors[5] );
	3513	});
	3514	sayNO_SILENT;
	3515	}
	3516	{
	3517	/* Find next-highest word to process. Note that this code
	3518	* is O(N^2) per trie run (O(N) per branch), so keep tight */
	3519	register U16 min = 0;
	3520	register U16 word;
	3521	register U16 const nextword = ST.nextword;
	3522	register reg_trie_wordinfo * const wordinfo
	3523	= ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
	3524	for (word=ST.topword; word; word=wordinfo[word].prev) {
	3525	if (word > nextword && (!min \|\| word < min))
	3526	min = word;
	3527	}
	3528	ST.nextword = min;
	3529	}
	3530
	3531	trie_first_try:
	3532	if (do_cutgroup) {
	3533	do_cutgroup = 0;
	3534	no_final = 0;
	3535	}
	3536
	3537	if ( ST.jump) {
	3538	ST.lastparen = *PL_reglastparen;
	3539	REGCP_SET(ST.cp);
	3540	}
	3541
	3542	/* find start char of end of current word */
	3543	{
	3544	U32 chars; /* how many chars to skip */
	3545	U8 *uc = ST.firstpos;
	3546	reg_trie_data * const trie
	3547	= (reg_trie_data*)rexi->data->data[ARG(ST.me)];
	3548
	3549	assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3550	>= ST.firstchars);
	3551	chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3552	- ST.firstchars;
	3553
	3554	if (ST.longfold) {
	3555	/* the hard option - fold each char in turn and find
	3556	* its folded length (which may be different */
	3557	U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
	3558	STRLEN foldlen;
	3559	STRLEN len;
	3560	UV uvc;
	3561	U8 *uscan;
	3562
	3563	while (chars) {
	3564	if (utf8_target) {
	3565	uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len,
	3566	uniflags);
	3567	uc += len;
	3568	}
	3569	else {
	3570	uvc = *uc;
	3571	uc++;
	3572	}
	3573	uvc = to_uni_fold(uvc, foldbuf, &foldlen);
	3574	uscan = foldbuf;
	3575	while (foldlen) {
	3576	if (!--chars)
	3577	break;
	3578	uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len,
	3579	uniflags);
	3580	uscan += len;
	3581	foldlen -= len;
	3582	}
	3583	}
	3584	}
	3585	else {
	3586	if (utf8_target)
	3587	while (chars--)
	3588	uc += UTF8SKIP(uc);
	3589	else
	3590	uc += chars;
	3591	}
	3592	PL_reginput = (char *)uc;
	3593	}
	3594
	3595	scan = (ST.jump && ST.jump[ST.nextword])
	3596	? ST.me + ST.jump[ST.nextword]
	3597	: ST.B;
	3598
	3599	DEBUG_EXECUTE_r({
	3600	PerlIO_printf( Perl_debug_log,
	3601	"%*s %sTRIE matched word #%d, continuing%s\n",
	3602	REPORT_CODE_OFF+depth*2, "",
	3603	PL_colors[4],
	3604	ST.nextword,
	3605	PL_colors[5]
	3606	);
	3607	});
	3608
	3609	if (ST.accepted > 1 \|\| has_cutgroup) {
	3610	PUSH_STATE_GOTO(TRIE_next, scan);
	3611	/* NOTREACHED */
	3612	}
	3613	/* only one choice left - just continue */
	3614	DEBUG_EXECUTE_r({
	3615	AV *const trie_words
	3616	= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
	3617	SV ** const tmp = av_fetch( trie_words,
	3618	ST.nextword-1, 0 );
	3619	SV *sv= tmp ? sv_newmortal() : NULL;
	3620
	3621	PerlIO_printf( Perl_debug_log,
	3622	"%*s %sonly one match left, short-circuiting: #%d <%s>%s\n",
	3623	REPORT_CODE_OFF+depth*2, "", PL_colors[4],
	3624	ST.nextword,
	3625	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	3626	PL_colors[0], PL_colors[1],
	3627	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)\|PERL_PV_ESCAPE_NONASCII
	3628	)
	3629	: "not compiled under -Dr",
	3630	PL_colors[5] );
	3631	});
	3632
	3633	locinput = PL_reginput;
	3634	nextchr = UCHARAT(locinput);
	3635	continue; /* execute rest of RE */
	3636	/* NOTREACHED */
	3637	#undef ST
	3638
	3639	case EXACT: {
	3640	char *s = STRING(scan);
	3641	ln = STR_LEN(scan);
	3642	if (utf8_target != UTF_PATTERN) {
	3643	/* The target and the pattern have differing utf8ness. */
	3644	char *l = locinput;
	3645	const char * const e = s + ln;
	3646
	3647	if (utf8_target) {
	3648	/* The target is utf8, the pattern is not utf8. */
	3649	while (s < e) {
	3650	STRLEN ulen;
	3651	if (l >= PL_regeol)
	3652	sayNO;
	3653	if (NATIVE_TO_UNI((U8)s) !=
	3654	utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
	3655	uniflags))
	3656	sayNO;
	3657	l += ulen;
	3658	s ++;
	3659	}
	3660	}
	3661	else {
	3662	/* The target is not utf8, the pattern is utf8. */
	3663	while (s < e) {
	3664	STRLEN ulen;
	3665	if (l >= PL_regeol)
	3666	sayNO;
	3667	if (NATIVE_TO_UNI(((U8)l)) !=
	3668	utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
	3669	uniflags))
	3670	sayNO;
	3671	s += ulen;
	3672	l ++;
	3673	}
	3674	}
	3675	locinput = l;
	3676	nextchr = UCHARAT(locinput);
	3677	break;
	3678	}
	3679	/* The target and the pattern have the same utf8ness. */
	3680	/* Inline the first character, for speed. */
	3681	if (UCHARAT(s) != nextchr)
	3682	sayNO;
	3683	if (PL_regeol - locinput < ln)
	3684	sayNO;
	3685	if (ln > 1 && memNE(s, locinput, ln))
	3686	sayNO;
	3687	locinput += ln;
	3688	nextchr = UCHARAT(locinput);
	3689	break;
	3690	}
	3691	case EXACTFL: {
	3692	re_fold_t folder;
	3693	const U8 * fold_array;
	3694	const char * s;
	3695
	3696	PL_reg_flags \|= RF_tainted;
	3697	folder = foldEQ_locale;
	3698	fold_array = PL_fold_locale;
	3699	goto do_exactf;
	3700
	3701	case EXACTFU:
	3702	folder = foldEQ_latin1;
	3703	fold_array = PL_fold_latin1;
	3704	goto do_exactf;
	3705
	3706	case EXACTF:
	3707	folder = foldEQ;
	3708	fold_array = PL_fold;
	3709
	3710	do_exactf:
	3711	s = STRING(scan);
	3712	ln = STR_LEN(scan);
	3713
	3714	if (utf8_target \|\| UTF_PATTERN) {
	3715	/* Either target or the pattern are utf8. */
	3716	const char * const l = locinput;
	3717	char *e = PL_regeol;
	3718
	3719	if (! foldEQ_utf8(s, 0, ln, cBOOL(UTF_PATTERN),
	3720	l, &e, 0, utf8_target)) {
	3721	/* One more case for the sharp s:
	3722	* pack("U0U*", 0xDF) =~ /ss/i,
	3723	* the 0xC3 0x9F are the UTF-8
	3724	* byte sequence for the U+00DF. */
	3725
	3726	if (!(utf8_target &&
	3727	toLOWER(s[0]) == 's' &&
	3728	ln >= 2 &&
	3729	toLOWER(s[1]) == 's' &&
	3730	(U8)l[0] == 0xC3 &&
	3731	e - l >= 2 &&
	3732	(U8)l[1] == 0x9F))
	3733	sayNO;
	3734	}
	3735	locinput = e;
	3736	nextchr = UCHARAT(locinput);
	3737	break;
	3738	}
	3739
	3740	/* Neither the target and the pattern are utf8. */
	3741
	3742	/* Inline the first character, for speed. */
	3743	if (UCHARAT(s) != nextchr &&
	3744	UCHARAT(s) != fold_array[nextchr])
	3745	{
	3746	sayNO;
	3747	}
	3748	if (PL_regeol - locinput < ln)
	3749	sayNO;
	3750	if (ln > 1 && ! folder(s, locinput, ln))
	3751	sayNO;
	3752	locinput += ln;
	3753	nextchr = UCHARAT(locinput);
	3754	break;
	3755	}
	3756
	3757	/* XXX Could improve efficiency by separating these all out using a
	3758	* macro or in-line function. At that point regcomp.c would no longer
	3759	* have to set the FLAGS fields of these */
	3760	case BOUNDL:
	3761	case NBOUNDL:
	3762	PL_reg_flags \|= RF_tainted;
	3763	/* FALL THROUGH */
	3764	case BOUND:
	3765	case BOUNDU:
	3766	case BOUNDA:
	3767	case NBOUND:
	3768	case NBOUNDU:
	3769	case NBOUNDA:
	3770	/* was last char in word? */
	3771	if (utf8_target && FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET) {
	3772	if (locinput == PL_bostr)
	3773	ln = '\n';
	3774	else {
	3775	const U8 * const r = reghop3((U8)locinput, -1, (U8)PL_bostr);
	3776
	3777	ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
	3778	}
	3779	if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
	3780	ln = isALNUM_uni(ln);
	3781	LOAD_UTF8_CHARCLASS_ALNUM();
	3782	n = swash_fetch(PL_utf8_alnum, (U8*)locinput, utf8_target);
	3783	}
	3784	else {
	3785	ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
	3786	n = isALNUM_LC_utf8((U8*)locinput);
	3787	}
	3788	}
	3789	else {
	3790
	3791	/* Here the string isn't utf8, or is utf8 and only ascii
	3792	* characters are to match \w. In the latter case looking at
	3793	* the byte just prior to the current one may be just the final
	3794	* byte of a multi-byte character. This is ok. There are two
	3795	* cases:
	3796	* 1) it is a single byte character, and then the test is doing
	3797	* just what it's supposed to.
	3798	* 2) it is a multi-byte character, in which case the final
	3799	* byte is never mistakable for ASCII, and so the test
	3800	* will say it is not a word character, which is the
	3801	* correct answer. */
	3802	ln = (locinput != PL_bostr) ?
	3803	UCHARAT(locinput - 1) : '\n';
	3804	switch (FLAGS(scan)) {
	3805	case REGEX_UNICODE_CHARSET:
	3806	ln = isWORDCHAR_L1(ln);
	3807	n = isWORDCHAR_L1(nextchr);
	3808	break;
	3809	case REGEX_LOCALE_CHARSET:
	3810	ln = isALNUM_LC(ln);
	3811	n = isALNUM_LC(nextchr);
	3812	break;
	3813	case REGEX_DEPENDS_CHARSET:
	3814	ln = isALNUM(ln);
	3815	n = isALNUM(nextchr);
	3816	break;
	3817	case REGEX_ASCII_RESTRICTED_CHARSET:
	3818	ln = isWORDCHAR_A(ln);
	3819	n = isWORDCHAR_A(nextchr);
	3820	break;
	3821	default:
	3822	Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
	3823	break;
	3824	}
	3825	}
	3826	/* Note requires that all BOUNDs be lower than all NBOUNDs in
	3827	* regcomp.sym */
	3828	if (((!ln) == (!n)) == (OP(scan) < NBOUND))
	3829	sayNO;
	3830	break;
	3831	case ANYOFV:
	3832	case ANYOF:
	3833	if (utf8_target \|\| state_num == ANYOFV) {
	3834	STRLEN inclasslen = PL_regeol - locinput;
	3835	if (locinput >= PL_regeol)
	3836	sayNO;
	3837
	3838	if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, utf8_target))
	3839	sayNO;
	3840	locinput += inclasslen;
	3841	nextchr = UCHARAT(locinput);
	3842	break;
	3843	}
	3844	else {
	3845	if (nextchr < 0)
	3846	nextchr = UCHARAT(locinput);
	3847	if (!nextchr && locinput >= PL_regeol)
	3848	sayNO;
	3849	if (!REGINCLASS(rex, scan, (U8*)locinput))
	3850	sayNO;
	3851	nextchr = UCHARAT(++locinput);
	3852	break;
	3853	}
	3854	break;
	3855	/* Special char classes - The defines start on line 129 or so */
	3856	CCC_TRY_U(ALNUM, NALNUM, isWORDCHAR,
	3857	ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8,
	3858	ALNUMU, NALNUMU, isWORDCHAR_L1,
	3859	ALNUMA, NALNUMA, isWORDCHAR_A,
	3860	perl_word, "a");
	3861
	3862	CCC_TRY_U(SPACE, NSPACE, isSPACE,
	3863	SPACEL, NSPACEL, isSPACE_LC, isSPACE_LC_utf8,
	3864	SPACEU, NSPACEU, isSPACE_L1,
	3865	SPACEA, NSPACEA, isSPACE_A,
	3866	perl_space, " ");
	3867
	3868	CCC_TRY(DIGIT, NDIGIT, isDIGIT,
	3869	DIGITL, NDIGITL, isDIGIT_LC, isDIGIT_LC_utf8,
	3870	DIGITA, NDIGITA, isDIGIT_A,
	3871	posix_digit, "0");
	3872
	3873	case CLUMP: /* Match \X: logical Unicode character. This is defined as
	3874	a Unicode extended Grapheme Cluster */
	3875	/* From http://www.unicode.org/reports/tr29 (5.2 version). An
	3876	extended Grapheme Cluster is:
	3877
	3878	CR LF
	3879	\| Prepend* Begin Extend*
	3880	\| .
	3881
	3882	Begin is (Hangul-syllable \| ! Control)
	3883	Extend is (Grapheme_Extend \| Spacing_Mark)
	3884	Control is [ GCB_Control CR LF ]
	3885
	3886	The discussion below shows how the code for CLUMP is derived
	3887	from this regex. Note that most of these concepts are from
	3888	property values of the Grapheme Cluster Boundary (GCB) property.
	3889	No code point can have multiple property values for a given
	3890	property. Thus a code point in Prepend can't be in Control, but
	3891	it must be in !Control. This is why Control above includes
	3892	GCB_Control plus CR plus LF. The latter two are used in the GCB
	3893	property separately, and so can't be in GCB_Control, even though
	3894	they logically are controls. Control is not the same as gc=cc,
	3895	but includes format and other characters as well.
	3896
	3897	The Unicode definition of Hangul-syllable is:
	3898	L+
	3899	\| (L* ( ( V \| LV ) V* \| LVT ) T*)
	3900	\| T+
	3901	)
	3902	Each of these is a value for the GCB property, and hence must be
	3903	disjoint, so the order they are tested is immaterial, so the
	3904	above can safely be changed to
	3905	T+
	3906	\| L+
	3907	\| (L* ( LVT \| ( V \| LV ) V*) T*)
	3908
	3909	The last two terms can be combined like this:
	3910	L* ( L
	3911	\| (( LVT \| ( V \| LV ) V*) T*))
	3912
	3913	And refactored into this:
	3914	L* (L \| LVT T* \| V V* T* \| LV V* T*)
	3915
	3916	That means that if we have seen any L's at all we can quit
	3917	there, but if the next character is an LVT, a V or an LV we
	3918	should keep going.
	3919
	3920	There is a subtlety with Prepend* which showed up in testing.
	3921	Note that the Begin, and only the Begin is required in:
	3922	\| Prepend* Begin Extend*
	3923	Also, Begin contains '! Control'. A Prepend must be a '!
	3924	Control', which means it must be a Begin. What it comes down to
	3925	is that if we match Prepend* and then find no suitable Begin
	3926	afterwards, that if we backtrack the last Prepend, that one will
	3927	be a suitable Begin.
	3928	*/
	3929
	3930	if (locinput >= PL_regeol)
	3931	sayNO;
	3932	if (! utf8_target) {
	3933
	3934	/* Match either CR LF or '.', as all the other possibilities
	3935	* require utf8 */
	3936	locinput++; /* Match the . or CR */
	3937	if (nextchr == '\r'
	3938	&& locinput < PL_regeol
	3939	&& UCHARAT(locinput) == '\n') locinput++;
	3940	}
	3941	else {
	3942
	3943	/* Utf8: See if is ( CR LF ); already know that locinput <
	3944	* PL_regeol, so locinput+1 is in bounds */
	3945	if (nextchr == '\r' && UCHARAT(locinput + 1) == '\n') {
	3946	locinput += 2;
	3947	}
	3948	else {
	3949	/* In case have to backtrack to beginning, then match '.' */
	3950	char *starting = locinput;
	3951
	3952	/* In case have to backtrack the last prepend */
	3953	char *previous_prepend = 0;
	3954
	3955	LOAD_UTF8_CHARCLASS_GCB();
	3956
	3957	/* Match (prepend)* */
	3958	while (locinput < PL_regeol
	3959	&& swash_fetch(PL_utf8_X_prepend,
	3960	(U8*)locinput, utf8_target))
	3961	{
	3962	previous_prepend = locinput;
	3963	locinput += UTF8SKIP(locinput);
	3964	}
	3965
	3966	/* As noted above, if we matched a prepend character, but
	3967	* the next thing won't match, back off the last prepend we
	3968	* matched, as it is guaranteed to match the begin */
	3969	if (previous_prepend
	3970	&& (locinput >= PL_regeol
	3971	\|\| ! swash_fetch(PL_utf8_X_begin,
	3972	(U8*)locinput, utf8_target)))
	3973	{
	3974	locinput = previous_prepend;
	3975	}
	3976
	3977	/* Note that here we know PL_regeol > locinput, as we
	3978	* tested that upon input to this switch case, and if we
	3979	* moved locinput forward, we tested the result just above
	3980	* and it either passed, or we backed off so that it will
	3981	* now pass */
	3982	if (! swash_fetch(PL_utf8_X_begin, (U8*)locinput, utf8_target)) {
	3983
	3984	/* Here did not match the required 'Begin' in the
	3985	* second term. So just match the very first
	3986	* character, the '.' of the final term of the regex */
	3987	locinput = starting + UTF8SKIP(starting);
	3988	} else {
	3989
	3990	/* Here is the beginning of a character that can have
	3991	* an extender. It is either a hangul syllable, or a
	3992	* non-control */
	3993	if (swash_fetch(PL_utf8_X_non_hangul,
	3994	(U8*)locinput, utf8_target))
	3995	{
	3996
	3997	/* Here not a Hangul syllable, must be a
	3998	* ('! * Control') */
	3999	locinput += UTF8SKIP(locinput);
	4000	} else {
	4001
	4002	/* Here is a Hangul syllable. It can be composed
	4003	* of several individual characters. One
	4004	* possibility is T+ */
	4005	if (swash_fetch(PL_utf8_X_T,
	4006	(U8*)locinput, utf8_target))
	4007	{
	4008	while (locinput < PL_regeol
	4009	&& swash_fetch(PL_utf8_X_T,
	4010	(U8*)locinput, utf8_target))
	4011	{
	4012	locinput += UTF8SKIP(locinput);
	4013	}
	4014	} else {
	4015
	4016	/* Here, not T+, but is a Hangul. That means
	4017	* it is one of the others: L, LV, LVT or V,
	4018	* and matches:
	4019	* L* (L \| LVT T* \| V V* T* \| LV V* T*) */
	4020
	4021	/* Match L* */
	4022	while (locinput < PL_regeol
	4023	&& swash_fetch(PL_utf8_X_L,
	4024	(U8*)locinput, utf8_target))
	4025	{
	4026	locinput += UTF8SKIP(locinput);
	4027	}
	4028
	4029	/* Here, have exhausted L*. If the next
	4030	* character is not an LV, LVT nor V, it means
	4031	* we had to have at least one L, so matches L+
	4032	* in the original equation, we have a complete
	4033	* hangul syllable. Are done. */
	4034
	4035	if (locinput < PL_regeol
	4036	&& swash_fetch(PL_utf8_X_LV_LVT_V,
	4037	(U8*)locinput, utf8_target))
	4038	{
	4039
	4040	/* Otherwise keep going. Must be LV, LVT
	4041	* or V. See if LVT */
	4042	if (swash_fetch(PL_utf8_X_LVT,
	4043	(U8*)locinput, utf8_target))
	4044	{
	4045	locinput += UTF8SKIP(locinput);
	4046	} else {
	4047
	4048	/* Must be V or LV. Take it, then
	4049	* match V* */
	4050	locinput += UTF8SKIP(locinput);
	4051	while (locinput < PL_regeol
	4052	&& swash_fetch(PL_utf8_X_V,
	4053	(U8*)locinput, utf8_target))
	4054	{
	4055	locinput += UTF8SKIP(locinput);
	4056	}
	4057	}
	4058
	4059	/* And any of LV, LVT, or V can be followed
	4060	* by T* */
	4061	while (locinput < PL_regeol
	4062	&& swash_fetch(PL_utf8_X_T,
	4063	(U8*)locinput,
	4064	utf8_target))
	4065	{
	4066	locinput += UTF8SKIP(locinput);
	4067	}
	4068	}
	4069	}
	4070	}
	4071
	4072	/* Match any extender */
	4073	while (locinput < PL_regeol
	4074	&& swash_fetch(PL_utf8_X_extend,
	4075	(U8*)locinput, utf8_target))
	4076	{
	4077	locinput += UTF8SKIP(locinput);
	4078	}
	4079	}
	4080	}
	4081	if (locinput > PL_regeol) sayNO;
	4082	}
	4083	nextchr = UCHARAT(locinput);
	4084	break;
	4085
	4086	case NREFFL:
	4087	{ /* The capture buffer cases. The ones beginning with N for the
	4088	named buffers just convert to the equivalent numbered and
	4089	pretend they were called as the corresponding numbered buffer
	4090	op. */
	4091	/* don't initialize these, it makes C++ unhappy */
	4092	char *s;
	4093	char type;
	4094	re_fold_t folder;
	4095	const U8 *fold_array;
	4096
	4097	PL_reg_flags \|= RF_tainted;
	4098	folder = foldEQ_locale;
	4099	fold_array = PL_fold_locale;
	4100	type = REFFL;
	4101	goto do_nref;
	4102
	4103	case NREFFU:
	4104	folder = foldEQ_latin1;
	4105	fold_array = PL_fold_latin1;
	4106	type = REFFU;
	4107	goto do_nref;
	4108
	4109	case NREFF:
	4110	folder = foldEQ;
	4111	fold_array = PL_fold;
	4112	type = REFF;
	4113	goto do_nref;
	4114
	4115	case NREF:
	4116	type = REF;
	4117	folder = NULL;
	4118	fold_array = NULL;
	4119	do_nref:
	4120
	4121	/* For the named back references, find the corresponding buffer
	4122	* number */
	4123	n = reg_check_named_buff_matched(rex,scan);
	4124
	4125	if ( ! n ) {
	4126	sayNO;
	4127	}
	4128	goto do_nref_ref_common;
	4129
	4130	case REFFL:
	4131	PL_reg_flags \|= RF_tainted;
	4132	folder = foldEQ_locale;
	4133	fold_array = PL_fold_locale;
	4134	goto do_ref;
	4135
	4136	case REFFU:
	4137	folder = foldEQ_latin1;
	4138	fold_array = PL_fold_latin1;
	4139	goto do_ref;
	4140
	4141	case REFF:
	4142	folder = foldEQ;
	4143	fold_array = PL_fold;
	4144	goto do_ref;
	4145
	4146	case REF:
	4147	folder = NULL;
	4148	fold_array = NULL;
	4149
	4150	do_ref:
	4151	type = OP(scan);
	4152	n = ARG(scan); /* which paren pair */
	4153
	4154	do_nref_ref_common:
	4155	ln = PL_regoffs[n].start;
	4156	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4157	if (*PL_reglastparen < n \|\| ln == -1)
	4158	sayNO; /* Do not match unless seen CLOSEn. */
	4159	if (ln == PL_regoffs[n].end)
	4160	break;
	4161
	4162	s = PL_bostr + ln;
	4163	if (type != REF /* REF can do byte comparison */
	4164	&& (utf8_target
	4165	\|\| (type == REFFU
	4166	&& (*s == (char) LATIN_SMALL_LETTER_SHARP_S
	4167	\|\| *locinput == (char) LATIN_SMALL_LETTER_SHARP_S))))
	4168	{ /* XXX handle REFFL better */
	4169	char * limit = PL_regeol;
	4170
	4171	/* This call case insensitively compares the entire buffer
	4172	* at s, with the current input starting at locinput, but
	4173	* not going off the end given by PL_regeol, and returns in
	4174	* limit upon success, how much of the current input was
	4175	* matched */
	4176	if (! foldEQ_utf8(s, NULL, PL_regoffs[n].end - ln, utf8_target,
	4177	locinput, &limit, 0, utf8_target))
	4178	{
	4179	sayNO;
	4180	}
	4181	locinput = limit;
	4182	nextchr = UCHARAT(locinput);
	4183	break;
	4184	}
	4185
	4186	/* Not utf8: Inline the first character, for speed. */
	4187	if (UCHARAT(s) != nextchr &&
	4188	(type == REF \|\|
	4189	UCHARAT(s) != fold_array[nextchr]))
	4190	sayNO;
	4191	ln = PL_regoffs[n].end - ln;
	4192	if (locinput + ln > PL_regeol)
	4193	sayNO;
	4194	if (ln > 1 && (type == REF
	4195	? memNE(s, locinput, ln)
	4196	: ! folder(s, locinput, ln)))
	4197	sayNO;
	4198	locinput += ln;
	4199	nextchr = UCHARAT(locinput);
	4200	break;
	4201	}
	4202	case NOTHING:
	4203	case TAIL:
	4204	break;
	4205	case BACK:
	4206	break;
	4207
	4208	#undef ST
	4209	#define ST st->u.eval
	4210	{
	4211	SV *ret;
	4212	REGEXP *re_sv;
	4213	regexp *re;
	4214	regexp_internal *rei;
	4215	regnode *startpoint;
	4216
	4217	case GOSTART:
	4218	case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
	4219	if (cur_eval && cur_eval->locinput==locinput) {
	4220	if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
	4221	Perl_croak(aTHX_ "Infinite recursion in regex");
	4222	if ( ++nochange_depth > max_nochange_depth )
	4223	Perl_croak(aTHX_
	4224	"Pattern subroutine nesting without pos change"
	4225	" exceeded limit in regex");
	4226	} else {
	4227	nochange_depth = 0;
	4228	}
	4229	re_sv = rex_sv;
	4230	re = rex;
	4231	rei = rexi;
	4232	(void)ReREFCNT_inc(rex_sv);
	4233	if (OP(scan)==GOSUB) {
	4234	startpoint = scan + ARG2L(scan);
	4235	ST.close_paren = ARG(scan);
	4236	} else {
	4237	startpoint = rei->program+1;
	4238	ST.close_paren = 0;
	4239	}
	4240	goto eval_recurse_doit;
	4241	/* NOTREACHED */
	4242	case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X\|Y)B/ */
	4243	if (cur_eval && cur_eval->locinput==locinput) {
	4244	if ( ++nochange_depth > max_nochange_depth )
	4245	Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
	4246	} else {
	4247	nochange_depth = 0;
	4248	}
	4249	{
	4250	/* execute the code in the {...} */
	4251	dSP;
	4252	SV ** const before = SP;
	4253	OP_4tree * const oop = PL_op;
	4254	COP * const ocurcop = PL_curcop;
	4255	PAD *old_comppad;
	4256	char *saved_regeol = PL_regeol;
	4257	struct re_save_state saved_state;
	4258
	4259	/* To not corrupt the existing regex state while executing the
	4260	* eval we would normally put it on the save stack, like with
	4261	* save_re_context. However, re-evals have a weird scoping so we
	4262	* can't just add ENTER/LEAVE here. With that, things like
	4263	*
	4264	* (?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a})
	4265	*
	4266	* would break, as they expect the localisation to be unwound
	4267	* only when the re-engine backtracks through the bit that
	4268	* localised it.
	4269	*
	4270	* What we do instead is just saving the state in a local c
	4271	* variable.
	4272	*/
	4273	Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
	4274
	4275	n = ARG(scan);
	4276	PL_op = (OP_4tree*)rexi->data->data[n];
	4277	DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
	4278	" re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
	4279	PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
	4280	PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
	4281
	4282	if (sv_yes_mark) {
	4283	SV *sv_mrk = get_sv("REGMARK", 1);
	4284	sv_setsv(sv_mrk, sv_yes_mark);
	4285	}
	4286
	4287	CALLRUNOPS(aTHX); /* Scalar context. */
	4288	SPAGAIN;
	4289	if (SP == before)
	4290	ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
	4291	else {
	4292	ret = POPs;
	4293	PUTBACK;
	4294	}
	4295
	4296	Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
	4297
	4298	PL_op = oop;
	4299	PAD_RESTORE_LOCAL(old_comppad);
	4300	PL_curcop = ocurcop;
	4301	PL_regeol = saved_regeol;
	4302	if (!logical) {
	4303	/* /(?{...})/ */
	4304	sv_setsv(save_scalar(PL_replgv), ret);
	4305	break;
	4306	}
	4307	}
	4308	if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
	4309	logical = 0;
	4310	{
	4311	/* extract RE object from returned value; compiling if
	4312	* necessary */
	4313	MAGIC *mg = NULL;
	4314	REGEXP *rx = NULL;
	4315
	4316	if (SvROK(ret)) {
	4317	SV *const sv = SvRV(ret);
	4318
	4319	if (SvTYPE(sv) == SVt_REGEXP) {
	4320	rx = (REGEXP*) sv;
	4321	} else if (SvSMAGICAL(sv)) {
	4322	mg = mg_find(sv, PERL_MAGIC_qr);
	4323	assert(mg);
	4324	}
	4325	} else if (SvTYPE(ret) == SVt_REGEXP) {
	4326	rx = (REGEXP*) ret;
	4327	} else if (SvSMAGICAL(ret)) {
	4328	if (SvGMAGICAL(ret)) {
	4329	/* I don't believe that there is ever qr magic
	4330	here. */
	4331	assert(!mg_find(ret, PERL_MAGIC_qr));
	4332	sv_unmagic(ret, PERL_MAGIC_qr);
	4333	}
	4334	else {
	4335	mg = mg_find(ret, PERL_MAGIC_qr);
	4336	/* testing suggests mg only ends up non-NULL for
	4337	scalars who were upgraded and compiled in the
	4338	else block below. In turn, this is only
	4339	triggered in the "postponed utf8 string" tests
	4340	in t/op/pat.t */
	4341	}
	4342	}
	4343
	4344	if (mg) {
	4345	rx = (REGEXP ) mg->mg_obj; /XXX:dmq*/
	4346	assert(rx);
	4347	}
	4348	if (rx) {
	4349	rx = reg_temp_copy(NULL, rx);
	4350	}
	4351	else {
	4352	U32 pm_flags = 0;
	4353	const I32 osize = PL_regsize;
	4354
	4355	if (DO_UTF8(ret)) {
	4356	assert (SvUTF8(ret));
	4357	} else if (SvUTF8(ret)) {
	4358	/* Not doing UTF-8, despite what the SV says. Is
	4359	this only if we're trapped in use 'bytes'? */
	4360	/* Make a copy of the octet sequence, but without
	4361	the flag on, as the compiler now honours the
	4362	SvUTF8 flag on ret. */
	4363	STRLEN len;
	4364	const char *const p = SvPV(ret, len);
	4365	ret = newSVpvn_flags(p, len, SVs_TEMP);
	4366	}
	4367	rx = CALLREGCOMP(ret, pm_flags);
	4368	if (!(SvFLAGS(ret)
	4369	& (SVs_TEMP \| SVs_PADTMP \| SVf_READONLY
	4370	\| SVs_GMG))) {
	4371	/* This isn't a first class regexp. Instead, it's
	4372	caching a regexp onto an existing, Perl visible
	4373	scalar. */
	4374	sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
	4375	}
	4376	PL_regsize = osize;
	4377	}
	4378	re_sv = rx;
	4379	re = (struct regexp *)SvANY(rx);
	4380	}
	4381	RXp_MATCH_COPIED_off(re);
	4382	re->subbeg = rex->subbeg;
	4383	re->sublen = rex->sublen;
	4384	rei = RXi_GET(re);
	4385	DEBUG_EXECUTE_r(
	4386	debug_start_match(re_sv, utf8_target, locinput, PL_regeol,
	4387	"Matching embedded");
	4388	);
	4389	startpoint = rei->program + 1;
	4390	ST.close_paren = 0; /* only used for GOSUB */
	4391	/* borrowed from regtry */
	4392	if (PL_reg_start_tmpl <= re->nparens) {
	4393	PL_reg_start_tmpl = re->nparens*3/2 + 3;
	4394	if(PL_reg_start_tmp)
	4395	Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	4396	else
	4397	Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	4398	}
	4399
	4400	eval_recurse_doit: /* Share code with GOSUB below this line */
	4401	/* run the pattern returned from (??{...}) */
	4402	ST.cp = regcppush(0); /* Save all the positions. */
	4403	REGCP_SET(ST.lastcp);
	4404
	4405	PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
	4406
	4407	/* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
	4408	PL_reglastparen = &re->lastparen;
	4409	PL_reglastcloseparen = &re->lastcloseparen;
	4410	re->lastparen = 0;
	4411	re->lastcloseparen = 0;
	4412
	4413	PL_reginput = locinput;
	4414	PL_regsize = 0;
	4415
	4416	/* XXXX This is too dramatic a measure... */
	4417	PL_reg_maxiter = 0;
	4418
	4419	ST.toggle_reg_flags = PL_reg_flags;
	4420	if (RX_UTF8(re_sv))
	4421	PL_reg_flags \|= RF_utf8;
	4422	else
	4423	PL_reg_flags &= ~RF_utf8;
	4424	ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
	4425
	4426	ST.prev_rex = rex_sv;
	4427	ST.prev_curlyx = cur_curlyx;
	4428	SETREX(rex_sv,re_sv);
	4429	rex = re;
	4430	rexi = rei;
	4431	cur_curlyx = NULL;
	4432	ST.B = next;
	4433	ST.prev_eval = cur_eval;
	4434	cur_eval = st;
	4435	/* now continue from first node in postponed RE */
	4436	PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
	4437	/* NOTREACHED */
	4438	}
	4439	/* logical is 1, /(?(?{...})X\|Y)/ */
	4440	sw = cBOOL(SvTRUE(ret));
	4441	logical = 0;
	4442	break;
	4443	}
	4444
	4445	case EVAL_AB: /* cleanup after a successful (??{A})B */
	4446	/* note: this is called twice; first after popping B, then A */
	4447	PL_reg_flags ^= ST.toggle_reg_flags;
	4448	ReREFCNT_dec(rex_sv);
	4449	SETREX(rex_sv,ST.prev_rex);
	4450	rex = (struct regexp *)SvANY(rex_sv);
	4451	rexi = RXi_GET(rex);
	4452	regcpblow(ST.cp);
	4453	cur_eval = ST.prev_eval;
	4454	cur_curlyx = ST.prev_curlyx;
	4455
	4456	/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
	4457	PL_reglastparen = &rex->lastparen;
	4458	PL_reglastcloseparen = &rex->lastcloseparen;
	4459	/* also update PL_regoffs */
	4460	PL_regoffs = rex->offs;
	4461
	4462	/* XXXX This is too dramatic a measure... */
	4463	PL_reg_maxiter = 0;
	4464	if ( nochange_depth )
	4465	nochange_depth--;
	4466	sayYES;
	4467
	4468
	4469	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
	4470	/* note: this is called twice; first after popping B, then A */
	4471	PL_reg_flags ^= ST.toggle_reg_flags;
	4472	ReREFCNT_dec(rex_sv);
	4473	SETREX(rex_sv,ST.prev_rex);
	4474	rex = (struct regexp *)SvANY(rex_sv);
	4475	rexi = RXi_GET(rex);
	4476	/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
	4477	PL_reglastparen = &rex->lastparen;
	4478	PL_reglastcloseparen = &rex->lastcloseparen;
	4479
	4480	PL_reginput = locinput;
	4481	REGCP_UNWIND(ST.lastcp);
	4482	regcppop(rex);
	4483	cur_eval = ST.prev_eval;
	4484	cur_curlyx = ST.prev_curlyx;
	4485	/* XXXX This is too dramatic a measure... */
	4486	PL_reg_maxiter = 0;
	4487	if ( nochange_depth )
	4488	nochange_depth--;
	4489	sayNO_SILENT;
	4490	#undef ST
	4491
	4492	case OPEN:
	4493	n = ARG(scan); /* which paren pair */
	4494	PL_reg_start_tmp[n] = locinput;
	4495	if (n > PL_regsize)
	4496	PL_regsize = n;
	4497	lastopen = n;
	4498	break;
	4499	case CLOSE:
	4500	n = ARG(scan); /* which paren pair */
	4501	PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
	4502	PL_regoffs[n].end = locinput - PL_bostr;
	4503	/*if (n > PL_regsize)
	4504	PL_regsize = n;*/
	4505	if (n > *PL_reglastparen)
	4506	*PL_reglastparen = n;
	4507	*PL_reglastcloseparen = n;
	4508	if (cur_eval && cur_eval->u.eval.close_paren == n) {
	4509	goto fake_end;
	4510	}
	4511	break;
	4512	case ACCEPT:
	4513	if (ARG(scan)){
	4514	regnode *cursor;
	4515	for (cursor=scan;
	4516	cursor && OP(cursor)!=END;
	4517	cursor=regnext(cursor))
	4518	{
	4519	if ( OP(cursor)==CLOSE ){
	4520	n = ARG(cursor);
	4521	if ( n <= lastopen ) {
	4522	PL_regoffs[n].start
	4523	= PL_reg_start_tmp[n] - PL_bostr;
	4524	PL_regoffs[n].end = locinput - PL_bostr;
	4525	/*if (n > PL_regsize)
	4526	PL_regsize = n;*/
	4527	if (n > *PL_reglastparen)
	4528	*PL_reglastparen = n;
	4529	*PL_reglastcloseparen = n;
	4530	if ( n == ARG(scan) \|\| (cur_eval &&
	4531	cur_eval->u.eval.close_paren == n))
	4532	break;
	4533	}
	4534	}
	4535	}
	4536	}
	4537	goto fake_end;
	4538	/*NOTREACHED*/
	4539	case GROUPP:
	4540	n = ARG(scan); /* which paren pair */
	4541	sw = cBOOL(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
	4542	break;
	4543	case NGROUPP:
	4544	/* reg_check_named_buff_matched returns 0 for no match */
	4545	sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
	4546	break;
	4547	case INSUBP:
	4548	n = ARG(scan);
	4549	sw = (cur_eval && (!n \|\| cur_eval->u.eval.close_paren == n));
	4550	break;
	4551	case DEFINEP:
	4552	sw = 0;
	4553	break;
	4554	case IFTHEN:
	4555	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4556	if (sw)
	4557	next = NEXTOPER(NEXTOPER(scan));
	4558	else {
	4559	next = scan + ARG(scan);
	4560	if (OP(next) == IFTHEN) /* Fake one. */
	4561	next = NEXTOPER(NEXTOPER(next));
	4562	}
	4563	break;
	4564	case LOGICAL:
	4565	logical = scan->flags;
	4566	break;
	4567
	4568	/*******************************************************************
	4569
	4570	The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
	4571	pattern, where A and B are subpatterns. (For simple A, CURLYM or
	4572	STAR/PLUS/CURLY/CURLYN are used instead.)
	4573
	4574	A*B is compiled as <CURLYX><A><WHILEM><B>
	4575
	4576	On entry to the subpattern, CURLYX is called. This pushes a CURLYX
	4577	state, which contains the current count, initialised to -1. It also sets
	4578	cur_curlyx to point to this state, with any previous value saved in the
	4579	state block.
	4580
	4581	CURLYX then jumps straight to the WHILEM op, rather than executing A,
	4582	since the pattern may possibly match zero times (i.e. it's a while {} loop
	4583	rather than a do {} while loop).
	4584
	4585	Each entry to WHILEM represents a successful match of A. The count in the
	4586	CURLYX block is incremented, another WHILEM state is pushed, and execution
	4587	passes to A or B depending on greediness and the current count.
	4588
	4589	For example, if matching against the string a1a2a3b (where the aN are
	4590	substrings that match /A/), then the match progresses as follows: (the
	4591	pushed states are interspersed with the bits of strings matched so far):
	4592
	4593	<CURLYX cnt=-1>
	4594	<CURLYX cnt=0><WHILEM>
	4595	<CURLYX cnt=1><WHILEM> a1 <WHILEM>
	4596	<CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
	4597	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
	4598	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
	4599
	4600	(Contrast this with something like CURLYM, which maintains only a single
	4601	backtrack state:
	4602
	4603	<CURLYM cnt=0> a1
	4604	a1 <CURLYM cnt=1> a2
	4605	a1 a2 <CURLYM cnt=2> a3
	4606	a1 a2 a3 <CURLYM cnt=3> b
	4607	)
	4608
	4609	Each WHILEM state block marks a point to backtrack to upon partial failure
	4610	of A or B, and also contains some minor state data related to that
	4611	iteration. The CURLYX block, pointed to by cur_curlyx, contains the
	4612	overall state, such as the count, and pointers to the A and B ops.
	4613
	4614	This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
	4615	must always point to the current CURLYX block, the rules are:
	4616
	4617	When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
	4618	and set cur_curlyx to point the new block.
	4619
	4620	When popping the CURLYX block after a successful or unsuccessful match,
	4621	restore the previous cur_curlyx.
	4622
	4623	When WHILEM is about to execute B, save the current cur_curlyx, and set it
	4624	to the outer one saved in the CURLYX block.
	4625
	4626	When popping the WHILEM block after a successful or unsuccessful B match,
	4627	restore the previous cur_curlyx.
	4628
	4629	Here's an example for the pattern (AI* BI)*BO
	4630	I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
	4631
	4632	cur_
	4633	curlyx backtrack stack
	4634	------ ---------------
	4635	NULL
	4636	CO <CO prev=NULL> <WO>
	4637	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	4638	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	4639	NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
	4640
	4641	At this point the pattern succeeds, and we work back down the stack to
	4642	clean up, restoring as we go:
	4643
	4644	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	4645	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	4646	CO <CO prev=NULL> <WO>
	4647	NULL
	4648
	4649	*******************************************************************/
	4650
	4651	#define ST st->u.curlyx
	4652
	4653	case CURLYX: /* start of /A*B/ (for complex A) */
	4654	{
	4655	/* No need to save/restore up to this paren */
	4656	I32 parenfloor = scan->flags;
	4657
	4658	assert(next); /* keep Coverity happy */
	4659	if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
	4660	next += ARG(next);
	4661
	4662	/* XXXX Probably it is better to teach regpush to support
	4663	parenfloor > PL_regsize... */
	4664	if (parenfloor > (I32)*PL_reglastparen)
	4665	parenfloor = PL_reglastparen; / Pessimization... */
	4666
	4667	ST.prev_curlyx= cur_curlyx;
	4668	cur_curlyx = st;
	4669	ST.cp = PL_savestack_ix;
	4670
	4671	/* these fields contain the state of the current curly.
	4672	* they are accessed by subsequent WHILEMs */
	4673	ST.parenfloor = parenfloor;
	4674	ST.me = scan;
	4675	ST.B = next;
	4676	ST.minmod = minmod;
	4677	minmod = 0;
	4678	ST.count = -1; /* this will be updated by WHILEM */
	4679	ST.lastloc = NULL; /* this will be updated by WHILEM */
	4680
	4681	PL_reginput = locinput;
	4682	PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
	4683	/* NOTREACHED */
	4684	}
	4685
	4686	case CURLYX_end: /* just finished matching all of A*B */
	4687	cur_curlyx = ST.prev_curlyx;
	4688	sayYES;
	4689	/* NOTREACHED */
	4690
	4691	case CURLYX_end_fail: /* just failed to match all of A*B */
	4692	regcpblow(ST.cp);
	4693	cur_curlyx = ST.prev_curlyx;
	4694	sayNO;
	4695	/* NOTREACHED */
	4696
	4697
	4698	#undef ST
	4699	#define ST st->u.whilem
	4700
	4701	case WHILEM: /* just matched an A in /A*B/ (for complex A) */
	4702	{
	4703	/* see the discussion above about CURLYX/WHILEM */
	4704	I32 n;
	4705	int min = ARG1(cur_curlyx->u.curlyx.me);
	4706	int max = ARG2(cur_curlyx->u.curlyx.me);
	4707	regnode *A = NEXTOPER(cur_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS;
	4708
	4709	assert(cur_curlyx); /* keep Coverity happy */
	4710	n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
	4711	ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
	4712	ST.cache_offset = 0;
	4713	ST.cache_mask = 0;
	4714
	4715	PL_reginput = locinput;
	4716
	4717	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4718	"%*s whilem: matched %ld out of %d..%d\n",
	4719	REPORT_CODE_OFF+depth*2, "", (long)n, min, max)
	4720	);
	4721
	4722	/* First just match a string of min A's. */
	4723
	4724	if (n < min) {
	4725	cur_curlyx->u.curlyx.lastloc = locinput;
	4726	PUSH_STATE_GOTO(WHILEM_A_pre, A);
	4727	/* NOTREACHED */
	4728	}
	4729
	4730	/* If degenerate A matches "", assume A done. */
	4731
	4732	if (locinput == cur_curlyx->u.curlyx.lastloc) {
	4733	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4734	"%*s whilem: empty match detected, trying continuation...\n",
	4735	REPORT_CODE_OFF+depth*2, "")
	4736	);
	4737	goto do_whilem_B_max;
	4738	}
	4739
	4740	/* super-linear cache processing */
	4741
	4742	if (scan->flags) {
	4743
	4744	if (!PL_reg_maxiter) {
	4745	/* start the countdown: Postpone detection until we
	4746	* know the match is not that much linear. */
	4747	PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
	4748	/* possible overflow for long strings and many CURLYX's */
	4749	if (PL_reg_maxiter < 0)
	4750	PL_reg_maxiter = I32_MAX;
	4751	PL_reg_leftiter = PL_reg_maxiter;
	4752	}
	4753
	4754	if (PL_reg_leftiter-- == 0) {
	4755	/* initialise cache */
	4756	const I32 size = (PL_reg_maxiter + 7)/8;
	4757	if (PL_reg_poscache) {
	4758	if ((I32)PL_reg_poscache_size < size) {
	4759	Renew(PL_reg_poscache, size, char);
	4760	PL_reg_poscache_size = size;
	4761	}
	4762	Zero(PL_reg_poscache, size, char);
	4763	}
	4764	else {
	4765	PL_reg_poscache_size = size;
	4766	Newxz(PL_reg_poscache, size, char);
	4767	}
	4768	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4769	"%swhilem: Detected a super-linear match, switching on caching%s...\n",
	4770	PL_colors[4], PL_colors[5])
	4771	);
	4772	}
	4773
	4774	if (PL_reg_leftiter < 0) {
	4775	/* have we already failed at this position? */
	4776	I32 offset, mask;
	4777	offset = (scan->flags & 0xf) - 1
	4778	+ (locinput - PL_bostr) * (scan->flags>>4);
	4779	mask = 1 << (offset % 8);
	4780	offset /= 8;
	4781	if (PL_reg_poscache[offset] & mask) {
	4782	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4783	"%*s whilem: (cache) already tried at this position...\n",
	4784	REPORT_CODE_OFF+depth*2, "")
	4785	);
	4786	sayNO; /* cache records failure */
	4787	}
	4788	ST.cache_offset = offset;
	4789	ST.cache_mask = mask;
	4790	}
	4791	}
	4792
	4793	/* Prefer B over A for minimal matching. */
	4794
	4795	if (cur_curlyx->u.curlyx.minmod) {
	4796	ST.save_curlyx = cur_curlyx;
	4797	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	4798	ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
	4799	REGCP_SET(ST.lastcp);
	4800	PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
	4801	/* NOTREACHED */
	4802	}
	4803
	4804	/* Prefer A over B for maximal matching. */
	4805
	4806	if (n < max) { /* More greed allowed? */
	4807	ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
	4808	cur_curlyx->u.curlyx.lastloc = locinput;
	4809	REGCP_SET(ST.lastcp);
	4810	PUSH_STATE_GOTO(WHILEM_A_max, A);
	4811	/* NOTREACHED */
	4812	}
	4813	goto do_whilem_B_max;
	4814	}
	4815	/* NOTREACHED */
	4816
	4817	case WHILEM_B_min: /* just matched B in a minimal match */
	4818	case WHILEM_B_max: /* just matched B in a maximal match */
	4819	cur_curlyx = ST.save_curlyx;
	4820	sayYES;
	4821	/* NOTREACHED */
	4822
	4823	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
	4824	cur_curlyx = ST.save_curlyx;
	4825	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	4826	cur_curlyx->u.curlyx.count--;
	4827	CACHEsayNO;
	4828	/* NOTREACHED */
	4829
	4830	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
	4831	REGCP_UNWIND(ST.lastcp);
	4832	regcppop(rex);
	4833	/* FALL THROUGH */
	4834	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
	4835	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	4836	cur_curlyx->u.curlyx.count--;
	4837	CACHEsayNO;
	4838	/* NOTREACHED */
	4839
	4840	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
	4841	REGCP_UNWIND(ST.lastcp);
	4842	regcppop(rex); /* Restore some previous $<digit>s? */
	4843	PL_reginput = locinput;
	4844	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	4845	"%*s whilem: failed, trying continuation...\n",
	4846	REPORT_CODE_OFF+depth*2, "")
	4847	);
	4848	do_whilem_B_max:
	4849	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	4850	&& ckWARN(WARN_REGEXP)
	4851	&& !(PL_reg_flags & RF_warned))
	4852	{
	4853	PL_reg_flags \|= RF_warned;
	4854	Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
	4855	"Complex regular subexpression recursion",
	4856	REG_INFTY - 1);
	4857	}
	4858
	4859	/* now try B */
	4860	ST.save_curlyx = cur_curlyx;
	4861	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	4862	PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
	4863	/* NOTREACHED */
	4864
	4865	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
	4866	cur_curlyx = ST.save_curlyx;
	4867	REGCP_UNWIND(ST.lastcp);
	4868	regcppop(rex);
	4869
	4870	if (cur_curlyx->u.curlyx.count >= /*max*/ARG2(cur_curlyx->u.curlyx.me)) {
	4871	/* Maximum greed exceeded */
	4872	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	4873	&& ckWARN(WARN_REGEXP)
	4874	&& !(PL_reg_flags & RF_warned))
	4875	{
	4876	PL_reg_flags \|= RF_warned;
	4877	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	4878	"%s limit (%d) exceeded",
	4879	"Complex regular subexpression recursion",
	4880	REG_INFTY - 1);
	4881	}
	4882	cur_curlyx->u.curlyx.count--;
	4883	CACHEsayNO;
	4884	}
	4885
	4886	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	4887	"%s trying longer...\n", REPORT_CODE_OFF+depth2, "")
	4888	);
	4889	/* Try grabbing another A and see if it helps. */
	4890	PL_reginput = locinput;
	4891	cur_curlyx->u.curlyx.lastloc = locinput;
	4892	ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
	4893	REGCP_SET(ST.lastcp);
	4894	PUSH_STATE_GOTO(WHILEM_A_min,
	4895	/*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS);
	4896	/* NOTREACHED */
	4897
	4898	#undef ST
	4899	#define ST st->u.branch
	4900
	4901	case BRANCHJ: /* /(...|A|...)/ with long next pointer */
	4902	next = scan + ARG(scan);
	4903	if (next == scan)
	4904	next = NULL;
	4905	scan = NEXTOPER(scan);
	4906	/* FALL THROUGH */
	4907
	4908	case BRANCH: /* /(...|A|...)/ */
	4909	scan = NEXTOPER(scan); /* scan now points to inner node */
	4910	ST.lastparen = *PL_reglastparen;
	4911	ST.next_branch = next;
	4912	REGCP_SET(ST.cp);
	4913	PL_reginput = locinput;
	4914
	4915	/* Now go into the branch */
	4916	if (has_cutgroup) {
	4917	PUSH_YES_STATE_GOTO(BRANCH_next, scan);
	4918	} else {
	4919	PUSH_STATE_GOTO(BRANCH_next, scan);
	4920	}
	4921	/* NOTREACHED */
	4922	case CUTGROUP:
	4923	PL_reginput = locinput;
	4924	sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
	4925	MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	4926	PUSH_STATE_GOTO(CUTGROUP_next,next);
	4927	/* NOTREACHED */
	4928	case CUTGROUP_next_fail:
	4929	do_cutgroup = 1;
	4930	no_final = 1;
	4931	if (st->u.mark.mark_name)
	4932	sv_commit = st->u.mark.mark_name;
	4933	sayNO;
	4934	/* NOTREACHED */
	4935	case BRANCH_next:
	4936	sayYES;
	4937	/* NOTREACHED */
	4938	case BRANCH_next_fail: /* that branch failed; try the next, if any */
	4939	if (do_cutgroup) {
	4940	do_cutgroup = 0;
	4941	no_final = 0;
	4942	}
	4943	REGCP_UNWIND(ST.cp);
	4944	for (n = *PL_reglastparen; n > ST.lastparen; n--)
	4945	PL_regoffs[n].end = -1;
	4946	*PL_reglastparen = n;
	4947	/*dmq: *PL_reglastcloseparen = n; */
	4948	scan = ST.next_branch;
	4949	/* no more branches? */
	4950	if (!scan \|\| (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
	4951	DEBUG_EXECUTE_r({
	4952	PerlIO_printf( Perl_debug_log,
	4953	"%*s %sBRANCH failed...%s\n",
	4954	REPORT_CODE_OFF+depth*2, "",
	4955	PL_colors[4],
	4956	PL_colors[5] );
	4957	});
	4958	sayNO_SILENT;
	4959	}
	4960	continue; /* execute next BRANCH[J] op */
	4961	/* NOTREACHED */
	4962
	4963	case MINMOD:
	4964	minmod = 1;
	4965	break;
	4966
	4967	#undef ST
	4968	#define ST st->u.curlym
	4969
	4970	case CURLYM: /* /A{m,n}B/ where A is fixed-length */
	4971
	4972	/* This is an optimisation of CURLYX that enables us to push
	4973	* only a single backtracking state, no matter how many matches
	4974	* there are in {m,n}. It relies on the pattern being constant
	4975	* length, with no parens to influence future backrefs
	4976	*/
	4977
	4978	ST.me = scan;
	4979	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	4980
	4981	/* if paren positive, emulate an OPEN/CLOSE around A */
	4982	if (ST.me->flags) {
	4983	U32 paren = ST.me->flags;
	4984	if (paren > PL_regsize)
	4985	PL_regsize = paren;
	4986	if (paren > *PL_reglastparen)
	4987	*PL_reglastparen = paren;
	4988	scan += NEXT_OFF(scan); /* Skip former OPEN. */
	4989	}
	4990	ST.A = scan;
	4991	ST.B = next;
	4992	ST.alen = 0;
	4993	ST.count = 0;
	4994	ST.minmod = minmod;
	4995	minmod = 0;
	4996	ST.c1 = CHRTEST_UNINIT;
	4997	REGCP_SET(ST.cp);
	4998
	4999	if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
	5000	goto curlym_do_B;
	5001
	5002	curlym_do_A: /* execute the A in /A{m,n}B/ */
	5003	PL_reginput = locinput;
	5004	PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
	5005	/* NOTREACHED */
	5006
	5007	case CURLYM_A: /* we've just matched an A */
	5008	locinput = st->locinput;
	5009	nextchr = UCHARAT(locinput);
	5010
	5011	ST.count++;
	5012	/* after first match, determine A's length: u.curlym.alen */
	5013	if (ST.count == 1) {
	5014	if (PL_reg_match_utf8) {
	5015	char *s = locinput;
	5016	while (s < PL_reginput) {
	5017	ST.alen++;
	5018	s += UTF8SKIP(s);
	5019	}
	5020	}
	5021	else {
	5022	ST.alen = PL_reginput - locinput;
	5023	}
	5024	if (ST.alen == 0)
	5025	ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
	5026	}
	5027	DEBUG_EXECUTE_r(
	5028	PerlIO_printf(Perl_debug_log,
	5029	"%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
	5030	(int)(REPORT_CODE_OFF+(depth*2)), "",
	5031	(IV) ST.count, (IV)ST.alen)
	5032	);
	5033
	5034	locinput = PL_reginput;
	5035
	5036	if (cur_eval && cur_eval->u.eval.close_paren &&
	5037	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5038	goto fake_end;
	5039
	5040	{
	5041	I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
	5042	if ( max == REG_INFTY \|\| ST.count < max )
	5043	goto curlym_do_A; /* try to match another A */
	5044	}
	5045	goto curlym_do_B; /* try to match B */
	5046
	5047	case CURLYM_A_fail: /* just failed to match an A */
	5048	REGCP_UNWIND(ST.cp);
	5049
	5050	if (ST.minmod \|\| ST.count < ARG1(ST.me) /* min*/
	5051	\|\| (cur_eval && cur_eval->u.eval.close_paren &&
	5052	cur_eval->u.eval.close_paren == (U32)ST.me->flags))
	5053	sayNO;
	5054
	5055	curlym_do_B: /* execute the B in /A{m,n}B/ */
	5056	PL_reginput = locinput;
	5057	if (ST.c1 == CHRTEST_UNINIT) {
	5058	/* calculate c1 and c2 for possible match of 1st char
	5059	* following curly */
	5060	ST.c1 = ST.c2 = CHRTEST_VOID;
	5061	if (HAS_TEXT(ST.B) \|\| JUMPABLE(ST.B)) {
	5062	regnode *text_node = ST.B;
	5063	if (! HAS_TEXT(text_node))
	5064	FIND_NEXT_IMPT(text_node);
	5065	/* this used to be
	5066
	5067	(HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
	5068
	5069	But the former is redundant in light of the latter.
	5070
	5071	if this changes back then the macro for
	5072	IS_TEXT and friends need to change.
	5073	*/
	5074	if (PL_regkind[OP(text_node)] == EXACT)
	5075	{
	5076
	5077	ST.c1 = (U8)*STRING(text_node);
	5078	switch (OP(text_node)) {
	5079	case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
	5080	case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
	5081	case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
	5082	default: ST.c2 = ST.c1;
	5083	}
	5084	}
	5085	}
	5086	}
	5087
	5088	DEBUG_EXECUTE_r(
	5089	PerlIO_printf(Perl_debug_log,
	5090	"%*s CURLYM trying tail with matches=%"IVdf"...\n",
	5091	(int)(REPORT_CODE_OFF+(depth*2)),
	5092	"", (IV)ST.count)
	5093	);
	5094	if (ST.c1 != CHRTEST_VOID
	5095	&& UCHARAT(PL_reginput) != ST.c1
	5096	&& UCHARAT(PL_reginput) != ST.c2)
	5097	{
	5098	/* simulate B failing */
	5099	DEBUG_OPTIMISE_r(
	5100	PerlIO_printf(Perl_debug_log,
	5101	"%*s CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
	5102	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5103	(IV)ST.c1,(IV)ST.c2
	5104	));
	5105	state_num = CURLYM_B_fail;
	5106	goto reenter_switch;
	5107	}
	5108
	5109	if (ST.me->flags) {
	5110	/* mark current A as captured */
	5111	I32 paren = ST.me->flags;
	5112	if (ST.count) {
	5113	PL_regoffs[paren].start
	5114	= HOPc(PL_reginput, -ST.alen) - PL_bostr;
	5115	PL_regoffs[paren].end = PL_reginput - PL_bostr;
	5116	/*dmq: *PL_reglastcloseparen = paren; */
	5117	}
	5118	else
	5119	PL_regoffs[paren].end = -1;
	5120	if (cur_eval && cur_eval->u.eval.close_paren &&
	5121	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5122	{
	5123	if (ST.count)
	5124	goto fake_end;
	5125	else
	5126	sayNO;
	5127	}
	5128	}
	5129
	5130	PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
	5131	/* NOTREACHED */
	5132
	5133	case CURLYM_B_fail: /* just failed to match a B */
	5134	REGCP_UNWIND(ST.cp);
	5135	if (ST.minmod) {
	5136	I32 max = ARG2(ST.me);
	5137	if (max != REG_INFTY && ST.count == max)
	5138	sayNO;
	5139	goto curlym_do_A; /* try to match a further A */
	5140	}
	5141	/* backtrack one A */
	5142	if (ST.count == ARG1(ST.me) /* min */)
	5143	sayNO;
	5144	ST.count--;
	5145	locinput = HOPc(locinput, -ST.alen);
	5146	goto curlym_do_B; /* try to match B */
	5147
	5148	#undef ST
	5149	#define ST st->u.curly
	5150
	5151	#define CURLY_SETPAREN(paren, success) \
	5152	if (paren) { \
	5153	if (success) { \
	5154	PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
	5155	PL_regoffs[paren].end = locinput - PL_bostr; \
	5156	*PL_reglastcloseparen = paren; \
	5157	} \
	5158	else \
	5159	PL_regoffs[paren].end = -1; \
	5160	}
	5161
	5162	case STAR: /* /A*B/ where A is width 1 */
	5163	ST.paren = 0;
	5164	ST.min = 0;
	5165	ST.max = REG_INFTY;
	5166	scan = NEXTOPER(scan);
	5167	goto repeat;
	5168	case PLUS: /* /A+B/ where A is width 1 */
	5169	ST.paren = 0;
	5170	ST.min = 1;
	5171	ST.max = REG_INFTY;
	5172	scan = NEXTOPER(scan);
	5173	goto repeat;
	5174	case CURLYN: /* /(A){m,n}B/ where A is width 1 */
	5175	ST.paren = scan->flags; /* Which paren to set */
	5176	if (ST.paren > PL_regsize)
	5177	PL_regsize = ST.paren;
	5178	if (ST.paren > *PL_reglastparen)
	5179	*PL_reglastparen = ST.paren;
	5180	ST.min = ARG1(scan); /* min to match */
	5181	ST.max = ARG2(scan); /* max to match */
	5182	if (cur_eval && cur_eval->u.eval.close_paren &&
	5183	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5184	ST.min=1;
	5185	ST.max=1;
	5186	}
	5187	scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
	5188	goto repeat;
	5189	case CURLY: /* /A{m,n}B/ where A is width 1 */
	5190	ST.paren = 0;
	5191	ST.min = ARG1(scan); /* min to match */
	5192	ST.max = ARG2(scan); /* max to match */
	5193	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5194	repeat:
	5195	/*
	5196	* Lookahead to avoid useless match attempts
	5197	* when we know what character comes next.
	5198	*
	5199	* Used to only do .x and .?x, but now it allows
	5200	* for )'s, ('s and (?{ ... })'s to be in the way
	5201	* of the quantifier and the EXACT-like node. -- japhy
	5202	*/
	5203
	5204	if (ST.min > ST.max) /* XXX make this a compile-time check? */
	5205	sayNO;
	5206	if (HAS_TEXT(next) \|\| JUMPABLE(next)) {
	5207	U8 *s;
	5208	regnode *text_node = next;
	5209
	5210	if (! HAS_TEXT(text_node))
	5211	FIND_NEXT_IMPT(text_node);
	5212
	5213	if (! HAS_TEXT(text_node))
	5214	ST.c1 = ST.c2 = CHRTEST_VOID;
	5215	else {
	5216	if ( PL_regkind[OP(text_node)] != EXACT ) {
	5217	ST.c1 = ST.c2 = CHRTEST_VOID;
	5218	goto assume_ok_easy;
	5219	}
	5220	else
	5221	s = (U8*)STRING(text_node);
	5222
	5223	/* Currently we only get here when
	5224
	5225	PL_regkind[OP(text_node)] == EXACT
	5226
	5227	if this changes back then the macro for IS_TEXT and
	5228	friends need to change. */
	5229	if (!UTF_PATTERN) {
	5230	ST.c1 = *s;
	5231	switch (OP(text_node)) {
	5232	case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
	5233	case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
	5234	case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
	5235	default: ST.c2 = ST.c1; break;
	5236	}
	5237	}
	5238	else { /* UTF_PATTERN */
	5239	if (IS_TEXTFU(text_node) \|\| IS_TEXTF(text_node)) {
	5240	STRLEN ulen1, ulen2;
	5241	U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
	5242	U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
	5243
	5244	to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
	5245	to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
	5246	#ifdef EBCDIC
	5247	ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
	5248	ckWARN(WARN_UTF8) ?
	5249	0 : UTF8_ALLOW_ANY);
	5250	ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
	5251	ckWARN(WARN_UTF8) ?
	5252	0 : UTF8_ALLOW_ANY);
	5253	#else
	5254	ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
	5255	uniflags);
	5256	ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
	5257	uniflags);
	5258	#endif
	5259	}
	5260	else {
	5261	ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
	5262	uniflags);
	5263	}
	5264	}
	5265	}
	5266	}
	5267	else
	5268	ST.c1 = ST.c2 = CHRTEST_VOID;
	5269	assume_ok_easy:
	5270
	5271	ST.A = scan;
	5272	ST.B = next;
	5273	PL_reginput = locinput;
	5274	if (minmod) {
	5275	minmod = 0;
	5276	if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
	5277	sayNO;
	5278	ST.count = ST.min;
	5279	locinput = PL_reginput;
	5280	REGCP_SET(ST.cp);
	5281	if (ST.c1 == CHRTEST_VOID)
	5282	goto curly_try_B_min;
	5283
	5284	ST.oldloc = locinput;
	5285
	5286	/* set ST.maxpos to the furthest point along the
	5287	* string that could possibly match */
	5288	if (ST.max == REG_INFTY) {
	5289	ST.maxpos = PL_regeol - 1;
	5290	if (utf8_target)
	5291	while (UTF8_IS_CONTINUATION((U8)ST.maxpos))
	5292	ST.maxpos--;
	5293	}
	5294	else if (utf8_target) {
	5295	int m = ST.max - ST.min;
	5296	for (ST.maxpos = locinput;
	5297	m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
	5298	ST.maxpos += UTF8SKIP(ST.maxpos);
	5299	}
	5300	else {
	5301	ST.maxpos = locinput + ST.max - ST.min;
	5302	if (ST.maxpos >= PL_regeol)
	5303	ST.maxpos = PL_regeol - 1;
	5304	}
	5305	goto curly_try_B_min_known;
	5306
	5307	}
	5308	else {
	5309	ST.count = regrepeat(rex, ST.A, ST.max, depth);
	5310	locinput = PL_reginput;
	5311	if (ST.count < ST.min)
	5312	sayNO;
	5313	if ((ST.count > ST.min)
	5314	&& (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
	5315	{
	5316	/* A{m,n} must come at the end of the string, there's
	5317	* no point in backing off ... */
	5318	ST.min = ST.count;
	5319	/* ...except that $ and \Z can match before and after
	5320	newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
	5321	We may back off by one in this case. */
	5322	if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
	5323	ST.min--;
	5324	}
	5325	REGCP_SET(ST.cp);
	5326	goto curly_try_B_max;
	5327	}
	5328	/* NOTREACHED */
	5329
	5330
	5331	case CURLY_B_min_known_fail:
	5332	/* failed to find B in a non-greedy match where c1,c2 valid */
	5333	if (ST.paren && ST.count)
	5334	PL_regoffs[ST.paren].end = -1;
	5335
	5336	PL_reginput = locinput; /* Could be reset... */
	5337	REGCP_UNWIND(ST.cp);
	5338	/* Couldn't or didn't -- move forward. */
	5339	ST.oldloc = locinput;
	5340	if (utf8_target)
	5341	locinput += UTF8SKIP(locinput);
	5342	else
	5343	locinput++;
	5344	ST.count++;
	5345	curly_try_B_min_known:
	5346	/* find the next place where 'B' could work, then call B */
	5347	{
	5348	int n;
	5349	if (utf8_target) {
	5350	n = (ST.oldloc == locinput) ? 0 : 1;
	5351	if (ST.c1 == ST.c2) {
	5352	STRLEN len;
	5353	/* set n to utf8_distance(oldloc, locinput) */
	5354	while (locinput <= ST.maxpos &&
	5355	utf8n_to_uvchr((U8*)locinput,
	5356	UTF8_MAXBYTES, &len,
	5357	uniflags) != (UV)ST.c1) {
	5358	locinput += len;
	5359	n++;
	5360	}
	5361	}
	5362	else {
	5363	/* set n to utf8_distance(oldloc, locinput) */
	5364	while (locinput <= ST.maxpos) {
	5365	STRLEN len;
	5366	const UV c = utf8n_to_uvchr((U8*)locinput,
	5367	UTF8_MAXBYTES, &len,
	5368	uniflags);
	5369	if (c == (UV)ST.c1 \|\| c == (UV)ST.c2)
	5370	break;
	5371	locinput += len;
	5372	n++;
	5373	}
	5374	}
	5375	}
	5376	else {
	5377	if (ST.c1 == ST.c2) {
	5378	while (locinput <= ST.maxpos &&
	5379	UCHARAT(locinput) != ST.c1)
	5380	locinput++;
	5381	}
	5382	else {
	5383	while (locinput <= ST.maxpos
	5384	&& UCHARAT(locinput) != ST.c1
	5385	&& UCHARAT(locinput) != ST.c2)
	5386	locinput++;
	5387	}
	5388	n = locinput - ST.oldloc;
	5389	}
	5390	if (locinput > ST.maxpos)
	5391	sayNO;
	5392	/* PL_reginput == oldloc now */
	5393	if (n) {
	5394	ST.count += n;
	5395	if (regrepeat(rex, ST.A, n, depth) < n)
	5396	sayNO;
	5397	}
	5398	PL_reginput = locinput;
	5399	CURLY_SETPAREN(ST.paren, ST.count);
	5400	if (cur_eval && cur_eval->u.eval.close_paren &&
	5401	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5402	goto fake_end;
	5403	}
	5404	PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
	5405	}
	5406	/* NOTREACHED */
	5407
	5408
	5409	case CURLY_B_min_fail:
	5410	/* failed to find B in a non-greedy match where c1,c2 invalid */
	5411	if (ST.paren && ST.count)
	5412	PL_regoffs[ST.paren].end = -1;
	5413
	5414	REGCP_UNWIND(ST.cp);
	5415	/* failed -- move forward one */
	5416	PL_reginput = locinput;
	5417	if (regrepeat(rex, ST.A, 1, depth)) {
	5418	ST.count++;
	5419	locinput = PL_reginput;
	5420	if (ST.count <= ST.max \|\| (ST.max == REG_INFTY &&
	5421	ST.count > 0)) /* count overflow ? */
	5422	{
	5423	curly_try_B_min:
	5424	CURLY_SETPAREN(ST.paren, ST.count);
	5425	if (cur_eval && cur_eval->u.eval.close_paren &&
	5426	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5427	goto fake_end;
	5428	}
	5429	PUSH_STATE_GOTO(CURLY_B_min, ST.B);
	5430	}
	5431	}
	5432	sayNO;
	5433	/* NOTREACHED */
	5434
	5435
	5436	curly_try_B_max:
	5437	/* a successful greedy match: now try to match B */
	5438	if (cur_eval && cur_eval->u.eval.close_paren &&
	5439	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5440	goto fake_end;
	5441	}
	5442	{
	5443	UV c = 0;
	5444	if (ST.c1 != CHRTEST_VOID)
	5445	c = utf8_target ? utf8n_to_uvchr((U8*)PL_reginput,
	5446	UTF8_MAXBYTES, 0, uniflags)
	5447	: (UV) UCHARAT(PL_reginput);
	5448	/* If it could work, try it. */
	5449	if (ST.c1 == CHRTEST_VOID \|\| c == (UV)ST.c1 \|\| c == (UV)ST.c2) {
	5450	CURLY_SETPAREN(ST.paren, ST.count);
	5451	PUSH_STATE_GOTO(CURLY_B_max, ST.B);
	5452	/* NOTREACHED */
	5453	}
	5454	}
	5455	/* FALL THROUGH */
	5456	case CURLY_B_max_fail:
	5457	/* failed to find B in a greedy match */
	5458	if (ST.paren && ST.count)
	5459	PL_regoffs[ST.paren].end = -1;
	5460
	5461	REGCP_UNWIND(ST.cp);
	5462	/* back up. */
	5463	if (--ST.count < ST.min)
	5464	sayNO;
	5465	PL_reginput = locinput = HOPc(locinput, -1);
	5466	goto curly_try_B_max;
	5467
	5468	#undef ST
	5469
	5470	case END:
	5471	fake_end:
	5472	if (cur_eval) {
	5473	/* we've just finished A in /(??{A})B/; now continue with B */
	5474	I32 tmpix;
	5475	st->u.eval.toggle_reg_flags
	5476	= cur_eval->u.eval.toggle_reg_flags;
	5477	PL_reg_flags ^= st->u.eval.toggle_reg_flags;
	5478
	5479	st->u.eval.prev_rex = rex_sv; /* inner */
	5480	SETREX(rex_sv,cur_eval->u.eval.prev_rex);
	5481	rex = (struct regexp *)SvANY(rex_sv);
	5482	rexi = RXi_GET(rex);
	5483	cur_curlyx = cur_eval->u.eval.prev_curlyx;
	5484	ReREFCNT_inc(rex_sv);
	5485	st->u.eval.cp = regcppush(0); /* Save all the positions. */
	5486
	5487	/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
	5488	PL_reglastparen = &rex->lastparen;
	5489	PL_reglastcloseparen = &rex->lastcloseparen;
	5490
	5491	REGCP_SET(st->u.eval.lastcp);
	5492	PL_reginput = locinput;
	5493
	5494	/* Restore parens of the outer rex without popping the
	5495	* savestack */
	5496	tmpix = PL_savestack_ix;
	5497	PL_savestack_ix = cur_eval->u.eval.lastcp;
	5498	regcppop(rex);
	5499	PL_savestack_ix = tmpix;
	5500
	5501	st->u.eval.prev_eval = cur_eval;
	5502	cur_eval = cur_eval->u.eval.prev_eval;
	5503	DEBUG_EXECUTE_r(
	5504	PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
	5505	REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
	5506	if ( nochange_depth )
	5507	nochange_depth--;
	5508
	5509	PUSH_YES_STATE_GOTO(EVAL_AB,
	5510	st->u.eval.prev_eval->u.eval.B); /* match B */
	5511	}
	5512
	5513	if (locinput < reginfo->till) {
	5514	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5515	"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
	5516	PL_colors[4],
	5517	(long)(locinput - PL_reg_starttry),
	5518	(long)(reginfo->till - PL_reg_starttry),
	5519	PL_colors[5]));
	5520
	5521	sayNO_SILENT; /* Cannot match: too short. */
	5522	}
	5523	PL_reginput = locinput; /* put where regtry can find it */
	5524	sayYES; /* Success! */
	5525
	5526	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
	5527	DEBUG_EXECUTE_r(
	5528	PerlIO_printf(Perl_debug_log,
	5529	"%*s %ssubpattern success...%s\n",
	5530	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
	5531	PL_reginput = locinput; /* put where regtry can find it */
	5532	sayYES; /* Success! */
	5533
	5534	#undef ST
	5535	#define ST st->u.ifmatch
	5536
	5537	case SUSPEND: /* (?>A) */
	5538	ST.wanted = 1;
	5539	PL_reginput = locinput;
	5540	goto do_ifmatch;
	5541
	5542	case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
	5543	ST.wanted = 0;
	5544	goto ifmatch_trivial_fail_test;
	5545
	5546	case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
	5547	ST.wanted = 1;
	5548	ifmatch_trivial_fail_test:
	5549	if (scan->flags) {
	5550	char * const s = HOPBACKc(locinput, scan->flags);
	5551	if (!s) {
	5552	/* trivial fail */
	5553	if (logical) {
	5554	logical = 0;
	5555	sw = 1 - cBOOL(ST.wanted);
	5556	}
	5557	else if (ST.wanted)
	5558	sayNO;
	5559	next = scan + ARG(scan);
	5560	if (next == scan)
	5561	next = NULL;
	5562	break;
	5563	}
	5564	PL_reginput = s;
	5565	}
	5566	else
	5567	PL_reginput = locinput;
	5568
	5569	do_ifmatch:
	5570	ST.me = scan;
	5571	ST.logical = logical;
	5572	logical = 0; /* XXX: reset state of logical once it has been saved into ST */
	5573
	5574	/* execute body of (?...A) */
	5575	PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
	5576	/* NOTREACHED */
	5577
	5578	case IFMATCH_A_fail: /* body of (?...A) failed */
	5579	ST.wanted = !ST.wanted;
	5580	/* FALL THROUGH */
	5581
	5582	case IFMATCH_A: /* body of (?...A) succeeded */
	5583	if (ST.logical) {
	5584	sw = cBOOL(ST.wanted);
	5585	}
	5586	else if (!ST.wanted)
	5587	sayNO;
	5588
	5589	if (OP(ST.me) == SUSPEND)
	5590	locinput = PL_reginput;
	5591	else {
	5592	locinput = PL_reginput = st->locinput;
	5593	nextchr = UCHARAT(locinput);
	5594	}
	5595	scan = ST.me + ARG(ST.me);
	5596	if (scan == ST.me)
	5597	scan = NULL;
	5598	continue; /* execute B */
	5599
	5600	#undef ST
	5601
	5602	case LONGJMP:
	5603	next = scan + ARG(scan);
	5604	if (next == scan)
	5605	next = NULL;
	5606	break;
	5607	case COMMIT:
	5608	reginfo->cutpoint = PL_regeol;
	5609	/* FALLTHROUGH */
	5610	case PRUNE:
	5611	PL_reginput = locinput;
	5612	if (!scan->flags)
	5613	sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5614	PUSH_STATE_GOTO(COMMIT_next,next);
	5615	/* NOTREACHED */
	5616	case COMMIT_next_fail:
	5617	no_final = 1;
	5618	/* FALLTHROUGH */
	5619	case OPFAIL:
	5620	sayNO;
	5621	/* NOTREACHED */
	5622
	5623	#define ST st->u.mark
	5624	case MARKPOINT:
	5625	ST.prev_mark = mark_state;
	5626	ST.mark_name = sv_commit = sv_yes_mark
	5627	= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5628	mark_state = st;
	5629	ST.mark_loc = PL_reginput = locinput;
	5630	PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
	5631	/* NOTREACHED */
	5632	case MARKPOINT_next:
	5633	mark_state = ST.prev_mark;
	5634	sayYES;
	5635	/* NOTREACHED */
	5636	case MARKPOINT_next_fail:
	5637	if (popmark && sv_eq(ST.mark_name,popmark))
	5638	{
	5639	if (ST.mark_loc > startpoint)
	5640	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	5641	popmark = NULL; /* we found our mark */
	5642	sv_commit = ST.mark_name;
	5643
	5644	DEBUG_EXECUTE_r({
	5645	PerlIO_printf(Perl_debug_log,
	5646	"%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
	5647	REPORT_CODE_OFF+depth*2, "",
	5648	PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
	5649	});
	5650	}
	5651	mark_state = ST.prev_mark;
	5652	sv_yes_mark = mark_state ?
	5653	mark_state->u.mark.mark_name : NULL;
	5654	sayNO;
	5655	/* NOTREACHED */
	5656	case SKIP:
	5657	PL_reginput = locinput;
	5658	if (scan->flags) {
	5659	/* (*SKIP) : if we fail we cut here*/
	5660	ST.mark_name = NULL;
	5661	ST.mark_loc = locinput;
	5662	PUSH_STATE_GOTO(SKIP_next,next);
	5663	} else {
	5664	/* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
	5665	otherwise do nothing. Meaning we need to scan
	5666	*/
	5667	regmatch_state *cur = mark_state;
	5668	SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5669
	5670	while (cur) {
	5671	if ( sv_eq( cur->u.mark.mark_name,
	5672	find ) )
	5673	{
	5674	ST.mark_name = find;
	5675	PUSH_STATE_GOTO( SKIP_next, next );
	5676	}
	5677	cur = cur->u.mark.prev_mark;
	5678	}
	5679	}
	5680	/* Didn't find our (MARK:NAME) so ignore this (SKIP:NAME) */
	5681	break;
	5682	case SKIP_next_fail:
	5683	if (ST.mark_name) {
	5684	/* (*CUT:NAME) - Set up to search for the name as we
	5685	collapse the stack*/
	5686	popmark = ST.mark_name;
	5687	} else {
	5688	/* (*CUT) - No name, we cut here.*/
	5689	if (ST.mark_loc > startpoint)
	5690	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	5691	/* but we set sv_commit to latest mark_name if there
	5692	is one so they can test to see how things lead to this
	5693	cut */
	5694	if (mark_state)
	5695	sv_commit=mark_state->u.mark.mark_name;
	5696	}
	5697	no_final = 1;
	5698	sayNO;
	5699	/* NOTREACHED */
	5700	#undef ST
	5701	case FOLDCHAR:
	5702	n = ARG(scan);
	5703	if ( n == (U32)what_len_TRICKYFOLD(locinput,utf8_target,ln) ) {
	5704	locinput += ln;
	5705	} else if ( LATIN_SMALL_LETTER_SHARP_S == n && !utf8_target && !UTF_PATTERN ) {
	5706	sayNO;
	5707	} else {
	5708	U8 folded[UTF8_MAXBYTES_CASE+1];
	5709	STRLEN foldlen;
	5710	const char * const l = locinput;
	5711	char *e = PL_regeol;
	5712	to_uni_fold(n, folded, &foldlen);
	5713
	5714	if (! foldEQ_utf8((const char*) folded, 0, foldlen, 1,
	5715	l, &e, 0, utf8_target)) {
	5716	sayNO;
	5717	}
	5718	locinput = e;
	5719	}
	5720	nextchr = UCHARAT(locinput);
	5721	break;
	5722	case LNBREAK:
	5723	if ((n=is_LNBREAK(locinput,utf8_target))) {
	5724	locinput += n;
	5725	nextchr = UCHARAT(locinput);
	5726	} else
	5727	sayNO;
	5728	break;
	5729
	5730	#define CASE_CLASS(nAmE) \
	5731	case nAmE: \
	5732	if ((n=is_##nAmE(locinput,utf8_target))) { \
	5733	locinput += n; \
	5734	nextchr = UCHARAT(locinput); \
	5735	} else \
	5736	sayNO; \
	5737	break; \
	5738	case N##nAmE: \
	5739	if ((n=is_##nAmE(locinput,utf8_target))) { \
	5740	sayNO; \
	5741	} else { \
	5742	locinput += UTF8SKIP(locinput); \
	5743	nextchr = UCHARAT(locinput); \
	5744	} \
	5745	break
	5746
	5747	CASE_CLASS(VERTWS);
	5748	CASE_CLASS(HORIZWS);
	5749	#undef CASE_CLASS
	5750
	5751	default:
	5752	PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
	5753	PTR2UV(scan), OP(scan));
	5754	Perl_croak(aTHX_ "regexp memory corruption");
	5755
	5756	} /* end switch */
	5757
	5758	/* switch break jumps here */
	5759	scan = next; /* prepare to execute the next op and ... */
	5760	continue; /* ... jump back to the top, reusing st */
	5761	/* NOTREACHED */
	5762
	5763	push_yes_state:
	5764	/* push a state that backtracks on success */
	5765	st->u.yes.prev_yes_state = yes_state;
	5766	yes_state = st;
	5767	/* FALL THROUGH */
	5768	push_state:
	5769	/* push a new regex state, then continue at scan */
	5770	{
	5771	regmatch_state *newst;
	5772
	5773	DEBUG_STACK_r({
	5774	regmatch_state *cur = st;
	5775	regmatch_state *curyes = yes_state;
	5776	int curd = depth;
	5777	regmatch_slab *slab = PL_regmatch_slab;
	5778	for (;curd > -1;cur--,curd--) {
	5779	if (cur < SLAB_FIRST(slab)) {
	5780	slab = slab->prev;
	5781	cur = SLAB_LAST(slab);
	5782	}
	5783	PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
	5784	REPORT_CODE_OFF + 2 + depth * 2,"",
	5785	curd, PL_reg_name[cur->resume_state],
	5786	(curyes == cur) ? "yes" : ""
	5787	);
	5788	if (curyes == cur)
	5789	curyes = cur->u.yes.prev_yes_state;
	5790	}
	5791	} else
	5792	DEBUG_STATE_pp("push")
	5793	);
	5794	depth++;
	5795	st->locinput = locinput;
	5796	newst = st+1;
	5797	if (newst > SLAB_LAST(PL_regmatch_slab))
	5798	newst = S_push_slab(aTHX);
	5799	PL_regmatch_state = newst;
	5800
	5801	locinput = PL_reginput;
	5802	nextchr = UCHARAT(locinput);
	5803	st = newst;
	5804	continue;
	5805	/* NOTREACHED */
	5806	}
	5807	}
	5808
	5809	/*
	5810	* We get here only if there's trouble -- normally "case END" is
	5811	* the terminating point.
	5812	*/
	5813	Perl_croak(aTHX_ "corrupted regexp pointers");
	5814	/*NOTREACHED*/
	5815	sayNO;
	5816
	5817	yes:
	5818	if (yes_state) {
	5819	/* we have successfully completed a subexpression, but we must now
	5820	* pop to the state marked by yes_state and continue from there */
	5821	assert(st != yes_state);
	5822	#ifdef DEBUGGING
	5823	while (st != yes_state) {
	5824	st--;
	5825	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	5826	PL_regmatch_slab = PL_regmatch_slab->prev;
	5827	st = SLAB_LAST(PL_regmatch_slab);
	5828	}
	5829	DEBUG_STATE_r({
	5830	if (no_final) {
	5831	DEBUG_STATE_pp("pop (no final)");
	5832	} else {
	5833	DEBUG_STATE_pp("pop (yes)");
	5834	}
	5835	});
	5836	depth--;
	5837	}
	5838	#else
	5839	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
	5840	\|\| yes_state > SLAB_LAST(PL_regmatch_slab))
	5841	{
	5842	/* not in this slab, pop slab */
	5843	depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
	5844	PL_regmatch_slab = PL_regmatch_slab->prev;
	5845	st = SLAB_LAST(PL_regmatch_slab);
	5846	}
	5847	depth -= (st - yes_state);
	5848	#endif
	5849	st = yes_state;
	5850	yes_state = st->u.yes.prev_yes_state;
	5851	PL_regmatch_state = st;
	5852
	5853	if (no_final) {
	5854	locinput= st->locinput;
	5855	nextchr = UCHARAT(locinput);
	5856	}
	5857	state_num = st->resume_state + no_final;
	5858	goto reenter_switch;
	5859	}
	5860
	5861	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
	5862	PL_colors[4], PL_colors[5]));
	5863
	5864	if (PL_reg_eval_set) {
	5865	/* each successfully executed (?{...}) block does the equivalent of
	5866	* local $^R = do {...}
	5867	* When popping the save stack, all these locals would be undone;
	5868	* bypass this by setting the outermost saved $^R to the latest
	5869	* value */
	5870	if (oreplsv != GvSV(PL_replgv))
	5871	sv_setsv(oreplsv, GvSV(PL_replgv));
	5872	}
	5873	result = 1;
	5874	goto final_exit;
	5875
	5876	no:
	5877	DEBUG_EXECUTE_r(
	5878	PerlIO_printf(Perl_debug_log,
	5879	"%*s %sfailed...%s\n",
	5880	REPORT_CODE_OFF+depth*2, "",
	5881	PL_colors[4], PL_colors[5])
	5882	);
	5883
	5884	no_silent:
	5885	if (no_final) {
	5886	if (yes_state) {
	5887	goto yes;
	5888	} else {
	5889	goto final_exit;
	5890	}
	5891	}
	5892	if (depth) {
	5893	/* there's a previous state to backtrack to */
	5894	st--;
	5895	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	5896	PL_regmatch_slab = PL_regmatch_slab->prev;
	5897	st = SLAB_LAST(PL_regmatch_slab);
	5898	}
	5899	PL_regmatch_state = st;
	5900	locinput= st->locinput;
	5901	nextchr = UCHARAT(locinput);
	5902
	5903	DEBUG_STATE_pp("pop");
	5904	depth--;
	5905	if (yes_state == st)
	5906	yes_state = st->u.yes.prev_yes_state;
	5907
	5908	state_num = st->resume_state + 1; /* failure = success + 1 */
	5909	goto reenter_switch;
	5910	}
	5911	result = 0;
	5912
	5913	final_exit:
	5914	if (rex->intflags & PREGf_VERBARG_SEEN) {
	5915	SV *sv_err = get_sv("REGERROR", 1);
	5916	SV *sv_mrk = get_sv("REGMARK", 1);
	5917	if (result) {
	5918	sv_commit = &PL_sv_no;
	5919	if (!sv_yes_mark)
	5920	sv_yes_mark = &PL_sv_yes;
	5921	} else {
	5922	if (!sv_commit)
	5923	sv_commit = &PL_sv_yes;
	5924	sv_yes_mark = &PL_sv_no;
	5925	}
	5926	sv_setsv(sv_err, sv_commit);
	5927	sv_setsv(sv_mrk, sv_yes_mark);
	5928	}
	5929
	5930	/* clean up; in particular, free all slabs above current one */
	5931	LEAVE_SCOPE(oldsave);
	5932
	5933	return result;
	5934	}
	5935
	5936	/*
	5937	- regrepeat - repeatedly match something simple, report how many
	5938	*/
	5939	/*
	5940	* [This routine now assumes that it will only match on things of length 1.
	5941	* That was true before, but now we assume scan - reginput is the count,
	5942	* rather than incrementing count on every character. [Er, except utf8.]]
	5943	*/
	5944	STATIC I32
	5945	S_regrepeat(pTHX_ const regexp prog, const regnode p, I32 max, int depth)
	5946	{
	5947	dVAR;
	5948	register char *scan;
	5949	register I32 c;
	5950	register char *loceol = PL_regeol;
	5951	register I32 hardcount = 0;
	5952	register bool utf8_target = PL_reg_match_utf8;
	5953	#ifndef DEBUGGING
	5954	PERL_UNUSED_ARG(depth);
	5955	#endif
	5956
	5957	PERL_ARGS_ASSERT_REGREPEAT;
	5958
	5959	scan = PL_reginput;
	5960	if (max == REG_INFTY)
	5961	max = I32_MAX;
	5962	else if (max < loceol - scan)
	5963	loceol = scan + max;
	5964	switch (OP(p)) {
	5965	case REG_ANY:
	5966	if (utf8_target) {
	5967	loceol = PL_regeol;
	5968	while (scan < loceol && hardcount < max && *scan != '\n') {
	5969	scan += UTF8SKIP(scan);
	5970	hardcount++;
	5971	}
	5972	} else {
	5973	while (scan < loceol && *scan != '\n')
	5974	scan++;
	5975	}
	5976	break;
	5977	case SANY:
	5978	if (utf8_target) {
	5979	loceol = PL_regeol;
	5980	while (scan < loceol && hardcount < max) {
	5981	scan += UTF8SKIP(scan);
	5982	hardcount++;
	5983	}
	5984	}
	5985	else
	5986	scan = loceol;
	5987	break;
	5988	case CANY:
	5989	scan = loceol;
	5990	break;
	5991	case EXACT:
	5992	/* To get here, EXACTish nodes must have byte length == 1. That
	5993	* means they match only characters in the string that can be expressed
	5994	* as a single byte. For non-utf8 strings, that means a simple match.
	5995	* For utf8 strings, the character matched must be an invariant, or
	5996	* downgradable to a single byte. The pattern's utf8ness is
	5997	* irrelevant, as since it's a single byte, it either isn't utf8, or if
	5998	* it is, it's an invariant */
	5999
	6000	c = (U8)*STRING(p);
	6001	assert(! UTF_PATTERN \|\| UNI_IS_INVARIANT(c));
	6002
	6003	if (! utf8_target \|\| UNI_IS_INVARIANT(c)) {
	6004	while (scan < loceol && UCHARAT(scan) == c) {
	6005	scan++;
	6006	}
	6007	}
	6008	else {
	6009
	6010	/* Here, the string is utf8, and the pattern char is different
	6011	* in utf8 than not, so can't compare them directly. Outside the
	6012	* loop, find find the two utf8 bytes that represent c, and then
	6013	* look for those in sequence in the utf8 string */
	6014	U8 high = UTF8_TWO_BYTE_HI(c);
	6015	U8 low = UTF8_TWO_BYTE_LO(c);
	6016	loceol = PL_regeol;
	6017
	6018	while (hardcount < max
	6019	&& scan + 1 < loceol
	6020	&& UCHARAT(scan) == high
	6021	&& UCHARAT(scan + 1) == low)
	6022	{
	6023	scan += 2;
	6024	hardcount++;
	6025	}
	6026	}
	6027	break;
	6028	case EXACTFL:
	6029	PL_reg_flags \|= RF_tainted;
	6030	/* FALL THROUGH */
	6031	case EXACTF:
	6032	case EXACTFU:
	6033
	6034	/* The comments for the EXACT case above apply as well to these fold
	6035	* ones */
	6036
	6037	c = (U8)*STRING(p);
	6038	assert(! UTF_PATTERN \|\| UNI_IS_INVARIANT(c));
	6039
	6040	if (utf8_target) { /* Use full Unicode fold matching */
	6041
	6042	/* For the EXACTFL case, It doesn't really make sense to compare
	6043	* locale and utf8, but it is best we can do. The documents warn
	6044	* against mixing them */
	6045
	6046	char *tmpeol = loceol;
	6047	while (hardcount < max
	6048	&& foldEQ_utf8(scan, &tmpeol, 0, utf8_target,
	6049	STRING(p), NULL, 1, cBOOL(UTF_PATTERN)))
	6050	{
	6051	scan = tmpeol;
	6052	tmpeol = loceol;
	6053	hardcount++;
	6054	}
	6055
	6056	/* XXX Note that the above handles properly the German sharp s in
	6057	* the pattern matching ss in the string. But it doesn't handle
	6058	* properly cases where the string contains say 'LIGATURE ff' and
	6059	* the pattern is 'f+'. This would require, say, a new function or
	6060	* revised interface to foldEQ_utf8(), in which the maximum number
	6061	* of characters to match could be passed and it would return how
	6062	* many actually did. This is just one of many cases where
	6063	* multi-char folds don't work properly, and so the fix is being
	6064	* deferred */
	6065	}
	6066	else {
	6067	U8 folded;
	6068
	6069	/* Here, the string isn't utf8 and c is a single byte; and either
	6070	* the pattern isn't utf8 or c is an invariant, so its utf8ness
	6071	* doesn't affect c. Can just do simple comparisons for exact or
	6072	* fold matching. */
	6073	switch (OP(p)) {
	6074	case EXACTF: folded = PL_fold[c]; break;
	6075	case EXACTFU: folded = PL_fold_latin1[c]; break;
	6076	case EXACTFL: folded = PL_fold_locale[c]; break;
	6077	default: Perl_croak(aTHX_ "panic: Unexpected op %u", OP(p));
	6078	}
	6079	while (scan < loceol &&
	6080	(UCHARAT(scan) == c \|\| UCHARAT(scan) == folded))
	6081	{
	6082	scan++;
	6083	}
	6084	}
	6085	break;
	6086	case ANYOFV:
	6087	case ANYOF:
	6088	if (utf8_target) {
	6089	loceol = PL_regeol;
	6090	while (hardcount < max && scan < loceol &&
	6091	reginclass(prog, p, (U8*)scan, 0, utf8_target)) {
	6092	scan += UTF8SKIP(scan);
	6093	hardcount++;
	6094	}
	6095	} else {
	6096	while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
	6097	scan++;
	6098	}
	6099	break;
	6100	case ALNUMU:
	6101	if (utf8_target) {
	6102	utf8_wordchar:
	6103	loceol = PL_regeol;
	6104	LOAD_UTF8_CHARCLASS_ALNUM();
	6105	while (hardcount < max && scan < loceol &&
	6106	swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6107	{
	6108	scan += UTF8SKIP(scan);
	6109	hardcount++;
	6110	}
	6111	} else {
	6112	while (scan < loceol && isWORDCHAR_L1((U8) *scan)) {
	6113	scan++;
	6114	}
	6115	}
	6116	break;
	6117	case ALNUM:
	6118	if (utf8_target)
	6119	goto utf8_wordchar;
	6120	while (scan < loceol && isALNUM((U8) *scan)) {
	6121	scan++;
	6122	}
	6123	break;
	6124	case ALNUMA:
	6125	while (scan < loceol && isWORDCHAR_A((U8) *scan)) {
	6126	scan++;
	6127	}
	6128	break;
	6129	case ALNUML:
	6130	PL_reg_flags \|= RF_tainted;
	6131	if (utf8_target) {
	6132	loceol = PL_regeol;
	6133	while (hardcount < max && scan < loceol &&
	6134	isALNUM_LC_utf8((U8*)scan)) {
	6135	scan += UTF8SKIP(scan);
	6136	hardcount++;
	6137	}
	6138	} else {
	6139	while (scan < loceol && isALNUM_LC(*scan))
	6140	scan++;
	6141	}
	6142	break;
	6143	case NALNUMU:
	6144	if (utf8_target) {
	6145
	6146	utf8_Nwordchar:
	6147
	6148	loceol = PL_regeol;
	6149	LOAD_UTF8_CHARCLASS_ALNUM();
	6150	while (hardcount < max && scan < loceol &&
	6151	! swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6152	{
	6153	scan += UTF8SKIP(scan);
	6154	hardcount++;
	6155	}
	6156	} else {
	6157	while (scan < loceol && ! isWORDCHAR_L1((U8) *scan)) {
	6158	scan++;
	6159	}
	6160	}
	6161	break;
	6162	case NALNUM:
	6163	if (utf8_target)
	6164	goto utf8_Nwordchar;
	6165	while (scan < loceol && ! isALNUM((U8) *scan)) {
	6166	scan++;
	6167	}
	6168	break;
	6169	case NALNUMA:
	6170	if (utf8_target) {
	6171	while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
	6172	scan += UTF8SKIP(scan);
	6173	}
	6174	}
	6175	else {
	6176	while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
	6177	scan++;
	6178	}
	6179	}
	6180	break;
	6181	case NALNUML:
	6182	PL_reg_flags \|= RF_tainted;
	6183	if (utf8_target) {
	6184	loceol = PL_regeol;
	6185	while (hardcount < max && scan < loceol &&
	6186	!isALNUM_LC_utf8((U8*)scan)) {
	6187	scan += UTF8SKIP(scan);
	6188	hardcount++;
	6189	}
	6190	} else {
	6191	while (scan < loceol && !isALNUM_LC(*scan))
	6192	scan++;
	6193	}
	6194	break;
	6195	case SPACEU:
	6196	if (utf8_target) {
	6197
	6198	utf8_space:
	6199
	6200	loceol = PL_regeol;
	6201	LOAD_UTF8_CHARCLASS_SPACE();
	6202	while (hardcount < max && scan < loceol &&
	6203	(*scan == ' ' \|\|
	6204	swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
	6205	{
	6206	scan += UTF8SKIP(scan);
	6207	hardcount++;
	6208	}
	6209	break;
	6210	}
	6211	else {
	6212	while (scan < loceol && isSPACE_L1((U8) *scan)) {
	6213	scan++;
	6214	}
	6215	break;
	6216	}
	6217	case SPACE:
	6218	if (utf8_target)
	6219	goto utf8_space;
	6220
	6221	while (scan < loceol && isSPACE((U8) *scan)) {
	6222	scan++;
	6223	}
	6224	break;
	6225	case SPACEA:
	6226	while (scan < loceol && isSPACE_A((U8) *scan)) {
	6227	scan++;
	6228	}
	6229	break;
	6230	case SPACEL:
	6231	PL_reg_flags \|= RF_tainted;
	6232	if (utf8_target) {
	6233	loceol = PL_regeol;
	6234	while (hardcount < max && scan < loceol &&
	6235	isSPACE_LC_utf8((U8*)scan)) {
	6236	scan += UTF8SKIP(scan);
	6237	hardcount++;
	6238	}
	6239	} else {
	6240	while (scan < loceol && isSPACE_LC(*scan))
	6241	scan++;
	6242	}
	6243	break;
	6244	case NSPACEU:
	6245	if (utf8_target) {
	6246
	6247	utf8_Nspace:
	6248
	6249	loceol = PL_regeol;
	6250	LOAD_UTF8_CHARCLASS_SPACE();
	6251	while (hardcount < max && scan < loceol &&
	6252	! (*scan == ' ' \|\|
	6253	swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
	6254	{
	6255	scan += UTF8SKIP(scan);
	6256	hardcount++;
	6257	}
	6258	break;
	6259	}
	6260	else {
	6261	while (scan < loceol && ! isSPACE_L1((U8) *scan)) {
	6262	scan++;
	6263	}
	6264	}
	6265	break;
	6266	case NSPACE:
	6267	if (utf8_target)
	6268	goto utf8_Nspace;
	6269
	6270	while (scan < loceol && ! isSPACE((U8) *scan)) {
	6271	scan++;
	6272	}
	6273	break;
	6274	case NSPACEA:
	6275	if (utf8_target) {
	6276	while (scan < loceol && ! isSPACE_A((U8) *scan)) {
	6277	scan += UTF8SKIP(scan);
	6278	}
	6279	}
	6280	else {
	6281	while (scan < loceol && ! isSPACE_A((U8) *scan)) {
	6282	scan++;
	6283	}
	6284	}
	6285	break;
	6286	case NSPACEL:
	6287	PL_reg_flags \|= RF_tainted;
	6288	if (utf8_target) {
	6289	loceol = PL_regeol;
	6290	while (hardcount < max && scan < loceol &&
	6291	!isSPACE_LC_utf8((U8*)scan)) {
	6292	scan += UTF8SKIP(scan);
	6293	hardcount++;
	6294	}
	6295	} else {
	6296	while (scan < loceol && !isSPACE_LC(*scan))
	6297	scan++;
	6298	}
	6299	break;
	6300	case DIGIT:
	6301	if (utf8_target) {
	6302	loceol = PL_regeol;
	6303	LOAD_UTF8_CHARCLASS_DIGIT();
	6304	while (hardcount < max && scan < loceol &&
	6305	swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	6306	scan += UTF8SKIP(scan);
	6307	hardcount++;
	6308	}
	6309	} else {
	6310	while (scan < loceol && isDIGIT(*scan))
	6311	scan++;
	6312	}
	6313	break;
	6314	case DIGITA:
	6315	while (scan < loceol && isDIGIT_A((U8) *scan)) {
	6316	scan++;
	6317	}
	6318	break;
	6319	case DIGITL:
	6320	PL_reg_flags \|= RF_tainted;
	6321	if (utf8_target) {
	6322	loceol = PL_regeol;
	6323	while (hardcount < max && scan < loceol &&
	6324	isDIGIT_LC_utf8((U8*)scan)) {
	6325	scan += UTF8SKIP(scan);
	6326	hardcount++;
	6327	}
	6328	} else {
	6329	while (scan < loceol && isDIGIT_LC(*scan))
	6330	scan++;
	6331	}
	6332	break;
	6333	case NDIGIT:
	6334	if (utf8_target) {
	6335	loceol = PL_regeol;
	6336	LOAD_UTF8_CHARCLASS_DIGIT();
	6337	while (hardcount < max && scan < loceol &&
	6338	!swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	6339	scan += UTF8SKIP(scan);
	6340	hardcount++;
	6341	}
	6342	} else {
	6343	while (scan < loceol && !isDIGIT(*scan))
	6344	scan++;
	6345	}
	6346	break;
	6347	case NDIGITA:
	6348	if (utf8_target) {
	6349	while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
	6350	scan += UTF8SKIP(scan);
	6351	}
	6352	}
	6353	else {
	6354	while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
	6355	scan++;
	6356	}
	6357	}
	6358	break;
	6359	case NDIGITL:
	6360	PL_reg_flags \|= RF_tainted;
	6361	if (utf8_target) {
	6362	loceol = PL_regeol;
	6363	while (hardcount < max && scan < loceol &&
	6364	!isDIGIT_LC_utf8((U8*)scan)) {
	6365	scan += UTF8SKIP(scan);
	6366	hardcount++;
	6367	}
	6368	} else {
	6369	while (scan < loceol && !isDIGIT_LC(*scan))
	6370	scan++;
	6371	}
	6372	break;
	6373	case LNBREAK:
	6374	if (utf8_target) {
	6375	loceol = PL_regeol;
	6376	while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
	6377	scan += c;
	6378	hardcount++;
	6379	}
	6380	} else {
	6381	/*
	6382	LNBREAK can match two latin chars, which is ok,
	6383	because we have a null terminated string, but we
	6384	have to use hardcount in this situation
	6385	*/
	6386	while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
	6387	scan+=c;
	6388	hardcount++;
	6389	}
	6390	}
	6391	break;
	6392	case HORIZWS:
	6393	if (utf8_target) {
	6394	loceol = PL_regeol;
	6395	while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
	6396	scan += c;
	6397	hardcount++;
	6398	}
	6399	} else {
	6400	while (scan < loceol && is_HORIZWS_latin1(scan))
	6401	scan++;
	6402	}
	6403	break;
	6404	case NHORIZWS:
	6405	if (utf8_target) {
	6406	loceol = PL_regeol;
	6407	while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
	6408	scan += UTF8SKIP(scan);
	6409	hardcount++;
	6410	}
	6411	} else {
	6412	while (scan < loceol && !is_HORIZWS_latin1(scan))
	6413	scan++;
	6414
	6415	}
	6416	break;
	6417	case VERTWS:
	6418	if (utf8_target) {
	6419	loceol = PL_regeol;
	6420	while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
	6421	scan += c;
	6422	hardcount++;
	6423	}
	6424	} else {
	6425	while (scan < loceol && is_VERTWS_latin1(scan))
	6426	scan++;
	6427
	6428	}
	6429	break;
	6430	case NVERTWS:
	6431	if (utf8_target) {
	6432	loceol = PL_regeol;
	6433	while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
	6434	scan += UTF8SKIP(scan);
	6435	hardcount++;
	6436	}
	6437	} else {
	6438	while (scan < loceol && !is_VERTWS_latin1(scan))
	6439	scan++;
	6440
	6441	}
	6442	break;
	6443
	6444	default: /* Called on something of 0 width. */
	6445	break; /* So match right here or not at all. */
	6446	}
	6447
	6448	if (hardcount)
	6449	c = hardcount;
	6450	else
	6451	c = scan - PL_reginput;
	6452	PL_reginput = scan;
	6453
	6454	DEBUG_r({
	6455	GET_RE_DEBUG_FLAGS_DECL;
	6456	DEBUG_EXECUTE_r({
	6457	SV * const prop = sv_newmortal();
	6458	regprop(prog, prop, p);
	6459	PerlIO_printf(Perl_debug_log,
	6460	"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
	6461	REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
	6462	});
	6463	});
	6464
	6465	return(c);
	6466	}
	6467
	6468
	6469	#if !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION)
	6470	/*
	6471	- regclass_swash - prepare the utf8 swash
	6472	*/
	6473
	6474	SV *
	6475	Perl_regclass_swash(pTHX_ const regexp prog, register const regnode node, bool doinit, SV listsvp, SV altsvp)
	6476	{
	6477	dVAR;
	6478	SV *sw = NULL;
	6479	SV *si = NULL;
	6480	SV *alt = NULL;
	6481	RXi_GET_DECL(prog,progi);
	6482	const struct reg_data * const data = prog ? progi->data : NULL;
	6483
	6484	PERL_ARGS_ASSERT_REGCLASS_SWASH;
	6485
	6486	if (data && data->count) {
	6487	const U32 n = ARG(node);
	6488
	6489	if (data->what[n] == 's') {
	6490	SV * const rv = MUTABLE_SV(data->data[n]);
	6491	AV * const av = MUTABLE_AV(SvRV(rv));
	6492	SV **const ary = AvARRAY(av);
	6493	SV a, b;
	6494
	6495	/* See the end of regcomp.c:S_regclass() for
	6496	* documentation of these array elements. */
	6497
	6498	si = *ary;
	6499	a = SvROK(ary[1]) ? &ary[1] : NULL;
	6500	b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
	6501
	6502	if (a)
	6503	sw = *a;
	6504	else if (si && doinit) {
	6505	sw = swash_init("utf8", "", si, 1, 0);
	6506	(void)av_store(av, 1, sw);
	6507	}
	6508	if (b)
	6509	alt = *b;
	6510	}
	6511	}
	6512
	6513	if (listsvp)
	6514	*listsvp = si;
	6515	if (altsvp)
	6516	*altsvp = alt;
	6517
	6518	return sw;
	6519	}
	6520	#endif
	6521
	6522	/*
	6523	- reginclass - determine if a character falls into a character class
	6524
	6525	n is the ANYOF regnode
	6526	p is the target string
	6527	lenp is pointer to the maximum number of bytes of how far to go in p
	6528	(This is assumed wthout checking to always be at least the current
	6529	character's size)
	6530	utf8_target tells whether p is in UTF-8.
	6531
	6532	Returns true if matched; false otherwise. If lenp is not NULL, on return
	6533	from a successful match, the value it points to will be updated to how many
	6534	bytes in p were matched. If there was no match, the value is undefined,
	6535	possibly changed from the input.
	6536
	6537	Note that this can be a synthetic start class, a combination of various
	6538	nodes, so things you think might be mutually exclusive, such as locale,
	6539	aren't. It can match both locale and non-locale
	6540
	6541	*/
	6542
	6543	STATIC bool
	6544	S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, STRLEN* lenp, register const bool utf8_target)
	6545	{
	6546	dVAR;
	6547	const char flags = ANYOF_FLAGS(n);
	6548	bool match = FALSE;
	6549	UV c = *p;
	6550	STRLEN c_len = 0;
	6551	STRLEN maxlen;
	6552
	6553	PERL_ARGS_ASSERT_REGINCLASS;
	6554
	6555	/* If c is not already the code point, get it */
	6556	if (utf8_target && !UTF8_IS_INVARIANT(c)) {
	6557	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
	6558	(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
	6559	\| UTF8_ALLOW_FFFF \| UTF8_CHECK_ONLY);
	6560	/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
	6561	* UTF8_ALLOW_FFFF */
	6562	if (c_len == (STRLEN)-1)
	6563	Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
	6564	}
	6565	else {
	6566	c_len = 1;
	6567	}
	6568
	6569	/* Use passed in max length, or one character if none passed in or less
	6570	* than one character. And assume will match just one character. This is
	6571	* overwritten later if matched more. */
	6572	if (lenp) {
	6573	maxlen = (lenp > c_len) ? lenp : c_len;
	6574	*lenp = c_len;
	6575
	6576	}
	6577	else {
	6578	maxlen = c_len;
	6579	}
	6580
	6581	/* If this character is potentially in the bitmap, check it */
	6582	if (c < 256) {
	6583	if (ANYOF_BITMAP_TEST(n, c))
	6584	match = TRUE;
	6585	else if (flags & ANYOF_NON_UTF8_LATIN1_ALL
	6586	&& ! utf8_target
	6587	&& ! isASCII(c))
	6588	{
	6589	match = TRUE;
	6590	}
	6591
	6592	else if (flags & ANYOF_LOCALE) {
	6593	PL_reg_flags \|= RF_tainted;
	6594
	6595	if ((flags & ANYOF_LOC_NONBITMAP_FOLD)
	6596	&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
	6597	{
	6598	match = TRUE;
	6599	}
	6600	else if (ANYOF_CLASS_TEST_ANY_SET(n) &&
	6601	((ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) \|\|
	6602	(ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) \|\|
	6603	(ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) \|\|
	6604	(ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) \|\|
	6605	(ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) \|\|
	6606	(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) \|\|
	6607	(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) \|\|
	6608	(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) \|\|
	6609	(ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) \|\|
	6610	(ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) \|\|
	6611	(ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) \|\|
	6612	(ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) \|\|
	6613	(ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) \|\|
	6614	(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) \|\|
	6615	(ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) \|\|
	6616	(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) \|\|
	6617	(ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) \|\|
	6618	(ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) \|\|
	6619	(ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) \|\|
	6620	(ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) \|\|
	6621	(ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) \|\|
	6622	(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) \|\|
	6623	(ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) \|\|
	6624	(ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) \|\|
	6625	(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) \|\|
	6626	(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) \|\|
	6627	(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) \|\|
	6628	(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) \|\|
	6629	(ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) \|\|
	6630	(ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
	6631	) /* How's that for a conditional? */
	6632	) {
	6633	match = TRUE;
	6634	}
	6635	}
	6636	}
	6637
	6638	/* If the bitmap didn't (or couldn't) match, and something outside the
	6639	* bitmap could match, try that */
	6640	if (!match) {
	6641	if (utf8_target && (flags & ANYOF_UNICODE_ALL)) {
	6642	if (c >= 256
	6643	\|\| ((flags & ANYOF_LOC_NONBITMAP_FOLD) /* Latin1 1 that has a
	6644	non-Latin1 fold
	6645	should match */
	6646	&& _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c)))
	6647	{
	6648	match = TRUE;
	6649	}
	6650	}
	6651	if (!match && ((flags & ANYOF_NONBITMAP_NON_UTF8)
	6652	\|\| (utf8_target && flags & ANYOF_UTF8)))
	6653	{
	6654	AV *av;
	6655	SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
	6656
	6657	if (sw) {
	6658	U8 * utf8_p;
	6659	if (utf8_target) {
	6660	utf8_p = (U8 *) p;
	6661	} else {
	6662
	6663	/* Not utf8. Convert as much of the string as available up
	6664	* to the limit of how far the (single) character in the
	6665	* pattern can possibly match (no need to go further). If
	6666	* the node is a straight ANYOF or not folding, it can't
	6667	* match more than one. Otherwise, It can match up to how
	6668	* far a single char can fold to. Since not utf8, each
	6669	* character is a single byte, so the max it can be in
	6670	* bytes is the same as the max it can be in characters */
	6671	STRLEN len = (OP(n) == ANYOF
	6672	\|\| ! (flags & ANYOF_LOC_NONBITMAP_FOLD))
	6673	? 1
	6674	: (maxlen < UTF8_MAX_FOLD_CHAR_EXPAND)
	6675	? maxlen
	6676	: UTF8_MAX_FOLD_CHAR_EXPAND;
	6677	utf8_p = bytes_to_utf8(p, &len);
	6678	}
	6679
	6680	if (swash_fetch(sw, utf8_p, TRUE))
	6681	match = TRUE;
	6682	else if (flags & ANYOF_LOC_NONBITMAP_FOLD) {
	6683
	6684	/* Here, we need to test if the fold of the target string
	6685	* matches. In the case of a multi-char fold that is
	6686	* caught by regcomp.c, it has stored all such folds into
	6687	* 'av'; we linearly check to see if any match the target
	6688	* string (folded). We know that the originals were each
	6689	* one character, but we don't currently know how many
	6690	* characters/bytes each folded to, except we do know that
	6691	* there are small limits imposed by Unicode. XXX A
	6692	* performance enhancement would be to have regcomp.c store
	6693	* the max number of chars/bytes that are in an av entry,
	6694	* as, say the 0th element. Even better would be to have a
	6695	* hash of the few characters that can start a multi-char
	6696	* fold to the max number of chars of those folds.
	6697	*
	6698	* Further down, if there isn't a
	6699	* match in the av, we will check if there is another
	6700	* fold-type match. For that, we also need the fold, but
	6701	* only the first character. No sense in folding it twice,
	6702	* so we do it here, even if there isn't any multi-char
	6703	* fold, so we always fold at least the first character.
	6704	* If the node is a straight ANYOF node, or there is only
	6705	* one character available in the string, or if there isn't
	6706	* any av, that's all we have to fold. In the case of a
	6707	* multi-char fold, we do have guarantees in Unicode that
	6708	* it can only expand up to so many characters and so many
	6709	* bytes. We keep track so don't exceed either.
	6710	*
	6711	* If there is a match, we will need to advance (if lenp is
	6712	* specified) the match pointer in the target string. But
	6713	* what we are comparing here isn't that string directly,
	6714	* but its fold, whose length may differ from the original.
	6715	* As we go along in constructing the fold, therefore, we
	6716	* create a map so that we know how many bytes in the
	6717	* source to advance given that we have matched a certain
	6718	* number of bytes in the fold. This map is stored in
	6719	* 'map_fold_len_back'. The first character in the fold
	6720	* has array element 1 contain the number of bytes in the
	6721	* source that folded to it; the 2nd is the cumulative
	6722	* number to match it; ... */
	6723	U8 map_fold_len_back[UTF8_MAX_FOLD_CHAR_EXPAND] = { 0 };
	6724	U8 folded[UTF8_MAXBYTES_CASE+1];
	6725	STRLEN foldlen = 0; /* num bytes in fold of 1st char */
	6726	STRLEN foldlen_for_av; /* num bytes in fold of all chars */
	6727
	6728	if (OP(n) == ANYOF \|\| maxlen == 1 \|\| ! lenp \|\| ! av) {
	6729
	6730	/* Here, only need to fold the first char of the target
	6731	* string */
	6732	to_utf8_fold(utf8_p, folded, &foldlen);
	6733	foldlen_for_av = foldlen;
	6734	map_fold_len_back[1] = UTF8SKIP(utf8_p);
	6735	}
	6736	else {
	6737
	6738	/* Here, need to fold more than the first char. Do so
	6739	* up to the limits */
	6740	UV which_char = 0;
	6741	U8* source_ptr = utf8_p; /* The source for the fold
	6742	is the regex target
	6743	string */
	6744	U8* folded_ptr = folded;
	6745	U8* e = utf8_p + maxlen; /* Can't go beyond last
	6746	available byte in the
	6747	target string */
	6748	while (which_char < UTF8_MAX_FOLD_CHAR_EXPAND
	6749	&& source_ptr < e)
	6750	{
	6751
	6752	/* Fold the next character */
	6753	U8 this_char_folded[UTF8_MAXBYTES_CASE+1];
	6754	STRLEN this_char_foldlen;
	6755	to_utf8_fold(source_ptr,
	6756	this_char_folded,
	6757	&this_char_foldlen);
	6758
	6759	/* Bail if it would exceed the byte limit for
	6760	* folding a single char. */
	6761	if (this_char_foldlen + folded_ptr - folded >
	6762	UTF8_MAXBYTES_CASE)
	6763	{
	6764	break;
	6765	}
	6766
	6767	/* Save the first character's folded length, in
	6768	* case we have to use it later */
	6769	if (! foldlen) {
	6770	foldlen = this_char_foldlen;
	6771	}
	6772
	6773	/* Here, add the fold of this character */
	6774	Copy(this_char_folded,
	6775	folded_ptr,
	6776	this_char_foldlen,
	6777	U8);
	6778	which_char++;
	6779	map_fold_len_back[which_char] =
	6780	map_fold_len_back[which_char - 1]
	6781	+ UTF8SKIP(source_ptr);
	6782	folded_ptr += this_char_foldlen;
	6783	source_ptr += UTF8SKIP(source_ptr);
	6784	}
	6785	*folded_ptr = '\0';
	6786	foldlen_for_av = folded_ptr - folded;
	6787	}
	6788
	6789
	6790	/* Do the linear search to see if the fold is in the list
	6791	* of multi-char folds. (Useless to look if won't be able
	6792	* to store that it is a multi-char fold in lenp) /
	6793	if (lenp && av) {
	6794	I32 i;
	6795	for (i = 0; i <= av_len(av); i++) {
	6796	SV* const sv = *av_fetch(av, i, FALSE);
	6797	STRLEN len;
	6798	const char * const s = SvPV_const(sv, len);
	6799	if (len <= foldlen_for_av && memEQ(s,
	6800	(char*)folded,
	6801	len))
	6802	{
	6803
	6804	/* Advance the target string ptr to account for
	6805	* this fold, but have to translate from the
	6806	* folded length to the corresponding source
	6807	* length. The array is indexed by how many
	6808	* characters in the match */
	6809	*lenp = map_fold_len_back[
	6810	utf8_length(folded, folded + len)];
	6811	match = TRUE;
	6812	break;
	6813	}
	6814	}
	6815	}
	6816	if (!match) { /* See if the folded version matches */
	6817	SV** listp;
	6818
	6819	/* Consider "k" =~ /[K]/i. The line above would have
	6820	* just folded the 'k' to itself, and that isn't going
	6821	* to match 'K'. So we look through the closure of
	6822	* everything that folds to 'k'. That will find the
	6823	* 'K'. Initialize the list, if necessary */
	6824	if (! PL_utf8_foldclosures) {
	6825
	6826	/* If the folds haven't been read in, call a fold
	6827	* function to force that */
	6828	if (! PL_utf8_tofold) {
	6829	U8 dummy[UTF8_MAXBYTES+1];
	6830	STRLEN dummy_len;
	6831	to_utf8_fold((U8*) "A", dummy, &dummy_len);
	6832	}
	6833	PL_utf8_foldclosures =
	6834	_swash_inversion_hash(PL_utf8_tofold);
	6835	}
	6836
	6837	/* The data structure is a hash with the keys every
	6838	* character that is folded to, like 'k', and the
	6839	* values each an array of everything that folds to its
	6840	* key. e.g. [ 'k', 'K', KELVIN_SIGN ] */
	6841	if ((listp = hv_fetch(PL_utf8_foldclosures,
	6842	(char *) folded, foldlen, FALSE)))
	6843	{
	6844	AV* list = (AV) listp;
	6845	IV i;
	6846	for (i = 0; i <= av_len(list); i++) {
	6847	SV** try_p = av_fetch(list, i, FALSE);
	6848	char* try_c;
	6849	if (try_p == NULL) {
	6850	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	6851	}
	6852	/* Don't have to worry about embedded nulls
	6853	* since NULL isn't folded or foldable */
	6854	try_c = SvPVX(*try_p);
	6855
	6856	/* The fold in a few cases of an above Latin1
	6857	* char is in the Latin1 range, and hence may
	6858	* be in the bitmap */
	6859	if (UTF8_IS_INVARIANT(*try_c)
	6860	&& ANYOF_BITMAP_TEST(n,
	6861	UNI_TO_NATIVE(*try_c)))
	6862	{
	6863	match = TRUE;
	6864	break;
	6865	}
	6866	else if
	6867	(UTF8_IS_DOWNGRADEABLE_START(*try_c)
	6868	&& ANYOF_BITMAP_TEST(n, UNI_TO_NATIVE(
	6869	TWO_BYTE_UTF8_TO_UNI(try_c[0],
	6870	try_c[1]))))
	6871	{
	6872	/* Since the fold comes from internally
	6873	* generated data, we can safely assume it
	6874	* is valid utf8 in the test above */
	6875	match = TRUE;
	6876	break;
	6877	} else if (swash_fetch(sw, (U8*) try_c, TRUE)) {
	6878	match = TRUE;
	6879	break;
	6880	}
	6881	}
	6882	}
	6883	}
	6884	}
	6885
	6886	/* If we allocated a string above, free it */
	6887	if (! utf8_target) Safefree(utf8_p);
	6888	}
	6889	}
	6890	}
	6891
	6892	return (flags & ANYOF_INVERT) ? !match : match;
	6893	}
	6894
	6895	STATIC U8 *
	6896	S_reghop3(U8 s, I32 off, const U8 lim)
	6897	{
	6898	dVAR;
	6899
	6900	PERL_ARGS_ASSERT_REGHOP3;
	6901
	6902	if (off >= 0) {
	6903	while (off-- && s < lim) {
	6904	/* XXX could check well-formedness here */
	6905	s += UTF8SKIP(s);
	6906	}
	6907	}
	6908	else {
	6909	while (off++ && s > lim) {
	6910	s--;
	6911	if (UTF8_IS_CONTINUED(*s)) {
	6912	while (s > lim && UTF8_IS_CONTINUATION(*s))
	6913	s--;
	6914	}
	6915	/* XXX could check well-formedness here */
	6916	}
	6917	}
	6918	return s;
	6919	}
	6920
	6921	#ifdef XXX_dmq
	6922	/* there are a bunch of places where we use two reghop3's that should
	6923	be replaced with this routine. but since thats not done yet
	6924	we ifdef it out - dmq
	6925	*/
	6926	STATIC U8 *
	6927	S_reghop4(U8 s, I32 off, const U8 llim, const U8* rlim)
	6928	{
	6929	dVAR;
	6930
	6931	PERL_ARGS_ASSERT_REGHOP4;
	6932
	6933	if (off >= 0) {
	6934	while (off-- && s < rlim) {
	6935	/* XXX could check well-formedness here */
	6936	s += UTF8SKIP(s);
	6937	}
	6938	}
	6939	else {
	6940	while (off++ && s > llim) {
	6941	s--;
	6942	if (UTF8_IS_CONTINUED(*s)) {
	6943	while (s > llim && UTF8_IS_CONTINUATION(*s))
	6944	s--;
	6945	}
	6946	/* XXX could check well-formedness here */
	6947	}
	6948	}
	6949	return s;
	6950	}
	6951	#endif
	6952
	6953	STATIC U8 *
	6954	S_reghopmaybe3(U8* s, I32 off, const U8* lim)
	6955	{
	6956	dVAR;
	6957
	6958	PERL_ARGS_ASSERT_REGHOPMAYBE3;
	6959
	6960	if (off >= 0) {
	6961	while (off-- && s < lim) {
	6962	/* XXX could check well-formedness here */
	6963	s += UTF8SKIP(s);
	6964	}
	6965	if (off >= 0)
	6966	return NULL;
	6967	}
	6968	else {
	6969	while (off++ && s > lim) {
	6970	s--;
	6971	if (UTF8_IS_CONTINUED(*s)) {
	6972	while (s > lim && UTF8_IS_CONTINUATION(*s))
	6973	s--;
	6974	}
	6975	/* XXX could check well-formedness here */
	6976	}
	6977	if (off <= 0)
	6978	return NULL;
	6979	}
	6980	return s;
	6981	}
	6982
	6983	static void
	6984	restore_pos(pTHX_ void *arg)
	6985	{
	6986	dVAR;
	6987	regexp * const rex = (regexp *)arg;
	6988	if (PL_reg_eval_set) {
	6989	if (PL_reg_oldsaved) {
	6990	rex->subbeg = PL_reg_oldsaved;
	6991	rex->sublen = PL_reg_oldsavedlen;
	6992	#ifdef PERL_OLD_COPY_ON_WRITE
	6993	rex->saved_copy = PL_nrs;
	6994	#endif
	6995	RXp_MATCH_COPIED_on(rex);
	6996	}
	6997	PL_reg_magic->mg_len = PL_reg_oldpos;
	6998	PL_reg_eval_set = 0;
	6999	PL_curpm = PL_reg_oldcurpm;
	7000	}
	7001	}
	7002
	7003	STATIC void
	7004	S_to_utf8_substr(pTHX_ register regexp *prog)
	7005	{
	7006	int i = 1;
	7007
	7008	PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
	7009
	7010	do {
	7011	if (prog->substrs->data[i].substr
	7012	&& !prog->substrs->data[i].utf8_substr) {
	7013	SV* const sv = newSVsv(prog->substrs->data[i].substr);
	7014	prog->substrs->data[i].utf8_substr = sv;
	7015	sv_utf8_upgrade(sv);
	7016	if (SvVALID(prog->substrs->data[i].substr)) {
	7017	const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
	7018	if (flags & FBMcf_TAIL) {
	7019	/* Trim the trailing \n that fbm_compile added last
	7020	time. */
	7021	SvCUR_set(sv, SvCUR(sv) - 1);
	7022	/* Whilst this makes the SV technically "invalid" (as its
	7023	buffer is no longer followed by "\0") when fbm_compile()
	7024	adds the "\n" back, a "\0" is restored. */
	7025	}
	7026	fbm_compile(sv, flags);
	7027	}
	7028	if (prog->substrs->data[i].substr == prog->check_substr)
	7029	prog->check_utf8 = sv;
	7030	}
	7031	} while (i--);
	7032	}
	7033
	7034	STATIC void
	7035	S_to_byte_substr(pTHX_ register regexp *prog)
	7036	{
	7037	dVAR;
	7038	int i = 1;
	7039
	7040	PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
	7041
	7042	do {
	7043	if (prog->substrs->data[i].utf8_substr
	7044	&& !prog->substrs->data[i].substr) {
	7045	SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
	7046	if (sv_utf8_downgrade(sv, TRUE)) {
	7047	if (SvVALID(prog->substrs->data[i].utf8_substr)) {
	7048	const U8 flags
	7049	= BmFLAGS(prog->substrs->data[i].utf8_substr);
	7050	if (flags & FBMcf_TAIL) {
	7051	/* Trim the trailing \n that fbm_compile added last
	7052	time. */
	7053	SvCUR_set(sv, SvCUR(sv) - 1);
	7054	}
	7055	fbm_compile(sv, flags);
	7056	}
	7057	} else {
	7058	SvREFCNT_dec(sv);
	7059	sv = &PL_sv_undef;
	7060	}
	7061	prog->substrs->data[i].substr = sv;
	7062	if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
	7063	prog->check_substr = sv;
	7064	}
	7065	} while (i--);
	7066	}
	7067
	7068	/*
	7069	* Local variables:
	7070	* c-indentation-style: bsd
	7071	* c-basic-offset: 4
	7072	* indent-tabs-mode: t
	7073	* End:
	7074	*
	7075	* ex: set ts=8 sts=4 sw=4 noet:
	7076	*/