perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regexec.c
	2	*/
	3
	4	/*
	5	* One Ring to rule them all, One Ring to find them
	6	&
	7	* [p.v of _The Lord of the Rings_, opening poem]
	8	* [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
	9	* [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
	10	*/
	11
	12	/* This file contains functions for executing a regular expression. See
	13	* also regcomp.c which funnily enough, contains functions for compiling
	14	* a regular expression.
	15	*
	16	* This file is also copied at build time to ext/re/re_exec.c, where
	17	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	18	* This causes the main functions to be compiled under new names and with
	19	* debugging support added, which makes "use re 'debug'" work.
	20	*/
	21
	22	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	23	* confused with the original package (see point 3 below). Thanks, Henry!
	24	*/
	25
	26	/* Additional note: this code is very heavily munged from Henry's version
	27	* in places. In some spots I've traded clarity for efficiency, so don't
	28	* blame Henry for some of the lack of readability.
	29	*/
	30
	31	/* The names of the functions have been changed from regcomp and
	32	* regexec to pregcomp and pregexec in order to avoid conflicts
	33	* with the POSIX routines of the same names.
	34	*/
	35
	36	#ifdef PERL_EXT_RE_BUILD
	37	#include "re_top.h"
	38	#endif
	39
	40	/* At least one required character in the target string is expressible only in
	41	* UTF-8. */
	42	static const char* const non_utf8_target_but_utf8_required
	43	= "Can't match, because target string needs to be in UTF-8\n";
	44
	45	#define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target) STMT_START { \
	46	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s", non_utf8_target_but_utf8_required));\
	47	goto target; \
	48	} STMT_END
	49
	50	/*
	51	* pregcomp and pregexec -- regsub and regerror are not used in perl
	52	*
	53	* Copyright (c) 1986 by University of Toronto.
	54	* Written by Henry Spencer. Not derived from licensed software.
	55	*
	56	* Permission is granted to anyone to use this software for any
	57	* purpose on any computer system, and to redistribute it freely,
	58	* subject to the following restrictions:
	59	*
	60	* 1. The author is not responsible for the consequences of use of
	61	* this software, no matter how awful, even if they arise
	62	* from defects in it.
	63	*
	64	* 2. The origin of this software must not be misrepresented, either
	65	* by explicit claim or by omission.
	66	*
	67	* 3. Altered versions must be plainly marked as such, and must not
	68	* be misrepresented as being the original software.
	69	*
	70	**** Alterations to Henry's code are...
	71	****
	72	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	73	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
	74	**** by Larry Wall and others
	75	****
	76	**** You may distribute under the terms of either the GNU General Public
	77	**** License or the Artistic License, as specified in the README file.
	78	*
	79	* Beware that some of this code is subtly aware of the way operator
	80	* precedence is structured in regular expressions. Serious changes in
	81	* regular-expression syntax might require a total rethink.
	82	*/
	83	#include "EXTERN.h"
	84	#define PERL_IN_REGEXEC_C
	85	#include "perl.h"
	86
	87	#ifdef PERL_IN_XSUB_RE
	88	# include "re_comp.h"
	89	#else
	90	# include "regcomp.h"
	91	#endif
	92
	93	#include "inline_invlist.c"
	94	#include "unicode_constants.h"
	95
	96	#define RF_tainted 1 /* tainted information used? e.g. locale */
	97	#define RF_warned 2 /* warned about big count? */
	98
	99	#define RF_utf8 8 /* Pattern contains multibyte chars? */
	100
	101	#define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
	102
	103	#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
	104
	105	#ifndef STATIC
	106	#define STATIC static
	107	#endif
	108
	109	/* Valid for non-utf8 strings: avoids the reginclass
	110	* call if there are no complications: i.e., if everything matchable is
	111	* straight forward in the bitmap */
	112	#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0) \
	113	: ANYOF_BITMAP_TEST(p,*(c)))
	114
	115	/*
	116	* Forwards.
	117	*/
	118
	119	#define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))
	120	#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
	121
	122	#define HOPc(pos,off) \
	123	(char *)(PL_reg_match_utf8 \
	124	? reghop3((U8)pos, off, (U8)(off >= 0 ? PL_regeol : PL_bostr)) \
	125	: (U8*)(pos + off))
	126	#define HOPBACKc(pos, off) \
	127	(char*)(PL_reg_match_utf8\
	128	? reghopmaybe3((U8)pos, -off, (U8)PL_bostr) \
	129	: (pos - off >= PL_bostr) \
	130	? (U8*)pos - off \
	131	: NULL)
	132
	133	#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8)(pos), off, (U8)(lim)) : (U8*)(pos + off))
	134	#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
	135
	136
	137	#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
	138	#define NEXTCHR_IS_EOS (nextchr < 0)
	139
	140	#define SET_nextchr \
	141	nextchr = ((locinput < PL_regeol) ? UCHARAT(locinput) : NEXTCHR_EOS)
	142
	143	#define SET_locinput(p) \
	144	locinput = (p); \
	145	SET_nextchr
	146
	147
	148	/* these are unrolled below in the CCC_TRY_XXX defined */
	149	#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
	150	if (!CAT2(PL_utf8_,class)) { \
	151	bool ok; \
	152	ENTER; save_re_context(); \
	153	ok=CAT2(is_utf8_,class)((const U8*)str); \
	154	PERL_UNUSED_VAR(ok); \
	155	assert(ok); assert(CAT2(PL_utf8_,class)); LEAVE; } } STMT_END
	156	/* Doesn't do an assert to verify that is correct */
	157	#define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
	158	if (!CAT2(PL_utf8_,class)) { \
	159	bool throw_away PERL_UNUSED_DECL; \
	160	ENTER; save_re_context(); \
	161	throw_away = CAT2(is_utf8_,class)((const U8*)" "); \
	162	PERL_UNUSED_VAR(throw_away); \
	163	LEAVE; } } STMT_END
	164
	165	#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
	166	#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
	167
	168	#define LOAD_UTF8_CHARCLASS_GCB() /* Grapheme cluster boundaries */ \
	169	/* No asserts are done for some of these, in case called on a */ \
	170	/* Unicode version in which they map to nothing */ \
	171	LOAD_UTF8_CHARCLASS(X_regular_begin, HYPHEN_UTF8); \
	172	LOAD_UTF8_CHARCLASS(X_extend, COMBINING_GRAVE_ACCENT_UTF8); \
	173
	174	#define PLACEHOLDER /* Something for the preprocessor to grab onto */
	175
	176	/* The actual code for CCC_TRY, which uses several variables from the routine
	177	* it's callable from. It is designed to be the bulk of a case statement.
	178	* FUNC is the macro or function to call on non-utf8 targets that indicate if
	179	* nextchr matches the class.
	180	* UTF8_TEST is the whole test string to use for utf8 targets
	181	* LOAD is what to use to test, and if not present to load in the swash for the
	182	* class
	183	* POS_OR_NEG is either empty or ! to complement the results of FUNC or
	184	* UTF8_TEST test.
	185	* The logic is: Fail if we're at the end-of-string; otherwise if the target is
	186	* utf8 and a variant, load the swash if necessary and test using the utf8
	187	* test. Advance to the next character if test is ok, otherwise fail; If not
	188	* utf8 or an invariant under utf8, use the non-utf8 test, and fail if it
	189	* fails, or advance to the next character */
	190
	191	#define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR) \
	192	if (NEXTCHR_IS_EOS) { \
	193	sayNO; \
	194	} \
	195	if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \
	196	LOAD_UTF8_CHARCLASS(CLASS, STR); \
	197	if (POS_OR_NEG (UTF8_TEST)) { \
	198	sayNO; \
	199	} \
	200	} \
	201	else if (POS_OR_NEG (FUNC(nextchr))) { \
	202	sayNO; \
	203	} \
	204	goto increment_locinput;
	205
	206	/* Handle the non-locale cases for a character class and its complement. It
	207	* calls _CCC_TRY_CODE with a ! to complement the test for the character class.
	208	* This is because that code fails when the test succeeds, so we want to have
	209	* the test fail so that the code succeeds. The swash is stored in a
	210	* predictable PL_ place */
	211	#define _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, \
	212	CLASS, STR) \
	213	case NAME: \
	214	_CCC_TRY_CODE( !, FUNC, \
	215	cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	216	(U8*)locinput, TRUE)), \
	217	CLASS, STR) \
	218	case NNAME: \
	219	_CCC_TRY_CODE( PLACEHOLDER , FUNC, \
	220	cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	221	(U8*)locinput, TRUE)), \
	222	CLASS, STR) \
	223
	224	/* Generate the case statements for both locale and non-locale character
	225	* classes in regmatch for classes that don't have special unicode semantics.
	226	* Locales don't use an immediate swash, but an intermediary special locale
	227	* function that is called on the pointer to the current place in the input
	228	* string. That function will resolve to needing the same swash. One might
	229	* think that because we don't know what the locale will match, we shouldn't
	230	* check with the swash loading function that it loaded properly; ie, that we
	231	* should use LOAD_UTF8_CHARCLASS_NO_CHECK for those, but what is passed to the
	232	* regular LOAD_UTF8_CHARCLASS is in non-locale terms, and so locale is
	233	* irrelevant here */
	234	#define CCC_TRY(NAME, NNAME, FUNC, \
	235	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	236	NAMEA, NNAMEA, FUNCA, \
	237	CLASS, STR) \
	238	case NAMEL: \
	239	PL_reg_flags \|= RF_tainted; \
	240	_CCC_TRY_CODE( !, LCFUNC, LCFUNC_utf8((U8*)locinput), CLASS, STR) \
	241	case NNAMEL: \
	242	PL_reg_flags \|= RF_tainted; \
	243	_CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput), \
	244	CLASS, STR) \
	245	case NAMEA: \
	246	if (NEXTCHR_IS_EOS \|\| ! FUNCA(nextchr)) { \
	247	sayNO; \
	248	} \
	249	/* Matched a utf8-invariant, so don't have to worry about utf8 */ \
	250	locinput++; \
	251	break; \
	252	case NNAMEA: \
	253	if (NEXTCHR_IS_EOS \|\| FUNCA(nextchr)) { \
	254	sayNO; \
	255	} \
	256	goto increment_locinput; \
	257	/* Generate the non-locale cases */ \
	258	_CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)
	259
	260	/* This is like CCC_TRY, but has an extra set of parameters for generating case
	261	* statements to handle separate Unicode semantics nodes */
	262	#define CCC_TRY_U(NAME, NNAME, FUNC, \
	263	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	264	NAMEU, NNAMEU, FUNCU, \
	265	NAMEA, NNAMEA, FUNCA, \
	266	CLASS, STR) \
	267	CCC_TRY(NAME, NNAME, FUNC, \
	268	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	269	NAMEA, NNAMEA, FUNCA, \
	270	CLASS, STR) \
	271	_CCC_TRY_NONLOCALE(NAMEU, NNAMEU, FUNCU, CLASS, STR)
	272
	273	/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
	274
	275	/* for use after a quantifier and before an EXACT-like node -- japhy */
	276	/* it would be nice to rework regcomp.sym to generate this stuff. sigh
	277	*
	278	* NOTE that nothing that affects backtracking should be in here, specifically
	279	* VERBS must NOT be included. JUMPABLE is used to determine if we can ignore a
	280	* node that is in between two EXACT like nodes when ascertaining what the required
	281	* "follow" character is. This should probably be moved to regex compile time
	282	* although it may be done at run time beause of the REF possibility - more
	283	* investigation required. -- demerphq
	284	*/
	285	#define JUMPABLE(rn) ( \
	286	OP(rn) == OPEN \|\| \
	287	(OP(rn) == CLOSE && (!cur_eval \|\| cur_eval->u.eval.close_paren != ARG(rn))) \|\| \
	288	OP(rn) == EVAL \|\| \
	289	OP(rn) == SUSPEND \|\| OP(rn) == IFMATCH \|\| \
	290	OP(rn) == PLUS \|\| OP(rn) == MINMOD \|\| \
	291	OP(rn) == KEEPS \|\| \
	292	(PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
	293	)
	294	#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
	295
	296	#define HAS_TEXT(rn) ( IS_EXACT(rn) \|\| PL_regkind[OP(rn)] == REF )
	297
	298	#if 0
	299	/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
	300	we don't need this definition. */
	301	#define IS_TEXT(rn) ( OP(rn)==EXACT \|\| OP(rn)==REF \|\| OP(rn)==NREF )
	302	#define IS_TEXTF(rn) ( OP(rn)==EXACTFU \|\| OP(rn)==EXACTFU_SS \|\| OP(rn)==EXACTFU_TRICKYFOLD \|\| OP(rn)==EXACTFA \|\| OP(rn)==EXACTF \|\| OP(rn)==REFF \|\| OP(rn)==NREFF )
	303	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL \|\| OP(rn)==REFFL \|\| OP(rn)==NREFFL )
	304
	305	#else
	306	/* ... so we use this as its faster. */
	307	#define IS_TEXT(rn) ( OP(rn)==EXACT )
	308	#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU \|\| OP(rn)==EXACTFU_SS \|\| OP(rn)==EXACTFU_TRICKYFOLD \|\| OP(rn) == EXACTFA)
	309	#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
	310	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
	311
	312	#endif
	313
	314	/*
	315	Search for mandatory following text node; for lookahead, the text must
	316	follow but for lookbehind (rn->flags != 0) we skip to the next step.
	317	*/
	318	#define FIND_NEXT_IMPT(rn) STMT_START { \
	319	while (JUMPABLE(rn)) { \
	320	const OPCODE type = OP(rn); \
	321	if (type == SUSPEND \|\| PL_regkind[type] == CURLY) \
	322	rn = NEXTOPER(NEXTOPER(rn)); \
	323	else if (type == PLUS) \
	324	rn = NEXTOPER(rn); \
	325	else if (type == IFMATCH) \
	326	rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
	327	else rn += NEXT_OFF(rn); \
	328	} \
	329	} STMT_END
	330
	331
	332	static void restore_pos(pTHX_ void *arg);
	333
	334	#define REGCP_PAREN_ELEMS 3
	335	#define REGCP_OTHER_ELEMS 3
	336	#define REGCP_FRAME_ELEMS 1
	337	/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
	338	* are needed for the regexp context stack bookkeeping. */
	339
	340	STATIC CHECKPOINT
	341	S_regcppush(pTHX_ const regexp *rex, I32 parenfloor)
	342	{
	343	dVAR;
	344	const int retval = PL_savestack_ix;
	345	const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
	346	const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
	347	const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
	348	I32 p;
	349	GET_RE_DEBUG_FLAGS_DECL;
	350
	351	PERL_ARGS_ASSERT_REGCPPUSH;
	352
	353	if (paren_elems_to_push < 0)
	354	Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0",
	355	paren_elems_to_push);
	356
	357	if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
	358	Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
	359	" out of range (%lu-%ld)",
	360	total_elems, (unsigned long)PL_regsize, (long)parenfloor);
	361
	362	SSGROW(total_elems + REGCP_FRAME_ELEMS);
	363
	364	DEBUG_BUFFERS_r(
	365	if ((int)PL_regsize > (int)parenfloor)
	366	PerlIO_printf(Perl_debug_log,
	367	"rex=0x%"UVxf" offs=0x%"UVxf": saving capture indices:\n",
	368	PTR2UV(rex),
	369	PTR2UV(rex->offs)
	370	);
	371	);
	372	for (p = parenfloor+1; p <= (I32)PL_regsize; p++) {
	373	/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
	374	SSPUSHINT(rex->offs[p].end);
	375	SSPUSHINT(rex->offs[p].start);
	376	SSPUSHINT(rex->offs[p].start_tmp);
	377	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	378	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"\n",
	379	(UV)p,
	380	(IV)rex->offs[p].start,
	381	(IV)rex->offs[p].start_tmp,
	382	(IV)rex->offs[p].end
	383	));
	384	}
	385	/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
	386	SSPUSHINT(PL_regsize);
	387	SSPUSHINT(rex->lastparen);
	388	SSPUSHINT(rex->lastcloseparen);
	389	SSPUSHUV(SAVEt_REGCONTEXT \| elems_shifted); /* Magic cookie. */
	390
	391	return retval;
	392	}
	393
	394	/* These are needed since we do not localize EVAL nodes: */
	395	#define REGCP_SET(cp) \
	396	DEBUG_STATE_r( \
	397	PerlIO_printf(Perl_debug_log, \
	398	" Setting an EVAL scope, savestack=%"IVdf"\n", \
	399	(IV)PL_savestack_ix)); \
	400	cp = PL_savestack_ix
	401
	402	#define REGCP_UNWIND(cp) \
	403	DEBUG_STATE_r( \
	404	if (cp != PL_savestack_ix) \
	405	PerlIO_printf(Perl_debug_log, \
	406	" Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
	407	(IV)(cp), (IV)PL_savestack_ix)); \
	408	regcpblow(cp)
	409
	410	#define UNWIND_PAREN(lp, lcp) \
	411	for (n = rex->lastparen; n > lp; n--) \
	412	rex->offs[n].end = -1; \
	413	rex->lastparen = n; \
	414	rex->lastcloseparen = lcp;
	415
	416
	417	STATIC void
	418	S_regcppop(pTHX_ regexp *rex)
	419	{
	420	dVAR;
	421	UV i;
	422	U32 paren;
	423	GET_RE_DEBUG_FLAGS_DECL;
	424
	425	PERL_ARGS_ASSERT_REGCPPOP;
	426
	427	/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
	428	i = SSPOPUV;
	429	assert((i & SAVE_MASK) == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
	430	i >>= SAVE_TIGHT_SHIFT; /* Parentheses elements to pop. */
	431	rex->lastcloseparen = SSPOPINT;
	432	rex->lastparen = SSPOPINT;
	433	PL_regsize = SSPOPINT;
	434
	435	i -= REGCP_OTHER_ELEMS;
	436	/* Now restore the parentheses context. */
	437	DEBUG_BUFFERS_r(
	438	if (i \|\| rex->lastparen + 1 <= rex->nparens)
	439	PerlIO_printf(Perl_debug_log,
	440	"rex=0x%"UVxf" offs=0x%"UVxf": restoring capture indices to:\n",
	441	PTR2UV(rex),
	442	PTR2UV(rex->offs)
	443	);
	444	);
	445	paren = PL_regsize;
	446	for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
	447	I32 tmps;
	448	rex->offs[paren].start_tmp = SSPOPINT;
	449	rex->offs[paren].start = SSPOPINT;
	450	tmps = SSPOPINT;
	451	if (paren <= rex->lastparen)
	452	rex->offs[paren].end = tmps;
	453	DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
	454	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"%s\n",
	455	(UV)paren,
	456	(IV)rex->offs[paren].start,
	457	(IV)rex->offs[paren].start_tmp,
	458	(IV)rex->offs[paren].end,
	459	(paren > rex->lastparen ? "(skipped)" : ""));
	460	);
	461	paren--;
	462	}
	463	#if 1
	464	/* It would seem that the similar code in regtry()
	465	* already takes care of this, and in fact it is in
	466	* a better location to since this code can #if 0-ed out
	467	* but the code in regtry() is needed or otherwise tests
	468	* requiring null fields (pat.t#187 and split.t#{13,14}
	469	* (as of patchlevel 7877) will fail. Then again,
	470	* this code seems to be necessary or otherwise
	471	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	472	* --jhi updated by dapm */
	473	for (i = rex->lastparen + 1; i <= rex->nparens; i++) {
	474	if (i > PL_regsize)
	475	rex->offs[i].start = -1;
	476	rex->offs[i].end = -1;
	477	DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
	478	" \\%"UVuf": %s ..-1 undeffing\n",
	479	(UV)i,
	480	(i > PL_regsize) ? "-1" : " "
	481	));
	482	}
	483	#endif
	484	}
	485
	486	/* restore the parens and associated vars at savestack position ix,
	487	* but without popping the stack */
	488
	489	STATIC void
	490	S_regcp_restore(pTHX_ regexp *rex, I32 ix)
	491	{
	492	I32 tmpix = PL_savestack_ix;
	493	PL_savestack_ix = ix;
	494	regcppop(rex);
	495	PL_savestack_ix = tmpix;
	496	}
	497
	498	#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
	499
	500	/*
	501	* pregexec and friends
	502	*/
	503
	504	#ifndef PERL_IN_XSUB_RE
	505	/*
	506	- pregexec - match a regexp against a string
	507	*/
	508	I32
	509	Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
	510	char strbeg, I32 minend, SV screamer, U32 nosave)
	511	/* stringarg: the point in the string at which to begin matching */
	512	/* strend: pointer to null at end of string */
	513	/* strbeg: real beginning of string */
	514	/* minend: end of match must be >= minend bytes after stringarg. */
	515	/* screamer: SV being matched: only used for utf8 flag, pos() etc; string
	516	* itself is accessed via the pointers above */
	517	/* nosave: For optimizations. */
	518	{
	519	PERL_ARGS_ASSERT_PREGEXEC;
	520
	521	return
	522	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
	523	nosave ? 0 : REXEC_COPY_STR);
	524	}
	525	#endif
	526
	527	/*
	528	* Need to implement the following flags for reg_anch:
	529	*
	530	* USE_INTUIT_NOML - Useful to call re_intuit_start() first
	531	* USE_INTUIT_ML
	532	* INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
	533	* INTUIT_AUTORITATIVE_ML
	534	* INTUIT_ONCE_NOML - Intuit can match in one location only.
	535	* INTUIT_ONCE_ML
	536	*
	537	* Another flag for this function: SECOND_TIME (so that float substrs
	538	* with giant delta may be not rechecked).
	539	*/
	540
	541	/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
	542
	543	/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
	544	Otherwise, only SvCUR(sv) is used to get strbeg. */
	545
	546	/* XXXX We assume that strpos is strbeg unless sv. */
	547
	548	/* XXXX Some places assume that there is a fixed substring.
	549	An update may be needed if optimizer marks as "INTUITable"
	550	RExen without fixed substrings. Similarly, it is assumed that
	551	lengths of all the strings are no more than minlen, thus they
	552	cannot come from lookahead.
	553	(Or minlen should take into account lookahead.)
	554	NOTE: Some of this comment is not correct. minlen does now take account
	555	of lookahead/behind. Further research is required. -- demerphq
	556
	557	*/
	558
	559	/* A failure to find a constant substring means that there is no need to make
	560	an expensive call to REx engine, thus we celebrate a failure. Similarly,
	561	finding a substring too deep into the string means that less calls to
	562	regtry() should be needed.
	563
	564	REx compiler's optimizer found 4 possible hints:
	565	a) Anchored substring;
	566	b) Fixed substring;
	567	c) Whether we are anchored (beginning-of-line or \G);
	568	d) First node (of those at offset 0) which may distinguish positions;
	569	We use a)b)d) and multiline-part of c), and try to find a position in the
	570	string which does not contradict any of them.
	571	*/
	572
	573	/* Most of decisions we do here should have been done at compile time.
	574	The nodes of the REx which we used for the search should have been
	575	deleted from the finite automaton. */
	576
	577	char *
	578	Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV sv, char strpos,
	579	char strend, const U32 flags, re_scream_pos_data data)
	580	{
	581	dVAR;
	582	struct regexp *const prog = ReANY(rx);
	583	I32 start_shift = 0;
	584	/* Should be nonnegative! */
	585	I32 end_shift = 0;
	586	char *s;
	587	SV *check;
	588	char *strbeg;
	589	char *t;
	590	const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
	591	I32 ml_anch;
	592	char other_last = NULL; / other substr checked before this */
	593	char check_at = NULL; / check substr found at this pos */
	594	char checked_upto = NULL; / how far into the string we have already checked using find_byclass*/
	595	const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
	596	RXi_GET_DECL(prog,progi);
	597	#ifdef DEBUGGING
	598	const char * const i_strpos = strpos;
	599	#endif
	600	GET_RE_DEBUG_FLAGS_DECL;
	601
	602	PERL_ARGS_ASSERT_RE_INTUIT_START;
	603	PERL_UNUSED_ARG(flags);
	604	PERL_UNUSED_ARG(data);
	605
	606	RX_MATCH_UTF8_set(rx,utf8_target);
	607
	608	if (RX_UTF8(rx)) {
	609	PL_reg_flags \|= RF_utf8;
	610	}
	611	DEBUG_EXECUTE_r(
	612	debug_start_match(rx, utf8_target, strpos, strend,
	613	sv ? "Guessing start of match in sv for"
	614	: "Guessing start of match in string for");
	615	);
	616
	617	/* CHR_DIST() would be more correct here but it makes things slow. */
	618	if (prog->minlen > strend - strpos) {
	619	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	620	"String too short... [re_intuit_start]\n"));
	621	goto fail;
	622	}
	623
	624	/* XXX we need to pass strbeg as a separate arg: the following is
	625	* guesswork and can be wrong... */
	626	if (sv && SvPOK(sv)) {
	627	char * p = SvPVX(sv);
	628	STRLEN cur = SvCUR(sv);
	629	if (p <= strpos && strpos < p + cur) {
	630	strbeg = p;
	631	assert(p <= strend && strend <= p + cur);
	632	}
	633	else
	634	strbeg = strend - cur;
	635	}
	636	else
	637	strbeg = strpos;
	638
	639	PL_regeol = strend;
	640	if (utf8_target) {
	641	if (!prog->check_utf8 && prog->check_substr)
	642	to_utf8_substr(prog);
	643	check = prog->check_utf8;
	644	} else {
	645	if (!prog->check_substr && prog->check_utf8) {
	646	if (! to_byte_substr(prog)) {
	647	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
	648	}
	649	}
	650	check = prog->check_substr;
	651	}
	652	if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
	653	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
	654	\|\| ( (prog->extflags & RXf_ANCH_BOL)
	655	&& !multiline ) ); /* Check after \n? */
	656
	657	if (!ml_anch) {
	658	if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
	659	&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
	660	/* SvCUR is not set on references: SvRV and SvPVX_const overlap */
	661	&& sv && !SvROK(sv)
	662	&& (strpos != strbeg)) {
	663	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
	664	goto fail;
	665	}
	666	if (prog->check_offset_min == prog->check_offset_max
	667	&& !(prog->extflags & RXf_CANY_SEEN)
	668	&& ! multiline) /* /m can cause \n's to match that aren't
	669	accounted for in the string max length.
	670	See [perl #115242] */
	671	{
	672	/* Substring at constant offset from beg-of-str... */
	673	I32 slen;
	674
	675	s = HOP3c(strpos, prog->check_offset_min, strend);
	676
	677	if (SvTAIL(check)) {
	678	slen = SvCUR(check); /* >= 1 */
	679
	680	if ( strend - s > slen \|\| strend - s < slen - 1
	681	\|\| (strend - s == slen && strend[-1] != '\n')) {
	682	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
	683	goto fail_finish;
	684	}
	685	/* Now should match s[0..slen-2] */
	686	slen--;
	687	if (slen && (SvPVX_const(check) != s
	688	\|\| (slen > 1
	689	&& memNE(SvPVX_const(check), s, slen)))) {
	690	report_neq:
	691	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
	692	goto fail_finish;
	693	}
	694	}
	695	else if (SvPVX_const(check) != s
	696	\|\| ((slen = SvCUR(check)) > 1
	697	&& memNE(SvPVX_const(check), s, slen)))
	698	goto report_neq;
	699	check_at = s;
	700	goto success_at_start;
	701	}
	702	}
	703	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
	704	s = strpos;
	705	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	706	end_shift = prog->check_end_shift;
	707
	708	if (!ml_anch) {
	709	const I32 end = prog->check_offset_max + CHR_SVLEN(check)
	710	- (SvTAIL(check) != 0);
	711	const I32 eshift = CHR_DIST((U8)strend, (U8)s) - end;
	712
	713	if (end_shift < eshift)
	714	end_shift = eshift;
	715	}
	716	}
	717	else { /* Can match at random position */
	718	ml_anch = 0;
	719	s = strpos;
	720	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	721	end_shift = prog->check_end_shift;
	722
	723	/* end shift should be non negative here */
	724	}
	725
	726	#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
	727	if (end_shift < 0)
	728	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
	729	(IV)end_shift, RX_PRECOMP(prog));
	730	#endif
	731
	732	restart:
	733	/* Find a possible match in the region s..strend by looking for
	734	the "check" substring in the region corrected by start/end_shift. */
	735
	736	{
	737	I32 srch_start_shift = start_shift;
	738	I32 srch_end_shift = end_shift;
	739	U8* start_point;
	740	U8* end_point;
	741	if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
	742	srch_end_shift -= ((strbeg - s) - srch_start_shift);
	743	srch_start_shift = strbeg - s;
	744	}
	745	DEBUG_OPTIMISE_MORE_r({
	746	PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
	747	(IV)prog->check_offset_min,
	748	(IV)srch_start_shift,
	749	(IV)srch_end_shift,
	750	(IV)prog->check_end_shift);
	751	});
	752
	753	if (prog->extflags & RXf_CANY_SEEN) {
	754	start_point= (U8*)(s + srch_start_shift);
	755	end_point= (U8*)(strend - srch_end_shift);
	756	} else {
	757	start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
	758	end_point= HOP3(strend, -srch_end_shift, strbeg);
	759	}
	760	DEBUG_OPTIMISE_MORE_r({
	761	PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
	762	(int)(end_point - start_point),
	763	(int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
	764	start_point);
	765	});
	766
	767	s = fbm_instr( start_point, end_point,
	768	check, multiline ? FBMrf_MULTILINE : 0);
	769	}
	770	/* Update the count-of-usability, remove useless subpatterns,
	771	unshift s. */
	772
	773	DEBUG_EXECUTE_r({
	774	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	775	SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
	776	PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
	777	(s ? "Found" : "Did not find"),
	778	(check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
	779	? "anchored" : "floating"),
	780	quoted,
	781	RE_SV_TAIL(check),
	782	(s ? " at offset " : "...\n") );
	783	});
	784
	785	if (!s)
	786	goto fail_finish;
	787	/* Finish the diagnostic message */
	788	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
	789
	790	/* XXX dmq: first branch is for positive lookbehind...
	791	Our check string is offset from the beginning of the pattern.
	792	So we need to do any stclass tests offset forward from that
	793	point. I think. :-(
	794	*/
	795
	796
	797
	798	check_at=s;
	799
	800
	801	/* Got a candidate. Check MBOL anchoring, and the other substr.
	802	Start with the other substr.
	803	XXXX no SCREAM optimization yet - and a very coarse implementation
	804	XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
	805	always match. Probably should be marked during compile...
	806	Probably it is right to do no SCREAM here...
	807	*/
	808
	809	if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
	810	: (prog->float_substr && prog->anchored_substr))
	811	{
	812	/* Take into account the "other" substring. */
	813	/* XXXX May be hopelessly wrong for UTF... */
	814	if (!other_last)
	815	other_last = strpos;
	816	if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
	817	do_other_anchored:
	818	{
	819	char * const last = HOP3c(s, -start_shift, strbeg);
	820	char last1, last2;
	821	char * const saved_s = s;
	822	SV* must;
	823
	824	t = s - prog->check_offset_max;
	825	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	826	&& (!utf8_target
	827	\|\| ((t = (char)reghopmaybe3((U8)s, -(prog->check_offset_max), (U8*)strpos))
	828	&& t > strpos)))
	829	NOOP;
	830	else
	831	t = strpos;
	832	t = HOP3c(t, prog->anchored_offset, strend);
	833	if (t < other_last) /* These positions already checked */
	834	t = other_last;
	835	last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
	836	if (last < last1)
	837	last1 = last;
	838	/* XXXX It is not documented what units *_offsets are in.
	839	We assume bytes, but this is clearly wrong.
	840	Meaning this code needs to be carefully reviewed for errors.
	841	dmq.
	842	*/
	843
	844	/* On end-of-str: see comment below. */
	845	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	846	if (must == &PL_sv_undef) {
	847	s = (char*)NULL;
	848	DEBUG_r(must = prog->anchored_utf8); /* for debug */
	849	}
	850	else
	851	s = fbm_instr(
	852	(unsigned char*)t,
	853	HOP3(HOP3(last1, prog->anchored_offset, strend)
	854	+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
	855	must,
	856	multiline ? FBMrf_MULTILINE : 0
	857	);
	858	DEBUG_EXECUTE_r({
	859	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	860	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	861	PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
	862	(s ? "Found" : "Contradicts"),
	863	quoted, RE_SV_TAIL(must));
	864	});
	865
	866
	867	if (!s) {
	868	if (last1 >= last2) {
	869	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	870	", giving up...\n"));
	871	goto fail_finish;
	872	}
	873	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	874	", trying floating at offset %ld...\n",
	875	(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
	876	other_last = HOP3c(last1, prog->anchored_offset+1, strend);
	877	s = HOP3c(last, 1, strend);
	878	goto restart;
	879	}
	880	else {
	881	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	882	(long)(s - i_strpos)));
	883	t = HOP3c(s, -prog->anchored_offset, strbeg);
	884	other_last = HOP3c(s, 1, strend);
	885	s = saved_s;
	886	if (t == strpos)
	887	goto try_at_start;
	888	goto try_at_offset;
	889	}
	890	}
	891	}
	892	else { /* Take into account the floating substring. */
	893	char last, last1;
	894	char * const saved_s = s;
	895	SV* must;
	896
	897	t = HOP3c(s, -start_shift, strbeg);
	898	last1 = last =
	899	HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
	900	if (CHR_DIST((U8)last, (U8)t) > prog->float_max_offset)
	901	last = HOP3c(t, prog->float_max_offset, strend);
	902	s = HOP3c(t, prog->float_min_offset, strend);
	903	if (s < other_last)
	904	s = other_last;
	905	/* XXXX It is not documented what units _offsets are in. Assume bytes. /
	906	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	907	/* fbm_instr() takes into account exact value of end-of-str
	908	if the check is SvTAIL(ed). Since false positives are OK,
	909	and end-of-str is not later than strend we are OK. */
	910	if (must == &PL_sv_undef) {
	911	s = (char*)NULL;
	912	DEBUG_r(must = prog->float_utf8); /* for debug message */
	913	}
	914	else
	915	s = fbm_instr((unsigned char*)s,
	916	(unsigned char*)last + SvCUR(must)
	917	- (SvTAIL(must)!=0),
	918	must, multiline ? FBMrf_MULTILINE : 0);
	919	DEBUG_EXECUTE_r({
	920	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	921	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	922	PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
	923	(s ? "Found" : "Contradicts"),
	924	quoted, RE_SV_TAIL(must));
	925	});
	926	if (!s) {
	927	if (last1 == last) {
	928	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	929	", giving up...\n"));
	930	goto fail_finish;
	931	}
	932	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	933	", trying anchored starting at offset %ld...\n",
	934	(long)(saved_s + 1 - i_strpos)));
	935	other_last = last;
	936	s = HOP3c(t, 1, strend);
	937	goto restart;
	938	}
	939	else {
	940	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	941	(long)(s - i_strpos)));
	942	other_last = s; /* Fix this later. --Hugo */
	943	s = saved_s;
	944	if (t == strpos)
	945	goto try_at_start;
	946	goto try_at_offset;
	947	}
	948	}
	949	}
	950
	951
	952	t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
	953
	954	DEBUG_OPTIMISE_MORE_r(
	955	PerlIO_printf(Perl_debug_log,
	956	"Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
	957	(IV)prog->check_offset_min,
	958	(IV)prog->check_offset_max,
	959	(IV)(s-strpos),
	960	(IV)(t-strpos),
	961	(IV)(t-s),
	962	(IV)(strend-strpos)
	963	)
	964	);
	965
	966	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	967	&& (!utf8_target
	968	\|\| ((t = (char)reghopmaybe3((U8)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
	969	&& t > strpos)))
	970	{
	971	/* Fixed substring is found far enough so that the match
	972	cannot start at strpos. */
	973	try_at_offset:
	974	if (ml_anch && t[-1] != '\n') {
	975	/* Eventually fbm_*() should handle this, but often
	976	anchored_offset is not 0, so this check will not be wasted. */
	977	/* XXXX In the code below we prefer to look for "^" even in
	978	presence of anchored substrings. And we search even
	979	beyond the found float position. These pessimizations
	980	are historical artefacts only. */
	981	find_anchor:
	982	while (t < strend - prog->minlen) {
	983	if (*t == '\n') {
	984	if (t < check_at - prog->check_offset_min) {
	985	if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
	986	/* Since we moved from the found position,
	987	we definitely contradict the found anchored
	988	substr. Due to the above check we do not
	989	contradict "check" substr.
	990	Thus we can arrive here only if check substr
	991	is float. Redo checking for "other"=="fixed".
	992	*/
	993	strpos = t + 1;
	994	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
	995	PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
	996	goto do_other_anchored;
	997	}
	998	/* We don't contradict the found floating substring. */
	999	/* XXXX Why not check for STCLASS? */
	1000	s = t + 1;
	1001	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
	1002	PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
	1003	goto set_useful;
	1004	}
	1005	/* Position contradicts check-string */
	1006	/* XXXX probably better to look for check-string
	1007	than for "\n", so one should lower the limit for t? */
	1008	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
	1009	PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
	1010	other_last = strpos = s = t + 1;
	1011	goto restart;
	1012	}
	1013	t++;
	1014	}
	1015	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
	1016	PL_colors[0], PL_colors[1]));
	1017	goto fail_finish;
	1018	}
	1019	else {
	1020	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
	1021	PL_colors[0], PL_colors[1]));
	1022	}
	1023	s = t;
	1024	set_useful:
	1025	++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
	1026	}
	1027	else {
	1028	/* The found string does not prohibit matching at strpos,
	1029	- no optimization of calling REx engine can be performed,
	1030	unless it was an MBOL and we are not after MBOL,
	1031	or a future STCLASS check will fail this. */
	1032	try_at_start:
	1033	/* Even in this situation we may use MBOL flag if strpos is offset
	1034	wrt the start of the string. */
	1035	if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
	1036	&& (strpos != strbeg) && strpos[-1] != '\n'
	1037	/* May be due to an implicit anchor of m{.foo} /
	1038	&& !(prog->intflags & PREGf_IMPLICIT))
	1039	{
	1040	t = strpos;
	1041	goto find_anchor;
	1042	}
	1043	DEBUG_EXECUTE_r( if (ml_anch)
	1044	PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
	1045	(long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
	1046	);
	1047	success_at_start:
	1048	if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
	1049	&& (utf8_target ? (
	1050	prog->check_utf8 /* Could be deleted already */
	1051	&& --BmUSEFUL(prog->check_utf8) < 0
	1052	&& (prog->check_utf8 == prog->float_utf8)
	1053	) : (
	1054	prog->check_substr /* Could be deleted already */
	1055	&& --BmUSEFUL(prog->check_substr) < 0
	1056	&& (prog->check_substr == prog->float_substr)
	1057	)))
	1058	{
	1059	/* If flags & SOMETHING - do not do it many times on the same match */
	1060	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
	1061	/* XXX Does the destruction order has to change with utf8_target? */
	1062	SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
	1063	SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
	1064	prog->check_substr = prog->check_utf8 = NULL; /* disable */
	1065	prog->float_substr = prog->float_utf8 = NULL; /* clear */
	1066	check = NULL; /* abort */
	1067	s = strpos;
	1068	/* XXXX If the check string was an implicit check MBOL, then we need to unset the relevant flag
	1069	see http://bugs.activestate.com/show_bug.cgi?id=87173 */
	1070	if (prog->intflags & PREGf_IMPLICIT)
	1071	prog->extflags &= ~RXf_ANCH_MBOL;
	1072	/* XXXX This is a remnant of the old implementation. It
	1073	looks wasteful, since now INTUIT can use many
	1074	other heuristics. */
	1075	prog->extflags &= ~RXf_USE_INTUIT;
	1076	/* XXXX What other flags might need to be cleared in this branch? */
	1077	}
	1078	else
	1079	s = strpos;
	1080	}
	1081
	1082	/* Last resort... */
	1083	/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
	1084	/* trie stclasses are too expensive to use here, we are better off to
	1085	leave it to regmatch itself */
	1086	if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
	1087	/* minlen == 0 is possible if regstclass is \b or \B,
	1088	and the fixed substr is ''$.
	1089	Since minlen is already taken into account, s+1 is before strend;
	1090	accidentally, minlen >= 1 guaranties no false positives at s + 1
	1091	even for \b or \B. But (minlen? 1 : 0) below assumes that
	1092	regstclass does not come from lookahead... */
	1093	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
	1094	This leaves EXACTF-ish only, which are dealt with in find_byclass(). */
	1095	const U8* const str = (U8*)STRING(progi->regstclass);
	1096	const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
	1097	? CHR_DIST(str+STR_LEN(progi->regstclass), str)
	1098	: 1);
	1099	char * endpos;
	1100	if (prog->anchored_substr \|\| prog->anchored_utf8 \|\| ml_anch)
	1101	endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
	1102	else if (prog->float_substr \|\| prog->float_utf8)
	1103	endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
	1104	else
	1105	endpos= strend;
	1106
	1107	if (checked_upto < s)
	1108	checked_upto = s;
	1109	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
	1110	(IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
	1111
	1112	t = s;
	1113	s = find_byclass(prog, progi->regstclass, checked_upto, endpos, NULL);
	1114	if (s) {
	1115	checked_upto = s;
	1116	} else {
	1117	#ifdef DEBUGGING
	1118	const char *what = NULL;
	1119	#endif
	1120	if (endpos == strend) {
	1121	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1122	"Could not match STCLASS...\n") );
	1123	goto fail;
	1124	}
	1125	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1126	"This position contradicts STCLASS...\n") );
	1127	if ((prog->extflags & RXf_ANCH) && !ml_anch)
	1128	goto fail;
	1129	checked_upto = HOPBACKc(endpos, start_shift);
	1130	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
	1131	(IV)start_shift, (IV)(check_at - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
	1132	/* Contradict one of substrings */
	1133	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	1134	if ((utf8_target ? prog->anchored_utf8 : prog->anchored_substr) == check) {
	1135	DEBUG_EXECUTE_r( what = "anchored" );
	1136	hop_and_restart:
	1137	s = HOP3c(t, 1, strend);
	1138	if (s + start_shift + end_shift > strend) {
	1139	/* XXXX Should be taken into account earlier? */
	1140	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1141	"Could not match STCLASS...\n") );
	1142	goto fail;
	1143	}
	1144	if (!check)
	1145	goto giveup;
	1146	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1147	"Looking for %s substr starting at offset %ld...\n",
	1148	what, (long)(s + start_shift - i_strpos)) );
	1149	goto restart;
	1150	}
	1151	/* Have both, check_string is floating */
	1152	if (t + start_shift >= check_at) /* Contradicts floating=check */
	1153	goto retry_floating_check;
	1154	/* Recheck anchored substring, but not floating... */
	1155	s = check_at;
	1156	if (!check)
	1157	goto giveup;
	1158	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1159	"Looking for anchored substr starting at offset %ld...\n",
	1160	(long)(other_last - i_strpos)) );
	1161	goto do_other_anchored;
	1162	}
	1163	/* Another way we could have checked stclass at the
	1164	current position only: */
	1165	if (ml_anch) {
	1166	s = t = t + 1;
	1167	if (!check)
	1168	goto giveup;
	1169	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1170	"Looking for /%s^%s/m starting at offset %ld...\n",
	1171	PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
	1172	goto try_at_offset;
	1173	}
	1174	if (!(utf8_target ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
	1175	goto fail;
	1176	/* Check is floating substring. */
	1177	retry_floating_check:
	1178	t = check_at - start_shift;
	1179	DEBUG_EXECUTE_r( what = "floating" );
	1180	goto hop_and_restart;
	1181	}
	1182	if (t != s) {
	1183	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1184	"By STCLASS: moving %ld --> %ld\n",
	1185	(long)(t - i_strpos), (long)(s - i_strpos))
	1186	);
	1187	}
	1188	else {
	1189	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1190	"Does not contradict STCLASS...\n");
	1191	);
	1192	}
	1193	}
	1194	giveup:
	1195	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
	1196	PL_colors[4], (check ? "Guessed" : "Giving up"),
	1197	PL_colors[5], (long)(s - i_strpos)) );
	1198	return s;
	1199
	1200	fail_finish: /* Substring not found */
	1201	if (prog->check_substr \|\| prog->check_utf8) /* could be removed already */
	1202	BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
	1203	fail:
	1204	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
	1205	PL_colors[4], PL_colors[5]));
	1206	return NULL;
	1207	}
	1208
	1209	#define DECL_TRIE_TYPE(scan) \
	1210	const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
	1211	trie_type = ((scan->flags == EXACT) \
	1212	? (utf8_target ? trie_utf8 : trie_plain) \
	1213	: (utf8_target ? trie_utf8_fold : trie_latin_utf8_fold))
	1214
	1215	#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
	1216	uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
	1217	STRLEN skiplen; \
	1218	switch (trie_type) { \
	1219	case trie_utf8_fold: \
	1220	if ( foldlen>0 ) { \
	1221	uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	1222	foldlen -= len; \
	1223	uscan += len; \
	1224	len=0; \
	1225	} else { \
	1226	uvc = to_utf8_fold( (const U8*) uc, foldbuf, &foldlen ); \
	1227	len = UTF8SKIP(uc); \
	1228	skiplen = UNISKIP( uvc ); \
	1229	foldlen -= skiplen; \
	1230	uscan = foldbuf + skiplen; \
	1231	} \
	1232	break; \
	1233	case trie_latin_utf8_fold: \
	1234	if ( foldlen>0 ) { \
	1235	uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	1236	foldlen -= len; \
	1237	uscan += len; \
	1238	len=0; \
	1239	} else { \
	1240	len = 1; \
	1241	uvc = _to_fold_latin1( (U8) *uc, foldbuf, &foldlen, 1); \
	1242	skiplen = UNISKIP( uvc ); \
	1243	foldlen -= skiplen; \
	1244	uscan = foldbuf + skiplen; \
	1245	} \
	1246	break; \
	1247	case trie_utf8: \
	1248	uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \
	1249	break; \
	1250	case trie_plain: \
	1251	uvc = (UV)*uc; \
	1252	len = 1; \
	1253	} \
	1254	if (uvc < 256) { \
	1255	charid = trie->charmap[ uvc ]; \
	1256	} \
	1257	else { \
	1258	charid = 0; \
	1259	if (widecharmap) { \
	1260	SV** const svpp = hv_fetch(widecharmap, \
	1261	(char*)&uvc, sizeof(UV), 0); \
	1262	if (svpp) \
	1263	charid = (U16)SvIV(*svpp); \
	1264	} \
	1265	} \
	1266	} STMT_END
	1267
	1268	#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
	1269	STMT_START { \
	1270	while (s <= e) { \
	1271	if ( (CoNd) \
	1272	&& (ln == 1 \|\| folder(s, pat_string, ln)) \
	1273	&& (!reginfo \|\| regtry(reginfo, &s)) ) \
	1274	goto got_it; \
	1275	s++; \
	1276	} \
	1277	} STMT_END
	1278
	1279	#define REXEC_FBC_UTF8_SCAN(CoDe) \
	1280	STMT_START { \
	1281	while (s < strend && s + (uskip = UTF8SKIP(s)) <= strend) { \
	1282	CoDe \
	1283	s += uskip; \
	1284	} \
	1285	} STMT_END
	1286
	1287	#define REXEC_FBC_SCAN(CoDe) \
	1288	STMT_START { \
	1289	while (s < strend) { \
	1290	CoDe \
	1291	s++; \
	1292	} \
	1293	} STMT_END
	1294
	1295	#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
	1296	REXEC_FBC_UTF8_SCAN( \
	1297	if (CoNd) { \
	1298	if (tmp && (!reginfo \|\| regtry(reginfo, &s))) \
	1299	goto got_it; \
	1300	else \
	1301	tmp = doevery; \
	1302	} \
	1303	else \
	1304	tmp = 1; \
	1305	)
	1306
	1307	#define REXEC_FBC_CLASS_SCAN(CoNd) \
	1308	REXEC_FBC_SCAN( \
	1309	if (CoNd) { \
	1310	if (tmp && (!reginfo \|\| regtry(reginfo, &s))) \
	1311	goto got_it; \
	1312	else \
	1313	tmp = doevery; \
	1314	} \
	1315	else \
	1316	tmp = 1; \
	1317	)
	1318
	1319	#define REXEC_FBC_TRYIT \
	1320	if ((!reginfo \|\| regtry(reginfo, &s))) \
	1321	goto got_it
	1322
	1323	#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
	1324	if (utf8_target) { \
	1325	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1326	} \
	1327	else { \
	1328	REXEC_FBC_CLASS_SCAN(CoNd); \
	1329	}
	1330
	1331	#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
	1332	if (utf8_target) { \
	1333	UtFpReLoAd; \
	1334	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1335	} \
	1336	else { \
	1337	REXEC_FBC_CLASS_SCAN(CoNd); \
	1338	}
	1339
	1340	#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
	1341	PL_reg_flags \|= RF_tainted; \
	1342	if (utf8_target) { \
	1343	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1344	} \
	1345	else { \
	1346	REXEC_FBC_CLASS_SCAN(CoNd); \
	1347	}
	1348
	1349	#define DUMP_EXEC_POS(li,s,doutf8) \
	1350	dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
	1351
	1352
	1353	#define UTF8_NOLOAD(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1354	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1355	tmp = TEST_NON_UTF8(tmp); \
	1356	REXEC_FBC_UTF8_SCAN( \
	1357	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1358	tmp = !tmp; \
	1359	IF_SUCCESS; \
	1360	} \
	1361	else { \
	1362	IF_FAIL; \
	1363	} \
	1364	); \
	1365
	1366	#define UTF8_LOAD(TeSt1_UtF8, TeSt2_UtF8, IF_SUCCESS, IF_FAIL) \
	1367	if (s == PL_bostr) { \
	1368	tmp = '\n'; \
	1369	} \
	1370	else { \
	1371	U8 * const r = reghop3((U8)s, -1, (U8)PL_bostr); \
	1372	tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT); \
	1373	} \
	1374	tmp = TeSt1_UtF8; \
	1375	LOAD_UTF8_CHARCLASS_ALNUM(); \
	1376	REXEC_FBC_UTF8_SCAN( \
	1377	if (tmp == ! (TeSt2_UtF8)) { \
	1378	tmp = !tmp; \
	1379	IF_SUCCESS; \
	1380	} \
	1381	else { \
	1382	IF_FAIL; \
	1383	} \
	1384	); \
	1385
	1386	/* The only difference between the BOUND and NBOUND cases is that
	1387	* REXEC_FBC_TRYIT is called when matched in BOUND, and when non-matched in
	1388	* NBOUND. This is accomplished by passing it in either the if or else clause,
	1389	* with the other one being empty */
	1390	#define FBC_BOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1391	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1392
	1393	#define FBC_BOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1394	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1395
	1396	#define FBC_NBOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1397	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1398
	1399	#define FBC_NBOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1400	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1401
	1402
	1403	/* Common to the BOUND and NBOUND cases. Unfortunately the UTF8 tests need to
	1404	* be passed in completely with the variable name being tested, which isn't
	1405	* such a clean interface, but this is easier to read than it was before. We
	1406	* are looking for the boundary (or non-boundary between a word and non-word
	1407	* character. The utf8 and non-utf8 cases have the same logic, but the details
	1408	* must be different. Find the "wordness" of the character just prior to this
	1409	* one, and compare it with the wordness of this one. If they differ, we have
	1410	* a boundary. At the beginning of the string, pretend that the previous
	1411	* character was a new-line */
	1412	#define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1413	if (utf8_target) { \
	1414	UTF8_CODE \
	1415	} \
	1416	else { /* Not utf8 */ \
	1417	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1418	tmp = TEST_NON_UTF8(tmp); \
	1419	REXEC_FBC_SCAN( \
	1420	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1421	tmp = !tmp; \
	1422	IF_SUCCESS; \
	1423	} \
	1424	else { \
	1425	IF_FAIL; \
	1426	} \
	1427	); \
	1428	} \
	1429	if ((!prog->minlen && tmp) && (!reginfo \|\| regtry(reginfo, &s))) \
	1430	goto got_it;
	1431
	1432	/* We know what class REx starts with. Try to find this position... */
	1433	/* if reginfo is NULL, its a dryrun */
	1434	/* annoyingly all the vars in this routine have different names from their counterparts
	1435	in regmatch. /grrr */
	1436
	1437	STATIC char *
	1438	S_find_byclass(pTHX_ regexp * prog, const regnode c, char s,
	1439	const char strend, regmatch_info reginfo)
	1440	{
	1441	dVAR;
	1442	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
	1443	char pat_string; / The pattern's exactish string */
	1444	char pat_end; / ptr to end char of pat_string */
	1445	re_fold_t folder; /* Function for computing non-utf8 folds */
	1446	const U8 fold_array; / array for folding ords < 256 */
	1447	STRLEN ln;
	1448	STRLEN lnc;
	1449	STRLEN uskip;
	1450	U8 c1;
	1451	U8 c2;
	1452	char *e;
	1453	I32 tmp = 1; /* Scratch variable? */
	1454	const bool utf8_target = PL_reg_match_utf8;
	1455	UV utf8_fold_flags = 0;
	1456	RXi_GET_DECL(prog,progi);
	1457
	1458	PERL_ARGS_ASSERT_FIND_BYCLASS;
	1459
	1460	/* We know what class it must start with. */
	1461	switch (OP(c)) {
	1462	case ANYOF:
	1463	if (utf8_target) {
	1464	REXEC_FBC_UTF8_CLASS_SCAN(
	1465	reginclass(prog, c, (U8*)s, utf8_target));
	1466	}
	1467	else {
	1468	REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
	1469	}
	1470	break;
	1471	case CANY:
	1472	REXEC_FBC_SCAN(
	1473	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1474	goto got_it;
	1475	else
	1476	tmp = doevery;
	1477	);
	1478	break;
	1479
	1480	case EXACTFA:
	1481	if (UTF_PATTERN \|\| utf8_target) {
	1482	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	1483	goto do_exactf_utf8;
	1484	}
	1485	fold_array = PL_fold_latin1; /* Latin1 folds are not affected by */
	1486	folder = foldEQ_latin1; /* /a, except the sharp s one which */
	1487	goto do_exactf_non_utf8; /* isn't dealt with by these */
	1488
	1489	case EXACTF:
	1490	if (utf8_target) {
	1491
	1492	/* regcomp.c already folded this if pattern is in UTF-8 */
	1493	utf8_fold_flags = 0;
	1494	goto do_exactf_utf8;
	1495	}
	1496	fold_array = PL_fold;
	1497	folder = foldEQ;
	1498	goto do_exactf_non_utf8;
	1499
	1500	case EXACTFL:
	1501	if (UTF_PATTERN \|\| utf8_target) {
	1502	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	1503	goto do_exactf_utf8;
	1504	}
	1505	fold_array = PL_fold_locale;
	1506	folder = foldEQ_locale;
	1507	goto do_exactf_non_utf8;
	1508
	1509	case EXACTFU_SS:
	1510	if (UTF_PATTERN) {
	1511	utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
	1512	}
	1513	goto do_exactf_utf8;
	1514
	1515	case EXACTFU_TRICKYFOLD:
	1516	case EXACTFU:
	1517	if (UTF_PATTERN \|\| utf8_target) {
	1518	utf8_fold_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	1519	goto do_exactf_utf8;
	1520	}
	1521
	1522	/* Any 'ss' in the pattern should have been replaced by regcomp,
	1523	* so we don't have to worry here about this single special case
	1524	* in the Latin1 range */
	1525	fold_array = PL_fold_latin1;
	1526	folder = foldEQ_latin1;
	1527
	1528	/* FALL THROUGH */
	1529
	1530	do_exactf_non_utf8: /* Neither pattern nor string are UTF8, and there
	1531	are no glitches with fold-length differences
	1532	between the target string and pattern */
	1533
	1534	/* The idea in the non-utf8 EXACTF* cases is to first find the
	1535	* first character of the EXACTF* node and then, if necessary,
	1536	* case-insensitively compare the full text of the node. c1 is the
	1537	* first character. c2 is its fold. This logic will not work for
	1538	* Unicode semantics and the german sharp ss, which hence should
	1539	* not be compiled into a node that gets here. */
	1540	pat_string = STRING(c);
	1541	ln = STR_LEN(c); /* length to match in octets/bytes */
	1542
	1543	/* We know that we have to match at least 'ln' bytes (which is the
	1544	* same as characters, since not utf8). If we have to match 3
	1545	* characters, and there are only 2 availabe, we know without
	1546	* trying that it will fail; so don't start a match past the
	1547	* required minimum number from the far end */
	1548	e = HOP3c(strend, -((I32)ln), s);
	1549
	1550	if (!reginfo && e < s) {
	1551	e = s; /* Due to minlen logic of intuit() */
	1552	}
	1553
	1554	c1 = *pat_string;
	1555	c2 = fold_array[c1];
	1556	if (c1 == c2) { /* If char and fold are the same */
	1557	REXEC_FBC_EXACTISH_SCAN((U8)s == c1);
	1558	}
	1559	else {
	1560	REXEC_FBC_EXACTISH_SCAN((U8)s == c1 \|\| (U8)s == c2);
	1561	}
	1562	break;
	1563
	1564	do_exactf_utf8:
	1565	{
	1566	unsigned expansion;
	1567
	1568
	1569	/* If one of the operands is in utf8, we can't use the simpler
	1570	* folding above, due to the fact that many different characters
	1571	* can have the same fold, or portion of a fold, or different-
	1572	* length fold */
	1573	pat_string = STRING(c);
	1574	ln = STR_LEN(c); /* length to match in octets/bytes */
	1575	pat_end = pat_string + ln;
	1576	lnc = (UTF_PATTERN) /* length to match in characters */
	1577	? utf8_length((U8 ) pat_string, (U8 ) pat_end)
	1578	: ln;
	1579
	1580	/* We have 'lnc' characters to match in the pattern, but because of
	1581	* multi-character folding, each character in the target can match
	1582	* up to 3 characters (Unicode guarantees it will never exceed
	1583	* this) if it is utf8-encoded; and up to 2 if not (based on the
	1584	* fact that the Latin 1 folds are already determined, and the
	1585	* only multi-char fold in that range is the sharp-s folding to
	1586	* 'ss'. Thus, a pattern character can match as little as 1/3 of a
	1587	* string character. Adjust lnc accordingly, rounding up, so that
	1588	* if we need to match at least 4+1/3 chars, that really is 5. */
	1589	expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
	1590	lnc = (lnc + expansion - 1) / expansion;
	1591
	1592	/* As in the non-UTF8 case, if we have to match 3 characters, and
	1593	* only 2 are left, it's guaranteed to fail, so don't start a
	1594	* match that would require us to go beyond the end of the string
	1595	*/
	1596	e = HOP3c(strend, -((I32)lnc), s);
	1597
	1598	if (!reginfo && e < s) {
	1599	e = s; /* Due to minlen logic of intuit() */
	1600	}
	1601
	1602	/* XXX Note that we could recalculate e to stop the loop earlier,
	1603	* as the worst case expansion above will rarely be met, and as we
	1604	* go along we would usually find that e moves further to the left.
	1605	* This would happen only after we reached the point in the loop
	1606	* where if there were no expansion we should fail. Unclear if
	1607	* worth the expense */
	1608
	1609	while (s <= e) {
	1610	char my_strend= (char )strend;
	1611	if (foldEQ_utf8_flags(s, &my_strend, 0, utf8_target,
	1612	pat_string, NULL, ln, cBOOL(UTF_PATTERN), utf8_fold_flags)
	1613	&& (!reginfo \|\| regtry(reginfo, &s)) )
	1614	{
	1615	goto got_it;
	1616	}
	1617	s += (utf8_target) ? UTF8SKIP(s) : 1;
	1618	}
	1619	break;
	1620	}
	1621	case BOUNDL:
	1622	PL_reg_flags \|= RF_tainted;
	1623	FBC_BOUND(isALNUM_LC,
	1624	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1625	isALNUM_LC_utf8((U8*)s));
	1626	break;
	1627	case NBOUNDL:
	1628	PL_reg_flags \|= RF_tainted;
	1629	FBC_NBOUND(isALNUM_LC,
	1630	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1631	isALNUM_LC_utf8((U8*)s));
	1632	break;
	1633	case BOUND:
	1634	FBC_BOUND(isWORDCHAR,
	1635	isALNUM_uni(tmp),
	1636	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1637	break;
	1638	case BOUNDA:
	1639	FBC_BOUND_NOLOAD(isWORDCHAR_A,
	1640	isWORDCHAR_A(tmp),
	1641	isWORDCHAR_A((U8*)s));
	1642	break;
	1643	case NBOUND:
	1644	FBC_NBOUND(isWORDCHAR,
	1645	isALNUM_uni(tmp),
	1646	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1647	break;
	1648	case NBOUNDA:
	1649	FBC_NBOUND_NOLOAD(isWORDCHAR_A,
	1650	isWORDCHAR_A(tmp),
	1651	isWORDCHAR_A((U8*)s));
	1652	break;
	1653	case BOUNDU:
	1654	FBC_BOUND(isWORDCHAR_L1,
	1655	isALNUM_uni(tmp),
	1656	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1657	break;
	1658	case NBOUNDU:
	1659	FBC_NBOUND(isWORDCHAR_L1,
	1660	isALNUM_uni(tmp),
	1661	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1662	break;
	1663	case ALNUML:
	1664	REXEC_FBC_CSCAN_TAINT(
	1665	isALNUM_LC_utf8((U8*)s),
	1666	isALNUM_LC(*s)
	1667	);
	1668	break;
	1669	case ALNUMU:
	1670	REXEC_FBC_CSCAN_PRELOAD(
	1671	LOAD_UTF8_CHARCLASS_ALNUM(),
	1672	swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1673	isWORDCHAR_L1((U8) *s)
	1674	);
	1675	break;
	1676	case ALNUM:
	1677	REXEC_FBC_CSCAN_PRELOAD(
	1678	LOAD_UTF8_CHARCLASS_ALNUM(),
	1679	swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1680	isWORDCHAR((U8) *s)
	1681	);
	1682	break;
	1683	case ALNUMA:
	1684	/* Don't need to worry about utf8, as it can match only a single
	1685	* byte invariant character */
	1686	REXEC_FBC_CLASS_SCAN( isWORDCHAR_A(*s));
	1687	break;
	1688	case NALNUMU:
	1689	REXEC_FBC_CSCAN_PRELOAD(
	1690	LOAD_UTF8_CHARCLASS_ALNUM(),
	1691	!swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1692	! isWORDCHAR_L1((U8) *s)
	1693	);
	1694	break;
	1695	case NALNUM:
	1696	REXEC_FBC_CSCAN_PRELOAD(
	1697	LOAD_UTF8_CHARCLASS_ALNUM(),
	1698	!swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target),
	1699	! isALNUM(*s)
	1700	);
	1701	break;
	1702	case NALNUMA:
	1703	REXEC_FBC_CSCAN(
	1704	!isWORDCHAR_A(*s),
	1705	!isWORDCHAR_A(*s)
	1706	);
	1707	break;
	1708	case NALNUML:
	1709	REXEC_FBC_CSCAN_TAINT(
	1710	!isALNUM_LC_utf8((U8*)s),
	1711	!isALNUM_LC(*s)
	1712	);
	1713	break;
	1714	case SPACEU:
	1715	REXEC_FBC_CSCAN(
	1716	is_XPERLSPACE_utf8(s),
	1717	isSPACE_L1((U8) *s)
	1718	);
	1719	break;
	1720	case SPACE:
	1721	REXEC_FBC_CSCAN(
	1722	is_XPERLSPACE_utf8(s),
	1723	isSPACE((U8) *s)
	1724	);
	1725	break;
	1726	case SPACEA:
	1727	/* Don't need to worry about utf8, as it can match only a single
	1728	* byte invariant character */
	1729	REXEC_FBC_CLASS_SCAN( isSPACE_A(*s));
	1730	break;
	1731	case SPACEL:
	1732	REXEC_FBC_CSCAN_TAINT(
	1733	isSPACE_LC_utf8((U8*)s),
	1734	isSPACE_LC(*s)
	1735	);
	1736	break;
	1737	case NSPACEU:
	1738	REXEC_FBC_CSCAN(
	1739	! is_XPERLSPACE_utf8(s),
	1740	! isSPACE_L1((U8) *s)
	1741	);
	1742	break;
	1743	case NSPACE:
	1744	REXEC_FBC_CSCAN(
	1745	! is_XPERLSPACE_utf8(s),
	1746	! isSPACE((U8) *s)
	1747	);
	1748	break;
	1749	case NSPACEA:
	1750	REXEC_FBC_CSCAN(
	1751	!isSPACE_A(*s),
	1752	!isSPACE_A(*s)
	1753	);
	1754	break;
	1755	case NSPACEL:
	1756	REXEC_FBC_CSCAN_TAINT(
	1757	!isSPACE_LC_utf8((U8*)s),
	1758	!isSPACE_LC(*s)
	1759	);
	1760	break;
	1761	case DIGIT:
	1762	REXEC_FBC_CSCAN_PRELOAD(
	1763	LOAD_UTF8_CHARCLASS_DIGIT(),
	1764	swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
	1765	isDIGIT(*s)
	1766	);
	1767	break;
	1768	case DIGITA:
	1769	/* Don't need to worry about utf8, as it can match only a single
	1770	* byte invariant character */
	1771	REXEC_FBC_CLASS_SCAN( isDIGIT_A(*s));
	1772	break;
	1773	case DIGITL:
	1774	REXEC_FBC_CSCAN_TAINT(
	1775	isDIGIT_LC_utf8((U8*)s),
	1776	isDIGIT_LC(*s)
	1777	);
	1778	break;
	1779	case NDIGIT:
	1780	REXEC_FBC_CSCAN_PRELOAD(
	1781	LOAD_UTF8_CHARCLASS_DIGIT(),
	1782	!swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
	1783	!isDIGIT(*s)
	1784	);
	1785	break;
	1786	case NDIGITA:
	1787	REXEC_FBC_CSCAN(
	1788	!isDIGIT_A(*s),
	1789	!isDIGIT_A(*s)
	1790	);
	1791	break;
	1792	case NDIGITL:
	1793	REXEC_FBC_CSCAN_TAINT(
	1794	!isDIGIT_LC_utf8((U8*)s),
	1795	!isDIGIT_LC(*s)
	1796	);
	1797	break;
	1798	case LNBREAK:
	1799	REXEC_FBC_CSCAN(
	1800	is_LNBREAK_utf8_safe(s, strend),
	1801	is_LNBREAK_latin1_safe(s, strend)
	1802	);
	1803	break;
	1804	case VERTWS:
	1805	REXEC_FBC_CSCAN(
	1806	is_VERTWS_utf8_safe(s, strend),
	1807	is_VERTWS_latin1_safe(s, strend)
	1808	);
	1809	break;
	1810	case NVERTWS:
	1811	REXEC_FBC_CSCAN(
	1812	!is_VERTWS_utf8_safe(s, strend),
	1813	!is_VERTWS_latin1_safe(s, strend)
	1814	);
	1815	break;
	1816	case HORIZWS:
	1817	REXEC_FBC_CSCAN(
	1818	is_HORIZWS_utf8_safe(s, strend),
	1819	is_HORIZWS_latin1_safe(s, strend)
	1820	);
	1821	break;
	1822	case NHORIZWS:
	1823	REXEC_FBC_CSCAN(
	1824	!is_HORIZWS_utf8_safe(s, strend),
	1825	!is_HORIZWS_latin1_safe(s, strend)
	1826	);
	1827	break;
	1828	case POSIXA:
	1829	/* Don't need to worry about utf8, as it can match only a single
	1830	* byte invariant character. The flag in this node type is the
	1831	* class number to pass to _generic_isCC() to build a mask for
	1832	* searching in PL_charclass[] */
	1833	REXEC_FBC_CLASS_SCAN( _generic_isCC_A(*s, FLAGS(c)));
	1834	break;
	1835	case NPOSIXA:
	1836	REXEC_FBC_CSCAN(
	1837	!_generic_isCC_A(*s, FLAGS(c)),
	1838	!_generic_isCC_A(*s, FLAGS(c))
	1839	);
	1840	break;
	1841
	1842	case AHOCORASICKC:
	1843	case AHOCORASICK:
	1844	{
	1845	DECL_TRIE_TYPE(c);
	1846	/* what trie are we using right now */
	1847	reg_ac_data *aho
	1848	= (reg_ac_data*)progi->data->data[ ARG( c ) ];
	1849	reg_trie_data *trie
	1850	= (reg_trie_data*)progi->data->data[ aho->trie ];
	1851	HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
	1852
	1853	const char *last_start = strend - trie->minlen;
	1854	#ifdef DEBUGGING
	1855	const char *real_start = s;
	1856	#endif
	1857	STRLEN maxlen = trie->maxlen;
	1858	SV *sv_points;
	1859	U8 *points; / map of where we were in the input string
	1860	when reading a given char. For ASCII this
	1861	is unnecessary overhead as the relationship
	1862	is always 1:1, but for Unicode, especially
	1863	case folded Unicode this is not true. */
	1864	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1865	U8 *bitmap=NULL;
	1866
	1867
	1868	GET_RE_DEBUG_FLAGS_DECL;
	1869
	1870	/* We can't just allocate points here. We need to wrap it in
	1871	* an SV so it gets freed properly if there is a croak while
	1872	* running the match */
	1873	ENTER;
	1874	SAVETMPS;
	1875	sv_points=newSV(maxlen * sizeof(U8 *));
	1876	SvCUR_set(sv_points,
	1877	maxlen * sizeof(U8 *));
	1878	SvPOK_on(sv_points);
	1879	sv_2mortal(sv_points);
	1880	points=(U8**)SvPV_nolen(sv_points );
	1881	if ( trie_type != trie_utf8_fold
	1882	&& (trie->bitmap \|\| OP(c)==AHOCORASICKC) )
	1883	{
	1884	if (trie->bitmap)
	1885	bitmap=(U8*)trie->bitmap;
	1886	else
	1887	bitmap=(U8*)ANYOF_BITMAP(c);
	1888	}
	1889	/* this is the Aho-Corasick algorithm modified a touch
	1890	to include special handling for long "unknown char"
	1891	sequences. The basic idea being that we use AC as long
	1892	as we are dealing with a possible matching char, when
	1893	we encounter an unknown char (and we have not encountered
	1894	an accepting state) we scan forward until we find a legal
	1895	starting char.
	1896	AC matching is basically that of trie matching, except
	1897	that when we encounter a failing transition, we fall back
	1898	to the current states "fail state", and try the current char
	1899	again, a process we repeat until we reach the root state,
	1900	state 1, or a legal transition. If we fail on the root state
	1901	then we can either terminate if we have reached an accepting
	1902	state previously, or restart the entire process from the beginning
	1903	if we have not.
	1904
	1905	*/
	1906	while (s <= last_start) {
	1907	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1908	U8 uc = (U8)s;
	1909	U16 charid = 0;
	1910	U32 base = 1;
	1911	U32 state = 1;
	1912	UV uvc = 0;
	1913	STRLEN len = 0;
	1914	STRLEN foldlen = 0;
	1915	U8 uscan = (U8)NULL;
	1916	U8 *leftmost = NULL;
	1917	#ifdef DEBUGGING
	1918	U32 accepted_word= 0;
	1919	#endif
	1920	U32 pointpos = 0;
	1921
	1922	while ( state && uc <= (U8*)strend ) {
	1923	int failed=0;
	1924	U32 word = aho->states[ state ].wordnum;
	1925
	1926	if( state==1 ) {
	1927	if ( bitmap ) {
	1928	DEBUG_TRIE_EXECUTE_r(
	1929	if ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1930	dump_exec_pos( (char *)uc, c, strend, real_start,
	1931	(char *)uc, utf8_target );
	1932	PerlIO_printf( Perl_debug_log,
	1933	" Scanning for legal start char...\n");
	1934	}
	1935	);
	1936	if (utf8_target) {
	1937	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1938	uc += UTF8SKIP(uc);
	1939	}
	1940	} else {
	1941	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1942	uc++;
	1943	}
	1944	}
	1945	s= (char *)uc;
	1946	}
	1947	if (uc >(U8*)last_start) break;
	1948	}
	1949
	1950	if ( word ) {
	1951	U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
	1952	if (!leftmost \|\| lpos < leftmost) {
	1953	DEBUG_r(accepted_word=word);
	1954	leftmost= lpos;
	1955	}
	1956	if (base==0) break;
	1957
	1958	}
	1959	points[pointpos++ % maxlen]= uc;
	1960	if (foldlen \|\| uc < (U8*)strend) {
	1961	REXEC_TRIE_READ_CHAR(trie_type, trie,
	1962	widecharmap, uc,
	1963	uscan, len, uvc, charid, foldlen,
	1964	foldbuf, uniflags);
	1965	DEBUG_TRIE_EXECUTE_r({
	1966	dump_exec_pos( (char *)uc, c, strend,
	1967	real_start, s, utf8_target);
	1968	PerlIO_printf(Perl_debug_log,
	1969	" Charid:%3u CP:%4"UVxf" ",
	1970	charid, uvc);
	1971	});
	1972	}
	1973	else {
	1974	len = 0;
	1975	charid = 0;
	1976	}
	1977
	1978
	1979	do {
	1980	#ifdef DEBUGGING
	1981	word = aho->states[ state ].wordnum;
	1982	#endif
	1983	base = aho->states[ state ].trans.base;
	1984
	1985	DEBUG_TRIE_EXECUTE_r({
	1986	if (failed)
	1987	dump_exec_pos( (char *)uc, c, strend, real_start,
	1988	s, utf8_target );
	1989	PerlIO_printf( Perl_debug_log,
	1990	"%sState: %4"UVxf", word=%"UVxf,
	1991	failed ? " Fail transition to " : "",
	1992	(UV)state, (UV)word);
	1993	});
	1994	if ( base ) {
	1995	U32 tmp;
	1996	I32 offset;
	1997	if (charid &&
	1998	( ((offset = base + charid
	1999	- 1 - trie->uniquecharcount)) >= 0)
	2000	&& ((U32)offset < trie->lasttrans)
	2001	&& trie->trans[offset].check == state
	2002	&& (tmp=trie->trans[offset].next))
	2003	{
	2004	DEBUG_TRIE_EXECUTE_r(
	2005	PerlIO_printf( Perl_debug_log," - legal\n"));
	2006	state = tmp;
	2007	break;
	2008	}
	2009	else {
	2010	DEBUG_TRIE_EXECUTE_r(
	2011	PerlIO_printf( Perl_debug_log," - fail\n"));
	2012	failed = 1;
	2013	state = aho->fail[state];
	2014	}
	2015	}
	2016	else {
	2017	/* we must be accepting here */
	2018	DEBUG_TRIE_EXECUTE_r(
	2019	PerlIO_printf( Perl_debug_log," - accepting\n"));
	2020	failed = 1;
	2021	break;
	2022	}
	2023	} while(state);
	2024	uc += len;
	2025	if (failed) {
	2026	if (leftmost)
	2027	break;
	2028	if (!state) state = 1;
	2029	}
	2030	}
	2031	if ( aho->states[ state ].wordnum ) {
	2032	U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
	2033	if (!leftmost \|\| lpos < leftmost) {
	2034	DEBUG_r(accepted_word=aho->states[ state ].wordnum);
	2035	leftmost = lpos;
	2036	}
	2037	}
	2038	if (leftmost) {
	2039	s = (char*)leftmost;
	2040	DEBUG_TRIE_EXECUTE_r({
	2041	PerlIO_printf(
	2042	Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
	2043	(UV)accepted_word, (IV)(s - real_start)
	2044	);
	2045	});
	2046	if (!reginfo \|\| regtry(reginfo, &s)) {
	2047	FREETMPS;
	2048	LEAVE;
	2049	goto got_it;
	2050	}
	2051	s = HOPc(s,1);
	2052	DEBUG_TRIE_EXECUTE_r({
	2053	PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
	2054	});
	2055	} else {
	2056	DEBUG_TRIE_EXECUTE_r(
	2057	PerlIO_printf( Perl_debug_log,"No match.\n"));
	2058	break;
	2059	}
	2060	}
	2061	FREETMPS;
	2062	LEAVE;
	2063	}
	2064	break;
	2065	default:
	2066	Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
	2067	break;
	2068	}
	2069	return 0;
	2070	got_it:
	2071	return s;
	2072	}
	2073
	2074
	2075	/*
	2076	- regexec_flags - match a regexp against a string
	2077	*/
	2078	I32
	2079	Perl_regexec_flags(pTHX_ REGEXP * const rx, char stringarg, register char strend,
	2080	char strbeg, I32 minend, SV sv, void *data, U32 flags)
	2081	/* stringarg: the point in the string at which to begin matching */
	2082	/* strend: pointer to null at end of string */
	2083	/* strbeg: real beginning of string */
	2084	/* minend: end of match must be >= minend bytes after stringarg. */
	2085	/* sv: SV being matched: only used for utf8 flag, pos() etc; string
	2086	* itself is accessed via the pointers above */
	2087	/* data: May be used for some additional optimizations.
	2088	Currently its only used, with a U32 cast, for transmitting
	2089	the ganch offset when doing a /g match. This will change */
	2090	/* nosave: For optimizations. */
	2091
	2092	{
	2093	dVAR;
	2094	struct regexp *const prog = ReANY(rx);
	2095	/register/ char *s;
	2096	regnode *c;
	2097	/register/ char *startpos = stringarg;
	2098	I32 minlen; /* must match at least this many chars */
	2099	I32 dontbother = 0; /* how many characters not to try at end */
	2100	I32 end_shift = 0; /* Same for the end. / / CC */
	2101	I32 scream_pos = -1; /* Internal iterator of scream. */
	2102	char *scream_olds = NULL;
	2103	const bool utf8_target = cBOOL(DO_UTF8(sv));
	2104	I32 multiline;
	2105	RXi_GET_DECL(prog,progi);
	2106	regmatch_info reginfo; /* create some info to pass to regtry etc */
	2107	regexp_paren_pair *swap = NULL;
	2108	GET_RE_DEBUG_FLAGS_DECL;
	2109
	2110	PERL_ARGS_ASSERT_REGEXEC_FLAGS;
	2111	PERL_UNUSED_ARG(data);
	2112
	2113	/* Be paranoid... */
	2114	if (prog == NULL \|\| startpos == NULL) {
	2115	Perl_croak(aTHX_ "NULL regexp parameter");
	2116	return 0;
	2117	}
	2118
	2119	multiline = prog->extflags & RXf_PMf_MULTILINE;
	2120	reginfo.prog = rx; /* Yes, sorry that this is confusing. */
	2121
	2122	RX_MATCH_UTF8_set(rx, utf8_target);
	2123	DEBUG_EXECUTE_r(
	2124	debug_start_match(rx, utf8_target, startpos, strend,
	2125	"Matching");
	2126	);
	2127
	2128	minlen = prog->minlen;
	2129
	2130	if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
	2131	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	2132	"String too short [regexec_flags]...\n"));
	2133	goto phooey;
	2134	}
	2135
	2136
	2137	/* Check validity of program. */
	2138	if (UCHARAT(progi->program) != REG_MAGIC) {
	2139	Perl_croak(aTHX_ "corrupted regexp program");
	2140	}
	2141
	2142	PL_reg_flags = 0;
	2143	PL_reg_state.re_state_eval_setup_done = FALSE;
	2144	PL_reg_maxiter = 0;
	2145
	2146	if (RX_UTF8(rx))
	2147	PL_reg_flags \|= RF_utf8;
	2148
	2149	/* Mark beginning of line for ^ and lookbehind. */
	2150	reginfo.bol = startpos; /* XXX not used ??? */
	2151	PL_bostr = strbeg;
	2152	reginfo.sv = sv;
	2153
	2154	/* Mark end of line for $ (and such) */
	2155	PL_regeol = strend;
	2156
	2157	/* see how far we have to get to not match where we matched before */
	2158	reginfo.till = startpos+minend;
	2159
	2160	/* If there is a "must appear" string, look for it. */
	2161	s = startpos;
	2162
	2163	if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
	2164	MAGIC *mg;
	2165	if (flags & REXEC_IGNOREPOS){ /* Means: check only at start */
	2166	reginfo.ganch = startpos + prog->gofs;
	2167	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2168	"GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
	2169	} else if (sv && SvTYPE(sv) >= SVt_PVMG
	2170	&& SvMAGIC(sv)
	2171	&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
	2172	&& mg->mg_len >= 0) {
	2173	reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
	2174	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2175	"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
	2176
	2177	if (prog->extflags & RXf_ANCH_GPOS) {
	2178	if (s > reginfo.ganch)
	2179	goto phooey;
	2180	s = reginfo.ganch - prog->gofs;
	2181	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2182	"GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
	2183	if (s < strbeg)
	2184	goto phooey;
	2185	}
	2186	}
	2187	else if (data) {
	2188	reginfo.ganch = strbeg + PTR2UV(data);
	2189	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2190	"GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
	2191
	2192	} else { /* pos() not defined */
	2193	reginfo.ganch = strbeg;
	2194	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2195	"GPOS: reginfo.ganch = strbeg\n"));
	2196	}
	2197	}
	2198	if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
	2199	/* We have to be careful. If the previous successful match
	2200	was from this regex we don't want a subsequent partially
	2201	successful match to clobber the old results.
	2202	So when we detect this possibility we add a swap buffer
	2203	to the re, and switch the buffer each match. If we fail
	2204	we switch it back, otherwise we leave it swapped.
	2205	*/
	2206	swap = prog->offs;
	2207	/* do we need a save destructor here for eval dies? */
	2208	Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
	2209	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	2210	"rex=0x%"UVxf" saving offs: orig=0x%"UVxf" new=0x%"UVxf"\n",
	2211	PTR2UV(prog),
	2212	PTR2UV(swap),
	2213	PTR2UV(prog->offs)
	2214	));
	2215	}
	2216	if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL \|\| prog->check_utf8 != NULL)) {
	2217	re_scream_pos_data d;
	2218
	2219	d.scream_olds = &scream_olds;
	2220	d.scream_pos = &scream_pos;
	2221	s = re_intuit_start(rx, sv, s, strend, flags, &d);
	2222	if (!s) {
	2223	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
	2224	goto phooey; /* not present */
	2225	}
	2226	}
	2227
	2228
	2229
	2230	/* Simplest case: anchored match need be tried only once. */
	2231	/* [unless only anchor is BOL and multiline is set] */
	2232	if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
	2233	if (s == startpos && regtry(&reginfo, &startpos))
	2234	goto got_it;
	2235	else if (multiline \|\| (prog->intflags & PREGf_IMPLICIT)
	2236	\|\| (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
	2237	{
	2238	char *end;
	2239
	2240	if (minlen)
	2241	dontbother = minlen - 1;
	2242	end = HOP3c(strend, -dontbother, strbeg) - 1;
	2243	/* for multiline we only have to try after newlines */
	2244	if (prog->check_substr \|\| prog->check_utf8) {
	2245	/* because of the goto we can not easily reuse the macros for bifurcating the
	2246	unicode/non-unicode match modes here like we do elsewhere - demerphq */
	2247	if (utf8_target) {
	2248	if (s == startpos)
	2249	goto after_try_utf8;
	2250	while (1) {
	2251	if (regtry(&reginfo, &s)) {
	2252	goto got_it;
	2253	}
	2254	after_try_utf8:
	2255	if (s > end) {
	2256	goto phooey;
	2257	}
	2258	if (prog->extflags & RXf_USE_INTUIT) {
	2259	s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
	2260	if (!s) {
	2261	goto phooey;
	2262	}
	2263	}
	2264	else {
	2265	s += UTF8SKIP(s);
	2266	}
	2267	}
	2268	} /* end search for check string in unicode */
	2269	else {
	2270	if (s == startpos) {
	2271	goto after_try_latin;
	2272	}
	2273	while (1) {
	2274	if (regtry(&reginfo, &s)) {
	2275	goto got_it;
	2276	}
	2277	after_try_latin:
	2278	if (s > end) {
	2279	goto phooey;
	2280	}
	2281	if (prog->extflags & RXf_USE_INTUIT) {
	2282	s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
	2283	if (!s) {
	2284	goto phooey;
	2285	}
	2286	}
	2287	else {
	2288	s++;
	2289	}
	2290	}
	2291	} /* end search for check string in latin*/
	2292	} /* end search for check string */
	2293	else { /* search for newline */
	2294	if (s > startpos) {
	2295	/XXX: The s-- is almost definitely wrong here under unicode - demeprhq/
	2296	s--;
	2297	}
	2298	/* We can use a more efficient search as newlines are the same in unicode as they are in latin */
	2299	while (s <= end) { /* note it could be possible to match at the end of the string */
	2300	if (s++ == '\n') { / don't need PL_utf8skip here */
	2301	if (regtry(&reginfo, &s))
	2302	goto got_it;
	2303	}
	2304	}
	2305	} /* end search for newline */
	2306	} /* end anchored/multiline check string search */
	2307	goto phooey;
	2308	} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
	2309	{
	2310	/* the warning about reginfo.ganch being used without initialization
	2311	is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
	2312	and we only enter this block when the same bit is set. */
	2313	char *tmp_s = reginfo.ganch - prog->gofs;
	2314
	2315	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
	2316	goto got_it;
	2317	goto phooey;
	2318	}
	2319
	2320	/* Messy cases: unanchored match. */
	2321	if ((prog->anchored_substr \|\| prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
	2322	/* we have /x+whatever/ */
	2323	/* it must be a one character string (XXXX Except UTF_PATTERN?) */
	2324	char ch;
	2325	#ifdef DEBUGGING
	2326	int did_match = 0;
	2327	#endif
	2328	if (utf8_target) {
	2329	if (! prog->anchored_utf8) {
	2330	to_utf8_substr(prog);
	2331	}
	2332	ch = SvPVX_const(prog->anchored_utf8)[0];
	2333	REXEC_FBC_SCAN(
	2334	if (*s == ch) {
	2335	DEBUG_EXECUTE_r( did_match = 1 );
	2336	if (regtry(&reginfo, &s)) goto got_it;
	2337	s += UTF8SKIP(s);
	2338	while (s < strend && *s == ch)
	2339	s += UTF8SKIP(s);
	2340	}
	2341	);
	2342
	2343	}
	2344	else {
	2345	if (! prog->anchored_substr) {
	2346	if (! to_byte_substr(prog)) {
	2347	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2348	}
	2349	}
	2350	ch = SvPVX_const(prog->anchored_substr)[0];
	2351	REXEC_FBC_SCAN(
	2352	if (*s == ch) {
	2353	DEBUG_EXECUTE_r( did_match = 1 );
	2354	if (regtry(&reginfo, &s)) goto got_it;
	2355	s++;
	2356	while (s < strend && *s == ch)
	2357	s++;
	2358	}
	2359	);
	2360	}
	2361	DEBUG_EXECUTE_r(if (!did_match)
	2362	PerlIO_printf(Perl_debug_log,
	2363	"Did not find anchored character...\n")
	2364	);
	2365	}
	2366	else if (prog->anchored_substr != NULL
	2367	\|\| prog->anchored_utf8 != NULL
	2368	\|\| ((prog->float_substr != NULL \|\| prog->float_utf8 != NULL)
	2369	&& prog->float_max_offset < strend - s)) {
	2370	SV *must;
	2371	I32 back_max;
	2372	I32 back_min;
	2373	char *last;
	2374	char last1; / Last position checked before */
	2375	#ifdef DEBUGGING
	2376	int did_match = 0;
	2377	#endif
	2378	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	2379	if (utf8_target) {
	2380	if (! prog->anchored_utf8) {
	2381	to_utf8_substr(prog);
	2382	}
	2383	must = prog->anchored_utf8;
	2384	}
	2385	else {
	2386	if (! prog->anchored_substr) {
	2387	if (! to_byte_substr(prog)) {
	2388	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2389	}
	2390	}
	2391	must = prog->anchored_substr;
	2392	}
	2393	back_max = back_min = prog->anchored_offset;
	2394	} else {
	2395	if (utf8_target) {
	2396	if (! prog->float_utf8) {
	2397	to_utf8_substr(prog);
	2398	}
	2399	must = prog->float_utf8;
	2400	}
	2401	else {
	2402	if (! prog->float_substr) {
	2403	if (! to_byte_substr(prog)) {
	2404	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2405	}
	2406	}
	2407	must = prog->float_substr;
	2408	}
	2409	back_max = prog->float_max_offset;
	2410	back_min = prog->float_min_offset;
	2411	}
	2412
	2413	if (back_min<0) {
	2414	last = strend;
	2415	} else {
	2416	last = HOP3c(strend, /* Cannot start after this */
	2417	-(I32)(CHR_SVLEN(must)
	2418	- (SvTAIL(must) != 0) + back_min), strbeg);
	2419	}
	2420	if (s > PL_bostr)
	2421	last1 = HOPc(s, -1);
	2422	else
	2423	last1 = s - 1; /* bogus */
	2424
	2425	/* XXXX check_substr already used to find "s", can optimize if
	2426	check_substr==must. */
	2427	scream_pos = -1;
	2428	dontbother = end_shift;
	2429	strend = HOPc(strend, -dontbother);
	2430	while ( (s <= last) &&
	2431	(s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
	2432	(unsigned char*)strend, must,
	2433	multiline ? FBMrf_MULTILINE : 0)) ) {
	2434	DEBUG_EXECUTE_r( did_match = 1 );
	2435	if (HOPc(s, -back_max) > last1) {
	2436	last1 = HOPc(s, -back_min);
	2437	s = HOPc(s, -back_max);
	2438	}
	2439	else {
	2440	char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
	2441
	2442	last1 = HOPc(s, -back_min);
	2443	s = t;
	2444	}
	2445	if (utf8_target) {
	2446	while (s <= last1) {
	2447	if (regtry(&reginfo, &s))
	2448	goto got_it;
	2449	if (s >= last1) {
	2450	s++; /* to break out of outer loop */
	2451	break;
	2452	}
	2453	s += UTF8SKIP(s);
	2454	}
	2455	}
	2456	else {
	2457	while (s <= last1) {
	2458	if (regtry(&reginfo, &s))
	2459	goto got_it;
	2460	s++;
	2461	}
	2462	}
	2463	}
	2464	DEBUG_EXECUTE_r(if (!did_match) {
	2465	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	2466	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	2467	PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
	2468	((must == prog->anchored_substr \|\| must == prog->anchored_utf8)
	2469	? "anchored" : "floating"),
	2470	quoted, RE_SV_TAIL(must));
	2471	});
	2472	goto phooey;
	2473	}
	2474	else if ( (c = progi->regstclass) ) {
	2475	if (minlen) {
	2476	const OPCODE op = OP(progi->regstclass);
	2477	/* don't bother with what can't match */
	2478	if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
	2479	strend = HOPc(strend, -(minlen - 1));
	2480	}
	2481	DEBUG_EXECUTE_r({
	2482	SV * const prop = sv_newmortal();
	2483	regprop(prog, prop, c);
	2484	{
	2485	RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
	2486	s,strend-s,60);
	2487	PerlIO_printf(Perl_debug_log,
	2488	"Matching stclass %.*s against %s (%d bytes)\n",
	2489	(int)SvCUR(prop), SvPVX_const(prop),
	2490	quoted, (int)(strend - s));
	2491	}
	2492	});
	2493	if (find_byclass(prog, c, s, strend, &reginfo))
	2494	goto got_it;
	2495	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
	2496	}
	2497	else {
	2498	dontbother = 0;
	2499	if (prog->float_substr != NULL \|\| prog->float_utf8 != NULL) {
	2500	/* Trim the end. */
	2501	char *last= NULL;
	2502	SV* float_real;
	2503	STRLEN len;
	2504	const char *little;
	2505
	2506	if (utf8_target) {
	2507	if (! prog->float_utf8) {
	2508	to_utf8_substr(prog);
	2509	}
	2510	float_real = prog->float_utf8;
	2511	}
	2512	else {
	2513	if (! prog->float_substr) {
	2514	if (! to_byte_substr(prog)) {
	2515	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2516	}
	2517	}
	2518	float_real = prog->float_substr;
	2519	}
	2520
	2521	little = SvPV_const(float_real, len);
	2522	if (SvTAIL(float_real)) {
	2523	/* This means that float_real contains an artificial \n on
	2524	* the end due to the presence of something like this:
	2525	* /foo$/ where we can match both "foo" and "foo\n" at the
	2526	* end of the string. So we have to compare the end of the
	2527	* string first against the float_real without the \n and
	2528	* then against the full float_real with the string. We
	2529	* have to watch out for cases where the string might be
	2530	* smaller than the float_real or the float_real without
	2531	* the \n. */
	2532	char *checkpos= strend - len;
	2533	DEBUG_OPTIMISE_r(
	2534	PerlIO_printf(Perl_debug_log,
	2535	"%sChecking for float_real.%s\n",
	2536	PL_colors[4], PL_colors[5]));
	2537	if (checkpos + 1 < strbeg) {
	2538	/* can't match, even if we remove the trailing \n
	2539	* string is too short to match */
	2540	DEBUG_EXECUTE_r(
	2541	PerlIO_printf(Perl_debug_log,
	2542	"%sString shorter than required trailing substring, cannot match.%s\n",
	2543	PL_colors[4], PL_colors[5]));
	2544	goto phooey;
	2545	} else if (memEQ(checkpos + 1, little, len - 1)) {
	2546	/* can match, the end of the string matches without the
	2547	* "\n" */
	2548	last = checkpos + 1;
	2549	} else if (checkpos < strbeg) {
	2550	/* cant match, string is too short when the "\n" is
	2551	* included */
	2552	DEBUG_EXECUTE_r(
	2553	PerlIO_printf(Perl_debug_log,
	2554	"%sString does not contain required trailing substring, cannot match.%s\n",
	2555	PL_colors[4], PL_colors[5]));
	2556	goto phooey;
	2557	} else if (!multiline) {
	2558	/* non multiline match, so compare with the "\n" at the
	2559	* end of the string */
	2560	if (memEQ(checkpos, little, len)) {
	2561	last= checkpos;
	2562	} else {
	2563	DEBUG_EXECUTE_r(
	2564	PerlIO_printf(Perl_debug_log,
	2565	"%sString does not contain required trailing substring, cannot match.%s\n",
	2566	PL_colors[4], PL_colors[5]));
	2567	goto phooey;
	2568	}
	2569	} else {
	2570	/* multiline match, so we have to search for a place
	2571	* where the full string is located */
	2572	goto find_last;
	2573	}
	2574	} else {
	2575	find_last:
	2576	if (len)
	2577	last = rninstr(s, strend, little, little + len);
	2578	else
	2579	last = strend; /* matching "$" */
	2580	}
	2581	if (!last) {
	2582	/* at one point this block contained a comment which was
	2583	* probably incorrect, which said that this was a "should not
	2584	* happen" case. Even if it was true when it was written I am
	2585	* pretty sure it is not anymore, so I have removed the comment
	2586	* and replaced it with this one. Yves */
	2587	DEBUG_EXECUTE_r(
	2588	PerlIO_printf(Perl_debug_log,
	2589	"String does not contain required substring, cannot match.\n"
	2590	));
	2591	goto phooey;
	2592	}
	2593	dontbother = strend - last + prog->float_min_offset;
	2594	}
	2595	if (minlen && (dontbother < minlen))
	2596	dontbother = minlen - 1;
	2597	strend -= dontbother; /* this one's always in bytes! */
	2598	/* We don't know much -- general case. */
	2599	if (utf8_target) {
	2600	for (;;) {
	2601	if (regtry(&reginfo, &s))
	2602	goto got_it;
	2603	if (s >= strend)
	2604	break;
	2605	s += UTF8SKIP(s);
	2606	};
	2607	}
	2608	else {
	2609	do {
	2610	if (regtry(&reginfo, &s))
	2611	goto got_it;
	2612	} while (s++ < strend);
	2613	}
	2614	}
	2615
	2616	/* Failure. */
	2617	goto phooey;
	2618
	2619	got_it:
	2620	DEBUG_BUFFERS_r(
	2621	if (swap)
	2622	PerlIO_printf(Perl_debug_log,
	2623	"rex=0x%"UVxf" freeing offs: 0x%"UVxf"\n",
	2624	PTR2UV(prog),
	2625	PTR2UV(swap)
	2626	);
	2627	);
	2628	Safefree(swap);
	2629	RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
	2630
	2631	if (PL_reg_state.re_state_eval_setup_done)
	2632	restore_pos(aTHX_ prog);
	2633	if (RXp_PAREN_NAMES(prog))
	2634	(void)hv_iterinit(RXp_PAREN_NAMES(prog));
	2635
	2636	/* make sure $`, $&, $', and $digit will work later */
	2637	if ( !(flags & REXEC_NOT_FIRST) ) {
	2638	if (flags & REXEC_COPY_STR) {
	2639	#ifdef PERL_OLD_COPY_ON_WRITE
	2640	if ((SvIsCOW(sv)
	2641	\|\| (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
	2642	if (DEBUG_C_TEST) {
	2643	PerlIO_printf(Perl_debug_log,
	2644	"Copy on write: regexp capture, type %d\n",
	2645	(int) SvTYPE(sv));
	2646	}
	2647	RX_MATCH_COPY_FREE(rx);
	2648	prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
	2649	prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
	2650	assert (SvPOKp(prog->saved_copy));
	2651	prog->sublen = PL_regeol - strbeg;
	2652	prog->suboffset = 0;
	2653	prog->subcoffset = 0;
	2654	} else
	2655	#endif
	2656	{
	2657	I32 min = 0;
	2658	I32 max = PL_regeol - strbeg;
	2659	I32 sublen;
	2660
	2661	if ( (flags & REXEC_COPY_SKIP_POST)
	2662	&& !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
	2663	&& !(PL_sawampersand & SAWAMPERSAND_RIGHT)
	2664	) { /* don't copy $' part of string */
	2665	U32 n = 0;
	2666	max = -1;
	2667	/* calculate the right-most part of the string covered
	2668	* by a capture. Due to look-ahead, this may be to
	2669	* the right of $&, so we have to scan all captures */
	2670	while (n <= prog->lastparen) {
	2671	if (prog->offs[n].end > max)
	2672	max = prog->offs[n].end;
	2673	n++;
	2674	}
	2675	if (max == -1)
	2676	max = (PL_sawampersand & SAWAMPERSAND_LEFT)
	2677	? prog->offs[0].start
	2678	: 0;
	2679	assert(max >= 0 && max <= PL_regeol - strbeg);
	2680	}
	2681
	2682	if ( (flags & REXEC_COPY_SKIP_PRE)
	2683	&& !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
	2684	&& !(PL_sawampersand & SAWAMPERSAND_LEFT)
	2685	) { /* don't copy $` part of string */
	2686	U32 n = 0;
	2687	min = max;
	2688	/* calculate the left-most part of the string covered
	2689	* by a capture. Due to look-behind, this may be to
	2690	* the left of $&, so we have to scan all captures */
	2691	while (min && n <= prog->lastparen) {
	2692	if ( prog->offs[n].start != -1
	2693	&& prog->offs[n].start < min)
	2694	{
	2695	min = prog->offs[n].start;
	2696	}
	2697	n++;
	2698	}
	2699	if ((PL_sawampersand & SAWAMPERSAND_RIGHT)
	2700	&& min > prog->offs[0].end
	2701	)
	2702	min = prog->offs[0].end;
	2703
	2704	}
	2705
	2706	assert(min >= 0 && min <= max && min <= PL_regeol - strbeg);
	2707	sublen = max - min;
	2708
	2709	if (RX_MATCH_COPIED(rx)) {
	2710	if (sublen > prog->sublen)
	2711	prog->subbeg =
	2712	(char*)saferealloc(prog->subbeg, sublen+1);
	2713	}
	2714	else
	2715	prog->subbeg = (char*)safemalloc(sublen+1);
	2716	Copy(strbeg + min, prog->subbeg, sublen, char);
	2717	prog->subbeg[sublen] = '\0';
	2718	prog->suboffset = min;
	2719	prog->sublen = sublen;
	2720	RX_MATCH_COPIED_on(rx);
	2721	}
	2722	prog->subcoffset = prog->suboffset;
	2723	if (prog->suboffset && utf8_target) {
	2724	/* Convert byte offset to chars.
	2725	* XXX ideally should only compute this if @-/@+
	2726	* has been seen, a la PL_sawampersand ??? */
	2727
	2728	/* If there's a direct correspondence between the
	2729	* string which we're matching and the original SV,
	2730	* then we can use the utf8 len cache associated with
	2731	* the SV. In particular, it means that under //g,
	2732	* sv_pos_b2u() will use the previously cached
	2733	* position to speed up working out the new length of
	2734	* subcoffset, rather than counting from the start of
	2735	* the string each time. This stops
	2736	* $x = "\x{100}" x 1E6; 1 while $x =~ /(.)/g;
	2737	* from going quadratic */
	2738	if (SvPOKp(sv) && SvPVX(sv) == strbeg)
	2739	sv_pos_b2u(sv, &(prog->subcoffset));
	2740	else
	2741	prog->subcoffset = utf8_length((U8*)strbeg,
	2742	(U8*)(strbeg+prog->suboffset));
	2743	}
	2744	}
	2745	else {
	2746	RX_MATCH_COPY_FREE(rx);
	2747	prog->subbeg = strbeg;
	2748	prog->suboffset = 0;
	2749	prog->subcoffset = 0;
	2750	prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
	2751	}
	2752	}
	2753
	2754	return 1;
	2755
	2756	phooey:
	2757	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
	2758	PL_colors[4], PL_colors[5]));
	2759	if (PL_reg_state.re_state_eval_setup_done)
	2760	restore_pos(aTHX_ prog);
	2761	if (swap) {
	2762	/* we failed :-( roll it back */
	2763	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	2764	"rex=0x%"UVxf" rolling back offs: freeing=0x%"UVxf" restoring=0x%"UVxf"\n",
	2765	PTR2UV(prog),
	2766	PTR2UV(prog->offs),
	2767	PTR2UV(swap)
	2768	));
	2769	Safefree(prog->offs);
	2770	prog->offs = swap;
	2771	}
	2772	return 0;
	2773	}
	2774
	2775
	2776	/* Set which rex is pointed to by PL_reg_state, handling ref counting.
	2777	* Do inc before dec, in case old and new rex are the same */
	2778	#define SET_reg_curpm(Re2) \
	2779	if (PL_reg_state.re_state_eval_setup_done) { \
	2780	(void)ReREFCNT_inc(Re2); \
	2781	ReREFCNT_dec(PM_GETRE(PL_reg_curpm)); \
	2782	PM_SETRE((PL_reg_curpm), (Re2)); \
	2783	}
	2784
	2785
	2786	/*
	2787	- regtry - try match at specific point
	2788	*/
	2789	STATIC I32 /* 0 failure, 1 success */
	2790	S_regtry(pTHX_ regmatch_info reginfo, char *startposp)
	2791	{
	2792	dVAR;
	2793	CHECKPOINT lastcp;
	2794	REGEXP *const rx = reginfo->prog;
	2795	regexp *const prog = ReANY(rx);
	2796	I32 result;
	2797	RXi_GET_DECL(prog,progi);
	2798	GET_RE_DEBUG_FLAGS_DECL;
	2799
	2800	PERL_ARGS_ASSERT_REGTRY;
	2801
	2802	reginfo->cutpoint=NULL;
	2803
	2804	if ((prog->extflags & RXf_EVAL_SEEN)
	2805	&& !PL_reg_state.re_state_eval_setup_done)
	2806	{
	2807	MAGIC *mg;
	2808
	2809	PL_reg_state.re_state_eval_setup_done = TRUE;
	2810	if (reginfo->sv) {
	2811	/* Make $_ available to executed code. */
	2812	if (reginfo->sv != DEFSV) {
	2813	SAVE_DEFSV;
	2814	DEFSV_set(reginfo->sv);
	2815	}
	2816
	2817	if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
	2818	&& (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
	2819	/* prepare for quick setting of pos */
	2820	#ifdef PERL_OLD_COPY_ON_WRITE
	2821	if (SvIsCOW(reginfo->sv))
	2822	sv_force_normal_flags(reginfo->sv, 0);
	2823	#endif
	2824	mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
	2825	&PL_vtbl_mglob, NULL, 0);
	2826	mg->mg_len = -1;
	2827	}
	2828	PL_reg_magic = mg;
	2829	PL_reg_oldpos = mg->mg_len;
	2830	SAVEDESTRUCTOR_X(restore_pos, prog);
	2831	}
	2832	if (!PL_reg_curpm) {
	2833	Newxz(PL_reg_curpm, 1, PMOP);
	2834	#ifdef USE_ITHREADS
	2835	{
	2836	SV* const repointer = &PL_sv_undef;
	2837	/* this regexp is also owned by the new PL_reg_curpm, which
	2838	will try to free it. */
	2839	av_push(PL_regex_padav, repointer);
	2840	PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
	2841	PL_regex_pad = AvARRAY(PL_regex_padav);
	2842	}
	2843	#endif
	2844	}
	2845	SET_reg_curpm(rx);
	2846	PL_reg_oldcurpm = PL_curpm;
	2847	PL_curpm = PL_reg_curpm;
	2848	if (RXp_MATCH_COPIED(prog)) {
	2849	/* Here is a serious problem: we cannot rewrite subbeg,
	2850	since it may be needed if this match fails. Thus
	2851	$` inside (?{}) could fail... */
	2852	PL_reg_oldsaved = prog->subbeg;
	2853	PL_reg_oldsavedlen = prog->sublen;
	2854	PL_reg_oldsavedoffset = prog->suboffset;
	2855	PL_reg_oldsavedcoffset = prog->suboffset;
	2856	#ifdef PERL_OLD_COPY_ON_WRITE
	2857	PL_nrs = prog->saved_copy;
	2858	#endif
	2859	RXp_MATCH_COPIED_off(prog);
	2860	}
	2861	else
	2862	PL_reg_oldsaved = NULL;
	2863	prog->subbeg = PL_bostr;
	2864	prog->suboffset = 0;
	2865	prog->subcoffset = 0;
	2866	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
	2867	}
	2868	#ifdef DEBUGGING
	2869	PL_reg_starttry = *startposp;
	2870	#endif
	2871	prog->offs[0].start = *startposp - PL_bostr;
	2872	prog->lastparen = 0;
	2873	prog->lastcloseparen = 0;
	2874	PL_regsize = 0;
	2875
	2876	/* XXXX What this code is doing here?!!! There should be no need
	2877	to do this again and again, prog->lastparen should take care of
	2878	this! --ilya*/
	2879
	2880	/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
	2881	* Actually, the code in regcppop() (which Ilya may be meaning by
	2882	* prog->lastparen), is not needed at all by the test suite
	2883	* (op/regexp, op/pat, op/split), but that code is needed otherwise
	2884	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	2885	* Meanwhile, this code is needed for the
	2886	* above-mentioned test suite tests to succeed. The common theme
	2887	* on those tests seems to be returning null fields from matches.
	2888	* --jhi updated by dapm */
	2889	#if 1
	2890	if (prog->nparens) {
	2891	regexp_paren_pair *pp = prog->offs;
	2892	I32 i;
	2893	for (i = prog->nparens; i > (I32)prog->lastparen; i--) {
	2894	++pp;
	2895	pp->start = -1;
	2896	pp->end = -1;
	2897	}
	2898	}
	2899	#endif
	2900	REGCP_SET(lastcp);
	2901	result = regmatch(reginfo, *startposp, progi->program + 1);
	2902	if (result != -1) {
	2903	prog->offs[0].end = result;
	2904	return 1;
	2905	}
	2906	if (reginfo->cutpoint)
	2907	*startposp= reginfo->cutpoint;
	2908	REGCP_UNWIND(lastcp);
	2909	return 0;
	2910	}
	2911
	2912
	2913	#define sayYES goto yes
	2914	#define sayNO goto no
	2915	#define sayNO_SILENT goto no_silent
	2916
	2917	/* we dont use STMT_START/END here because it leads to
	2918	"unreachable code" warnings, which are bogus, but distracting. */
	2919	#define CACHEsayNO \
	2920	if (ST.cache_mask) \
	2921	PL_reg_poscache[ST.cache_offset] \|= ST.cache_mask; \
	2922	sayNO
	2923
	2924	/* this is used to determine how far from the left messages like
	2925	'failed...' are printed. It should be set such that messages
	2926	are inline with the regop output that created them.
	2927	*/
	2928	#define REPORT_CODE_OFF 32
	2929
	2930
	2931	#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
	2932	#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
	2933	#define CHRTEST_NOT_A_CP_1 -999
	2934	#define CHRTEST_NOT_A_CP_2 -998
	2935
	2936	#define SLAB_FIRST(s) (&(s)->states[0])
	2937	#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
	2938
	2939	/* grab a new slab and return the first slot in it */
	2940
	2941	STATIC regmatch_state *
	2942	S_push_slab(pTHX)
	2943	{
	2944	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	2945	dMY_CXT;
	2946	#endif
	2947	regmatch_slab *s = PL_regmatch_slab->next;
	2948	if (!s) {
	2949	Newx(s, 1, regmatch_slab);
	2950	s->prev = PL_regmatch_slab;
	2951	s->next = NULL;
	2952	PL_regmatch_slab->next = s;
	2953	}
	2954	PL_regmatch_slab = s;
	2955	return SLAB_FIRST(s);
	2956	}
	2957
	2958
	2959	/* push a new state then goto it */
	2960
	2961	#define PUSH_STATE_GOTO(state, node, input) \
	2962	pushinput = input; \
	2963	scan = node; \
	2964	st->resume_state = state; \
	2965	goto push_state;
	2966
	2967	/* push a new state with success backtracking, then goto it */
	2968
	2969	#define PUSH_YES_STATE_GOTO(state, node, input) \
	2970	pushinput = input; \
	2971	scan = node; \
	2972	st->resume_state = state; \
	2973	goto push_yes_state;
	2974
	2975
	2976
	2977
	2978	/*
	2979
	2980	regmatch() - main matching routine
	2981
	2982	This is basically one big switch statement in a loop. We execute an op,
	2983	set 'next' to point the next op, and continue. If we come to a point which
	2984	we may need to backtrack to on failure such as (A\|B\|C), we push a
	2985	backtrack state onto the backtrack stack. On failure, we pop the top
	2986	state, and re-enter the loop at the state indicated. If there are no more
	2987	states to pop, we return failure.
	2988
	2989	Sometimes we also need to backtrack on success; for example /A+/, where
	2990	after successfully matching one A, we need to go back and try to
	2991	match another one; similarly for lookahead assertions: if the assertion
	2992	completes successfully, we backtrack to the state just before the assertion
	2993	and then carry on. In these cases, the pushed state is marked as
	2994	'backtrack on success too'. This marking is in fact done by a chain of
	2995	pointers, each pointing to the previous 'yes' state. On success, we pop to
	2996	the nearest yes state, discarding any intermediate failure-only states.
	2997	Sometimes a yes state is pushed just to force some cleanup code to be
	2998	called at the end of a successful match or submatch; e.g. (??{$re}) uses
	2999	it to free the inner regex.
	3000
	3001	Note that failure backtracking rewinds the cursor position, while
	3002	success backtracking leaves it alone.
	3003
	3004	A pattern is complete when the END op is executed, while a subpattern
	3005	such as (?=foo) is complete when the SUCCESS op is executed. Both of these
	3006	ops trigger the "pop to last yes state if any, otherwise return true"
	3007	behaviour.
	3008
	3009	A common convention in this function is to use A and B to refer to the two
	3010	subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
	3011	the subpattern to be matched possibly multiple times, while B is the entire
	3012	rest of the pattern. Variable and state names reflect this convention.
	3013
	3014	The states in the main switch are the union of ops and failure/success of
	3015	substates associated with with that op. For example, IFMATCH is the op
	3016	that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
	3017	'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
	3018	successfully matched A and IFMATCH_A_fail is a state saying that we have
	3019	just failed to match A. Resume states always come in pairs. The backtrack
	3020	state we push is marked as 'IFMATCH_A', but when that is popped, we resume
	3021	at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
	3022	on success or failure.
	3023
	3024	The struct that holds a backtracking state is actually a big union, with
	3025	one variant for each major type of op. The variable st points to the
	3026	top-most backtrack struct. To make the code clearer, within each
	3027	block of code we #define ST to alias the relevant union.
	3028
	3029	Here's a concrete example of a (vastly oversimplified) IFMATCH
	3030	implementation:
	3031
	3032	switch (state) {
	3033	....
	3034
	3035	#define ST st->u.ifmatch
	3036
	3037	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	3038	ST.foo = ...; // some state we wish to save
	3039	...
	3040	// push a yes backtrack state with a resume value of
	3041	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
	3042	// first node of A:
	3043	PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput);
	3044	// NOTREACHED
	3045
	3046	case IFMATCH_A: // we have successfully executed A; now continue with B
	3047	next = B;
	3048	bar = ST.foo; // do something with the preserved value
	3049	break;
	3050
	3051	case IFMATCH_A_fail: // A failed, so the assertion failed
	3052	...; // do some housekeeping, then ...
	3053	sayNO; // propagate the failure
	3054
	3055	#undef ST
	3056
	3057	...
	3058	}
	3059
	3060	For any old-timers reading this who are familiar with the old recursive
	3061	approach, the code above is equivalent to:
	3062
	3063	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	3064	{
	3065	int foo = ...
	3066	...
	3067	if (regmatch(A)) {
	3068	next = B;
	3069	bar = foo;
	3070	break;
	3071	}
	3072	...; // do some housekeeping, then ...
	3073	sayNO; // propagate the failure
	3074	}
	3075
	3076	The topmost backtrack state, pointed to by st, is usually free. If you
	3077	want to claim it, populate any ST.foo fields in it with values you wish to
	3078	save, then do one of
	3079
	3080	PUSH_STATE_GOTO(resume_state, node, newinput);
	3081	PUSH_YES_STATE_GOTO(resume_state, node, newinput);
	3082
	3083	which sets that backtrack state's resume value to 'resume_state', pushes a
	3084	new free entry to the top of the backtrack stack, then goes to 'node'.
	3085	On backtracking, the free slot is popped, and the saved state becomes the
	3086	new free state. An ST.foo field in this new top state can be temporarily
	3087	accessed to retrieve values, but once the main loop is re-entered, it
	3088	becomes available for reuse.
	3089
	3090	Note that the depth of the backtrack stack constantly increases during the
	3091	left-to-right execution of the pattern, rather than going up and down with
	3092	the pattern nesting. For example the stack is at its maximum at Z at the
	3093	end of the pattern, rather than at X in the following:
	3094
	3095	/(((X)+)+)+....(Y)+....Z/
	3096
	3097	The only exceptions to this are lookahead/behind assertions and the cut,
	3098	(?>A), which pop all the backtrack states associated with A before
	3099	continuing.
	3100
	3101	Backtrack state structs are allocated in slabs of about 4K in size.
	3102	PL_regmatch_state and st always point to the currently active state,
	3103	and PL_regmatch_slab points to the slab currently containing
	3104	PL_regmatch_state. The first time regmatch() is called, the first slab is
	3105	allocated, and is never freed until interpreter destruction. When the slab
	3106	is full, a new one is allocated and chained to the end. At exit from
	3107	regmatch(), slabs allocated since entry are freed.
	3108
	3109	*/
	3110
	3111
	3112	#define DEBUG_STATE_pp(pp) \
	3113	DEBUG_STATE_r({ \
	3114	DUMP_EXEC_POS(locinput, scan, utf8_target); \
	3115	PerlIO_printf(Perl_debug_log, \
	3116	" %*s"pp" %s%s%s%s%s\n", \
	3117	depth*2, "", \
	3118	PL_reg_name[st->resume_state], \
	3119	((st==yes_state\|\|st==mark_state) ? "[" : ""), \
	3120	((st==yes_state) ? "Y" : ""), \
	3121	((st==mark_state) ? "M" : ""), \
	3122	((st==yes_state\|\|st==mark_state) ? "]" : "") \
	3123	); \
	3124	});
	3125
	3126
	3127	#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
	3128
	3129	#ifdef DEBUGGING
	3130
	3131	STATIC void
	3132	S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
	3133	const char start, const char end, const char *blurb)
	3134	{
	3135	const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
	3136
	3137	PERL_ARGS_ASSERT_DEBUG_START_MATCH;
	3138
	3139	if (!PL_colorset)
	3140	reginitcolors();
	3141	{
	3142	RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
	3143	RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
	3144
	3145	RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
	3146	start, end - start, 60);
	3147
	3148	PerlIO_printf(Perl_debug_log,
	3149	"%s%s REx%s %s against %s\n",
	3150	PL_colors[4], blurb, PL_colors[5], s0, s1);
	3151
	3152	if (utf8_target\|\|utf8_pat)
	3153	PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
	3154	utf8_pat ? "pattern" : "",
	3155	utf8_pat && utf8_target ? " and " : "",
	3156	utf8_target ? "string" : ""
	3157	);
	3158	}
	3159	}
	3160
	3161	STATIC void
	3162	S_dump_exec_pos(pTHX_ const char *locinput,
	3163	const regnode *scan,
	3164	const char *loc_regeol,
	3165	const char *loc_bostr,
	3166	const char *loc_reg_starttry,
	3167	const bool utf8_target)
	3168	{
	3169	const int docolor = PL_colors[0] \|\| PL_colors[2] \|\| *PL_colors[4];
	3170	const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
	3171	int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
	3172	/* The part of the string before starttry has one color
	3173	(pref0_len chars), between starttry and current
	3174	position another one (pref_len - pref0_len chars),
	3175	after the current position the third one.
	3176	We assume that pref0_len <= pref_len, otherwise we
	3177	decrease pref0_len. */
	3178	int pref_len = (locinput - loc_bostr) > (5 + taill) - l
	3179	? (5 + taill) - l : locinput - loc_bostr;
	3180	int pref0_len;
	3181
	3182	PERL_ARGS_ASSERT_DUMP_EXEC_POS;
	3183
	3184	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput - pref_len)))
	3185	pref_len++;
	3186	pref0_len = pref_len - (locinput - loc_reg_starttry);
	3187	if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
	3188	l = ( loc_regeol - locinput > (5 + taill) - pref_len
	3189	? (5 + taill) - pref_len : loc_regeol - locinput);
	3190	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput + l)))
	3191	l--;
	3192	if (pref0_len < 0)
	3193	pref0_len = 0;
	3194	if (pref0_len > pref_len)
	3195	pref0_len = pref_len;
	3196	{
	3197	const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0;
	3198
	3199	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
	3200	(locinput - pref_len),pref0_len, 60, 4, 5);
	3201
	3202	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
	3203	(locinput - pref_len + pref0_len),
	3204	pref_len - pref0_len, 60, 2, 3);
	3205
	3206	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
	3207	locinput, loc_regeol - locinput, 10, 0, 1);
	3208
	3209	const STRLEN tlen=len0+len1+len2;
	3210	PerlIO_printf(Perl_debug_log,
	3211	"%4"IVdf" <%.s%.s%s%.s>%s\|",
	3212	(IV)(locinput - loc_bostr),
	3213	len0, s0,
	3214	len1, s1,
	3215	(docolor ? "" : "> <"),
	3216	len2, s2,
	3217	(int)(tlen > 19 ? 0 : 19 - tlen),
	3218	"");
	3219	}
	3220	}
	3221
	3222	#endif
	3223
	3224	/* reg_check_named_buff_matched()
	3225	* Checks to see if a named buffer has matched. The data array of
	3226	* buffer numbers corresponding to the buffer is expected to reside
	3227	* in the regexp->data->data array in the slot stored in the ARG() of
	3228	* node involved. Note that this routine doesn't actually care about the
	3229	* name, that information is not preserved from compilation to execution.
	3230	* Returns the index of the leftmost defined buffer with the given name
	3231	* or 0 if non of the buffers matched.
	3232	*/
	3233	STATIC I32
	3234	S_reg_check_named_buff_matched(pTHX_ const regexp rex, const regnode scan)
	3235	{
	3236	I32 n;
	3237	RXi_GET_DECL(rex,rexi);
	3238	SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	3239	I32 nums=(I32)SvPVX(sv_dat);
	3240
	3241	PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
	3242
	3243	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	3244	if ((I32)rex->lastparen >= nums[n] &&
	3245	rex->offs[nums[n]].end != -1)
	3246	{
	3247	return nums[n];
	3248	}
	3249	}
	3250	return 0;
	3251	}
	3252
	3253
	3254	/* free all slabs above current one - called during LEAVE_SCOPE */
	3255
	3256	STATIC void
	3257	S_clear_backtrack_stack(pTHX_ void *p)
	3258	{
	3259	regmatch_slab *s = PL_regmatch_slab->next;
	3260	PERL_UNUSED_ARG(p);
	3261
	3262	if (!s)
	3263	return;
	3264	PL_regmatch_slab->next = NULL;
	3265	while (s) {
	3266	regmatch_slab * const osl = s;
	3267	s = s->next;
	3268	Safefree(osl);
	3269	}
	3270	}
	3271	static bool
	3272	S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int c1p, U8 c1_utf8, int c2p, U8 c2_utf8)
	3273	{
	3274	/* This function determines if there are one or two characters that match
	3275	* the first character of the passed-in EXACTish node <text_node>, and if
	3276	* so, returns them in the passed-in pointers.
	3277	*
	3278	* If it determines that no possible character in the target string can
	3279	* match, it returns FALSE; otherwise TRUE. (The FALSE situation occurs if
	3280	* the first character in <text_node> requires UTF-8 to represent, and the
	3281	* target string isn't in UTF-8.)
	3282	*
	3283	* If there are more than two characters that could match the beginning of
	3284	* <text_node>, or if more context is required to determine a match or not,
	3285	* it sets both <c1p> and <c2p> to CHRTEST_VOID.
	3286	*
	3287	* The motiviation behind this function is to allow the caller to set up
	3288	* tight loops for matching. If <text_node> is of type EXACT, there is
	3289	* only one possible character that can match its first character, and so
	3290	* the situation is quite simple. But things get much more complicated if
	3291	* folding is involved. It may be that the first character of an EXACTFish
	3292	* node doesn't participate in any possible fold, e.g., punctuation, so it
	3293	* can be matched only by itself. The vast majority of characters that are
	3294	* in folds match just two things, their lower and upper-case equivalents.
	3295	* But not all are like that; some have multiple possible matches, or match
	3296	* sequences of more than one character. This function sorts all that out.
	3297	*
	3298	* Consider the patterns AB or A?B where A and B are arbitrary. In a
	3299	* loop of trying to match A*, we know we can't exit where the thing
	3300	* following it isn't a B. And something can't be a B unless it is the
	3301	* beginning of B. By putting a quick test for that beginning in a tight
	3302	* loop, we can rule out things that can't possibly be B without having to
	3303	* break out of the loop, thus avoiding work. Similarly, if A is a single
	3304	* character, we can make a tight loop matching A*, using the outputs of
	3305	* this function.
	3306	*
	3307	* If the target string to match isn't in UTF-8, and there aren't
	3308	* complications which require CHRTEST_VOID, <c1p> and <c2p> are set to
	3309	* the one or two possible octets (which are characters in this situation)
	3310	* that can match. In all cases, if there is only one character that can
	3311	* match, <c1p> and <c2p> will be identical.
	3312	*
	3313	* If the target string is in UTF-8, the buffers pointed to by <c1_utf8>
	3314	* and <c2_utf8> will contain the one or two UTF-8 sequences of bytes that
	3315	* can match the beginning of <text_node>. They should be declared with at
	3316	* least length UTF8_MAXBYTES+1. (If the target string isn't in UTF-8, it is
	3317	* undefined what these contain.) If one or both of the buffers are
	3318	* invariant under UTF-8, <c1p>, and <c2p> will also be set to the
	3319	* corresponding invariant. If variant, the corresponding *<c1p> and/or
	3320	* *<c2p> will be set to a negative number(s) that shouldn't match any code
	3321	* point (unless inappropriately coerced to unsigned). *<c1p> will equal
	3322	* <c2p> if and only if <c1_utf8> and <c2_utf8> are the same. /
	3323
	3324	const bool utf8_target = PL_reg_match_utf8;
	3325
	3326	UV c1 = CHRTEST_NOT_A_CP_1;
	3327	UV c2 = CHRTEST_NOT_A_CP_2;
	3328	bool use_chrtest_void = FALSE;
	3329
	3330	/* Used when we have both utf8 input and utf8 output, to avoid converting
	3331	* to/from code points */
	3332	bool utf8_has_been_setup = FALSE;
	3333
	3334	dVAR;
	3335
	3336	U8 pat = (U8)STRING(text_node);
	3337
	3338	if (OP(text_node) == EXACT) {
	3339
	3340	/* In an exact node, only one thing can be matched, that first
	3341	* character. If both the pat and the target are UTF-8, we can just
	3342	* copy the input to the output, avoiding finding the code point of
	3343	* that character */
	3344	if (! UTF_PATTERN) {
	3345	c2 = c1 = *pat;
	3346	}
	3347	else if (utf8_target) {
	3348	Copy(pat, c1_utf8, UTF8SKIP(pat), U8);
	3349	Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
	3350	utf8_has_been_setup = TRUE;
	3351	}
	3352	else {
	3353	c2 = c1 = valid_utf8_to_uvchr(pat, NULL);
	3354	}
	3355	}
	3356	else /* an EXACTFish node */
	3357	if ((UTF_PATTERN
	3358	&& is_MULTI_CHAR_FOLD_utf8_safe(pat,
	3359	pat + STR_LEN(text_node)))
	3360	\|\| (! UTF_PATTERN
	3361	&& is_MULTI_CHAR_FOLD_latin1_safe(pat,
	3362	pat + STR_LEN(text_node))))
	3363	{
	3364	/* Multi-character folds require more context to sort out. Also
	3365	* PL_utf8_foldclosures used below doesn't handle them, so have to be
	3366	* handled outside this routine */
	3367	use_chrtest_void = TRUE;
	3368	}
	3369	else { /* an EXACTFish node which doesn't begin with a multi-char fold */
	3370	c1 = (UTF_PATTERN) ? valid_utf8_to_uvchr(pat, NULL) : *pat;
	3371	if (c1 > 256) {
	3372	/* Load the folds hash, if not already done */
	3373	SV** listp;
	3374	if (! PL_utf8_foldclosures) {
	3375	if (! PL_utf8_tofold) {
	3376	U8 dummy[UTF8_MAXBYTES+1];
	3377
	3378	/* Force loading this by folding an above-Latin1 char */
	3379	to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
	3380	assert(PL_utf8_tofold); /* Verify that worked */
	3381	}
	3382	PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
	3383	}
	3384
	3385	/* The fold closures data structure is a hash with the keys being
	3386	* the UTF-8 of every character that is folded to, like 'k', and
	3387	* the values each an array of all code points that fold to its
	3388	* key. e.g. [ 'k', 'K', KELVIN_SIGN ]. Multi-character folds are
	3389	* not included */
	3390	if ((! (listp = hv_fetch(PL_utf8_foldclosures,
	3391	(char *) pat,
	3392	UTF8SKIP(pat),
	3393	FALSE))))
	3394	{
	3395	/* Not found in the hash, therefore there are no folds
	3396	* containing it, so there is only a single character that
	3397	* could match */
	3398	c2 = c1;
	3399	}
	3400	else { /* Does participate in folds */
	3401	AV* list = (AV) listp;
	3402	if (av_len(list) != 1) {
	3403
	3404	/* If there aren't exactly two folds to this, it is outside
	3405	* the scope of this function */
	3406	use_chrtest_void = TRUE;
	3407	}
	3408	else { /* There are two. Get them */
	3409	SV** c_p = av_fetch(list, 0, FALSE);
	3410	if (c_p == NULL) {
	3411	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	3412	}
	3413	c1 = SvUV(*c_p);
	3414
	3415	c_p = av_fetch(list, 1, FALSE);
	3416	if (c_p == NULL) {
	3417	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	3418	}
	3419	c2 = SvUV(*c_p);
	3420
	3421	/* Folds that cross the 255/256 boundary are forbidden if
	3422	* EXACTFL, or EXACTFA and one is ASCIII. Since the
	3423	* pattern character is above 256, and its only other match
	3424	* is below 256, the only legal match will be to itself.
	3425	* We have thrown away the original, so have to compute
	3426	* which is the one above 255 */
	3427	if ((c1 < 256) != (c2 < 256)) {
	3428	if (OP(text_node) == EXACTFL
	3429	\|\| (OP(text_node) == EXACTFA
	3430	&& (isASCII(c1) \|\| isASCII(c2))))
	3431	{
	3432	if (c1 < 256) {
	3433	c1 = c2;
	3434	}
	3435	else {
	3436	c2 = c1;
	3437	}
	3438	}
	3439	}
	3440	}
	3441	}
	3442	}
	3443	else /* Here, c1 is < 255 */
	3444	if (utf8_target
	3445	&& HAS_NONLATIN1_FOLD_CLOSURE(c1)
	3446	&& OP(text_node) != EXACTFL
	3447	&& (OP(text_node) != EXACTFA \|\| ! isASCII(c1)))
	3448	{
	3449	/* Here, there could be something above Latin1 in the target which
	3450	* folds to this character in the pattern. All such cases except
	3451	* LATIN SMALL LETTER Y WITH DIAERESIS have more than two characters
	3452	* involved in their folds, so are outside the scope of this
	3453	* function */
	3454	if (UNLIKELY(c1 == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
	3455	c2 = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
	3456	}
	3457	else {
	3458	use_chrtest_void = TRUE;
	3459	}
	3460	}
	3461	else { /* Here nothing above Latin1 can fold to the pattern character */
	3462	switch (OP(text_node)) {
	3463
	3464	case EXACTFL: /* /l rules */
	3465	c2 = PL_fold_locale[c1];
	3466	break;
	3467
	3468	case EXACTF:
	3469	if (! utf8_target) { /* /d rules */
	3470	c2 = PL_fold[c1];
	3471	break;
	3472	}
	3473	/* FALLTHROUGH */
	3474	/* /u rules for all these. This happens to work for
	3475	* EXACTFA as nothing in Latin1 folds to ASCII */
	3476	case EXACTFA:
	3477	case EXACTFU_TRICKYFOLD:
	3478	case EXACTFU_SS:
	3479	case EXACTFU:
	3480	c2 = PL_fold_latin1[c1];
	3481	break;
	3482
	3483	default:
	3484	Perl_croak(aTHX_ "panic: Unexpected op %u", OP(text_node));
	3485	assert(0); /* NOTREACHED */
	3486	}
	3487	}
	3488	}
	3489
	3490	/* Here have figured things out. Set up the returns */
	3491	if (use_chrtest_void) {
	3492	c2p = c1p = CHRTEST_VOID;
	3493	}
	3494	else if (utf8_target) {
	3495	if (! utf8_has_been_setup) { /* Don't have the utf8; must get it */
	3496	uvchr_to_utf8(c1_utf8, c1);
	3497	uvchr_to_utf8(c2_utf8, c2);
	3498	}
	3499
	3500	/* Invariants are stored in both the utf8 and byte outputs; Use
	3501	* negative numbers otherwise for the byte ones. Make sure that the
	3502	* byte ones are the same iff the utf8 ones are the same */
	3503	c1p = (UTF8_IS_INVARIANT(c1_utf8)) ? *c1_utf8 : CHRTEST_NOT_A_CP_1;
	3504	c2p = (UTF8_IS_INVARIANT(c2_utf8))
	3505	? *c2_utf8
	3506	: (c1 == c2)
	3507	? CHRTEST_NOT_A_CP_1
	3508	: CHRTEST_NOT_A_CP_2;
	3509	}
	3510	else if (c1 > 255) {
	3511	if (c2 > 255) { /* both possibilities are above what a non-utf8 string
	3512	can represent */
	3513	return FALSE;
	3514	}
	3515
	3516	c1p = c2p = c2; /* c2 is the only representable value */
	3517	}
	3518	else { /* c1 is representable; see about c2 */
	3519	*c1p = c1;
	3520	*c2p = (c2 < 256) ? c2 : c1;
	3521	}
	3522
	3523	return TRUE;
	3524	}
	3525
	3526	/* returns -1 on failure, $+[0] on success */
	3527	STATIC I32
	3528	S_regmatch(pTHX_ regmatch_info reginfo, char startpos, regnode *prog)
	3529	{
	3530	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	3531	dMY_CXT;
	3532	#endif
	3533	dVAR;
	3534	const bool utf8_target = PL_reg_match_utf8;
	3535	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	3536	REGEXP *rex_sv = reginfo->prog;
	3537	regexp *rex = ReANY(rex_sv);
	3538	RXi_GET_DECL(rex,rexi);
	3539	I32 oldsave;
	3540	/* the current state. This is a cached copy of PL_regmatch_state */
	3541	regmatch_state *st;
	3542	/* cache heavy used fields of st in registers */
	3543	regnode *scan;
	3544	regnode *next;
	3545	U32 n = 0; /* general value; init to avoid compiler warning */
	3546	I32 ln = 0; /* len or last; init to avoid compiler warning */
	3547	char *locinput = startpos;
	3548	char pushinput; / where to continue after a PUSH */
	3549	I32 nextchr; /* is always set to UCHARAT(locinput) */
	3550
	3551	bool result = 0; /* return value of S_regmatch */
	3552	int depth = 0; /* depth of backtrack stack */
	3553	U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
	3554	const U32 max_nochange_depth =
	3555	(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
	3556	3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
	3557	regmatch_state yes_state = NULL; / state to pop to on success of
	3558	subpattern */
	3559	/* mark_state piggy backs on the yes_state logic so that when we unwind
	3560	the stack on success we can update the mark_state as we go */
	3561	regmatch_state mark_state = NULL; / last mark state we have seen */
	3562	regmatch_state cur_eval = NULL; / most recent EVAL_AB state */
	3563	struct regmatch_state cur_curlyx = NULL; / most recent curlyx */
	3564	U32 state_num;
	3565	bool no_final = 0; /* prevent failure from backtracking? */
	3566	bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
	3567	char *startpoint = locinput;
	3568	SV popmark = NULL; / are we looking for a mark? */
	3569	SV sv_commit = NULL; / last mark name seen in failure */
	3570	SV sv_yes_mark = NULL; / last mark name we have seen
	3571	during a successful match */
	3572	U32 lastopen = 0; /* last open we saw */
	3573	bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
	3574	SV* const oreplsv = GvSV(PL_replgv);
	3575	/* these three flags are set by various ops to signal information to
	3576	* the very next op. They have a useful lifetime of exactly one loop
	3577	* iteration, and are not preserved or restored by state pushes/pops
	3578	*/
	3579	bool sw = 0; /* the condition value in (?(cond)a\|b) */
	3580	bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
	3581	int logical = 0; /* the following EVAL is:
	3582	0: (?{...})
	3583	1: (?(?{...})X\|Y)
	3584	2: (??{...})
	3585	or the following IFMATCH/UNLESSM is:
	3586	false: plain (?=foo)
	3587	true: used as a condition: (?(?=foo))
	3588	*/
	3589	PAD* last_pad = NULL;
	3590	dMULTICALL;
	3591	I32 gimme = G_SCALAR;
	3592	CV caller_cv = NULL; / who called us */
	3593	CV last_pushed_cv = NULL; / most recently called (?{}) CV */
	3594	CHECKPOINT runops_cp; /* savestack position before executing EVAL */
	3595
	3596	#ifdef DEBUGGING
	3597	GET_RE_DEBUG_FLAGS_DECL;
	3598	#endif
	3599
	3600	/* shut up 'may be used uninitialized' compiler warnings for dMULTICALL */
	3601	multicall_oldcatch = 0;
	3602	multicall_cv = NULL;
	3603	cx = NULL;
	3604	PERL_UNUSED_VAR(multicall_cop);
	3605	PERL_UNUSED_VAR(newsp);
	3606
	3607
	3608	PERL_ARGS_ASSERT_REGMATCH;
	3609
	3610	DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
	3611	PerlIO_printf(Perl_debug_log,"regmatch start\n");
	3612	}));
	3613	/* on first ever call to regmatch, allocate first slab */
	3614	if (!PL_regmatch_slab) {
	3615	Newx(PL_regmatch_slab, 1, regmatch_slab);
	3616	PL_regmatch_slab->prev = NULL;
	3617	PL_regmatch_slab->next = NULL;
	3618	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
	3619	}
	3620
	3621	oldsave = PL_savestack_ix;
	3622	SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
	3623	SAVEVPTR(PL_regmatch_slab);
	3624	SAVEVPTR(PL_regmatch_state);
	3625
	3626	/* grab next free state slot */
	3627	st = ++PL_regmatch_state;
	3628	if (st > SLAB_LAST(PL_regmatch_slab))
	3629	st = PL_regmatch_state = S_push_slab(aTHX);
	3630
	3631	/* Note that nextchr is a byte even in UTF */
	3632	SET_nextchr;
	3633	scan = prog;
	3634	while (scan != NULL) {
	3635
	3636	DEBUG_EXECUTE_r( {
	3637	SV * const prop = sv_newmortal();
	3638	regnode *rnext=regnext(scan);
	3639	DUMP_EXEC_POS( locinput, scan, utf8_target );
	3640	regprop(rex, prop, scan);
	3641
	3642	PerlIO_printf(Perl_debug_log,
	3643	"%3"IVdf":%*s%s(%"IVdf")\n",
	3644	(IV)(scan - rexi->program), depth*2, "",
	3645	SvPVX_const(prop),
	3646	(PL_regkind[OP(scan)] == END \|\| !rnext) ?
	3647	0 : (IV)(rnext - rexi->program));
	3648	});
	3649
	3650	next = scan + NEXT_OFF(scan);
	3651	if (next == scan)
	3652	next = NULL;
	3653	state_num = OP(scan);
	3654
	3655	reenter_switch:
	3656
	3657	SET_nextchr;
	3658	assert(nextchr < 256 && (nextchr >= 0 \|\| nextchr == NEXTCHR_EOS));
	3659
	3660	switch (state_num) {
	3661	case BOL: /* /^../ */
	3662	if (locinput == PL_bostr)
	3663	{
	3664	/* reginfo->till = reginfo->bol; */
	3665	break;
	3666	}
	3667	sayNO;
	3668
	3669	case MBOL: /* /^../m */
	3670	if (locinput == PL_bostr \|\|
	3671	(!NEXTCHR_IS_EOS && locinput[-1] == '\n'))
	3672	{
	3673	break;
	3674	}
	3675	sayNO;
	3676
	3677	case SBOL: /* /^../s */
	3678	if (locinput == PL_bostr)
	3679	break;
	3680	sayNO;
	3681
	3682	case GPOS: /* \G */
	3683	if (locinput == reginfo->ganch)
	3684	break;
	3685	sayNO;
	3686
	3687	case KEEPS: /* \K */
	3688	/* update the startpoint */
	3689	st->u.keeper.val = rex->offs[0].start;
	3690	rex->offs[0].start = locinput - PL_bostr;
	3691	PUSH_STATE_GOTO(KEEPS_next, next, locinput);
	3692	assert(0); /NOTREACHED/
	3693	case KEEPS_next_fail:
	3694	/* rollback the start point change */
	3695	rex->offs[0].start = st->u.keeper.val;
	3696	sayNO_SILENT;
	3697	assert(0); /NOTREACHED/
	3698
	3699	case EOL: /* /..$/ */
	3700	goto seol;
	3701
	3702	case MEOL: /* /..$/m */
	3703	if (!NEXTCHR_IS_EOS && nextchr != '\n')
	3704	sayNO;
	3705	break;
	3706
	3707	case SEOL: /* /..$/s */
	3708	seol:
	3709	if (!NEXTCHR_IS_EOS && nextchr != '\n')
	3710	sayNO;
	3711	if (PL_regeol - locinput > 1)
	3712	sayNO;
	3713	break;
	3714
	3715	case EOS: /* \z */
	3716	if (!NEXTCHR_IS_EOS)
	3717	sayNO;
	3718	break;
	3719
	3720	case SANY: /* /./s */
	3721	if (NEXTCHR_IS_EOS)
	3722	sayNO;
	3723	goto increment_locinput;
	3724
	3725	case CANY: /* \C */
	3726	if (NEXTCHR_IS_EOS)
	3727	sayNO;
	3728	locinput++;
	3729	break;
	3730
	3731	case REG_ANY: /* /./ */
	3732	if ((NEXTCHR_IS_EOS) \|\| nextchr == '\n')
	3733	sayNO;
	3734	goto increment_locinput;
	3735
	3736
	3737	#undef ST
	3738	#define ST st->u.trie
	3739	case TRIEC: /* (ab\|cd) with known charclass */
	3740	/* In this case the charclass data is available inline so
	3741	we can fail fast without a lot of extra overhead.
	3742	*/
	3743	if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
	3744	DEBUG_EXECUTE_r(
	3745	PerlIO_printf(Perl_debug_log,
	3746	"%*s %sfailed to match trie start class...%s\n",
	3747	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3748	);
	3749	sayNO_SILENT;
	3750	assert(0); /* NOTREACHED */
	3751	}
	3752	/* FALL THROUGH */
	3753	case TRIE: /* (ab\|cd) */
	3754	/* the basic plan of execution of the trie is:
	3755	* At the beginning, run though all the states, and
	3756	* find the longest-matching word. Also remember the position
	3757	* of the shortest matching word. For example, this pattern:
	3758	* 1 2 3 4 5
	3759	* ab\|a\|x\|abcd\|abc
	3760	* when matched against the string "abcde", will generate
	3761	* accept states for all words except 3, with the longest
	3762	* matching word being 4, and the shortest being 2 (with
	3763	* the position being after char 1 of the string).
	3764	*
	3765	* Then for each matching word, in word order (i.e. 1,2,4,5),
	3766	* we run the remainder of the pattern; on each try setting
	3767	* the current position to the character following the word,
	3768	* returning to try the next word on failure.
	3769	*
	3770	* We avoid having to build a list of words at runtime by
	3771	* using a compile-time structure, wordinfo[].prev, which
	3772	* gives, for each word, the previous accepting word (if any).
	3773	* In the case above it would contain the mappings 1->2, 2->0,
	3774	* 3->0, 4->5, 5->1. We can use this table to generate, from
	3775	* the longest word (4 above), a list of all words, by
	3776	* following the list of prev pointers; this gives us the
	3777	* unordered list 4,5,1,2. Then given the current word we have
	3778	* just tried, we can go through the list and find the
	3779	* next-biggest word to try (so if we just failed on word 2,
	3780	* the next in the list is 4).
	3781	*
	3782	* Since at runtime we don't record the matching position in
	3783	* the string for each word, we have to work that out for
	3784	* each word we're about to process. The wordinfo table holds
	3785	* the character length of each word; given that we recorded
	3786	* at the start: the position of the shortest word and its
	3787	* length in chars, we just need to move the pointer the
	3788	* difference between the two char lengths. Depending on
	3789	* Unicode status and folding, that's cheap or expensive.
	3790	*
	3791	* This algorithm is optimised for the case where are only a
	3792	* small number of accept states, i.e. 0,1, or maybe 2.
	3793	* With lots of accepts states, and having to try all of them,
	3794	* it becomes quadratic on number of accept states to find all
	3795	* the next words.
	3796	*/
	3797
	3798	{
	3799	/* what type of TRIE am I? (utf8 makes this contextual) */
	3800	DECL_TRIE_TYPE(scan);
	3801
	3802	/* what trie are we using right now */
	3803	reg_trie_data * const trie
	3804	= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
	3805	HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
	3806	U32 state = trie->startstate;
	3807
	3808	if ( trie->bitmap
	3809	&& (NEXTCHR_IS_EOS \|\| !TRIE_BITMAP_TEST(trie, nextchr)))
	3810	{
	3811	if (trie->states[ state ].wordnum) {
	3812	DEBUG_EXECUTE_r(
	3813	PerlIO_printf(Perl_debug_log,
	3814	"%*s %smatched empty string...%s\n",
	3815	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3816	);
	3817	if (!trie->jump)
	3818	break;
	3819	} else {
	3820	DEBUG_EXECUTE_r(
	3821	PerlIO_printf(Perl_debug_log,
	3822	"%*s %sfailed to match trie start class...%s\n",
	3823	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3824	);
	3825	sayNO_SILENT;
	3826	}
	3827	}
	3828
	3829	{
	3830	U8 uc = ( U8 )locinput;
	3831
	3832	STRLEN len = 0;
	3833	STRLEN foldlen = 0;
	3834	U8 uscan = (U8)NULL;
	3835	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	3836	U32 charcount = 0; /* how many input chars we have matched */
	3837	U32 accepted = 0; /* have we seen any accepting states? */
	3838
	3839	ST.jump = trie->jump;
	3840	ST.me = scan;
	3841	ST.firstpos = NULL;
	3842	ST.longfold = FALSE; /* char longer if folded => it's harder */
	3843	ST.nextword = 0;
	3844
	3845	/* fully traverse the TRIE; note the position of the
	3846	shortest accept state and the wordnum of the longest
	3847	accept state */
	3848
	3849	while ( state && uc <= (U8*)PL_regeol ) {
	3850	U32 base = trie->states[ state ].trans.base;
	3851	UV uvc = 0;
	3852	U16 charid = 0;
	3853	U16 wordnum;
	3854	wordnum = trie->states[ state ].wordnum;
	3855
	3856	if (wordnum) { /* it's an accept state */
	3857	if (!accepted) {
	3858	accepted = 1;
	3859	/* record first match position */
	3860	if (ST.longfold) {
	3861	ST.firstpos = (U8*)locinput;
	3862	ST.firstchars = 0;
	3863	}
	3864	else {
	3865	ST.firstpos = uc;
	3866	ST.firstchars = charcount;
	3867	}
	3868	}
	3869	if (!ST.nextword \|\| wordnum < ST.nextword)
	3870	ST.nextword = wordnum;
	3871	ST.topword = wordnum;
	3872	}
	3873
	3874	DEBUG_TRIE_EXECUTE_r({
	3875	DUMP_EXEC_POS( (char *)uc, scan, utf8_target );
	3876	PerlIO_printf( Perl_debug_log,
	3877	"%*s %sState: %4"UVxf" Accepted: %c ",
	3878	2+depth * 2, "", PL_colors[4],
	3879	(UV)state, (accepted ? 'Y' : 'N'));
	3880	});
	3881
	3882	/* read a char and goto next state */
	3883	if ( base && (foldlen \|\| uc < (U8*)PL_regeol)) {
	3884	I32 offset;
	3885	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	3886	uscan, len, uvc, charid, foldlen,
	3887	foldbuf, uniflags);
	3888	charcount++;
	3889	if (foldlen>0)
	3890	ST.longfold = TRUE;
	3891	if (charid &&
	3892	( ((offset =
	3893	base + charid - 1 - trie->uniquecharcount)) >= 0)
	3894
	3895	&& ((U32)offset < trie->lasttrans)
	3896	&& trie->trans[offset].check == state)
	3897	{
	3898	state = trie->trans[offset].next;
	3899	}
	3900	else {
	3901	state = 0;
	3902	}
	3903	uc += len;
	3904
	3905	}
	3906	else {
	3907	state = 0;
	3908	}
	3909	DEBUG_TRIE_EXECUTE_r(
	3910	PerlIO_printf( Perl_debug_log,
	3911	"Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
	3912	charid, uvc, (UV)state, PL_colors[5] );
	3913	);
	3914	}
	3915	if (!accepted)
	3916	sayNO;
	3917
	3918	/* calculate total number of accept states */
	3919	{
	3920	U16 w = ST.topword;
	3921	accepted = 0;
	3922	while (w) {
	3923	w = trie->wordinfo[w].prev;
	3924	accepted++;
	3925	}
	3926	ST.accepted = accepted;
	3927	}
	3928
	3929	DEBUG_EXECUTE_r(
	3930	PerlIO_printf( Perl_debug_log,
	3931	"%*s %sgot %"IVdf" possible matches%s\n",
	3932	REPORT_CODE_OFF + depth * 2, "",
	3933	PL_colors[4], (IV)ST.accepted, PL_colors[5] );
	3934	);
	3935	goto trie_first_try; /* jump into the fail handler */
	3936	}}
	3937	assert(0); /* NOTREACHED */
	3938
	3939	case TRIE_next_fail: /* we failed - try next alternative */
	3940	{
	3941	U8 *uc;
	3942	if ( ST.jump) {
	3943	REGCP_UNWIND(ST.cp);
	3944	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	3945	}
	3946	if (!--ST.accepted) {
	3947	DEBUG_EXECUTE_r({
	3948	PerlIO_printf( Perl_debug_log,
	3949	"%*s %sTRIE failed...%s\n",
	3950	REPORT_CODE_OFF+depth*2, "",
	3951	PL_colors[4],
	3952	PL_colors[5] );
	3953	});
	3954	sayNO_SILENT;
	3955	}
	3956	{
	3957	/* Find next-highest word to process. Note that this code
	3958	* is O(N^2) per trie run (O(N) per branch), so keep tight */
	3959	U16 min = 0;
	3960	U16 word;
	3961	U16 const nextword = ST.nextword;
	3962	reg_trie_wordinfo * const wordinfo
	3963	= ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
	3964	for (word=ST.topword; word; word=wordinfo[word].prev) {
	3965	if (word > nextword && (!min \|\| word < min))
	3966	min = word;
	3967	}
	3968	ST.nextword = min;
	3969	}
	3970
	3971	trie_first_try:
	3972	if (do_cutgroup) {
	3973	do_cutgroup = 0;
	3974	no_final = 0;
	3975	}
	3976
	3977	if ( ST.jump) {
	3978	ST.lastparen = rex->lastparen;
	3979	ST.lastcloseparen = rex->lastcloseparen;
	3980	REGCP_SET(ST.cp);
	3981	}
	3982
	3983	/* find start char of end of current word */
	3984	{
	3985	U32 chars; /* how many chars to skip */
	3986	reg_trie_data * const trie
	3987	= (reg_trie_data*)rexi->data->data[ARG(ST.me)];
	3988
	3989	assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3990	>= ST.firstchars);
	3991	chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3992	- ST.firstchars;
	3993	uc = ST.firstpos;
	3994
	3995	if (ST.longfold) {
	3996	/* the hard option - fold each char in turn and find
	3997	* its folded length (which may be different */
	3998	U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
	3999	STRLEN foldlen;
	4000	STRLEN len;
	4001	UV uvc;
	4002	U8 *uscan;
	4003
	4004	while (chars) {
	4005	if (utf8_target) {
	4006	uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len,
	4007	uniflags);
	4008	uc += len;
	4009	}
	4010	else {
	4011	uvc = *uc;
	4012	uc++;
	4013	}
	4014	uvc = to_uni_fold(uvc, foldbuf, &foldlen);
	4015	uscan = foldbuf;
	4016	while (foldlen) {
	4017	if (!--chars)
	4018	break;
	4019	uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len,
	4020	uniflags);
	4021	uscan += len;
	4022	foldlen -= len;
	4023	}
	4024	}
	4025	}
	4026	else {
	4027	if (utf8_target)
	4028	while (chars--)
	4029	uc += UTF8SKIP(uc);
	4030	else
	4031	uc += chars;
	4032	}
	4033	}
	4034
	4035	scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
	4036	? ST.jump[ST.nextword]
	4037	: NEXT_OFF(ST.me));
	4038
	4039	DEBUG_EXECUTE_r({
	4040	PerlIO_printf( Perl_debug_log,
	4041	"%*s %sTRIE matched word #%d, continuing%s\n",
	4042	REPORT_CODE_OFF+depth*2, "",
	4043	PL_colors[4],
	4044	ST.nextword,
	4045	PL_colors[5]
	4046	);
	4047	});
	4048
	4049	if (ST.accepted > 1 \|\| has_cutgroup) {
	4050	PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc);
	4051	assert(0); /* NOTREACHED */
	4052	}
	4053	/* only one choice left - just continue */
	4054	DEBUG_EXECUTE_r({
	4055	AV *const trie_words
	4056	= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
	4057	SV ** const tmp = av_fetch( trie_words,
	4058	ST.nextword-1, 0 );
	4059	SV *sv= tmp ? sv_newmortal() : NULL;
	4060
	4061	PerlIO_printf( Perl_debug_log,
	4062	"%*s %sonly one match left, short-circuiting: #%d <%s>%s\n",
	4063	REPORT_CODE_OFF+depth*2, "", PL_colors[4],
	4064	ST.nextword,
	4065	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	4066	PL_colors[0], PL_colors[1],
	4067	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)\|PERL_PV_ESCAPE_NONASCII
	4068	)
	4069	: "not compiled under -Dr",
	4070	PL_colors[5] );
	4071	});
	4072
	4073	locinput = (char*)uc;
	4074	continue; /* execute rest of RE */
	4075	assert(0); /* NOTREACHED */
	4076	}
	4077	#undef ST
	4078
	4079	case EXACT: { /* /abc/ */
	4080	char *s = STRING(scan);
	4081	ln = STR_LEN(scan);
	4082	if (utf8_target != UTF_PATTERN) {
	4083	/* The target and the pattern have differing utf8ness. */
	4084	char *l = locinput;
	4085	const char * const e = s + ln;
	4086
	4087	if (utf8_target) {
	4088	/* The target is utf8, the pattern is not utf8.
	4089	* Above-Latin1 code points can't match the pattern;
	4090	* invariants match exactly, and the other Latin1 ones need
	4091	* to be downgraded to a single byte in order to do the
	4092	* comparison. (If we could be confident that the target
	4093	* is not malformed, this could be refactored to have fewer
	4094	* tests by just assuming that if the first bytes match, it
	4095	* is an invariant, but there are tests in the test suite
	4096	* dealing with (??{...}) which violate this) */
	4097	while (s < e) {
	4098	if (l >= PL_regeol)
	4099	sayNO;
	4100	if (UTF8_IS_ABOVE_LATIN1(* (U8*) l)) {
	4101	sayNO;
	4102	}
	4103	if (UTF8_IS_INVARIANT((U8)l)) {
	4104	if (l != s) {
	4105	sayNO;
	4106	}
	4107	l++;
	4108	}
	4109	else {
	4110	if (TWO_BYTE_UTF8_TO_UNI(l, (l+1)) != * (U8*) s) {
	4111	sayNO;
	4112	}
	4113	l += 2;
	4114	}
	4115	s++;
	4116	}
	4117	}
	4118	else {
	4119	/* The target is not utf8, the pattern is utf8. */
	4120	while (s < e) {
	4121	if (l >= PL_regeol \|\| UTF8_IS_ABOVE_LATIN1(* (U8*) s))
	4122	{
	4123	sayNO;
	4124	}
	4125	if (UTF8_IS_INVARIANT((U8)s)) {
	4126	if (s != l) {
	4127	sayNO;
	4128	}
	4129	s++;
	4130	}
	4131	else {
	4132	if (TWO_BYTE_UTF8_TO_UNI(s, (s+1)) != * (U8*) l) {
	4133	sayNO;
	4134	}
	4135	s += 2;
	4136	}
	4137	l++;
	4138	}
	4139	}
	4140	locinput = l;
	4141	break;
	4142	}
	4143	/* The target and the pattern have the same utf8ness. */
	4144	/* Inline the first character, for speed. */
	4145	if (UCHARAT(s) != nextchr)
	4146	sayNO;
	4147	if (PL_regeol - locinput < ln)
	4148	sayNO;
	4149	if (ln > 1 && memNE(s, locinput, ln))
	4150	sayNO;
	4151	locinput += ln;
	4152	break;
	4153	}
	4154
	4155	case EXACTFL: { /* /abc/il */
	4156	re_fold_t folder;
	4157	const U8 * fold_array;
	4158	const char * s;
	4159	U32 fold_utf8_flags;
	4160
	4161	PL_reg_flags \|= RF_tainted;
	4162	folder = foldEQ_locale;
	4163	fold_array = PL_fold_locale;
	4164	fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
	4165	goto do_exactf;
	4166
	4167	case EXACTFU_SS: /* /\x{df}/iu */
	4168	case EXACTFU_TRICKYFOLD: /* /\x{390}/iu */
	4169	case EXACTFU: /* /abc/iu */
	4170	folder = foldEQ_latin1;
	4171	fold_array = PL_fold_latin1;
	4172	fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
	4173	goto do_exactf;
	4174
	4175	case EXACTFA: /* /abc/iaa */
	4176	folder = foldEQ_latin1;
	4177	fold_array = PL_fold_latin1;
	4178	fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4179	goto do_exactf;
	4180
	4181	case EXACTF: /* /abc/i */
	4182	folder = foldEQ;
	4183	fold_array = PL_fold;
	4184	fold_utf8_flags = 0;
	4185
	4186	do_exactf:
	4187	s = STRING(scan);
	4188	ln = STR_LEN(scan);
	4189
	4190	if (utf8_target \|\| UTF_PATTERN \|\| state_num == EXACTFU_SS) {
	4191	/* Either target or the pattern are utf8, or has the issue where
	4192	* the fold lengths may differ. */
	4193	const char * const l = locinput;
	4194	char *e = PL_regeol;
	4195
	4196	if (! foldEQ_utf8_flags(s, 0, ln, cBOOL(UTF_PATTERN),
	4197	l, &e, 0, utf8_target, fold_utf8_flags))
	4198	{
	4199	sayNO;
	4200	}
	4201	locinput = e;
	4202	break;
	4203	}
	4204
	4205	/* Neither the target nor the pattern are utf8 */
	4206	if (UCHARAT(s) != nextchr
	4207	&& !NEXTCHR_IS_EOS
	4208	&& UCHARAT(s) != fold_array[nextchr])
	4209	{
	4210	sayNO;
	4211	}
	4212	if (PL_regeol - locinput < ln)
	4213	sayNO;
	4214	if (ln > 1 && ! folder(s, locinput, ln))
	4215	sayNO;
	4216	locinput += ln;
	4217	break;
	4218	}
	4219
	4220	/* XXX Could improve efficiency by separating these all out using a
	4221	* macro or in-line function. At that point regcomp.c would no longer
	4222	* have to set the FLAGS fields of these */
	4223	case BOUNDL: /* /\b/l */
	4224	case NBOUNDL: /* /\B/l */
	4225	PL_reg_flags \|= RF_tainted;
	4226	/* FALL THROUGH */
	4227	case BOUND: /* /\b/ */
	4228	case BOUNDU: /* /\b/u */
	4229	case BOUNDA: /* /\b/a */
	4230	case NBOUND: /* /\B/ */
	4231	case NBOUNDU: /* /\B/u */
	4232	case NBOUNDA: /* /\B/a */
	4233	/* was last char in word? */
	4234	if (utf8_target
	4235	&& FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET
	4236	&& FLAGS(scan) != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
	4237	{
	4238	if (locinput == PL_bostr)
	4239	ln = '\n';
	4240	else {
	4241	const U8 * const r = reghop3((U8)locinput, -1, (U8)PL_bostr);
	4242
	4243	ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
	4244	}
	4245	if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
	4246	ln = isALNUM_uni(ln);
	4247	if (NEXTCHR_IS_EOS)
	4248	n = 0;
	4249	else {
	4250	LOAD_UTF8_CHARCLASS_ALNUM();
	4251	n = swash_fetch(PL_utf8_alnum, (U8*)locinput,
	4252	utf8_target);
	4253	}
	4254	}
	4255	else {
	4256	ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
	4257	n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC_utf8((U8*)locinput);
	4258	}
	4259	}
	4260	else {
	4261
	4262	/* Here the string isn't utf8, or is utf8 and only ascii
	4263	* characters are to match \w. In the latter case looking at
	4264	* the byte just prior to the current one may be just the final
	4265	* byte of a multi-byte character. This is ok. There are two
	4266	* cases:
	4267	* 1) it is a single byte character, and then the test is doing
	4268	* just what it's supposed to.
	4269	* 2) it is a multi-byte character, in which case the final
	4270	* byte is never mistakable for ASCII, and so the test
	4271	* will say it is not a word character, which is the
	4272	* correct answer. */
	4273	ln = (locinput != PL_bostr) ?
	4274	UCHARAT(locinput - 1) : '\n';
	4275	switch (FLAGS(scan)) {
	4276	case REGEX_UNICODE_CHARSET:
	4277	ln = isWORDCHAR_L1(ln);
	4278	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_L1(nextchr);
	4279	break;
	4280	case REGEX_LOCALE_CHARSET:
	4281	ln = isALNUM_LC(ln);
	4282	n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC(nextchr);
	4283	break;
	4284	case REGEX_DEPENDS_CHARSET:
	4285	ln = isALNUM(ln);
	4286	n = NEXTCHR_IS_EOS ? 0 : isALNUM(nextchr);
	4287	break;
	4288	case REGEX_ASCII_RESTRICTED_CHARSET:
	4289	case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
	4290	ln = isWORDCHAR_A(ln);
	4291	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_A(nextchr);
	4292	break;
	4293	default:
	4294	Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
	4295	break;
	4296	}
	4297	}
	4298	/* Note requires that all BOUNDs be lower than all NBOUNDs in
	4299	* regcomp.sym */
	4300	if (((!ln) == (!n)) == (OP(scan) < NBOUND))
	4301	sayNO;
	4302	break;
	4303
	4304	case ANYOF: /* /[abc]/ */
	4305	if (NEXTCHR_IS_EOS)
	4306	sayNO;
	4307	if (utf8_target) {
	4308	if (!reginclass(rex, scan, (U8*)locinput, utf8_target))
	4309	sayNO;
	4310	locinput += UTF8SKIP(locinput);
	4311	break;
	4312	}
	4313	else {
	4314	if (!REGINCLASS(rex, scan, (U8*)locinput))
	4315	sayNO;
	4316	locinput++;
	4317	break;
	4318	}
	4319	break;
	4320
	4321	/* Special char classes: \d, \w etc.
	4322	* The defines start on line 166 or so */
	4323	CCC_TRY_U(ALNUM, NALNUM, isWORDCHAR,
	4324	ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8,
	4325	ALNUMU, NALNUMU, isWORDCHAR_L1,
	4326	ALNUMA, NALNUMA, isWORDCHAR_A,
	4327	alnum, "a");
	4328
	4329	case SPACEL:
	4330	PL_reg_flags \|= RF_tainted;
	4331	if (NEXTCHR_IS_EOS) {
	4332	sayNO;
	4333	}
	4334	if (utf8_target && UTF8_IS_CONTINUED(nextchr)) {
	4335	if (! isSPACE_LC_utf8((U8 *) locinput)) {
	4336	sayNO;
	4337	}
	4338	}
	4339	else if (! isSPACE_LC((U8) nextchr)) {
	4340	sayNO;
	4341	}
	4342	goto increment_locinput;
	4343
	4344	case NSPACEL:
	4345	PL_reg_flags \|= RF_tainted;
	4346	if (NEXTCHR_IS_EOS) {
	4347	sayNO;
	4348	}
	4349	if (utf8_target && UTF8_IS_CONTINUED(nextchr)) {
	4350	if (isSPACE_LC_utf8((U8 *) locinput)) {
	4351	sayNO;
	4352	}
	4353	}
	4354	else if (isSPACE_LC(nextchr)) {
	4355	sayNO;
	4356	}
	4357	goto increment_locinput;
	4358
	4359	case SPACE:
	4360	if (utf8_target) {
	4361	goto utf8_space;
	4362	}
	4363	/* FALL THROUGH */
	4364	case SPACEA:
	4365	if (NEXTCHR_IS_EOS \|\| ! isSPACE_A(nextchr)) {
	4366	sayNO;
	4367	}
	4368	/* Matched a utf8-invariant, so don't have to worry about utf8 */
	4369	locinput++;
	4370	break;
	4371
	4372	case NSPACE:
	4373	if (utf8_target) {
	4374	goto utf8_nspace;
	4375	}
	4376	/* FALL THROUGH */
	4377	case NSPACEA:
	4378	if (NEXTCHR_IS_EOS \|\| isSPACE_A(nextchr)) {
	4379	sayNO;
	4380	}
	4381	goto increment_locinput;
	4382
	4383	case SPACEU:
	4384	utf8_space:
	4385	if (NEXTCHR_IS_EOS \|\| ! is_XPERLSPACE(locinput, utf8_target)) {
	4386	sayNO;
	4387	}
	4388	goto increment_locinput;
	4389
	4390	case NSPACEU:
	4391	utf8_nspace:
	4392	if (NEXTCHR_IS_EOS \|\| is_XPERLSPACE(locinput, utf8_target)) {
	4393	sayNO;
	4394	}
	4395	goto increment_locinput;
	4396
	4397	CCC_TRY(DIGIT, NDIGIT, isDIGIT,
	4398	DIGITL, NDIGITL, isDIGIT_LC, isDIGIT_LC_utf8,
	4399	DIGITA, NDIGITA, isDIGIT_A,
	4400	digit, "0");
	4401
	4402	case POSIXA: /* /[[:ascii:]]/ etc */
	4403	if (NEXTCHR_IS_EOS \|\| ! _generic_isCC_A(nextchr, FLAGS(scan))) {
	4404	sayNO;
	4405	}
	4406	/* Matched a utf8-invariant, so don't have to worry about utf8 */
	4407	locinput++;
	4408	break;
	4409
	4410	case NPOSIXA: /* /[^[:ascii:]]/ etc */
	4411	if (NEXTCHR_IS_EOS \|\| _generic_isCC_A(nextchr, FLAGS(scan))) {
	4412	sayNO;
	4413	}
	4414	goto increment_locinput;
	4415
	4416	case CLUMP: /* Match \X: logical Unicode character. This is defined as
	4417	a Unicode extended Grapheme Cluster */
	4418	/* From http://www.unicode.org/reports/tr29 (5.2 version). An
	4419	extended Grapheme Cluster is:
	4420
	4421	CR LF
	4422	\| Prepend* Begin Extend*
	4423	\| .
	4424
	4425	Begin is: ( Special_Begin \| ! Control )
	4426	Special_Begin is: ( Regional-Indicator+ \| Hangul-syllable )
	4427	Extend is: ( Grapheme_Extend \| Spacing_Mark )
	4428	Control is: [ GCB_Control CR LF ]
	4429	Hangul-syllable is: ( T+ \| ( L* ( L \| ( LVT \| ( V \| LV ) V* ) T* ) ))
	4430
	4431	If we create a 'Regular_Begin' = Begin - Special_Begin, then
	4432	we can rewrite
	4433
	4434	Begin is ( Regular_Begin + Special Begin )
	4435
	4436	It turns out that 98.4% of all Unicode code points match
	4437	Regular_Begin. Doing it this way eliminates a table match in
	4438	the previous implementation for almost all Unicode code points.
	4439
	4440	There is a subtlety with Prepend* which showed up in testing.
	4441	Note that the Begin, and only the Begin is required in:
	4442	\| Prepend* Begin Extend*
	4443	Also, Begin contains '! Control'. A Prepend must be a
	4444	'! Control', which means it must also be a Begin. What it
	4445	comes down to is that if we match Prepend* and then find no
	4446	suitable Begin afterwards, that if we backtrack the last
	4447	Prepend, that one will be a suitable Begin.
	4448	*/
	4449
	4450	if (NEXTCHR_IS_EOS)
	4451	sayNO;
	4452	if (! utf8_target) {
	4453
	4454	/* Match either CR LF or '.', as all the other possibilities
	4455	* require utf8 */
	4456	locinput++; /* Match the . or CR */
	4457	if (nextchr == '\r' /* And if it was CR, and the next is LF,
	4458	match the LF */
	4459	&& locinput < PL_regeol
	4460	&& UCHARAT(locinput) == '\n') locinput++;
	4461	}
	4462	else {
	4463
	4464	/* Utf8: See if is ( CR LF ); already know that locinput <
	4465	* PL_regeol, so locinput+1 is in bounds */
	4466	if ( nextchr == '\r' && locinput+1 < PL_regeol
	4467	&& UCHARAT(locinput + 1) == '\n')
	4468	{
	4469	locinput += 2;
	4470	}
	4471	else {
	4472	STRLEN len;
	4473
	4474	/* In case have to backtrack to beginning, then match '.' */
	4475	char *starting = locinput;
	4476
	4477	/* In case have to backtrack the last prepend */
	4478	char *previous_prepend = 0;
	4479
	4480	LOAD_UTF8_CHARCLASS_GCB();
	4481
	4482	/* Match (prepend)* */
	4483	while (locinput < PL_regeol
	4484	&& (len = is_GCB_Prepend_utf8(locinput)))
	4485	{
	4486	previous_prepend = locinput;
	4487	locinput += len;
	4488	}
	4489
	4490	/* As noted above, if we matched a prepend character, but
	4491	* the next thing won't match, back off the last prepend we
	4492	* matched, as it is guaranteed to match the begin */
	4493	if (previous_prepend
	4494	&& (locinput >= PL_regeol
	4495	\|\| (! swash_fetch(PL_utf8_X_regular_begin,
	4496	(U8*)locinput, utf8_target)
	4497	&& ! is_GCB_SPECIAL_BEGIN_utf8(locinput)))
	4498	)
	4499	{
	4500	locinput = previous_prepend;
	4501	}
	4502
	4503	/* Note that here we know PL_regeol > locinput, as we
	4504	* tested that upon input to this switch case, and if we
	4505	* moved locinput forward, we tested the result just above
	4506	* and it either passed, or we backed off so that it will
	4507	* now pass */
	4508	if (swash_fetch(PL_utf8_X_regular_begin,
	4509	(U8*)locinput, utf8_target)) {
	4510	locinput += UTF8SKIP(locinput);
	4511	}
	4512	else if (! is_GCB_SPECIAL_BEGIN_utf8(locinput)) {
	4513
	4514	/* Here did not match the required 'Begin' in the
	4515	* second term. So just match the very first
	4516	* character, the '.' of the final term of the regex */
	4517	locinput = starting + UTF8SKIP(starting);
	4518	goto exit_utf8;
	4519	} else {
	4520
	4521	/* Here is a special begin. It can be composed of
	4522	* several individual characters. One possibility is
	4523	* RI+ */
	4524	if ((len = is_GCB_RI_utf8(locinput))) {
	4525	locinput += len;
	4526	while (locinput < PL_regeol
	4527	&& (len = is_GCB_RI_utf8(locinput)))
	4528	{
	4529	locinput += len;
	4530	}
	4531	} else if ((len = is_GCB_T_utf8(locinput))) {
	4532	/* Another possibility is T+ */
	4533	locinput += len;
	4534	while (locinput < PL_regeol
	4535	&& (len = is_GCB_T_utf8(locinput)))
	4536	{
	4537	locinput += len;
	4538	}
	4539	} else {
	4540
	4541	/* Here, neither RI+ nor T+; must be some other
	4542	* Hangul. That means it is one of the others: L,
	4543	* LV, LVT or V, and matches:
	4544	* L* (L \| LVT T* \| V * V* T* \| LV V* T) /
	4545
	4546	/* Match L* */
	4547	while (locinput < PL_regeol
	4548	&& (len = is_GCB_L_utf8(locinput)))
	4549	{
	4550	locinput += len;
	4551	}
	4552
	4553	/* Here, have exhausted L*. If the next character
	4554	* is not an LV, LVT nor V, it means we had to have
	4555	* at least one L, so matches L+ in the original
	4556	* equation, we have a complete hangul syllable.
	4557	* Are done. */
	4558
	4559	if (locinput < PL_regeol
	4560	&& is_GCB_LV_LVT_V_utf8(locinput))
	4561	{
	4562
	4563	/* Otherwise keep going. Must be LV, LVT or V.
	4564	* See if LVT */
	4565	if (is_utf8_X_LVT((U8*)locinput)) {
	4566	locinput += UTF8SKIP(locinput);
	4567	} else {
	4568
	4569	/* Must be V or LV. Take it, then match
	4570	* V* */
	4571	locinput += UTF8SKIP(locinput);
	4572	while (locinput < PL_regeol
	4573	&& (len = is_GCB_V_utf8(locinput)))
	4574	{
	4575	locinput += len;
	4576	}
	4577	}
	4578
	4579	/* And any of LV, LVT, or V can be followed
	4580	* by T* */
	4581	while (locinput < PL_regeol
	4582	&& (len = is_GCB_T_utf8(locinput)))
	4583	{
	4584	locinput += len;
	4585	}
	4586	}
	4587	}
	4588	}
	4589
	4590	/* Match any extender */
	4591	while (locinput < PL_regeol
	4592	&& swash_fetch(PL_utf8_X_extend,
	4593	(U8*)locinput, utf8_target))
	4594	{
	4595	locinput += UTF8SKIP(locinput);
	4596	}
	4597	}
	4598	exit_utf8:
	4599	if (locinput > PL_regeol) sayNO;
	4600	}
	4601	break;
	4602
	4603	case NREFFL: /* /\g{name}/il */
	4604	{ /* The capture buffer cases. The ones beginning with N for the
	4605	named buffers just convert to the equivalent numbered and
	4606	pretend they were called as the corresponding numbered buffer
	4607	op. */
	4608	/* don't initialize these in the declaration, it makes C++
	4609	unhappy */
	4610	char *s;
	4611	char type;
	4612	re_fold_t folder;
	4613	const U8 *fold_array;
	4614	UV utf8_fold_flags;
	4615
	4616	PL_reg_flags \|= RF_tainted;
	4617	folder = foldEQ_locale;
	4618	fold_array = PL_fold_locale;
	4619	type = REFFL;
	4620	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4621	goto do_nref;
	4622
	4623	case NREFFA: /* /\g{name}/iaa */
	4624	folder = foldEQ_latin1;
	4625	fold_array = PL_fold_latin1;
	4626	type = REFFA;
	4627	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4628	goto do_nref;
	4629
	4630	case NREFFU: /* /\g{name}/iu */
	4631	folder = foldEQ_latin1;
	4632	fold_array = PL_fold_latin1;
	4633	type = REFFU;
	4634	utf8_fold_flags = 0;
	4635	goto do_nref;
	4636
	4637	case NREFF: /* /\g{name}/i */
	4638	folder = foldEQ;
	4639	fold_array = PL_fold;
	4640	type = REFF;
	4641	utf8_fold_flags = 0;
	4642	goto do_nref;
	4643
	4644	case NREF: /* /\g{name}/ */
	4645	type = REF;
	4646	folder = NULL;
	4647	fold_array = NULL;
	4648	utf8_fold_flags = 0;
	4649	do_nref:
	4650
	4651	/* For the named back references, find the corresponding buffer
	4652	* number */
	4653	n = reg_check_named_buff_matched(rex,scan);
	4654
	4655	if ( ! n ) {
	4656	sayNO;
	4657	}
	4658	goto do_nref_ref_common;
	4659
	4660	case REFFL: /* /\1/il */
	4661	PL_reg_flags \|= RF_tainted;
	4662	folder = foldEQ_locale;
	4663	fold_array = PL_fold_locale;
	4664	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4665	goto do_ref;
	4666
	4667	case REFFA: /* /\1/iaa */
	4668	folder = foldEQ_latin1;
	4669	fold_array = PL_fold_latin1;
	4670	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4671	goto do_ref;
	4672
	4673	case REFFU: /* /\1/iu */
	4674	folder = foldEQ_latin1;
	4675	fold_array = PL_fold_latin1;
	4676	utf8_fold_flags = 0;
	4677	goto do_ref;
	4678
	4679	case REFF: /* /\1/i */
	4680	folder = foldEQ;
	4681	fold_array = PL_fold;
	4682	utf8_fold_flags = 0;
	4683	goto do_ref;
	4684
	4685	case REF: /* /\1/ */
	4686	folder = NULL;
	4687	fold_array = NULL;
	4688	utf8_fold_flags = 0;
	4689
	4690	do_ref:
	4691	type = OP(scan);
	4692	n = ARG(scan); /* which paren pair */
	4693
	4694	do_nref_ref_common:
	4695	ln = rex->offs[n].start;
	4696	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4697	if (rex->lastparen < n \|\| ln == -1)
	4698	sayNO; /* Do not match unless seen CLOSEn. */
	4699	if (ln == rex->offs[n].end)
	4700	break;
	4701
	4702	s = PL_bostr + ln;
	4703	if (type != REF /* REF can do byte comparison */
	4704	&& (utf8_target \|\| type == REFFU))
	4705	{ /* XXX handle REFFL better */
	4706	char * limit = PL_regeol;
	4707
	4708	/* This call case insensitively compares the entire buffer
	4709	* at s, with the current input starting at locinput, but
	4710	* not going off the end given by PL_regeol, and returns in
	4711	* <limit> upon success, how much of the current input was
	4712	* matched */
	4713	if (! foldEQ_utf8_flags(s, NULL, rex->offs[n].end - ln, utf8_target,
	4714	locinput, &limit, 0, utf8_target, utf8_fold_flags))
	4715	{
	4716	sayNO;
	4717	}
	4718	locinput = limit;
	4719	break;
	4720	}
	4721
	4722	/* Not utf8: Inline the first character, for speed. */
	4723	if (!NEXTCHR_IS_EOS &&
	4724	UCHARAT(s) != nextchr &&
	4725	(type == REF \|\|
	4726	UCHARAT(s) != fold_array[nextchr]))
	4727	sayNO;
	4728	ln = rex->offs[n].end - ln;
	4729	if (locinput + ln > PL_regeol)
	4730	sayNO;
	4731	if (ln > 1 && (type == REF
	4732	? memNE(s, locinput, ln)
	4733	: ! folder(s, locinput, ln)))
	4734	sayNO;
	4735	locinput += ln;
	4736	break;
	4737	}
	4738
	4739	case NOTHING: /* null op; e.g. the 'nothing' following
	4740	* the '' in m{(a+\|b)}' */
	4741	break;
	4742	case TAIL: /* placeholder while compiling (A\|B\|C) */
	4743	break;
	4744
	4745	case BACK: /* ??? doesn't appear to be used ??? */
	4746	break;
	4747
	4748	#undef ST
	4749	#define ST st->u.eval
	4750	{
	4751	SV *ret;
	4752	REGEXP *re_sv;
	4753	regexp *re;
	4754	regexp_internal *rei;
	4755	regnode *startpoint;
	4756
	4757	case GOSTART: /* (?R) */
	4758	case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
	4759	if (cur_eval && cur_eval->locinput==locinput) {
	4760	if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
	4761	Perl_croak(aTHX_ "Infinite recursion in regex");
	4762	if ( ++nochange_depth > max_nochange_depth )
	4763	Perl_croak(aTHX_
	4764	"Pattern subroutine nesting without pos change"
	4765	" exceeded limit in regex");
	4766	} else {
	4767	nochange_depth = 0;
	4768	}
	4769	re_sv = rex_sv;
	4770	re = rex;
	4771	rei = rexi;
	4772	if (OP(scan)==GOSUB) {
	4773	startpoint = scan + ARG2L(scan);
	4774	ST.close_paren = ARG(scan);
	4775	} else {
	4776	startpoint = rei->program+1;
	4777	ST.close_paren = 0;
	4778	}
	4779	goto eval_recurse_doit;
	4780	assert(0); /* NOTREACHED */
	4781
	4782	case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X\|Y)B/ */
	4783	if (cur_eval && cur_eval->locinput==locinput) {
	4784	if ( ++nochange_depth > max_nochange_depth )
	4785	Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
	4786	} else {
	4787	nochange_depth = 0;
	4788	}
	4789	{
	4790	/* execute the code in the {...} */
	4791
	4792	dSP;
	4793	PADOFFSET before;
	4794	OP * const oop = PL_op;
	4795	COP * const ocurcop = PL_curcop;
	4796	OP *nop;
	4797	char *saved_regeol = PL_regeol;
	4798	struct re_save_state saved_state;
	4799	CV *newcv;
	4800
	4801	/* save all paren positions */
	4802	regcppush(rex, 0);
	4803	REGCP_SET(runops_cp);
	4804
	4805	/* To not corrupt the existing regex state while executing the
	4806	* eval we would normally put it on the save stack, like with
	4807	* save_re_context. However, re-evals have a weird scoping so we
	4808	* can't just add ENTER/LEAVE here. With that, things like
	4809	*
	4810	* (?{$a=2})(a(?{local$a=$a+1}))aakc(?{$b=$a})
	4811	*
	4812	* would break, as they expect the localisation to be unwound
	4813	* only when the re-engine backtracks through the bit that
	4814	* localised it.
	4815	*
	4816	* What we do instead is just saving the state in a local c
	4817	* variable.
	4818	*/
	4819	Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
	4820
	4821	PL_reg_state.re_reparsing = FALSE;
	4822
	4823	if (!caller_cv)
	4824	caller_cv = find_runcv(NULL);
	4825
	4826	n = ARG(scan);
	4827
	4828	if (rexi->data->what[n] == 'r') { /* code from an external qr */
	4829	newcv = (ReANY(
	4830	(REGEXP*)(rexi->data->data[n])
	4831	))->qr_anoncv
	4832	;
	4833	nop = (OP*)rexi->data->data[n+1];
	4834	}
	4835	else if (rexi->data->what[n] == 'l') { /* literal code */
	4836	newcv = caller_cv;
	4837	nop = (OP*)rexi->data->data[n];
	4838	assert(CvDEPTH(newcv));
	4839	}
	4840	else {
	4841	/* literal with own CV */
	4842	assert(rexi->data->what[n] == 'L');
	4843	newcv = rex->qr_anoncv;
	4844	nop = (OP*)rexi->data->data[n];
	4845	}
	4846
	4847	/* normally if we're about to execute code from the same
	4848	* CV that we used previously, we just use the existing
	4849	* CX stack entry. However, its possible that in the
	4850	* meantime we may have backtracked, popped from the save
	4851	* stack, and undone the SAVECOMPPAD(s) associated with
	4852	* PUSH_MULTICALL; in which case PL_comppad no longer
	4853	* points to newcv's pad. */
	4854	if (newcv != last_pushed_cv \|\| PL_comppad != last_pad)
	4855	{
	4856	I32 depth = (newcv == caller_cv) ? 0 : 1;
	4857	if (last_pushed_cv) {
	4858	CHANGE_MULTICALL_WITHDEPTH(newcv, depth);
	4859	}
	4860	else {
	4861	PUSH_MULTICALL_WITHDEPTH(newcv, depth);
	4862	}
	4863	last_pushed_cv = newcv;
	4864	}
	4865	else {
	4866	/* these assignments are just to silence compiler
	4867	* warnings */
	4868	multicall_cop = NULL;
	4869	newsp = NULL;
	4870	}
	4871	last_pad = PL_comppad;
	4872
	4873	/* the initial nextstate you would normally execute
	4874	* at the start of an eval (which would cause error
	4875	* messages to come from the eval), may be optimised
	4876	* away from the execution path in the regex code blocks;
	4877	* so manually set PL_curcop to it initially */
	4878	{
	4879	OP *o = cUNOPx(nop)->op_first;
	4880	assert(o->op_type == OP_NULL);
	4881	if (o->op_targ == OP_SCOPE) {
	4882	o = cUNOPo->op_first;
	4883	}
	4884	else {
	4885	assert(o->op_targ == OP_LEAVE);
	4886	o = cUNOPo->op_first;
	4887	assert(o->op_type == OP_ENTER);
	4888	o = o->op_sibling;
	4889	}
	4890
	4891	if (o->op_type != OP_STUB) {
	4892	assert( o->op_type == OP_NEXTSTATE
	4893	\|\| o->op_type == OP_DBSTATE
	4894	\|\| (o->op_type == OP_NULL
	4895	&& ( o->op_targ == OP_NEXTSTATE
	4896	\|\| o->op_targ == OP_DBSTATE
	4897	)
	4898	)
	4899	);
	4900	PL_curcop = (COP*)o;
	4901	}
	4902	}
	4903	nop = nop->op_next;
	4904
	4905	DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
	4906	" re EVAL PL_op=0x%"UVxf"\n", PTR2UV(nop)) );
	4907
	4908	rex->offs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
	4909
	4910	if (sv_yes_mark) {
	4911	SV *sv_mrk = get_sv("REGMARK", 1);
	4912	sv_setsv(sv_mrk, sv_yes_mark);
	4913	}
	4914
	4915	/* we don't use MULTICALL here as we want to call the
	4916	* first op of the block of interest, rather than the
	4917	* first op of the sub */
	4918	before = SP-PL_stack_base;
	4919	PL_op = nop;
	4920	CALLRUNOPS(aTHX); /* Scalar context. */
	4921	SPAGAIN;
	4922	if (SP-PL_stack_base == before)
	4923	ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
	4924	else {
	4925	ret = POPs;
	4926	PUTBACK;
	4927	}
	4928
	4929	/* before restoring everything, evaluate the returned
	4930	* value, so that 'uninit' warnings don't use the wrong
	4931	* PL_op or pad. Also need to process any magic vars
	4932	* (e.g. $1) before parentheses are restored */
	4933
	4934	PL_op = NULL;
	4935
	4936	re_sv = NULL;
	4937	if (logical == 0) /* (?{})/ */
	4938	sv_setsv(save_scalar(PL_replgv), ret); /* $^R */
	4939	else if (logical == 1) { /* /(?(?{...})X\|Y)/ */
	4940	sw = cBOOL(SvTRUE(ret));
	4941	logical = 0;
	4942	}
	4943	else { /* /(??{}) */
	4944	/* if its overloaded, let the regex compiler handle
	4945	* it; otherwise extract regex, or stringify */
	4946	if (!SvAMAGIC(ret)) {
	4947	SV *sv = ret;
	4948	if (SvROK(sv))
	4949	sv = SvRV(sv);
	4950	if (SvTYPE(sv) == SVt_REGEXP)
	4951	re_sv = (REGEXP*) sv;
	4952	else if (SvSMAGICAL(sv)) {
	4953	MAGIC *mg = mg_find(sv, PERL_MAGIC_qr);
	4954	if (mg)
	4955	re_sv = (REGEXP *) mg->mg_obj;
	4956	}
	4957
	4958	/* force any magic, undef warnings here */
	4959	if (!re_sv) {
	4960	ret = sv_mortalcopy(ret);
	4961	(void) SvPV_force_nolen(ret);
	4962	}
	4963	}
	4964
	4965	}
	4966
	4967	Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
	4968
	4969	/* *** Note that at this point we don't restore
	4970	* PL_comppad, (or pop the CxSUB) on the assumption it may
	4971	* be used again soon. This is safe as long as nothing
	4972	* in the regexp code uses the pad ! */
	4973	PL_op = oop;
	4974	PL_curcop = ocurcop;
	4975	PL_regeol = saved_regeol;
	4976	S_regcp_restore(aTHX_ rex, runops_cp);
	4977
	4978	if (logical != 2)
	4979	break;
	4980	}
	4981
	4982	/* only /(??{})/ from now on */
	4983	logical = 0;
	4984	{
	4985	/* extract RE object from returned value; compiling if
	4986	* necessary */
	4987
	4988	if (re_sv) {
	4989	re_sv = reg_temp_copy(NULL, re_sv);
	4990	}
	4991	else {
	4992	U32 pm_flags = 0;
	4993	const I32 osize = PL_regsize;
	4994
	4995	if (SvUTF8(ret) && IN_BYTES) {
	4996	/* In use 'bytes': make a copy of the octet
	4997	* sequence, but without the flag on */
	4998	STRLEN len;
	4999	const char *const p = SvPV(ret, len);
	5000	ret = newSVpvn_flags(p, len, SVs_TEMP);
	5001	}
	5002	if (rex->intflags & PREGf_USE_RE_EVAL)
	5003	pm_flags \|= PMf_USE_RE_EVAL;
	5004
	5005	/* if we got here, it should be an engine which
	5006	* supports compiling code blocks and stuff */
	5007	assert(rex->engine && rex->engine->op_comp);
	5008	assert(!(scan->flags & ~RXf_PMf_COMPILETIME));
	5009	re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL,
	5010	rex->engine, NULL, NULL,
	5011	/* copy /msix etc to inner pattern */
	5012	scan->flags,
	5013	pm_flags);
	5014
	5015	if (!(SvFLAGS(ret)
	5016	& (SVs_TEMP \| SVs_PADTMP \| SVf_READONLY
	5017	\| SVs_GMG))) {
	5018	/* This isn't a first class regexp. Instead, it's
	5019	caching a regexp onto an existing, Perl visible
	5020	scalar. */
	5021	sv_magic(ret, MUTABLE_SV(re_sv), PERL_MAGIC_qr, 0, 0);
	5022	}
	5023	PL_regsize = osize;
	5024	/* safe to do now that any $1 etc has been
	5025	* interpolated into the new pattern string and
	5026	* compiled */
	5027	S_regcp_restore(aTHX_ rex, runops_cp);
	5028	}
	5029	SAVEFREESV(re_sv);
	5030	re = ReANY(re_sv);
	5031	}
	5032	RXp_MATCH_COPIED_off(re);
	5033	re->subbeg = rex->subbeg;
	5034	re->sublen = rex->sublen;
	5035	re->suboffset = rex->suboffset;
	5036	re->subcoffset = rex->subcoffset;
	5037	rei = RXi_GET(re);
	5038	DEBUG_EXECUTE_r(
	5039	debug_start_match(re_sv, utf8_target, locinput, PL_regeol,
	5040	"Matching embedded");
	5041	);
	5042	startpoint = rei->program + 1;
	5043	ST.close_paren = 0; /* only used for GOSUB */
	5044
	5045	eval_recurse_doit: /* Share code with GOSUB below this line */
	5046	/* run the pattern returned from (??{...}) */
	5047	ST.cp = regcppush(rex, 0); /* Save all the positions. */
	5048	REGCP_SET(ST.lastcp);
	5049
	5050	re->lastparen = 0;
	5051	re->lastcloseparen = 0;
	5052
	5053	PL_regsize = 0;
	5054
	5055	/* XXXX This is too dramatic a measure... */
	5056	PL_reg_maxiter = 0;
	5057
	5058	ST.toggle_reg_flags = PL_reg_flags;
	5059	if (RX_UTF8(re_sv))
	5060	PL_reg_flags \|= RF_utf8;
	5061	else
	5062	PL_reg_flags &= ~RF_utf8;
	5063	ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
	5064
	5065	ST.prev_rex = rex_sv;
	5066	ST.prev_curlyx = cur_curlyx;
	5067	rex_sv = re_sv;
	5068	SET_reg_curpm(rex_sv);
	5069	rex = re;
	5070	rexi = rei;
	5071	cur_curlyx = NULL;
	5072	ST.B = next;
	5073	ST.prev_eval = cur_eval;
	5074	cur_eval = st;
	5075	/* now continue from first node in postoned RE */
	5076	PUSH_YES_STATE_GOTO(EVAL_AB, startpoint, locinput);
	5077	assert(0); /* NOTREACHED */
	5078	}
	5079
	5080	case EVAL_AB: /* cleanup after a successful (??{A})B */
	5081	/* note: this is called twice; first after popping B, then A */
	5082	PL_reg_flags ^= ST.toggle_reg_flags;
	5083	rex_sv = ST.prev_rex;
	5084	SET_reg_curpm(rex_sv);
	5085	rex = ReANY(rex_sv);
	5086	rexi = RXi_GET(rex);
	5087	regcpblow(ST.cp);
	5088	cur_eval = ST.prev_eval;
	5089	cur_curlyx = ST.prev_curlyx;
	5090
	5091	/* XXXX This is too dramatic a measure... */
	5092	PL_reg_maxiter = 0;
	5093	if ( nochange_depth )
	5094	nochange_depth--;
	5095	sayYES;
	5096
	5097
	5098	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
	5099	/* note: this is called twice; first after popping B, then A */
	5100	PL_reg_flags ^= ST.toggle_reg_flags;
	5101	rex_sv = ST.prev_rex;
	5102	SET_reg_curpm(rex_sv);
	5103	rex = ReANY(rex_sv);
	5104	rexi = RXi_GET(rex);
	5105
	5106	REGCP_UNWIND(ST.lastcp);
	5107	regcppop(rex);
	5108	cur_eval = ST.prev_eval;
	5109	cur_curlyx = ST.prev_curlyx;
	5110	/* XXXX This is too dramatic a measure... */
	5111	PL_reg_maxiter = 0;
	5112	if ( nochange_depth )
	5113	nochange_depth--;
	5114	sayNO_SILENT;
	5115	#undef ST
	5116
	5117	case OPEN: /* ( */
	5118	n = ARG(scan); /* which paren pair */
	5119	rex->offs[n].start_tmp = locinput - PL_bostr;
	5120	if (n > PL_regsize)
	5121	PL_regsize = n;
	5122	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	5123	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf" tmp; regsize=%"UVuf"\n",
	5124	PTR2UV(rex),
	5125	PTR2UV(rex->offs),
	5126	(UV)n,
	5127	(IV)rex->offs[n].start_tmp,
	5128	(UV)PL_regsize
	5129	));
	5130	lastopen = n;
	5131	break;
	5132
	5133	/* XXX really need to log other places start/end are set too */
	5134	#define CLOSE_CAPTURE \
	5135	rex->offs[n].start = rex->offs[n].start_tmp; \
	5136	rex->offs[n].end = locinput - PL_bostr; \
	5137	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, \
	5138	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf"..%"IVdf"\n", \
	5139	PTR2UV(rex), \
	5140	PTR2UV(rex->offs), \
	5141	(UV)n, \
	5142	(IV)rex->offs[n].start, \
	5143	(IV)rex->offs[n].end \
	5144	))
	5145
	5146	case CLOSE: /* ) */
	5147	n = ARG(scan); /* which paren pair */
	5148	CLOSE_CAPTURE;
	5149	/*if (n > PL_regsize)
	5150	PL_regsize = n;*/
	5151	if (n > rex->lastparen)
	5152	rex->lastparen = n;
	5153	rex->lastcloseparen = n;
	5154	if (cur_eval && cur_eval->u.eval.close_paren == n) {
	5155	goto fake_end;
	5156	}
	5157	break;
	5158
	5159	case ACCEPT: /* (ACCEPT) /
	5160	if (ARG(scan)){
	5161	regnode *cursor;
	5162	for (cursor=scan;
	5163	cursor && OP(cursor)!=END;
	5164	cursor=regnext(cursor))
	5165	{
	5166	if ( OP(cursor)==CLOSE ){
	5167	n = ARG(cursor);
	5168	if ( n <= lastopen ) {
	5169	CLOSE_CAPTURE;
	5170	/*if (n > PL_regsize)
	5171	PL_regsize = n;*/
	5172	if (n > rex->lastparen)
	5173	rex->lastparen = n;
	5174	rex->lastcloseparen = n;
	5175	if ( n == ARG(scan) \|\| (cur_eval &&
	5176	cur_eval->u.eval.close_paren == n))
	5177	break;
	5178	}
	5179	}
	5180	}
	5181	}
	5182	goto fake_end;
	5183	/NOTREACHED/
	5184
	5185	case GROUPP: /* (?(1)) */
	5186	n = ARG(scan); /* which paren pair */
	5187	sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
	5188	break;
	5189
	5190	case NGROUPP: /* (?(<name>)) */
	5191	/* reg_check_named_buff_matched returns 0 for no match */
	5192	sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
	5193	break;
	5194
	5195	case INSUBP: /* (?(R)) */
	5196	n = ARG(scan);
	5197	sw = (cur_eval && (!n \|\| cur_eval->u.eval.close_paren == n));
	5198	break;
	5199
	5200	case DEFINEP: /* (?(DEFINE)) */
	5201	sw = 0;
	5202	break;
	5203
	5204	case IFTHEN: /* (?(cond)A\|B) */
	5205	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	5206	if (sw)
	5207	next = NEXTOPER(NEXTOPER(scan));
	5208	else {
	5209	next = scan + ARG(scan);
	5210	if (OP(next) == IFTHEN) /* Fake one. */
	5211	next = NEXTOPER(NEXTOPER(next));
	5212	}
	5213	break;
	5214
	5215	case LOGICAL: /* modifier for EVAL and IFMATCH */
	5216	logical = scan->flags;
	5217	break;
	5218
	5219	/*******************************************************************
	5220
	5221	The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
	5222	pattern, where A and B are subpatterns. (For simple A, CURLYM or
	5223	STAR/PLUS/CURLY/CURLYN are used instead.)
	5224
	5225	A*B is compiled as <CURLYX><A><WHILEM><B>
	5226
	5227	On entry to the subpattern, CURLYX is called. This pushes a CURLYX
	5228	state, which contains the current count, initialised to -1. It also sets
	5229	cur_curlyx to point to this state, with any previous value saved in the
	5230	state block.
	5231
	5232	CURLYX then jumps straight to the WHILEM op, rather than executing A,
	5233	since the pattern may possibly match zero times (i.e. it's a while {} loop
	5234	rather than a do {} while loop).
	5235
	5236	Each entry to WHILEM represents a successful match of A. The count in the
	5237	CURLYX block is incremented, another WHILEM state is pushed, and execution
	5238	passes to A or B depending on greediness and the current count.
	5239
	5240	For example, if matching against the string a1a2a3b (where the aN are
	5241	substrings that match /A/), then the match progresses as follows: (the
	5242	pushed states are interspersed with the bits of strings matched so far):
	5243
	5244	<CURLYX cnt=-1>
	5245	<CURLYX cnt=0><WHILEM>
	5246	<CURLYX cnt=1><WHILEM> a1 <WHILEM>
	5247	<CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
	5248	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
	5249	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
	5250
	5251	(Contrast this with something like CURLYM, which maintains only a single
	5252	backtrack state:
	5253
	5254	<CURLYM cnt=0> a1
	5255	a1 <CURLYM cnt=1> a2
	5256	a1 a2 <CURLYM cnt=2> a3
	5257	a1 a2 a3 <CURLYM cnt=3> b
	5258	)
	5259
	5260	Each WHILEM state block marks a point to backtrack to upon partial failure
	5261	of A or B, and also contains some minor state data related to that
	5262	iteration. The CURLYX block, pointed to by cur_curlyx, contains the
	5263	overall state, such as the count, and pointers to the A and B ops.
	5264
	5265	This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
	5266	must always point to the current CURLYX block, the rules are:
	5267
	5268	When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
	5269	and set cur_curlyx to point the new block.
	5270
	5271	When popping the CURLYX block after a successful or unsuccessful match,
	5272	restore the previous cur_curlyx.
	5273
	5274	When WHILEM is about to execute B, save the current cur_curlyx, and set it
	5275	to the outer one saved in the CURLYX block.
	5276
	5277	When popping the WHILEM block after a successful or unsuccessful B match,
	5278	restore the previous cur_curlyx.
	5279
	5280	Here's an example for the pattern (AI* BI)*BO
	5281	I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
	5282
	5283	cur_
	5284	curlyx backtrack stack
	5285	------ ---------------
	5286	NULL
	5287	CO <CO prev=NULL> <WO>
	5288	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	5289	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	5290	NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
	5291
	5292	At this point the pattern succeeds, and we work back down the stack to
	5293	clean up, restoring as we go:
	5294
	5295	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	5296	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	5297	CO <CO prev=NULL> <WO>
	5298	NULL
	5299
	5300	*******************************************************************/
	5301
	5302	#define ST st->u.curlyx
	5303
	5304	case CURLYX: /* start of /AB/ (for complex A) /
	5305	{
	5306	/* No need to save/restore up to this paren */
	5307	I32 parenfloor = scan->flags;
	5308
	5309	assert(next); /* keep Coverity happy */
	5310	if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
	5311	next += ARG(next);
	5312
	5313	/* XXXX Probably it is better to teach regpush to support
	5314	parenfloor > PL_regsize... */
	5315	if (parenfloor > (I32)rex->lastparen)
	5316	parenfloor = rex->lastparen; /* Pessimization... */
	5317
	5318	ST.prev_curlyx= cur_curlyx;
	5319	cur_curlyx = st;
	5320	ST.cp = PL_savestack_ix;
	5321
	5322	/* these fields contain the state of the current curly.
	5323	* they are accessed by subsequent WHILEMs */
	5324	ST.parenfloor = parenfloor;
	5325	ST.me = scan;
	5326	ST.B = next;
	5327	ST.minmod = minmod;
	5328	minmod = 0;
	5329	ST.count = -1; /* this will be updated by WHILEM */
	5330	ST.lastloc = NULL; /* this will be updated by WHILEM */
	5331
	5332	PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput);
	5333	assert(0); /* NOTREACHED */
	5334	}
	5335
	5336	case CURLYX_end: /* just finished matching all of AB /
	5337	cur_curlyx = ST.prev_curlyx;
	5338	sayYES;
	5339	assert(0); /* NOTREACHED */
	5340
	5341	case CURLYX_end_fail: /* just failed to match all of AB /
	5342	regcpblow(ST.cp);
	5343	cur_curlyx = ST.prev_curlyx;
	5344	sayNO;
	5345	assert(0); /* NOTREACHED */
	5346
	5347
	5348	#undef ST
	5349	#define ST st->u.whilem
	5350
	5351	case WHILEM: /* just matched an A in /AB/ (for complex A) /
	5352	{
	5353	/* see the discussion above about CURLYX/WHILEM */
	5354	I32 n;
	5355	int min = ARG1(cur_curlyx->u.curlyx.me);
	5356	int max = ARG2(cur_curlyx->u.curlyx.me);
	5357	regnode *A = NEXTOPER(cur_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS;
	5358
	5359	assert(cur_curlyx); /* keep Coverity happy */
	5360	n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
	5361	ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
	5362	ST.cache_offset = 0;
	5363	ST.cache_mask = 0;
	5364
	5365
	5366	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5367	"%*s whilem: matched %ld out of %d..%d\n",
	5368	REPORT_CODE_OFF+depth*2, "", (long)n, min, max)
	5369	);
	5370
	5371	/* First just match a string of min A's. */
	5372
	5373	if (n < min) {
	5374	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
	5375	cur_curlyx->u.curlyx.lastloc = locinput;
	5376	REGCP_SET(ST.lastcp);
	5377
	5378	PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput);
	5379	assert(0); /* NOTREACHED */
	5380	}
	5381
	5382	/* If degenerate A matches "", assume A done. */
	5383
	5384	if (locinput == cur_curlyx->u.curlyx.lastloc) {
	5385	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5386	"%*s whilem: empty match detected, trying continuation...\n",
	5387	REPORT_CODE_OFF+depth*2, "")
	5388	);
	5389	goto do_whilem_B_max;
	5390	}
	5391
	5392	/* super-linear cache processing */
	5393
	5394	if (scan->flags) {
	5395
	5396	if (!PL_reg_maxiter) {
	5397	/* start the countdown: Postpone detection until we
	5398	* know the match is not that much linear. */
	5399	PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
	5400	/* possible overflow for long strings and many CURLYX's */
	5401	if (PL_reg_maxiter < 0)
	5402	PL_reg_maxiter = I32_MAX;
	5403	PL_reg_leftiter = PL_reg_maxiter;
	5404	}
	5405
	5406	if (PL_reg_leftiter-- == 0) {
	5407	/* initialise cache */
	5408	const I32 size = (PL_reg_maxiter + 7)/8;
	5409	if (PL_reg_poscache) {
	5410	if ((I32)PL_reg_poscache_size < size) {
	5411	Renew(PL_reg_poscache, size, char);
	5412	PL_reg_poscache_size = size;
	5413	}
	5414	Zero(PL_reg_poscache, size, char);
	5415	}
	5416	else {
	5417	PL_reg_poscache_size = size;
	5418	Newxz(PL_reg_poscache, size, char);
	5419	}
	5420	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5421	"%swhilem: Detected a super-linear match, switching on caching%s...\n",
	5422	PL_colors[4], PL_colors[5])
	5423	);
	5424	}
	5425
	5426	if (PL_reg_leftiter < 0) {
	5427	/* have we already failed at this position? */
	5428	I32 offset, mask;
	5429	offset = (scan->flags & 0xf) - 1
	5430	+ (locinput - PL_bostr) * (scan->flags>>4);
	5431	mask = 1 << (offset % 8);
	5432	offset /= 8;
	5433	if (PL_reg_poscache[offset] & mask) {
	5434	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5435	"%*s whilem: (cache) already tried at this position...\n",
	5436	REPORT_CODE_OFF+depth*2, "")
	5437	);
	5438	sayNO; /* cache records failure */
	5439	}
	5440	ST.cache_offset = offset;
	5441	ST.cache_mask = mask;
	5442	}
	5443	}
	5444
	5445	/* Prefer B over A for minimal matching. */
	5446
	5447	if (cur_curlyx->u.curlyx.minmod) {
	5448	ST.save_curlyx = cur_curlyx;
	5449	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	5450	ST.cp = regcppush(rex, ST.save_curlyx->u.curlyx.parenfloor);
	5451	REGCP_SET(ST.lastcp);
	5452	PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
	5453	locinput);
	5454	assert(0); /* NOTREACHED */
	5455	}
	5456
	5457	/* Prefer A over B for maximal matching. */
	5458
	5459	if (n < max) { /* More greed allowed? */
	5460	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
	5461	cur_curlyx->u.curlyx.lastloc = locinput;
	5462	REGCP_SET(ST.lastcp);
	5463	PUSH_STATE_GOTO(WHILEM_A_max, A, locinput);
	5464	assert(0); /* NOTREACHED */
	5465	}
	5466	goto do_whilem_B_max;
	5467	}
	5468	assert(0); /* NOTREACHED */
	5469
	5470	case WHILEM_B_min: /* just matched B in a minimal match */
	5471	case WHILEM_B_max: /* just matched B in a maximal match */
	5472	cur_curlyx = ST.save_curlyx;
	5473	sayYES;
	5474	assert(0); /* NOTREACHED */
	5475
	5476	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
	5477	cur_curlyx = ST.save_curlyx;
	5478	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	5479	cur_curlyx->u.curlyx.count--;
	5480	CACHEsayNO;
	5481	assert(0); /* NOTREACHED */
	5482
	5483	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
	5484	/* FALL THROUGH */
	5485	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
	5486	REGCP_UNWIND(ST.lastcp);
	5487	regcppop(rex);
	5488	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	5489	cur_curlyx->u.curlyx.count--;
	5490	CACHEsayNO;
	5491	assert(0); /* NOTREACHED */
	5492
	5493	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
	5494	REGCP_UNWIND(ST.lastcp);
	5495	regcppop(rex); /* Restore some previous $<digit>s? */
	5496	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5497	"%*s whilem: failed, trying continuation...\n",
	5498	REPORT_CODE_OFF+depth*2, "")
	5499	);
	5500	do_whilem_B_max:
	5501	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	5502	&& ckWARN(WARN_REGEXP)
	5503	&& !(PL_reg_flags & RF_warned))
	5504	{
	5505	PL_reg_flags \|= RF_warned;
	5506	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	5507	"Complex regular subexpression recursion limit (%d) "
	5508	"exceeded",
	5509	REG_INFTY - 1);
	5510	}
	5511
	5512	/* now try B */
	5513	ST.save_curlyx = cur_curlyx;
	5514	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	5515	PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
	5516	locinput);
	5517	assert(0); /* NOTREACHED */
	5518
	5519	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
	5520	cur_curlyx = ST.save_curlyx;
	5521	REGCP_UNWIND(ST.lastcp);
	5522	regcppop(rex);
	5523
	5524	if (cur_curlyx->u.curlyx.count >= /max/ARG2(cur_curlyx->u.curlyx.me)) {
	5525	/* Maximum greed exceeded */
	5526	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	5527	&& ckWARN(WARN_REGEXP)
	5528	&& !(PL_reg_flags & RF_warned))
	5529	{
	5530	PL_reg_flags \|= RF_warned;
	5531	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	5532	"Complex regular subexpression recursion "
	5533	"limit (%d) exceeded",
	5534	REG_INFTY - 1);
	5535	}
	5536	cur_curlyx->u.curlyx.count--;
	5537	CACHEsayNO;
	5538	}
	5539
	5540	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5541	"%s trying longer...\n", REPORT_CODE_OFF+depth2, "")
	5542	);
	5543	/* Try grabbing another A and see if it helps. */
	5544	cur_curlyx->u.curlyx.lastloc = locinput;
	5545	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
	5546	REGCP_SET(ST.lastcp);
	5547	PUSH_STATE_GOTO(WHILEM_A_min,
	5548	/A/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
	5549	locinput);
	5550	assert(0); /* NOTREACHED */
	5551
	5552	#undef ST
	5553	#define ST st->u.branch
	5554
	5555	case BRANCHJ: /* /(...\|A\|...)/ with long next pointer */
	5556	next = scan + ARG(scan);
	5557	if (next == scan)
	5558	next = NULL;
	5559	scan = NEXTOPER(scan);
	5560	/* FALL THROUGH */
	5561
	5562	case BRANCH: /* /(...\|A\|...)/ */
	5563	scan = NEXTOPER(scan); /* scan now points to inner node */
	5564	ST.lastparen = rex->lastparen;
	5565	ST.lastcloseparen = rex->lastcloseparen;
	5566	ST.next_branch = next;
	5567	REGCP_SET(ST.cp);
	5568
	5569	/* Now go into the branch */
	5570	if (has_cutgroup) {
	5571	PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput);
	5572	} else {
	5573	PUSH_STATE_GOTO(BRANCH_next, scan, locinput);
	5574	}
	5575	assert(0); /* NOTREACHED */
	5576
	5577	case CUTGROUP: /* /(THEN)/ /
	5578	sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
	5579	MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5580	PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
	5581	assert(0); /* NOTREACHED */
	5582
	5583	case CUTGROUP_next_fail:
	5584	do_cutgroup = 1;
	5585	no_final = 1;
	5586	if (st->u.mark.mark_name)
	5587	sv_commit = st->u.mark.mark_name;
	5588	sayNO;
	5589	assert(0); /* NOTREACHED */
	5590
	5591	case BRANCH_next:
	5592	sayYES;
	5593	assert(0); /* NOTREACHED */
	5594
	5595	case BRANCH_next_fail: /* that branch failed; try the next, if any */
	5596	if (do_cutgroup) {
	5597	do_cutgroup = 0;
	5598	no_final = 0;
	5599	}
	5600	REGCP_UNWIND(ST.cp);
	5601	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5602	scan = ST.next_branch;
	5603	/* no more branches? */
	5604	if (!scan \|\| (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
	5605	DEBUG_EXECUTE_r({
	5606	PerlIO_printf( Perl_debug_log,
	5607	"%*s %sBRANCH failed...%s\n",
	5608	REPORT_CODE_OFF+depth*2, "",
	5609	PL_colors[4],
	5610	PL_colors[5] );
	5611	});
	5612	sayNO_SILENT;
	5613	}
	5614	continue; /* execute next BRANCH[J] op */
	5615	assert(0); /* NOTREACHED */
	5616
	5617	case MINMOD: /* next op will be non-greedy, e.g. A? /
	5618	minmod = 1;
	5619	break;
	5620
	5621	#undef ST
	5622	#define ST st->u.curlym
	5623
	5624	case CURLYM: /* /A{m,n}B/ where A is fixed-length */
	5625
	5626	/* This is an optimisation of CURLYX that enables us to push
	5627	* only a single backtracking state, no matter how many matches
	5628	* there are in {m,n}. It relies on the pattern being constant
	5629	* length, with no parens to influence future backrefs
	5630	*/
	5631
	5632	ST.me = scan;
	5633	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5634
	5635	ST.lastparen = rex->lastparen;
	5636	ST.lastcloseparen = rex->lastcloseparen;
	5637
	5638	/* if paren positive, emulate an OPEN/CLOSE around A */
	5639	if (ST.me->flags) {
	5640	U32 paren = ST.me->flags;
	5641	if (paren > PL_regsize)
	5642	PL_regsize = paren;
	5643	scan += NEXT_OFF(scan); /* Skip former OPEN. */
	5644	}
	5645	ST.A = scan;
	5646	ST.B = next;
	5647	ST.alen = 0;
	5648	ST.count = 0;
	5649	ST.minmod = minmod;
	5650	minmod = 0;
	5651	ST.c1 = CHRTEST_UNINIT;
	5652	REGCP_SET(ST.cp);
	5653
	5654	if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
	5655	goto curlym_do_B;
	5656
	5657	curlym_do_A: /* execute the A in /A{m,n}B/ */
	5658	PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput); /* match A */
	5659	assert(0); /* NOTREACHED */
	5660
	5661	case CURLYM_A: /* we've just matched an A */
	5662	ST.count++;
	5663	/* after first match, determine A's length: u.curlym.alen */
	5664	if (ST.count == 1) {
	5665	if (PL_reg_match_utf8) {
	5666	char *s = st->locinput;
	5667	while (s < locinput) {
	5668	ST.alen++;
	5669	s += UTF8SKIP(s);
	5670	}
	5671	}
	5672	else {
	5673	ST.alen = locinput - st->locinput;
	5674	}
	5675	if (ST.alen == 0)
	5676	ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
	5677	}
	5678	DEBUG_EXECUTE_r(
	5679	PerlIO_printf(Perl_debug_log,
	5680	"%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
	5681	(int)(REPORT_CODE_OFF+(depth*2)), "",
	5682	(IV) ST.count, (IV)ST.alen)
	5683	);
	5684
	5685	if (cur_eval && cur_eval->u.eval.close_paren &&
	5686	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5687	goto fake_end;
	5688
	5689	{
	5690	I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
	5691	if ( max == REG_INFTY \|\| ST.count < max )
	5692	goto curlym_do_A; /* try to match another A */
	5693	}
	5694	goto curlym_do_B; /* try to match B */
	5695
	5696	case CURLYM_A_fail: /* just failed to match an A */
	5697	REGCP_UNWIND(ST.cp);
	5698
	5699	if (ST.minmod \|\| ST.count < ARG1(ST.me) /* min*/
	5700	\|\| (cur_eval && cur_eval->u.eval.close_paren &&
	5701	cur_eval->u.eval.close_paren == (U32)ST.me->flags))
	5702	sayNO;
	5703
	5704	curlym_do_B: /* execute the B in /A{m,n}B/ */
	5705	if (ST.c1 == CHRTEST_UNINIT) {
	5706	/* calculate c1 and c2 for possible match of 1st char
	5707	* following curly */
	5708	ST.c1 = ST.c2 = CHRTEST_VOID;
	5709	if (HAS_TEXT(ST.B) \|\| JUMPABLE(ST.B)) {
	5710	regnode *text_node = ST.B;
	5711	if (! HAS_TEXT(text_node))
	5712	FIND_NEXT_IMPT(text_node);
	5713	/* this used to be
	5714
	5715	(HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
	5716
	5717	But the former is redundant in light of the latter.
	5718
	5719	if this changes back then the macro for
	5720	IS_TEXT and friends need to change.
	5721	*/
	5722	if (PL_regkind[OP(text_node)] == EXACT) {
	5723	if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
	5724	text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
	5725	{
	5726	sayNO;
	5727	}
	5728	}
	5729	}
	5730	}
	5731
	5732	DEBUG_EXECUTE_r(
	5733	PerlIO_printf(Perl_debug_log,
	5734	"%*s CURLYM trying tail with matches=%"IVdf"...\n",
	5735	(int)(REPORT_CODE_OFF+(depth*2)),
	5736	"", (IV)ST.count)
	5737	);
	5738	if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
	5739	if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
	5740	if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
	5741	&& memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
	5742	{
	5743	/* simulate B failing */
	5744	DEBUG_OPTIMISE_r(
	5745	PerlIO_printf(Perl_debug_log,
	5746	"%*s CURLYM Fast bail next target=U+%"UVXf" c1=U+%"UVXf" c2=U+%"UVXf"\n",
	5747	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5748	valid_utf8_to_uvchr((U8 *) locinput, NULL),
	5749	valid_utf8_to_uvchr(ST.c1_utf8, NULL),
	5750	valid_utf8_to_uvchr(ST.c2_utf8, NULL))
	5751	);
	5752	state_num = CURLYM_B_fail;
	5753	goto reenter_switch;
	5754	}
	5755	}
	5756	else if (nextchr != ST.c1 && nextchr != ST.c2) {
	5757	/* simulate B failing */
	5758	DEBUG_OPTIMISE_r(
	5759	PerlIO_printf(Perl_debug_log,
	5760	"%*s CURLYM Fast bail next target=U+%X c1=U+%X c2=U+%X\n",
	5761	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5762	(int) nextchr, ST.c1, ST.c2)
	5763	);
	5764	state_num = CURLYM_B_fail;
	5765	goto reenter_switch;
	5766	}
	5767	}
	5768
	5769	if (ST.me->flags) {
	5770	/* emulate CLOSE: mark current A as captured */
	5771	I32 paren = ST.me->flags;
	5772	if (ST.count) {
	5773	rex->offs[paren].start
	5774	= HOPc(locinput, -ST.alen) - PL_bostr;
	5775	rex->offs[paren].end = locinput - PL_bostr;
	5776	if ((U32)paren > rex->lastparen)
	5777	rex->lastparen = paren;
	5778	rex->lastcloseparen = paren;
	5779	}
	5780	else
	5781	rex->offs[paren].end = -1;
	5782	if (cur_eval && cur_eval->u.eval.close_paren &&
	5783	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5784	{
	5785	if (ST.count)
	5786	goto fake_end;
	5787	else
	5788	sayNO;
	5789	}
	5790	}
	5791
	5792	PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput); /* match B */
	5793	assert(0); /* NOTREACHED */
	5794
	5795	case CURLYM_B_fail: /* just failed to match a B */
	5796	REGCP_UNWIND(ST.cp);
	5797	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5798	if (ST.minmod) {
	5799	I32 max = ARG2(ST.me);
	5800	if (max != REG_INFTY && ST.count == max)
	5801	sayNO;
	5802	goto curlym_do_A; /* try to match a further A */
	5803	}
	5804	/* backtrack one A */
	5805	if (ST.count == ARG1(ST.me) /* min */)
	5806	sayNO;
	5807	ST.count--;
	5808	SET_locinput(HOPc(locinput, -ST.alen));
	5809	goto curlym_do_B; /* try to match B */
	5810
	5811	#undef ST
	5812	#define ST st->u.curly
	5813
	5814	#define CURLY_SETPAREN(paren, success) \
	5815	if (paren) { \
	5816	if (success) { \
	5817	rex->offs[paren].start = HOPc(locinput, -1) - PL_bostr; \
	5818	rex->offs[paren].end = locinput - PL_bostr; \
	5819	if (paren > rex->lastparen) \
	5820	rex->lastparen = paren; \
	5821	rex->lastcloseparen = paren; \
	5822	} \
	5823	else { \
	5824	rex->offs[paren].end = -1; \
	5825	rex->lastparen = ST.lastparen; \
	5826	rex->lastcloseparen = ST.lastcloseparen; \
	5827	} \
	5828	}
	5829
	5830	case STAR: /* /AB/ where A is width 1 char /
	5831	ST.paren = 0;
	5832	ST.min = 0;
	5833	ST.max = REG_INFTY;
	5834	scan = NEXTOPER(scan);
	5835	goto repeat;
	5836
	5837	case PLUS: /* /A+B/ where A is width 1 char */
	5838	ST.paren = 0;
	5839	ST.min = 1;
	5840	ST.max = REG_INFTY;
	5841	scan = NEXTOPER(scan);
	5842	goto repeat;
	5843
	5844	case CURLYN: /* /(A){m,n}B/ where A is width 1 char */
	5845	ST.paren = scan->flags; /* Which paren to set */
	5846	ST.lastparen = rex->lastparen;
	5847	ST.lastcloseparen = rex->lastcloseparen;
	5848	if (ST.paren > PL_regsize)
	5849	PL_regsize = ST.paren;
	5850	ST.min = ARG1(scan); /* min to match */
	5851	ST.max = ARG2(scan); /* max to match */
	5852	if (cur_eval && cur_eval->u.eval.close_paren &&
	5853	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5854	ST.min=1;
	5855	ST.max=1;
	5856	}
	5857	scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
	5858	goto repeat;
	5859
	5860	case CURLY: /* /A{m,n}B/ where A is width 1 char */
	5861	ST.paren = 0;
	5862	ST.min = ARG1(scan); /* min to match */
	5863	ST.max = ARG2(scan); /* max to match */
	5864	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5865	repeat:
	5866	/*
	5867	* Lookahead to avoid useless match attempts
	5868	* when we know what character comes next.
	5869	*
	5870	* Used to only do .x and .?x, but now it allows
	5871	* for )'s, ('s and (?{ ... })'s to be in the way
	5872	* of the quantifier and the EXACT-like node. -- japhy
	5873	*/
	5874
	5875	assert(ST.min <= ST.max);
	5876	if (! HAS_TEXT(next) && ! JUMPABLE(next)) {
	5877	ST.c1 = ST.c2 = CHRTEST_VOID;
	5878	}
	5879	else {
	5880	regnode *text_node = next;
	5881
	5882	if (! HAS_TEXT(text_node))
	5883	FIND_NEXT_IMPT(text_node);
	5884
	5885	if (! HAS_TEXT(text_node))
	5886	ST.c1 = ST.c2 = CHRTEST_VOID;
	5887	else {
	5888	if ( PL_regkind[OP(text_node)] != EXACT ) {
	5889	ST.c1 = ST.c2 = CHRTEST_VOID;
	5890	}
	5891	else {
	5892
	5893	/* Currently we only get here when
	5894
	5895	PL_rekind[OP(text_node)] == EXACT
	5896
	5897	if this changes back then the macro for IS_TEXT and
	5898	friends need to change. */
	5899	if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
	5900	text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
	5901	{
	5902	sayNO;
	5903	}
	5904	}
	5905	}
	5906	}
	5907
	5908	ST.A = scan;
	5909	ST.B = next;
	5910	if (minmod) {
	5911	char *li = locinput;
	5912	minmod = 0;
	5913	if (ST.min && regrepeat(rex, &li, ST.A, ST.min, depth) < ST.min)
	5914	sayNO;
	5915	SET_locinput(li);
	5916	ST.count = ST.min;
	5917	REGCP_SET(ST.cp);
	5918	if (ST.c1 == CHRTEST_VOID)
	5919	goto curly_try_B_min;
	5920
	5921	ST.oldloc = locinput;
	5922
	5923	/* set ST.maxpos to the furthest point along the
	5924	* string that could possibly match */
	5925	if (ST.max == REG_INFTY) {
	5926	ST.maxpos = PL_regeol - 1;
	5927	if (utf8_target)
	5928	while (UTF8_IS_CONTINUATION((U8)ST.maxpos))
	5929	ST.maxpos--;
	5930	}
	5931	else if (utf8_target) {
	5932	int m = ST.max - ST.min;
	5933	for (ST.maxpos = locinput;
	5934	m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
	5935	ST.maxpos += UTF8SKIP(ST.maxpos);
	5936	}
	5937	else {
	5938	ST.maxpos = locinput + ST.max - ST.min;
	5939	if (ST.maxpos >= PL_regeol)
	5940	ST.maxpos = PL_regeol - 1;
	5941	}
	5942	goto curly_try_B_min_known;
	5943
	5944	}
	5945	else {
	5946	/* avoid taking address of locinput, so it can remain
	5947	* a register var */
	5948	char *li = locinput;
	5949	ST.count = regrepeat(rex, &li, ST.A, ST.max, depth);
	5950	if (ST.count < ST.min)
	5951	sayNO;
	5952	SET_locinput(li);
	5953	if ((ST.count > ST.min)
	5954	&& (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
	5955	{
	5956	/* A{m,n} must come at the end of the string, there's
	5957	* no point in backing off ... */
	5958	ST.min = ST.count;
	5959	/* ...except that $ and \Z can match before and after
	5960	newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
	5961	We may back off by one in this case. */
	5962	if (UCHARAT(locinput - 1) == '\n' && OP(ST.B) != EOS)
	5963	ST.min--;
	5964	}
	5965	REGCP_SET(ST.cp);
	5966	goto curly_try_B_max;
	5967	}
	5968	assert(0); /* NOTREACHED */
	5969
	5970
	5971	case CURLY_B_min_known_fail:
	5972	/* failed to find B in a non-greedy match where c1,c2 valid */
	5973
	5974	REGCP_UNWIND(ST.cp);
	5975	if (ST.paren) {
	5976	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5977	}
	5978	/* Couldn't or didn't -- move forward. */
	5979	ST.oldloc = locinput;
	5980	if (utf8_target)
	5981	locinput += UTF8SKIP(locinput);
	5982	else
	5983	locinput++;
	5984	ST.count++;
	5985	curly_try_B_min_known:
	5986	/* find the next place where 'B' could work, then call B */
	5987	{
	5988	int n;
	5989	if (utf8_target) {
	5990	n = (ST.oldloc == locinput) ? 0 : 1;
	5991	if (ST.c1 == ST.c2) {
	5992	/* set n to utf8_distance(oldloc, locinput) */
	5993	while (locinput <= ST.maxpos
	5994	&& memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput)))
	5995	{
	5996	locinput += UTF8SKIP(locinput);
	5997	n++;
	5998	}
	5999	}
	6000	else {
	6001	/* set n to utf8_distance(oldloc, locinput) */
	6002	while (locinput <= ST.maxpos
	6003	&& memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
	6004	&& memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
	6005	{
	6006	locinput += UTF8SKIP(locinput);
	6007	n++;
	6008	}
	6009	}
	6010	}
	6011	else { /* Not utf8_target */
	6012	if (ST.c1 == ST.c2) {
	6013	while (locinput <= ST.maxpos &&
	6014	UCHARAT(locinput) != ST.c1)
	6015	locinput++;
	6016	}
	6017	else {
	6018	while (locinput <= ST.maxpos
	6019	&& UCHARAT(locinput) != ST.c1
	6020	&& UCHARAT(locinput) != ST.c2)
	6021	locinput++;
	6022	}
	6023	n = locinput - ST.oldloc;
	6024	}
	6025	if (locinput > ST.maxpos)
	6026	sayNO;
	6027	if (n) {
	6028	/* In /a{m,n}b/, ST.oldloc is at "a" x m, locinput is
	6029	* at b; check that everything between oldloc and
	6030	* locinput matches */
	6031	char *li = ST.oldloc;
	6032	ST.count += n;
	6033	if (regrepeat(rex, &li, ST.A, n, depth) < n)
	6034	sayNO;
	6035	assert(n == REG_INFTY \|\| locinput == li);
	6036	}
	6037	CURLY_SETPAREN(ST.paren, ST.count);
	6038	if (cur_eval && cur_eval->u.eval.close_paren &&
	6039	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6040	goto fake_end;
	6041	}
	6042	PUSH_STATE_GOTO(CURLY_B_min_known, ST.B, locinput);
	6043	}
	6044	assert(0); /* NOTREACHED */
	6045
	6046
	6047	case CURLY_B_min_fail:
	6048	/* failed to find B in a non-greedy match where c1,c2 invalid */
	6049
	6050	REGCP_UNWIND(ST.cp);
	6051	if (ST.paren) {
	6052	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6053	}
	6054	/* failed -- move forward one */
	6055	{
	6056	char *li = locinput;
	6057	if (!regrepeat(rex, &li, ST.A, 1, depth)) {
	6058	sayNO;
	6059	}
	6060	locinput = li;
	6061	}
	6062	{
	6063	ST.count++;
	6064	if (ST.count <= ST.max \|\| (ST.max == REG_INFTY &&
	6065	ST.count > 0)) /* count overflow ? */
	6066	{
	6067	curly_try_B_min:
	6068	CURLY_SETPAREN(ST.paren, ST.count);
	6069	if (cur_eval && cur_eval->u.eval.close_paren &&
	6070	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6071	goto fake_end;
	6072	}
	6073	PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
	6074	}
	6075	}
	6076	sayNO;
	6077	assert(0); /* NOTREACHED */
	6078
	6079
	6080	curly_try_B_max:
	6081	/* a successful greedy match: now try to match B */
	6082	if (cur_eval && cur_eval->u.eval.close_paren &&
	6083	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6084	goto fake_end;
	6085	}
	6086	{
	6087	bool could_match = locinput < PL_regeol;
	6088
	6089	/* If it could work, try it. */
	6090	if (ST.c1 != CHRTEST_VOID && could_match) {
	6091	if (! UTF8_IS_INVARIANT(UCHARAT(locinput)) && utf8_target)
	6092	{
	6093	could_match = memEQ(locinput,
	6094	ST.c1_utf8,
	6095	UTF8SKIP(locinput))
	6096	\|\| memEQ(locinput,
	6097	ST.c2_utf8,
	6098	UTF8SKIP(locinput));
	6099	}
	6100	else {
	6101	could_match = UCHARAT(locinput) == ST.c1
	6102	\|\| UCHARAT(locinput) == ST.c2;
	6103	}
	6104	}
	6105	if (ST.c1 == CHRTEST_VOID \|\| could_match) {
	6106	CURLY_SETPAREN(ST.paren, ST.count);
	6107	PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput);
	6108	assert(0); /* NOTREACHED */
	6109	}
	6110	}
	6111	/* FALL THROUGH */
	6112
	6113	case CURLY_B_max_fail:
	6114	/* failed to find B in a greedy match */
	6115
	6116	REGCP_UNWIND(ST.cp);
	6117	if (ST.paren) {
	6118	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6119	}
	6120	/* back up. */
	6121	if (--ST.count < ST.min)
	6122	sayNO;
	6123	locinput = HOPc(locinput, -1);
	6124	goto curly_try_B_max;
	6125
	6126	#undef ST
	6127
	6128	case END: /* last op of main pattern */
	6129	fake_end:
	6130	if (cur_eval) {
	6131	/* we've just finished A in /(??{A})B/; now continue with B */
	6132	st->u.eval.toggle_reg_flags
	6133	= cur_eval->u.eval.toggle_reg_flags;
	6134	PL_reg_flags ^= st->u.eval.toggle_reg_flags;
	6135
	6136	st->u.eval.prev_rex = rex_sv; /* inner */
	6137	st->u.eval.cp = regcppush(rex, 0); /* Save all the positions. */
	6138	rex_sv = cur_eval->u.eval.prev_rex;
	6139	SET_reg_curpm(rex_sv);
	6140	rex = ReANY(rex_sv);
	6141	rexi = RXi_GET(rex);
	6142	cur_curlyx = cur_eval->u.eval.prev_curlyx;
	6143
	6144	REGCP_SET(st->u.eval.lastcp);
	6145
	6146	/* Restore parens of the outer rex without popping the
	6147	* savestack */
	6148	S_regcp_restore(aTHX_ rex, cur_eval->u.eval.lastcp);
	6149
	6150	st->u.eval.prev_eval = cur_eval;
	6151	cur_eval = cur_eval->u.eval.prev_eval;
	6152	DEBUG_EXECUTE_r(
	6153	PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
	6154	REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
	6155	if ( nochange_depth )
	6156	nochange_depth--;
	6157
	6158	PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B,
	6159	locinput); /* match B */
	6160	}
	6161
	6162	if (locinput < reginfo->till) {
	6163	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	6164	"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
	6165	PL_colors[4],
	6166	(long)(locinput - PL_reg_starttry),
	6167	(long)(reginfo->till - PL_reg_starttry),
	6168	PL_colors[5]));
	6169
	6170	sayNO_SILENT; /* Cannot match: too short. */
	6171	}
	6172	sayYES; /* Success! */
	6173
	6174	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
	6175	DEBUG_EXECUTE_r(
	6176	PerlIO_printf(Perl_debug_log,
	6177	"%*s %ssubpattern success...%s\n",
	6178	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
	6179	sayYES; /* Success! */
	6180
	6181	#undef ST
	6182	#define ST st->u.ifmatch
	6183
	6184	{
	6185	char *newstart;
	6186
	6187	case SUSPEND: /* (?>A) */
	6188	ST.wanted = 1;
	6189	newstart = locinput;
	6190	goto do_ifmatch;
	6191
	6192	case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
	6193	ST.wanted = 0;
	6194	goto ifmatch_trivial_fail_test;
	6195
	6196	case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
	6197	ST.wanted = 1;
	6198	ifmatch_trivial_fail_test:
	6199	if (scan->flags) {
	6200	char * const s = HOPBACKc(locinput, scan->flags);
	6201	if (!s) {
	6202	/* trivial fail */
	6203	if (logical) {
	6204	logical = 0;
	6205	sw = 1 - cBOOL(ST.wanted);
	6206	}
	6207	else if (ST.wanted)
	6208	sayNO;
	6209	next = scan + ARG(scan);
	6210	if (next == scan)
	6211	next = NULL;
	6212	break;
	6213	}
	6214	newstart = s;
	6215	}
	6216	else
	6217	newstart = locinput;
	6218
	6219	do_ifmatch:
	6220	ST.me = scan;
	6221	ST.logical = logical;
	6222	logical = 0; /* XXX: reset state of logical once it has been saved into ST */
	6223
	6224	/* execute body of (?...A) */
	6225	PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), newstart);
	6226	assert(0); /* NOTREACHED */
	6227	}
	6228
	6229	case IFMATCH_A_fail: /* body of (?...A) failed */
	6230	ST.wanted = !ST.wanted;
	6231	/* FALL THROUGH */
	6232
	6233	case IFMATCH_A: /* body of (?...A) succeeded */
	6234	if (ST.logical) {
	6235	sw = cBOOL(ST.wanted);
	6236	}
	6237	else if (!ST.wanted)
	6238	sayNO;
	6239
	6240	if (OP(ST.me) != SUSPEND) {
	6241	/* restore old position except for (?>...) */
	6242	locinput = st->locinput;
	6243	}
	6244	scan = ST.me + ARG(ST.me);
	6245	if (scan == ST.me)
	6246	scan = NULL;
	6247	continue; /* execute B */
	6248
	6249	#undef ST
	6250
	6251	case LONGJMP: /* alternative with many branches compiles to
	6252	* (BRANCHJ; EXACT ...; LONGJMP ) x N */
	6253	next = scan + ARG(scan);
	6254	if (next == scan)
	6255	next = NULL;
	6256	break;
	6257
	6258	case COMMIT: /* (COMMIT) /
	6259	reginfo->cutpoint = PL_regeol;
	6260	/* FALLTHROUGH */
	6261
	6262	case PRUNE: /* (PRUNE) /
	6263	if (!scan->flags)
	6264	sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6265	PUSH_STATE_GOTO(COMMIT_next, next, locinput);
	6266	assert(0); /* NOTREACHED */
	6267
	6268	case COMMIT_next_fail:
	6269	no_final = 1;
	6270	/* FALLTHROUGH */
	6271
	6272	case OPFAIL: /* (FAIL) /
	6273	sayNO;
	6274	assert(0); /* NOTREACHED */
	6275
	6276	#define ST st->u.mark
	6277	case MARKPOINT: /* (MARK:foo) /
	6278	ST.prev_mark = mark_state;
	6279	ST.mark_name = sv_commit = sv_yes_mark
	6280	= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6281	mark_state = st;
	6282	ST.mark_loc = locinput;
	6283	PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput);
	6284	assert(0); /* NOTREACHED */
	6285
	6286	case MARKPOINT_next:
	6287	mark_state = ST.prev_mark;
	6288	sayYES;
	6289	assert(0); /* NOTREACHED */
	6290
	6291	case MARKPOINT_next_fail:
	6292	if (popmark && sv_eq(ST.mark_name,popmark))
	6293	{
	6294	if (ST.mark_loc > startpoint)
	6295	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	6296	popmark = NULL; /* we found our mark */
	6297	sv_commit = ST.mark_name;
	6298
	6299	DEBUG_EXECUTE_r({
	6300	PerlIO_printf(Perl_debug_log,
	6301	"%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
	6302	REPORT_CODE_OFF+depth*2, "",
	6303	PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
	6304	});
	6305	}
	6306	mark_state = ST.prev_mark;
	6307	sv_yes_mark = mark_state ?
	6308	mark_state->u.mark.mark_name : NULL;
	6309	sayNO;
	6310	assert(0); /* NOTREACHED */
	6311
	6312	case SKIP: /* (SKIP) /
	6313	if (scan->flags) {
	6314	/* (SKIP) : if we fail we cut here/
	6315	ST.mark_name = NULL;
	6316	ST.mark_loc = locinput;
	6317	PUSH_STATE_GOTO(SKIP_next,next, locinput);
	6318	} else {
	6319	/* (SKIP:NAME) : if there is a (MARK:NAME) fail where it was,
	6320	otherwise do nothing. Meaning we need to scan
	6321	*/
	6322	regmatch_state *cur = mark_state;
	6323	SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6324
	6325	while (cur) {
	6326	if ( sv_eq( cur->u.mark.mark_name,
	6327	find ) )
	6328	{
	6329	ST.mark_name = find;
	6330	PUSH_STATE_GOTO( SKIP_next, next, locinput);
	6331	}
	6332	cur = cur->u.mark.prev_mark;
	6333	}
	6334	}
	6335	/* Didn't find our (MARK:NAME) so ignore this (SKIP:NAME) */
	6336	break;
	6337
	6338	case SKIP_next_fail:
	6339	if (ST.mark_name) {
	6340	/* (*CUT:NAME) - Set up to search for the name as we
	6341	collapse the stack*/
	6342	popmark = ST.mark_name;
	6343	} else {
	6344	/* (CUT) - No name, we cut here./
	6345	if (ST.mark_loc > startpoint)
	6346	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	6347	/* but we set sv_commit to latest mark_name if there
	6348	is one so they can test to see how things lead to this
	6349	cut */
	6350	if (mark_state)
	6351	sv_commit=mark_state->u.mark.mark_name;
	6352	}
	6353	no_final = 1;
	6354	sayNO;
	6355	assert(0); /* NOTREACHED */
	6356	#undef ST
	6357
	6358	case LNBREAK: /* \R */
	6359	if ((n=is_LNBREAK_safe(locinput, PL_regeol, utf8_target))) {
	6360	locinput += n;
	6361	} else
	6362	sayNO;
	6363	break;
	6364
	6365	#define CASE_CLASS(nAmE) \
	6366	case nAmE: \
	6367	if (NEXTCHR_IS_EOS) \
	6368	sayNO; \
	6369	if ((n=is_##nAmE(locinput,utf8_target))) { \
	6370	locinput += n; \
	6371	} else \
	6372	sayNO; \
	6373	break; \
	6374	case N##nAmE: \
	6375	if (NEXTCHR_IS_EOS) \
	6376	sayNO; \
	6377	if ((n=is_##nAmE(locinput,utf8_target))) { \
	6378	sayNO; \
	6379	} else { \
	6380	locinput += UTF8SKIP(locinput); \
	6381	} \
	6382	break
	6383
	6384	CASE_CLASS(VERTWS); /* \v \V */
	6385	CASE_CLASS(HORIZWS); /* \h \H */
	6386	#undef CASE_CLASS
	6387
	6388	default:
	6389	PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
	6390	PTR2UV(scan), OP(scan));
	6391	Perl_croak(aTHX_ "regexp memory corruption");
	6392
	6393	/* this is a point to jump to in order to increment
	6394	* locinput by one character */
	6395	increment_locinput:
	6396	assert(!NEXTCHR_IS_EOS);
	6397	if (utf8_target) {
	6398	locinput += PL_utf8skip[nextchr];
	6399	/* locinput is allowed to go 1 char off the end, but not 2+ */
	6400	if (locinput > PL_regeol)
	6401	sayNO;
	6402	}
	6403	else
	6404	locinput++;
	6405	break;
	6406
	6407	} /* end switch */
	6408
	6409	/* switch break jumps here */
	6410	scan = next; /* prepare to execute the next op and ... */
	6411	continue; /* ... jump back to the top, reusing st */
	6412	assert(0); /* NOTREACHED */
	6413
	6414	push_yes_state:
	6415	/* push a state that backtracks on success */
	6416	st->u.yes.prev_yes_state = yes_state;
	6417	yes_state = st;
	6418	/* FALL THROUGH */
	6419	push_state:
	6420	/* push a new regex state, then continue at scan */
	6421	{
	6422	regmatch_state *newst;
	6423
	6424	DEBUG_STACK_r({
	6425	regmatch_state *cur = st;
	6426	regmatch_state *curyes = yes_state;
	6427	int curd = depth;
	6428	regmatch_slab *slab = PL_regmatch_slab;
	6429	for (;curd > -1;cur--,curd--) {
	6430	if (cur < SLAB_FIRST(slab)) {
	6431	slab = slab->prev;
	6432	cur = SLAB_LAST(slab);
	6433	}
	6434	PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
	6435	REPORT_CODE_OFF + 2 + depth * 2,"",
	6436	curd, PL_reg_name[cur->resume_state],
	6437	(curyes == cur) ? "yes" : ""
	6438	);
	6439	if (curyes == cur)
	6440	curyes = cur->u.yes.prev_yes_state;
	6441	}
	6442	} else
	6443	DEBUG_STATE_pp("push")
	6444	);
	6445	depth++;
	6446	st->locinput = locinput;
	6447	newst = st+1;
	6448	if (newst > SLAB_LAST(PL_regmatch_slab))
	6449	newst = S_push_slab(aTHX);
	6450	PL_regmatch_state = newst;
	6451
	6452	locinput = pushinput;
	6453	st = newst;
	6454	continue;
	6455	assert(0); /* NOTREACHED */
	6456	}
	6457	}
	6458
	6459	/*
	6460	* We get here only if there's trouble -- normally "case END" is
	6461	* the terminating point.
	6462	*/
	6463	Perl_croak(aTHX_ "corrupted regexp pointers");
	6464	/NOTREACHED/
	6465	sayNO;
	6466
	6467	yes:
	6468	if (yes_state) {
	6469	/* we have successfully completed a subexpression, but we must now
	6470	* pop to the state marked by yes_state and continue from there */
	6471	assert(st != yes_state);
	6472	#ifdef DEBUGGING
	6473	while (st != yes_state) {
	6474	st--;
	6475	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	6476	PL_regmatch_slab = PL_regmatch_slab->prev;
	6477	st = SLAB_LAST(PL_regmatch_slab);
	6478	}
	6479	DEBUG_STATE_r({
	6480	if (no_final) {
	6481	DEBUG_STATE_pp("pop (no final)");
	6482	} else {
	6483	DEBUG_STATE_pp("pop (yes)");
	6484	}
	6485	});
	6486	depth--;
	6487	}
	6488	#else
	6489	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
	6490	\|\| yes_state > SLAB_LAST(PL_regmatch_slab))
	6491	{
	6492	/* not in this slab, pop slab */
	6493	depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
	6494	PL_regmatch_slab = PL_regmatch_slab->prev;
	6495	st = SLAB_LAST(PL_regmatch_slab);
	6496	}
	6497	depth -= (st - yes_state);
	6498	#endif
	6499	st = yes_state;
	6500	yes_state = st->u.yes.prev_yes_state;
	6501	PL_regmatch_state = st;
	6502
	6503	if (no_final)
	6504	locinput= st->locinput;
	6505	state_num = st->resume_state + no_final;
	6506	goto reenter_switch;
	6507	}
	6508
	6509	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
	6510	PL_colors[4], PL_colors[5]));
	6511
	6512	if (PL_reg_state.re_state_eval_setup_done) {
	6513	/* each successfully executed (?{...}) block does the equivalent of
	6514	* local $^R = do {...}
	6515	* When popping the save stack, all these locals would be undone;
	6516	* bypass this by setting the outermost saved $^R to the latest
	6517	* value */
	6518	if (oreplsv != GvSV(PL_replgv))
	6519	sv_setsv(oreplsv, GvSV(PL_replgv));
	6520	}
	6521	result = 1;
	6522	goto final_exit;
	6523
	6524	no:
	6525	DEBUG_EXECUTE_r(
	6526	PerlIO_printf(Perl_debug_log,
	6527	"%*s %sfailed...%s\n",
	6528	REPORT_CODE_OFF+depth*2, "",
	6529	PL_colors[4], PL_colors[5])
	6530	);
	6531
	6532	no_silent:
	6533	if (no_final) {
	6534	if (yes_state) {
	6535	goto yes;
	6536	} else {
	6537	goto final_exit;
	6538	}
	6539	}
	6540	if (depth) {
	6541	/* there's a previous state to backtrack to */
	6542	st--;
	6543	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	6544	PL_regmatch_slab = PL_regmatch_slab->prev;
	6545	st = SLAB_LAST(PL_regmatch_slab);
	6546	}
	6547	PL_regmatch_state = st;
	6548	locinput= st->locinput;
	6549
	6550	DEBUG_STATE_pp("pop");
	6551	depth--;
	6552	if (yes_state == st)
	6553	yes_state = st->u.yes.prev_yes_state;
	6554
	6555	state_num = st->resume_state + 1; /* failure = success + 1 */
	6556	goto reenter_switch;
	6557	}
	6558	result = 0;
	6559
	6560	final_exit:
	6561	if (rex->intflags & PREGf_VERBARG_SEEN) {
	6562	SV *sv_err = get_sv("REGERROR", 1);
	6563	SV *sv_mrk = get_sv("REGMARK", 1);
	6564	if (result) {
	6565	sv_commit = &PL_sv_no;
	6566	if (!sv_yes_mark)
	6567	sv_yes_mark = &PL_sv_yes;
	6568	} else {
	6569	if (!sv_commit)
	6570	sv_commit = &PL_sv_yes;
	6571	sv_yes_mark = &PL_sv_no;
	6572	}
	6573	sv_setsv(sv_err, sv_commit);
	6574	sv_setsv(sv_mrk, sv_yes_mark);
	6575	}
	6576
	6577
	6578	if (last_pushed_cv) {
	6579	dSP;
	6580	POP_MULTICALL;
	6581	PERL_UNUSED_VAR(SP);
	6582	}
	6583
	6584	/* clean up; in particular, free all slabs above current one */
	6585	LEAVE_SCOPE(oldsave);
	6586
	6587	assert(!result \|\| locinput - PL_bostr >= 0);
	6588	return result ? locinput - PL_bostr : -1;
	6589	}
	6590
	6591	/*
	6592	- regrepeat - repeatedly match something simple, report how many
	6593	*
	6594	* What 'simple' means is a node which can be the operand of a quantifier like
	6595	* '+', or {1,3}
	6596	*
	6597	* startposp - pointer a pointer to the start position. This is updated
	6598	* to point to the byte following the highest successful
	6599	* match.
	6600	* p - the regnode to be repeatedly matched against.
	6601	* max - maximum number of things to match.
	6602	* depth - (for debugging) backtracking depth.
	6603	*/
	6604	STATIC I32
	6605	S_regrepeat(pTHX_ const regexp prog, char startposp, const regnode p, I32 max, int depth)
	6606	{
	6607	dVAR;
	6608	char scan; / Pointer to current position in target string */
	6609	I32 c;
	6610	char loceol = PL_regeol; / local version */
	6611	I32 hardcount = 0; /* How many matches so far */
	6612	bool utf8_target = PL_reg_match_utf8;
	6613	UV utf8_flags;
	6614	#ifndef DEBUGGING
	6615	PERL_UNUSED_ARG(depth);
	6616	#endif
	6617
	6618	PERL_ARGS_ASSERT_REGREPEAT;
	6619
	6620	scan = *startposp;
	6621	if (max == REG_INFTY)
	6622	max = I32_MAX;
	6623	else if (! utf8_target && scan + max < loceol)
	6624	loceol = scan + max;
	6625
	6626	/* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
	6627	* to the maximum of how far we should go in it (leaving it set to the real
	6628	* end, if the maximum permissible would take us beyond that). This allows
	6629	* us to make the loop exit condition that we haven't gone past <loceol> to
	6630	* also mean that we haven't exceeded the max permissible count, saving a
	6631	* test each time through the loop. But it assumes that the OP matches a
	6632	* single byte, which is true for most of the OPs below when applied to a
	6633	* non-UTF-8 target. Those relatively few OPs that don't have this
	6634	* characteristic will have to compensate.
	6635	*
	6636	* There is no adjustment for UTF-8 targets, as the number of bytes per
	6637	* character varies. OPs will have to test both that the count is less
	6638	* than the max permissible (using <hardcount> to keep track), and that we
	6639	* are still within the bounds of the string (using <loceol>. A few OPs
	6640	* match a single byte no matter what the encoding. They can omit the max
	6641	* test if, for the UTF-8 case, they do the adjustment that was skipped
	6642	* above.
	6643	*
	6644	* Thus, the code above sets things up for the common case; and exceptional
	6645	* cases need extra work; the common case is to make sure <scan> doesn't
	6646	* go past <loceol>, and for UTF-8 to also use <hardcount> to make sure the
	6647	* count doesn't exceed the maximum permissible */
	6648
	6649	switch (OP(p)) {
	6650	case REG_ANY:
	6651	if (utf8_target) {
	6652	while (scan < loceol && hardcount < max && *scan != '\n') {
	6653	scan += UTF8SKIP(scan);
	6654	hardcount++;
	6655	}
	6656	} else {
	6657	while (scan < loceol && *scan != '\n')
	6658	scan++;
	6659	}
	6660	break;
	6661	case SANY:
	6662	if (utf8_target) {
	6663	while (scan < loceol && hardcount < max) {
	6664	scan += UTF8SKIP(scan);
	6665	hardcount++;
	6666	}
	6667	}
	6668	else
	6669	scan = loceol;
	6670	break;
	6671	case CANY: /* Move <scan> forward <max> bytes, unless goes off end */
	6672	if (utf8_target && scan + max < loceol) {
	6673
	6674	/* <loceol> hadn't been adjusted in the UTF-8 case */
	6675	scan += max;
	6676	}
	6677	else {
	6678	scan = loceol;
	6679	}
	6680	break;
	6681	case EXACT:
	6682	assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
	6683
	6684	c = (U8)*STRING(p);
	6685
	6686	/* Can use a simple loop if the pattern char to match on is invariant
	6687	* under UTF-8, or both target and pattern aren't UTF-8. Note that we
	6688	* can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
	6689	* true iff it doesn't matter if the argument is in UTF-8 or not */
	6690	if (UTF8_IS_INVARIANT(c) \|\| (! utf8_target && ! UTF_PATTERN)) {
	6691	if (utf8_target && scan + max < loceol) {
	6692	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	6693	* since here, to match at all, 1 char == 1 byte */
	6694	loceol = scan + max;
	6695	}
	6696	while (scan < loceol && UCHARAT(scan) == c) {
	6697	scan++;
	6698	}
	6699	}
	6700	else if (UTF_PATTERN) {
	6701	if (utf8_target) {
	6702	STRLEN scan_char_len;
	6703
	6704	/* When both target and pattern are UTF-8, we have to do
	6705	* string EQ */
	6706	while (hardcount < max
	6707	&& scan + (scan_char_len = UTF8SKIP(scan)) <= loceol
	6708	&& scan_char_len <= STR_LEN(p)
	6709	&& memEQ(scan, STRING(p), scan_char_len))
	6710	{
	6711	scan += scan_char_len;
	6712	hardcount++;
	6713	}
	6714	}
	6715	else if (! UTF8_IS_ABOVE_LATIN1(c)) {
	6716
	6717	/* Target isn't utf8; convert the character in the UTF-8
	6718	* pattern to non-UTF8, and do a simple loop */
	6719	c = TWO_BYTE_UTF8_TO_UNI(c, *(STRING(p) + 1));
	6720	while (scan < loceol && UCHARAT(scan) == c) {
	6721	scan++;
	6722	}
	6723	} /* else pattern char is above Latin1, can't possibly match the
	6724	non-UTF-8 target */
	6725	}
	6726	else {
	6727
	6728	/* Here, the string must be utf8; pattern isn't, and <c> is
	6729	* different in utf8 than not, so can't compare them directly.
	6730	* Outside the loop, find the two utf8 bytes that represent c, and
	6731	* then look for those in sequence in the utf8 string */
	6732	U8 high = UTF8_TWO_BYTE_HI(c);
	6733	U8 low = UTF8_TWO_BYTE_LO(c);
	6734
	6735	while (hardcount < max
	6736	&& scan + 1 < loceol
	6737	&& UCHARAT(scan) == high
	6738	&& UCHARAT(scan + 1) == low)
	6739	{
	6740	scan += 2;
	6741	hardcount++;
	6742	}
	6743	}
	6744	break;
	6745
	6746	case EXACTFA:
	6747	utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	6748	goto do_exactf;
	6749
	6750	case EXACTFL:
	6751	PL_reg_flags \|= RF_tainted;
	6752	utf8_flags = FOLDEQ_UTF8_LOCALE;
	6753	goto do_exactf;
	6754
	6755	case EXACTF:
	6756	utf8_flags = 0;
	6757	goto do_exactf;
	6758
	6759	case EXACTFU_SS:
	6760	case EXACTFU_TRICKYFOLD:
	6761	case EXACTFU:
	6762	utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	6763
	6764	do_exactf: {
	6765	int c1, c2;
	6766	U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
	6767
	6768	assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
	6769
	6770	if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8)) {
	6771	if (c1 == CHRTEST_VOID) {
	6772	/* Use full Unicode fold matching */
	6773	char *tmpeol = PL_regeol;
	6774	STRLEN pat_len = (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1;
	6775	while (hardcount < max
	6776	&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
	6777	STRING(p), NULL, pat_len,
	6778	cBOOL(UTF_PATTERN), utf8_flags))
	6779	{
	6780	scan = tmpeol;
	6781	tmpeol = PL_regeol;
	6782	hardcount++;
	6783	}
	6784	}
	6785	else if (utf8_target) {
	6786	if (c1 == c2) {
	6787	while (scan < loceol
	6788	&& hardcount < max
	6789	&& memEQ(scan, c1_utf8, UTF8SKIP(scan)))
	6790	{
	6791	scan += UTF8SKIP(scan);
	6792	hardcount++;
	6793	}
	6794	}
	6795	else {
	6796	while (scan < loceol
	6797	&& hardcount < max
	6798	&& (memEQ(scan, c1_utf8, UTF8SKIP(scan))
	6799	\|\| memEQ(scan, c2_utf8, UTF8SKIP(scan))))
	6800	{
	6801	scan += UTF8SKIP(scan);
	6802	hardcount++;
	6803	}
	6804	}
	6805	}
	6806	else if (c1 == c2) {
	6807	while (scan < loceol && UCHARAT(scan) == c1) {
	6808	scan++;
	6809	}
	6810	}
	6811	else {
	6812	while (scan < loceol &&
	6813	(UCHARAT(scan) == c1 \|\| UCHARAT(scan) == c2))
	6814	{
	6815	scan++;
	6816	}
	6817	}
	6818	}
	6819	break;
	6820	}
	6821	case ANYOF:
	6822	if (utf8_target) {
	6823	STRLEN inclasslen;
	6824	while (hardcount < max
	6825	&& scan + (inclasslen = UTF8SKIP(scan)) <= loceol
	6826	&& reginclass(prog, p, (U8*)scan, utf8_target))
	6827	{
	6828	scan += inclasslen;
	6829	hardcount++;
	6830	}
	6831	} else {
	6832	while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
	6833	scan++;
	6834	}
	6835	break;
	6836	case ALNUMU:
	6837	if (utf8_target) {
	6838	utf8_wordchar:
	6839	LOAD_UTF8_CHARCLASS_ALNUM();
	6840	while (hardcount < max && scan < loceol &&
	6841	swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6842	{
	6843	scan += UTF8SKIP(scan);
	6844	hardcount++;
	6845	}
	6846	} else {
	6847	while (scan < loceol && isWORDCHAR_L1((U8) *scan)) {
	6848	scan++;
	6849	}
	6850	}
	6851	break;
	6852	case ALNUM:
	6853	if (utf8_target)
	6854	goto utf8_wordchar;
	6855	while (scan < loceol && isALNUM((U8) *scan)) {
	6856	scan++;
	6857	}
	6858	break;
	6859	case ALNUMA:
	6860	if (utf8_target && scan + max < loceol) {
	6861
	6862	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	6863	* since here, to match, 1 char == 1 byte */
	6864	loceol = scan + max;
	6865	}
	6866	while (scan < loceol && isWORDCHAR_A((U8) *scan)) {
	6867	scan++;
	6868	}
	6869	break;
	6870	case ALNUML:
	6871	PL_reg_flags \|= RF_tainted;
	6872	if (utf8_target) {
	6873	while (hardcount < max && scan < loceol &&
	6874	isALNUM_LC_utf8((U8*)scan)) {
	6875	scan += UTF8SKIP(scan);
	6876	hardcount++;
	6877	}
	6878	} else {
	6879	while (scan < loceol && isALNUM_LC(*scan))
	6880	scan++;
	6881	}
	6882	break;
	6883	case NALNUMU:
	6884	if (utf8_target) {
	6885
	6886	utf8_Nwordchar:
	6887
	6888	LOAD_UTF8_CHARCLASS_ALNUM();
	6889	while (hardcount < max && scan < loceol &&
	6890	! swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6891	{
	6892	scan += UTF8SKIP(scan);
	6893	hardcount++;
	6894	}
	6895	} else {
	6896	while (scan < loceol && ! isWORDCHAR_L1((U8) *scan)) {
	6897	scan++;
	6898	}
	6899	}
	6900	break;
	6901	case NALNUM:
	6902	if (utf8_target)
	6903	goto utf8_Nwordchar;
	6904	while (scan < loceol && ! isALNUM((U8) *scan)) {
	6905	scan++;
	6906	}
	6907	break;
	6908
	6909	case POSIXA:
	6910	if (utf8_target && scan + max < loceol) {
	6911
	6912	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	6913	* since here, to match, 1 char == 1 byte */
	6914	loceol = scan + max;
	6915	}
	6916	while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
	6917	scan++;
	6918	}
	6919	break;
	6920	case NPOSIXA:
	6921	if (utf8_target) {
	6922	while (scan < loceol && hardcount < max
	6923	&& ! _generic_isCC_A((U8) *scan, FLAGS(p)))
	6924	{
	6925	scan += UTF8SKIP(scan);
	6926	hardcount++;
	6927	}
	6928	}
	6929	else {
	6930	while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
	6931	scan++;
	6932	}
	6933	}
	6934	break;
	6935	case NALNUMA:
	6936	if (utf8_target) {
	6937	while (scan < loceol && hardcount < max
	6938	&& ! isWORDCHAR_A((U8) *scan))
	6939	{
	6940	scan += UTF8SKIP(scan);
	6941	hardcount++;
	6942	}
	6943	}
	6944	else {
	6945	while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
	6946	scan++;
	6947	}
	6948	}
	6949	break;
	6950	case NALNUML:
	6951	PL_reg_flags \|= RF_tainted;
	6952	if (utf8_target) {
	6953	while (hardcount < max && scan < loceol &&
	6954	!isALNUM_LC_utf8((U8*)scan)) {
	6955	scan += UTF8SKIP(scan);
	6956	hardcount++;
	6957	}
	6958	} else {
	6959	while (scan < loceol && !isALNUM_LC(*scan))
	6960	scan++;
	6961	}
	6962	break;
	6963	case SPACEU:
	6964	if (utf8_target) {
	6965
	6966	utf8_space:
	6967
	6968	while (hardcount < max && scan < loceol
	6969	&& is_XPERLSPACE_utf8((U8*)scan))
	6970	{
	6971	scan += UTF8SKIP(scan);
	6972	hardcount++;
	6973	}
	6974	break;
	6975	}
	6976	else {
	6977	while (scan < loceol && isSPACE_L1((U8) *scan)) {
	6978	scan++;
	6979	}
	6980	break;
	6981	}
	6982	case SPACE:
	6983	if (utf8_target)
	6984	goto utf8_space;
	6985
	6986	while (scan < loceol && isSPACE((U8) *scan)) {
	6987	scan++;
	6988	}
	6989	break;
	6990	case SPACEA:
	6991	if (utf8_target && scan + max < loceol) {
	6992
	6993	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	6994	* since here, to match, 1 char == 1 byte */
	6995	loceol = scan + max;
	6996	}
	6997	while (scan < loceol && isSPACE_A((U8) *scan)) {
	6998	scan++;
	6999	}
	7000	break;
	7001	case SPACEL:
	7002	PL_reg_flags \|= RF_tainted;
	7003	if (utf8_target) {
	7004	while (hardcount < max && scan < loceol &&
	7005	isSPACE_LC_utf8((U8*)scan)) {
	7006	scan += UTF8SKIP(scan);
	7007	hardcount++;
	7008	}
	7009	} else {
	7010	while (scan < loceol && isSPACE_LC(*scan))
	7011	scan++;
	7012	}
	7013	break;
	7014	case NSPACEU:
	7015	if (utf8_target) {
	7016
	7017	utf8_Nspace:
	7018
	7019	while (hardcount < max && scan < loceol
	7020	&& ! is_XPERLSPACE_utf8((U8*)scan))
	7021	{
	7022	scan += UTF8SKIP(scan);
	7023	hardcount++;
	7024	}
	7025	break;
	7026	}
	7027	else {
	7028	while (scan < loceol && ! isSPACE_L1((U8) *scan)) {
	7029	scan++;
	7030	}
	7031	}
	7032	break;
	7033	case NSPACE:
	7034	if (utf8_target)
	7035	goto utf8_Nspace;
	7036
	7037	while (scan < loceol && ! isSPACE((U8) *scan)) {
	7038	scan++;
	7039	}
	7040	break;
	7041	case NSPACEA:
	7042	if (utf8_target) {
	7043	while (hardcount < max && scan < loceol
	7044	&& ! isSPACE_A((U8) *scan))
	7045	{
	7046	scan += UTF8SKIP(scan);
	7047	hardcount++;
	7048	}
	7049	}
	7050	else {
	7051	while (scan < loceol && ! isSPACE_A((U8) *scan)) {
	7052	scan++;
	7053	}
	7054	}
	7055	break;
	7056	case NSPACEL:
	7057	PL_reg_flags \|= RF_tainted;
	7058	if (utf8_target) {
	7059	while (hardcount < max && scan < loceol &&
	7060	!isSPACE_LC_utf8((U8*)scan)) {
	7061	scan += UTF8SKIP(scan);
	7062	hardcount++;
	7063	}
	7064	} else {
	7065	while (scan < loceol && !isSPACE_LC(*scan))
	7066	scan++;
	7067	}
	7068	break;
	7069	case DIGIT:
	7070	if (utf8_target) {
	7071	LOAD_UTF8_CHARCLASS_DIGIT();
	7072	while (hardcount < max && scan < loceol &&
	7073	swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	7074	scan += UTF8SKIP(scan);
	7075	hardcount++;
	7076	}
	7077	} else {
	7078	while (scan < loceol && isDIGIT(*scan))
	7079	scan++;
	7080	}
	7081	break;
	7082	case DIGITA:
	7083	if (utf8_target && scan + max < loceol) {
	7084
	7085	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	7086	* since here, to match, 1 char == 1 byte */
	7087	loceol = scan + max;
	7088	}
	7089	while (scan < loceol && isDIGIT_A((U8) *scan)) {
	7090	scan++;
	7091	}
	7092	break;
	7093	case DIGITL:
	7094	PL_reg_flags \|= RF_tainted;
	7095	if (utf8_target) {
	7096	while (hardcount < max && scan < loceol &&
	7097	isDIGIT_LC_utf8((U8*)scan)) {
	7098	scan += UTF8SKIP(scan);
	7099	hardcount++;
	7100	}
	7101	} else {
	7102	while (scan < loceol && isDIGIT_LC(*scan))
	7103	scan++;
	7104	}
	7105	break;
	7106	case NDIGIT:
	7107	if (utf8_target) {
	7108	LOAD_UTF8_CHARCLASS_DIGIT();
	7109	while (hardcount < max && scan < loceol &&
	7110	!swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	7111	scan += UTF8SKIP(scan);
	7112	hardcount++;
	7113	}
	7114	} else {
	7115	while (scan < loceol && !isDIGIT(*scan))
	7116	scan++;
	7117	}
	7118	break;
	7119	case NDIGITA:
	7120	if (utf8_target) {
	7121	while (hardcount < max && scan < loceol
	7122	&& ! isDIGIT_A((U8) *scan)) {
	7123	scan += UTF8SKIP(scan);
	7124	hardcount++;
	7125	}
	7126	}
	7127	else {
	7128	while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
	7129	scan++;
	7130	}
	7131	}
	7132	break;
	7133	case NDIGITL:
	7134	PL_reg_flags \|= RF_tainted;
	7135	if (utf8_target) {
	7136	while (hardcount < max && scan < loceol &&
	7137	!isDIGIT_LC_utf8((U8*)scan)) {
	7138	scan += UTF8SKIP(scan);
	7139	hardcount++;
	7140	}
	7141	} else {
	7142	while (scan < loceol && !isDIGIT_LC(*scan))
	7143	scan++;
	7144	}
	7145	break;
	7146	case LNBREAK:
	7147	if (utf8_target) {
	7148	while (hardcount < max && scan < loceol &&
	7149	(c=is_LNBREAK_utf8_safe(scan, loceol))) {
	7150	scan += c;
	7151	hardcount++;
	7152	}
	7153	} else {
	7154	/* LNBREAK can match one or two latin chars, which is ok, but we
	7155	* have to use hardcount in this situation, and throw away the
	7156	* adjustment to <loceol> done before the switch statement */
	7157	loceol = PL_regeol;
	7158	while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
	7159	scan+=c;
	7160	hardcount++;
	7161	}
	7162	}
	7163	break;
	7164	case HORIZWS:
	7165	if (utf8_target) {
	7166	while (hardcount < max && scan < loceol &&
	7167	(c=is_HORIZWS_utf8_safe(scan, loceol)))
	7168	{
	7169	scan += c;
	7170	hardcount++;
	7171	}
	7172	} else {
	7173	while (scan < loceol && is_HORIZWS_latin1_safe(scan, loceol))
	7174	scan++;
	7175	}
	7176	break;
	7177	case NHORIZWS:
	7178	if (utf8_target) {
	7179	while (hardcount < max && scan < loceol &&
	7180	!is_HORIZWS_utf8_safe(scan, loceol))
	7181	{
	7182	scan += UTF8SKIP(scan);
	7183	hardcount++;
	7184	}
	7185	} else {
	7186	while (scan < loceol && !is_HORIZWS_latin1_safe(scan, loceol))
	7187	scan++;
	7188
	7189	}
	7190	break;
	7191	case VERTWS:
	7192	if (utf8_target) {
	7193	while (hardcount < max && scan < loceol &&
	7194	(c=is_VERTWS_utf8_safe(scan, loceol)))
	7195	{
	7196	scan += c;
	7197	hardcount++;
	7198	}
	7199	} else {
	7200	while (scan < loceol && is_VERTWS_latin1_safe(scan, loceol))
	7201	scan++;
	7202
	7203	}
	7204	break;
	7205	case NVERTWS:
	7206	if (utf8_target) {
	7207	while (hardcount < max && scan < loceol &&
	7208	!is_VERTWS_utf8_safe(scan, loceol))
	7209	{
	7210	scan += UTF8SKIP(scan);
	7211	hardcount++;
	7212	}
	7213	} else {
	7214	while (scan < loceol && !is_VERTWS_latin1_safe(scan, loceol))
	7215	scan++;
	7216
	7217	}
	7218	break;
	7219
	7220	case BOUND:
	7221	case BOUNDA:
	7222	case BOUNDL:
	7223	case BOUNDU:
	7224	case EOS:
	7225	case GPOS:
	7226	case KEEPS:
	7227	case NBOUND:
	7228	case NBOUNDA:
	7229	case NBOUNDL:
	7230	case NBOUNDU:
	7231	case OPFAIL:
	7232	case SBOL:
	7233	case SEOL:
	7234	/* These are all 0 width, so match right here or not at all. */
	7235	break;
	7236
	7237	default:
	7238	Perl_croak(aTHX_ "panic: regrepeat() called with unrecognized node type %d='%s'", OP(p), PL_reg_name[OP(p)]);
	7239	assert(0); /* NOTREACHED */
	7240
	7241	}
	7242
	7243	if (hardcount)
	7244	c = hardcount;
	7245	else
	7246	c = scan - *startposp;
	7247	*startposp = scan;
	7248
	7249	DEBUG_r({
	7250	GET_RE_DEBUG_FLAGS_DECL;
	7251	DEBUG_EXECUTE_r({
	7252	SV * const prop = sv_newmortal();
	7253	regprop(prog, prop, p);
	7254	PerlIO_printf(Perl_debug_log,
	7255	"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
	7256	REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
	7257	});
	7258	});
	7259
	7260	return(c);
	7261	}
	7262
	7263
	7264	#if !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION)
	7265	/*
	7266	- regclass_swash - prepare the utf8 swash. Wraps the shared core version to
	7267	create a copy so that changes the caller makes won't change the shared one.
	7268	If <altsvp> is non-null, will return NULL in it, for back-compat.
	7269	*/
	7270	SV *
	7271	Perl_regclass_swash(pTHX_ const regexp prog, register const regnode node, bool doinit, SV listsvp, SV altsvp)
	7272	{
	7273	PERL_ARGS_ASSERT_REGCLASS_SWASH;
	7274
	7275	if (altsvp) {
	7276	*altsvp = NULL;
	7277	}
	7278
	7279	return newSVsv(core_regclass_swash(prog, node, doinit, listsvp));
	7280	}
	7281	#endif
	7282
	7283	STATIC SV *
	7284	S_core_regclass_swash(pTHX_ const regexp prog, register const regnode node, bool doinit, SV** listsvp)
	7285	{
	7286	/* Returns the swash for the input 'node' in the regex 'prog'.
	7287	* If <doinit> is true, will attempt to create the swash if not already
	7288	* done.
	7289	* If <listsvp> is non-null, will return the swash initialization string in
	7290	* it.
	7291	* Tied intimately to how regcomp.c sets up the data structure */
	7292
	7293	dVAR;
	7294	SV *sw = NULL;
	7295	SV *si = NULL;
	7296	SV* invlist = NULL;
	7297
	7298	RXi_GET_DECL(prog,progi);
	7299	const struct reg_data * const data = prog ? progi->data : NULL;
	7300
	7301	PERL_ARGS_ASSERT_CORE_REGCLASS_SWASH;
	7302
	7303	assert(ANYOF_NONBITMAP(node));
	7304
	7305	if (data && data->count) {
	7306	const U32 n = ARG(node);
	7307
	7308	if (data->what[n] == 's') {
	7309	SV * const rv = MUTABLE_SV(data->data[n]);
	7310	AV * const av = MUTABLE_AV(SvRV(rv));
	7311	SV **const ary = AvARRAY(av);
	7312	U8 swash_init_flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	7313
	7314	si = ary; / ary[0] = the string to initialize the swash with */
	7315
	7316	/* Elements 2 and 3 are either both present or both absent. [2] is
	7317	* any inversion list generated at compile time; [3] indicates if
	7318	* that inversion list has any user-defined properties in it. */
	7319	if (av_len(av) >= 2) {
	7320	invlist = ary[2];
	7321	if (SvUV(ary[3])) {
	7322	swash_init_flags \|= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
	7323	}
	7324	}
	7325	else {
	7326	invlist = NULL;
	7327	}
	7328
	7329	/* Element [1] is reserved for the set-up swash. If already there,
	7330	* return it; if not, create it and store it there */
	7331	if (SvROK(ary[1])) {
	7332	sw = ary[1];
	7333	}
	7334	else if (si && doinit) {
	7335
	7336	sw = _core_swash_init("utf8", /* the utf8 package */
	7337	"", /* nameless */
	7338	si,
	7339	1, /* binary */
	7340	0, /* not from tr/// */
	7341	invlist,
	7342	&swash_init_flags);
	7343	(void)av_store(av, 1, sw);
	7344	}
	7345	}
	7346	}
	7347
	7348	if (listsvp) {
	7349	SV* matches_string = newSVpvn("", 0);
	7350
	7351	/* Use the swash, if any, which has to have incorporated into it all
	7352	* possibilities */
	7353	if ((! sw \|\| (invlist = _get_swash_invlist(sw)) == NULL)
	7354	&& (si && si != &PL_sv_undef))
	7355	{
	7356
	7357	/* If no swash, use the input initialization string, if available */
	7358	sv_catsv(matches_string, si);
	7359	}
	7360
	7361	/* Add the inversion list to whatever we have. This may have come from
	7362	* the swash, or from an input parameter */
	7363	if (invlist) {
	7364	sv_catsv(matches_string, _invlist_contents(invlist));
	7365	}
	7366	*listsvp = matches_string;
	7367	}
	7368
	7369	return sw;
	7370	}
	7371
	7372	/*
	7373	- reginclass - determine if a character falls into a character class
	7374
	7375	n is the ANYOF regnode
	7376	p is the target string
	7377	utf8_target tells whether p is in UTF-8.
	7378
	7379	Returns true if matched; false otherwise.
	7380
	7381	Note that this can be a synthetic start class, a combination of various
	7382	nodes, so things you think might be mutually exclusive, such as locale,
	7383	aren't. It can match both locale and non-locale
	7384
	7385	*/
	7386
	7387	STATIC bool
	7388	S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, register const bool utf8_target)
	7389	{
	7390	dVAR;
	7391	const char flags = ANYOF_FLAGS(n);
	7392	bool match = FALSE;
	7393	UV c = *p;
	7394
	7395	PERL_ARGS_ASSERT_REGINCLASS;
	7396
	7397	/* If c is not already the code point, get it. Note that
	7398	* UTF8_IS_INVARIANT() works even if not in UTF-8 */
	7399	if (! UTF8_IS_INVARIANT(c) && utf8_target) {
	7400	STRLEN c_len = 0;
	7401	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
	7402	(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
	7403	\| UTF8_ALLOW_FFFF \| UTF8_CHECK_ONLY);
	7404	/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
	7405	* UTF8_ALLOW_FFFF */
	7406	if (c_len == (STRLEN)-1)
	7407	Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
	7408	}
	7409
	7410	/* If this character is potentially in the bitmap, check it */
	7411	if (c < 256) {
	7412	if (ANYOF_BITMAP_TEST(n, c))
	7413	match = TRUE;
	7414	else if (flags & ANYOF_NON_UTF8_LATIN1_ALL
	7415	&& ! utf8_target
	7416	&& ! isASCII(c))
	7417	{
	7418	match = TRUE;
	7419	}
	7420	else if (flags & ANYOF_LOCALE) {
	7421	PL_reg_flags \|= RF_tainted;
	7422
	7423	if ((flags & ANYOF_LOC_FOLD)
	7424	&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
	7425	{
	7426	match = TRUE;
	7427	}
	7428	else if (ANYOF_CLASS_TEST_ANY_SET(n) &&
	7429	((ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) \|\|
	7430	(ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) \|\|
	7431	(ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) \|\|
	7432	(ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) \|\|
	7433	(ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) \|\|
	7434	(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) \|\|
	7435	(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) \|\|
	7436	(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) \|\|
	7437	(ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) \|\|
	7438	(ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) \|\|
	7439	(ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII_LC(c)) \|\|
	7440	(ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII_LC(c)) \|\|
	7441	(ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) \|\|
	7442	(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) \|\|
	7443	(ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) \|\|
	7444	(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) \|\|
	7445	(ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) \|\|
	7446	(ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) \|\|
	7447	(ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) \|\|
	7448	(ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) \|\|
	7449	(ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) \|\|
	7450	(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) \|\|
	7451	(ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) \|\|
	7452	(ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) \|\|
	7453	(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) \|\|
	7454	(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) \|\|
	7455	(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) \|\|
	7456	(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) \|\|
	7457	(ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK_LC(c)) \|\|
	7458	(ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK_LC(c))
	7459	) /* How's that for a conditional? */
	7460	) {
	7461	match = TRUE;
	7462	}
	7463	}
	7464	}
	7465
	7466	/* If the bitmap didn't (or couldn't) match, and something outside the
	7467	* bitmap could match, try that. Locale nodes specify completely the
	7468	* behavior of code points in the bit map (otherwise, a utf8 target would
	7469	* cause them to be treated as Unicode and not locale), except in
	7470	* the very unlikely event when this node is a synthetic start class, which
	7471	* could be a combination of locale and non-locale nodes. So allow locale
	7472	* to match for the synthetic start class, which will give a false
	7473	* positive that will be resolved when the match is done again as not part
	7474	* of the synthetic start class */
	7475	if (!match) {
	7476	if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
	7477	match = TRUE; /* Everything above 255 matches */
	7478	}
	7479	else if (ANYOF_NONBITMAP(n)
	7480	&& ((flags & ANYOF_NONBITMAP_NON_UTF8)
	7481	\|\| (utf8_target
	7482	&& (c >=256
	7483	\|\| (! (flags & ANYOF_LOCALE))
	7484	\|\| (flags & ANYOF_IS_SYNTHETIC)))))
	7485	{
	7486	SV * const sw = core_regclass_swash(prog, n, TRUE, 0);
	7487	if (sw) {
	7488	U8 * utf8_p;
	7489	if (utf8_target) {
	7490	utf8_p = (U8 *) p;
	7491	} else { /* Convert to utf8 */
	7492	STRLEN len = 1;
	7493	utf8_p = bytes_to_utf8(p, &len);
	7494	}
	7495
	7496	if (swash_fetch(sw, utf8_p, TRUE)) {
	7497	match = TRUE;
	7498	}
	7499
	7500	/* If we allocated a string above, free it */
	7501	if (! utf8_target) Safefree(utf8_p);
	7502	}
	7503	}
	7504
	7505	if (UNICODE_IS_SUPER(c)
	7506	&& (flags & ANYOF_WARN_SUPER)
	7507	&& ckWARN_d(WARN_NON_UNICODE))
	7508	{
	7509	Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
	7510	"Code point 0x%04"UVXf" is not Unicode, all \\p{} matches fail; all \\P{} matches succeed", c);
	7511	}
	7512	}
	7513
	7514	/* The xor complements the return if to invert: 1^1 = 0, 1^0 = 1 */
	7515	return cBOOL(flags & ANYOF_INVERT) ^ match;
	7516	}
	7517
	7518	STATIC U8 *
	7519	S_reghop3(U8 s, I32 off, const U8 lim)
	7520	{
	7521	/* return the position 'off' UTF-8 characters away from 's', forward if
	7522	* 'off' >= 0, backwards if negative. But don't go outside of position
	7523	* 'lim', which better be < s if off < 0 */
	7524
	7525	dVAR;
	7526
	7527	PERL_ARGS_ASSERT_REGHOP3;
	7528
	7529	if (off >= 0) {
	7530	while (off-- && s < lim) {
	7531	/* XXX could check well-formedness here */
	7532	s += UTF8SKIP(s);
	7533	}
	7534	}
	7535	else {
	7536	while (off++ && s > lim) {
	7537	s--;
	7538	if (UTF8_IS_CONTINUED(*s)) {
	7539	while (s > lim && UTF8_IS_CONTINUATION(*s))
	7540	s--;
	7541	}
	7542	/* XXX could check well-formedness here */
	7543	}
	7544	}
	7545	return s;
	7546	}
	7547
	7548	#ifdef XXX_dmq
	7549	/* there are a bunch of places where we use two reghop3's that should
	7550	be replaced with this routine. but since thats not done yet
	7551	we ifdef it out - dmq
	7552	*/
	7553	STATIC U8 *
	7554	S_reghop4(U8 s, I32 off, const U8 llim, const U8* rlim)
	7555	{
	7556	dVAR;
	7557
	7558	PERL_ARGS_ASSERT_REGHOP4;
	7559
	7560	if (off >= 0) {
	7561	while (off-- && s < rlim) {
	7562	/* XXX could check well-formedness here */
	7563	s += UTF8SKIP(s);
	7564	}
	7565	}
	7566	else {
	7567	while (off++ && s > llim) {
	7568	s--;
	7569	if (UTF8_IS_CONTINUED(*s)) {
	7570	while (s > llim && UTF8_IS_CONTINUATION(*s))
	7571	s--;
	7572	}
	7573	/* XXX could check well-formedness here */
	7574	}
	7575	}
	7576	return s;
	7577	}
	7578	#endif
	7579
	7580	STATIC U8 *
	7581	S_reghopmaybe3(U8* s, I32 off, const U8* lim)
	7582	{
	7583	dVAR;
	7584
	7585	PERL_ARGS_ASSERT_REGHOPMAYBE3;
	7586
	7587	if (off >= 0) {
	7588	while (off-- && s < lim) {
	7589	/* XXX could check well-formedness here */
	7590	s += UTF8SKIP(s);
	7591	}
	7592	if (off >= 0)
	7593	return NULL;
	7594	}
	7595	else {
	7596	while (off++ && s > lim) {
	7597	s--;
	7598	if (UTF8_IS_CONTINUED(*s)) {
	7599	while (s > lim && UTF8_IS_CONTINUATION(*s))
	7600	s--;
	7601	}
	7602	/* XXX could check well-formedness here */
	7603	}
	7604	if (off <= 0)
	7605	return NULL;
	7606	}
	7607	return s;
	7608	}
	7609
	7610	static void
	7611	restore_pos(pTHX_ void *arg)
	7612	{
	7613	dVAR;
	7614	regexp * const rex = (regexp *)arg;
	7615	if (PL_reg_state.re_state_eval_setup_done) {
	7616	if (PL_reg_oldsaved) {
	7617	rex->subbeg = PL_reg_oldsaved;
	7618	rex->sublen = PL_reg_oldsavedlen;
	7619	rex->suboffset = PL_reg_oldsavedoffset;
	7620	rex->subcoffset = PL_reg_oldsavedcoffset;
	7621	#ifdef PERL_OLD_COPY_ON_WRITE
	7622	rex->saved_copy = PL_nrs;
	7623	#endif
	7624	RXp_MATCH_COPIED_on(rex);
	7625	}
	7626	PL_reg_magic->mg_len = PL_reg_oldpos;
	7627	PL_reg_state.re_state_eval_setup_done = FALSE;
	7628	PL_curpm = PL_reg_oldcurpm;
	7629	}
	7630	}
	7631
	7632	STATIC void
	7633	S_to_utf8_substr(pTHX_ register regexp *prog)
	7634	{
	7635	/* Converts substr fields in prog from bytes to UTF-8, calling fbm_compile
	7636	* on the converted value */
	7637
	7638	int i = 1;
	7639
	7640	PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
	7641
	7642	do {
	7643	if (prog->substrs->data[i].substr
	7644	&& !prog->substrs->data[i].utf8_substr) {
	7645	SV* const sv = newSVsv(prog->substrs->data[i].substr);
	7646	prog->substrs->data[i].utf8_substr = sv;
	7647	sv_utf8_upgrade(sv);
	7648	if (SvVALID(prog->substrs->data[i].substr)) {
	7649	if (SvTAIL(prog->substrs->data[i].substr)) {
	7650	/* Trim the trailing \n that fbm_compile added last
	7651	time. */
	7652	SvCUR_set(sv, SvCUR(sv) - 1);
	7653	/* Whilst this makes the SV technically "invalid" (as its
	7654	buffer is no longer followed by "\0") when fbm_compile()
	7655	adds the "\n" back, a "\0" is restored. */
	7656	fbm_compile(sv, FBMcf_TAIL);
	7657	} else
	7658	fbm_compile(sv, 0);
	7659	}
	7660	if (prog->substrs->data[i].substr == prog->check_substr)
	7661	prog->check_utf8 = sv;
	7662	}
	7663	} while (i--);
	7664	}
	7665
	7666	STATIC bool
	7667	S_to_byte_substr(pTHX_ register regexp *prog)
	7668	{
	7669	/* Converts substr fields in prog from UTF-8 to bytes, calling fbm_compile
	7670	* on the converted value; returns FALSE if can't be converted. */
	7671
	7672	dVAR;
	7673	int i = 1;
	7674
	7675	PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
	7676
	7677	do {
	7678	if (prog->substrs->data[i].utf8_substr
	7679	&& !prog->substrs->data[i].substr) {
	7680	SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
	7681	if (! sv_utf8_downgrade(sv, TRUE)) {
	7682	return FALSE;
	7683	}
	7684	if (SvVALID(prog->substrs->data[i].utf8_substr)) {
	7685	if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
	7686	/* Trim the trailing \n that fbm_compile added last
	7687	time. */
	7688	SvCUR_set(sv, SvCUR(sv) - 1);
	7689	fbm_compile(sv, FBMcf_TAIL);
	7690	} else
	7691	fbm_compile(sv, 0);
	7692	}
	7693	prog->substrs->data[i].substr = sv;
	7694	if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
	7695	prog->check_substr = sv;
	7696	}
	7697	} while (i--);
	7698
	7699	return TRUE;
	7700	}
	7701
	7702	/* These constants are for finding GCB=LV and GCB=LVT. These are for the
	7703	* pre-composed Hangul syllables, which are all in a contiguous block and
	7704	* arranged there in such a way so as to facilitate alorithmic determination of
	7705	* their characteristics. As such, they don't need a swash, but can be
	7706	* determined by simple arithmetic. Almost all are GCB=LVT, but every 28th one
	7707	* is a GCB=LV */
	7708	#define SBASE 0xAC00 /* Start of block */
	7709	#define SCount 11172 /* Length of block */
	7710	#define TCount 28
	7711
	7712	#if 0 /* This routine is not currently used */
	7713	PERL_STATIC_INLINE bool
	7714	S_is_utf8_X_LV(pTHX_ const U8 *p)
	7715	{
	7716	/* Unlike most other similarly named routines here, this does not create a
	7717	* swash, so swash_fetch() cannot be used on PL_utf8_X_LV. */
	7718
	7719	dVAR;
	7720
	7721	UV cp = valid_utf8_to_uvchr(p, NULL);
	7722
	7723	PERL_ARGS_ASSERT_IS_UTF8_X_LV;
	7724
	7725	/* The earliest Unicode releases did not have these precomposed Hangul
	7726	* syllables. Set to point to undef in that case, so will return false on
	7727	* every call */
	7728	if (! PL_utf8_X_LV) { /* Set up if this is the first time called */
	7729	PL_utf8_X_LV = swash_init("utf8", "_X_GCB_LV", &PL_sv_undef, 1, 0);
	7730	if (_invlist_len(_get_swash_invlist(PL_utf8_X_LV)) == 0) {
	7731	SvREFCNT_dec(PL_utf8_X_LV);
	7732	PL_utf8_X_LV = &PL_sv_undef;
	7733	}
	7734	}
	7735
	7736	return (PL_utf8_X_LV != &PL_sv_undef
	7737	&& cp >= SBASE && cp < SBASE + SCount
	7738	&& (cp - SBASE) % TCount == 0); /* Only every TCount one is LV */
	7739	}
	7740	#endif
	7741
	7742	PERL_STATIC_INLINE bool
	7743	S_is_utf8_X_LVT(pTHX_ const U8 *p)
	7744	{
	7745	/* Unlike most other similarly named routines here, this does not create a
	7746	* swash, so swash_fetch() cannot be used on PL_utf8_X_LVT. */
	7747
	7748	dVAR;
	7749
	7750	UV cp = valid_utf8_to_uvchr(p, NULL);
	7751
	7752	PERL_ARGS_ASSERT_IS_UTF8_X_LVT;
	7753
	7754	/* The earliest Unicode releases did not have these precomposed Hangul
	7755	* syllables. Set to point to undef in that case, so will return false on
	7756	* every call */
	7757	if (! PL_utf8_X_LVT) { /* Set up if this is the first time called */
	7758	PL_utf8_X_LVT = swash_init("utf8", "_X_GCB_LVT", &PL_sv_undef, 1, 0);
	7759	if (_invlist_len(_get_swash_invlist(PL_utf8_X_LVT)) == 0) {
	7760	SvREFCNT_dec(PL_utf8_X_LVT);
	7761	PL_utf8_X_LVT = &PL_sv_undef;
	7762	}
	7763	}
	7764
	7765	return (PL_utf8_X_LVT != &PL_sv_undef
	7766	&& cp >= SBASE && cp < SBASE + SCount
	7767	&& (cp - SBASE) % TCount != 0); /* All but every TCount one is LV */
	7768	}
	7769
	7770	/*
	7771	* Local variables:
	7772	* c-indentation-style: bsd
	7773	* c-basic-offset: 4
	7774	* indent-tabs-mode: nil
	7775	* End:
	7776	*
	7777	* ex: set ts=8 sts=4 sw=4 et:
	7778	*/