perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regexec.c
	2	*/
	3
	4	/*
	5	* One Ring to rule them all, One Ring to find them
	6	&
	7	* [p.v of _The Lord of the Rings_, opening poem]
	8	* [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
	9	* [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
	10	*/
	11
	12	/* This file contains functions for executing a regular expression. See
	13	* also regcomp.c which funnily enough, contains functions for compiling
	14	* a regular expression.
	15	*
	16	* This file is also copied at build time to ext/re/re_exec.c, where
	17	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	18	* This causes the main functions to be compiled under new names and with
	19	* debugging support added, which makes "use re 'debug'" work.
	20	*/
	21
	22	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	23	* be confused with the original package (see point 3 below). Thanks, Henry!
	24	*/
	25
	26	/* Additional note: this code is very heavily munged from Henry's version
	27	* in places. In some spots I've traded clarity for efficiency, so don't
	28	* blame Henry for some of the lack of readability.
	29	*/
	30
	31	/* The names of the functions have been changed from regcomp and
	32	* regexec to pregcomp and pregexec in order to avoid conflicts
	33	* with the POSIX routines of the same names.
	34	*/
	35
	36	#ifdef PERL_EXT_RE_BUILD
	37	#include "re_top.h"
	38	#endif
	39
	40	/* At least one required character in the target string is expressible only in
	41	* UTF-8. */
	42	static const char* const non_utf8_target_but_utf8_required
	43	= "Can't match, because target string needs to be in UTF-8\n";
	44
		/* Hands the message above to DEBUG_EXECUTE_r for tracing and then jumps
		 * unconditionally to the label passed as 'target'.  Wrapped in
		 * STMT_START/STMT_END so it can be used as a single statement. */
	45	#define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target) STMT_START { \
	46	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s", non_utf8_target_but_utf8_required));\
	47	goto target; \
	48	} STMT_END
	49
	50	/*
	51	* pregcomp and pregexec -- regsub and regerror are not used in perl
	52	*
	53	* Copyright (c) 1986 by University of Toronto.
	54	* Written by Henry Spencer. Not derived from licensed software.
	55	*
	56	* Permission is granted to anyone to use this software for any
	57	* purpose on any computer system, and to redistribute it freely,
	58	* subject to the following restrictions:
	59	*
	60	* 1. The author is not responsible for the consequences of use of
	61	* this software, no matter how awful, even if they arise
	62	* from defects in it.
	63	*
	64	* 2. The origin of this software must not be misrepresented, either
	65	* by explicit claim or by omission.
	66	*
	67	* 3. Altered versions must be plainly marked as such, and must not
	68	* be misrepresented as being the original software.
	69	*
	70	**** Alterations to Henry's code are...
	71	****
	72	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	73	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
	74	**** by Larry Wall and others
	75	****
	76	**** You may distribute under the terms of either the GNU General Public
	77	**** License or the Artistic License, as specified in the README file.
	78	*
	79	* Beware that some of this code is subtly aware of the way operator
	80	* precedence is structured in regular expressions. Serious changes in
	81	* regular-expression syntax might require a total rethink.
	82	*/
	83	#include "EXTERN.h"
	84	#define PERL_IN_REGEXEC_C
	85	#include "perl.h"
	86
	87	#ifdef PERL_IN_XSUB_RE
	88	# include "re_comp.h"
	89	#else
	90	# include "regcomp.h"
	91	#endif
	92
	93	#include "inline_invlist.c"
	94	#include "unicode_constants.h"
	95
		/* Short local alias for the long-named macro shared by regcomp.c and
		 * regexec.c. */
	96	#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
	97
		/* Fall back to plain 'static' when nothing has defined STATIC yet. */
	98	#ifndef STATIC
	99	#define STATIC static
	100	#endif
	101
	102	/* Valid for non-utf8 strings: avoids the reginclass
	103	* call if there are no complications: i.e., if everything matchable is
	104	* straightforward in the bitmap.  When ANYOF_FLAGS(p) is non-zero the full
		* reginclass() is called; otherwise only the byte *(c) is tested against
		* the node's bitmap. */
	105	#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0) \
	106	: ANYOF_BITMAP_TEST(p,*(c)))
	107
	108	/*
	109	* Forwards.
	110	*/
	111
	/* Length of 'sv': sv_len_utf8() when the 'utf8_target' variable at the
	 * expansion site is true, otherwise SvCUR(). */
	#define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))

	/* Distance from 'b' to 'a': utf8_distance() when PL_reg_match_utf8 is set,
	 * plain pointer subtraction otherwise. */
	#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : (a) - (b))

	/* Move 'off' positions from 'pos' and yield a char*.  When
	 * PL_reg_match_utf8 is set the move is delegated to reghop3(), limited by
	 * PL_regeol for non-negative offsets and by PL_bostr for negative ones;
	 * otherwise it is simple pointer arithmetic with no bounds check. */
	#define HOPc(pos,off) \
	    (char *)(PL_reg_match_utf8 \
	        ? reghop3((U8*)(pos), off, (U8*)((off) >= 0 ? PL_regeol : PL_bostr)) \
	        : (U8*)((pos) + (off)))

	/* Move 'off' positions backwards from 'pos'.  The non-UTF-8 branch yields
	 * NULL when the result would fall before PL_bostr; the UTF-8 branch yields
	 * whatever reghopmaybe3() returns. */
	#define HOPBACKc(pos, off) \
	    (char*)(PL_reg_match_utf8 \
	        ? reghopmaybe3((U8*)(pos), -(off), (U8*)PL_bostr) \
	        : ((pos) - (off) >= PL_bostr) \
	            ? (U8*)(pos) - (off) \
	            : NULL)

	/* Like HOPc, but the caller supplies the limit pointer 'lim' that is handed
	 * to reghop3(); HOP3 yields U8*, HOP3c yields char*.  The non-UTF-8 branch
	 * ignores 'lim'. */
	#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)((pos) + (off)))
	#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
	128
	129
		/* Sentinel stored in 'nextchr' once 'locinput' is no longer below
		 * PL_regeol; it is negative, so NEXTCHR_IS_EOS is a simple sign test. */
	130	#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
	131	#define NEXTCHR_IS_EOS (nextchr < 0)
	132
		/* Reload 'nextchr' from the byte at 'locinput', or NEXTCHR_EOS when
		 * 'locinput' is not below PL_regeol.  Uses the 'nextchr' and 'locinput'
		 * variables of the expansion site. */
	133	#define SET_nextchr \
	134	nextchr = ((locinput < PL_regeol) ? UCHARAT(locinput) : NEXTCHR_EOS)
	135
		/* Move 'locinput' to p and refresh 'nextchr'.  NOTE: this expands to two
		 * separate statements, so it must not be used as the unbraced body of an
		 * if/else/loop. */
	136	#define SET_locinput(p) \
	137	locinput = (p); \
	138	SET_nextchr
	139
	140
		/* Lazy initialisation: if 'swash_ptr' is still false, load the property
		 * 'property_name' with _core_swash_init() (passing the
		 * _CORE_SWASH_INIT_ACCEPT_INVLIST flag) and assert that something was
		 * returned.  'swash_ptr' is assigned to, so it must be an lvalue. */
	141	#define LOAD_UTF8_CHARCLASS(swash_ptr, property_name) STMT_START { \
	142	if (!swash_ptr) { \
	143	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; \
	144	swash_ptr = _core_swash_init("utf8", property_name, &PL_sv_undef, \
	145	1, 0, NULL, &flags); \
	146	assert(swash_ptr); \
	147	} \
	148	} STMT_END
	149
	150	/* If in debug mode, we test that a known character properly matches */
		/* NOTE: the DEBUGGING variant expands to two statements and already ends
		 * in a semicolon; the non-DEBUGGING variant is just LOAD_UTF8_CHARCLASS. */
	151	#ifdef DEBUGGING
	152	# define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr, \
	153	property_name, \
	154	utf8_char_in_property) \
	155	LOAD_UTF8_CHARCLASS(swash_ptr, property_name); \
	156	assert(swash_fetch(swash_ptr, (U8 *) utf8_char_in_property, TRUE));
	157	#else
	158	# define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr, \
	159	property_name, \
	160	utf8_char_in_property) \
	161	LOAD_UTF8_CHARCLASS(swash_ptr, property_name)
	162	#endif
	163
		/* Load the swash for the _CC_WORDCHAR class, using GREEK SMALL LETTER IOTA
		 * as the known member for the debug check. */
	164	#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS_DEBUG_TEST( \
	165	PL_utf8_swash_ptrs[_CC_WORDCHAR], \
	166	swash_property_names[_CC_WORDCHAR], \
	167	GREEK_SMALL_LETTER_IOTA_UTF8)
	168
		/* Load both swashes named below ("_X_regular_begin" and "_X_extend"),
		 * each with its own known-member debug check. */
	169	#define LOAD_UTF8_CHARCLASS_GCB() /* Grapheme cluster boundaries */ \
	170	STMT_START { \
	171	LOAD_UTF8_CHARCLASS_DEBUG_TEST(PL_utf8_X_regular_begin, \
	172	"_X_regular_begin", \
	173	GREEK_SMALL_LETTER_IOTA_UTF8); \
	174	LOAD_UTF8_CHARCLASS_DEBUG_TEST(PL_utf8_X_extend, \
	175	"_X_extend", \
	176	COMBINING_GRAVE_ACCENT_UTF8); \
	177	} STMT_END
	178
	#define PLACEHOLDER /* Something for the preprocessor to grab onto */
	/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */

	/* for use after a quantifier and before an EXACT-like node -- japhy */
	/* it would be nice to rework regcomp.sym to generate this stuff. sigh
	 *
	 * NOTE that nothing that affects backtracking should be in here, specifically
	 * VERBS must NOT be included. JUMPABLE is used to determine if we can ignore a
	 * node that is in between two EXACT like nodes when ascertaining what the
	 * required "follow" character is. This should probably be moved to regex
	 * compile time although it may be done at run time because of the REF
	 * possibility - more investigation required. -- demerphq
	 *
	 * The macro reads 'cur_eval' from the expansion site: a CLOSE node is
	 * jumpable only when there is no current eval or its close_paren differs
	 * from ARG(rn).
	 */
	#define JUMPABLE(rn) ( \
	    OP(rn) == OPEN || \
	    (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
	    OP(rn) == EVAL || \
	    OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
	    OP(rn) == PLUS || OP(rn) == MINMOD || \
	    OP(rn) == KEEPS || \
	    (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
	)

	/* True when the node's kind (per PL_regkind) is EXACT. */
	#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)

	/* True for EXACT-kind nodes and for REF-kind nodes. */
	#define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )

	#if 0
	/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
	   we don't need this definition. */
	#define IS_TEXT(rn) ( OP(rn)==EXACT || OP(rn)==REF || OP(rn)==NREF )
	#define IS_TEXTF(rn) ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_TRICKYFOLD || OP(rn)==EXACTFA || OP(rn)==EXACTF || OP(rn)==REFF || OP(rn)==NREFF )
	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )

	#else
	/* ... so we use this as it's faster. */
	#define IS_TEXT(rn) ( OP(rn)==EXACT )
	#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_TRICKYFOLD || OP(rn) == EXACTFA)
	#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )

	#endif

	/*
	  Search for mandatory following text node; for lookahead, the text must
	  follow but for lookbehind (rn->flags != 0) we skip to the next step.

	  'rn' is advanced in place (it must be an lvalue) for as long as it points
	  at a JUMPABLE node:
	    SUSPEND or any CURLY-kind node -> NEXTOPER(NEXTOPER(rn))
	    PLUS                           -> NEXTOPER(rn)
	    IFMATCH                        -> NEXTOPER(NEXTOPER(rn)) when
	                                      rn->flags == 0, else rn + ARG(rn)
	    anything else                  -> rn + NEXT_OFF(rn)
	*/
	#define FIND_NEXT_IMPT(rn) STMT_START { \
	    while (JUMPABLE(rn)) { \
	        const OPCODE type = OP(rn); \
	        if (type == SUSPEND || PL_regkind[type] == CURLY) \
	            rn = NEXTOPER(NEXTOPER(rn)); \
	        else if (type == PLUS) \
	            rn = NEXTOPER(rn); \
	        else if (type == IFMATCH) \
	            rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
	        else rn += NEXT_OFF(rn); \
	    } \
	} STMT_END
	237
	238	/* These constants are for finding GCB=LV and GCB=LVT in the CLUMP regnode.
	239	* These are for the pre-composed Hangul syllables, which are all in a
	240	* contiguous block and arranged there in such a way so as to facilitate
	241	* algorithmic determination of their characteristics. As such, they don't need
	242	* a swash, but can be determined by simple arithmetic. Almost all are
	243	* GCB=LVT, but every 28th one is a GCB=LV */
	244	#define SBASE 0xAC00 /* Start of block */
	245	#define SCount 11172 /* Length of block */
	246	#define TCount 28
	247
	248	static void restore_pos(pTHX_ void *arg);
	249
		/* Sizes of the pieces S_regcppush() writes to the savestack:
		 *   REGCP_PAREN_ELEMS - values per paren (end, start, start_tmp)
		 *   REGCP_OTHER_ELEMS - maxopenparen, lastparen, lastcloseparen
		 *   REGCP_FRAME_ELEMS - the trailing SAVEt_REGCONTEXT word */
	250	#define REGCP_PAREN_ELEMS 3
	251	#define REGCP_OTHER_ELEMS 3
	252	#define REGCP_FRAME_ELEMS 1
	253	/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
	254	* are needed for the regexp context stack bookkeeping. */
	255
	256	STATIC CHECKPOINT
	257	S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen)
	258	{
	259	dVAR;
	260	const int retval = PL_savestack_ix;
	261	const int paren_elems_to_push =
	262	(maxopenparen - parenfloor) * REGCP_PAREN_ELEMS;
	263	const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
	264	const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
	265	I32 p;
	266	GET_RE_DEBUG_FLAGS_DECL;
	267
	268	PERL_ARGS_ASSERT_REGCPPUSH;
	269
	270	if (paren_elems_to_push < 0)
	271	Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0",
	272	paren_elems_to_push);
	273
	274	if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
	275	Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
	276	" out of range (%lu-%ld)",
	277	total_elems,
	278	(unsigned long)maxopenparen,
	279	(long)parenfloor);
	280
	281	SSGROW(total_elems + REGCP_FRAME_ELEMS);
	282
	283	DEBUG_BUFFERS_r(
	284	if ((int)maxopenparen > (int)parenfloor)
	285	PerlIO_printf(Perl_debug_log,
	286	"rex=0x%"UVxf" offs=0x%"UVxf": saving capture indices:\n",
	287	PTR2UV(rex),
	288	PTR2UV(rex->offs)
	289	);
	290	);
	291	for (p = parenfloor+1; p <= (I32)maxopenparen; p++) {
	292	/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
	293	SSPUSHINT(rex->offs[p].end);
	294	SSPUSHINT(rex->offs[p].start);
	295	SSPUSHINT(rex->offs[p].start_tmp);
	296	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	297	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"\n",
	298	(UV)p,
	299	(IV)rex->offs[p].start,
	300	(IV)rex->offs[p].start_tmp,
	301	(IV)rex->offs[p].end
	302	));
	303	}
	304	/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
	305	SSPUSHINT(maxopenparen);
	306	SSPUSHINT(rex->lastparen);
	307	SSPUSHINT(rex->lastcloseparen);
	308	SSPUSHUV(SAVEt_REGCONTEXT \| elems_shifted); /* Magic cookie. */
	309
	310	return retval;
	311	}
	312
	313	/* These are needed since we do not localize EVAL nodes: */
		/* REGCP_SET: trace via DEBUG_STATE_r, then store the current
		 * PL_savestack_ix in 'cp'.  Expands to two statements. */
	314	#define REGCP_SET(cp) \
	315	DEBUG_STATE_r( \
	316	PerlIO_printf(Perl_debug_log, \
	317	" Setting an EVAL scope, savestack=%"IVdf"\n", \
	318	(IV)PL_savestack_ix)); \
	319	cp = PL_savestack_ix
	320
		/* REGCP_UNWIND: trace (only when 'cp' differs from PL_savestack_ix), then
		 * call regcpblow(cp).  Expands to two statements. */
	321	#define REGCP_UNWIND(cp) \
	322	DEBUG_STATE_r( \
	323	if (cp != PL_savestack_ix) \
	324	PerlIO_printf(Perl_debug_log, \
	325	" Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
	326	(IV)(cp), (IV)PL_savestack_ix)); \
	327	regcpblow(cp)
	328
		/* UNWIND_PAREN: set offs[n].end to -1 for each n from rex->lastparen down
		 * to lp+1, then set rex->lastparen to the final n and rex->lastcloseparen
		 * to lcp.  Relies on 'n' and 'rex' existing at the expansion site.  NOTE:
		 * expands to a for loop followed by two further statements, so it must not
		 * be used as the unbraced body of an if/else/loop. */
	329	#define UNWIND_PAREN(lp, lcp) \
	330	for (n = rex->lastparen; n > lp; n--) \
	331	rex->offs[n].end = -1; \
	332	rex->lastparen = n; \
	333	rex->lastcloseparen = lcp;
	334
	335
	336	STATIC void
	337	S_regcppop(pTHX_ regexp rex, U32 maxopenparen_p)
	338	{
	339	dVAR;
	340	UV i;
	341	U32 paren;
	342	GET_RE_DEBUG_FLAGS_DECL;
	343
	344	PERL_ARGS_ASSERT_REGCPPOP;
	345
	346	/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
	347	i = SSPOPUV;
	348	assert((i & SAVE_MASK) == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
	349	i >>= SAVE_TIGHT_SHIFT; /* Parentheses elements to pop. */
	350	rex->lastcloseparen = SSPOPINT;
	351	rex->lastparen = SSPOPINT;
	352	*maxopenparen_p = SSPOPINT;
	353
	354	i -= REGCP_OTHER_ELEMS;
	355	/* Now restore the parentheses context. */
	356	DEBUG_BUFFERS_r(
	357	if (i \|\| rex->lastparen + 1 <= rex->nparens)
	358	PerlIO_printf(Perl_debug_log,
	359	"rex=0x%"UVxf" offs=0x%"UVxf": restoring capture indices to:\n",
	360	PTR2UV(rex),
	361	PTR2UV(rex->offs)
	362	);
	363	);
	364	paren = *maxopenparen_p;
	365	for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
	366	I32 tmps;
	367	rex->offs[paren].start_tmp = SSPOPINT;
	368	rex->offs[paren].start = SSPOPINT;
	369	tmps = SSPOPINT;
	370	if (paren <= rex->lastparen)
	371	rex->offs[paren].end = tmps;
	372	DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
	373	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"%s\n",
	374	(UV)paren,
	375	(IV)rex->offs[paren].start,
	376	(IV)rex->offs[paren].start_tmp,
	377	(IV)rex->offs[paren].end,
	378	(paren > rex->lastparen ? "(skipped)" : ""));
	379	);
	380	paren--;
	381	}
	382	#if 1
	383	/* It would seem that the similar code in regtry()
	384	* already takes care of this, and in fact it is in
	385	* a better location to since this code can #if 0-ed out
	386	* but the code in regtry() is needed or otherwise tests
	387	* requiring null fields (pat.t#187 and split.t#{13,14}
	388	* (as of patchlevel 7877) will fail. Then again,
	389	* this code seems to be necessary or otherwise
	390	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	391	* --jhi updated by dapm */
	392	for (i = rex->lastparen + 1; i <= rex->nparens; i++) {
	393	if (i > *maxopenparen_p)
	394	rex->offs[i].start = -1;
	395	rex->offs[i].end = -1;
	396	DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
	397	" \\%"UVuf": %s ..-1 undeffing\n",
	398	(UV)i,
	399	(i > *maxopenparen_p) ? "-1" : " "
	400	));
	401	}
	402	#endif
	403	}
	404
	405	/* restore the parens and associated vars at savestack position ix,
	406	* but without popping the stack */
	407
	408	STATIC void
	409	S_regcp_restore(pTHX_ regexp rex, I32 ix, U32 maxopenparen_p)
	410	{
	411	I32 tmpix = PL_savestack_ix;
	412	PL_savestack_ix = ix;
	413	regcppop(rex, maxopenparen_p);
	414	PL_savestack_ix = tmpix;
	415	}
	416
	417	#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
	418
	419	STATIC bool
	420	S_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
	421	{
	422	/* Returns a boolean as to whether or not 'character' is a member of the
	423	* Posix character class given by 'classnum' that should be equivalent to a
	424	* value in the typedef '_char_class_number'.
	425	*
	426	* Ideally this could be replaced by a just an array of function pointers
	427	* to the C library functions that implement the macros this calls.
	428	* However, to compile, the precise function signatures are required, and
	429	* these may vary from platform to to platform. To avoid having to figure
	430	* out what those all are on each platform, I (khw) am using this method,
	431	* which adds an extra layer of function call overhead (unless the C
	432	* optimizer strips it away). But we don't particularly care about
	433	* performance with locales anyway. */
	434
	435	switch ((_char_class_number) classnum) {
	436	case _CC_ENUM_ALPHANUMERIC: return isALPHANUMERIC_LC(character);
	437	case _CC_ENUM_ALPHA: return isALPHA_LC(character);
	438	case _CC_ENUM_ASCII: return isASCII_LC(character);
	439	case _CC_ENUM_BLANK: return isBLANK_LC(character);
	440	case _CC_ENUM_CASED: return isLOWER_LC(character)
	441	\|\| isUPPER_LC(character);
	442	case _CC_ENUM_CNTRL: return isCNTRL_LC(character);
	443	case _CC_ENUM_DIGIT: return isDIGIT_LC(character);
	444	case _CC_ENUM_GRAPH: return isGRAPH_LC(character);
	445	case _CC_ENUM_LOWER: return isLOWER_LC(character);
	446	case _CC_ENUM_PRINT: return isPRINT_LC(character);
	447	case _CC_ENUM_PSXSPC: return isPSXSPC_LC(character);
	448	case _CC_ENUM_PUNCT: return isPUNCT_LC(character);
	449	case _CC_ENUM_SPACE: return isSPACE_LC(character);
	450	case _CC_ENUM_UPPER: return isUPPER_LC(character);
	451	case _CC_ENUM_WORDCHAR: return isWORDCHAR_LC(character);
	452	case _CC_ENUM_XDIGIT: return isXDIGIT_LC(character);
	453	default: /* VERTSPACE should never occur in locales */
	454	Perl_croak(aTHX_ "panic: isFOO_lc() has an unexpected character class '%d'", classnum);
	455	}
	456
	457	assert(0); /* NOTREACHED */
	458	return FALSE;
	459	}
	460
	461	STATIC bool
	462	S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
	463	{
	464	/* Returns a boolean as to whether or not the (well-formed) UTF-8-encoded
	465	* 'character' is a member of the Posix character class given by 'classnum'
	466	* that should be equivalent to a value in the typedef
	467	* '_char_class_number'.
	468	*
	469	* This just calls isFOO_lc on the code point for the character if it is in
	470	* the range 0-255. Outside that range, all characters avoid Unicode
	471	* rules, ignoring any locale. So use the Unicode function if this class
	472	* requires a swash, and use the Unicode macro otherwise. */
	473
	474	PERL_ARGS_ASSERT_ISFOO_UTF8_LC;
	475
	476	if (UTF8_IS_INVARIANT(*character)) {
	477	return isFOO_lc(classnum, *character);
	478	}
	479	else if (UTF8_IS_DOWNGRADEABLE_START(*character)) {
	480	return isFOO_lc(classnum,
	481	TWO_BYTE_UTF8_TO_UNI(character, (character + 1)));
	482	}
	483
	484	if (classnum < _FIRST_NON_SWASH_CC) {
	485
	486	/* Initialize the swash unless done already */
	487	if (! PL_utf8_swash_ptrs[classnum]) {
	488	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	489	PL_utf8_swash_ptrs[classnum] = _core_swash_init("utf8",
	490	swash_property_names[classnum], &PL_sv_undef, 1, 0, NULL, &flags);
	491	}
	492
	493	return cBOOL(swash_fetch(PL_utf8_swash_ptrs[classnum], (U8 *)
	494	character,
	495	TRUE /* is UTF */ ));
	496	}
	497
	498	switch ((_char_class_number) classnum) {
	499	case _CC_ENUM_SPACE:
	500	case _CC_ENUM_PSXSPC: return is_XPERLSPACE_high(character);
	501
	502	case _CC_ENUM_BLANK: return is_HORIZWS_high(character);
	503	case _CC_ENUM_XDIGIT: return is_XDIGIT_high(character);
	504	case _CC_ENUM_VERTSPACE: return is_VERTWS_high(character);
	505	default: return 0; /* Things like CNTRL are always
	506	below 256 */
	507	}
	508
	509	assert(0); /* NOTREACHED */
	510	return FALSE;
	511	}
	512
	513	/*
	514	* pregexec and friends
	515	*/
	516
	517	#ifndef PERL_IN_XSUB_RE
	518	/*
	519	- pregexec - match a regexp against a string
	520	*/
	521	I32
	522	Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
	523	char strbeg, I32 minend, SV screamer, U32 nosave)
	524	/* stringarg: the point in the string at which to begin matching */
	525	/* strend: pointer to null at end of string */
	526	/* strbeg: real beginning of string */
	527	/* minend: end of match must be >= minend bytes after stringarg. */
	528	/* screamer: SV being matched: only used for utf8 flag, pos() etc; string
	529	* itself is accessed via the pointers above */
	530	/* nosave: For optimizations. */
	531	{
	532	PERL_ARGS_ASSERT_PREGEXEC;
	533
	534	return
	535	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
	536	nosave ? 0 : REXEC_COPY_STR);
	537	}
	538	#endif
	539
	540	/*
	541	* Need to implement the following flags for reg_anch:
	542	*
	543	* USE_INTUIT_NOML - Useful to call re_intuit_start() first
	544	* USE_INTUIT_ML
	545	* INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
	546	* INTUIT_AUTORITATIVE_ML
	547	* INTUIT_ONCE_NOML - Intuit can match in one location only.
	548	* INTUIT_ONCE_ML
	549	*
	550	* Another flag for this function: SECOND_TIME (so that float substrs
	551	* with giant delta may be not rechecked).
	552	*/
	553
	554	/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
	555
	556	/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
	557	Otherwise, only SvCUR(sv) is used to get strbeg. */
	558
	559	/* XXXX We assume that strpos is strbeg unless sv. */
	560
	561	/* XXXX Some places assume that there is a fixed substring.
	562	An update may be needed if optimizer marks as "INTUITable"
	563	RExen without fixed substrings. Similarly, it is assumed that
	564	lengths of all the strings are no more than minlen, thus they
	565	cannot come from lookahead.
	566	(Or minlen should take into account lookahead.)
	567	NOTE: Some of this comment is not correct. minlen does now take account
	568	of lookahead/behind. Further research is required. -- demerphq
	569
	570	*/
	571
	572	/* A failure to find a constant substring means that there is no need to make
	573	an expensive call to REx engine, thus we celebrate a failure. Similarly,
	574	finding a substring too deep into the string means that fewer calls to
	575	regtry() should be needed.
	576
	577	REx compiler's optimizer found 4 possible hints:
	578	a) Anchored substring;
	579	b) Fixed substring;
	580	c) Whether we are anchored (beginning-of-line or \G);
	581	d) First node (of those at offset 0) which may distinguish positions;
	582	We use a)b)d) and multiline-part of c), and try to find a position in the
	583	string which does not contradict any of them.
	584	*/
	585
	586	/* Most of the decisions we make here should have been made at compile time.
	587	The nodes of the REx which we used for the search should have been
	588	deleted from the finite automaton. */
	589
	590	char *
	591	Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV sv, char strpos,
	592	char strend, const U32 flags, re_scream_pos_data data)
	593	{
	594	dVAR;
	595	struct regexp *const prog = ReANY(rx);
	596	I32 start_shift = 0;
	597	/* Should be nonnegative! */
	598	I32 end_shift = 0;
	599	char *s;
	600	SV *check;
	601	char *strbeg;
	602	char *t;
	603	const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
	604	I32 ml_anch;
	605	char other_last = NULL; / other substr checked before this */
	606	char check_at = NULL; / check substr found at this pos */
	607	char checked_upto = NULL; / how far into the string we have already checked using find_byclass*/
	608	const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
	609	RXi_GET_DECL(prog,progi);
	610	bool is_utf8_pat;
	611	#ifdef DEBUGGING
	612	const char * const i_strpos = strpos;
	613	#endif
	614	GET_RE_DEBUG_FLAGS_DECL;
	615
	616	PERL_ARGS_ASSERT_RE_INTUIT_START;
	617	PERL_UNUSED_ARG(flags);
	618	PERL_UNUSED_ARG(data);
	619
	620	RX_MATCH_UTF8_set(rx,utf8_target);
	621
	622	is_utf8_pat = cBOOL(RX_UTF8(rx));
	623
	624	DEBUG_EXECUTE_r(
	625	debug_start_match(rx, utf8_target, strpos, strend,
	626	sv ? "Guessing start of match in sv for"
	627	: "Guessing start of match in string for");
	628	);
	629
	630	/* CHR_DIST() would be more correct here but it makes things slow. */
	631	if (prog->minlen > strend - strpos) {
	632	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	633	"String too short... [re_intuit_start]\n"));
	634	goto fail;
	635	}
	636
	637	/* XXX we need to pass strbeg as a separate arg: the following is
	638	* guesswork and can be wrong... */
	639	if (sv && SvPOK(sv)) {
	640	char * p = SvPVX(sv);
	641	STRLEN cur = SvCUR(sv);
	642	if (p <= strpos && strpos < p + cur) {
	643	strbeg = p;
	644	assert(p <= strend && strend <= p + cur);
	645	}
	646	else
	647	strbeg = strend - cur;
	648	}
	649	else
	650	strbeg = strpos;
	651
	652	PL_regeol = strend;
	653	if (utf8_target) {
	654	if (!prog->check_utf8 && prog->check_substr)
	655	to_utf8_substr(prog);
	656	check = prog->check_utf8;
	657	} else {
	658	if (!prog->check_substr && prog->check_utf8) {
	659	if (! to_byte_substr(prog)) {
	660	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
	661	}
	662	}
	663	check = prog->check_substr;
	664	}
	665	if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
	666	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
	667	\|\| ( (prog->extflags & RXf_ANCH_BOL)
	668	&& !multiline ) ); /* Check after \n? */
	669
	670	if (!ml_anch) {
	671	if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
	672	&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
	673	/* SvCUR is not set on references: SvRV and SvPVX_const overlap */
	674	&& sv && !SvROK(sv)
	675	&& (strpos != strbeg)) {
	676	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
	677	goto fail;
	678	}
	679	if (prog->check_offset_min == prog->check_offset_max
	680	&& !(prog->extflags & RXf_CANY_SEEN)
	681	&& ! multiline) /* /m can cause \n's to match that aren't
	682	accounted for in the string max length.
	683	See [perl #115242] */
	684	{
	685	/* Substring at constant offset from beg-of-str... */
	686	I32 slen;
	687
	688	s = HOP3c(strpos, prog->check_offset_min, strend);
	689
	690	if (SvTAIL(check)) {
	691	slen = SvCUR(check); /* >= 1 */
	692
	693	if ( strend - s > slen \|\| strend - s < slen - 1
	694	\|\| (strend - s == slen && strend[-1] != '\n')) {
	695	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
	696	goto fail_finish;
	697	}
	698	/* Now should match s[0..slen-2] */
	699	slen--;
	700	if (slen && (SvPVX_const(check) != s
	701	\|\| (slen > 1
	702	&& memNE(SvPVX_const(check), s, slen)))) {
	703	report_neq:
	704	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
	705	goto fail_finish;
	706	}
	707	}
	708	else if (SvPVX_const(check) != s
	709	\|\| ((slen = SvCUR(check)) > 1
	710	&& memNE(SvPVX_const(check), s, slen)))
	711	goto report_neq;
	712	check_at = s;
	713	goto success_at_start;
	714	}
	715	}
	716	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
	717	s = strpos;
	718	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	719	end_shift = prog->check_end_shift;
	720
	721	if (!ml_anch) {
	722	const I32 end = prog->check_offset_max + CHR_SVLEN(check)
	723	- (SvTAIL(check) != 0);
	724	const I32 eshift = CHR_DIST((U8)strend, (U8)s) - end;
	725
	726	if (end_shift < eshift)
	727	end_shift = eshift;
	728	}
	729	}
	730	else { /* Can match at random position */
	731	ml_anch = 0;
	732	s = strpos;
	733	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	734	end_shift = prog->check_end_shift;
	735
	736	/* end shift should be non negative here */
	737	}
	738
	739	#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
	740	if (end_shift < 0)
	741	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
	742	(IV)end_shift, RX_PRECOMP(prog));
	743	#endif
	744
	745	restart:
	746	/* Find a possible match in the region s..strend by looking for
	747	the "check" substring in the region corrected by start/end_shift. */
	748
	749	{
	750	I32 srch_start_shift = start_shift;
	751	I32 srch_end_shift = end_shift;
	752	U8* start_point;
	753	U8* end_point;
	754	if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
	755	srch_end_shift -= ((strbeg - s) - srch_start_shift);
	756	srch_start_shift = strbeg - s;
	757	}
	758	DEBUG_OPTIMISE_MORE_r({
	759	PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
	760	(IV)prog->check_offset_min,
	761	(IV)srch_start_shift,
	762	(IV)srch_end_shift,
	763	(IV)prog->check_end_shift);
	764	});
	765
	766	if (prog->extflags & RXf_CANY_SEEN) {
	767	start_point= (U8*)(s + srch_start_shift);
	768	end_point= (U8*)(strend - srch_end_shift);
	769	} else {
	770	start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
	771	end_point= HOP3(strend, -srch_end_shift, strbeg);
	772	}
	773	DEBUG_OPTIMISE_MORE_r({
	774	PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
	775	(int)(end_point - start_point),
	776	(int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
	777	start_point);
	778	});
	779
	780	s = fbm_instr( start_point, end_point,
	781	check, multiline ? FBMrf_MULTILINE : 0);
	782	}
	783	/* Update the count-of-usability, remove useless subpatterns,
	784	unshift s. */
	785
	786	DEBUG_EXECUTE_r({
	787	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	788	SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
	789	PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
	790	(s ? "Found" : "Did not find"),
	791	(check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
	792	? "anchored" : "floating"),
	793	quoted,
	794	RE_SV_TAIL(check),
	795	(s ? " at offset " : "...\n") );
	796	});
	797
	798	if (!s)
	799	goto fail_finish;
	800	/* Finish the diagnostic message */
	801	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
	802
	803	/* XXX dmq: first branch is for positive lookbehind...
	804	Our check string is offset from the beginning of the pattern.
	805	So we need to do any stclass tests offset forward from that
	806	point. I think. :-(
	807	*/
	808
	809
	810
	811	check_at=s;
	812
	813
	814	/* Got a candidate. Check MBOL anchoring, and the other substr.
	815	Start with the other substr.
	816	XXXX no SCREAM optimization yet - and a very coarse implementation
	817	XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
	818	always match. Probably should be marked during compile...
	819	Probably it is right to do no SCREAM here...
	820	*/
	821
	822	if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
	823	: (prog->float_substr && prog->anchored_substr))
	824	{
	825	/* Take into account the "other" substring. */
	826	/* XXXX May be hopelessly wrong for UTF... */
	827	if (!other_last)
	828	other_last = strpos;
	829	if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
	830	do_other_anchored:
	831	{
	832	char * const last = HOP3c(s, -start_shift, strbeg);
	833	char last1, last2;
	834	char * const saved_s = s;
	835	SV* must;
	836
	837	t = s - prog->check_offset_max;
	838	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	839	&& (!utf8_target
	840	\|\| ((t = (char)reghopmaybe3((U8)s, -(prog->check_offset_max), (U8*)strpos))
	841	&& t > strpos)))
	842	NOOP;
	843	else
	844	t = strpos;
	845	t = HOP3c(t, prog->anchored_offset, strend);
	846	if (t < other_last) /* These positions already checked */
	847	t = other_last;
	848	last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
	849	if (last < last1)
	850	last1 = last;
	851	/* XXXX It is not documented what units *_offsets are in.
	852	We assume bytes, but this is clearly wrong.
	853	Meaning this code needs to be carefully reviewed for errors.
	854	dmq.
	855	*/
	856
	857	/* On end-of-str: see comment below. */
	858	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	859	if (must == &PL_sv_undef) {
	860	s = (char*)NULL;
	861	DEBUG_r(must = prog->anchored_utf8); /* for debug */
	862	}
	863	else
	864	s = fbm_instr(
	865	(unsigned char*)t,
	866	HOP3(HOP3(last1, prog->anchored_offset, strend)
	867	+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
	868	must,
	869	multiline ? FBMrf_MULTILINE : 0
	870	);
	871	DEBUG_EXECUTE_r({
	872	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	873	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	874	PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
	875	(s ? "Found" : "Contradicts"),
	876	quoted, RE_SV_TAIL(must));
	877	});
	878
	879
	880	if (!s) {
	881	if (last1 >= last2) {
	882	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	883	", giving up...\n"));
	884	goto fail_finish;
	885	}
	886	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	887	", trying floating at offset %ld...\n",
	888	(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
	889	other_last = HOP3c(last1, prog->anchored_offset+1, strend);
	890	s = HOP3c(last, 1, strend);
	891	goto restart;
	892	}
	893	else {
	894	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	895	(long)(s - i_strpos)));
	896	t = HOP3c(s, -prog->anchored_offset, strbeg);
	897	other_last = HOP3c(s, 1, strend);
	898	s = saved_s;
	899	if (t == strpos)
	900	goto try_at_start;
	901	goto try_at_offset;
	902	}
	903	}
	904	}
	905	else { /* Take into account the floating substring. */
	906	char last, last1;
	907	char * const saved_s = s;
	908	SV* must;
	909
	910	t = HOP3c(s, -start_shift, strbeg);
	911	last1 = last =
	912	HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
	913	if (CHR_DIST((U8)last, (U8)t) > prog->float_max_offset)
	914	last = HOP3c(t, prog->float_max_offset, strend);
	915	s = HOP3c(t, prog->float_min_offset, strend);
	916	if (s < other_last)
	917	s = other_last;
	918	/* XXXX It is not documented what units *_offsets are in. Assume bytes. */
	919	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	920	/* fbm_instr() takes into account exact value of end-of-str
	921	if the check is SvTAIL(ed). Since false positives are OK,
	922	and end-of-str is not later than strend we are OK. */
	923	if (must == &PL_sv_undef) {
	924	s = (char*)NULL;
	925	DEBUG_r(must = prog->float_utf8); /* for debug message */
	926	}
	927	else
	928	s = fbm_instr((unsigned char*)s,
	929	(unsigned char*)last + SvCUR(must)
	930	- (SvTAIL(must)!=0),
	931	must, multiline ? FBMrf_MULTILINE : 0);
	932	DEBUG_EXECUTE_r({
	933	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	934	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	935	PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
	936	(s ? "Found" : "Contradicts"),
	937	quoted, RE_SV_TAIL(must));
	938	});
	939	if (!s) {
	940	if (last1 == last) {
	941	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	942	", giving up...\n"));
	943	goto fail_finish;
	944	}
	945	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	946	", trying anchored starting at offset %ld...\n",
	947	(long)(saved_s + 1 - i_strpos)));
	948	other_last = last;
	949	s = HOP3c(t, 1, strend);
	950	goto restart;
	951	}
	952	else {
	953	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	954	(long)(s - i_strpos)));
	955	other_last = s; /* Fix this later. --Hugo */
	956	s = saved_s;
	957	if (t == strpos)
	958	goto try_at_start;
	959	goto try_at_offset;
	960	}
	961	}
	962	}
	963
	964
	965	t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
	966
	967	DEBUG_OPTIMISE_MORE_r(
	968	PerlIO_printf(Perl_debug_log,
	969	"Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
	970	(IV)prog->check_offset_min,
	971	(IV)prog->check_offset_max,
	972	(IV)(s-strpos),
	973	(IV)(t-strpos),
	974	(IV)(t-s),
	975	(IV)(strend-strpos)
	976	)
	977	);
	978
	979	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	980	&& (!utf8_target
	981	\|\| ((t = (char)reghopmaybe3((U8)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
	982	&& t > strpos)))
	983	{
	984	/* Fixed substring is found far enough so that the match
	985	cannot start at strpos. */
	986	try_at_offset:
	987	if (ml_anch && t[-1] != '\n') {
	988	/* Eventually fbm_*() should handle this, but often
	989	anchored_offset is not 0, so this check will not be wasted. */
	990	/* XXXX In the code below we prefer to look for "^" even in
	991	presence of anchored substrings. And we search even
	992	beyond the found float position. These pessimizations
	993	are historical artefacts only. */
	994	find_anchor:
	995	while (t < strend - prog->minlen) {
	996	if (*t == '\n') {
	997	if (t < check_at - prog->check_offset_min) {
	998	if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
	999	/* Since we moved from the found position,
	1000	we definitely contradict the found anchored
	1001	substr. Due to the above check we do not
	1002	contradict "check" substr.
	1003	Thus we can arrive here only if check substr
	1004	is float. Redo checking for "other"=="fixed".
	1005	*/
	1006	strpos = t + 1;
	1007	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
	1008	PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
	1009	goto do_other_anchored;
	1010	}
	1011	/* We don't contradict the found floating substring. */
	1012	/* XXXX Why not check for STCLASS? */
	1013	s = t + 1;
	1014	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
	1015	PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
	1016	goto set_useful;
	1017	}
	1018	/* Position contradicts check-string */
	1019	/* XXXX probably better to look for check-string
	1020	than for "\n", so one should lower the limit for t? */
	1021	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
	1022	PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
	1023	other_last = strpos = s = t + 1;
	1024	goto restart;
	1025	}
	1026	t++;
	1027	}
	1028	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
	1029	PL_colors[0], PL_colors[1]));
	1030	goto fail_finish;
	1031	}
	1032	else {
	1033	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
	1034	PL_colors[0], PL_colors[1]));
	1035	}
	1036	s = t;
	1037	set_useful:
	1038	++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
	1039	}
	1040	else {
	1041	/* The found string does not prohibit matching at strpos,
	1042	- no optimization of calling REx engine can be performed,
	1043	unless it was an MBOL and we are not after MBOL,
	1044	or a future STCLASS check will fail this. */
	1045	try_at_start:
	1046	/* Even in this situation we may use MBOL flag if strpos is offset
	1047	wrt the start of the string. */
	1048	if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
	1049	&& (strpos != strbeg) && strpos[-1] != '\n'
	1050	/* May be due to an implicit anchor of m{.*foo} */
	1051	&& !(prog->intflags & PREGf_IMPLICIT))
	1052	{
	1053	t = strpos;
	1054	goto find_anchor;
	1055	}
	1056	DEBUG_EXECUTE_r( if (ml_anch)
	1057	PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
	1058	(long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
	1059	);
	1060	success_at_start:
	1061	if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
	1062	&& (utf8_target ? (
	1063	prog->check_utf8 /* Could be deleted already */
	1064	&& --BmUSEFUL(prog->check_utf8) < 0
	1065	&& (prog->check_utf8 == prog->float_utf8)
	1066	) : (
	1067	prog->check_substr /* Could be deleted already */
	1068	&& --BmUSEFUL(prog->check_substr) < 0
	1069	&& (prog->check_substr == prog->float_substr)
	1070	)))
	1071	{
	1072	/* If flags & SOMETHING - do not do it many times on the same match */
	1073	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
	1074	/* XXX Does the destruction order has to change with utf8_target? */
	1075	SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
	1076	SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
	1077	prog->check_substr = prog->check_utf8 = NULL; /* disable */
	1078	prog->float_substr = prog->float_utf8 = NULL; /* clear */
	1079	check = NULL; /* abort */
	1080	s = strpos;
	1081	/* XXXX If the check string was an implicit check MBOL, then we need to unset the relevant flag
	1082	see http://bugs.activestate.com/show_bug.cgi?id=87173 */
	1083	if (prog->intflags & PREGf_IMPLICIT)
	1084	prog->extflags &= ~RXf_ANCH_MBOL;
	1085	/* XXXX This is a remnant of the old implementation. It
	1086	looks wasteful, since now INTUIT can use many
	1087	other heuristics. */
	1088	prog->extflags &= ~RXf_USE_INTUIT;
	1089	/* XXXX What other flags might need to be cleared in this branch? */
	1090	}
	1091	else
	1092	s = strpos;
	1093	}
	1094
	1095	/* Last resort... */
	1096	/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
	1097	/* trie stclasses are too expensive to use here, we are better off to
	1098	leave it to regmatch itself */
	1099	if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
	1100	/* minlen == 0 is possible if regstclass is \b or \B,
	1101	and the fixed substr is ''$.
	1102	Since minlen is already taken into account, s+1 is before strend;
	1103	accidentally, minlen >= 1 guarantees no false positives at s + 1
	1104	even for \b or \B. But (minlen? 1 : 0) below assumes that
	1105	regstclass does not come from lookahead... */
	1106	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
	1107	This leaves EXACTF-ish only, which are dealt with in find_byclass(). */
	1108	const U8* const str = (U8*)STRING(progi->regstclass);
	1109	const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
	1110	? CHR_DIST(str+STR_LEN(progi->regstclass), str)
	1111	: 1);
	1112	char * endpos;
	1113	if (prog->anchored_substr \|\| prog->anchored_utf8 \|\| ml_anch)
	1114	endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
	1115	else if (prog->float_substr \|\| prog->float_utf8)
	1116	endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
	1117	else
	1118	endpos= strend;
	1119
	1120	if (checked_upto < s)
	1121	checked_upto = s;
	1122	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
	1123	(IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
	1124
	1125	t = s;
	1126	s = find_byclass(prog, progi->regstclass, checked_upto, endpos,
	1127	NULL, is_utf8_pat);
	1128	if (s) {
	1129	checked_upto = s;
	1130	} else {
	1131	#ifdef DEBUGGING
	1132	const char *what = NULL;
	1133	#endif
	1134	if (endpos == strend) {
	1135	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1136	"Could not match STCLASS...\n") );
	1137	goto fail;
	1138	}
	1139	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1140	"This position contradicts STCLASS...\n") );
	1141	if ((prog->extflags & RXf_ANCH) && !ml_anch)
	1142	goto fail;
	1143	checked_upto = HOPBACKc(endpos, start_shift);
	1144	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
	1145	(IV)start_shift, (IV)(check_at - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
	1146	/* Contradict one of substrings */
	1147	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	1148	if ((utf8_target ? prog->anchored_utf8 : prog->anchored_substr) == check) {
	1149	DEBUG_EXECUTE_r( what = "anchored" );
	1150	hop_and_restart:
	1151	s = HOP3c(t, 1, strend);
	1152	if (s + start_shift + end_shift > strend) {
	1153	/* XXXX Should be taken into account earlier? */
	1154	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1155	"Could not match STCLASS...\n") );
	1156	goto fail;
	1157	}
	1158	if (!check)
	1159	goto giveup;
	1160	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1161	"Looking for %s substr starting at offset %ld...\n",
	1162	what, (long)(s + start_shift - i_strpos)) );
	1163	goto restart;
	1164	}
	1165	/* Have both, check_string is floating */
	1166	if (t + start_shift >= check_at) /* Contradicts floating=check */
	1167	goto retry_floating_check;
	1168	/* Recheck anchored substring, but not floating... */
	1169	s = check_at;
	1170	if (!check)
	1171	goto giveup;
	1172	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1173	"Looking for anchored substr starting at offset %ld...\n",
	1174	(long)(other_last - i_strpos)) );
	1175	goto do_other_anchored;
	1176	}
	1177	/* Another way we could have checked stclass at the
	1178	current position only: */
	1179	if (ml_anch) {
	1180	s = t = t + 1;
	1181	if (!check)
	1182	goto giveup;
	1183	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1184	"Looking for /%s^%s/m starting at offset %ld...\n",
	1185	PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
	1186	goto try_at_offset;
	1187	}
	1188	if (!(utf8_target ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
	1189	goto fail;
	1190	/* Check is floating substring. */
	1191	retry_floating_check:
	1192	t = check_at - start_shift;
	1193	DEBUG_EXECUTE_r( what = "floating" );
	1194	goto hop_and_restart;
	1195	}
	1196	if (t != s) {
	1197	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1198	"By STCLASS: moving %ld --> %ld\n",
	1199	(long)(t - i_strpos), (long)(s - i_strpos))
	1200	);
	1201	}
	1202	else {
	1203	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1204	"Does not contradict STCLASS...\n");
	1205	);
	1206	}
	1207	}
	1208	giveup:
	1209	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
	1210	PL_colors[4], (check ? "Guessed" : "Giving up"),
	1211	PL_colors[5], (long)(s - i_strpos)) );
	1212	return s;
	1213
	1214	fail_finish: /* Substring not found */
	1215	if (prog->check_substr \|\| prog->check_utf8) /* could be removed already */
	1216	BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
	1217	fail:
	1218	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
	1219	PL_colors[4], PL_colors[5]));
	1220	return NULL;
	1221	}
	1222
		/* Declare a const local named `trie_type` that records how a trie node
		 * must read the target string.  A node whose flags equal EXACT gets
		 * trie_utf8 or trie_plain; any other node gets one of the two folding
		 * variants.  In both cases the choice is made on `utf8_target`, which
		 * must be in scope where the macro is used.  Because the expansion is a
		 * declaration, use it only where a declaration is allowed.  The `scan`
		 * argument is parenthesized so that any pointer expression may be
		 * passed, not just a plain identifier. */
	1223	#define DECL_TRIE_TYPE(scan) \
	1224	const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
	1225	trie_type = (((scan)->flags == EXACT) \
	1226	? (utf8_target ? trie_utf8 : trie_plain) \
	1227	: (utf8_target ? trie_utf8_fold : trie_latin_utf8_fold))
	1228
		/* Read the next character for a trie walk and translate it to the trie's
		 * internal character id.  All arguments except trie_type, trie,
		 * widecharmap and uniflags are assigned to, so they must be lvalues in
		 * the caller; a local `skiplen` is declared inside the expansion.
		 *
		 * How the code point `uvc` and the consumed length `len` are obtained
		 * depends on trie_type:
		 *   trie_utf8_fold / trie_latin_utf8_fold
		 *       If foldlen > 0 there are still bytes pending from an earlier
		 *       fold: decode the next code point from uscan, reduce foldlen by
		 *       the decoded length, advance uscan, and set len to 0 (nothing
		 *       further is consumed from uc).  Otherwise fold the character at
		 *       uc into foldbuf (to_utf8_fold, or _to_fold_latin1 for the latin
		 *       variant), set len to UTF8SKIP(uc) (or 1 for the latin variant),
		 *       then step past UNISKIP(uvc) bytes of foldbuf (presumably the
		 *       encoded length of the code point just returned) so that
		 *       foldlen/uscan describe whatever fold bytes remain.
		 *   trie_utf8
		 *       Decode one code point directly from uc.
		 *   trie_plain
		 *       Take the single byte at uc; len is 1.
		 *
		 * Mapping to charid: code points below 256 index trie->charmap
		 * directly.  Anything larger yields 0 unless widecharmap is non-NULL
		 * and holds an entry keyed by the raw bytes of uvc (sizeof(UV) of
		 * them), in which case that entry's integer value, cast to U16, is
		 * used. */
	1229	#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
	1230	STMT_START { \
	1231	STRLEN skiplen; \
	1232	switch (trie_type) { \
	1233	case trie_utf8_fold: \
	1234	if ( foldlen>0 ) { \
	1235	uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	1236	foldlen -= len; \
	1237	uscan += len; \
	1238	len=0; \
	1239	} else { \
	1240	uvc = to_utf8_fold( (const U8*) uc, foldbuf, &foldlen ); \
	1241	len = UTF8SKIP(uc); \
	1242	skiplen = UNISKIP( uvc ); \
	1243	foldlen -= skiplen; \
	1244	uscan = foldbuf + skiplen; \
	1245	} \
	1246	break; \
	1247	case trie_latin_utf8_fold: \
	1248	if ( foldlen>0 ) { \
	1249	uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	1250	foldlen -= len; \
	1251	uscan += len; \
	1252	len=0; \
	1253	} else { \
	1254	len = 1; \
	1255	uvc = _to_fold_latin1( (U8) *uc, foldbuf, &foldlen, 1); \
	1256	skiplen = UNISKIP( uvc ); \
	1257	foldlen -= skiplen; \
	1258	uscan = foldbuf + skiplen; \
	1259	} \
	1260	break; \
	1261	case trie_utf8: \
	1262	uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \
	1263	break; \
	1264	case trie_plain: \
	1265	uvc = (UV)*uc; \
	1266	len = 1; \
	1267	} \
	1268	if (uvc < 256) { \
	1269	charid = trie->charmap[ uvc ]; \
	1270	} \
	1271	else { \
	1272	charid = 0; \
	1273	if (widecharmap) { \
	1274	SV** const svpp = hv_fetch(widecharmap, \
	1275	(char*)&uvc, sizeof(UV), 0); \
	1276	if (svpp) \
	1277	charid = (U16)SvIV(*svpp); \
	1278	} \
	1279	} \
	1280	} STMT_END
	1281
		/* Byte-wise scan used by the non-UTF-8 EXACTF-style cases.  Walks `s`
		 * from its current value up to and including `e`.  At each position it
		 * jumps to the caller's `got_it` label when all of the following hold:
		 *   - CoNd (normally a test on the first byte) is true;
		 *   - the node is one byte long (ln == 1) or folder() reports that the
		 *     ln bytes at s match pat_string;
		 *   - reginfo is NULL, or regtry() succeeds at s.
		 * If the loop finishes, s is left one past e.  Relies on s, e, ln,
		 * folder, pat_string, reginfo and the got_it label being in scope.
		 * The logical-or operators are written as plain `||`; the previous
		 * backslash-escaped spelling is not valid C. */
	1282	#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
	1283	STMT_START { \
	1284	while (s <= e) { \
	1285	if ( (CoNd) \
	1286	&& (ln == 1 || folder(s, pat_string, ln)) \
	1287	&& (!reginfo || regtry(reginfo, &s)) ) \
	1288	goto got_it; \
	1289	s++; \
	1290	} \
	1291	} STMT_END
	1292
		/* Run CoDe once for every character position from `s` up to (but not
		 * including) `strend`, advancing `s` by UTF8SKIP(s) after each pass,
		 * i.e. by the length the lead byte at s announces.  CoDe is pasted
		 * verbatim into the loop body, so it may jump out (e.g. goto got_it);
		 * a `continue` inside it would skip the advance of s.  Uses the
		 * caller's `s` and `strend`. */
	1293	#define REXEC_FBC_UTF8_SCAN(CoDe) \
	1294	STMT_START { \
	1295	while (s < strend) { \
	1296	CoDe \
	1297	s += UTF8SKIP(s); \
	1298	} \
	1299	} STMT_END
	1300
		/* Byte-wise counterpart of REXEC_FBC_UTF8_SCAN: run CoDe at every
		 * position from `s` up to (but not including) `strend`, incrementing
		 * `s` by one after each pass.  CoDe is pasted verbatim into the loop
		 * body, so it may jump out; a `continue` inside it would skip the
		 * increment.  Uses the caller's `s` and `strend`. */
	1301	#define REXEC_FBC_SCAN(CoDe) \
	1302	STMT_START { \
	1303	while (s < strend) { \
	1304	CoDe \
	1305	s++; \
	1306	} \
	1307	} STMT_END
	1308
		/* Character-wise (UTF-8) scan for a position satisfying CoNd.
		 *
		 * `tmp` is the caller's "worth trying here" flag.  When CoNd holds and
		 * tmp is non-zero, the position is accepted if reginfo is NULL or
		 * regtry() succeeds there, and control jumps to got_it.  If CoNd holds
		 * but the position is not accepted (or tmp was zero), tmp becomes
		 * `doevery`.  When CoNd does not hold, tmp is reset to 1.  The net
		 * effect is that with doevery == 0 only the first position of each run
		 * of consecutive matching positions is tried.
		 *
		 * Relies on s, strend, tmp, doevery, reginfo and the got_it label
		 * being in scope.  The logical-or is written as plain `||`; the
		 * previous backslash-escaped spelling is not valid C. */
	1309	#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
	1310	REXEC_FBC_UTF8_SCAN( \
	1311	if (CoNd) { \
	1312	if (tmp && (!reginfo || regtry(reginfo, &s))) \
	1313	goto got_it; \
	1314	else \
	1315	tmp = doevery; \
	1316	} \
	1317	else \
	1318	tmp = 1; \
	1319	)
	1320
		/* Byte-wise scan for a position satisfying CoNd; identical in logic to
		 * REXEC_FBC_UTF8_CLASS_SCAN but stepping one byte at a time.
		 *
		 * `tmp` is the caller's "worth trying here" flag.  When CoNd holds and
		 * tmp is non-zero, the position is accepted if reginfo is NULL or
		 * regtry() succeeds there, and control jumps to got_it.  Otherwise,
		 * with CoNd true, tmp becomes `doevery`; with CoNd false, tmp is reset
		 * to 1.  With doevery == 0 this tries only the first position of each
		 * run of consecutive matching positions.
		 *
		 * Relies on s, strend, tmp, doevery, reginfo and the got_it label
		 * being in scope.  The logical-or is written as plain `||`; the
		 * previous backslash-escaped spelling is not valid C. */
	1321	#define REXEC_FBC_CLASS_SCAN(CoNd) \
	1322	REXEC_FBC_SCAN( \
	1323	if (CoNd) { \
	1324	if (tmp && (!reginfo || regtry(reginfo, &s))) \
	1325	goto got_it; \
	1326	else \
	1327	tmp = doevery; \
	1328	} \
	1329	else \
	1330	tmp = 1; \
	1331	)
	1332
		/* Accept the current position: jump to got_it if reginfo is NULL or
		 * regtry() succeeds at s.  The expansion is a bare `if` with no
		 * terminating semicolon; the user supplies it.  The logical-or is
		 * written as plain `||`; the previous backslash-escaped spelling is
		 * not valid C. */
	1333	#define REXEC_FBC_TRYIT \
	1334	if ((!reginfo || regtry(reginfo, &s))) \
	1335	goto got_it
	1336
		/* Dispatch a class scan on the target's encoding: when `utf8_target`
		 * is true run REXEC_FBC_UTF8_CLASS_SCAN with CoNdUtF8, otherwise run
		 * REXEC_FBC_CLASS_SCAN with CoNd.  The expansion is a bare if/else
		 * (not wrapped in STMT_START/STMT_END), so it should be used as a
		 * statement of its own. */
	1337	#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
	1338	if (utf8_target) { \
	1339	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1340	} \
	1341	else { \
	1342	REXEC_FBC_CLASS_SCAN(CoNd); \
	1343	}
	1344
		/* Shorthand for dump_exec_pos(): forwards li, s and doutf8 and fills
		 * in the remaining arguments from the globals PL_regeol, PL_bostr and
		 * PL_reg_starttry. */
	1345	#define DUMP_EXEC_POS(li,s,doutf8) \
	1346	dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
	1347
	1348
		/* UTF-8 boundary scan that classifies characters with TEST_NON_UTF8
		 * only, applied to single bytes.
		 *
		 * tmp is first set to the class of the byte before s (or of '\n' when
		 * s is at PL_bostr).  Then, for each character position, the class of
		 * the byte at s is compared with tmp: if they differ, tmp is flipped
		 * and IF_SUCCESS runs; if they are the same, IF_FAIL runs.
		 *
		 * Relies on s, strend and tmp being in scope.  Note that the last line
		 * of the definition ends in a backslash, so the empty line that
		 * follows is part of the macro and must be kept. */
	1349	#define UTF8_NOLOAD(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1350	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1351	tmp = TEST_NON_UTF8(tmp); \
	1352	REXEC_FBC_UTF8_SCAN( \
	1353	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1354	tmp = !tmp; \
	1355	IF_SUCCESS; \
	1356	} \
	1357	else { \
	1358	IF_FAIL; \
	1359	} \
	1360	); \
	1361
	1362	#define UTF8_LOAD(TeSt1_UtF8, TeSt2_UtF8, IF_SUCCESS, IF_FAIL) \
		/* UTF-8 boundary scan driven by two caller-supplied expressions. */ \
		/* TeSt1_UtF8 classifies the previous code point, which is held in */ \
		/* tmp; TeSt2_UtF8 classifies the character at s.  When the two */ \
		/* classes differ, tmp is flipped and IF_SUCCESS runs; otherwise */ \
		/* IF_FAIL runs.  Relies on s, strend and tmp being in scope.  The */ \
		/* last line ends in a backslash, so the empty line after the */ \
		/* definition is part of the macro and must be kept. */ \
	1363	if (s == PL_bostr) { \
	1364	tmp = '\n'; /* at string start, pretend a new-line came before */ \
	1365	} \
	1366	else { \
		/* Hop back one position from s, limited by PL_bostr.  Both */ \
		/* arguments are passed as U8 pointers; casting them to a plain */ \
		/* U8 would truncate the addresses to a single byte. */ \
	1367	U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr); \
	1368	tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT); \
	1369	} \
	1370	tmp = TeSt1_UtF8; \
	1371	LOAD_UTF8_CHARCLASS_ALNUM(); \
	1372	REXEC_FBC_UTF8_SCAN( \
	1373	if (tmp == ! (TeSt2_UtF8)) { \
	1374	tmp = !tmp; \
	1375	IF_SUCCESS; \
	1376	} \
	1377	else { \
	1378	IF_FAIL; \
	1379	} \
	1380	); \
	1381
	1382	/* Entry points for the word-boundary node types; each one just forwards
		* to FBC_BOUND_COMMON with a different argument arrangement.
		*
		* The only difference between the BOUND and NBOUND cases is that
	1383	* REXEC_FBC_TRYIT is called when matched in BOUND, and when non-matched in
	1384	* NBOUND. This is accomplished by passing it in either the if or else clause,
	1385	* with the other one being empty (PLACEHOLDER, defined elsewhere).
		*
		* The plain variants build their UTF-8 branch with UTF8_LOAD, handing it
		* TEST1_UTF8 and TEST2_UTF8. The _NOLOAD variants build it with
		* UTF8_NOLOAD, handing it TEST_NON_UTF8 instead; their TEST1_UTF8 and
		* TEST2_UTF8 parameters are accepted but not used in the expansion. */
	1386	#define FBC_BOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1387	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1388
	1389	#define FBC_BOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1390	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1391
	1392	#define FBC_NBOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1393	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1394
	1395	#define FBC_NBOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1396	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1397
	1398
	1399	/* Common to the BOUND and NBOUND cases. Unfortunately the UTF8 tests need to
	1400	* be passed in completely with the variable name being tested, which isn't
	1401	* such a clean interface, but this is easier to read than it was before. We
	1402	* are looking for the boundary (or non-boundary) between a word and non-word
	1403	* character. The utf8 and non-utf8 cases have the same logic, but the details
	1404	* must be different. Find the "wordness" of the character just prior to this
	1405	* one, and compare it with the wordness of this one. If they differ, we have
	1406	* a boundary. At the beginning of the string, pretend that the previous
	1407	* character was a new-line.
		*
		* UTF8_CODE is the complete, ready-made scan for a UTF-8 target and is
		* pasted in as is. For a non-UTF-8 target the scan is written out here:
		* tmp holds the wordness of the previous byte, and at each position
		* IF_SUCCESS runs when the wordness changes (tmp is flipped first) and
		* IF_FAIL runs when it does not.
		*
		* After either scan has run off the end of the string, one more attempt
		* is made at the final position, but only if the pattern's minlen is 0
		* and tmp is still true; it jumps to got_it when reginfo is NULL or
		* regtry() succeeds. The logical-or there is written as plain `||`; the
		* previous backslash-escaped spelling is not valid C.
		*
		* Relies on utf8_target, s, strend, tmp, prog, reginfo and the got_it
		* label being in scope. */
	1408	#define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1409	if (utf8_target) { \
	1410	UTF8_CODE \
	1411	} \
	1412	else { /* Not utf8 */ \
	1413	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1414	tmp = TEST_NON_UTF8(tmp); \
	1415	REXEC_FBC_SCAN( \
	1416	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1417	tmp = !tmp; \
	1418	IF_SUCCESS; \
	1419	} \
	1420	else { \
	1421	IF_FAIL; \
	1422	} \
	1423	); \
	1424	} \
	1425	if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s))) \
	1426	goto got_it;
	1427
	1428	/* We know what class REx starts with. Try to find this position... */
	1429	/* If reginfo is NULL, it's a dry run */
	1430	/* annoyingly all the vars in this routine have different names from their counterparts
	1431	in regmatch. /grrr */
	1432
	1433	STATIC char *
	1434	S_find_byclass(pTHX_ regexp * prog, const regnode c, char s,
	1435	const char strend, regmatch_info reginfo, bool is_utf8_pat)
	1436	{
	1437	dVAR;
	1438	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
	1439	char pat_string; / The pattern's exactish string */
	1440	char pat_end; / ptr to end char of pat_string */
	1441	re_fold_t folder; /* Function for computing non-utf8 folds */
	1442	const U8 fold_array; / array for folding ords < 256 */
	1443	STRLEN ln;
	1444	STRLEN lnc;
	1445	U8 c1;
	1446	U8 c2;
	1447	char *e;
	1448	I32 tmp = 1; /* Scratch variable? */
	1449	const bool utf8_target = PL_reg_match_utf8;
	1450	UV utf8_fold_flags = 0;
	1451	bool to_complement = FALSE; /* Invert the result? Taking the xor of this
	1452	with a result inverts that result, as 0^1 =
	1453	1 and 1^1 = 0 */
	1454	_char_class_number classnum;
	1455
	1456	RXi_GET_DECL(prog,progi);
	1457
	1458	PERL_ARGS_ASSERT_FIND_BYCLASS;
	1459
	1460	/* We know what class it must start with. */
	1461	switch (OP(c)) {
	1462	case ANYOF:
	1463	case ANYOF_SYNTHETIC:
	1464	case ANYOF_WARN_SUPER:
	1465	if (utf8_target) {
	1466	REXEC_FBC_UTF8_CLASS_SCAN(
	1467	reginclass(prog, c, (U8*)s, utf8_target));
	1468	}
	1469	else {
	1470	REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
	1471	}
	1472	break;
	1473	case CANY:
	1474	REXEC_FBC_SCAN(
	1475	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1476	goto got_it;
	1477	else
	1478	tmp = doevery;
	1479	);
	1480	break;
	1481
	1482	case EXACTFA:
	1483	if (is_utf8_pat \|\| utf8_target) {
	1484	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	1485	goto do_exactf_utf8;
	1486	}
	1487	fold_array = PL_fold_latin1; /* Latin1 folds are not affected by */
	1488	folder = foldEQ_latin1; /* /a, except the sharp s one which */
	1489	goto do_exactf_non_utf8; /* isn't dealt with by these */
	1490
	1491	case EXACTF:
	1492	if (utf8_target) {
	1493
	1494	/* regcomp.c already folded this if pattern is in UTF-8 */
	1495	utf8_fold_flags = 0;
	1496	goto do_exactf_utf8;
	1497	}
	1498	fold_array = PL_fold;
	1499	folder = foldEQ;
	1500	goto do_exactf_non_utf8;
	1501
	1502	case EXACTFL:
	1503	if (is_utf8_pat \|\| utf8_target) {
	1504	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	1505	goto do_exactf_utf8;
	1506	}
	1507	fold_array = PL_fold_locale;
	1508	folder = foldEQ_locale;
	1509	goto do_exactf_non_utf8;
	1510
	1511	case EXACTFU_SS:
	1512	if (is_utf8_pat) {
	1513	utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
	1514	}
	1515	goto do_exactf_utf8;
	1516
	1517	case EXACTFU_TRICKYFOLD:
	1518	case EXACTFU:
	1519	if (is_utf8_pat \|\| utf8_target) {
	1520	utf8_fold_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	1521	goto do_exactf_utf8;
	1522	}
	1523
	1524	/* Any 'ss' in the pattern should have been replaced by regcomp,
	1525	* so we don't have to worry here about this single special case
	1526	* in the Latin1 range */
	1527	fold_array = PL_fold_latin1;
	1528	folder = foldEQ_latin1;
	1529
	1530	/* FALL THROUGH */
	1531
	1532	do_exactf_non_utf8: /* Neither pattern nor string are UTF8, and there
	1533	are no glitches with fold-length differences
	1534	between the target string and pattern */
	1535
	1536	/* The idea in the non-utf8 EXACTF* cases is to first find the
	1537	* first character of the EXACTF* node and then, if necessary,
	1538	* case-insensitively compare the full text of the node. c1 is the
	1539	* first character. c2 is its fold. This logic will not work for
	1540	* Unicode semantics and the german sharp ss, which hence should
	1541	* not be compiled into a node that gets here. */
	1542	pat_string = STRING(c);
	1543	ln = STR_LEN(c); /* length to match in octets/bytes */
	1544
	1545	/* We know that we have to match at least 'ln' bytes (which is the
	1546	* same as characters, since not utf8). If we have to match 3
	1547	* characters, and there are only 2 available, we know without
	1548	* trying that it will fail; so don't start a match past the
	1549	* required minimum number from the far end */
	1550	e = HOP3c(strend, -((I32)ln), s);
	1551
	1552	if (!reginfo && e < s) {
	1553	e = s; /* Due to minlen logic of intuit() */
	1554	}
	1555
	1556	c1 = *pat_string;
	1557	c2 = fold_array[c1];
	1558	if (c1 == c2) { /* If char and fold are the same */
	1559	REXEC_FBC_EXACTISH_SCAN((U8)s == c1);
	1560	}
	1561	else {
	1562	REXEC_FBC_EXACTISH_SCAN((U8)s == c1 \|\| (U8)s == c2);
	1563	}
	1564	break;
	1565
	1566	do_exactf_utf8:
	1567	{
	1568	unsigned expansion;
	1569
	1570	/* If one of the operands is in utf8, we can't use the simpler folding
	1571	* above, due to the fact that many different characters can have the
	1572	* same fold, or portion of a fold, or different- length fold */
	1573	pat_string = STRING(c);
	1574	ln = STR_LEN(c); /* length to match in octets/bytes */
	1575	pat_end = pat_string + ln;
	1576	lnc = is_utf8_pat /* length to match in characters */
	1577	? utf8_length((U8 ) pat_string, (U8 ) pat_end)
	1578	: ln;
	1579
	1580	/* We have 'lnc' characters to match in the pattern, but because of
	1581	* multi-character folding, each character in the target can match
	1582	* up to 3 characters (Unicode guarantees it will never exceed
	1583	* this) if it is utf8-encoded; and up to 2 if not (based on the
	1584	* fact that the Latin 1 folds are already determined, and the
	1585	* only multi-char fold in that range is the sharp-s folding to
	1586	* 'ss'. Thus, a pattern character can match as little as 1/3 of a
	1587	* string character. Adjust lnc accordingly, rounding up, so that
	1588	* if we need to match at least 4+1/3 chars, that really is 5. */
	1589	expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
	1590	lnc = (lnc + expansion - 1) / expansion;
	1591
	1592	/* As in the non-UTF8 case, if we have to match 3 characters, and
	1593	* only 2 are left, it's guaranteed to fail, so don't start a
	1594	* match that would require us to go beyond the end of the string
	1595	*/
	1596	e = HOP3c(strend, -((I32)lnc), s);
	1597
	1598	if (!reginfo && e < s) {
	1599	e = s; /* Due to minlen logic of intuit() */
	1600	}
	1601
	1602	/* XXX Note that we could recalculate e to stop the loop earlier,
	1603	* as the worst case expansion above will rarely be met, and as we
	1604	* go along we would usually find that e moves further to the left.
	1605	* This would happen only after we reached the point in the loop
	1606	* where if there were no expansion we should fail. Unclear if
	1607	* worth the expense */
	1608
	1609	while (s <= e) {
	1610	char my_strend= (char )strend;
	1611	if (foldEQ_utf8_flags(s, &my_strend, 0, utf8_target,
	1612	pat_string, NULL, ln, is_utf8_pat, utf8_fold_flags)
	1613	&& (!reginfo \|\| regtry(reginfo, &s)) )
	1614	{
	1615	goto got_it;
	1616	}
	1617	s += (utf8_target) ? UTF8SKIP(s) : 1;
	1618	}
	1619	break;
	1620	}
	1621	case BOUNDL:
	1622	RXp_MATCH_TAINTED_on(prog);
	1623	FBC_BOUND(isWORDCHAR_LC,
	1624	isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1625	isWORDCHAR_LC_utf8((U8*)s));
	1626	break;
	1627	case NBOUNDL:
	1628	RXp_MATCH_TAINTED_on(prog);
	1629	FBC_NBOUND(isWORDCHAR_LC,
	1630	isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1631	isWORDCHAR_LC_utf8((U8*)s));
	1632	break;
	1633	case BOUND:
	1634	FBC_BOUND(isWORDCHAR,
	1635	isWORDCHAR_uni(tmp),
	1636	cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
	1637	break;
	1638	case BOUNDA:
	1639	FBC_BOUND_NOLOAD(isWORDCHAR_A,
	1640	isWORDCHAR_A(tmp),
	1641	isWORDCHAR_A((U8*)s));
	1642	break;
	1643	case NBOUND:
	1644	FBC_NBOUND(isWORDCHAR,
	1645	isWORDCHAR_uni(tmp),
	1646	cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
	1647	break;
	1648	case NBOUNDA:
	1649	FBC_NBOUND_NOLOAD(isWORDCHAR_A,
	1650	isWORDCHAR_A(tmp),
	1651	isWORDCHAR_A((U8*)s));
	1652	break;
	1653	case BOUNDU:
	1654	FBC_BOUND(isWORDCHAR_L1,
	1655	isWORDCHAR_uni(tmp),
	1656	cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
	1657	break;
	1658	case NBOUNDU:
	1659	FBC_NBOUND(isWORDCHAR_L1,
	1660	isWORDCHAR_uni(tmp),
	1661	cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
	1662	break;
	1663	case LNBREAK:
	1664	REXEC_FBC_CSCAN(is_LNBREAK_utf8_safe(s, strend),
	1665	is_LNBREAK_latin1_safe(s, strend)
	1666	);
	1667	break;
	1668
	1669	/* The argument to all the POSIX node types is the class number to pass to
	1670	* _generic_isCC() to build a mask for searching in PL_charclass[] */
	1671
	1672	case NPOSIXL:
	1673	to_complement = 1;
	1674	/* FALLTHROUGH */
	1675
	1676	case POSIXL:
	1677	RXp_MATCH_TAINTED_on(prog);
	1678	REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)),
	1679	to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
	1680	break;
	1681
	1682	case NPOSIXD:
	1683	to_complement = 1;
	1684	/* FALLTHROUGH */
	1685
	1686	case POSIXD:
	1687	if (utf8_target) {
	1688	goto posix_utf8;
	1689	}
	1690	goto posixa;
	1691
	1692	case NPOSIXA:
	1693	if (utf8_target) {
	1694	/* The complement of something that matches only ASCII matches all
	1695	* UTF-8 variant code points, plus everything in ASCII that isn't
	1696	* in the class */
	1697	REXEC_FBC_UTF8_CLASS_SCAN(! UTF8_IS_INVARIANT(*s)
	1698	\|\| ! _generic_isCC_A(*s, FLAGS(c)));
	1699	break;
	1700	}
	1701
	1702	to_complement = 1;
	1703	/* FALLTHROUGH */
	1704
	1705	case POSIXA:
	1706	posixa:
	1707	/* Don't need to worry about utf8, as it can match only a single
	1708	* byte invariant character. */
	1709	REXEC_FBC_CLASS_SCAN(
	1710	to_complement ^ cBOOL(_generic_isCC_A(*s, FLAGS(c))));
	1711	break;
	1712
	1713	case NPOSIXU:
	1714	to_complement = 1;
	1715	/* FALLTHROUGH */
	1716
	1717	case POSIXU:
	1718	if (! utf8_target) {
	1719	REXEC_FBC_CLASS_SCAN(to_complement ^ cBOOL(_generic_isCC(*s,
	1720	FLAGS(c))));
	1721	}
	1722	else {
	1723
	1724	posix_utf8:
	1725	classnum = (_char_class_number) FLAGS(c);
	1726	if (classnum < _FIRST_NON_SWASH_CC) {
	1727	while (s < strend) {
	1728
	1729	/* We avoid loading in the swash as long as possible, but
	1730	* should we have to, we jump to a separate loop. This
	1731	* extra 'if' statement is what keeps this code from being
	1732	* just a call to REXEC_FBC_UTF8_CLASS_SCAN() */
	1733	if (UTF8_IS_ABOVE_LATIN1(*s)) {
	1734	goto found_above_latin1;
	1735	}
	1736	if ((UTF8_IS_INVARIANT(*s)
	1737	&& to_complement ^ cBOOL(_generic_isCC((U8) *s,
	1738	classnum)))
	1739	\|\| (UTF8_IS_DOWNGRADEABLE_START(*s)
	1740	&& to_complement ^ cBOOL(
	1741	_generic_isCC(TWO_BYTE_UTF8_TO_UNI(s, (s + 1)),
	1742	classnum))))
	1743	{
	1744	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1745	goto got_it;
	1746	else {
	1747	tmp = doevery;
	1748	}
	1749	}
	1750	else {
	1751	tmp = 1;
	1752	}
	1753	s += UTF8SKIP(s);
	1754	}
	1755	}
	1756	else switch (classnum) { /* These classes are implemented as
	1757	macros */
	1758	case _CC_ENUM_SPACE: /* XXX would require separate code if we
	1759	revert the change of \v matching this */
	1760	/* FALL THROUGH */
	1761
	1762	case _CC_ENUM_PSXSPC:
	1763	REXEC_FBC_UTF8_CLASS_SCAN(
	1764	to_complement ^ cBOOL(isSPACE_utf8(s)));
	1765	break;
	1766
	1767	case _CC_ENUM_BLANK:
	1768	REXEC_FBC_UTF8_CLASS_SCAN(
	1769	to_complement ^ cBOOL(isBLANK_utf8(s)));
	1770	break;
	1771
	1772	case _CC_ENUM_XDIGIT:
	1773	REXEC_FBC_UTF8_CLASS_SCAN(
	1774	to_complement ^ cBOOL(isXDIGIT_utf8(s)));
	1775	break;
	1776
	1777	case _CC_ENUM_VERTSPACE:
	1778	REXEC_FBC_UTF8_CLASS_SCAN(
	1779	to_complement ^ cBOOL(isVERTWS_utf8(s)));
	1780	break;
	1781
	1782	case _CC_ENUM_CNTRL:
	1783	REXEC_FBC_UTF8_CLASS_SCAN(
	1784	to_complement ^ cBOOL(isCNTRL_utf8(s)));
	1785	break;
	1786
	1787	default:
	1788	Perl_croak(aTHX_ "panic: find_byclass() node %d='%s' has an unexpected character class '%d'", OP(c), PL_reg_name[OP(c)], classnum);
	1789	assert(0); /* NOTREACHED */
	1790	}
	1791	}
	1792	break;
	1793
	1794	found_above_latin1: /* Here we have to load a swash to get the result
	1795	for the current code point */
	1796	if (! PL_utf8_swash_ptrs[classnum]) {
	1797	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	1798	PL_utf8_swash_ptrs[classnum] =
	1799	_core_swash_init("utf8", swash_property_names[classnum],
	1800	&PL_sv_undef, 1, 0, NULL, &flags);
	1801	}
	1802
	1803	/* This is a copy of the loop above for swash classes, though using the
	1804	* FBC macro instead of being expanded out. Since we've loaded the
	1805	* swash, we don't have to check for that each time through the loop */
	1806	REXEC_FBC_UTF8_CLASS_SCAN(
	1807	to_complement ^ cBOOL(_generic_utf8(
	1808	classnum,
	1809	s,
	1810	swash_fetch(PL_utf8_swash_ptrs[classnum],
	1811	(U8 *) s, TRUE))));
	1812	break;
	1813
	1814	case AHOCORASICKC:
	1815	case AHOCORASICK:
	1816	{
	1817	DECL_TRIE_TYPE(c);
	1818	/* what trie are we using right now */
	1819	reg_ac_data aho = (reg_ac_data)progi->data->data[ ARG( c ) ];
	1820	reg_trie_data trie = (reg_trie_data)progi->data->data[ aho->trie ];
	1821	HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
	1822
	1823	const char *last_start = strend - trie->minlen;
	1824	#ifdef DEBUGGING
	1825	const char *real_start = s;
	1826	#endif
	1827	STRLEN maxlen = trie->maxlen;
	1828	SV *sv_points;
	1829	U8 *points; / map of where we were in the input string
	1830	when reading a given char. For ASCII this
	1831	is unnecessary overhead as the relationship
	1832	is always 1:1, but for Unicode, especially
	1833	case folded Unicode this is not true. */
	1834	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1835	U8 *bitmap=NULL;
	1836
	1837
	1838	GET_RE_DEBUG_FLAGS_DECL;
	1839
	1840	/* We can't just allocate points here. We need to wrap it in
	1841	* an SV so it gets freed properly if there is a croak while
	1842	* running the match */
	1843	ENTER;
	1844	SAVETMPS;
	1845	sv_points=newSV(maxlen * sizeof(U8 *));
	1846	SvCUR_set(sv_points,
	1847	maxlen * sizeof(U8 *));
	1848	SvPOK_on(sv_points);
	1849	sv_2mortal(sv_points);
	1850	points=(U8**)SvPV_nolen(sv_points );
	1851	if ( trie_type != trie_utf8_fold
	1852	&& (trie->bitmap \|\| OP(c)==AHOCORASICKC) )
	1853	{
	1854	if (trie->bitmap)
	1855	bitmap=(U8*)trie->bitmap;
	1856	else
	1857	bitmap=(U8*)ANYOF_BITMAP(c);
	1858	}
	1859	/* this is the Aho-Corasick algorithm modified a touch
	1860	to include special handling for long "unknown char" sequences.
	1861	The basic idea being that we use AC as long as we are dealing
	1862	with a possible matching char, when we encounter an unknown char
	1863	(and we have not encountered an accepting state) we scan forward
	1864	until we find a legal starting char.
	1865	AC matching is basically that of trie matching, except that when
	1866	we encounter a failing transition, we fall back to the current
	1867	states "fail state", and try the current char again, a process
	1868	we repeat until we reach the root state, state 1, or a legal
	1869	transition. If we fail on the root state then we can either
	1870	terminate if we have reached an accepting state previously, or
	1871	restart the entire process from the beginning if we have not.
	1872
	1873	*/
	1874	while (s <= last_start) {
	1875	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1876	U8 uc = (U8)s;
	1877	U16 charid = 0;
	1878	U32 base = 1;
	1879	U32 state = 1;
	1880	UV uvc = 0;
	1881	STRLEN len = 0;
	1882	STRLEN foldlen = 0;
	1883	U8 uscan = (U8)NULL;
	1884	U8 *leftmost = NULL;
	1885	#ifdef DEBUGGING
	1886	U32 accepted_word= 0;
	1887	#endif
	1888	U32 pointpos = 0;
	1889
	1890	while ( state && uc <= (U8*)strend ) {
	1891	int failed=0;
	1892	U32 word = aho->states[ state ].wordnum;
	1893
	1894	if( state==1 ) {
	1895	if ( bitmap ) {
	1896	DEBUG_TRIE_EXECUTE_r(
	1897	if ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1898	dump_exec_pos( (char *)uc, c, strend, real_start,
	1899	(char *)uc, utf8_target );
	1900	PerlIO_printf( Perl_debug_log,
	1901	" Scanning for legal start char...\n");
	1902	}
	1903	);
	1904	if (utf8_target) {
	1905	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1906	uc += UTF8SKIP(uc);
	1907	}
	1908	} else {
	1909	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1910	uc++;
	1911	}
	1912	}
	1913	s= (char *)uc;
	1914	}
	1915	if (uc >(U8*)last_start) break;
	1916	}
	1917
	1918	if ( word ) {
	1919	U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
	1920	if (!leftmost \|\| lpos < leftmost) {
	1921	DEBUG_r(accepted_word=word);
	1922	leftmost= lpos;
	1923	}
	1924	if (base==0) break;
	1925
	1926	}
	1927	points[pointpos++ % maxlen]= uc;
	1928	if (foldlen \|\| uc < (U8*)strend) {
	1929	REXEC_TRIE_READ_CHAR(trie_type, trie,
	1930	widecharmap, uc,
	1931	uscan, len, uvc, charid, foldlen,
	1932	foldbuf, uniflags);
	1933	DEBUG_TRIE_EXECUTE_r({
	1934	dump_exec_pos( (char *)uc, c, strend,
	1935	real_start, s, utf8_target);
	1936	PerlIO_printf(Perl_debug_log,
	1937	" Charid:%3u CP:%4"UVxf" ",
	1938	charid, uvc);
	1939	});
	1940	}
	1941	else {
	1942	len = 0;
	1943	charid = 0;
	1944	}
	1945
	1946
	1947	do {
	1948	#ifdef DEBUGGING
	1949	word = aho->states[ state ].wordnum;
	1950	#endif
	1951	base = aho->states[ state ].trans.base;
	1952
	1953	DEBUG_TRIE_EXECUTE_r({
	1954	if (failed)
	1955	dump_exec_pos( (char *)uc, c, strend, real_start,
	1956	s, utf8_target );
	1957	PerlIO_printf( Perl_debug_log,
	1958	"%sState: %4"UVxf", word=%"UVxf,
	1959	failed ? " Fail transition to " : "",
	1960	(UV)state, (UV)word);
	1961	});
	1962	if ( base ) {
	1963	U32 tmp;
	1964	I32 offset;
	1965	if (charid &&
	1966	( ((offset = base + charid
	1967	- 1 - trie->uniquecharcount)) >= 0)
	1968	&& ((U32)offset < trie->lasttrans)
	1969	&& trie->trans[offset].check == state
	1970	&& (tmp=trie->trans[offset].next))
	1971	{
	1972	DEBUG_TRIE_EXECUTE_r(
	1973	PerlIO_printf( Perl_debug_log," - legal\n"));
	1974	state = tmp;
	1975	break;
	1976	}
	1977	else {
	1978	DEBUG_TRIE_EXECUTE_r(
	1979	PerlIO_printf( Perl_debug_log," - fail\n"));
	1980	failed = 1;
	1981	state = aho->fail[state];
	1982	}
	1983	}
	1984	else {
	1985	/* we must be accepting here */
	1986	DEBUG_TRIE_EXECUTE_r(
	1987	PerlIO_printf( Perl_debug_log," - accepting\n"));
	1988	failed = 1;
	1989	break;
	1990	}
	1991	} while(state);
	1992	uc += len;
	1993	if (failed) {
	1994	if (leftmost)
	1995	break;
	1996	if (!state) state = 1;
	1997	}
	1998	}
	1999	if ( aho->states[ state ].wordnum ) {
	2000	U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
	2001	if (!leftmost \|\| lpos < leftmost) {
	2002	DEBUG_r(accepted_word=aho->states[ state ].wordnum);
	2003	leftmost = lpos;
	2004	}
	2005	}
	2006	if (leftmost) {
	2007	s = (char*)leftmost;
	2008	DEBUG_TRIE_EXECUTE_r({
	2009	PerlIO_printf(
	2010	Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
	2011	(UV)accepted_word, (IV)(s - real_start)
	2012	);
	2013	});
	2014	if (!reginfo \|\| regtry(reginfo, &s)) {
	2015	FREETMPS;
	2016	LEAVE;
	2017	goto got_it;
	2018	}
	2019	s = HOPc(s,1);
	2020	DEBUG_TRIE_EXECUTE_r({
	2021	PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
	2022	});
	2023	} else {
	2024	DEBUG_TRIE_EXECUTE_r(
	2025	PerlIO_printf( Perl_debug_log,"No match.\n"));
	2026	break;
	2027	}
	2028	}
	2029	FREETMPS;
	2030	LEAVE;
	2031	}
	2032	break;
	2033	default:
	2034	Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
	2035	break;
	2036	}
	2037	return 0;
	2038	got_it:
	2039	return s;
	2040	}
	2041
	2042
	2043	/*
	2044	- regexec_flags - match a regexp against a string
	2045	*/
	2046	I32
	2047	Perl_regexec_flags(pTHX_ REGEXP * const rx, char stringarg, char strend,
	2048	char strbeg, I32 minend, SV sv, void *data, U32 flags)
	2049	/* stringarg: the point in the string at which to begin matching */
	2050	/* strend: pointer to null at end of string */
	2051	/* strbeg: real beginning of string */
	2052	/* minend: end of match must be >= minend bytes after stringarg. */
	2053	/* sv: SV being matched: only used for utf8 flag, pos() etc; string
	2054	* itself is accessed via the pointers above */
	2055	/* data: May be used for some additional optimizations.
	2056	Currently its only used, with a U32 cast, for transmitting
	2057	the ganch offset when doing a /g match. This will change */
	2058	/* nosave: For optimizations. */
	2059
	2060	{
	2061	dVAR;
	2062	struct regexp *const prog = ReANY(rx);
	2063	char *s;
	2064	regnode *c;
	2065	char *startpos = stringarg;
	2066	I32 minlen; /* must match at least this many chars */
	2067	I32 dontbother = 0; /* how many characters not to try at end */
	2068	I32 end_shift = 0; /* Same for the end. / / CC */
	2069	I32 scream_pos = -1; /* Internal iterator of scream. */
	2070	char *scream_olds = NULL;
	2071	const bool utf8_target = cBOOL(DO_UTF8(sv));
	2072	I32 multiline;
	2073	RXi_GET_DECL(prog,progi);
	2074	regmatch_info reginfo; /* create some info to pass to regtry etc */
	2075	regexp_paren_pair *swap = NULL;
	2076	GET_RE_DEBUG_FLAGS_DECL;
	2077
	2078	PERL_ARGS_ASSERT_REGEXEC_FLAGS;
	2079	PERL_UNUSED_ARG(data);
	2080
	2081	/* Be paranoid... */
	2082	if (prog == NULL \|\| startpos == NULL) {
	2083	Perl_croak(aTHX_ "NULL regexp parameter");
	2084	return 0;
	2085	}
	2086
	2087	multiline = prog->extflags & RXf_PMf_MULTILINE;
	2088	reginfo.prog = rx; /* Yes, sorry that this is confusing. */
	2089
	2090	RX_MATCH_UTF8_set(rx, utf8_target);
	2091	DEBUG_EXECUTE_r(
	2092	debug_start_match(rx, utf8_target, startpos, strend,
	2093	"Matching");
	2094	);
	2095
	2096	minlen = prog->minlen;
	2097
	2098	if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
	2099	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	2100	"String too short [regexec_flags]...\n"));
	2101	goto phooey;
	2102	}
	2103
	2104
	2105	/* Check validity of program. */
	2106	if (UCHARAT(progi->program) != REG_MAGIC) {
	2107	Perl_croak(aTHX_ "corrupted regexp program");
	2108	}
	2109
	2110	RX_MATCH_TAINTED_off(rx);
	2111	PL_reg_state.re_state_eval_setup_done = FALSE;
	2112	PL_reg_maxiter = 0;
	2113
	2114	reginfo.is_utf8_pat = cBOOL(RX_UTF8(rx));
	2115	reginfo.warned = FALSE;
	2116	/* Mark beginning of line for ^ and lookbehind. */
	2117	reginfo.bol = startpos; /* XXX not used ??? */
	2118	PL_bostr = strbeg;
	2119	reginfo.sv = sv;
	2120
	2121	/* Mark end of line for $ (and such) */
	2122	PL_regeol = strend;
	2123
	2124	/* see how far we have to get to not match where we matched before */
	2125	reginfo.till = startpos+minend;
	2126
	2127	/* If there is a "must appear" string, look for it. */
	2128	s = startpos;
	2129
	2130	if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
	2131	MAGIC *mg;
	2132	if (flags & REXEC_IGNOREPOS){ /* Means: check only at start */
	2133	reginfo.ganch = startpos + prog->gofs;
	2134	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2135	"GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
	2136	} else if (sv && SvTYPE(sv) >= SVt_PVMG
	2137	&& SvMAGIC(sv)
	2138	&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
	2139	&& mg->mg_len >= 0) {
	2140	reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
	2141	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2142	"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
	2143
	2144	if (prog->extflags & RXf_ANCH_GPOS) {
	2145	if (s > reginfo.ganch)
	2146	goto phooey;
	2147	s = reginfo.ganch - prog->gofs;
	2148	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2149	"GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
	2150	if (s < strbeg)
	2151	goto phooey;
	2152	}
	2153	}
	2154	else if (data) {
	2155	reginfo.ganch = strbeg + PTR2UV(data);
	2156	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2157	"GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
	2158
	2159	} else { /* pos() not defined */
	2160	reginfo.ganch = strbeg;
	2161	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2162	"GPOS: reginfo.ganch = strbeg\n"));
	2163	}
	2164	}
	2165	if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
	2166	/* We have to be careful. If the previous successful match
	2167	was from this regex we don't want a subsequent partially
	2168	successful match to clobber the old results.
	2169	So when we detect this possibility we add a swap buffer
	2170	to the re, and switch the buffer each match. If we fail,
	2171	we switch it back; otherwise we leave it swapped.
	2172	*/
	2173	swap = prog->offs;
	2174	/* do we need a save destructor here for eval dies? */
	2175	Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
	2176	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	2177	"rex=0x%"UVxf" saving offs: orig=0x%"UVxf" new=0x%"UVxf"\n",
	2178	PTR2UV(prog),
	2179	PTR2UV(swap),
	2180	PTR2UV(prog->offs)
	2181	));
	2182	}
	2183	if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL \|\| prog->check_utf8 != NULL)) {
	2184	re_scream_pos_data d;
	2185
	2186	d.scream_olds = &scream_olds;
	2187	d.scream_pos = &scream_pos;
	2188	s = re_intuit_start(rx, sv, s, strend, flags, &d);
	2189	if (!s) {
	2190	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
	2191	goto phooey; /* not present */
	2192	}
	2193	}
	2194
	2195
	2196
	2197	/* Simplest case: anchored match need be tried only once. */
	2198	/* [unless only anchor is BOL and multiline is set] */
	2199	if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
	2200	if (s == startpos && regtry(&reginfo, &startpos))
	2201	goto got_it;
	2202	else if (multiline \|\| (prog->intflags & PREGf_IMPLICIT)
	2203	\|\| (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
	2204	{
	2205	char *end;
	2206
	2207	if (minlen)
	2208	dontbother = minlen - 1;
	2209	end = HOP3c(strend, -dontbother, strbeg) - 1;
	2210	/* for multiline we only have to try after newlines */
	2211	if (prog->check_substr \|\| prog->check_utf8) {
	2212	/* because of the goto we can not easily reuse the macros for bifurcating the
	2213	unicode/non-unicode match modes here like we do elsewhere - demerphq */
	2214	if (utf8_target) {
	2215	if (s == startpos)
	2216	goto after_try_utf8;
	2217	while (1) {
	2218	if (regtry(&reginfo, &s)) {
	2219	goto got_it;
	2220	}
	2221	after_try_utf8:
	2222	if (s > end) {
	2223	goto phooey;
	2224	}
	2225	if (prog->extflags & RXf_USE_INTUIT) {
	2226	s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
	2227	if (!s) {
	2228	goto phooey;
	2229	}
	2230	}
	2231	else {
	2232	s += UTF8SKIP(s);
	2233	}
	2234	}
	2235	} /* end search for check string in unicode */
	2236	else {
	2237	if (s == startpos) {
	2238	goto after_try_latin;
	2239	}
	2240	while (1) {
	2241	if (regtry(&reginfo, &s)) {
	2242	goto got_it;
	2243	}
	2244	after_try_latin:
	2245	if (s > end) {
	2246	goto phooey;
	2247	}
	2248	if (prog->extflags & RXf_USE_INTUIT) {
	2249	s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
	2250	if (!s) {
	2251	goto phooey;
	2252	}
	2253	}
	2254	else {
	2255	s++;
	2256	}
	2257	}
	2258	} /* end search for check string in latin*/
	2259	} /* end search for check string */
	2260	else { /* search for newline */
	2261	if (s > startpos) {
	2262	/XXX: The s-- is almost definitely wrong here under unicode - demeprhq/
	2263	s--;
	2264	}
	2265	/* We can use a more efficient search as newlines are the same in unicode as they are in latin */
	2266	while (s <= end) { /* note it could be possible to match at the end of the string */
	2267	if (s++ == '\n') { / don't need PL_utf8skip here */
	2268	if (regtry(&reginfo, &s))
	2269	goto got_it;
	2270	}
	2271	}
	2272	} /* end search for newline */
	2273	} /* end anchored/multiline check string search */
	2274	goto phooey;
	2275	} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
	2276	{
	2277	/* the warning about reginfo.ganch being used without initialization
	2278	is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
	2279	and we only enter this block when the same bit is set. */
	2280	char *tmp_s = reginfo.ganch - prog->gofs;
	2281
	2282	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
	2283	goto got_it;
	2284	goto phooey;
	2285	}
	2286
	2287	/* Messy cases: unanchored match. */
	2288	if ((prog->anchored_substr \|\| prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
	2289	/* we have /x+whatever/ */
	2290	/* it must be a one character string (XXXX Except is_utf8_pat?) */
	2291	char ch;
	2292	#ifdef DEBUGGING
	2293	int did_match = 0;
	2294	#endif
	2295	if (utf8_target) {
	2296	if (! prog->anchored_utf8) {
	2297	to_utf8_substr(prog);
	2298	}
	2299	ch = SvPVX_const(prog->anchored_utf8)[0];
	2300	REXEC_FBC_SCAN(
	2301	if (*s == ch) {
	2302	DEBUG_EXECUTE_r( did_match = 1 );
	2303	if (regtry(&reginfo, &s)) goto got_it;
	2304	s += UTF8SKIP(s);
	2305	while (s < strend && *s == ch)
	2306	s += UTF8SKIP(s);
	2307	}
	2308	);
	2309
	2310	}
	2311	else {
	2312	if (! prog->anchored_substr) {
	2313	if (! to_byte_substr(prog)) {
	2314	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2315	}
	2316	}
	2317	ch = SvPVX_const(prog->anchored_substr)[0];
	2318	REXEC_FBC_SCAN(
	2319	if (*s == ch) {
	2320	DEBUG_EXECUTE_r( did_match = 1 );
	2321	if (regtry(&reginfo, &s)) goto got_it;
	2322	s++;
	2323	while (s < strend && *s == ch)
	2324	s++;
	2325	}
	2326	);
	2327	}
	2328	DEBUG_EXECUTE_r(if (!did_match)
	2329	PerlIO_printf(Perl_debug_log,
	2330	"Did not find anchored character...\n")
	2331	);
	2332	}
	2333	else if (prog->anchored_substr != NULL
	2334	\|\| prog->anchored_utf8 != NULL
	2335	\|\| ((prog->float_substr != NULL \|\| prog->float_utf8 != NULL)
	2336	&& prog->float_max_offset < strend - s)) {
	2337	SV *must;
	2338	I32 back_max;
	2339	I32 back_min;
	2340	char *last;
	2341	char last1; / Last position checked before */
	2342	#ifdef DEBUGGING
	2343	int did_match = 0;
	2344	#endif
	2345	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	2346	if (utf8_target) {
	2347	if (! prog->anchored_utf8) {
	2348	to_utf8_substr(prog);
	2349	}
	2350	must = prog->anchored_utf8;
	2351	}
	2352	else {
	2353	if (! prog->anchored_substr) {
	2354	if (! to_byte_substr(prog)) {
	2355	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2356	}
	2357	}
	2358	must = prog->anchored_substr;
	2359	}
	2360	back_max = back_min = prog->anchored_offset;
	2361	} else {
	2362	if (utf8_target) {
	2363	if (! prog->float_utf8) {
	2364	to_utf8_substr(prog);
	2365	}
	2366	must = prog->float_utf8;
	2367	}
	2368	else {
	2369	if (! prog->float_substr) {
	2370	if (! to_byte_substr(prog)) {
	2371	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2372	}
	2373	}
	2374	must = prog->float_substr;
	2375	}
	2376	back_max = prog->float_max_offset;
	2377	back_min = prog->float_min_offset;
	2378	}
	2379
	2380	if (back_min<0) {
	2381	last = strend;
	2382	} else {
	2383	last = HOP3c(strend, /* Cannot start after this */
	2384	-(I32)(CHR_SVLEN(must)
	2385	- (SvTAIL(must) != 0) + back_min), strbeg);
	2386	}
	2387	if (s > PL_bostr)
	2388	last1 = HOPc(s, -1);
	2389	else
	2390	last1 = s - 1; /* bogus */
	2391
	2392	/* XXXX check_substr already used to find "s", can optimize if
	2393	check_substr==must. */
	2394	scream_pos = -1;
	2395	dontbother = end_shift;
	2396	strend = HOPc(strend, -dontbother);
	2397	while ( (s <= last) &&
	2398	(s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
	2399	(unsigned char*)strend, must,
	2400	multiline ? FBMrf_MULTILINE : 0)) ) {
	2401	DEBUG_EXECUTE_r( did_match = 1 );
	2402	if (HOPc(s, -back_max) > last1) {
	2403	last1 = HOPc(s, -back_min);
	2404	s = HOPc(s, -back_max);
	2405	}
	2406	else {
	2407	char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
	2408
	2409	last1 = HOPc(s, -back_min);
	2410	s = t;
	2411	}
	2412	if (utf8_target) {
	2413	while (s <= last1) {
	2414	if (regtry(&reginfo, &s))
	2415	goto got_it;
	2416	if (s >= last1) {
	2417	s++; /* to break out of outer loop */
	2418	break;
	2419	}
	2420	s += UTF8SKIP(s);
	2421	}
	2422	}
	2423	else {
	2424	while (s <= last1) {
	2425	if (regtry(&reginfo, &s))
	2426	goto got_it;
	2427	s++;
	2428	}
	2429	}
	2430	}
	2431	DEBUG_EXECUTE_r(if (!did_match) {
	2432	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	2433	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	2434	PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
	2435	((must == prog->anchored_substr \|\| must == prog->anchored_utf8)
	2436	? "anchored" : "floating"),
	2437	quoted, RE_SV_TAIL(must));
	2438	});
	2439	goto phooey;
	2440	}
	2441	else if ( (c = progi->regstclass) ) {
	2442	if (minlen) {
	2443	const OPCODE op = OP(progi->regstclass);
	2444	/* don't bother with what can't match */
	2445	if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
	2446	strend = HOPc(strend, -(minlen - 1));
	2447	}
	2448	DEBUG_EXECUTE_r({
	2449	SV * const prop = sv_newmortal();
	2450	regprop(prog, prop, c);
	2451	{
	2452	RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
	2453	s,strend-s,60);
	2454	PerlIO_printf(Perl_debug_log,
	2455	"Matching stclass %.*s against %s (%d bytes)\n",
	2456	(int)SvCUR(prop), SvPVX_const(prop),
	2457	quoted, (int)(strend - s));
	2458	}
	2459	});
	2460	if (find_byclass(prog, c, s, strend, &reginfo, reginfo.is_utf8_pat))
	2461	goto got_it;
	2462	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
	2463	}
	2464	else {
	2465	dontbother = 0;
	2466	if (prog->float_substr != NULL \|\| prog->float_utf8 != NULL) {
	2467	/* Trim the end. */
	2468	char *last= NULL;
	2469	SV* float_real;
	2470	STRLEN len;
	2471	const char *little;
	2472
	2473	if (utf8_target) {
	2474	if (! prog->float_utf8) {
	2475	to_utf8_substr(prog);
	2476	}
	2477	float_real = prog->float_utf8;
	2478	}
	2479	else {
	2480	if (! prog->float_substr) {
	2481	if (! to_byte_substr(prog)) {
	2482	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2483	}
	2484	}
	2485	float_real = prog->float_substr;
	2486	}
	2487
	2488	little = SvPV_const(float_real, len);
	2489	if (SvTAIL(float_real)) {
	2490	/* This means that float_real contains an artificial \n on
	2491	* the end due to the presence of something like this:
	2492	* /foo$/ where we can match both "foo" and "foo\n" at the
	2493	* end of the string. So we have to compare the end of the
	2494	* string first against the float_real without the \n and
	2495	* then against the full float_real with the string. We
	2496	* have to watch out for cases where the string might be
	2497	* smaller than the float_real or the float_real without
	2498	* the \n. */
	2499	char *checkpos= strend - len;
	2500	DEBUG_OPTIMISE_r(
	2501	PerlIO_printf(Perl_debug_log,
	2502	"%sChecking for float_real.%s\n",
	2503	PL_colors[4], PL_colors[5]));
	2504	if (checkpos + 1 < strbeg) {
	2505	/* can't match, even if we remove the trailing \n
	2506	* string is too short to match */
	2507	DEBUG_EXECUTE_r(
	2508	PerlIO_printf(Perl_debug_log,
	2509	"%sString shorter than required trailing substring, cannot match.%s\n",
	2510	PL_colors[4], PL_colors[5]));
	2511	goto phooey;
	2512	} else if (memEQ(checkpos + 1, little, len - 1)) {
	2513	/* can match, the end of the string matches without the
	2514	* "\n" */
	2515	last = checkpos + 1;
	2516	} else if (checkpos < strbeg) {
	2517	/* cant match, string is too short when the "\n" is
	2518	* included */
	2519	DEBUG_EXECUTE_r(
	2520	PerlIO_printf(Perl_debug_log,
	2521	"%sString does not contain required trailing substring, cannot match.%s\n",
	2522	PL_colors[4], PL_colors[5]));
	2523	goto phooey;
	2524	} else if (!multiline) {
	2525	/* non multiline match, so compare with the "\n" at the
	2526	* end of the string */
	2527	if (memEQ(checkpos, little, len)) {
	2528	last= checkpos;
	2529	} else {
	2530	DEBUG_EXECUTE_r(
	2531	PerlIO_printf(Perl_debug_log,
	2532	"%sString does not contain required trailing substring, cannot match.%s\n",
	2533	PL_colors[4], PL_colors[5]));
	2534	goto phooey;
	2535	}
	2536	} else {
	2537	/* multiline match, so we have to search for a place
	2538	* where the full string is located */
	2539	goto find_last;
	2540	}
	2541	} else {
	2542	find_last:
	2543	if (len)
	2544	last = rninstr(s, strend, little, little + len);
	2545	else
	2546	last = strend; /* matching "$" */
	2547	}
	2548	if (!last) {
	2549	/* at one point this block contained a comment which was
	2550	* probably incorrect, which said that this was a "should not
	2551	* happen" case. Even if it was true when it was written I am
	2552	* pretty sure it is not anymore, so I have removed the comment
	2553	* and replaced it with this one. Yves */
	2554	DEBUG_EXECUTE_r(
	2555	PerlIO_printf(Perl_debug_log,
	2556	"String does not contain required substring, cannot match.\n"
	2557	));
	2558	goto phooey;
	2559	}
	2560	dontbother = strend - last + prog->float_min_offset;
	2561	}
	2562	if (minlen && (dontbother < minlen))
	2563	dontbother = minlen - 1;
	2564	strend -= dontbother; /* this one's always in bytes! */
	2565	/* We don't know much -- general case. */
	2566	if (utf8_target) {
	2567	for (;;) {
	2568	if (regtry(&reginfo, &s))
	2569	goto got_it;
	2570	if (s >= strend)
	2571	break;
	2572	s += UTF8SKIP(s);
	2573	};
	2574	}
	2575	else {
	2576	do {
	2577	if (regtry(&reginfo, &s))
	2578	goto got_it;
	2579	} while (s++ < strend);
	2580	}
	2581	}
	2582
	2583	/* Failure. */
	2584	goto phooey;
	2585
	2586	got_it:
	2587	DEBUG_BUFFERS_r(
	2588	if (swap)
	2589	PerlIO_printf(Perl_debug_log,
	2590	"rex=0x%"UVxf" freeing offs: 0x%"UVxf"\n",
	2591	PTR2UV(prog),
	2592	PTR2UV(swap)
	2593	);
	2594	);
	2595	Safefree(swap);
	2596
	2597	if (PL_reg_state.re_state_eval_setup_done)
	2598	restore_pos(aTHX_ prog);
	2599	if (RXp_PAREN_NAMES(prog))
	2600	(void)hv_iterinit(RXp_PAREN_NAMES(prog));
	2601
	2602	/* make sure $`, $&, $', and $digit will work later */
	2603	if ( !(flags & REXEC_NOT_FIRST) ) {
	2604	if (flags & REXEC_COPY_STR) {
	2605	#ifdef PERL_ANY_COW
	2606	if (SvCANCOW(sv)) {
	2607	if (DEBUG_C_TEST) {
	2608	PerlIO_printf(Perl_debug_log,
	2609	"Copy on write: regexp capture, type %d\n",
	2610	(int) SvTYPE(sv));
	2611	}
	2612	RX_MATCH_COPY_FREE(rx);
	2613	prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
	2614	prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
	2615	assert (SvPOKp(prog->saved_copy));
	2616	prog->sublen = PL_regeol - strbeg;
	2617	prog->suboffset = 0;
	2618	prog->subcoffset = 0;
	2619	} else
	2620	#endif
	2621	{
	2622	I32 min = 0;
	2623	I32 max = PL_regeol - strbeg;
	2624	I32 sublen;
	2625
	2626	if ( (flags & REXEC_COPY_SKIP_POST)
	2627	&& !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
	2628	&& !(PL_sawampersand & SAWAMPERSAND_RIGHT)
	2629	) { /* don't copy $' part of string */
	2630	U32 n = 0;
	2631	max = -1;
	2632	/* calculate the right-most part of the string covered
	2633	* by a capture. Due to look-ahead, this may be to
	2634	* the right of $&, so we have to scan all captures */
	2635	while (n <= prog->lastparen) {
	2636	if (prog->offs[n].end > max)
	2637	max = prog->offs[n].end;
	2638	n++;
	2639	}
	2640	if (max == -1)
	2641	max = (PL_sawampersand & SAWAMPERSAND_LEFT)
	2642	? prog->offs[0].start
	2643	: 0;
	2644	assert(max >= 0 && max <= PL_regeol - strbeg);
	2645	}
	2646
	2647	if ( (flags & REXEC_COPY_SKIP_PRE)
	2648	&& !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
	2649	&& !(PL_sawampersand & SAWAMPERSAND_LEFT)
	2650	) { /* don't copy $` part of string */
	2651	U32 n = 0;
	2652	min = max;
	2653	/* calculate the left-most part of the string covered
	2654	* by a capture. Due to look-behind, this may be to
	2655	* the left of $&, so we have to scan all captures */
	2656	while (min && n <= prog->lastparen) {
	2657	if ( prog->offs[n].start != -1
	2658	&& prog->offs[n].start < min)
	2659	{
	2660	min = prog->offs[n].start;
	2661	}
	2662	n++;
	2663	}
	2664	if ((PL_sawampersand & SAWAMPERSAND_RIGHT)
	2665	&& min > prog->offs[0].end
	2666	)
	2667	min = prog->offs[0].end;
	2668
	2669	}
	2670
	2671	assert(min >= 0 && min <= max && min <= PL_regeol - strbeg);
	2672	sublen = max - min;
	2673
	2674	if (RX_MATCH_COPIED(rx)) {
	2675	if (sublen > prog->sublen)
	2676	prog->subbeg =
	2677	(char*)saferealloc(prog->subbeg, sublen+1);
	2678	}
	2679	else
	2680	prog->subbeg = (char*)safemalloc(sublen+1);
	2681	Copy(strbeg + min, prog->subbeg, sublen, char);
	2682	prog->subbeg[sublen] = '\0';
	2683	prog->suboffset = min;
	2684	prog->sublen = sublen;
	2685	RX_MATCH_COPIED_on(rx);
	2686	}
	2687	prog->subcoffset = prog->suboffset;
	2688	if (prog->suboffset && utf8_target) {
	2689	/* Convert byte offset to chars.
	2690	* XXX ideally should only compute this if @-/@+
	2691	* has been seen, a la PL_sawampersand ??? */
	2692
	2693	/* If there's a direct correspondence between the
	2694	* string which we're matching and the original SV,
	2695	* then we can use the utf8 len cache associated with
	2696	* the SV. In particular, it means that under //g,
	2697	* sv_pos_b2u() will use the previously cached
	2698	* position to speed up working out the new length of
	2699	* subcoffset, rather than counting from the start of
	2700	* the string each time. This stops
	2701	* $x = "\x{100}" x 1E6; 1 while $x =~ /(.)/g;
	2702	* from going quadratic */
	2703	if (SvPOKp(sv) && SvPVX(sv) == strbeg)
	2704	sv_pos_b2u(sv, &(prog->subcoffset));
	2705	else
	2706	prog->subcoffset = utf8_length((U8*)strbeg,
	2707	(U8*)(strbeg+prog->suboffset));
	2708	}
	2709	}
	2710	else {
	2711	RX_MATCH_COPY_FREE(rx);
	2712	prog->subbeg = strbeg;
	2713	prog->suboffset = 0;
	2714	prog->subcoffset = 0;
	2715	prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
	2716	}
	2717	}
	2718
	2719	return 1;
	2720
	2721	phooey:
	2722	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
	2723	PL_colors[4], PL_colors[5]));
	2724	if (PL_reg_state.re_state_eval_setup_done)
	2725	restore_pos(aTHX_ prog);
	2726	if (swap) {
	2727	/* we failed :-( roll it back */
	2728	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	2729	"rex=0x%"UVxf" rolling back offs: freeing=0x%"UVxf" restoring=0x%"UVxf"\n",
	2730	PTR2UV(prog),
	2731	PTR2UV(prog->offs),
	2732	PTR2UV(swap)
	2733	));
	2734	Safefree(prog->offs);
	2735	prog->offs = swap;
	2736	}
	2737	return 0;
	2738	}
	2739
	2740
	2741	/* Set which rex is pointed to by PL_reg_state, handling ref counting.
	2742	* Do inc before dec, in case old and new rex are the same */
	2743	#define SET_reg_curpm(Re2) \
	2744	if (PL_reg_state.re_state_eval_setup_done) { \
	2745	(void)ReREFCNT_inc(Re2); \
	2746	ReREFCNT_dec(PM_GETRE(PL_reg_curpm)); \
	2747	PM_SETRE((PL_reg_curpm), (Re2)); \
	2748	}
	2749
	2750
	2751	/*
	2752	- regtry - try match at specific point
	2753	*/
	2754	STATIC I32 /* 0 failure, 1 success */
	2755	S_regtry(pTHX_ regmatch_info reginfo, char *startposp)
	2756	{
	2757	dVAR;
	2758	CHECKPOINT lastcp;
	2759	REGEXP *const rx = reginfo->prog;
	2760	regexp *const prog = ReANY(rx);
	2761	I32 result;
	2762	RXi_GET_DECL(prog,progi);
	2763	GET_RE_DEBUG_FLAGS_DECL;
	2764
	2765	PERL_ARGS_ASSERT_REGTRY;
	2766
	2767	reginfo->cutpoint=NULL;
	2768
	2769	if ((prog->extflags & RXf_EVAL_SEEN)
	2770	&& !PL_reg_state.re_state_eval_setup_done)
	2771	{
	2772	MAGIC *mg;
	2773
	2774	PL_reg_state.re_state_eval_setup_done = TRUE;
	2775	if (reginfo->sv) {
	2776	/* Make $_ available to executed code. */
	2777	if (reginfo->sv != DEFSV) {
	2778	SAVE_DEFSV;
	2779	DEFSV_set(reginfo->sv);
	2780	}
	2781
	2782	if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
	2783	&& (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
	2784	/* prepare for quick setting of pos */
	2785	#ifdef PERL_OLD_COPY_ON_WRITE
	2786	if (SvIsCOW(reginfo->sv))
	2787	sv_force_normal_flags(reginfo->sv, 0);
	2788	#endif
	2789	mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
	2790	&PL_vtbl_mglob, NULL, 0);
	2791	mg->mg_len = -1;
	2792	}
	2793	PL_reg_magic = mg;
	2794	PL_reg_oldpos = mg->mg_len;
	2795	SAVEDESTRUCTOR_X(restore_pos, prog);
	2796	}
	2797	if (!PL_reg_curpm) {
	2798	Newxz(PL_reg_curpm, 1, PMOP);
	2799	#ifdef USE_ITHREADS
	2800	{
	2801	SV* const repointer = &PL_sv_undef;
	2802	/* this regexp is also owned by the new PL_reg_curpm, which
	2803	will try to free it. */
	2804	av_push(PL_regex_padav, repointer);
	2805	PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
	2806	PL_regex_pad = AvARRAY(PL_regex_padav);
	2807	}
	2808	#endif
	2809	}
	2810	SET_reg_curpm(rx);
	2811	PL_reg_oldcurpm = PL_curpm;
	2812	PL_curpm = PL_reg_curpm;
	2813	if (RXp_MATCH_COPIED(prog)) {
	2814	/* Here is a serious problem: we cannot rewrite subbeg,
	2815	since it may be needed if this match fails. Thus
	2816	$` inside (?{}) could fail... */
	2817	PL_reg_oldsaved = prog->subbeg;
	2818	PL_reg_oldsavedlen = prog->sublen;
	2819	PL_reg_oldsavedoffset = prog->suboffset;
	2820	PL_reg_oldsavedcoffset = prog->suboffset;
	2821	#ifdef PERL_ANY_COW
	2822	PL_nrs = prog->saved_copy;
	2823	#endif
	2824	RXp_MATCH_COPIED_off(prog);
	2825	}
	2826	else
	2827	PL_reg_oldsaved = NULL;
	2828	prog->subbeg = PL_bostr;
	2829	prog->suboffset = 0;
	2830	prog->subcoffset = 0;
	2831	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
	2832	}
	2833	#ifdef DEBUGGING
	2834	PL_reg_starttry = *startposp;
	2835	#endif
	2836	prog->offs[0].start = *startposp - PL_bostr;
	2837	prog->lastparen = 0;
	2838	prog->lastcloseparen = 0;
	2839
	2840	/* XXXX What this code is doing here?!!! There should be no need
	2841	to do this again and again, prog->lastparen should take care of
	2842	this! --ilya*/
	2843
	2844	/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
	2845	* Actually, the code in regcppop() (which Ilya may be meaning by
	2846	* prog->lastparen), is not needed at all by the test suite
	2847	* (op/regexp, op/pat, op/split), but that code is needed otherwise
	2848	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	2849	* Meanwhile, this code is needed for the
	2850	* above-mentioned test suite tests to succeed. The common theme
	2851	* on those tests seems to be returning null fields from matches.
	2852	* --jhi updated by dapm */
	2853	#if 1
	2854	if (prog->nparens) {
	2855	regexp_paren_pair *pp = prog->offs;
	2856	I32 i;
	2857	for (i = prog->nparens; i > (I32)prog->lastparen; i--) {
	2858	++pp;
	2859	pp->start = -1;
	2860	pp->end = -1;
	2861	}
	2862	}
	2863	#endif
	2864	REGCP_SET(lastcp);
	2865	result = regmatch(reginfo, *startposp, progi->program + 1);
	2866	if (result != -1) {
	2867	prog->offs[0].end = result;
	2868	return 1;
	2869	}
	2870	if (reginfo->cutpoint)
	2871	*startposp= reginfo->cutpoint;
	2872	REGCP_UNWIND(lastcp);
	2873	return 0;
	2874	}
	2875
	2876
	2877	#define sayYES goto yes
	2878	#define sayNO goto no
	2879	#define sayNO_SILENT goto no_silent
	2880
	/* Record a failure in the position cache (when ST.cache_mask is set) and
	 * then fail via sayNO.
	 * We don't use STMT_START/END here because it leads to
	 * "unreachable code" warnings, which are bogus, but distracting. */
	#define CACHEsayNO \
	    if (ST.cache_mask) \
	        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
	    sayNO
	2887
	2888	/* this is used to determine how far from the left messages like
	2889	'failed...' are printed. It should be set such that messages
	2890	are inline with the regop output that created them.
	2891	*/
	2892	#define REPORT_CODE_OFF 32
	2893
	2894
	2895	#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
	2896	#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
	2897	#define CHRTEST_NOT_A_CP_1 -999
	2898	#define CHRTEST_NOT_A_CP_2 -998
	2899
	2900	#define SLAB_FIRST(s) (&(s)->states[0])
	2901	#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
	2902
	2903	/* grab a new slab and return the first slot in it */
	2904
	2905	STATIC regmatch_state *
	2906	S_push_slab(pTHX)
	2907	{
	2908	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	2909	dMY_CXT;
	2910	#endif
	2911	regmatch_slab *s = PL_regmatch_slab->next;
	2912	if (!s) {
	2913	Newx(s, 1, regmatch_slab);
	2914	s->prev = PL_regmatch_slab;
	2915	s->next = NULL;
	2916	PL_regmatch_slab->next = s;
	2917	}
	2918	PL_regmatch_slab = s;
	2919	return SLAB_FIRST(s);
	2920	}
	2921
	2922
	2923	/* push a new state then goto it */
	2924
	2925	#define PUSH_STATE_GOTO(state, node, input) \
	2926	pushinput = input; \
	2927	scan = node; \
	2928	st->resume_state = state; \
	2929	goto push_state;
	2930
	2931	/* push a new state with success backtracking, then goto it */
	2932
	2933	#define PUSH_YES_STATE_GOTO(state, node, input) \
	2934	pushinput = input; \
	2935	scan = node; \
	2936	st->resume_state = state; \
	2937	goto push_yes_state;
	2938
	2939
	2940
	2941
	2942	/*
	2943
	2944	regmatch() - main matching routine
	2945
	2946	This is basically one big switch statement in a loop. We execute an op,
	2947	set 'next' to point to the next op, and continue. If we come to a point which
	2948	we may need to backtrack to on failure such as (A\|B\|C), we push a
	2949	backtrack state onto the backtrack stack. On failure, we pop the top
	2950	state, and re-enter the loop at the state indicated. If there are no more
	2951	states to pop, we return failure.
	2952
	2953	Sometimes we also need to backtrack on success; for example /A+/, where
	2954	after successfully matching one A, we need to go back and try to
	2955	match another one; similarly for lookahead assertions: if the assertion
	2956	completes successfully, we backtrack to the state just before the assertion
	2957	and then carry on. In these cases, the pushed state is marked as
	2958	'backtrack on success too'. This marking is in fact done by a chain of
	2959	pointers, each pointing to the previous 'yes' state. On success, we pop to
	2960	the nearest yes state, discarding any intermediate failure-only states.
	2961	Sometimes a yes state is pushed just to force some cleanup code to be
	2962	called at the end of a successful match or submatch; e.g. (??{$re}) uses
	2963	it to free the inner regex.
	2964
	2965	Note that failure backtracking rewinds the cursor position, while
	2966	success backtracking leaves it alone.
	2967
	2968	A pattern is complete when the END op is executed, while a subpattern
	2969	such as (?=foo) is complete when the SUCCESS op is executed. Both of these
	2970	ops trigger the "pop to last yes state if any, otherwise return true"
	2971	behaviour.
	2972
	2973	A common convention in this function is to use A and B to refer to the two
	2974	subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
	2975	the subpattern to be matched possibly multiple times, while B is the entire
	2976	rest of the pattern. Variable and state names reflect this convention.
	2977
	2978	The states in the main switch are the union of ops and failure/success of
	2979	substates associated with that op. For example, IFMATCH is the op
	2980	that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
	2981	'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
	2982	successfully matched A and IFMATCH_A_fail is a state saying that we have
	2983	just failed to match A. Resume states always come in pairs. The backtrack
	2984	state we push is marked as 'IFMATCH_A', but when that is popped, we resume
	2985	at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
	2986	on success or failure.
	2987
	2988	The struct that holds a backtracking state is actually a big union, with
	2989	one variant for each major type of op. The variable st points to the
	2990	top-most backtrack struct. To make the code clearer, within each
	2991	block of code we #define ST to alias the relevant union.
	2992
	2993	Here's a concrete example of a (vastly oversimplified) IFMATCH
	2994	implementation:
	2995
	2996	switch (state) {
	2997	....
	2998
	2999	#define ST st->u.ifmatch
	3000
	3001	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	3002	ST.foo = ...; // some state we wish to save
	3003	...
	3004	// push a yes backtrack state with a resume value of
	3005	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
	3006	// first node of A:
	3007	PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput);
	3008	// NOTREACHED
	3009
	3010	case IFMATCH_A: // we have successfully executed A; now continue with B
	3011	next = B;
	3012	bar = ST.foo; // do something with the preserved value
	3013	break;
	3014
	3015	case IFMATCH_A_fail: // A failed, so the assertion failed
	3016	...; // do some housekeeping, then ...
	3017	sayNO; // propagate the failure
	3018
	3019	#undef ST
	3020
	3021	...
	3022	}
	3023
	3024	For any old-timers reading this who are familiar with the old recursive
	3025	approach, the code above is equivalent to:
	3026
	3027	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	3028	{
	3029	int foo = ...
	3030	...
	3031	if (regmatch(A)) {
	3032	next = B;
	3033	bar = foo;
	3034	break;
	3035	}
	3036	...; // do some housekeeping, then ...
	3037	sayNO; // propagate the failure
	3038	}
	3039
	3040	The topmost backtrack state, pointed to by st, is usually free. If you
	3041	want to claim it, populate any ST.foo fields in it with values you wish to
	3042	save, then do one of
	3043
	3044	PUSH_STATE_GOTO(resume_state, node, newinput);
	3045	PUSH_YES_STATE_GOTO(resume_state, node, newinput);
	3046
	3047	which sets that backtrack state's resume value to 'resume_state', pushes a
	3048	new free entry to the top of the backtrack stack, then goes to 'node'.
	3049	On backtracking, the free slot is popped, and the saved state becomes the
	3050	new free state. An ST.foo field in this new top state can be temporarily
	3051	accessed to retrieve values, but once the main loop is re-entered, it
	3052	becomes available for reuse.
	3053
	3054	Note that the depth of the backtrack stack constantly increases during the
	3055	left-to-right execution of the pattern, rather than going up and down with
	3056	the pattern nesting. For example the stack is at its maximum at Z at the
	3057	end of the pattern, rather than at X in the following:
	3058
	3059	/(((X)+)+)+....(Y)+....Z/
	3060
	3061	The only exceptions to this are lookahead/behind assertions and the cut,
	3062	(?>A), which pop all the backtrack states associated with A before
	3063	continuing.
	3064
	3065	Backtrack state structs are allocated in slabs of about 4K in size.
	3066	PL_regmatch_state and st always point to the currently active state,
	3067	and PL_regmatch_slab points to the slab currently containing
	3068	PL_regmatch_state. The first time regmatch() is called, the first slab is
	3069	allocated, and is never freed until interpreter destruction. When the slab
	3070	is full, a new one is allocated and chained to the end. At exit from
	3071	regmatch(), slabs allocated since entry are freed.
	3072
	3073	*/
	3074
	3075
	/* Under DEBUG_STATE_r, print the current execution position followed by
	 * the label pp (pasted between string literals, so it must itself be a
	 * string literal) and the name of st's resume state.  The name is
	 * followed by "[Y]", "[M]" or "[YM]" when st is the current yes_state
	 * and/or mark_state. */
	#define DEBUG_STATE_pp(pp) \
	    DEBUG_STATE_r({ \
	        DUMP_EXEC_POS(locinput, scan, utf8_target); \
	        PerlIO_printf(Perl_debug_log, \
	            " %*s"pp" %s%s%s%s%s\n", \
	            depth*2, "", \
	            PL_reg_name[st->resume_state], \
	            ((st==yes_state||st==mark_state) ? "[" : ""), \
	            ((st==yes_state) ? "Y" : ""), \
	            ((st==mark_state) ? "M" : ""), \
	            ((st==yes_state||st==mark_state) ? "]" : "") \
	        ); \
	    });
	3089
	3090
	3091	#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
	3092
	3093	#ifdef DEBUGGING
	3094
	3095	STATIC void
	3096	S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
	3097	const char start, const char end, const char *blurb)
	3098	{
	3099	const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
	3100
	3101	PERL_ARGS_ASSERT_DEBUG_START_MATCH;
	3102
	3103	if (!PL_colorset)
	3104	reginitcolors();
	3105	{
	3106	RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
	3107	RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
	3108
	3109	RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
	3110	start, end - start, 60);
	3111
	3112	PerlIO_printf(Perl_debug_log,
	3113	"%s%s REx%s %s against %s\n",
	3114	PL_colors[4], blurb, PL_colors[5], s0, s1);
	3115
	3116	if (utf8_target\|\|utf8_pat)
	3117	PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
	3118	utf8_pat ? "pattern" : "",
	3119	utf8_pat && utf8_target ? " and " : "",
	3120	utf8_target ? "string" : ""
	3121	);
	3122	}
	3123	}
	3124
	3125	STATIC void
	3126	S_dump_exec_pos(pTHX_ const char *locinput,
	3127	const regnode *scan,
	3128	const char *loc_regeol,
	3129	const char *loc_bostr,
	3130	const char *loc_reg_starttry,
	3131	const bool utf8_target)
	3132	{
	3133	const int docolor = PL_colors[0] \|\| PL_colors[2] \|\| *PL_colors[4];
	3134	const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
	3135	int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
	3136	/* The part of the string before starttry has one color
	3137	(pref0_len chars), between starttry and current
	3138	position another one (pref_len - pref0_len chars),
	3139	after the current position the third one.
	3140	We assume that pref0_len <= pref_len, otherwise we
	3141	decrease pref0_len. */
	3142	int pref_len = (locinput - loc_bostr) > (5 + taill) - l
	3143	? (5 + taill) - l : locinput - loc_bostr;
	3144	int pref0_len;
	3145
	3146	PERL_ARGS_ASSERT_DUMP_EXEC_POS;
	3147
	3148	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput - pref_len)))
	3149	pref_len++;
	3150	pref0_len = pref_len - (locinput - loc_reg_starttry);
	3151	if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
	3152	l = ( loc_regeol - locinput > (5 + taill) - pref_len
	3153	? (5 + taill) - pref_len : loc_regeol - locinput);
	3154	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput + l)))
	3155	l--;
	3156	if (pref0_len < 0)
	3157	pref0_len = 0;
	3158	if (pref0_len > pref_len)
	3159	pref0_len = pref_len;
	3160	{
	3161	const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0;
	3162
	3163	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
	3164	(locinput - pref_len),pref0_len, 60, 4, 5);
	3165
	3166	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
	3167	(locinput - pref_len + pref0_len),
	3168	pref_len - pref0_len, 60, 2, 3);
	3169
	3170	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
	3171	locinput, loc_regeol - locinput, 10, 0, 1);
	3172
	3173	const STRLEN tlen=len0+len1+len2;
	3174	PerlIO_printf(Perl_debug_log,
	3175	"%4"IVdf" <%.s%.s%s%.s>%s\|",
	3176	(IV)(locinput - loc_bostr),
	3177	len0, s0,
	3178	len1, s1,
	3179	(docolor ? "" : "> <"),
	3180	len2, s2,
	3181	(int)(tlen > 19 ? 0 : 19 - tlen),
	3182	"");
	3183	}
	3184	}
	3185
	3186	#endif
	3187
	3188	/* reg_check_named_buff_matched()
	3189	* Checks to see if a named buffer has matched. The data array of
	3190	* buffer numbers corresponding to the buffer is expected to reside
	3191	* in the regexp->data->data array in the slot stored in the ARG() of
	3192	* node involved. Note that this routine doesn't actually care about the
	3193	* name, that information is not preserved from compilation to execution.
	3194	* Returns the index of the leftmost defined buffer with the given name
	3195	* or 0 if non of the buffers matched.
	3196	*/
	3197	STATIC I32
	3198	S_reg_check_named_buff_matched(pTHX_ const regexp rex, const regnode scan)
	3199	{
	3200	I32 n;
	3201	RXi_GET_DECL(rex,rexi);
	3202	SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	3203	I32 nums=(I32)SvPVX(sv_dat);
	3204
	3205	PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
	3206
	3207	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	3208	if ((I32)rex->lastparen >= nums[n] &&
	3209	rex->offs[nums[n]].end != -1)
	3210	{
	3211	return nums[n];
	3212	}
	3213	}
	3214	return 0;
	3215	}
	3216
	3217
	3218	/* free all slabs above current one - called during LEAVE_SCOPE */
	3219
	3220	STATIC void
	3221	S_clear_backtrack_stack(pTHX_ void *p)
	3222	{
	3223	regmatch_slab *s = PL_regmatch_slab->next;
	3224	PERL_UNUSED_ARG(p);
	3225
	3226	if (!s)
	3227	return;
	3228	PL_regmatch_slab->next = NULL;
	3229	while (s) {
	3230	regmatch_slab * const osl = s;
	3231	s = s->next;
	3232	Safefree(osl);
	3233	}
	3234	}
	3235	static bool
	3236	S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
	3237	U8* c1_utf8, int c2p, U8 c2_utf8, bool is_utf8_pat)
	3238	{
	3239	/* This function determines if there are one or two characters that match
	3240	* the first character of the passed-in EXACTish node <text_node>, and if
	3241	* so, returns them in the passed-in pointers.
	3242	*
	3243	* If it determines that no possible character in the target string can
	3244	* match, it returns FALSE; otherwise TRUE. (The FALSE situation occurs if
	3245	* the first character in <text_node> requires UTF-8 to represent, and the
	3246	* target string isn't in UTF-8.)
	3247	*
	3248	* If there are more than two characters that could match the beginning of
	3249	* <text_node>, or if more context is required to determine a match or not,
	3250	* it sets both <c1p> and <c2p> to CHRTEST_VOID.
	3251	*
	3252	* The motiviation behind this function is to allow the caller to set up
	3253	* tight loops for matching. If <text_node> is of type EXACT, there is
	3254	* only one possible character that can match its first character, and so
	3255	* the situation is quite simple. But things get much more complicated if
	3256	* folding is involved. It may be that the first character of an EXACTFish
	3257	* node doesn't participate in any possible fold, e.g., punctuation, so it
	3258	* can be matched only by itself. The vast majority of characters that are
	3259	* in folds match just two things, their lower and upper-case equivalents.
	3260	* But not all are like that; some have multiple possible matches, or match
	3261	* sequences of more than one character. This function sorts all that out.
	3262	*
	3263	* Consider the patterns AB or A?B where A and B are arbitrary. In a
	3264	* loop of trying to match A*, we know we can't exit where the thing
	3265	* following it isn't a B. And something can't be a B unless it is the
	3266	* beginning of B. By putting a quick test for that beginning in a tight
	3267	* loop, we can rule out things that can't possibly be B without having to
	3268	* break out of the loop, thus avoiding work. Similarly, if A is a single
	3269	* character, we can make a tight loop matching A*, using the outputs of
	3270	* this function.
	3271	*
	3272	* If the target string to match isn't in UTF-8, and there aren't
	3273	* complications which require CHRTEST_VOID, <c1p> and <c2p> are set to
	3274	* the one or two possible octets (which are characters in this situation)
	3275	* that can match. In all cases, if there is only one character that can
	3276	* match, <c1p> and <c2p> will be identical.
	3277	*
	3278	* If the target string is in UTF-8, the buffers pointed to by <c1_utf8>
	3279	* and <c2_utf8> will contain the one or two UTF-8 sequences of bytes that
	3280	* can match the beginning of <text_node>. They should be declared with at
	3281	* least length UTF8_MAXBYTES+1. (If the target string isn't in UTF-8, it is
	3282	* undefined what these contain.) If one or both of the buffers are
	3283	* invariant under UTF-8, <c1p>, and <c2p> will also be set to the
	3284	* corresponding invariant. If variant, the corresponding *<c1p> and/or
	3285	* *<c2p> will be set to a negative number(s) that shouldn't match any code
	3286	* point (unless inappropriately coerced to unsigned). *<c1p> will equal
	3287	* <c2p> if and only if <c1_utf8> and <c2_utf8> are the same. /
	3288
	3289	const bool utf8_target = PL_reg_match_utf8;
	3290
	3291	UV c1 = CHRTEST_NOT_A_CP_1;
	3292	UV c2 = CHRTEST_NOT_A_CP_2;
	3293	bool use_chrtest_void = FALSE;
	3294
	3295	/* Used when we have both utf8 input and utf8 output, to avoid converting
	3296	* to/from code points */
	3297	bool utf8_has_been_setup = FALSE;
	3298
	3299	dVAR;
	3300
	3301	U8 pat = (U8)STRING(text_node);
	3302
	3303	if (OP(text_node) == EXACT) {
	3304
	3305	/* In an exact node, only one thing can be matched, that first
	3306	* character. If both the pat and the target are UTF-8, we can just
	3307	* copy the input to the output, avoiding finding the code point of
	3308	* that character */
	3309	if (!is_utf8_pat) {
	3310	c2 = c1 = *pat;
	3311	}
	3312	else if (utf8_target) {
	3313	Copy(pat, c1_utf8, UTF8SKIP(pat), U8);
	3314	Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
	3315	utf8_has_been_setup = TRUE;
	3316	}
	3317	else {
	3318	c2 = c1 = valid_utf8_to_uvchr(pat, NULL);
	3319	}
	3320	}
	3321	else /* an EXACTFish node */
	3322	if ((is_utf8_pat
	3323	&& is_MULTI_CHAR_FOLD_utf8_safe(pat,
	3324	pat + STR_LEN(text_node)))
	3325	\|\| (!is_utf8_pat
	3326	&& is_MULTI_CHAR_FOLD_latin1_safe(pat,
	3327	pat + STR_LEN(text_node))))
	3328	{
	3329	/* Multi-character folds require more context to sort out. Also
	3330	* PL_utf8_foldclosures used below doesn't handle them, so have to be
	3331	* handled outside this routine */
	3332	use_chrtest_void = TRUE;
	3333	}
	3334	else { /* an EXACTFish node which doesn't begin with a multi-char fold */
	3335	c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
	3336	if (c1 > 256) {
	3337	/* Load the folds hash, if not already done */
	3338	SV** listp;
	3339	if (! PL_utf8_foldclosures) {
	3340	if (! PL_utf8_tofold) {
	3341	U8 dummy[UTF8_MAXBYTES+1];
	3342
	3343	/* Force loading this by folding an above-Latin1 char */
	3344	to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
	3345	assert(PL_utf8_tofold); /* Verify that worked */
	3346	}
	3347	PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
	3348	}
	3349
	3350	/* The fold closures data structure is a hash with the keys being
	3351	* the UTF-8 of every character that is folded to, like 'k', and
	3352	* the values each an array of all code points that fold to its
	3353	* key. e.g. [ 'k', 'K', KELVIN_SIGN ]. Multi-character folds are
	3354	* not included */
	3355	if ((! (listp = hv_fetch(PL_utf8_foldclosures,
	3356	(char *) pat,
	3357	UTF8SKIP(pat),
	3358	FALSE))))
	3359	{
	3360	/* Not found in the hash, therefore there are no folds
	3361	* containing it, so there is only a single character that
	3362	* could match */
	3363	c2 = c1;
	3364	}
	3365	else { /* Does participate in folds */
	3366	AV* list = (AV) listp;
	3367	if (av_len(list) != 1) {
	3368
	3369	/* If there aren't exactly two folds to this, it is outside
	3370	* the scope of this function */
	3371	use_chrtest_void = TRUE;
	3372	}
	3373	else { /* There are two. Get them */
	3374	SV** c_p = av_fetch(list, 0, FALSE);
	3375	if (c_p == NULL) {
	3376	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	3377	}
	3378	c1 = SvUV(*c_p);
	3379
	3380	c_p = av_fetch(list, 1, FALSE);
	3381	if (c_p == NULL) {
	3382	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	3383	}
	3384	c2 = SvUV(*c_p);
	3385
	3386	/* Folds that cross the 255/256 boundary are forbidden if
	3387	* EXACTFL, or EXACTFA and one is ASCIII. Since the
	3388	* pattern character is above 256, and its only other match
	3389	* is below 256, the only legal match will be to itself.
	3390	* We have thrown away the original, so have to compute
	3391	* which is the one above 255 */
	3392	if ((c1 < 256) != (c2 < 256)) {
	3393	if (OP(text_node) == EXACTFL
	3394	\|\| (OP(text_node) == EXACTFA
	3395	&& (isASCII(c1) \|\| isASCII(c2))))
	3396	{
	3397	if (c1 < 256) {
	3398	c1 = c2;
	3399	}
	3400	else {
	3401	c2 = c1;
	3402	}
	3403	}
	3404	}
	3405	}
	3406	}
	3407	}
	3408	else /* Here, c1 is < 255 */
	3409	if (utf8_target
	3410	&& HAS_NONLATIN1_FOLD_CLOSURE(c1)
	3411	&& OP(text_node) != EXACTFL
	3412	&& (OP(text_node) != EXACTFA \|\| ! isASCII(c1)))
	3413	{
	3414	/* Here, there could be something above Latin1 in the target which
	3415	* folds to this character in the pattern. All such cases except
	3416	* LATIN SMALL LETTER Y WITH DIAERESIS have more than two characters
	3417	* involved in their folds, so are outside the scope of this
	3418	* function */
	3419	if (UNLIKELY(c1 == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
	3420	c2 = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
	3421	}
	3422	else {
	3423	use_chrtest_void = TRUE;
	3424	}
	3425	}
	3426	else { /* Here nothing above Latin1 can fold to the pattern character */
	3427	switch (OP(text_node)) {
	3428
	3429	case EXACTFL: /* /l rules */
	3430	c2 = PL_fold_locale[c1];
	3431	break;
	3432
	3433	case EXACTF:
	3434	if (! utf8_target) { /* /d rules */
	3435	c2 = PL_fold[c1];
	3436	break;
	3437	}
	3438	/* FALLTHROUGH */
	3439	/* /u rules for all these. This happens to work for
	3440	* EXACTFA as nothing in Latin1 folds to ASCII */
	3441	case EXACTFA:
	3442	case EXACTFU_TRICKYFOLD:
	3443	case EXACTFU_SS:
	3444	case EXACTFU:
	3445	c2 = PL_fold_latin1[c1];
	3446	break;
	3447
	3448	default:
	3449	Perl_croak(aTHX_ "panic: Unexpected op %u", OP(text_node));
	3450	assert(0); /* NOTREACHED */
	3451	}
	3452	}
	3453	}
	3454
	3455	/* Here have figured things out. Set up the returns */
	3456	if (use_chrtest_void) {
	3457	c2p = c1p = CHRTEST_VOID;
	3458	}
	3459	else if (utf8_target) {
	3460	if (! utf8_has_been_setup) { /* Don't have the utf8; must get it */
	3461	uvchr_to_utf8(c1_utf8, c1);
	3462	uvchr_to_utf8(c2_utf8, c2);
	3463	}
	3464
	3465	/* Invariants are stored in both the utf8 and byte outputs; Use
	3466	* negative numbers otherwise for the byte ones. Make sure that the
	3467	* byte ones are the same iff the utf8 ones are the same */
	3468	c1p = (UTF8_IS_INVARIANT(c1_utf8)) ? *c1_utf8 : CHRTEST_NOT_A_CP_1;
	3469	c2p = (UTF8_IS_INVARIANT(c2_utf8))
	3470	? *c2_utf8
	3471	: (c1 == c2)
	3472	? CHRTEST_NOT_A_CP_1
	3473	: CHRTEST_NOT_A_CP_2;
	3474	}
	3475	else if (c1 > 255) {
	3476	if (c2 > 255) { /* both possibilities are above what a non-utf8 string
	3477	can represent */
	3478	return FALSE;
	3479	}
	3480
	3481	c1p = c2p = c2; /* c2 is the only representable value */
	3482	}
	3483	else { /* c1 is representable; see about c2 */
	3484	*c1p = c1;
	3485	*c2p = (c2 < 256) ? c2 : c1;
	3486	}
	3487
	3488	return TRUE;
	3489	}
	3490
	3491	/* returns -1 on failure, $+[0] on success */
	3492	STATIC I32
	3493	S_regmatch(pTHX_ regmatch_info reginfo, char startpos, regnode *prog)
	3494	{
	3495	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	3496	dMY_CXT;
	3497	#endif
	3498	dVAR;
	3499	const bool utf8_target = PL_reg_match_utf8;
	3500	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	3501	REGEXP *rex_sv = reginfo->prog;
	3502	regexp *rex = ReANY(rex_sv);
	3503	RXi_GET_DECL(rex,rexi);
	3504	I32 oldsave;
	3505	/* the current state. This is a cached copy of PL_regmatch_state */
	3506	regmatch_state *st;
	3507	/* cache heavy used fields of st in registers */
	3508	regnode *scan;
	3509	regnode *next;
	3510	U32 n = 0; /* general value; init to avoid compiler warning */
	3511	I32 ln = 0; /* len or last; init to avoid compiler warning */
	3512	char *locinput = startpos;
	3513	char pushinput; / where to continue after a PUSH */
	3514	I32 nextchr; /* is always set to UCHARAT(locinput) */
	3515
	3516	bool result = 0; /* return value of S_regmatch */
	3517	int depth = 0; /* depth of backtrack stack */
	3518	U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
	3519	const U32 max_nochange_depth =
	3520	(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
	3521	3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
	3522	regmatch_state yes_state = NULL; / state to pop to on success of
	3523	subpattern */
	3524	/* mark_state piggy backs on the yes_state logic so that when we unwind
	3525	the stack on success we can update the mark_state as we go */
	3526	regmatch_state mark_state = NULL; / last mark state we have seen */
	3527	regmatch_state cur_eval = NULL; / most recent EVAL_AB state */
	3528	struct regmatch_state cur_curlyx = NULL; / most recent curlyx */
	3529	U32 state_num;
	3530	bool no_final = 0; /* prevent failure from backtracking? */
	3531	bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
	3532	char *startpoint = locinput;
	3533	SV popmark = NULL; / are we looking for a mark? */
	3534	SV sv_commit = NULL; / last mark name seen in failure */
	3535	SV sv_yes_mark = NULL; / last mark name we have seen
	3536	during a successful match */
	3537	U32 lastopen = 0; /* last open we saw */
	3538	bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
	3539	SV* const oreplsv = GvSV(PL_replgv);
	3540	/* these three flags are set by various ops to signal information to
	3541	* the very next op. They have a useful lifetime of exactly one loop
	3542	* iteration, and are not preserved or restored by state pushes/pops
	3543	*/
	3544	bool sw = 0; /* the condition value in (?(cond)a\|b) */
	3545	bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
	3546	int logical = 0; /* the following EVAL is:
	3547	0: (?{...})
	3548	1: (?(?{...})X\|Y)
	3549	2: (??{...})
	3550	or the following IFMATCH/UNLESSM is:
	3551	false: plain (?=foo)
	3552	true: used as a condition: (?(?=foo))
	3553	*/
	3554	PAD* last_pad = NULL;
	3555	dMULTICALL;
	3556	I32 gimme = G_SCALAR;
	3557	CV caller_cv = NULL; / who called us */
	3558	CV last_pushed_cv = NULL; / most recently called (?{}) CV */
	3559	CHECKPOINT runops_cp; /* savestack position before executing EVAL */
	3560	U32 maxopenparen = 0; /* max '(' index seen so far */
	3561	int to_complement; /* Invert the result? */
	3562	_char_class_number classnum;
	3563	bool is_utf8_pat = reginfo->is_utf8_pat;
	3564
	3565	#ifdef DEBUGGING
	3566	GET_RE_DEBUG_FLAGS_DECL;
	3567	#endif
	3568
	3569	/* shut up 'may be used uninitialized' compiler warnings for dMULTICALL */
	3570	multicall_oldcatch = 0;
	3571	multicall_cv = NULL;
	3572	cx = NULL;
	3573	PERL_UNUSED_VAR(multicall_cop);
	3574	PERL_UNUSED_VAR(newsp);
	3575
	3576
	3577	PERL_ARGS_ASSERT_REGMATCH;
	3578
	3579	DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
	3580	PerlIO_printf(Perl_debug_log,"regmatch start\n");
	3581	}));
	3582	/* on first ever call to regmatch, allocate first slab */
	3583	if (!PL_regmatch_slab) {
	3584	Newx(PL_regmatch_slab, 1, regmatch_slab);
	3585	PL_regmatch_slab->prev = NULL;
	3586	PL_regmatch_slab->next = NULL;
	3587	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
	3588	}
	3589
	3590	oldsave = PL_savestack_ix;
	3591	SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
	3592	SAVEVPTR(PL_regmatch_slab);
	3593	SAVEVPTR(PL_regmatch_state);
	3594
	3595	/* grab next free state slot */
	3596	st = ++PL_regmatch_state;
	3597	if (st > SLAB_LAST(PL_regmatch_slab))
	3598	st = PL_regmatch_state = S_push_slab(aTHX);
	3599
	3600	/* Note that nextchr is a byte even in UTF */
	3601	SET_nextchr;
	3602	scan = prog;
	3603	while (scan != NULL) {
	3604
	3605	DEBUG_EXECUTE_r( {
	3606	SV * const prop = sv_newmortal();
	3607	regnode *rnext=regnext(scan);
	3608	DUMP_EXEC_POS( locinput, scan, utf8_target );
	3609	regprop(rex, prop, scan);
	3610
	3611	PerlIO_printf(Perl_debug_log,
	3612	"%3"IVdf":%*s%s(%"IVdf")\n",
	3613	(IV)(scan - rexi->program), depth*2, "",
	3614	SvPVX_const(prop),
	3615	(PL_regkind[OP(scan)] == END \|\| !rnext) ?
	3616	0 : (IV)(rnext - rexi->program));
	3617	});
	3618
	3619	next = scan + NEXT_OFF(scan);
	3620	if (next == scan)
	3621	next = NULL;
	3622	state_num = OP(scan);
	3623
	3624	reenter_switch:
	3625	to_complement = 0;
	3626
	3627	SET_nextchr;
	3628	assert(nextchr < 256 && (nextchr >= 0 \|\| nextchr == NEXTCHR_EOS));
	3629
	3630	switch (state_num) {
	3631	case BOL: /* /^../ */
	3632	if (locinput == PL_bostr)
	3633	{
	3634	/* reginfo->till = reginfo->bol; */
	3635	break;
	3636	}
	3637	sayNO;
	3638
	3639	case MBOL: /* /^../m */
	3640	if (locinput == PL_bostr \|\|
	3641	(!NEXTCHR_IS_EOS && locinput[-1] == '\n'))
	3642	{
	3643	break;
	3644	}
	3645	sayNO;
	3646
	3647	case SBOL: /* /^../s */
	3648	if (locinput == PL_bostr)
	3649	break;
	3650	sayNO;
	3651
	3652	case GPOS: /* \G */
	3653	if (locinput == reginfo->ganch)
	3654	break;
	3655	sayNO;
	3656
	3657	case KEEPS: /* \K */
	3658	/* update the startpoint */
	3659	st->u.keeper.val = rex->offs[0].start;
	3660	rex->offs[0].start = locinput - PL_bostr;
	3661	PUSH_STATE_GOTO(KEEPS_next, next, locinput);
	3662	assert(0); /NOTREACHED/
	3663	case KEEPS_next_fail:
	3664	/* rollback the start point change */
	3665	rex->offs[0].start = st->u.keeper.val;
	3666	sayNO_SILENT;
	3667	assert(0); /NOTREACHED/
	3668
	3669	case EOL: /* /..$/ */
	3670	goto seol;
	3671
	3672	case MEOL: /* /..$/m */
	3673	if (!NEXTCHR_IS_EOS && nextchr != '\n')
	3674	sayNO;
	3675	break;
	3676
	3677	case SEOL: /* /..$/s */
	3678	seol:
	3679	if (!NEXTCHR_IS_EOS && nextchr != '\n')
	3680	sayNO;
	3681	if (PL_regeol - locinput > 1)
	3682	sayNO;
	3683	break;
	3684
	3685	case EOS: /* \z */
	3686	if (!NEXTCHR_IS_EOS)
	3687	sayNO;
	3688	break;
	3689
	3690	case SANY: /* /./s */
	3691	if (NEXTCHR_IS_EOS)
	3692	sayNO;
	3693	goto increment_locinput;
	3694
	3695	case CANY: /* \C */
	3696	if (NEXTCHR_IS_EOS)
	3697	sayNO;
	3698	locinput++;
	3699	break;
	3700
	3701	case REG_ANY: /* /./ */
	3702	if ((NEXTCHR_IS_EOS) \|\| nextchr == '\n')
	3703	sayNO;
	3704	goto increment_locinput;
	3705
	3706
	3707	#undef ST
	3708	#define ST st->u.trie
	3709	case TRIEC: /* (ab\|cd) with known charclass */
	3710	/* In this case the charclass data is available inline so
	3711	we can fail fast without a lot of extra overhead.
	3712	*/
	3713	if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
	3714	DEBUG_EXECUTE_r(
	3715	PerlIO_printf(Perl_debug_log,
	3716	"%*s %sfailed to match trie start class...%s\n",
	3717	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3718	);
	3719	sayNO_SILENT;
	3720	assert(0); /* NOTREACHED */
	3721	}
	3722	/* FALL THROUGH */
	3723	case TRIE: /* (ab\|cd) */
	3724	/* the basic plan of execution of the trie is:
	3725	* At the beginning, run through all the states, and
	3726	* find the longest-matching word. Also remember the position
	3727	* of the shortest matching word. For example, this pattern:
	3728	* 1 2 3 4 5
	3729	* ab\|a\|x\|abcd\|abc
	3730	* when matched against the string "abcde", will generate
	3731	* accept states for all words except 3, with the longest
	3732	* matching word being 4, and the shortest being 2 (with
	3733	* the position being after char 1 of the string).
	3734	*
	3735	* Then for each matching word, in word order (i.e. 1,2,4,5),
	3736	* we run the remainder of the pattern; on each try setting
	3737	* the current position to the character following the word,
	3738	* returning to try the next word on failure.
	3739	*
	3740	* We avoid having to build a list of words at runtime by
	3741	* using a compile-time structure, wordinfo[].prev, which
	3742	* gives, for each word, the previous accepting word (if any).
	3743	* In the case above it would contain the mappings 1->2, 2->0,
	3744	* 3->0, 4->5, 5->1. We can use this table to generate, from
	3745	* the longest word (4 above), a list of all words, by
	3746	* following the list of prev pointers; this gives us the
	3747	* unordered list 4,5,1,2. Then given the current word we have
	3748	* just tried, we can go through the list and find the
	3749	* next-biggest word to try (so if we just failed on word 2,
	3750	* the next in the list is 4).
	3751	*
	3752	* Since at runtime we don't record the matching position in
	3753	* the string for each word, we have to work that out for
	3754	* each word we're about to process. The wordinfo table holds
	3755	* the character length of each word; given that we recorded
	3756	* at the start: the position of the shortest word and its
	3757	* length in chars, we just need to move the pointer the
	3758	* difference between the two char lengths. Depending on
	3759	* Unicode status and folding, that's cheap or expensive.
	3760	*
	3761	* This algorithm is optimised for the case where there are only a
	3762	* small number of accept states, i.e. 0,1, or maybe 2.
	3763	* With lots of accept states, and having to try all of them,
	3764	* it becomes quadratic on number of accept states to find all
	3765	* the next words.
	3766	*/
	3767
	3768	{
	3769	/* what type of TRIE am I? (utf8 makes this contextual) */
	3770	DECL_TRIE_TYPE(scan);
	3771
	3772	/* what trie are we using right now */
	3773	reg_trie_data * const trie
	3774	= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
	3775	HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
	3776	U32 state = trie->startstate;
	3777
	3778	if ( trie->bitmap
	3779	&& (NEXTCHR_IS_EOS \|\| !TRIE_BITMAP_TEST(trie, nextchr)))
	3780	{
	3781	if (trie->states[ state ].wordnum) {
	3782	DEBUG_EXECUTE_r(
	3783	PerlIO_printf(Perl_debug_log,
	3784	"%*s %smatched empty string...%s\n",
	3785	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3786	);
	3787	if (!trie->jump)
	3788	break;
	3789	} else {
	3790	DEBUG_EXECUTE_r(
	3791	PerlIO_printf(Perl_debug_log,
	3792	"%*s %sfailed to match trie start class...%s\n",
	3793	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3794	);
	3795	sayNO_SILENT;
	3796	}
	3797	}
	3798
	3799	{
	3800	U8 uc = ( U8 )locinput;
	3801
	3802	STRLEN len = 0;
	3803	STRLEN foldlen = 0;
	3804	U8 uscan = (U8)NULL;
	3805	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	3806	U32 charcount = 0; /* how many input chars we have matched */
	3807	U32 accepted = 0; /* have we seen any accepting states? */
	3808
	3809	ST.jump = trie->jump;
	3810	ST.me = scan;
	3811	ST.firstpos = NULL;
	3812	ST.longfold = FALSE; /* char longer if folded => it's harder */
	3813	ST.nextword = 0;
	3814
	3815	/* fully traverse the TRIE; note the position of the
	3816	shortest accept state and the wordnum of the longest
	3817	accept state */
	3818
	3819	while ( state && uc <= (U8*)PL_regeol ) {
	3820	U32 base = trie->states[ state ].trans.base;
	3821	UV uvc = 0;
	3822	U16 charid = 0;
	3823	U16 wordnum;
	3824	wordnum = trie->states[ state ].wordnum;
	3825
	3826	if (wordnum) { /* it's an accept state */
	3827	if (!accepted) {
	3828	accepted = 1;
	3829	/* record first match position */
	3830	if (ST.longfold) {
	3831	ST.firstpos = (U8*)locinput;
	3832	ST.firstchars = 0;
	3833	}
	3834	else {
	3835	ST.firstpos = uc;
	3836	ST.firstchars = charcount;
	3837	}
	3838	}
	3839	if (!ST.nextword \|\| wordnum < ST.nextword)
	3840	ST.nextword = wordnum;
	3841	ST.topword = wordnum;
	3842	}
	3843
	3844	DEBUG_TRIE_EXECUTE_r({
	3845	DUMP_EXEC_POS( (char *)uc, scan, utf8_target );
	3846	PerlIO_printf( Perl_debug_log,
	3847	"%*s %sState: %4"UVxf" Accepted: %c ",
	3848	2+depth * 2, "", PL_colors[4],
	3849	(UV)state, (accepted ? 'Y' : 'N'));
	3850	});
	3851
	3852	/* read a char and goto next state */
	3853	if ( base && (foldlen \|\| uc < (U8*)PL_regeol)) {
	3854	I32 offset;
	3855	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	3856	uscan, len, uvc, charid, foldlen,
	3857	foldbuf, uniflags);
	3858	charcount++;
	3859	if (foldlen>0)
	3860	ST.longfold = TRUE;
	3861	if (charid &&
	3862	( ((offset =
	3863	base + charid - 1 - trie->uniquecharcount)) >= 0)
	3864
	3865	&& ((U32)offset < trie->lasttrans)
	3866	&& trie->trans[offset].check == state)
	3867	{
	3868	state = trie->trans[offset].next;
	3869	}
	3870	else {
	3871	state = 0;
	3872	}
	3873	uc += len;
	3874
	3875	}
	3876	else {
	3877	state = 0;
	3878	}
	3879	DEBUG_TRIE_EXECUTE_r(
	3880	PerlIO_printf( Perl_debug_log,
	3881	"Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
	3882	charid, uvc, (UV)state, PL_colors[5] );
	3883	);
	3884	}
	3885	if (!accepted)
	3886	sayNO;
	3887
	3888	/* calculate total number of accept states */
	3889	{
	3890	U16 w = ST.topword;
	3891	accepted = 0;
	3892	while (w) {
	3893	w = trie->wordinfo[w].prev;
	3894	accepted++;
	3895	}
	3896	ST.accepted = accepted;
	3897	}
	3898
	3899	DEBUG_EXECUTE_r(
	3900	PerlIO_printf( Perl_debug_log,
	3901	"%*s %sgot %"IVdf" possible matches%s\n",
	3902	REPORT_CODE_OFF + depth * 2, "",
	3903	PL_colors[4], (IV)ST.accepted, PL_colors[5] );
	3904	);
	3905	goto trie_first_try; /* jump into the fail handler */
	3906	}}
	3907	assert(0); /* NOTREACHED */
	3908
	3909	case TRIE_next_fail: /* we failed - try next alternative */
	3910	{
	3911	U8 *uc;
	3912	if ( ST.jump) {
	3913	REGCP_UNWIND(ST.cp);
	3914	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	3915	}
	3916	if (!--ST.accepted) {
	3917	DEBUG_EXECUTE_r({
	3918	PerlIO_printf( Perl_debug_log,
	3919	"%*s %sTRIE failed...%s\n",
	3920	REPORT_CODE_OFF+depth*2, "",
	3921	PL_colors[4],
	3922	PL_colors[5] );
	3923	});
	3924	sayNO_SILENT;
	3925	}
	3926	{
	3927	/* Find next-highest word to process. Note that this code
	3928	* is O(N^2) per trie run (O(N) per branch), so keep tight */
	3929	U16 min = 0;
	3930	U16 word;
	3931	U16 const nextword = ST.nextword;
	3932	reg_trie_wordinfo * const wordinfo
	3933	= ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
	3934	for (word=ST.topword; word; word=wordinfo[word].prev) {
	3935	if (word > nextword && (!min \|\| word < min))
	3936	min = word;
	3937	}
	3938	ST.nextword = min;
	3939	}
	3940
	3941	trie_first_try:
	3942	if (do_cutgroup) {
	3943	do_cutgroup = 0;
	3944	no_final = 0;
	3945	}
	3946
	3947	if ( ST.jump) {
	3948	ST.lastparen = rex->lastparen;
	3949	ST.lastcloseparen = rex->lastcloseparen;
	3950	REGCP_SET(ST.cp);
	3951	}
	3952
	3953	/* find start char of end of current word */
	3954	{
	3955	U32 chars; /* how many chars to skip */
	3956	reg_trie_data * const trie
	3957	= (reg_trie_data*)rexi->data->data[ARG(ST.me)];
	3958
	3959	assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3960	>= ST.firstchars);
	3961	chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3962	- ST.firstchars;
	3963	uc = ST.firstpos;
	3964
	3965	if (ST.longfold) {
	3966	/* the hard option - fold each char in turn and find
	3967	* its folded length (which may be different) */
	3968	U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
	3969	STRLEN foldlen;
	3970	STRLEN len;
	3971	UV uvc;
	3972	U8 *uscan;
	3973
	3974	while (chars) {
	3975	if (utf8_target) {
	3976	uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len,
	3977	uniflags);
	3978	uc += len;
	3979	}
	3980	else {
	3981	uvc = *uc;
	3982	uc++;
	3983	}
	3984	uvc = to_uni_fold(uvc, foldbuf, &foldlen);
	3985	uscan = foldbuf;
	3986	while (foldlen) {
	3987	if (!--chars)
	3988	break;
	3989	uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len,
	3990	uniflags);
	3991	uscan += len;
	3992	foldlen -= len;
	3993	}
	3994	}
	3995	}
	3996	else {
	3997	if (utf8_target)
	3998	while (chars--)
	3999	uc += UTF8SKIP(uc);
	4000	else
	4001	uc += chars;
	4002	}
	4003	}
	4004
	4005	scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
	4006	? ST.jump[ST.nextword]
	4007	: NEXT_OFF(ST.me));
	4008
	4009	DEBUG_EXECUTE_r({
	4010	PerlIO_printf( Perl_debug_log,
	4011	"%*s %sTRIE matched word #%d, continuing%s\n",
	4012	REPORT_CODE_OFF+depth*2, "",
	4013	PL_colors[4],
	4014	ST.nextword,
	4015	PL_colors[5]
	4016	);
	4017	});
	4018
	4019	if (ST.accepted > 1 \|\| has_cutgroup) {
	4020	PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc);
	4021	assert(0); /* NOTREACHED */
	4022	}
	4023	/* only one choice left - just continue */
	4024	DEBUG_EXECUTE_r({
	4025	AV *const trie_words
	4026	= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
	4027	SV ** const tmp = av_fetch( trie_words,
	4028	ST.nextword-1, 0 );
	4029	SV *sv= tmp ? sv_newmortal() : NULL;
	4030
	4031	PerlIO_printf( Perl_debug_log,
	4032	"%*s %sonly one match left, short-circuiting: #%d <%s>%s\n",
	4033	REPORT_CODE_OFF+depth*2, "", PL_colors[4],
	4034	ST.nextword,
	4035	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	4036	PL_colors[0], PL_colors[1],
	4037	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)\|PERL_PV_ESCAPE_NONASCII
	4038	)
	4039	: "not compiled under -Dr",
	4040	PL_colors[5] );
	4041	});
	4042
	4043	locinput = (char*)uc;
	4044	continue; /* execute rest of RE */
	4045	assert(0); /* NOTREACHED */
	4046	}
	4047	#undef ST
	4048
	4049	case EXACT: { /* /abc/ */
	4050	char *s = STRING(scan);
	4051	ln = STR_LEN(scan);
	4052	if (utf8_target != is_utf8_pat) {
	4053	/* The target and the pattern have differing utf8ness. */
	4054	char *l = locinput;
	4055	const char * const e = s + ln;
	4056
	4057	if (utf8_target) {
	4058	/* The target is utf8, the pattern is not utf8.
	4059	* Above-Latin1 code points can't match the pattern;
	4060	* invariants match exactly, and the other Latin1 ones need
	4061	* to be downgraded to a single byte in order to do the
	4062	* comparison. (If we could be confident that the target
	4063	* is not malformed, this could be refactored to have fewer
	4064	* tests by just assuming that if the first bytes match, it
	4065	* is an invariant, but there are tests in the test suite
	4066	* dealing with (??{...}) which violate this) */
	4067	while (s < e) {
	4068	if (l >= PL_regeol \|\| UTF8_IS_ABOVE_LATIN1(* (U8*) l)) {
	4069	sayNO;
	4070	}
	4071	if (UTF8_IS_INVARIANT((U8)l)) {
	4072	if (l != s) {
	4073	sayNO;
	4074	}
	4075	l++;
	4076	}
	4077	else {
	4078	if (TWO_BYTE_UTF8_TO_UNI(l, (l+1)) != * (U8*) s) {
	4079	sayNO;
	4080	}
	4081	l += 2;
	4082	}
	4083	s++;
	4084	}
	4085	}
	4086	else {
	4087	/* The target is not utf8, the pattern is utf8. */
	4088	while (s < e) {
	4089	if (l >= PL_regeol \|\| UTF8_IS_ABOVE_LATIN1(* (U8*) s))
	4090	{
	4091	sayNO;
	4092	}
	4093	if (UTF8_IS_INVARIANT((U8)s)) {
	4094	if (s != l) {
	4095	sayNO;
	4096	}
	4097	s++;
	4098	}
	4099	else {
	4100	if (TWO_BYTE_UTF8_TO_UNI(s, (s+1)) != * (U8*) l) {
	4101	sayNO;
	4102	}
	4103	s += 2;
	4104	}
	4105	l++;
	4106	}
	4107	}
	4108	locinput = l;
	4109	}
	4110	else {
	4111	/* The target and the pattern have the same utf8ness. */
	4112	/* Inline the first character, for speed. */
	4113	if (PL_regeol - locinput < ln
	4114	\|\| UCHARAT(s) != nextchr
	4115	\|\| (ln > 1 && memNE(s, locinput, ln)))
	4116	{
	4117	sayNO;
	4118	}
	4119	locinput += ln;
	4120	}
	4121	break;
	4122	}
	4123
	4124	case EXACTFL: { /* /abc/il */
	4125	re_fold_t folder;
	4126	const U8 * fold_array;
	4127	const char * s;
	4128	U32 fold_utf8_flags;
	4129
	4130	RX_MATCH_TAINTED_on(reginfo->prog);
	4131	folder = foldEQ_locale;
	4132	fold_array = PL_fold_locale;
	4133	fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
	4134	goto do_exactf;
	4135
	4136	case EXACTFU_SS: /* /\x{df}/iu */
	4137	case EXACTFU_TRICKYFOLD: /* /\x{390}/iu */
	4138	case EXACTFU: /* /abc/iu */
	4139	folder = foldEQ_latin1;
	4140	fold_array = PL_fold_latin1;
	4141	fold_utf8_flags = is_utf8_pat ? FOLDEQ_S1_ALREADY_FOLDED : 0;
	4142	goto do_exactf;
	4143
	4144	case EXACTFA: /* /abc/iaa */
	4145	folder = foldEQ_latin1;
	4146	fold_array = PL_fold_latin1;
	4147	fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4148	goto do_exactf;
	4149
	4150	case EXACTF: /* /abc/i */
	4151	folder = foldEQ;
	4152	fold_array = PL_fold;
	4153	fold_utf8_flags = 0;
	4154
	4155	do_exactf:
	4156	s = STRING(scan);
	4157	ln = STR_LEN(scan);
	4158
	4159	if (utf8_target \|\| is_utf8_pat \|\| state_num == EXACTFU_SS) {
	4160	/* Either target or the pattern are utf8, or has the issue where
	4161	* the fold lengths may differ. */
	4162	const char * const l = locinput;
	4163	char *e = PL_regeol;
	4164
	4165	if (! foldEQ_utf8_flags(s, 0, ln, is_utf8_pat,
	4166	l, &e, 0, utf8_target, fold_utf8_flags))
	4167	{
	4168	sayNO;
	4169	}
	4170	locinput = e;
	4171	break;
	4172	}
	4173
	4174	/* Neither the target nor the pattern are utf8 */
	4175	if (UCHARAT(s) != nextchr
	4176	&& !NEXTCHR_IS_EOS
	4177	&& UCHARAT(s) != fold_array[nextchr])
	4178	{
	4179	sayNO;
	4180	}
	4181	if (PL_regeol - locinput < ln)
	4182	sayNO;
	4183	if (ln > 1 && ! folder(s, locinput, ln))
	4184	sayNO;
	4185	locinput += ln;
	4186	break;
	4187	}
	4188
	4189	/* XXX Could improve efficiency by separating these all out using a
	4190	* macro or in-line function. At that point regcomp.c would no longer
	4191	* have to set the FLAGS fields of these */
	4192	case BOUNDL: /* /\b/l */
	4193	case NBOUNDL: /* /\B/l */
	4194	RX_MATCH_TAINTED_on(reginfo->prog);
	4195	/* FALL THROUGH */
	4196	case BOUND: /* /\b/ */
	4197	case BOUNDU: /* /\b/u */
	4198	case BOUNDA: /* /\b/a */
	4199	case NBOUND: /* /\B/ */
	4200	case NBOUNDU: /* /\B/u */
	4201	case NBOUNDA: /* /\B/a */
	4202	/* was last char in word? */
	4203	if (utf8_target
	4204	&& FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET
	4205	&& FLAGS(scan) != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
	4206	{
	4207	if (locinput == PL_bostr)
	4208	ln = '\n';
	4209	else {
	4210	const U8 * const r = reghop3((U8)locinput, -1, (U8)PL_bostr);
	4211
	4212	ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
	4213	}
	4214	if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
	4215	ln = isWORDCHAR_uni(ln);
	4216	if (NEXTCHR_IS_EOS)
	4217	n = 0;
	4218	else {
	4219	LOAD_UTF8_CHARCLASS_ALNUM();
	4220	n = swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)locinput,
	4221	utf8_target);
	4222	}
	4223	}
	4224	else {
	4225	ln = isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(ln));
	4226	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_LC_utf8((U8*)locinput);
	4227	}
	4228	}
	4229	else {
	4230
	4231	/* Here the string isn't utf8, or is utf8 and only ascii
	4232	* characters are to match \w. In the latter case looking at
	4233	* the byte just prior to the current one may be just the final
	4234	* byte of a multi-byte character. This is ok. There are two
	4235	* cases:
	4236	* 1) it is a single byte character, and then the test is doing
	4237	* just what it's supposed to.
	4238	* 2) it is a multi-byte character, in which case the final
	4239	* byte is never mistakable for ASCII, and so the test
	4240	* will say it is not a word character, which is the
	4241	* correct answer. */
	4242	ln = (locinput != PL_bostr) ?
	4243	UCHARAT(locinput - 1) : '\n';
	4244	switch (FLAGS(scan)) {
	4245	case REGEX_UNICODE_CHARSET:
	4246	ln = isWORDCHAR_L1(ln);
	4247	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_L1(nextchr);
	4248	break;
	4249	case REGEX_LOCALE_CHARSET:
	4250	ln = isWORDCHAR_LC(ln);
	4251	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_LC(nextchr);
	4252	break;
	4253	case REGEX_DEPENDS_CHARSET:
	4254	ln = isWORDCHAR(ln);
	4255	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR(nextchr);
	4256	break;
	4257	case REGEX_ASCII_RESTRICTED_CHARSET:
	4258	case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
	4259	ln = isWORDCHAR_A(ln);
	4260	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_A(nextchr);
	4261	break;
	4262	default:
	4263	Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
	4264	break;
	4265	}
	4266	}
	4267	/* Note requires that all BOUNDs be lower than all NBOUNDs in
	4268	* regcomp.sym */
	4269	if (((!ln) == (!n)) == (OP(scan) < NBOUND))
	4270	sayNO;
	4271	break;
	4272
	4273	case ANYOF: /* /[abc]/ */
	4274	case ANYOF_WARN_SUPER:
	4275	if (NEXTCHR_IS_EOS)
	4276	sayNO;
	4277	if (utf8_target) {
	4278	if (!reginclass(rex, scan, (U8*)locinput, utf8_target))
	4279	sayNO;
	4280	locinput += UTF8SKIP(locinput);
	4281	}
	4282	else {
	4283	if (!REGINCLASS(rex, scan, (U8*)locinput))
	4284	sayNO;
	4285	locinput++;
	4286	}
	4287	break;
	4288
	4289	/* The argument (FLAGS) to all the POSIX node types is the class number
	4290	* */
	4291
	4292	case NPOSIXL: /* \W or [:^punct:] etc. under /l */
	4293	to_complement = 1;
	4294	/* FALLTHROUGH */
	4295
	4296	case POSIXL: /* \w or [:punct:] etc. under /l */
	4297	if (NEXTCHR_IS_EOS)
	4298	sayNO;
	4299
	4300	/* The locale hasn't influenced the outcome before this, so defer
	4301	* tainting until now */
	4302	RX_MATCH_TAINTED_on(reginfo->prog);
	4303
	4304	/* Use isFOO_lc() for characters within Latin1. (Note that
	4305	* UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
	4306	* wouldn't be invariant) */
	4307	if (UTF8_IS_INVARIANT(nextchr) \|\| ! utf8_target) {
	4308	if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextchr)))) {
	4309	sayNO;
	4310	}
	4311	}
	4312	else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
	4313	if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan),
	4314	(U8) TWO_BYTE_UTF8_TO_UNI(nextchr,
	4315	*(locinput + 1))))))
	4316	{
	4317	sayNO;
	4318	}
	4319	}
	4320	else { /* Here, must be an above Latin-1 code point */
	4321	goto utf8_posix_not_eos;
	4322	}
	4323
	4324	/* Here, must be utf8 */
	4325	locinput += UTF8SKIP(locinput);
	4326	break;
	4327
	4328	case NPOSIXD: /* \W or [:^punct:] etc. under /d */
	4329	to_complement = 1;
	4330	/* FALLTHROUGH */
	4331
	4332	case POSIXD: /* \w or [:punct:] etc. under /d */
	4333	if (utf8_target) {
	4334	goto utf8_posix;
	4335	}
	4336	goto posixa;
	4337
	4338	case NPOSIXA: /* \W or [:^punct:] etc. under /a */
	4339
	4340	if (NEXTCHR_IS_EOS) {
	4341	sayNO;
	4342	}
	4343
	4344	/* All UTF-8 variants match */
	4345	if (! UTF8_IS_INVARIANT(nextchr)) {
	4346	goto increment_locinput;
	4347	}
	4348
	4349	to_complement = 1;
	4350	/* FALLTHROUGH */
	4351
	4352	case POSIXA: /* \w or [:punct:] etc. under /a */
	4353
	4354	posixa:
	4355	/* We get here through POSIXD, NPOSIXD, and NPOSIXA when not in
	4356	* UTF-8, and also from NPOSIXA even in UTF-8 when the current
	4357	* character is a single byte */
	4358
	4359	if (NEXTCHR_IS_EOS
	4360	\|\| ! (to_complement ^ cBOOL(_generic_isCC_A(nextchr,
	4361	FLAGS(scan)))))
	4362	{
	4363	sayNO;
	4364	}
	4365
	4366	/* Here we are either not in utf8, or we matched a utf8-invariant,
	4367	* so the next char is the next byte */
	4368	locinput++;
	4369	break;
	4370
	4371	case NPOSIXU: /* \W or [:^punct:] etc. under /u */
	4372	to_complement = 1;
	4373	/* FALLTHROUGH */
	4374
	4375	case POSIXU: /* \w or [:punct:] etc. under /u */
	4376	utf8_posix:
	4377	if (NEXTCHR_IS_EOS) {
	4378	sayNO;
	4379	}
	4380	utf8_posix_not_eos:
	4381
	4382	/* Use _generic_isCC() for characters within Latin1. (Note that
	4383	* UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
	4384	* wouldn't be invariant) */
	4385	if (UTF8_IS_INVARIANT(nextchr) \|\| ! utf8_target) {
	4386	if (! (to_complement ^ cBOOL(_generic_isCC(nextchr,
	4387	FLAGS(scan)))))
	4388	{
	4389	sayNO;
	4390	}
	4391	locinput++;
	4392	}
	4393	else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
	4394	if (! (to_complement
	4395	^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_UNI(nextchr,
	4396	*(locinput + 1)),
	4397	FLAGS(scan)))))
	4398	{
	4399	sayNO;
	4400	}
	4401	locinput += 2;
	4402	}
	4403	else { /* Handle above Latin-1 code points */
	4404	classnum = (_char_class_number) FLAGS(scan);
	4405	if (classnum < _FIRST_NON_SWASH_CC) {
	4406
	4407	/* Here, uses a swash to find such code points. Load it if
	4408	* not done already */
	4409	if (! PL_utf8_swash_ptrs[classnum]) {
	4410	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	4411	PL_utf8_swash_ptrs[classnum]
	4412	= _core_swash_init("utf8",
	4413	swash_property_names[classnum],
	4414	&PL_sv_undef, 1, 0, NULL, &flags);
	4415	}
	4416	if (! (to_complement
	4417	^ cBOOL(swash_fetch(PL_utf8_swash_ptrs[classnum],
	4418	(U8 *) locinput, TRUE))))
	4419	{
	4420	sayNO;
	4421	}
	4422	}
	4423	else { /* Here, uses macros to find above Latin-1 code points */
	4424	switch (classnum) {
	4425	case _CC_ENUM_SPACE: /* XXX would require separate
	4426	code if we revert the change
	4427	of \v matching this */
	4428	case _CC_ENUM_PSXSPC:
	4429	if (! (to_complement
	4430	^ cBOOL(is_XPERLSPACE_high(locinput))))
	4431	{
	4432	sayNO;
	4433	}
	4434	break;
	4435	case _CC_ENUM_BLANK:
	4436	if (! (to_complement
	4437	^ cBOOL(is_HORIZWS_high(locinput))))
	4438	{
	4439	sayNO;
	4440	}
	4441	break;
	4442	case _CC_ENUM_XDIGIT:
	4443	if (! (to_complement
	4444	^ cBOOL(is_XDIGIT_high(locinput))))
	4445	{
	4446	sayNO;
	4447	}
	4448	break;
	4449	case _CC_ENUM_VERTSPACE:
	4450	if (! (to_complement
	4451	^ cBOOL(is_VERTWS_high(locinput))))
	4452	{
	4453	sayNO;
	4454	}
	4455	break;
	4456	default: /* The rest, e.g. [:cntrl:], can't match
	4457	above Latin1 */
	4458	if (! to_complement) {
	4459	sayNO;
	4460	}
	4461	break;
	4462	}
	4463	}
	4464	locinput += UTF8SKIP(locinput);
	4465	}
	4466	break;
	4467
	4468	case CLUMP: /* Match \X: logical Unicode character. This is defined as
	4469	a Unicode extended Grapheme Cluster */
	4470	/* From http://www.unicode.org/reports/tr29 (5.2 version). An
	4471	extended Grapheme Cluster is:
	4472
	4473	CR LF
	4474	\| Prepend* Begin Extend*
	4475	\| .
	4476
	4477	Begin is: ( Special_Begin \| ! Control )
	4478	Special_Begin is: ( Regional-Indicator+ \| Hangul-syllable )
	4479	Extend is: ( Grapheme_Extend \| Spacing_Mark )
	4480	Control is: [ GCB_Control \| CR \| LF ]
	4481	Hangul-syllable is: ( T+ \| ( L* ( L \| ( LVT \| ( V \| LV ) V* ) T* ) ))
	4482
	4483	If we create a 'Regular_Begin' = Begin - Special_Begin, then
	4484	we can rewrite
	4485
	4486	Begin is ( Regular_Begin + Special Begin )
	4487
	4488	It turns out that 98.4% of all Unicode code points match
	4489	Regular_Begin. Doing it this way eliminates a table match in
	4490	the previous implementation for almost all Unicode code points.
	4491
	4492	There is a subtlety with Prepend* which showed up in testing.
	4493	Note that the Begin, and only the Begin is required in:
	4494	\| Prepend* Begin Extend*
	4495	Also, Begin contains '! Control'. A Prepend must be a
	4496	'! Control', which means it must also be a Begin. What it
	4497	comes down to is that if we match Prepend* and then find no
	4498	suitable Begin afterwards, that if we backtrack the last
	4499	Prepend, that one will be a suitable Begin.
	4500	*/
	4501
	4502	if (NEXTCHR_IS_EOS)
	4503	sayNO;
	4504	if (! utf8_target) {
	4505
	4506	/* Match either CR LF or '.', as all the other possibilities
	4507	* require utf8 */
	4508	locinput++; /* Match the . or CR */
	4509	if (nextchr == '\r' /* And if it was CR, and the next is LF,
	4510	match the LF */
	4511	&& locinput < PL_regeol
	4512	&& UCHARAT(locinput) == '\n')
	4513	{
	4514	locinput++;
	4515	}
	4516	}
	4517	else {
	4518
	4519	/* Utf8: See if is ( CR LF ); already know that locinput <
	4520	* PL_regeol, so locinput+1 is in bounds */
	4521	if ( nextchr == '\r' && locinput+1 < PL_regeol
	4522	&& UCHARAT(locinput + 1) == '\n')
	4523	{
	4524	locinput += 2;
	4525	}
	4526	else {
	4527	STRLEN len;
	4528
	4529	/* In case have to backtrack to beginning, then match '.' */
	4530	char *starting = locinput;
	4531
	4532	/* In case have to backtrack the last prepend */
	4533	char *previous_prepend = NULL;
	4534
	4535	LOAD_UTF8_CHARCLASS_GCB();
	4536
	4537	/* Match (prepend)* */
	4538	while (locinput < PL_regeol
	4539	&& (len = is_GCB_Prepend_utf8(locinput)))
	4540	{
	4541	previous_prepend = locinput;
	4542	locinput += len;
	4543	}
	4544
	4545	/* As noted above, if we matched a prepend character, but
	4546	* the next thing won't match, back off the last prepend we
	4547	* matched, as it is guaranteed to match the begin */
	4548	if (previous_prepend
	4549	&& (locinput >= PL_regeol
	4550	\|\| (! swash_fetch(PL_utf8_X_regular_begin,
	4551	(U8*)locinput, utf8_target)
	4552	&& ! is_GCB_SPECIAL_BEGIN_START_utf8(locinput)))
	4553	)
	4554	{
	4555	locinput = previous_prepend;
	4556	}
	4557
	4558	/* Note that here we know PL_regeol > locinput, as we
	4559	* tested that upon input to this switch case, and if we
	4560	* moved locinput forward, we tested the result just above
	4561	* and it either passed, or we backed off so that it will
	4562	* now pass */
	4563	if (swash_fetch(PL_utf8_X_regular_begin,
	4564	(U8*)locinput, utf8_target)) {
	4565	locinput += UTF8SKIP(locinput);
	4566	}
	4567	else if (! is_GCB_SPECIAL_BEGIN_START_utf8(locinput)) {
	4568
	4569	/* Here did not match the required 'Begin' in the
	4570	* second term. So just match the very first
	4571	* character, the '.' of the final term of the regex */
	4572	locinput = starting + UTF8SKIP(starting);
	4573	goto exit_utf8;
	4574	} else {
	4575
	4576	/* Here is a special begin. It can be composed of
	4577	* several individual characters. One possibility is
	4578	* RI+ */
	4579	if ((len = is_GCB_RI_utf8(locinput))) {
	4580	locinput += len;
	4581	while (locinput < PL_regeol
	4582	&& (len = is_GCB_RI_utf8(locinput)))
	4583	{
	4584	locinput += len;
	4585	}
	4586	} else if ((len = is_GCB_T_utf8(locinput))) {
	4587	/* Another possibility is T+ */
	4588	locinput += len;
	4589	while (locinput < PL_regeol
	4590	&& (len = is_GCB_T_utf8(locinput)))
	4591	{
	4592	locinput += len;
	4593	}
	4594	} else {
	4595
	4596	/* Here, neither RI+ nor T+; must be some other
	4597	* Hangul. That means it is one of the others: L,
	4598	* LV, LVT or V, and matches:
	4599	* L* (L \| LVT T* \| V * V* T* \| LV V* T*) */
	4600
	4601	/* Match L* */
	4602	while (locinput < PL_regeol
	4603	&& (len = is_GCB_L_utf8(locinput)))
	4604	{
	4605	locinput += len;
	4606	}
	4607
	4608	/* Here, have exhausted L*. If the next character
	4609	* is not an LV, LVT nor V, it means we had to have
	4610	* at least one L, so matches L+ in the original
	4611	* equation, we have a complete hangul syllable.
	4612	* Are done. */
	4613
	4614	if (locinput < PL_regeol
	4615	&& is_GCB_LV_LVT_V_utf8(locinput))
	4616	{
	4617	/* Otherwise keep going. Must be LV, LVT or V.
	4618	* See if LVT, by first ruling out V, then LV */
	4619	if (! is_GCB_V_utf8(locinput)
	4620	/* All but every TCount one is LV */
	4621	&& (valid_utf8_to_uvchr((U8 *) locinput,
	4622	NULL)
	4623	- SBASE)
	4624	% TCount != 0)
	4625	{
	4626	locinput += UTF8SKIP(locinput);
	4627	} else {
	4628
	4629	/* Must be V or LV. Take it, then match
	4630	* V* */
	4631	locinput += UTF8SKIP(locinput);
	4632	while (locinput < PL_regeol
	4633	&& (len = is_GCB_V_utf8(locinput)))
	4634	{
	4635	locinput += len;
	4636	}
	4637	}
	4638
	4639	/* And any of LV, LVT, or V can be followed
	4640	* by T* */
	4641	while (locinput < PL_regeol
	4642	&& (len = is_GCB_T_utf8(locinput)))
	4643	{
	4644	locinput += len;
	4645	}
	4646	}
	4647	}
	4648	}
	4649
	4650	/* Match any extender */
	4651	while (locinput < PL_regeol
	4652	&& swash_fetch(PL_utf8_X_extend,
	4653	(U8*)locinput, utf8_target))
	4654	{
	4655	locinput += UTF8SKIP(locinput);
	4656	}
	4657	}
	4658	exit_utf8:
	4659	if (locinput > PL_regeol) sayNO;
	4660	}
	4661	break;
	4662
	4663	case NREFFL: /* /\g{name}/il */
	4664	{ /* The capture buffer cases. The ones beginning with N for the
	4665	named buffers just convert to the equivalent numbered and
	4666	pretend they were called as the corresponding numbered buffer
	4667	op. */
	4668	/* don't initialize these in the declaration, it makes C++
	4669	unhappy */
	4670	char *s;
	4671	char type;
	4672	re_fold_t folder;
	4673	const U8 *fold_array;
	4674	UV utf8_fold_flags;
	4675
	4676	RX_MATCH_TAINTED_on(reginfo->prog);
	4677	folder = foldEQ_locale;
	4678	fold_array = PL_fold_locale;
	4679	type = REFFL;
	4680	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4681	goto do_nref;
	4682
	4683	case NREFFA: /* /\g{name}/iaa */
	4684	folder = foldEQ_latin1;
	4685	fold_array = PL_fold_latin1;
	4686	type = REFFA;
	4687	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4688	goto do_nref;
	4689
	4690	case NREFFU: /* /\g{name}/iu */
	4691	folder = foldEQ_latin1;
	4692	fold_array = PL_fold_latin1;
	4693	type = REFFU;
	4694	utf8_fold_flags = 0;
	4695	goto do_nref;
	4696
	4697	case NREFF: /* /\g{name}/i */
	4698	folder = foldEQ;
	4699	fold_array = PL_fold;
	4700	type = REFF;
	4701	utf8_fold_flags = 0;
	4702	goto do_nref;
	4703
	4704	case NREF: /* /\g{name}/ */
	4705	type = REF;
	4706	folder = NULL;
	4707	fold_array = NULL;
	4708	utf8_fold_flags = 0;
	4709	do_nref:
	4710
	4711	/* For the named back references, find the corresponding buffer
	4712	* number */
	4713	n = reg_check_named_buff_matched(rex,scan);
	4714
	4715	if ( ! n ) {
	4716	sayNO;
	4717	}
	4718	goto do_nref_ref_common;
	4719
	4720	case REFFL: /* /\1/il */
	4721	RX_MATCH_TAINTED_on(reginfo->prog);
	4722	folder = foldEQ_locale;
	4723	fold_array = PL_fold_locale;
	4724	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4725	goto do_ref;
	4726
	4727	case REFFA: /* /\1/iaa */
	4728	folder = foldEQ_latin1;
	4729	fold_array = PL_fold_latin1;
	4730	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4731	goto do_ref;
	4732
	4733	case REFFU: /* /\1/iu */
	4734	folder = foldEQ_latin1;
	4735	fold_array = PL_fold_latin1;
	4736	utf8_fold_flags = 0;
	4737	goto do_ref;
	4738
	4739	case REFF: /* /\1/i */
	4740	folder = foldEQ;
	4741	fold_array = PL_fold;
	4742	utf8_fold_flags = 0;
	4743	goto do_ref;
	4744
	4745	case REF: /* /\1/ */
	4746	folder = NULL;
	4747	fold_array = NULL;
	4748	utf8_fold_flags = 0;
	4749
	4750	do_ref:
	4751	type = OP(scan);
	4752	n = ARG(scan); /* which paren pair */
	4753
	4754	do_nref_ref_common:
	4755	ln = rex->offs[n].start;
	4756	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4757	if (rex->lastparen < n \|\| ln == -1)
	4758	sayNO; /* Do not match unless seen CLOSEn. */
	4759	if (ln == rex->offs[n].end)
	4760	break;
	4761
	4762	s = PL_bostr + ln;
	4763	if (type != REF /* REF can do byte comparison */
	4764	&& (utf8_target \|\| type == REFFU))
	4765	{ /* XXX handle REFFL better */
	4766	char * limit = PL_regeol;
	4767
	4768	/* This call case insensitively compares the entire buffer
	4769	* at s, with the current input starting at locinput, but
	4770	* not going off the end given by PL_regeol, and returns in
	4771	* <limit> upon success, how much of the current input was
	4772	* matched */
	4773	if (! foldEQ_utf8_flags(s, NULL, rex->offs[n].end - ln, utf8_target,
	4774	locinput, &limit, 0, utf8_target, utf8_fold_flags))
	4775	{
	4776	sayNO;
	4777	}
	4778	locinput = limit;
	4779	break;
	4780	}
	4781
	4782	/* Not utf8: Inline the first character, for speed. */
	4783	if (!NEXTCHR_IS_EOS &&
	4784	UCHARAT(s) != nextchr &&
	4785	(type == REF \|\|
	4786	UCHARAT(s) != fold_array[nextchr]))
	4787	sayNO;
	4788	ln = rex->offs[n].end - ln;
	4789	if (locinput + ln > PL_regeol)
	4790	sayNO;
	4791	if (ln > 1 && (type == REF
	4792	? memNE(s, locinput, ln)
	4793	: ! folder(s, locinput, ln)))
	4794	sayNO;
	4795	locinput += ln;
	4796	break;
	4797	}
	4798
	4799	case NOTHING: /* null op; e.g. the 'nothing' following
	4800	* the '*' in m{(a+|b)*}' */
	4801	break;
	4802	case TAIL: /* placeholder while compiling (A\|B\|C) */
	4803	break;
	4804
	4805	case BACK: /* ??? doesn't appear to be used ??? */
	4806	break;
	4807
	4808	#undef ST
	4809	#define ST st->u.eval
	4810	{
	4811	SV *ret;
	4812	REGEXP *re_sv;
	4813	regexp *re;
	4814	regexp_internal *rei;
	4815	regnode *startpoint;
	4816
	4817	case GOSTART: /* (?R) */
	4818	case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
	4819	if (cur_eval && cur_eval->locinput==locinput) {
	4820	if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
	4821	Perl_croak(aTHX_ "Infinite recursion in regex");
	4822	if ( ++nochange_depth > max_nochange_depth )
	4823	Perl_croak(aTHX_
	4824	"Pattern subroutine nesting without pos change"
	4825	" exceeded limit in regex");
	4826	} else {
	4827	nochange_depth = 0;
	4828	}
	4829	re_sv = rex_sv;
	4830	re = rex;
	4831	rei = rexi;
	4832	if (OP(scan)==GOSUB) {
	4833	startpoint = scan + ARG2L(scan);
	4834	ST.close_paren = ARG(scan);
	4835	} else {
	4836	startpoint = rei->program+1;
	4837	ST.close_paren = 0;
	4838	}
	4839	goto eval_recurse_doit;
	4840	assert(0); /* NOTREACHED */
	4841
	4842	case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X\|Y)B/ */
	4843	if (cur_eval && cur_eval->locinput==locinput) {
	4844	if ( ++nochange_depth > max_nochange_depth )
	4845	Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
	4846	} else {
	4847	nochange_depth = 0;
	4848	}
	4849	{
	4850	/* execute the code in the {...} */
	4851
	4852	dSP;
	4853	IV before;
	4854	OP * const oop = PL_op;
	4855	COP * const ocurcop = PL_curcop;
	4856	OP *nop;
	4857	char *saved_regeol = PL_regeol;
	4858	struct re_save_state saved_state;
	4859	CV *newcv;
	4860
	4861	/* save all paren positions */
	4862	regcppush(rex, 0, maxopenparen);
	4863	REGCP_SET(runops_cp);
	4864
	4865	/* To not corrupt the existing regex state while executing the
	4866	* eval we would normally put it on the save stack, like with
	4867	* save_re_context. However, re-evals have a weird scoping so we
	4868	* can't just add ENTER/LEAVE here. With that, things like
	4869	*
	4870	* (?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a})
	4871	*
	4872	* would break, as they expect the localisation to be unwound
	4873	* only when the re-engine backtracks through the bit that
	4874	* localised it.
	4875	*
	4876	* What we do instead is just saving the state in a local c
	4877	* variable.
	4878	*/
	4879	Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
	4880
	4881	if (!caller_cv)
	4882	caller_cv = find_runcv(NULL);
	4883
	4884	n = ARG(scan);
	4885
	4886	if (rexi->data->what[n] == 'r') { /* code from an external qr */
	4887	newcv = (ReANY(
	4888	(REGEXP*)(rexi->data->data[n])
	4889	))->qr_anoncv
	4890	;
	4891	nop = (OP*)rexi->data->data[n+1];
	4892	}
	4893	else if (rexi->data->what[n] == 'l') { /* literal code */
	4894	newcv = caller_cv;
	4895	nop = (OP*)rexi->data->data[n];
	4896	assert(CvDEPTH(newcv));
	4897	}
	4898	else {
	4899	/* literal with own CV */
	4900	assert(rexi->data->what[n] == 'L');
	4901	newcv = rex->qr_anoncv;
	4902	nop = (OP*)rexi->data->data[n];
	4903	}
	4904
	4905	/* normally if we're about to execute code from the same
	4906	* CV that we used previously, we just use the existing
	4907	* CX stack entry. However, it's possible that in the
	4908	* meantime we may have backtracked, popped from the save
	4909	* stack, and undone the SAVECOMPPAD(s) associated with
	4910	* PUSH_MULTICALL; in which case PL_comppad no longer
	4911	* points to newcv's pad. */
	4912	if (newcv != last_pushed_cv \|\| PL_comppad != last_pad)
	4913	{
	4914	U8 flags = (CXp_SUB_RE \|
	4915	((newcv == caller_cv) ? CXp_SUB_RE_FAKE : 0));
	4916	if (last_pushed_cv) {
	4917	CHANGE_MULTICALL_FLAGS(newcv, flags);
	4918	}
	4919	else {
	4920	PUSH_MULTICALL_FLAGS(newcv, flags);
	4921	}
	4922	last_pushed_cv = newcv;
	4923	}
	4924	else {
	4925	/* these assignments are just to silence compiler
	4926	* warnings */
	4927	multicall_cop = NULL;
	4928	newsp = NULL;
	4929	}
	4930	last_pad = PL_comppad;
	4931
	4932	/* the initial nextstate you would normally execute
	4933	* at the start of an eval (which would cause error
	4934	* messages to come from the eval), may be optimised
	4935	* away from the execution path in the regex code blocks;
	4936	* so manually set PL_curcop to it initially */
	4937	{
	4938	OP *o = cUNOPx(nop)->op_first;
	4939	assert(o->op_type == OP_NULL);
	4940	if (o->op_targ == OP_SCOPE) {
	4941	o = cUNOPo->op_first;
	4942	}
	4943	else {
	4944	assert(o->op_targ == OP_LEAVE);
	4945	o = cUNOPo->op_first;
	4946	assert(o->op_type == OP_ENTER);
	4947	o = o->op_sibling;
	4948	}
	4949
	4950	if (o->op_type != OP_STUB) {
	4951	assert( o->op_type == OP_NEXTSTATE
	4952	\|\| o->op_type == OP_DBSTATE
	4953	\|\| (o->op_type == OP_NULL
	4954	&& ( o->op_targ == OP_NEXTSTATE
	4955	\|\| o->op_targ == OP_DBSTATE
	4956	)
	4957	)
	4958	);
	4959	PL_curcop = (COP*)o;
	4960	}
	4961	}
	4962	nop = nop->op_next;
	4963
	4964	DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
	4965	" re EVAL PL_op=0x%"UVxf"\n", PTR2UV(nop)) );
	4966
	4967	rex->offs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
	4968
	4969	if (sv_yes_mark) {
	4970	SV *sv_mrk = get_sv("REGMARK", 1);
	4971	sv_setsv(sv_mrk, sv_yes_mark);
	4972	}
	4973
	4974	/* we don't use MULTICALL here as we want to call the
	4975	* first op of the block of interest, rather than the
	4976	* first op of the sub */
	4977	before = (IV)(SP-PL_stack_base);
	4978	PL_op = nop;
	4979	CALLRUNOPS(aTHX); /* Scalar context. */
	4980	SPAGAIN;
	4981	if ((IV)(SP-PL_stack_base) == before)
	4982	ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
	4983	else {
	4984	ret = POPs;
	4985	PUTBACK;
	4986	}
	4987
	4988	/* before restoring everything, evaluate the returned
	4989	* value, so that 'uninit' warnings don't use the wrong
	4990	* PL_op or pad. Also need to process any magic vars
	4991	* (e.g. $1) before parentheses are restored */
	4992
	4993	PL_op = NULL;
	4994
	4995	re_sv = NULL;
	4996	if (logical == 0) /* (?{})/ */
	4997	sv_setsv(save_scalar(PL_replgv), ret); /* $^R */
	4998	else if (logical == 1) { /* /(?(?{...})X\|Y)/ */
	4999	sw = cBOOL(SvTRUE(ret));
	5000	logical = 0;
	5001	}
	5002	else { /* /(??{}) */
	5003	/* if it's overloaded, let the regex compiler handle
	5004	* it; otherwise extract regex, or stringify */
	5005	if (!SvAMAGIC(ret)) {
	5006	SV *sv = ret;
	5007	if (SvROK(sv))
	5008	sv = SvRV(sv);
	5009	if (SvTYPE(sv) == SVt_REGEXP)
	5010	re_sv = (REGEXP*) sv;
	5011	else if (SvSMAGICAL(sv)) {
	5012	MAGIC *mg = mg_find(sv, PERL_MAGIC_qr);
	5013	if (mg)
	5014	re_sv = (REGEXP *) mg->mg_obj;
	5015	}
	5016
	5017	/* force any magic, undef warnings here */
	5018	if (!re_sv) {
	5019	ret = sv_mortalcopy(ret);
	5020	(void) SvPV_force_nolen(ret);
	5021	}
	5022	}
	5023
	5024	}
	5025
	5026	Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
	5027
	5028	/* *** Note that at this point we don't restore
	5029	* PL_comppad, (or pop the CxSUB) on the assumption it may
	5030	* be used again soon. This is safe as long as nothing
	5031	* in the regexp code uses the pad ! */
	5032	PL_op = oop;
	5033	PL_curcop = ocurcop;
	5034	PL_regeol = saved_regeol;
	5035	S_regcp_restore(aTHX_ rex, runops_cp, &maxopenparen);
	5036
	5037	if (logical != 2)
	5038	break;
	5039	}
	5040
	5041	/* only /(??{})/ from now on */
	5042	logical = 0;
	5043	{
	5044	/* extract RE object from returned value; compiling if
	5045	* necessary */
	5046
	5047	if (re_sv) {
	5048	re_sv = reg_temp_copy(NULL, re_sv);
	5049	}
	5050	else {
	5051	U32 pm_flags = 0;
	5052
	5053	if (SvUTF8(ret) && IN_BYTES) {
	5054	/* In use 'bytes': make a copy of the octet
	5055	* sequence, but without the flag on */
	5056	STRLEN len;
	5057	const char *const p = SvPV(ret, len);
	5058	ret = newSVpvn_flags(p, len, SVs_TEMP);
	5059	}
	5060	if (rex->intflags & PREGf_USE_RE_EVAL)
	5061	pm_flags \|= PMf_USE_RE_EVAL;
	5062
	5063	/* if we got here, it should be an engine which
	5064	* supports compiling code blocks and stuff */
	5065	assert(rex->engine && rex->engine->op_comp);
	5066	assert(!(scan->flags & ~RXf_PMf_COMPILETIME));
	5067	re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL,
	5068	rex->engine, NULL, NULL,
	5069	/* copy /msix etc to inner pattern */
	5070	scan->flags,
	5071	pm_flags);
	5072
	5073	if (!(SvFLAGS(ret)
	5074	& (SVs_TEMP \| SVs_PADTMP \| SVf_READONLY
	5075	\| SVs_GMG))) {
	5076	/* This isn't a first class regexp. Instead, it's
	5077	caching a regexp onto an existing, Perl visible
	5078	scalar. */
	5079	sv_magic(ret, MUTABLE_SV(re_sv), PERL_MAGIC_qr, 0, 0);
	5080	}
	5081	/* safe to do now that any $1 etc has been
	5082	* interpolated into the new pattern string and
	5083	* compiled */
	5084	S_regcp_restore(aTHX_ rex, runops_cp, &maxopenparen);
	5085	}
	5086	SAVEFREESV(re_sv);
	5087	re = ReANY(re_sv);
	5088	}
	5089	RXp_MATCH_COPIED_off(re);
	5090	re->subbeg = rex->subbeg;
	5091	re->sublen = rex->sublen;
	5092	re->suboffset = rex->suboffset;
	5093	re->subcoffset = rex->subcoffset;
	5094	rei = RXi_GET(re);
	5095	DEBUG_EXECUTE_r(
	5096	debug_start_match(re_sv, utf8_target, locinput, PL_regeol,
	5097	"Matching embedded");
	5098	);
	5099	startpoint = rei->program + 1;
	5100	ST.close_paren = 0; /* only used for GOSUB */
	5101
	5102	eval_recurse_doit: /* Share code with GOSUB below this line */
	5103	/* run the pattern returned from (??{...}) */
	5104
	5105	/* Save all the positions. */
	5106	ST.cp = regcppush(rex, 0, maxopenparen);
	5107	REGCP_SET(ST.lastcp);
	5108
	5109	re->lastparen = 0;
	5110	re->lastcloseparen = 0;
	5111
	5112	maxopenparen = 0;
	5113
	5114	/* XXXX This is too dramatic a measure... */
	5115	PL_reg_maxiter = 0;
	5116
	5117	ST.saved_utf8_pat = is_utf8_pat;
	5118	is_utf8_pat = cBOOL(RX_UTF8(re_sv));
	5119
	5120	ST.prev_rex = rex_sv;
	5121	ST.prev_curlyx = cur_curlyx;
	5122	rex_sv = re_sv;
	5123	SET_reg_curpm(rex_sv);
	5124	rex = re;
	5125	rexi = rei;
	5126	cur_curlyx = NULL;
	5127	ST.B = next;
	5128	ST.prev_eval = cur_eval;
	5129	cur_eval = st;
	5130	/* now continue from first node in postponed RE */
	5131	PUSH_YES_STATE_GOTO(EVAL_AB, startpoint, locinput);
	5132	assert(0); /* NOTREACHED */
	5133	}
	5134
	5135	case EVAL_AB: /* cleanup after a successful (??{A})B */
	5136	/* note: this is called twice; first after popping B, then A */
	5137	is_utf8_pat = ST.saved_utf8_pat;
	5138	rex_sv = ST.prev_rex;
	5139	SET_reg_curpm(rex_sv);
	5140	rex = ReANY(rex_sv);
	5141	rexi = RXi_GET(rex);
	5142	regcpblow(ST.cp);
	5143	cur_eval = ST.prev_eval;
	5144	cur_curlyx = ST.prev_curlyx;
	5145
	5146	/* XXXX This is too dramatic a measure... */
	5147	PL_reg_maxiter = 0;
	5148	if ( nochange_depth )
	5149	nochange_depth--;
	5150	sayYES;
	5151
	5152
	5153	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
	5154	/* note: this is called twice; first after popping B, then A */
	5155	is_utf8_pat = ST.saved_utf8_pat;
	5156	rex_sv = ST.prev_rex;
	5157	SET_reg_curpm(rex_sv);
	5158	rex = ReANY(rex_sv);
	5159	rexi = RXi_GET(rex);
	5160
	5161	REGCP_UNWIND(ST.lastcp);
	5162	regcppop(rex, &maxopenparen);
	5163	cur_eval = ST.prev_eval;
	5164	cur_curlyx = ST.prev_curlyx;
	5165	/* XXXX This is too dramatic a measure... */
	5166	PL_reg_maxiter = 0;
	5167	if ( nochange_depth )
	5168	nochange_depth--;
	5169	sayNO_SILENT;
	5170	#undef ST
	5171
	5172	case OPEN: /* ( */
	5173	n = ARG(scan); /* which paren pair */
	5174	rex->offs[n].start_tmp = locinput - PL_bostr;
	5175	if (n > maxopenparen)
	5176	maxopenparen = n;
	5177	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	5178	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf" tmp; maxopenparen=%"UVuf"\n",
	5179	PTR2UV(rex),
	5180	PTR2UV(rex->offs),
	5181	(UV)n,
	5182	(IV)rex->offs[n].start_tmp,
	5183	(UV)maxopenparen
	5184	));
	5185	lastopen = n;
	5186	break;
	5187
	5188	/* XXX really need to log other places start/end are set too */
	5189	#define CLOSE_CAPTURE \
	5190	rex->offs[n].start = rex->offs[n].start_tmp; \
	5191	rex->offs[n].end = locinput - PL_bostr; \
	5192	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, \
	5193	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf"..%"IVdf"\n", \
	5194	PTR2UV(rex), \
	5195	PTR2UV(rex->offs), \
	5196	(UV)n, \
	5197	(IV)rex->offs[n].start, \
	5198	(IV)rex->offs[n].end \
	5199	))
	5200
	5201	case CLOSE: /* ) */
	5202	n = ARG(scan); /* which paren pair */
	5203	CLOSE_CAPTURE;
	5204	if (n > rex->lastparen)
	5205	rex->lastparen = n;
	5206	rex->lastcloseparen = n;
	5207	if (cur_eval && cur_eval->u.eval.close_paren == n) {
	5208	goto fake_end;
	5209	}
	5210	break;
	5211
	5212	case ACCEPT: /* (*ACCEPT) */
	5213	if (ARG(scan)){
	5214	regnode *cursor;
	5215	for (cursor=scan;
	5216	cursor && OP(cursor)!=END;
	5217	cursor=regnext(cursor))
	5218	{
	5219	if ( OP(cursor)==CLOSE ){
	5220	n = ARG(cursor);
	5221	if ( n <= lastopen ) {
	5222	CLOSE_CAPTURE;
	5223	if (n > rex->lastparen)
	5224	rex->lastparen = n;
	5225	rex->lastcloseparen = n;
	5226	if ( n == ARG(scan) \|\| (cur_eval &&
	5227	cur_eval->u.eval.close_paren == n))
	5228	break;
	5229	}
	5230	}
	5231	}
	5232	}
	5233	goto fake_end;
	5234	/*NOTREACHED*/
	5235
	5236	case GROUPP: /* (?(1)) */
	5237	n = ARG(scan); /* which paren pair */
	5238	sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
	5239	break;
	5240
	5241	case NGROUPP: /* (?(<name>)) */
	5242	/* reg_check_named_buff_matched returns 0 for no match */
	5243	sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
	5244	break;
	5245
	5246	case INSUBP: /* (?(R)) */
	5247	n = ARG(scan);
	5248	sw = (cur_eval && (!n \|\| cur_eval->u.eval.close_paren == n));
	5249	break;
	5250
	5251	case DEFINEP: /* (?(DEFINE)) */
	5252	sw = 0;
	5253	break;
	5254
	5255	case IFTHEN: /* (?(cond)A\|B) */
	5256	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	5257	if (sw)
	5258	next = NEXTOPER(NEXTOPER(scan));
	5259	else {
	5260	next = scan + ARG(scan);
	5261	if (OP(next) == IFTHEN) /* Fake one. */
	5262	next = NEXTOPER(NEXTOPER(next));
	5263	}
	5264	break;
	5265
	5266	case LOGICAL: /* modifier for EVAL and IFMATCH */
	5267	logical = scan->flags;
	5268	break;
	5269
	5270	/*******************************************************************
	5271
	5272	The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
	5273	pattern, where A and B are subpatterns. (For simple A, CURLYM or
	5274	STAR/PLUS/CURLY/CURLYN are used instead.)
	5275
	5276	A*B is compiled as <CURLYX><A><WHILEM><B>
	5277
	5278	On entry to the subpattern, CURLYX is called. This pushes a CURLYX
	5279	state, which contains the current count, initialised to -1. It also sets
	5280	cur_curlyx to point to this state, with any previous value saved in the
	5281	state block.
	5282
	5283	CURLYX then jumps straight to the WHILEM op, rather than executing A,
	5284	since the pattern may possibly match zero times (i.e. it's a while {} loop
	5285	rather than a do {} while loop).
	5286
	5287	Each entry to WHILEM represents a successful match of A. The count in the
	5288	CURLYX block is incremented, another WHILEM state is pushed, and execution
	5289	passes to A or B depending on greediness and the current count.
	5290
	5291	For example, if matching against the string a1a2a3b (where the aN are
	5292	substrings that match /A/), then the match progresses as follows: (the
	5293	pushed states are interspersed with the bits of strings matched so far):
	5294
	5295	<CURLYX cnt=-1>
	5296	<CURLYX cnt=0><WHILEM>
	5297	<CURLYX cnt=1><WHILEM> a1 <WHILEM>
	5298	<CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
	5299	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
	5300	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
	5301
	5302	(Contrast this with something like CURLYM, which maintains only a single
	5303	backtrack state:
	5304
	5305	<CURLYM cnt=0> a1
	5306	a1 <CURLYM cnt=1> a2
	5307	a1 a2 <CURLYM cnt=2> a3
	5308	a1 a2 a3 <CURLYM cnt=3> b
	5309	)
	5310
	5311	Each WHILEM state block marks a point to backtrack to upon partial failure
	5312	of A or B, and also contains some minor state data related to that
	5313	iteration. The CURLYX block, pointed to by cur_curlyx, contains the
	5314	overall state, such as the count, and pointers to the A and B ops.
	5315
	5316	This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
	5317	must always point to the current CURLYX block, the rules are:
	5318
	5319	When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
	5320	and set cur_curlyx to point to the new block.
	5321
	5322	When popping the CURLYX block after a successful or unsuccessful match,
	5323	restore the previous cur_curlyx.
	5324
	5325	When WHILEM is about to execute B, save the current cur_curlyx, and set it
	5326	to the outer one saved in the CURLYX block.
	5327
	5328	When popping the WHILEM block after a successful or unsuccessful B match,
	5329	restore the previous cur_curlyx.
	5330
	5331	Here's an example for the pattern (AI* BI)*BO
	5332	I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
	5333
	5334	cur_
	5335	curlyx backtrack stack
	5336	------ ---------------
	5337	NULL
	5338	CO <CO prev=NULL> <WO>
	5339	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	5340	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	5341	NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
	5342
	5343	At this point the pattern succeeds, and we work back down the stack to
	5344	clean up, restoring as we go:
	5345
	5346	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	5347	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	5348	CO <CO prev=NULL> <WO>
	5349	NULL
	5350
	5351	*******************************************************************/
	5352
	5353	#define ST st->u.curlyx
	5354
	5355	case CURLYX: /* start of /A*B/ (for complex A) */
	5356	{
	5357	/* No need to save/restore up to this paren */
	5358	I32 parenfloor = scan->flags;
	5359
	5360	assert(next); /* keep Coverity happy */
	5361	if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
	5362	next += ARG(next);
	5363
	5364	/* XXXX Probably it is better to teach regpush to support
	5365	parenfloor > maxopenparen ... */
	5366	if (parenfloor > (I32)rex->lastparen)
	5367	parenfloor = rex->lastparen; /* Pessimization... */
	5368
	5369	ST.prev_curlyx= cur_curlyx;
	5370	cur_curlyx = st;
	5371	ST.cp = PL_savestack_ix;
	5372
	5373	/* these fields contain the state of the current curly.
	5374	* they are accessed by subsequent WHILEMs */
	5375	ST.parenfloor = parenfloor;
	5376	ST.me = scan;
	5377	ST.B = next;
	5378	ST.minmod = minmod;
	5379	minmod = 0;
	5380	ST.count = -1; /* this will be updated by WHILEM */
	5381	ST.lastloc = NULL; /* this will be updated by WHILEM */
	5382
	5383	PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput);
	5384	assert(0); /* NOTREACHED */
	5385	}
	5386
	5387	case CURLYX_end: /* just finished matching all of A*B */
	5388	cur_curlyx = ST.prev_curlyx;
	5389	sayYES;
	5390	assert(0); /* NOTREACHED */
	5391
	5392	case CURLYX_end_fail: /* just failed to match all of A*B */
	5393	regcpblow(ST.cp);
	5394	cur_curlyx = ST.prev_curlyx;
	5395	sayNO;
	5396	assert(0); /* NOTREACHED */
	5397
	5398
	5399	#undef ST
	5400	#define ST st->u.whilem
	5401
	5402	case WHILEM: /* just matched an A in /A*B/ (for complex A) */
	5403	{
	5404	/* see the discussion above about CURLYX/WHILEM */
	5405	I32 n;
	5406	int min = ARG1(cur_curlyx->u.curlyx.me);
	5407	int max = ARG2(cur_curlyx->u.curlyx.me);
	5408	regnode *A = NEXTOPER(cur_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS;
	5409
	5410	assert(cur_curlyx); /* keep Coverity happy */
	5411	n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
	5412	ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
	5413	ST.cache_offset = 0;
	5414	ST.cache_mask = 0;
	5415
	5416
	5417	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5418	"%*s whilem: matched %ld out of %d..%d\n",
	5419	REPORT_CODE_OFF+depth*2, "", (long)n, min, max)
	5420	);
	5421
	5422	/* First just match a string of min A's. */
	5423
	5424	if (n < min) {
	5425	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	5426	maxopenparen);
	5427	cur_curlyx->u.curlyx.lastloc = locinput;
	5428	REGCP_SET(ST.lastcp);
	5429
	5430	PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput);
	5431	assert(0); /* NOTREACHED */
	5432	}
	5433
	5434	/* If degenerate A matches "", assume A done. */
	5435
	5436	if (locinput == cur_curlyx->u.curlyx.lastloc) {
	5437	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5438	"%*s whilem: empty match detected, trying continuation...\n",
	5439	REPORT_CODE_OFF+depth*2, "")
	5440	);
	5441	goto do_whilem_B_max;
	5442	}
	5443
	5444	/* super-linear cache processing */
	5445
	5446	if (scan->flags) {
	5447
	5448	if (!PL_reg_maxiter) {
	5449	/* start the countdown: Postpone detection until we
	5450	* know the match is not that much linear. */
	5451	PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
	5452	/* possible overflow for long strings and many CURLYX's */
	5453	if (PL_reg_maxiter < 0)
	5454	PL_reg_maxiter = I32_MAX;
	5455	PL_reg_leftiter = PL_reg_maxiter;
	5456	}
	5457
	5458	if (PL_reg_leftiter-- == 0) {
	5459	/* initialise cache */
	5460	const I32 size = (PL_reg_maxiter + 7)/8;
	5461	if (PL_reg_poscache) {
	5462	if ((I32)PL_reg_poscache_size < size) {
	5463	Renew(PL_reg_poscache, size, char);
	5464	PL_reg_poscache_size = size;
	5465	}
	5466	Zero(PL_reg_poscache, size, char);
	5467	}
	5468	else {
	5469	PL_reg_poscache_size = size;
	5470	Newxz(PL_reg_poscache, size, char);
	5471	}
	5472	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5473	"%swhilem: Detected a super-linear match, switching on caching%s...\n",
	5474	PL_colors[4], PL_colors[5])
	5475	);
	5476	}
	5477
	5478	if (PL_reg_leftiter < 0) {
	5479	/* have we already failed at this position? */
	5480	I32 offset, mask;
	5481	offset = (scan->flags & 0xf) - 1
	5482	+ (locinput - PL_bostr) * (scan->flags>>4);
	5483	mask = 1 << (offset % 8);
	5484	offset /= 8;
	5485	if (PL_reg_poscache[offset] & mask) {
	5486	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5487	"%*s whilem: (cache) already tried at this position...\n",
	5488	REPORT_CODE_OFF+depth*2, "")
	5489	);
	5490	sayNO; /* cache records failure */
	5491	}
	5492	ST.cache_offset = offset;
	5493	ST.cache_mask = mask;
	5494	}
	5495	}
	5496
	5497	/* Prefer B over A for minimal matching. */
	5498
	5499	if (cur_curlyx->u.curlyx.minmod) {
	5500	ST.save_curlyx = cur_curlyx;
	5501	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	5502	ST.cp = regcppush(rex, ST.save_curlyx->u.curlyx.parenfloor,
	5503	maxopenparen);
	5504	REGCP_SET(ST.lastcp);
	5505	PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
	5506	locinput);
	5507	assert(0); /* NOTREACHED */
	5508	}
	5509
	5510	/* Prefer A over B for maximal matching. */
	5511
	5512	if (n < max) { /* More greed allowed? */
	5513	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	5514	maxopenparen);
	5515	cur_curlyx->u.curlyx.lastloc = locinput;
	5516	REGCP_SET(ST.lastcp);
	5517	PUSH_STATE_GOTO(WHILEM_A_max, A, locinput);
	5518	assert(0); /* NOTREACHED */
	5519	}
	5520	goto do_whilem_B_max;
	5521	}
	5522	assert(0); /* NOTREACHED */
	5523
	5524	case WHILEM_B_min: /* just matched B in a minimal match */
	5525	case WHILEM_B_max: /* just matched B in a maximal match */
	5526	cur_curlyx = ST.save_curlyx;
	5527	sayYES;
	5528	assert(0); /* NOTREACHED */
	5529
	5530	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
	5531	cur_curlyx = ST.save_curlyx;
	5532	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	5533	cur_curlyx->u.curlyx.count--;
	5534	CACHEsayNO;
	5535	assert(0); /* NOTREACHED */
	5536
	5537	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
	5538	/* FALL THROUGH */
	5539	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
	5540	REGCP_UNWIND(ST.lastcp);
	5541	regcppop(rex, &maxopenparen);
	5542	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	5543	cur_curlyx->u.curlyx.count--;
	5544	CACHEsayNO;
	5545	assert(0); /* NOTREACHED */
	5546
	5547	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
	5548	REGCP_UNWIND(ST.lastcp);
	5549	regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
	5550	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5551	"%*s whilem: failed, trying continuation...\n",
	5552	REPORT_CODE_OFF+depth*2, "")
	5553	);
	5554	do_whilem_B_max:
	5555	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	5556	&& ckWARN(WARN_REGEXP)
	5557	&& !reginfo->warned)
	5558	{
	5559	reginfo->warned = TRUE;
	5560	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	5561	"Complex regular subexpression recursion limit (%d) "
	5562	"exceeded",
	5563	REG_INFTY - 1);
	5564	}
	5565
	5566	/* now try B */
	5567	ST.save_curlyx = cur_curlyx;
	5568	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	5569	PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
	5570	locinput);
	5571	assert(0); /* NOTREACHED */
	5572
	5573	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
	5574	cur_curlyx = ST.save_curlyx;
	5575	REGCP_UNWIND(ST.lastcp);
	5576	regcppop(rex, &maxopenparen);
	5577
	5578	if (cur_curlyx->u.curlyx.count >= /*max*/ARG2(cur_curlyx->u.curlyx.me)) {
	5579	/* Maximum greed exceeded */
	5580	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	5581	&& ckWARN(WARN_REGEXP)
	5582	&& !reginfo->warned)
	5583	{
	5584	reginfo->warned = TRUE;
	5585	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	5586	"Complex regular subexpression recursion "
	5587	"limit (%d) exceeded",
	5588	REG_INFTY - 1);
	5589	}
	5590	cur_curlyx->u.curlyx.count--;
	5591	CACHEsayNO;
	5592	}
	5593
	5594	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5595	"%s trying longer...\n", REPORT_CODE_OFF+depth2, "")
	5596	);
	5597	/* Try grabbing another A and see if it helps. */
	5598	cur_curlyx->u.curlyx.lastloc = locinput;
	5599	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	5600	maxopenparen);
	5601	REGCP_SET(ST.lastcp);
	5602	PUSH_STATE_GOTO(WHILEM_A_min,
	5603	/*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
	5604	locinput);
	5605	assert(0); /* NOTREACHED */
	5606
	5607	#undef ST
	5608	#define ST st->u.branch
	5609
	5610	case BRANCHJ: /* /(...\|A\|...)/ with long next pointer */
	5611	next = scan + ARG(scan);
	5612	if (next == scan)
	5613	next = NULL;
	5614	scan = NEXTOPER(scan);
	5615	/* FALL THROUGH */
	5616
	5617	case BRANCH: /* /(...\|A\|...)/ */
	5618	scan = NEXTOPER(scan); /* scan now points to inner node */
	5619	ST.lastparen = rex->lastparen;
	5620	ST.lastcloseparen = rex->lastcloseparen;
	5621	ST.next_branch = next;
	5622	REGCP_SET(ST.cp);
	5623
	5624	/* Now go into the branch */
	5625	if (has_cutgroup) {
	5626	PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput);
	5627	} else {
	5628	PUSH_STATE_GOTO(BRANCH_next, scan, locinput);
	5629	}
	5630	assert(0); /* NOTREACHED */
	5631
	5632	case CUTGROUP: /* /(*THEN)/ */
	5633	sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
	5634	MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5635	PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
	5636	assert(0); /* NOTREACHED */
	5637
	5638	case CUTGROUP_next_fail:
	5639	do_cutgroup = 1;
	5640	no_final = 1;
	5641	if (st->u.mark.mark_name)
	5642	sv_commit = st->u.mark.mark_name;
	5643	sayNO;
	5644	assert(0); /* NOTREACHED */
	5645
	5646	case BRANCH_next:
	5647	sayYES;
	5648	assert(0); /* NOTREACHED */
	5649
	5650	case BRANCH_next_fail: /* that branch failed; try the next, if any */
	5651	if (do_cutgroup) {
	5652	do_cutgroup = 0;
	5653	no_final = 0;
	5654	}
	5655	REGCP_UNWIND(ST.cp);
	5656	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5657	scan = ST.next_branch;
	5658	/* no more branches? */
	5659	if (!scan \|\| (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
	5660	DEBUG_EXECUTE_r({
	5661	PerlIO_printf( Perl_debug_log,
	5662	"%*s %sBRANCH failed...%s\n",
	5663	REPORT_CODE_OFF+depth*2, "",
	5664	PL_colors[4],
	5665	PL_colors[5] );
	5666	});
	5667	sayNO_SILENT;
	5668	}
	5669	continue; /* execute next BRANCH[J] op */
	5670	assert(0); /* NOTREACHED */
	5671
	5672	case MINMOD: /* next op will be non-greedy, e.g. A*? */
	5673	minmod = 1;
	5674	break;
	5675
	5676	#undef ST
	5677	#define ST st->u.curlym
	5678
	5679	case CURLYM: /* /A{m,n}B/ where A is fixed-length */
	5680
	5681	/* This is an optimisation of CURLYX that enables us to push
	5682	* only a single backtracking state, no matter how many matches
	5683	* there are in {m,n}. It relies on the pattern being constant
	5684	* length, with no parens to influence future backrefs
	5685	*/
	5686
	5687	ST.me = scan;
	5688	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5689
	5690	ST.lastparen = rex->lastparen;
	5691	ST.lastcloseparen = rex->lastcloseparen;
	5692
	5693	/* if paren positive, emulate an OPEN/CLOSE around A */
	5694	if (ST.me->flags) {
	5695	U32 paren = ST.me->flags;
	5696	if (paren > maxopenparen)
	5697	maxopenparen = paren;
	5698	scan += NEXT_OFF(scan); /* Skip former OPEN. */
	5699	}
	5700	ST.A = scan;
	5701	ST.B = next;
	5702	ST.alen = 0;
	5703	ST.count = 0;
	5704	ST.minmod = minmod;
	5705	minmod = 0;
	5706	ST.c1 = CHRTEST_UNINIT;
	5707	REGCP_SET(ST.cp);
	5708
	5709	if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
	5710	goto curlym_do_B;
	5711
	5712	curlym_do_A: /* execute the A in /A{m,n}B/ */
	5713	PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput); /* match A */
	5714	assert(0); /* NOTREACHED */
	5715
	5716	case CURLYM_A: /* we've just matched an A */
	5717	ST.count++;
	5718	/* after first match, determine A's length: u.curlym.alen */
	5719	if (ST.count == 1) {
	5720	if (PL_reg_match_utf8) {
	5721	char *s = st->locinput;
	5722	while (s < locinput) {
	5723	ST.alen++;
	5724	s += UTF8SKIP(s);
	5725	}
	5726	}
	5727	else {
	5728	ST.alen = locinput - st->locinput;
	5729	}
	5730	if (ST.alen == 0)
	5731	ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
	5732	}
	5733	DEBUG_EXECUTE_r(
	5734	PerlIO_printf(Perl_debug_log,
	5735	"%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
	5736	(int)(REPORT_CODE_OFF+(depth*2)), "",
	5737	(IV) ST.count, (IV)ST.alen)
	5738	);
	5739
	5740	if (cur_eval && cur_eval->u.eval.close_paren &&
	5741	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5742	goto fake_end;
	5743
	5744	{
	5745	I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
	5746	if ( max == REG_INFTY \|\| ST.count < max )
	5747	goto curlym_do_A; /* try to match another A */
	5748	}
	5749	goto curlym_do_B; /* try to match B */
	5750
	5751	case CURLYM_A_fail: /* just failed to match an A */
	5752	REGCP_UNWIND(ST.cp);
	5753
	5754	if (ST.minmod \|\| ST.count < ARG1(ST.me) /* min*/
	5755	\|\| (cur_eval && cur_eval->u.eval.close_paren &&
	5756	cur_eval->u.eval.close_paren == (U32)ST.me->flags))
	5757	sayNO;
	5758
	5759	curlym_do_B: /* execute the B in /A{m,n}B/ */
	5760	if (ST.c1 == CHRTEST_UNINIT) {
	5761	/* calculate c1 and c2 for possible match of 1st char
	5762	* following curly */
	5763	ST.c1 = ST.c2 = CHRTEST_VOID;
	5764	if (HAS_TEXT(ST.B) \|\| JUMPABLE(ST.B)) {
	5765	regnode *text_node = ST.B;
	5766	if (! HAS_TEXT(text_node))
	5767	FIND_NEXT_IMPT(text_node);
	5768	/* this used to be
	5769
	5770	(HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
	5771
	5772	But the former is redundant in light of the latter.
	5773
	5774	if this changes back then the macro for
	5775	IS_TEXT and friends need to change.
	5776	*/
	5777	if (PL_regkind[OP(text_node)] == EXACT) {
	5778	if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
	5779	text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
	5780	is_utf8_pat))
	5781	{
	5782	sayNO;
	5783	}
	5784	}
	5785	}
	5786	}
	5787
	5788	DEBUG_EXECUTE_r(
	5789	PerlIO_printf(Perl_debug_log,
	5790	"%*s CURLYM trying tail with matches=%"IVdf"...\n",
	5791	(int)(REPORT_CODE_OFF+(depth*2)),
	5792	"", (IV)ST.count)
	5793	);
	5794	if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
	5795	if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
	5796	if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
	5797	&& memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
	5798	{
	5799	/* simulate B failing */
	5800	DEBUG_OPTIMISE_r(
	5801	PerlIO_printf(Perl_debug_log,
	5802	"%*s CURLYM Fast bail next target=U+%"UVXf" c1=U+%"UVXf" c2=U+%"UVXf"\n",
	5803	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5804	valid_utf8_to_uvchr((U8 *) locinput, NULL),
	5805	valid_utf8_to_uvchr(ST.c1_utf8, NULL),
	5806	valid_utf8_to_uvchr(ST.c2_utf8, NULL))
	5807	);
	5808	state_num = CURLYM_B_fail;
	5809	goto reenter_switch;
	5810	}
	5811	}
	5812	else if (nextchr != ST.c1 && nextchr != ST.c2) {
	5813	/* simulate B failing */
	5814	DEBUG_OPTIMISE_r(
	5815	PerlIO_printf(Perl_debug_log,
	5816	"%*s CURLYM Fast bail next target=U+%X c1=U+%X c2=U+%X\n",
	5817	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5818	(int) nextchr, ST.c1, ST.c2)
	5819	);
	5820	state_num = CURLYM_B_fail;
	5821	goto reenter_switch;
	5822	}
	5823	}
	5824
	5825	if (ST.me->flags) {
	5826	/* emulate CLOSE: mark current A as captured */
	5827	I32 paren = ST.me->flags;
	5828	if (ST.count) {
	5829	rex->offs[paren].start
	5830	= HOPc(locinput, -ST.alen) - PL_bostr;
	5831	rex->offs[paren].end = locinput - PL_bostr;
	5832	if ((U32)paren > rex->lastparen)
	5833	rex->lastparen = paren;
	5834	rex->lastcloseparen = paren;
	5835	}
	5836	else
	5837	rex->offs[paren].end = -1;
	5838	if (cur_eval && cur_eval->u.eval.close_paren &&
	5839	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5840	{
	5841	if (ST.count)
	5842	goto fake_end;
	5843	else
	5844	sayNO;
	5845	}
	5846	}
	5847
	5848	PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput); /* match B */
	5849	assert(0); /* NOTREACHED */
	5850
	5851	case CURLYM_B_fail: /* just failed to match a B */
	5852	REGCP_UNWIND(ST.cp);
	5853	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5854	if (ST.minmod) {
	5855	I32 max = ARG2(ST.me);
	5856	if (max != REG_INFTY && ST.count == max)
	5857	sayNO;
	5858	goto curlym_do_A; /* try to match a further A */
	5859	}
	5860	/* backtrack one A */
	5861	if (ST.count == ARG1(ST.me) /* min */)
	5862	sayNO;
	5863	ST.count--;
	5864	SET_locinput(HOPc(locinput, -ST.alen));
	5865	goto curlym_do_B; /* try to match B */
	5866
	5867	#undef ST
	5868	#define ST st->u.curly
	5869
	5870	#define CURLY_SETPAREN(paren, success) \
	5871	if (paren) { \
	5872	if (success) { \
	5873	rex->offs[paren].start = HOPc(locinput, -1) - PL_bostr; \
	5874	rex->offs[paren].end = locinput - PL_bostr; \
	5875	if (paren > rex->lastparen) \
	5876	rex->lastparen = paren; \
	5877	rex->lastcloseparen = paren; \
	5878	} \
	5879	else { \
	5880	rex->offs[paren].end = -1; \
	5881	rex->lastparen = ST.lastparen; \
	5882	rex->lastcloseparen = ST.lastcloseparen; \
	5883	} \
	5884	}
	5885
	5886	case STAR: /* /AB/ where A is width 1 char /
	5887	ST.paren = 0;
	5888	ST.min = 0;
	5889	ST.max = REG_INFTY;
	5890	scan = NEXTOPER(scan);
	5891	goto repeat;
	5892
	5893	case PLUS: /* /A+B/ where A is width 1 char */
	5894	ST.paren = 0;
	5895	ST.min = 1;
	5896	ST.max = REG_INFTY;
	5897	scan = NEXTOPER(scan);
	5898	goto repeat;
	5899
	5900	case CURLYN: /* /(A){m,n}B/ where A is width 1 char */
	5901	ST.paren = scan->flags; /* Which paren to set */
	5902	ST.lastparen = rex->lastparen;
	5903	ST.lastcloseparen = rex->lastcloseparen;
	5904	if (ST.paren > maxopenparen)
	5905	maxopenparen = ST.paren;
	5906	ST.min = ARG1(scan); /* min to match */
	5907	ST.max = ARG2(scan); /* max to match */
	5908	if (cur_eval && cur_eval->u.eval.close_paren &&
	5909	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5910	ST.min=1;
	5911	ST.max=1;
	5912	}
	5913	scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
	5914	goto repeat;
	5915
	5916	case CURLY: /* /A{m,n}B/ where A is width 1 char */
	5917	ST.paren = 0;
	5918	ST.min = ARG1(scan); /* min to match */
	5919	ST.max = ARG2(scan); /* max to match */
	5920	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5921	repeat:
	5922	/*
	5923	* Lookahead to avoid useless match attempts
	5924	* when we know what character comes next.
	5925	*
	5926	* Used to only do .x and .?x, but now it allows
	5927	* for )'s, ('s and (?{ ... })'s to be in the way
	5928	* of the quantifier and the EXACT-like node. -- japhy
	5929	*/
	5930
	5931	assert(ST.min <= ST.max);
	5932	if (! HAS_TEXT(next) && ! JUMPABLE(next)) {
	5933	ST.c1 = ST.c2 = CHRTEST_VOID;
	5934	}
	5935	else {
	5936	regnode *text_node = next;
	5937
	5938	if (! HAS_TEXT(text_node))
	5939	FIND_NEXT_IMPT(text_node);
	5940
	5941	if (! HAS_TEXT(text_node))
	5942	ST.c1 = ST.c2 = CHRTEST_VOID;
	5943	else {
	5944	if ( PL_regkind[OP(text_node)] != EXACT ) {
	5945	ST.c1 = ST.c2 = CHRTEST_VOID;
	5946	}
	5947	else {
	5948
	5949	/* Currently we only get here when
	5950
	5951	PL_rekind[OP(text_node)] == EXACT
	5952
	5953	if this changes back then the macro for IS_TEXT and
	5954	friends need to change. */
	5955	if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
	5956	text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
	5957	is_utf8_pat))
	5958	{
	5959	sayNO;
	5960	}
	5961	}
	5962	}
	5963	}
	5964
	5965	ST.A = scan;
	5966	ST.B = next;
	5967	if (minmod) {
	5968	char *li = locinput;
	5969	minmod = 0;
	5970	if (ST.min &&
	5971	regrepeat(rex, &li, ST.A, ST.min, depth, is_utf8_pat)
	5972	< ST.min)
	5973	sayNO;
	5974	SET_locinput(li);
	5975	ST.count = ST.min;
	5976	REGCP_SET(ST.cp);
	5977	if (ST.c1 == CHRTEST_VOID)
	5978	goto curly_try_B_min;
	5979
	5980	ST.oldloc = locinput;
	5981
	5982	/* set ST.maxpos to the furthest point along the
	5983	* string that could possibly match */
	5984	if (ST.max == REG_INFTY) {
	5985	ST.maxpos = PL_regeol - 1;
	5986	if (utf8_target)
	5987	while (UTF8_IS_CONTINUATION((U8)ST.maxpos))
	5988	ST.maxpos--;
	5989	}
	5990	else if (utf8_target) {
	5991	int m = ST.max - ST.min;
	5992	for (ST.maxpos = locinput;
	5993	m >0 && ST.maxpos < PL_regeol; m--)
	5994	ST.maxpos += UTF8SKIP(ST.maxpos);
	5995	}
	5996	else {
	5997	ST.maxpos = locinput + ST.max - ST.min;
	5998	if (ST.maxpos >= PL_regeol)
	5999	ST.maxpos = PL_regeol - 1;
	6000	}
	6001	goto curly_try_B_min_known;
	6002
	6003	}
	6004	else {
	6005	/* avoid taking address of locinput, so it can remain
	6006	* a register var */
	6007	char *li = locinput;
	6008	ST.count = regrepeat(rex, &li, ST.A, ST.max, depth,
	6009	is_utf8_pat);
	6010	if (ST.count < ST.min)
	6011	sayNO;
	6012	SET_locinput(li);
	6013	if ((ST.count > ST.min)
	6014	&& (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
	6015	{
	6016	/* A{m,n} must come at the end of the string, there's
	6017	* no point in backing off ... */
	6018	ST.min = ST.count;
	6019	/* ...except that $ and \Z can match before and after
	6020	newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
	6021	We may back off by one in this case. */
	6022	if (UCHARAT(locinput - 1) == '\n' && OP(ST.B) != EOS)
	6023	ST.min--;
	6024	}
	6025	REGCP_SET(ST.cp);
	6026	goto curly_try_B_max;
	6027	}
	6028	assert(0); /* NOTREACHED */
	6029
	6030
	6031	case CURLY_B_min_known_fail:
	6032	/* failed to find B in a non-greedy match where c1,c2 valid */
	6033
	6034	REGCP_UNWIND(ST.cp);
	6035	if (ST.paren) {
	6036	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6037	}
	6038	/* Couldn't or didn't -- move forward. */
	6039	ST.oldloc = locinput;
	6040	if (utf8_target)
	6041	locinput += UTF8SKIP(locinput);
	6042	else
	6043	locinput++;
	6044	ST.count++;
	6045	curly_try_B_min_known:
	6046	/* find the next place where 'B' could work, then call B */
	6047	{
	6048	int n;
	6049	if (utf8_target) {
	6050	n = (ST.oldloc == locinput) ? 0 : 1;
	6051	if (ST.c1 == ST.c2) {
	6052	/* set n to utf8_distance(oldloc, locinput) */
	6053	while (locinput <= ST.maxpos
	6054	&& memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput)))
	6055	{
	6056	locinput += UTF8SKIP(locinput);
	6057	n++;
	6058	}
	6059	}
	6060	else {
	6061	/* set n to utf8_distance(oldloc, locinput) */
	6062	while (locinput <= ST.maxpos
	6063	&& memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
	6064	&& memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
	6065	{
	6066	locinput += UTF8SKIP(locinput);
	6067	n++;
	6068	}
	6069	}
	6070	}
	6071	else { /* Not utf8_target */
	6072	if (ST.c1 == ST.c2) {
	6073	while (locinput <= ST.maxpos &&
	6074	UCHARAT(locinput) != ST.c1)
	6075	locinput++;
	6076	}
	6077	else {
	6078	while (locinput <= ST.maxpos
	6079	&& UCHARAT(locinput) != ST.c1
	6080	&& UCHARAT(locinput) != ST.c2)
	6081	locinput++;
	6082	}
	6083	n = locinput - ST.oldloc;
	6084	}
	6085	if (locinput > ST.maxpos)
	6086	sayNO;
	6087	if (n) {
	6088	/* In /a{m,n}b/, ST.oldloc is at "a" x m, locinput is
	6089	* at b; check that everything between oldloc and
	6090	* locinput matches */
	6091	char *li = ST.oldloc;
	6092	ST.count += n;
	6093	if (regrepeat(rex, &li, ST.A, n, depth, is_utf8_pat) < n)
	6094	sayNO;
	6095	assert(n == REG_INFTY \|\| locinput == li);
	6096	}
	6097	CURLY_SETPAREN(ST.paren, ST.count);
	6098	if (cur_eval && cur_eval->u.eval.close_paren &&
	6099	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6100	goto fake_end;
	6101	}
	6102	PUSH_STATE_GOTO(CURLY_B_min_known, ST.B, locinput);
	6103	}
	6104	assert(0); /* NOTREACHED */
	6105
	6106
	6107	case CURLY_B_min_fail:
	6108	/* failed to find B in a non-greedy match where c1,c2 invalid */
	6109
	6110	REGCP_UNWIND(ST.cp);
	6111	if (ST.paren) {
	6112	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6113	}
	6114	/* failed -- move forward one */
	6115	{
	6116	char *li = locinput;
	6117	if (!regrepeat(rex, &li, ST.A, 1, depth, is_utf8_pat)) {
	6118	sayNO;
	6119	}
	6120	locinput = li;
	6121	}
	6122	{
	6123	ST.count++;
	6124	if (ST.count <= ST.max \|\| (ST.max == REG_INFTY &&
	6125	ST.count > 0)) /* count overflow ? */
	6126	{
	6127	curly_try_B_min:
	6128	CURLY_SETPAREN(ST.paren, ST.count);
	6129	if (cur_eval && cur_eval->u.eval.close_paren &&
	6130	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6131	goto fake_end;
	6132	}
	6133	PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
	6134	}
	6135	}
	6136	sayNO;
	6137	assert(0); /* NOTREACHED */
	6138
	6139
	6140	curly_try_B_max:
	6141	/* a successful greedy match: now try to match B */
	6142	if (cur_eval && cur_eval->u.eval.close_paren &&
	6143	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6144	goto fake_end;
	6145	}
	6146	{
	6147	bool could_match = locinput < PL_regeol;
	6148
	6149	/* If it could work, try it. */
	6150	if (ST.c1 != CHRTEST_VOID && could_match) {
	6151	if (! UTF8_IS_INVARIANT(UCHARAT(locinput)) && utf8_target)
	6152	{
	6153	could_match = memEQ(locinput,
	6154	ST.c1_utf8,
	6155	UTF8SKIP(locinput))
	6156	\|\| memEQ(locinput,
	6157	ST.c2_utf8,
	6158	UTF8SKIP(locinput));
	6159	}
	6160	else {
	6161	could_match = UCHARAT(locinput) == ST.c1
	6162	\|\| UCHARAT(locinput) == ST.c2;
	6163	}
	6164	}
	6165	if (ST.c1 == CHRTEST_VOID \|\| could_match) {
	6166	CURLY_SETPAREN(ST.paren, ST.count);
	6167	PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput);
	6168	assert(0); /* NOTREACHED */
	6169	}
	6170	}
	6171	/* FALL THROUGH */
	6172
	6173	case CURLY_B_max_fail:
	6174	/* failed to find B in a greedy match */
	6175
	6176	REGCP_UNWIND(ST.cp);
	6177	if (ST.paren) {
	6178	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6179	}
	6180	/* back up. */
	6181	if (--ST.count < ST.min)
	6182	sayNO;
	6183	locinput = HOPc(locinput, -1);
	6184	goto curly_try_B_max;
	6185
	6186	#undef ST
	6187
	6188	case END: /* last op of main pattern */
	6189	fake_end:
	6190	if (cur_eval) {
	6191	/* we've just finished A in /(??{A})B/; now continue with B */
	6192	st->u.eval.saved_utf8_pat = is_utf8_pat;
	6193	is_utf8_pat = cur_eval->u.eval.saved_utf8_pat;
	6194
	6195	st->u.eval.prev_rex = rex_sv; /* inner */
	6196
	6197	/* Save all the positions. */
	6198	st->u.eval.cp = regcppush(rex, 0, maxopenparen);
	6199	rex_sv = cur_eval->u.eval.prev_rex;
	6200	SET_reg_curpm(rex_sv);
	6201	rex = ReANY(rex_sv);
	6202	rexi = RXi_GET(rex);
	6203	cur_curlyx = cur_eval->u.eval.prev_curlyx;
	6204
	6205	REGCP_SET(st->u.eval.lastcp);
	6206
	6207	/* Restore parens of the outer rex without popping the
	6208	* savestack */
	6209	S_regcp_restore(aTHX_ rex, cur_eval->u.eval.lastcp,
	6210	&maxopenparen);
	6211
	6212	st->u.eval.prev_eval = cur_eval;
	6213	cur_eval = cur_eval->u.eval.prev_eval;
	6214	DEBUG_EXECUTE_r(
	6215	PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
	6216	REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
	6217	if ( nochange_depth )
	6218	nochange_depth--;
	6219
	6220	PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B,
	6221	locinput); /* match B */
	6222	}
	6223
	6224	if (locinput < reginfo->till) {
	6225	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	6226	"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
	6227	PL_colors[4],
	6228	(long)(locinput - PL_reg_starttry),
	6229	(long)(reginfo->till - PL_reg_starttry),
	6230	PL_colors[5]));
	6231
	6232	sayNO_SILENT; /* Cannot match: too short. */
	6233	}
	6234	sayYES; /* Success! */
	6235
	6236	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
	6237	DEBUG_EXECUTE_r(
	6238	PerlIO_printf(Perl_debug_log,
	6239	"%*s %ssubpattern success...%s\n",
	6240	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
	6241	sayYES; /* Success! */
	6242
	6243	#undef ST
	6244	#define ST st->u.ifmatch
	6245
	6246	{
	6247	char *newstart;
	6248
	6249	case SUSPEND: /* (?>A) */
	6250	ST.wanted = 1;
	6251	newstart = locinput;
	6252	goto do_ifmatch;
	6253
	6254	case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
	6255	ST.wanted = 0;
	6256	goto ifmatch_trivial_fail_test;
	6257
	6258	case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
	6259	ST.wanted = 1;
	6260	ifmatch_trivial_fail_test:
	6261	if (scan->flags) {
	6262	char * const s = HOPBACKc(locinput, scan->flags);
	6263	if (!s) {
	6264	/* trivial fail */
	6265	if (logical) {
	6266	logical = 0;
	6267	sw = 1 - cBOOL(ST.wanted);
	6268	}
	6269	else if (ST.wanted)
	6270	sayNO;
	6271	next = scan + ARG(scan);
	6272	if (next == scan)
	6273	next = NULL;
	6274	break;
	6275	}
	6276	newstart = s;
	6277	}
	6278	else
	6279	newstart = locinput;
	6280
	6281	do_ifmatch:
	6282	ST.me = scan;
	6283	ST.logical = logical;
	6284	logical = 0; /* XXX: reset state of logical once it has been saved into ST */
	6285
	6286	/* execute body of (?...A) */
	6287	PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), newstart);
	6288	assert(0); /* NOTREACHED */
	6289	}
	6290
	6291	case IFMATCH_A_fail: /* body of (?...A) failed */
	6292	ST.wanted = !ST.wanted;
	6293	/* FALL THROUGH */
	6294
	6295	case IFMATCH_A: /* body of (?...A) succeeded */
	6296	if (ST.logical) {
	6297	sw = cBOOL(ST.wanted);
	6298	}
	6299	else if (!ST.wanted)
	6300	sayNO;
	6301
	6302	if (OP(ST.me) != SUSPEND) {
	6303	/* restore old position except for (?>...) */
	6304	locinput = st->locinput;
	6305	}
	6306	scan = ST.me + ARG(ST.me);
	6307	if (scan == ST.me)
	6308	scan = NULL;
	6309	continue; /* execute B */
	6310
	6311	#undef ST
	6312
	6313	case LONGJMP: /* alternative with many branches compiles to
	6314	* (BRANCHJ; EXACT ...; LONGJMP ) x N */
	6315	next = scan + ARG(scan);
	6316	if (next == scan)
	6317	next = NULL;
	6318	break;
	6319
	6320	case COMMIT: /* (COMMIT) /
	6321	reginfo->cutpoint = PL_regeol;
	6322	/* FALLTHROUGH */
	6323
	6324	case PRUNE: /* (PRUNE) /
	6325	if (!scan->flags)
	6326	sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6327	PUSH_STATE_GOTO(COMMIT_next, next, locinput);
	6328	assert(0); /* NOTREACHED */
	6329
	6330	case COMMIT_next_fail:
	6331	no_final = 1;
	6332	/* FALLTHROUGH */
	6333
	6334	case OPFAIL: /* (FAIL) /
	6335	sayNO;
	6336	assert(0); /* NOTREACHED */
	6337
	6338	#define ST st->u.mark
	6339	case MARKPOINT: /* (MARK:foo) /
	6340	ST.prev_mark = mark_state;
	6341	ST.mark_name = sv_commit = sv_yes_mark
	6342	= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6343	mark_state = st;
	6344	ST.mark_loc = locinput;
	6345	PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput);
	6346	assert(0); /* NOTREACHED */
	6347
	6348	case MARKPOINT_next:
	6349	mark_state = ST.prev_mark;
	6350	sayYES;
	6351	assert(0); /* NOTREACHED */
	6352
	6353	case MARKPOINT_next_fail:
	6354	if (popmark && sv_eq(ST.mark_name,popmark))
	6355	{
	6356	if (ST.mark_loc > startpoint)
	6357	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	6358	popmark = NULL; /* we found our mark */
	6359	sv_commit = ST.mark_name;
	6360
	6361	DEBUG_EXECUTE_r({
	6362	PerlIO_printf(Perl_debug_log,
	6363	"%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
	6364	REPORT_CODE_OFF+depth*2, "",
	6365	PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
	6366	});
	6367	}
	6368	mark_state = ST.prev_mark;
	6369	sv_yes_mark = mark_state ?
	6370	mark_state->u.mark.mark_name : NULL;
	6371	sayNO;
	6372	assert(0); /* NOTREACHED */
	6373
	6374	case SKIP: /* (SKIP) /
	6375	if (scan->flags) {
	6376	/* (SKIP) : if we fail we cut here/
	6377	ST.mark_name = NULL;
	6378	ST.mark_loc = locinput;
	6379	PUSH_STATE_GOTO(SKIP_next,next, locinput);
	6380	} else {
	6381	/* (SKIP:NAME) : if there is a (MARK:NAME) fail where it was,
	6382	otherwise do nothing. Meaning we need to scan
	6383	*/
	6384	regmatch_state *cur = mark_state;
	6385	SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6386
	6387	while (cur) {
	6388	if ( sv_eq( cur->u.mark.mark_name,
	6389	find ) )
	6390	{
	6391	ST.mark_name = find;
	6392	PUSH_STATE_GOTO( SKIP_next, next, locinput);
	6393	}
	6394	cur = cur->u.mark.prev_mark;
	6395	}
	6396	}
	6397	/* Didn't find our (MARK:NAME) so ignore this (SKIP:NAME) */
	6398	break;
	6399
	6400	case SKIP_next_fail:
	6401	if (ST.mark_name) {
	6402	/* (*CUT:NAME) - Set up to search for the name as we
	6403	collapse the stack*/
	6404	popmark = ST.mark_name;
	6405	} else {
	6406	/* (CUT) - No name, we cut here./
	6407	if (ST.mark_loc > startpoint)
	6408	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	6409	/* but we set sv_commit to latest mark_name if there
	6410	is one so they can test to see how things lead to this
	6411	cut */
	6412	if (mark_state)
	6413	sv_commit=mark_state->u.mark.mark_name;
	6414	}
	6415	no_final = 1;
	6416	sayNO;
	6417	assert(0); /* NOTREACHED */
	6418	#undef ST
	6419
	6420	case LNBREAK: /* \R */
	6421	if ((n=is_LNBREAK_safe(locinput, PL_regeol, utf8_target))) {
	6422	locinput += n;
	6423	} else
	6424	sayNO;
	6425	break;
	6426
	6427	default:
	6428	PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
	6429	PTR2UV(scan), OP(scan));
	6430	Perl_croak(aTHX_ "regexp memory corruption");
	6431
	6432	/* this is a point to jump to in order to increment
	6433	* locinput by one character */
	6434	increment_locinput:
	6435	assert(!NEXTCHR_IS_EOS);
	6436	if (utf8_target) {
	6437	locinput += PL_utf8skip[nextchr];
	6438	/* locinput is allowed to go 1 char off the end, but not 2+ */
	6439	if (locinput > PL_regeol)
	6440	sayNO;
	6441	}
	6442	else
	6443	locinput++;
	6444	break;
	6445
	6446	} /* end switch */
	6447
	6448	/* switch break jumps here */
	6449	scan = next; /* prepare to execute the next op and ... */
	6450	continue; /* ... jump back to the top, reusing st */
	6451	assert(0); /* NOTREACHED */
	6452
	6453	push_yes_state:
	6454	/* push a state that backtracks on success */
	6455	st->u.yes.prev_yes_state = yes_state;
	6456	yes_state = st;
	6457	/* FALL THROUGH */
	6458	push_state:
	6459	/* push a new regex state, then continue at scan */
	6460	{
	6461	regmatch_state *newst;
	6462
	6463	DEBUG_STACK_r({
	6464	regmatch_state *cur = st;
	6465	regmatch_state *curyes = yes_state;
	6466	int curd = depth;
	6467	regmatch_slab *slab = PL_regmatch_slab;
	6468	for (;curd > -1;cur--,curd--) {
	6469	if (cur < SLAB_FIRST(slab)) {
	6470	slab = slab->prev;
	6471	cur = SLAB_LAST(slab);
	6472	}
	6473	PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
	6474	REPORT_CODE_OFF + 2 + depth * 2,"",
	6475	curd, PL_reg_name[cur->resume_state],
	6476	(curyes == cur) ? "yes" : ""
	6477	);
	6478	if (curyes == cur)
	6479	curyes = cur->u.yes.prev_yes_state;
	6480	}
	6481	} else
	6482	DEBUG_STATE_pp("push")
	6483	);
	6484	depth++;
	6485	st->locinput = locinput;
	6486	newst = st+1;
	6487	if (newst > SLAB_LAST(PL_regmatch_slab))
	6488	newst = S_push_slab(aTHX);
	6489	PL_regmatch_state = newst;
	6490
	6491	locinput = pushinput;
	6492	st = newst;
	6493	continue;
	6494	assert(0); /* NOTREACHED */
	6495	}
	6496	}
	6497
	6498	/*
	6499	* We get here only if there's trouble -- normally "case END" is
	6500	* the terminating point.
	6501	*/
	6502	Perl_croak(aTHX_ "corrupted regexp pointers");
	6503	/NOTREACHED/
	6504	sayNO;
	6505
	6506	yes:
	6507	if (yes_state) {
	6508	/* we have successfully completed a subexpression, but we must now
	6509	* pop to the state marked by yes_state and continue from there */
	6510	assert(st != yes_state);
	6511	#ifdef DEBUGGING
	6512	while (st != yes_state) {
	6513	st--;
	6514	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	6515	PL_regmatch_slab = PL_regmatch_slab->prev;
	6516	st = SLAB_LAST(PL_regmatch_slab);
	6517	}
	6518	DEBUG_STATE_r({
	6519	if (no_final) {
	6520	DEBUG_STATE_pp("pop (no final)");
	6521	} else {
	6522	DEBUG_STATE_pp("pop (yes)");
	6523	}
	6524	});
	6525	depth--;
	6526	}
	6527	#else
	6528	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
	6529	\|\| yes_state > SLAB_LAST(PL_regmatch_slab))
	6530	{
	6531	/* not in this slab, pop slab */
	6532	depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
	6533	PL_regmatch_slab = PL_regmatch_slab->prev;
	6534	st = SLAB_LAST(PL_regmatch_slab);
	6535	}
	6536	depth -= (st - yes_state);
	6537	#endif
	6538	st = yes_state;
	6539	yes_state = st->u.yes.prev_yes_state;
	6540	PL_regmatch_state = st;
	6541
	6542	if (no_final)
	6543	locinput= st->locinput;
	6544	state_num = st->resume_state + no_final;
	6545	goto reenter_switch;
	6546	}
	6547
	6548	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
	6549	PL_colors[4], PL_colors[5]));
	6550
	6551	if (PL_reg_state.re_state_eval_setup_done) {
	6552	/* each successfully executed (?{...}) block does the equivalent of
	6553	* local $^R = do {...}
	6554	* When popping the save stack, all these locals would be undone;
	6555	* bypass this by setting the outermost saved $^R to the latest
	6556	* value */
	6557	if (oreplsv != GvSV(PL_replgv))
	6558	sv_setsv(oreplsv, GvSV(PL_replgv));
	6559	}
	6560	result = 1;
	6561	goto final_exit;
	6562
	6563	no:
	6564	DEBUG_EXECUTE_r(
	6565	PerlIO_printf(Perl_debug_log,
	6566	"%*s %sfailed...%s\n",
	6567	REPORT_CODE_OFF+depth*2, "",
	6568	PL_colors[4], PL_colors[5])
	6569	);
	6570
	6571	no_silent:
	6572	if (no_final) {
	6573	if (yes_state) {
	6574	goto yes;
	6575	} else {
	6576	goto final_exit;
	6577	}
	6578	}
	6579	if (depth) {
	6580	/* there's a previous state to backtrack to */
	6581	st--;
	6582	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	6583	PL_regmatch_slab = PL_regmatch_slab->prev;
	6584	st = SLAB_LAST(PL_regmatch_slab);
	6585	}
	6586	PL_regmatch_state = st;
	6587	locinput= st->locinput;
	6588
	6589	DEBUG_STATE_pp("pop");
	6590	depth--;
	6591	if (yes_state == st)
	6592	yes_state = st->u.yes.prev_yes_state;
	6593
	6594	state_num = st->resume_state + 1; /* failure = success + 1 */
	6595	goto reenter_switch;
	6596	}
	6597	result = 0;
	6598
	6599	final_exit:
	6600	if (rex->intflags & PREGf_VERBARG_SEEN) {
	6601	SV *sv_err = get_sv("REGERROR", 1);
	6602	SV *sv_mrk = get_sv("REGMARK", 1);
	6603	if (result) {
	6604	sv_commit = &PL_sv_no;
	6605	if (!sv_yes_mark)
	6606	sv_yes_mark = &PL_sv_yes;
	6607	} else {
	6608	if (!sv_commit)
	6609	sv_commit = &PL_sv_yes;
	6610	sv_yes_mark = &PL_sv_no;
	6611	}
	6612	sv_setsv(sv_err, sv_commit);
	6613	sv_setsv(sv_mrk, sv_yes_mark);
	6614	}
	6615
	6616
	6617	if (last_pushed_cv) {
	6618	dSP;
	6619	POP_MULTICALL;
	6620	PERL_UNUSED_VAR(SP);
	6621	}
	6622
	6623	/* clean up; in particular, free all slabs above current one */
	6624	LEAVE_SCOPE(oldsave);
	6625
	6626	assert(!result \|\| locinput - PL_bostr >= 0);
	6627	return result ? locinput - PL_bostr : -1;
	6628	}
	6629
	6630	/*
	6631	- regrepeat - repeatedly match something simple, report how many
	6632	*
	6633	* What 'simple' means is a node which can be the operand of a quantifier like
	6634	* '+', or {1,3}
	6635	*
	6636	* startposp - pointer to a pointer to the start position. This is updated
	6637	* to point to the byte following the highest successful
	6638	* match.
	6639	* p - the regnode to be repeatedly matched against.
	6640	* max - maximum number of things to match.
	6641	* depth - (for debugging) backtracking depth.
	6642	*/
	6643	STATIC I32
	6644	S_regrepeat(pTHX_ regexp prog, char startposp, const regnode p,
	6645	I32 max, int depth, bool is_utf8_pat)
	6646	{
	6647	dVAR;
	6648	char scan; / Pointer to current position in target string */
	6649	I32 c;
	6650	char loceol = PL_regeol; / local version */
	6651	I32 hardcount = 0; /* How many matches so far */
	6652	bool utf8_target = PL_reg_match_utf8;
	6653	int to_complement = 0; /* Invert the result? */
	6654	UV utf8_flags;
	6655	_char_class_number classnum;
	6656	#ifndef DEBUGGING
	6657	PERL_UNUSED_ARG(depth);
	6658	#endif
	6659
	6660	PERL_ARGS_ASSERT_REGREPEAT;
	6661
	6662	scan = *startposp;
	6663	if (max == REG_INFTY)
	6664	max = I32_MAX;
	6665	else if (! utf8_target && scan + max < loceol)
	6666	loceol = scan + max;
	6667
	6668	/* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
	6669	* to the maximum of how far we should go in it (leaving it set to the real
	6670	* end, if the maximum permissible would take us beyond that). This allows
	6671	* us to make the loop exit condition that we haven't gone past <loceol> to
	6672	* also mean that we haven't exceeded the max permissible count, saving a
	6673	* test each time through the loop. But it assumes that the OP matches a
	6674	* single byte, which is true for most of the OPs below when applied to a
	6675	* non-UTF-8 target. Those relatively few OPs that don't have this
	6676	* characteristic will have to compensate.
	6677	*
	6678	* There is no adjustment for UTF-8 targets, as the number of bytes per
	6679	* character varies. OPs will have to test both that the count is less
	6680	* than the max permissible (using <hardcount> to keep track), and that we
	6681	* are still within the bounds of the string (using <loceol>. A few OPs
	6682	* match a single byte no matter what the encoding. They can omit the max
	6683	* test if, for the UTF-8 case, they do the adjustment that was skipped
	6684	* above.
	6685	*
	6686	* Thus, the code above sets things up for the common case; and exceptional
	6687	* cases need extra work; the common case is to make sure <scan> doesn't
	6688	* go past <loceol>, and for UTF-8 to also use <hardcount> to make sure the
	6689	* count doesn't exceed the maximum permissible */
	6690
	6691	switch (OP(p)) {
	6692	case REG_ANY:
	6693	if (utf8_target) {
	6694	while (scan < loceol && hardcount < max && *scan != '\n') {
	6695	scan += UTF8SKIP(scan);
	6696	hardcount++;
	6697	}
	6698	} else {
	6699	while (scan < loceol && *scan != '\n')
	6700	scan++;
	6701	}
	6702	break;
	6703	case SANY:
	6704	if (utf8_target) {
	6705	while (scan < loceol && hardcount < max) {
	6706	scan += UTF8SKIP(scan);
	6707	hardcount++;
	6708	}
	6709	}
	6710	else
	6711	scan = loceol;
	6712	break;
	6713	case CANY: /* Move <scan> forward <max> bytes, unless goes off end */
	6714	if (utf8_target && scan + max < loceol) {
	6715
	6716	/* <loceol> hadn't been adjusted in the UTF-8 case */
	6717	scan += max;
	6718	}
	6719	else {
	6720	scan = loceol;
	6721	}
	6722	break;
	6723	case EXACT:
	6724	assert(STR_LEN(p) == is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
	6725
	6726	c = (U8)*STRING(p);
	6727
	6728	/* Can use a simple loop if the pattern char to match on is invariant
	6729	* under UTF-8, or both target and pattern aren't UTF-8. Note that we
	6730	* can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
	6731	* true iff it doesn't matter if the argument is in UTF-8 or not */
	6732	if (UTF8_IS_INVARIANT(c) \|\| (! utf8_target && ! is_utf8_pat)) {
	6733	if (utf8_target && scan + max < loceol) {
	6734	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	6735	* since here, to match at all, 1 char == 1 byte */
	6736	loceol = scan + max;
	6737	}
	6738	while (scan < loceol && UCHARAT(scan) == c) {
	6739	scan++;
	6740	}
	6741	}
	6742	else if (is_utf8_pat) {
	6743	if (utf8_target) {
	6744	STRLEN scan_char_len;
	6745
	6746	/* When both target and pattern are UTF-8, we have to do
	6747	* string EQ */
	6748	while (hardcount < max
	6749	&& scan < loceol
	6750	&& (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
	6751	&& memEQ(scan, STRING(p), scan_char_len))
	6752	{
	6753	scan += scan_char_len;
	6754	hardcount++;
	6755	}
	6756	}
	6757	else if (! UTF8_IS_ABOVE_LATIN1(c)) {
	6758
	6759	/* Target isn't utf8; convert the character in the UTF-8
	6760	* pattern to non-UTF8, and do a simple loop */
	6761	c = TWO_BYTE_UTF8_TO_UNI(c, *(STRING(p) + 1));
	6762	while (scan < loceol && UCHARAT(scan) == c) {
	6763	scan++;
	6764	}
	6765	} /* else pattern char is above Latin1, can't possibly match the
	6766	non-UTF-8 target */
	6767	}
	6768	else {
	6769
	6770	/* Here, the string must be utf8; pattern isn't, and <c> is
	6771	* different in utf8 than not, so can't compare them directly.
	6772	* Outside the loop, find the two utf8 bytes that represent c, and
	6773	* then look for those in sequence in the utf8 string */
	6774	U8 high = UTF8_TWO_BYTE_HI(c);
	6775	U8 low = UTF8_TWO_BYTE_LO(c);
	6776
	6777	while (hardcount < max
	6778	&& scan + 1 < loceol
	6779	&& UCHARAT(scan) == high
	6780	&& UCHARAT(scan + 1) == low)
	6781	{
	6782	scan += 2;
	6783	hardcount++;
	6784	}
	6785	}
	6786	break;
	6787
	6788	case EXACTFA:
	6789	utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	6790	goto do_exactf;
	6791
	6792	case EXACTFL:
	6793	RXp_MATCH_TAINTED_on(prog);
	6794	utf8_flags = FOLDEQ_UTF8_LOCALE;
	6795	goto do_exactf;
	6796
	6797	case EXACTF:
	6798	utf8_flags = 0;
	6799	goto do_exactf;
	6800
	6801	case EXACTFU_SS:
	6802	case EXACTFU_TRICKYFOLD:
	6803	case EXACTFU:
	6804	utf8_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	6805
	6806	do_exactf: {
	6807	int c1, c2;
	6808	U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
	6809
	6810	assert(STR_LEN(p) == is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
	6811
	6812	if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
	6813	is_utf8_pat))
	6814	{
	6815	if (c1 == CHRTEST_VOID) {
	6816	/* Use full Unicode fold matching */
	6817	char *tmpeol = PL_regeol;
	6818	STRLEN pat_len = is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
	6819	while (hardcount < max
	6820	&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
	6821	STRING(p), NULL, pat_len,
	6822	is_utf8_pat, utf8_flags))
	6823	{
	6824	scan = tmpeol;
	6825	tmpeol = PL_regeol;
	6826	hardcount++;
	6827	}
	6828	}
	6829	else if (utf8_target) {
	6830	if (c1 == c2) {
	6831	while (scan < loceol
	6832	&& hardcount < max
	6833	&& memEQ(scan, c1_utf8, UTF8SKIP(scan)))
	6834	{
	6835	scan += UTF8SKIP(scan);
	6836	hardcount++;
	6837	}
	6838	}
	6839	else {
	6840	while (scan < loceol
	6841	&& hardcount < max
	6842	&& (memEQ(scan, c1_utf8, UTF8SKIP(scan))
	6843	\|\| memEQ(scan, c2_utf8, UTF8SKIP(scan))))
	6844	{
	6845	scan += UTF8SKIP(scan);
	6846	hardcount++;
	6847	}
	6848	}
	6849	}
	6850	else if (c1 == c2) {
	6851	while (scan < loceol && UCHARAT(scan) == c1) {
	6852	scan++;
	6853	}
	6854	}
	6855	else {
	6856	while (scan < loceol &&
	6857	(UCHARAT(scan) == c1 \|\| UCHARAT(scan) == c2))
	6858	{
	6859	scan++;
	6860	}
	6861	}
	6862	}
	6863	break;
	6864	}
	6865	case ANYOF:
	6866	case ANYOF_WARN_SUPER:
	6867	if (utf8_target) {
	6868	while (hardcount < max
	6869	&& scan < loceol
	6870	&& reginclass(prog, p, (U8*)scan, utf8_target))
	6871	{
	6872	scan += UTF8SKIP(scan);
	6873	hardcount++;
	6874	}
	6875	} else {
	6876	while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
	6877	scan++;
	6878	}
	6879	break;
	6880
	6881	/* The argument (FLAGS) to all the POSIX node types is the class number */
	6882
	6883	case NPOSIXL:
	6884	to_complement = 1;
	6885	/* FALLTHROUGH */
	6886
	6887	case POSIXL:
	6888	RXp_MATCH_TAINTED_on(prog);
	6889	if (! utf8_target) {
	6890	while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
	6891	*scan)))
	6892	{
	6893	scan++;
	6894	}
	6895	} else {
	6896	while (hardcount < max && scan < loceol
	6897	&& to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(p),
	6898	(U8 *) scan)))
	6899	{
	6900	scan += UTF8SKIP(scan);
	6901	hardcount++;
	6902	}
	6903	}
	6904	break;
	6905
	6906	case POSIXD:
	6907	if (utf8_target) {
	6908	goto utf8_posix;
	6909	}
	6910	/* FALLTHROUGH */
	6911
	6912	case POSIXA:
	6913	if (utf8_target && scan + max < loceol) {
	6914
	6915	/* We didn't adjust <loceol> at the beginning of this routine
	6916	* because is UTF-8, but it is actually ok to do so, since here, to
	6917	* match, 1 char == 1 byte. */
	6918	loceol = scan + max;
	6919	}
	6920	while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
	6921	scan++;
	6922	}
	6923	break;
	6924
	6925	case NPOSIXD:
	6926	if (utf8_target) {
	6927	to_complement = 1;
	6928	goto utf8_posix;
	6929	}
	6930	/* FALL THROUGH */
	6931
	6932	case NPOSIXA:
	6933	if (! utf8_target) {
	6934	while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
	6935	scan++;
	6936	}
	6937	}
	6938	else {
	6939
	6940	/* The complement of something that matches only ASCII matches all
	6941	* UTF-8 variant code points, plus everything in ASCII that isn't
	6942	* in the class. */
	6943	while (hardcount < max && scan < loceol
	6944	&& (! UTF8_IS_INVARIANT(*scan)
	6945	\|\| ! _generic_isCC_A((U8) *scan, FLAGS(p))))
	6946	{
	6947	scan += UTF8SKIP(scan);
	6948	hardcount++;
	6949	}
	6950	}
	6951	break;
	6952
	6953	case NPOSIXU:
	6954	to_complement = 1;
	6955	/* FALLTHROUGH */
	6956
	6957	case POSIXU:
	6958	if (! utf8_target) {
	6959	while (scan < loceol && to_complement
	6960	^ cBOOL(_generic_isCC((U8) *scan, FLAGS(p))))
	6961	{
	6962	scan++;
	6963	}
	6964	}
	6965	else {
	6966	utf8_posix:
	6967	classnum = (_char_class_number) FLAGS(p);
	6968	if (classnum < _FIRST_NON_SWASH_CC) {
	6969
	6970	/* Here, a swash is needed for above-Latin1 code points.
	6971	* Process as many Latin1 code points using the built-in rules.
	6972	* Go to another loop to finish processing upon encountering
	6973	* the first Latin1 code point. We could do that in this loop
	6974	* as well, but the other way saves having to test if the swash
	6975	* has been loaded every time through the loop: extra space to
	6976	* save a test. */
	6977	while (hardcount < max && scan < loceol) {
	6978	if (UTF8_IS_INVARIANT(*scan)) {
	6979	if (! (to_complement ^ cBOOL(_generic_isCC((U8) *scan,
	6980	classnum))))
	6981	{
	6982	break;
	6983	}
	6984	scan++;
	6985	}
	6986	else if (UTF8_IS_DOWNGRADEABLE_START(*scan)) {
	6987	if (! (to_complement
	6988	^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_UNI(*scan,
	6989	*(scan + 1)),
	6990	classnum))))
	6991	{
	6992	break;
	6993	}
	6994	scan += 2;
	6995	}
	6996	else {
	6997	goto found_above_latin1;
	6998	}
	6999
	7000	hardcount++;
	7001	}
	7002	}
	7003	else {
	7004	/* For these character classes, the knowledge of how to handle
	7005	* every code point is compiled in to Perl via a macro. This
	7006	* code is written for making the loops as tight as possible.
	7007	* It could be refactored to save space instead */
	7008	switch (classnum) {
	7009	case _CC_ENUM_SPACE: /* XXX would require separate code
	7010	if we revert the change of \v
	7011	matching this */
	7012	/* FALL THROUGH */
	7013	case _CC_ENUM_PSXSPC:
	7014	while (hardcount < max
	7015	&& scan < loceol
	7016	&& (to_complement ^ cBOOL(isSPACE_utf8(scan))))
	7017	{
	7018	scan += UTF8SKIP(scan);
	7019	hardcount++;
	7020	}
	7021	break;
	7022	case _CC_ENUM_BLANK:
	7023	while (hardcount < max
	7024	&& scan < loceol
	7025	&& (to_complement ^ cBOOL(isBLANK_utf8(scan))))
	7026	{
	7027	scan += UTF8SKIP(scan);
	7028	hardcount++;
	7029	}
	7030	break;
	7031	case _CC_ENUM_XDIGIT:
	7032	while (hardcount < max
	7033	&& scan < loceol
	7034	&& (to_complement ^ cBOOL(isXDIGIT_utf8(scan))))
	7035	{
	7036	scan += UTF8SKIP(scan);
	7037	hardcount++;
	7038	}
	7039	break;
	7040	case _CC_ENUM_VERTSPACE:
	7041	while (hardcount < max
	7042	&& scan < loceol
	7043	&& (to_complement ^ cBOOL(isVERTWS_utf8(scan))))
	7044	{
	7045	scan += UTF8SKIP(scan);
	7046	hardcount++;
	7047	}
	7048	break;
	7049	case _CC_ENUM_CNTRL:
	7050	while (hardcount < max
	7051	&& scan < loceol
	7052	&& (to_complement ^ cBOOL(isCNTRL_utf8(scan))))
	7053	{
	7054	scan += UTF8SKIP(scan);
	7055	hardcount++;
	7056	}
	7057	break;
	7058	default:
	7059	Perl_croak(aTHX_ "panic: regrepeat() node %d='%s' has an unexpected character class '%d'", OP(p), PL_reg_name[OP(p)], classnum);
	7060	}
	7061	}
	7062	}
	7063	break;
	7064
	7065	found_above_latin1: /* Continuation of POSIXU and NPOSIXU */
	7066
	7067	/* Load the swash if not already present */
	7068	if (! PL_utf8_swash_ptrs[classnum]) {
	7069	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	7070	PL_utf8_swash_ptrs[classnum] = _core_swash_init(
	7071	"utf8", swash_property_names[classnum],
	7072	&PL_sv_undef, 1, 0, NULL, &flags);
	7073	}
	7074
	7075	while (hardcount < max && scan < loceol
	7076	&& to_complement ^ cBOOL(_generic_utf8(
	7077	classnum,
	7078	scan,
	7079	swash_fetch(PL_utf8_swash_ptrs[classnum],
	7080	(U8 *) scan,
	7081	TRUE))))
	7082	{
	7083	scan += UTF8SKIP(scan);
	7084	hardcount++;
	7085	}
	7086	break;
	7087
	7088	case LNBREAK:
	7089	if (utf8_target) {
	7090	while (hardcount < max && scan < loceol &&
	7091	(c=is_LNBREAK_utf8_safe(scan, loceol))) {
	7092	scan += c;
	7093	hardcount++;
	7094	}
	7095	} else {
	7096	/* LNBREAK can match one or two latin chars, which is ok, but we
	7097	* have to use hardcount in this situation, and throw away the
	7098	* adjustment to <loceol> done before the switch statement */
	7099	loceol = PL_regeol;
	7100	while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
	7101	scan+=c;
	7102	hardcount++;
	7103	}
	7104	}
	7105	break;
	7106
	7107	case BOUND:
	7108	case BOUNDA:
	7109	case BOUNDL:
	7110	case BOUNDU:
	7111	case EOS:
	7112	case GPOS:
	7113	case KEEPS:
	7114	case NBOUND:
	7115	case NBOUNDA:
	7116	case NBOUNDL:
	7117	case NBOUNDU:
	7118	case OPFAIL:
	7119	case SBOL:
	7120	case SEOL:
	7121	/* These are all 0 width, so match right here or not at all. */
	7122	break;
	7123
	7124	default:
	7125	Perl_croak(aTHX_ "panic: regrepeat() called with unrecognized node type %d='%s'", OP(p), PL_reg_name[OP(p)]);
	7126	assert(0); /* NOTREACHED */
	7127
	7128	}
	7129
	7130	if (hardcount)
	7131	c = hardcount;
	7132	else
	7133	c = scan - *startposp;
	7134	*startposp = scan;
	7135
	7136	DEBUG_r({
	7137	GET_RE_DEBUG_FLAGS_DECL;
	7138	DEBUG_EXECUTE_r({
	7139	SV * const prop = sv_newmortal();
	7140	regprop(prog, prop, p);
	7141	PerlIO_printf(Perl_debug_log,
	7142	"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
	7143	REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
	7144	});
	7145	});
	7146
	7147	return(c);
	7148	}
	7149
	7150
	7151	#if !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION)
	7152	/*
	7153	- regclass_swash - prepare the utf8 swash. Wraps the shared core version to
	7154	create a copy so that changes the caller makes won't change the shared one.
	7155	If <altsvp> is non-null, will return NULL in it, for back-compat.
	7156	*/
	7157	SV *
	7158	Perl_regclass_swash(pTHX_ const regexp prog, const regnode node, bool doinit, SV listsvp, SV altsvp)
	7159	{
	7160	PERL_ARGS_ASSERT_REGCLASS_SWASH;
	7161
	7162	if (altsvp) {
	7163	*altsvp = NULL;
	7164	}
	7165
	7166	return newSVsv(core_regclass_swash(prog, node, doinit, listsvp));
	7167	}
	7168	#endif
	7169
	7170	STATIC SV *
	7171	S_core_regclass_swash(pTHX_ const regexp prog, const regnode node, bool doinit, SV** listsvp)
	7172	{
	7173	/* Returns the swash for the input 'node' in the regex 'prog'.
	7174	* If <doinit> is true, will attempt to create the swash if not already
	7175	* done.
	7176	* If <listsvp> is non-null, will return the swash initialization string in
	7177	* it.
	7178	* Tied intimately to how regcomp.c sets up the data structure */
	7179
	7180	dVAR;
	7181	SV *sw = NULL;
	7182	SV *si = NULL;
	7183	SV* invlist = NULL;
	7184
	7185	RXi_GET_DECL(prog,progi);
	7186	const struct reg_data * const data = prog ? progi->data : NULL;
	7187
	7188	PERL_ARGS_ASSERT_CORE_REGCLASS_SWASH;
	7189
	7190	assert(ANYOF_NONBITMAP(node));
	7191
	7192	if (data && data->count) {
	7193	const U32 n = ARG(node);
	7194
	7195	if (data->what[n] == 's') {
	7196	SV * const rv = MUTABLE_SV(data->data[n]);
	7197	AV * const av = MUTABLE_AV(SvRV(rv));
	7198	SV **const ary = AvARRAY(av);
	7199	U8 swash_init_flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	7200
	7201	si = ary; / ary[0] = the string to initialize the swash with */
	7202
	7203	/* Elements 2 and 3 are either both present or both absent. [2] is
	7204	* any inversion list generated at compile time; [3] indicates if
	7205	* that inversion list has any user-defined properties in it. */
	7206	if (av_len(av) >= 2) {
	7207	invlist = ary[2];
	7208	if (SvUV(ary[3])) {
	7209	swash_init_flags \|= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
	7210	}
	7211	}
	7212	else {
	7213	invlist = NULL;
	7214	}
	7215
	7216	/* Element [1] is reserved for the set-up swash. If already there,
	7217	* return it; if not, create it and store it there */
	7218	if (SvROK(ary[1])) {
	7219	sw = ary[1];
	7220	}
	7221	else if (si && doinit) {
	7222
	7223	sw = _core_swash_init("utf8", /* the utf8 package */
	7224	"", /* nameless */
	7225	si,
	7226	1, /* binary */
	7227	0, /* not from tr/// */
	7228	invlist,
	7229	&swash_init_flags);
	7230	(void)av_store(av, 1, sw);
	7231	}
	7232	}
	7233	}
	7234
	7235	if (listsvp) {
	7236	SV* matches_string = newSVpvn("", 0);
	7237
	7238	/* Use the swash, if any, which has to have incorporated into it all
	7239	* possibilities */
	7240	if ((! sw \|\| (invlist = _get_swash_invlist(sw)) == NULL)
	7241	&& (si && si != &PL_sv_undef))
	7242	{
	7243
	7244	/* If no swash, use the input initialization string, if available */
	7245	sv_catsv(matches_string, si);
	7246	}
	7247
	7248	/* Add the inversion list to whatever we have. This may have come from
	7249	* the swash, or from an input parameter */
	7250	if (invlist) {
	7251	sv_catsv(matches_string, _invlist_contents(invlist));
	7252	}
	7253	*listsvp = matches_string;
	7254	}
	7255
	7256	return sw;
	7257	}
	7258
	7259	/*
	7260	- reginclass - determine if a character falls into a character class
	7261
	7262	n is the ANYOF regnode
	7263	p is the target string
	7264	utf8_target tells whether p is in UTF-8.
	7265
	7266	Returns true if matched; false otherwise.
	7267
	7268	Note that this can be a synthetic start class, a combination of various
	7269	nodes, so things you think might be mutually exclusive, such as locale,
	7270	aren't. It can match both locale and non-locale
	7271
	7272	*/
	7273
	7274	STATIC bool
	7275	S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target)
	7276	{
	7277	dVAR;
	7278	const char flags = ANYOF_FLAGS(n);
	7279	bool match = FALSE;
	7280	UV c = *p;
	7281
	7282	PERL_ARGS_ASSERT_REGINCLASS;
	7283
	7284	/* If c is not already the code point, get it. Note that
	7285	* UTF8_IS_INVARIANT() works even if not in UTF-8 */
	7286	if (! UTF8_IS_INVARIANT(c) && utf8_target) {
	7287	STRLEN c_len = 0;
	7288	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
	7289	(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
	7290	\| UTF8_ALLOW_FFFF \| UTF8_CHECK_ONLY);
	7291	/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
	7292	* UTF8_ALLOW_FFFF */
	7293	if (c_len == (STRLEN)-1)
	7294	Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
	7295	}
	7296
	7297	/* If this character is potentially in the bitmap, check it */
	7298	if (c < 256) {
	7299	if (ANYOF_BITMAP_TEST(n, c))
	7300	match = TRUE;
	7301	else if (flags & ANYOF_NON_UTF8_LATIN1_ALL
	7302	&& ! utf8_target
	7303	&& ! isASCII(c))
	7304	{
	7305	match = TRUE;
	7306	}
	7307	else if (flags & ANYOF_LOCALE) {
	7308	RXp_MATCH_TAINTED_on(prog);
	7309
	7310	if ((flags & ANYOF_LOC_FOLD)
	7311	&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
	7312	{
	7313	match = TRUE;
	7314	}
	7315	else if (ANYOF_CLASS_TEST_ANY_SET(n)) {
	7316
	7317	/* The data structure is arranged so bits 0, 2, 4, ... are set
	7318	* if the class includes the Posix character class given by
	7319	* bit/2; and 1, 3, 5, ... are set if the class includes the
	7320	* complemented Posix class given by int(bit/2). So we loop
	7321	* through the bits, each time changing whether we complement
	7322	* the result or not. Suppose for the sake of illustration
	7323	* that bits 0-3 mean respectively, \w, \W, \s, \S. If bit 0
	7324	* is set, it means there is a match for this ANYOF node if the
	7325	* character is in the class given by the expression (0 / 2 = 0
	7326	* = \w). If it is in that class, isFOO_lc() will return 1,
	7327	* and since 'to_complement' is 0, the result will stay TRUE,
	7328	* and we exit the loop. Suppose instead that bit 0 is 0, but
	7329	* bit 1 is 1. That means there is a match if the character
	7330	* matches \W. We won't bother to call isFOO_lc() on bit 0,
	7331	* but will on bit 1. On the second iteration 'to_complement'
	7332	* will be 1, so the exclusive or will reverse things, so we
	7333	* are testing for \W. On the third iteration, 'to_complement'
	7334	* will be 0, and we would be testing for \s; the fourth
	7335	* iteration would test for \S, etc.
	7336	*
	7337	* Note that this code assumes that all the classes are closed
	7338	* under folding. For example, if a character matches \w, then
	7339	* its fold does too; and vice versa. This should be true for
	7340	* any well-behaved locale for all the currently defined Posix
	7341	* classes, except for :lower: and :upper:, which are handled
	7342	* by the pseudo-class :cased: which matches if either of the
	7343	* other two does. To get rid of this assumption, an outer
	7344	* loop could be used below to iterate over both the source
	7345	* character, and its fold (if different) */
	7346
	7347	int count = 0;
	7348	int to_complement = 0;
	7349	while (count < ANYOF_MAX) {
	7350	if (ANYOF_CLASS_TEST(n, count)
	7351	&& to_complement ^ cBOOL(isFOO_lc(count/2, (U8) c)))
	7352	{
	7353	match = TRUE;
	7354	break;
	7355	}
	7356	count++;
	7357	to_complement ^= 1;
	7358	}
	7359	}
	7360	}
	7361	}
	7362
	7363	/* If the bitmap didn't (or couldn't) match, and something outside the
	7364	* bitmap could match, try that. Locale nodes specify completely the
	7365	* behavior of code points in the bit map (otherwise, a utf8 target would
	7366	* cause them to be treated as Unicode and not locale), except in
	7367	* the very unlikely event when this node is a synthetic start class, which
	7368	* could be a combination of locale and non-locale nodes. So allow locale
	7369	* to match for the synthetic start class, which will give a false
	7370	* positive that will be resolved when the match is done again as not part
	7371	* of the synthetic start class */
	7372	if (!match) {
	7373	if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
	7374	match = TRUE; /* Everything above 255 matches */
	7375	}
	7376	else if (ANYOF_NONBITMAP(n)
	7377	&& ((flags & ANYOF_NONBITMAP_NON_UTF8)
	7378	\|\| (utf8_target
	7379	&& (c >=256
	7380	\|\| (! (flags & ANYOF_LOCALE))
	7381	\|\| OP(n) == ANYOF_SYNTHETIC))))
	7382	{
	7383	SV * const sw = core_regclass_swash(prog, n, TRUE, 0);
	7384	if (sw) {
	7385	U8 * utf8_p;
	7386	if (utf8_target) {
	7387	utf8_p = (U8 *) p;
	7388	} else { /* Convert to utf8 */
	7389	STRLEN len = 1;
	7390	utf8_p = bytes_to_utf8(p, &len);
	7391	}
	7392
	7393	if (swash_fetch(sw, utf8_p, TRUE)) {
	7394	match = TRUE;
	7395	}
	7396
	7397	/* If we allocated a string above, free it */
	7398	if (! utf8_target) Safefree(utf8_p);
	7399	}
	7400	}
	7401
	7402	if (UNICODE_IS_SUPER(c)
	7403	&& OP(n) == ANYOF_WARN_SUPER
	7404	&& ckWARN_d(WARN_NON_UNICODE))
	7405	{
	7406	Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
	7407	"Code point 0x%04"UVXf" is not Unicode, all \\p{} matches fail; all \\P{} matches succeed", c);
	7408	}
	7409	}
	7410
	7411	/* The xor complements the return if to invert: 1^1 = 0, 1^0 = 1 */
	7412	return cBOOL(flags & ANYOF_INVERT) ^ match;
	7413	}
	7414
	7415	STATIC U8 *
	7416	S_reghop3(U8 s, I32 off, const U8 lim)
	7417	{
	7418	/* return the position 'off' UTF-8 characters away from 's', forward if
	7419	* 'off' >= 0, backwards if negative. But don't go outside of position
	7420	* 'lim', which better be < s if off < 0 */
	7421
	7422	dVAR;
	7423
	7424	PERL_ARGS_ASSERT_REGHOP3;
	7425
	7426	if (off >= 0) {
	7427	while (off-- && s < lim) {
	7428	/* XXX could check well-formedness here */
	7429	s += UTF8SKIP(s);
	7430	}
	7431	}
	7432	else {
	7433	while (off++ && s > lim) {
	7434	s--;
	7435	if (UTF8_IS_CONTINUED(*s)) {
	7436	while (s > lim && UTF8_IS_CONTINUATION(*s))
	7437	s--;
	7438	}
	7439	/* XXX could check well-formedness here */
	7440	}
	7441	}
	7442	return s;
	7443	}
	7444
	7445	#ifdef XXX_dmq
	7446	/* there are a bunch of places where we use two reghop3's that should
	7447	be replaced with this routine. but since thats not done yet
	7448	we ifdef it out - dmq
	7449	*/
	7450	STATIC U8 *
	7451	S_reghop4(U8 s, I32 off, const U8 llim, const U8* rlim)
	7452	{
	7453	dVAR;
	7454
	7455	PERL_ARGS_ASSERT_REGHOP4;
	7456
	7457	if (off >= 0) {
	7458	while (off-- && s < rlim) {
	7459	/* XXX could check well-formedness here */
	7460	s += UTF8SKIP(s);
	7461	}
	7462	}
	7463	else {
	7464	while (off++ && s > llim) {
	7465	s--;
	7466	if (UTF8_IS_CONTINUED(*s)) {
	7467	while (s > llim && UTF8_IS_CONTINUATION(*s))
	7468	s--;
	7469	}
	7470	/* XXX could check well-formedness here */
	7471	}
	7472	}
	7473	return s;
	7474	}
	7475	#endif
	7476
	7477	STATIC U8 *
	7478	S_reghopmaybe3(U8* s, I32 off, const U8* lim)
	7479	{
	7480	dVAR;
	7481
	7482	PERL_ARGS_ASSERT_REGHOPMAYBE3;
	7483
	7484	if (off >= 0) {
	7485	while (off-- && s < lim) {
	7486	/* XXX could check well-formedness here */
	7487	s += UTF8SKIP(s);
	7488	}
	7489	if (off >= 0)
	7490	return NULL;
	7491	}
	7492	else {
	7493	while (off++ && s > lim) {
	7494	s--;
	7495	if (UTF8_IS_CONTINUED(*s)) {
	7496	while (s > lim && UTF8_IS_CONTINUATION(*s))
	7497	s--;
	7498	}
	7499	/* XXX could check well-formedness here */
	7500	}
	7501	if (off <= 0)
	7502	return NULL;
	7503	}
	7504	return s;
	7505	}
	7506
	7507	static void
	7508	restore_pos(pTHX_ void *arg)
	7509	{
	7510	dVAR;
	7511	regexp * const rex = (regexp *)arg;
	7512	if (PL_reg_state.re_state_eval_setup_done) {
	7513	if (PL_reg_oldsaved) {
	7514	rex->subbeg = PL_reg_oldsaved;
	7515	rex->sublen = PL_reg_oldsavedlen;
	7516	rex->suboffset = PL_reg_oldsavedoffset;
	7517	rex->subcoffset = PL_reg_oldsavedcoffset;
	7518	#ifdef PERL_ANY_COW
	7519	rex->saved_copy = PL_nrs;
	7520	#endif
	7521	RXp_MATCH_COPIED_on(rex);
	7522	}
	7523	PL_reg_magic->mg_len = PL_reg_oldpos;
	7524	PL_reg_state.re_state_eval_setup_done = FALSE;
	7525	PL_curpm = PL_reg_oldcurpm;
	7526	}
	7527	}
	7528
	7529	STATIC void
	7530	S_to_utf8_substr(pTHX_ regexp *prog)
	7531	{
	7532	/* Converts substr fields in prog from bytes to UTF-8, calling fbm_compile
	7533	* on the converted value */
	7534
	7535	int i = 1;
	7536
	7537	PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
	7538
	7539	do {
	7540	if (prog->substrs->data[i].substr
	7541	&& !prog->substrs->data[i].utf8_substr) {
	7542	SV* const sv = newSVsv(prog->substrs->data[i].substr);
	7543	prog->substrs->data[i].utf8_substr = sv;
	7544	sv_utf8_upgrade(sv);
	7545	if (SvVALID(prog->substrs->data[i].substr)) {
	7546	if (SvTAIL(prog->substrs->data[i].substr)) {
	7547	/* Trim the trailing \n that fbm_compile added last
	7548	time. */
	7549	SvCUR_set(sv, SvCUR(sv) - 1);
	7550	/* Whilst this makes the SV technically "invalid" (as its
	7551	buffer is no longer followed by "\0") when fbm_compile()
	7552	adds the "\n" back, a "\0" is restored. */
	7553	fbm_compile(sv, FBMcf_TAIL);
	7554	} else
	7555	fbm_compile(sv, 0);
	7556	}
	7557	if (prog->substrs->data[i].substr == prog->check_substr)
	7558	prog->check_utf8 = sv;
	7559	}
	7560	} while (i--);
	7561	}
	7562
	7563	STATIC bool
	7564	S_to_byte_substr(pTHX_ regexp *prog)
	7565	{
	7566	/* Converts substr fields in prog from UTF-8 to bytes, calling fbm_compile
	7567	* on the converted value; returns FALSE if can't be converted. */
	7568
	7569	dVAR;
	7570	int i = 1;
	7571
	7572	PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
	7573
	7574	do {
	7575	if (prog->substrs->data[i].utf8_substr
	7576	&& !prog->substrs->data[i].substr) {
	7577	SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
	7578	if (! sv_utf8_downgrade(sv, TRUE)) {
	7579	return FALSE;
	7580	}
	7581	if (SvVALID(prog->substrs->data[i].utf8_substr)) {
	7582	if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
	7583	/* Trim the trailing \n that fbm_compile added last
	7584	time. */
	7585	SvCUR_set(sv, SvCUR(sv) - 1);
	7586	fbm_compile(sv, FBMcf_TAIL);
	7587	} else
	7588	fbm_compile(sv, 0);
	7589	}
	7590	prog->substrs->data[i].substr = sv;
	7591	if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
	7592	prog->check_substr = sv;
	7593	}
	7594	} while (i--);
	7595
	7596	return TRUE;
	7597	}
	7598
	7599	/*
	7600	* Local variables:
	7601	* c-indentation-style: bsd
	7602	* c-basic-offset: 4
	7603	* indent-tabs-mode: nil
	7604	* End:
	7605	*
	7606	* ex: set ts=8 sts=4 sw=4 et:
	7607	*/