perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regexec.c
	2	*/
	3
	4	/*
	5	* One Ring to rule them all, One Ring to find them
	6	&
	7	* [p.v of _The Lord of the Rings_, opening poem]
	8	* [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
	9	* [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
	10	*/
	11
	12	/* This file contains functions for executing a regular expression. See
	13	* also regcomp.c which funnily enough, contains functions for compiling
	14	* a regular expression.
	15	*
	16	* This file is also copied at build time to ext/re/re_exec.c, where
	17	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	18	* This causes the main functions to be compiled under new names and with
	19	* debugging support added, which makes "use re 'debug'" work.
	20	*/
	21
	22	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	23	* be confused with the original package (see point 3 below). Thanks, Henry!
	24	*/
	25
	26	/* Additional note: this code is very heavily munged from Henry's version
	27	* in places. In some spots I've traded clarity for efficiency, so don't
	28	* blame Henry for some of the lack of readability.
	29	*/
	30
	31	/* The names of the functions have been changed from regcomp and
	32	* regexec to pregcomp and pregexec in order to avoid conflicts
	33	* with the POSIX routines of the same names.
	34	*/
	35
	36	#ifdef PERL_EXT_RE_BUILD
	37	#include "re_top.h"
	38	#endif
	39
	40	/*
	41	* pregcomp and pregexec -- regsub and regerror are not used in perl
	42	*
	43	* Copyright (c) 1986 by University of Toronto.
	44	* Written by Henry Spencer. Not derived from licensed software.
	45	*
	46	* Permission is granted to anyone to use this software for any
	47	* purpose on any computer system, and to redistribute it freely,
	48	* subject to the following restrictions:
	49	*
	50	* 1. The author is not responsible for the consequences of use of
	51	* this software, no matter how awful, even if they arise
	52	* from defects in it.
	53	*
	54	* 2. The origin of this software must not be misrepresented, either
	55	* by explicit claim or by omission.
	56	*
	57	* 3. Altered versions must be plainly marked as such, and must not
	58	* be misrepresented as being the original software.
	59	*
	60	**** Alterations to Henry's code are...
	61	****
	62	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	63	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
	64	**** by Larry Wall and others
	65	****
	66	**** You may distribute under the terms of either the GNU General Public
	67	**** License or the Artistic License, as specified in the README file.
	68	*
	69	* Beware that some of this code is subtly aware of the way operator
	70	* precedence is structured in regular expressions. Serious changes in
	71	* regular-expression syntax might require a total rethink.
	72	*/
	73	#include "EXTERN.h"
	74	#define PERL_IN_REGEXEC_C
	75	#include "perl.h"
	76
	77	#ifdef PERL_IN_XSUB_RE
	78	# include "re_comp.h"
	79	#else
	80	# include "regcomp.h"
	81	#endif
	82
	83	#include "inline_invlist.c"
	84	#include "unicode_constants.h"
	85
	86	#ifdef DEBUGGING
	87	/* At least one required character in the target string is expressible only in
	88	* UTF-8. */
	89	static const char* const non_utf8_target_but_utf8_required
	90	= "Can't match, because target string needs to be in UTF-8\n";
	91	#endif
	92
	93	#define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target) STMT_START { \
	94	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s", non_utf8_target_but_utf8_required));\
	95	goto target; \
	96	} STMT_END
	97
	98	#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
	99
	100	#ifndef STATIC
	101	#define STATIC static
	102	#endif
	103
	104	/* Valid only for non-utf8 strings: avoids the reginclass
	105	* call if there are no complications: i.e., if everything matchable is
	106	* straight forward in the bitmap */
	107	#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,c+1,0) \
	108	: ANYOF_BITMAP_TEST(p,*(c)))
	109
	110	/*
	111	* Forwards.
	112	*/
	113
	114	#define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))
	115	#define CHR_DIST(a,b) (reginfo->is_utf8_target ? utf8_distance(a,b) : a - b)
	116
	117	#define HOPc(pos,off) \
	118	(char *)(reginfo->is_utf8_target \
	119	? reghop3((U8*)pos, off, \
	120	(U8*)(off >= 0 ? reginfo->strend : reginfo->strbeg)) \
	121	: (U8*)(pos + off))
	122	#define HOPBACKc(pos, off) \
	123	(char*)(reginfo->is_utf8_target \
	124	? reghopmaybe3((U8)pos, -off, (U8)(reginfo->strbeg)) \
	125	: (pos - off >= reginfo->strbeg) \
	126	? (U8*)pos - off \
	127	: NULL)
	128
	129	#define HOP3(pos,off,lim) (reginfo->is_utf8_target ? reghop3((U8)(pos), off, (U8)(lim)) : (U8*)(pos + off))
	130	#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
	131
	132
	133	#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
	134	#define NEXTCHR_IS_EOS (nextchr < 0)
	135
	136	#define SET_nextchr \
	137	nextchr = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
	138
	139	#define SET_locinput(p) \
	140	locinput = (p); \
	141	SET_nextchr
	142
	143
	144	#define LOAD_UTF8_CHARCLASS(swash_ptr, property_name) STMT_START { \
	145	if (!swash_ptr) { \
	146	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; \
	147	swash_ptr = _core_swash_init("utf8", property_name, &PL_sv_undef, \
	148	1, 0, NULL, &flags); \
	149	assert(swash_ptr); \
	150	} \
	151	} STMT_END
	152
	153	/* If in debug mode, we test that a known character properly matches */
	154	#ifdef DEBUGGING
	155	# define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr, \
	156	property_name, \
	157	utf8_char_in_property) \
	158	LOAD_UTF8_CHARCLASS(swash_ptr, property_name); \
	159	assert(swash_fetch(swash_ptr, (U8 *) utf8_char_in_property, TRUE));
	160	#else
	161	# define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr, \
	162	property_name, \
	163	utf8_char_in_property) \
	164	LOAD_UTF8_CHARCLASS(swash_ptr, property_name)
	165	#endif
	166
	167	#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS_DEBUG_TEST( \
	168	PL_utf8_swash_ptrs[_CC_WORDCHAR], \
	169	swash_property_names[_CC_WORDCHAR], \
	170	LATIN_CAPITAL_LETTER_SHARP_S_UTF8);
	171
	172	#define LOAD_UTF8_CHARCLASS_GCB() /* Grapheme cluster boundaries */ \
	173	STMT_START { \
	174	LOAD_UTF8_CHARCLASS_DEBUG_TEST(PL_utf8_X_regular_begin, \
	175	"_X_regular_begin", \
	176	LATIN_CAPITAL_LETTER_SHARP_S_UTF8); \
	177	LOAD_UTF8_CHARCLASS_DEBUG_TEST(PL_utf8_X_extend, \
	178	"_X_extend", \
	179	COMBINING_GRAVE_ACCENT_UTF8); \
	180	} STMT_END
	181
	182	#define PLACEHOLDER /* Something for the preprocessor to grab onto */
	183	/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
	184
	185	/* for use after a quantifier and before an EXACT-like node -- japhy */
	186	/* it would be nice to rework regcomp.sym to generate this stuff. sigh
	187	*
	188	* NOTE that nothing that affects backtracking should be in here, specifically
	189	* VERBS must NOT be included. JUMPABLE is used to determine if we can ignore a
	190	* node that is in between two EXACT like nodes when ascertaining what the required
	191	* "follow" character is. This should probably be moved to regex compile time
	192	* although it may be done at run time beause of the REF possibility - more
	193	* investigation required. -- demerphq
	194	*/
	195	#define JUMPABLE(rn) ( \
	196	OP(rn) == OPEN \|\| \
	197	(OP(rn) == CLOSE && (!cur_eval \|\| cur_eval->u.eval.close_paren != ARG(rn))) \|\| \
	198	OP(rn) == EVAL \|\| \
	199	OP(rn) == SUSPEND \|\| OP(rn) == IFMATCH \|\| \
	200	OP(rn) == PLUS \|\| OP(rn) == MINMOD \|\| \
	201	OP(rn) == KEEPS \|\| \
	202	(PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
	203	)
	204	#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
	205
	206	#define HAS_TEXT(rn) ( IS_EXACT(rn) \|\| PL_regkind[OP(rn)] == REF )
	207
	208	#if 0
	209	/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
	210	we don't need this definition. */
	211	#define IS_TEXT(rn) ( OP(rn)==EXACT \|\| OP(rn)==REF \|\| OP(rn)==NREF )
	212	#define IS_TEXTF(rn) ( OP(rn)==EXACTFU \|\| OP(rn)==EXACTFU_SS \|\| OP(rn)==EXACTFA \|\| OP(rn)==EXACTFA_NO_TRIE \|\| OP(rn)==EXACTF \|\| OP(rn)==REFF \|\| OP(rn)==NREFF )
	213	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL \|\| OP(rn)==REFFL \|\| OP(rn)==NREFFL )
	214
	215	#else
	216	/* ... so we use this as its faster. */
	217	#define IS_TEXT(rn) ( OP(rn)==EXACT )
	218	#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU \|\| OP(rn)==EXACTFU_SS \|\| OP(rn) == EXACTFA \|\| OP(rn) == EXACTFA_NO_TRIE)
	219	#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
	220	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
	221
	222	#endif
	223
	224	/*
	225	Search for mandatory following text node; for lookahead, the text must
	226	follow but for lookbehind (rn->flags != 0) we skip to the next step.
	227	*/
	228	#define FIND_NEXT_IMPT(rn) STMT_START { \
	229	while (JUMPABLE(rn)) { \
	230	const OPCODE type = OP(rn); \
	231	if (type == SUSPEND \|\| PL_regkind[type] == CURLY) \
	232	rn = NEXTOPER(NEXTOPER(rn)); \
	233	else if (type == PLUS) \
	234	rn = NEXTOPER(rn); \
	235	else if (type == IFMATCH) \
	236	rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
	237	else rn += NEXT_OFF(rn); \
	238	} \
	239	} STMT_END
	240
	241	/* These constants are for finding GCB=LV and GCB=LVT in the CLUMP regnode.
	242	* These are for the pre-composed Hangul syllables, which are all in a
	243	* contiguous block and arranged there in such a way so as to facilitate
	244	* alorithmic determination of their characteristics. As such, they don't need
	245	* a swash, but can be determined by simple arithmetic. Almost all are
	246	* GCB=LVT, but every 28th one is a GCB=LV */
	247	#define SBASE 0xAC00 /* Start of block */
	248	#define SCount 11172 /* Length of block */
	249	#define TCount 28
	250
	251	#define SLAB_FIRST(s) (&(s)->states[0])
	252	#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
	253
	254	static void S_setup_eval_state(pTHX_ regmatch_info *const reginfo);
	255	static void S_cleanup_regmatch_info_aux(pTHX_ void *arg);
	256	static regmatch_state * S_push_slab(pTHX);
	257
	258	#define REGCP_PAREN_ELEMS 3
	259	#define REGCP_OTHER_ELEMS 3
	260	#define REGCP_FRAME_ELEMS 1
	261	/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
	262	* are needed for the regexp context stack bookkeeping. */
	263
	264	STATIC CHECKPOINT
	265	S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen)
	266	{
	267	dVAR;
	268	const int retval = PL_savestack_ix;
	269	const int paren_elems_to_push =
	270	(maxopenparen - parenfloor) * REGCP_PAREN_ELEMS;
	271	const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
	272	const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
	273	I32 p;
	274	GET_RE_DEBUG_FLAGS_DECL;
	275
	276	PERL_ARGS_ASSERT_REGCPPUSH;
	277
	278	if (paren_elems_to_push < 0)
	279	Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0",
	280	paren_elems_to_push);
	281
	282	if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
	283	Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
	284	" out of range (%lu-%ld)",
	285	total_elems,
	286	(unsigned long)maxopenparen,
	287	(long)parenfloor);
	288
	289	SSGROW(total_elems + REGCP_FRAME_ELEMS);
	290
	291	DEBUG_BUFFERS_r(
	292	if ((int)maxopenparen > (int)parenfloor)
	293	PerlIO_printf(Perl_debug_log,
	294	"rex=0x%"UVxf" offs=0x%"UVxf": saving capture indices:\n",
	295	PTR2UV(rex),
	296	PTR2UV(rex->offs)
	297	);
	298	);
	299	for (p = parenfloor+1; p <= (I32)maxopenparen; p++) {
	300	/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
	301	SSPUSHIV(rex->offs[p].end);
	302	SSPUSHIV(rex->offs[p].start);
	303	SSPUSHINT(rex->offs[p].start_tmp);
	304	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	305	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"\n",
	306	(UV)p,
	307	(IV)rex->offs[p].start,
	308	(IV)rex->offs[p].start_tmp,
	309	(IV)rex->offs[p].end
	310	));
	311	}
	312	/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
	313	SSPUSHINT(maxopenparen);
	314	SSPUSHINT(rex->lastparen);
	315	SSPUSHINT(rex->lastcloseparen);
	316	SSPUSHUV(SAVEt_REGCONTEXT \| elems_shifted); /* Magic cookie. */
	317
	318	return retval;
	319	}
	320
	321	/* These are needed since we do not localize EVAL nodes: */
	322	#define REGCP_SET(cp) \
	323	DEBUG_STATE_r( \
	324	PerlIO_printf(Perl_debug_log, \
	325	" Setting an EVAL scope, savestack=%"IVdf"\n", \
	326	(IV)PL_savestack_ix)); \
	327	cp = PL_savestack_ix
	328
	329	#define REGCP_UNWIND(cp) \
	330	DEBUG_STATE_r( \
	331	if (cp != PL_savestack_ix) \
	332	PerlIO_printf(Perl_debug_log, \
	333	" Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
	334	(IV)(cp), (IV)PL_savestack_ix)); \
	335	regcpblow(cp)
	336
	337	#define UNWIND_PAREN(lp, lcp) \
	338	for (n = rex->lastparen; n > lp; n--) \
	339	rex->offs[n].end = -1; \
	340	rex->lastparen = n; \
	341	rex->lastcloseparen = lcp;
	342
	343
	344	STATIC void
	345	S_regcppop(pTHX_ regexp rex, U32 maxopenparen_p)
	346	{
	347	dVAR;
	348	UV i;
	349	U32 paren;
	350	GET_RE_DEBUG_FLAGS_DECL;
	351
	352	PERL_ARGS_ASSERT_REGCPPOP;
	353
	354	/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
	355	i = SSPOPUV;
	356	assert((i & SAVE_MASK) == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
	357	i >>= SAVE_TIGHT_SHIFT; /* Parentheses elements to pop. */
	358	rex->lastcloseparen = SSPOPINT;
	359	rex->lastparen = SSPOPINT;
	360	*maxopenparen_p = SSPOPINT;
	361
	362	i -= REGCP_OTHER_ELEMS;
	363	/* Now restore the parentheses context. */
	364	DEBUG_BUFFERS_r(
	365	if (i \|\| rex->lastparen + 1 <= rex->nparens)
	366	PerlIO_printf(Perl_debug_log,
	367	"rex=0x%"UVxf" offs=0x%"UVxf": restoring capture indices to:\n",
	368	PTR2UV(rex),
	369	PTR2UV(rex->offs)
	370	);
	371	);
	372	paren = *maxopenparen_p;
	373	for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
	374	SSize_t tmps;
	375	rex->offs[paren].start_tmp = SSPOPINT;
	376	rex->offs[paren].start = SSPOPIV;
	377	tmps = SSPOPIV;
	378	if (paren <= rex->lastparen)
	379	rex->offs[paren].end = tmps;
	380	DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
	381	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"%s\n",
	382	(UV)paren,
	383	(IV)rex->offs[paren].start,
	384	(IV)rex->offs[paren].start_tmp,
	385	(IV)rex->offs[paren].end,
	386	(paren > rex->lastparen ? "(skipped)" : ""));
	387	);
	388	paren--;
	389	}
	390	#if 1
	391	/* It would seem that the similar code in regtry()
	392	* already takes care of this, and in fact it is in
	393	* a better location to since this code can #if 0-ed out
	394	* but the code in regtry() is needed or otherwise tests
	395	* requiring null fields (pat.t#187 and split.t#{13,14}
	396	* (as of patchlevel 7877) will fail. Then again,
	397	* this code seems to be necessary or otherwise
	398	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	399	* --jhi updated by dapm */
	400	for (i = rex->lastparen + 1; i <= rex->nparens; i++) {
	401	if (i > *maxopenparen_p)
	402	rex->offs[i].start = -1;
	403	rex->offs[i].end = -1;
	404	DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
	405	" \\%"UVuf": %s ..-1 undeffing\n",
	406	(UV)i,
	407	(i > *maxopenparen_p) ? "-1" : " "
	408	));
	409	}
	410	#endif
	411	}
	412
	413	/* restore the parens and associated vars at savestack position ix,
	414	* but without popping the stack */
	415
	416	STATIC void
	417	S_regcp_restore(pTHX_ regexp rex, I32 ix, U32 maxopenparen_p)
	418	{
	419	I32 tmpix = PL_savestack_ix;
	420	PL_savestack_ix = ix;
	421	regcppop(rex, maxopenparen_p);
	422	PL_savestack_ix = tmpix;
	423	}
	424
	425	#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
	426
	427	STATIC bool
	428	S_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
	429	{
	430	/* Returns a boolean as to whether or not 'character' is a member of the
	431	* Posix character class given by 'classnum' that should be equivalent to a
	432	* value in the typedef '_char_class_number'.
	433	*
	434	* Ideally this could be replaced by a just an array of function pointers
	435	* to the C library functions that implement the macros this calls.
	436	* However, to compile, the precise function signatures are required, and
	437	* these may vary from platform to to platform. To avoid having to figure
	438	* out what those all are on each platform, I (khw) am using this method,
	439	* which adds an extra layer of function call overhead (unless the C
	440	* optimizer strips it away). But we don't particularly care about
	441	* performance with locales anyway. */
	442
	443	switch ((_char_class_number) classnum) {
	444	case _CC_ENUM_ALPHANUMERIC: return isALPHANUMERIC_LC(character);
	445	case _CC_ENUM_ALPHA: return isALPHA_LC(character);
	446	case _CC_ENUM_ASCII: return isASCII_LC(character);
	447	case _CC_ENUM_BLANK: return isBLANK_LC(character);
	448	case _CC_ENUM_CASED: return isLOWER_LC(character)
	449	\|\| isUPPER_LC(character);
	450	case _CC_ENUM_CNTRL: return isCNTRL_LC(character);
	451	case _CC_ENUM_DIGIT: return isDIGIT_LC(character);
	452	case _CC_ENUM_GRAPH: return isGRAPH_LC(character);
	453	case _CC_ENUM_LOWER: return isLOWER_LC(character);
	454	case _CC_ENUM_PRINT: return isPRINT_LC(character);
	455	case _CC_ENUM_PSXSPC: return isPSXSPC_LC(character);
	456	case _CC_ENUM_PUNCT: return isPUNCT_LC(character);
	457	case _CC_ENUM_SPACE: return isSPACE_LC(character);
	458	case _CC_ENUM_UPPER: return isUPPER_LC(character);
	459	case _CC_ENUM_WORDCHAR: return isWORDCHAR_LC(character);
	460	case _CC_ENUM_XDIGIT: return isXDIGIT_LC(character);
	461	default: /* VERTSPACE should never occur in locales */
	462	Perl_croak(aTHX_ "panic: isFOO_lc() has an unexpected character class '%d'", classnum);
	463	}
	464
	465	assert(0); /* NOTREACHED */
	466	return FALSE;
	467	}
	468
	469	STATIC bool
	470	S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
	471	{
	472	/* Returns a boolean as to whether or not the (well-formed) UTF-8-encoded
	473	* 'character' is a member of the Posix character class given by 'classnum'
	474	* that should be equivalent to a value in the typedef
	475	* '_char_class_number'.
	476	*
	477	* This just calls isFOO_lc on the code point for the character if it is in
	478	* the range 0-255. Outside that range, all characters avoid Unicode
	479	* rules, ignoring any locale. So use the Unicode function if this class
	480	* requires a swash, and use the Unicode macro otherwise. */
	481
	482	PERL_ARGS_ASSERT_ISFOO_UTF8_LC;
	483
	484	if (UTF8_IS_INVARIANT(*character)) {
	485	return isFOO_lc(classnum, *character);
	486	}
	487	else if (UTF8_IS_DOWNGRADEABLE_START(*character)) {
	488	return isFOO_lc(classnum,
	489	TWO_BYTE_UTF8_TO_NATIVE(character, (character + 1)));
	490	}
	491
	492	if (classnum < _FIRST_NON_SWASH_CC) {
	493
	494	/* Initialize the swash unless done already */
	495	if (! PL_utf8_swash_ptrs[classnum]) {
	496	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	497	PL_utf8_swash_ptrs[classnum] = _core_swash_init("utf8",
	498	swash_property_names[classnum], &PL_sv_undef, 1, 0, NULL, &flags);
	499	}
	500
	501	return cBOOL(swash_fetch(PL_utf8_swash_ptrs[classnum], (U8 *)
	502	character,
	503	TRUE /* is UTF */ ));
	504	}
	505
	506	switch ((_char_class_number) classnum) {
	507	case _CC_ENUM_SPACE:
	508	case _CC_ENUM_PSXSPC: return is_XPERLSPACE_high(character);
	509
	510	case _CC_ENUM_BLANK: return is_HORIZWS_high(character);
	511	case _CC_ENUM_XDIGIT: return is_XDIGIT_high(character);
	512	case _CC_ENUM_VERTSPACE: return is_VERTWS_high(character);
	513	default: return 0; /* Things like CNTRL are always
	514	below 256 */
	515	}
	516
	517	assert(0); /* NOTREACHED */
	518	return FALSE;
	519	}
	520
	521	/*
	522	* pregexec and friends
	523	*/
	524
	525	#ifndef PERL_IN_XSUB_RE
	526	/*
	527	- pregexec - match a regexp against a string
	528	*/
	529	I32
	530	Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
	531	char strbeg, SSize_t minend, SV screamer, U32 nosave)
	532	/* stringarg: the point in the string at which to begin matching */
	533	/* strend: pointer to null at end of string */
	534	/* strbeg: real beginning of string */
	535	/* minend: end of match must be >= minend bytes after stringarg. */
	536	/* screamer: SV being matched: only used for utf8 flag, pos() etc; string
	537	* itself is accessed via the pointers above */
	538	/* nosave: For optimizations. */
	539	{
	540	PERL_ARGS_ASSERT_PREGEXEC;
	541
	542	return
	543	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
	544	nosave ? 0 : REXEC_COPY_STR);
	545	}
	546	#endif
	547
	548	/*
	549	* Need to implement the following flags for reg_anch:
	550	*
	551	* USE_INTUIT_NOML - Useful to call re_intuit_start() first
	552	* USE_INTUIT_ML
	553	* INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
	554	* INTUIT_AUTORITATIVE_ML
	555	* INTUIT_ONCE_NOML - Intuit can match in one location only.
	556	* INTUIT_ONCE_ML
	557	*
	558	* Another flag for this function: SECOND_TIME (so that float substrs
	559	* with giant delta may be not rechecked).
	560	*/
	561
	562	/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
	563	Otherwise, only SvCUR(sv) is used to get strbeg. */
	564
	565	/* XXXX Some places assume that there is a fixed substring.
	566	An update may be needed if optimizer marks as "INTUITable"
	567	RExen without fixed substrings. Similarly, it is assumed that
	568	lengths of all the strings are no more than minlen, thus they
	569	cannot come from lookahead.
	570	(Or minlen should take into account lookahead.)
	571	NOTE: Some of this comment is not correct. minlen does now take account
	572	of lookahead/behind. Further research is required. -- demerphq
	573
	574	*/
	575
	576	/* A failure to find a constant substring means that there is no need to make
	577	an expensive call to REx engine, thus we celebrate a failure. Similarly,
	578	finding a substring too deep into the string means that fewer calls to
	579	regtry() should be needed.
	580
	581	REx compiler's optimizer found 4 possible hints:
	582	a) Anchored substring;
	583	b) Fixed substring;
	584	c) Whether we are anchored (beginning-of-line or \G);
	585	d) First node (of those at offset 0) which may distinguish positions;
	586	We use a)b)d) and multiline-part of c), and try to find a position in the
	587	string which does not contradict any of them.
	588	*/
	589
	590	/* Most of the decisions we make here should have been made at compile time.
	591	The nodes of the REx which we used for the search should have been
	592	deleted from the finite automaton. */
	593
	594	/* args:
	595	* rx: the regex to match against
	596	* sv: the SV being matched: only used for utf8 flag; the string
	597	* itself is accessed via the pointers below. Note that on
	598	* something like an overloaded SV, SvPOK(sv) may be false
	599	* and the string pointers may point to something unrelated to
	600	* the SV itself.
	601	* strbeg: real beginning of string
	602	* strpos: the point in the string at which to begin matching
	603	* strend: pointer to the byte following the last char of the string
	604	* flags: currently unused; set to 0
	605	* data: currently unused; set to NULL
	606	*/
	607
	608	char *
	609	Perl_re_intuit_start(pTHX_
	610	REGEXP * const rx,
	611	SV *sv,
	612	const char * const strbeg,
	613	char *strpos,
	614	char *strend,
	615	const U32 flags,
	616	re_scream_pos_data *data)
	617	{
	618	dVAR;
	619	struct regexp *const prog = ReANY(rx);
	620	SSize_t start_shift = 0;
	621	/* Should be nonnegative! */
	622	SSize_t end_shift = 0;
	623	char *s;
	624	SV *check;
	625	char *t;
	626	const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
	627	I32 ml_anch;
	628	char other_last = NULL; / other substr checked before this */
	629	char check_at = NULL; / check substr found at this pos */
	630	char checked_upto = NULL; / how far into the string we have already checked using find_byclass*/
	631	const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
	632	RXi_GET_DECL(prog,progi);
	633	regmatch_info reginfo_buf; /* create some info to pass to find_byclass */
	634	regmatch_info *const reginfo = &reginfo_buf;
	635	#ifdef DEBUGGING
	636	const char * const i_strpos = strpos;
	637	#endif
	638	GET_RE_DEBUG_FLAGS_DECL;
	639
	640	PERL_ARGS_ASSERT_RE_INTUIT_START;
	641	PERL_UNUSED_ARG(flags);
	642	PERL_UNUSED_ARG(data);
	643
	644	/* CHR_DIST() would be more correct here but it makes things slow. */
	645	if (prog->minlen > strend - strpos) {
	646	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	647	"String too short... [re_intuit_start]\n"));
	648	goto fail;
	649	}
	650
	651	reginfo->is_utf8_target = cBOOL(utf8_target);
	652	reginfo->info_aux = NULL;
	653	reginfo->strbeg = strbeg;
	654	reginfo->strend = strend;
	655	reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx));
	656	reginfo->intuit = 1;
	657	/* not actually used within intuit, but zero for safety anyway */
	658	reginfo->poscache_maxiter = 0;
	659
	660	if (utf8_target) {
	661	if (!prog->check_utf8 && prog->check_substr)
	662	to_utf8_substr(prog);
	663	check = prog->check_utf8;
	664	} else {
	665	if (!prog->check_substr && prog->check_utf8) {
	666	if (! to_byte_substr(prog)) {
	667	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
	668	}
	669	}
	670	check = prog->check_substr;
	671	}
	672	if (prog->extflags & RXf_ANCH) { /* Match at \G, beg-of-str or after \n */
	673	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
	674	\|\| ( (prog->extflags & RXf_ANCH_BOL)
	675	&& !multiline ) ); /* Check after \n? */
	676
	677	if (!ml_anch) {
	678	/* we are only allowed to match at BOS or \G */
	679
	680	if (prog->extflags & RXf_ANCH_GPOS) {
	681	/* in this case, we hope(!) that the caller has already
	682	* set strpos to pos()-gofs, and will already have checked
	683	* that this anchor position is legal
	684	*/
	685	;
	686	}
	687	else if (!(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
	688	&& (strpos != strbeg))
	689	{
	690	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
	691	goto fail;
	692	}
	693	if (prog->check_offset_min == prog->check_offset_max
	694	&& !(prog->extflags & RXf_CANY_SEEN)
	695	&& ! multiline) /* /m can cause \n's to match that aren't
	696	accounted for in the string max length.
	697	See [perl #115242] */
	698	{
	699	/* Substring at constant offset from beg-of-str... */
	700	SSize_t slen;
	701
	702	s = HOP3c(strpos, prog->check_offset_min, strend);
	703
	704	if (SvTAIL(check)) {
	705	slen = SvCUR(check); /* >= 1 */
	706
	707	if ( strend - s > slen \|\| strend - s < slen - 1
	708	\|\| (strend - s == slen && strend[-1] != '\n')) {
	709	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
	710	goto fail_finish;
	711	}
	712	/* Now should match s[0..slen-2] */
	713	slen--;
	714	if (slen && (SvPVX_const(check) != s
	715	\|\| (slen > 1
	716	&& memNE(SvPVX_const(check), s, slen)))) {
	717	report_neq:
	718	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
	719	goto fail_finish;
	720	}
	721	}
	722	else if (SvPVX_const(check) != s
	723	\|\| ((slen = SvCUR(check)) > 1
	724	&& memNE(SvPVX_const(check), s, slen)))
	725	goto report_neq;
	726	check_at = s;
	727	goto success_at_start;
	728	}
	729	}
	730	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
	731	s = strpos;
	732	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	733	end_shift = prog->check_end_shift;
	734
	735	if (!ml_anch) {
	736	const SSize_t end = prog->check_offset_max + CHR_SVLEN(check)
	737	- (SvTAIL(check) != 0);
	738	const SSize_t eshift = CHR_DIST((U8)strend, (U8)s) - end;
	739
	740	if (end_shift < eshift)
	741	end_shift = eshift;
	742	}
	743	}
	744	else { /* Can match at random position */
	745	ml_anch = 0;
	746	s = strpos;
	747	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	748	end_shift = prog->check_end_shift;
	749
	750	/* end shift should be non negative here */
	751	}
	752
	753	#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
	754	if (end_shift < 0)
	755	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
	756	(IV)end_shift, RX_PRECOMP(prog));
	757	#endif
	758
	759	restart:
	760	/* Find a possible match in the region s..strend by looking for
	761	the "check" substring in the region corrected by start/end_shift. */
	762
	763	{
	764	SSize_t srch_start_shift = start_shift;
	765	SSize_t srch_end_shift = end_shift;
	766	U8* start_point;
	767	U8* end_point;
	768	if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
	769	srch_end_shift -= ((strbeg - s) - srch_start_shift);
	770	srch_start_shift = strbeg - s;
	771	}
	772	DEBUG_OPTIMISE_MORE_r({
	773	PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
	774	(IV)prog->check_offset_min,
	775	(IV)srch_start_shift,
	776	(IV)srch_end_shift,
	777	(IV)prog->check_end_shift);
	778	});
	779
	780	if (prog->extflags & RXf_CANY_SEEN) {
	781	start_point= (U8*)(s + srch_start_shift);
	782	end_point= (U8*)(strend - srch_end_shift);
	783	} else {
	784	start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
	785	end_point= HOP3(strend, -srch_end_shift, strbeg);
	786	}
	787	DEBUG_OPTIMISE_MORE_r({
	788	PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
	789	(int)(end_point - start_point),
	790	(int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
	791	start_point);
	792	});
	793
	794	s = fbm_instr( start_point, end_point,
	795	check, multiline ? FBMrf_MULTILINE : 0);
	796	}
	797	/* Update the count-of-usability, remove useless subpatterns,
	798	unshift s. */
	799
	800	DEBUG_EXECUTE_r({
	801	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	802	SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
	803	PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
	804	(s ? "Found" : "Did not find"),
	805	(check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
	806	? "anchored" : "floating"),
	807	quoted,
	808	RE_SV_TAIL(check),
	809	(s ? " at offset " : "...\n") );
	810	});
	811
	812	if (!s)
	813	goto fail_finish;
	814	/* Finish the diagnostic message */
	815	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
	816
	817	/* XXX dmq: first branch is for positive lookbehind...
	818	Our check string is offset from the beginning of the pattern.
	819	So we need to do any stclass tests offset forward from that
	820	point. I think. :-(
	821	*/
	822
	823
	824
	825	check_at=s;
	826
	827
	828	/* Got a candidate. Check MBOL anchoring, and the other substr.
	829	Start with the other substr.
	830	XXXX no SCREAM optimization yet - and a very coarse implementation
	831	XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
	832	always match. Probably should be marked during compile...
	833	Probably it is right to do no SCREAM here...
	834	*/
	835
	836	if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
	837	: (prog->float_substr && prog->anchored_substr))
	838	{
	839	/* Take into account the "other" substring. */
	840	/* XXXX May be hopelessly wrong for UTF... */
	841	if (!other_last)
	842	other_last = strpos;
	843	if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
	844	do_other_anchored:
	845	{
	846	char * const last = HOP3c(s, -start_shift, strbeg);
	847	char last1, last2;
	848	char * const saved_s = s;
	849	SV* must;
	850
	851	t = s - prog->check_offset_max;
	852	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	853	&& (!utf8_target
	854	\|\| ((t = (char)reghopmaybe3((U8)s, -(prog->check_offset_max), (U8*)strpos))
	855	&& t > strpos)))
	856	NOOP;
	857	else
	858	t = strpos;
	859	t = HOP3c(t, prog->anchored_offset, strend);
	860	if (t < other_last) /* These positions already checked */
	861	t = other_last;
	862	last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
	863	if (last < last1)
	864	last1 = last;
	865	/* XXXX It is not documented what units *_offsets are in.
	866	We assume bytes, but this is clearly wrong.
	867	Meaning this code needs to be carefully reviewed for errors.
	868	dmq.
	869	*/
	870
	871	/* On end-of-str: see comment below. */
	872	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	873	if (must == &PL_sv_undef) {
	874	s = (char*)NULL;
	875	DEBUG_r(must = prog->anchored_utf8); /* for debug */
	876	}
	877	else
	878	s = fbm_instr(
	879	(unsigned char*)t,
	880	HOP3(HOP3(last1, prog->anchored_offset, strend)
	881	+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
	882	must,
	883	multiline ? FBMrf_MULTILINE : 0
	884	);
	885	DEBUG_EXECUTE_r({
	886	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	887	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	888	PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
	889	(s ? "Found" : "Contradicts"),
	890	quoted, RE_SV_TAIL(must));
	891	});
	892
	893
	894	if (!s) {
	895	if (last1 >= last2) {
	896	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	897	", giving up...\n"));
	898	goto fail_finish;
	899	}
	900	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	901	", trying floating at offset %ld...\n",
	902	(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
	903	other_last = HOP3c(last1, prog->anchored_offset+1, strend);
	904	s = HOP3c(last, 1, strend);
	905	goto restart;
	906	}
	907	else {
	908	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	909	(long)(s - i_strpos)));
	910	t = HOP3c(s, -prog->anchored_offset, strbeg);
	911	other_last = HOP3c(s, 1, strend);
	912	s = saved_s;
	913	if (t == strpos)
	914	goto try_at_start;
	915	goto try_at_offset;
	916	}
	917	}
	918	}
	919	else { /* Take into account the floating substring. */
	920	char last, last1;
	921	char * const saved_s = s;
	922	SV* must;
	923
	924	t = HOP3c(s, -start_shift, strbeg);
	925	last1 = last =
	926	HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
	927	if (CHR_DIST((U8)last, (U8)t) > prog->float_max_offset)
	928	last = HOP3c(t, prog->float_max_offset, strend);
	929	s = HOP3c(t, prog->float_min_offset, strend);
	930	if (s < other_last)
	931	s = other_last;
	932	/* XXXX It is not documented what units *_offsets are in. Assume bytes. */
	933	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	934	/* fbm_instr() takes into account exact value of end-of-str
	935	if the check is SvTAIL(ed). Since false positives are OK,
	936	and end-of-str is not later than strend we are OK. */
	937	if (must == &PL_sv_undef) {
	938	s = (char*)NULL;
	939	DEBUG_r(must = prog->float_utf8); /* for debug message */
	940	}
	941	else
	942	s = fbm_instr((unsigned char*)s,
	943	(unsigned char*)last + SvCUR(must)
	944	- (SvTAIL(must)!=0),
	945	must, multiline ? FBMrf_MULTILINE : 0);
	946	DEBUG_EXECUTE_r({
	947	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	948	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	949	PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
	950	(s ? "Found" : "Contradicts"),
	951	quoted, RE_SV_TAIL(must));
	952	});
	953	if (!s) {
	954	if (last1 == last) {
	955	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	956	", giving up...\n"));
	957	goto fail_finish;
	958	}
	959	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	960	", trying anchored starting at offset %ld...\n",
	961	(long)(saved_s + 1 - i_strpos)));
	962	other_last = last;
	963	s = HOP3c(t, 1, strend);
	964	goto restart;
	965	}
	966	else {
	967	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	968	(long)(s - i_strpos)));
	969	other_last = s; /* Fix this later. --Hugo */
	970	s = saved_s;
	971	if (t == strpos)
	972	goto try_at_start;
	973	goto try_at_offset;
	974	}
	975	}
	976	}
	977
	978
	979	t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
	980
	981	DEBUG_OPTIMISE_MORE_r(
	982	PerlIO_printf(Perl_debug_log,
	983	"Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
	984	(IV)prog->check_offset_min,
	985	(IV)prog->check_offset_max,
	986	(IV)(s-strpos),
	987	(IV)(t-strpos),
	988	(IV)(t-s),
	989	(IV)(strend-strpos)
	990	)
	991	);
	992
	993	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	994	&& (!utf8_target
	995	\|\| ((t = (char)reghopmaybe3((U8)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
	996	&& t > strpos)))
	997	{
	998	/* Fixed substring is found far enough so that the match
	999	cannot start at strpos. */
	1000	try_at_offset:
	1001	if (ml_anch && t[-1] != '\n') {
	1002	/* Eventually fbm_*() should handle this, but often
	1003	anchored_offset is not 0, so this check will not be wasted. */
	1004	/* XXXX In the code below we prefer to look for "^" even in
	1005	presence of anchored substrings. And we search even
	1006	beyond the found float position. These pessimizations
	1007	are historical artefacts only. */
	1008	find_anchor:
	1009	while (t < strend - prog->minlen) {
	1010	if (*t == '\n') {
	1011	if (t < check_at - prog->check_offset_min) {
	1012	if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
	1013	/* Since we moved from the found position,
	1014	we definitely contradict the found anchored
	1015	substr. Due to the above check we do not
	1016	contradict "check" substr.
	1017	Thus we can arrive here only if check substr
	1018	is float. Redo checking for "other"=="fixed".
	1019	*/
	1020	strpos = t + 1;
	1021	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
	1022	PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
	1023	goto do_other_anchored;
	1024	}
	1025	/* We don't contradict the found floating substring. */
	1026	/* XXXX Why not check for STCLASS? */
	1027	s = t + 1;
	1028	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
	1029	PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
	1030	goto set_useful;
	1031	}
	1032	/* Position contradicts check-string */
	1033	/* XXXX probably better to look for check-string
	1034	than for "\n", so one should lower the limit for t? */
	1035	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
	1036	PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
	1037	other_last = strpos = s = t + 1;
	1038	goto restart;
	1039	}
	1040	t++;
	1041	}
	1042	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
	1043	PL_colors[0], PL_colors[1]));
	1044	goto fail_finish;
	1045	}
	1046	else {
	1047	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
	1048	PL_colors[0], PL_colors[1]));
	1049	}
	1050	s = t;
	1051	set_useful:
	1052	++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
	1053	}
	1054	else {
	1055	/* The found string does not prohibit matching at strpos,
	1056	- no optimization of calling REx engine can be performed,
	1057	unless it was an MBOL and we are not after MBOL,
	1058	or a future STCLASS check will fail this. */
	1059	try_at_start:
	1060	/* Even in this situation we may use MBOL flag if strpos is offset
	1061	wrt the start of the string. */
	1062	if (ml_anch && (strpos != strbeg) && strpos[-1] != '\n'
	1063	/* May be due to an implicit anchor of m{.*foo} */
	1064	&& !(prog->intflags & PREGf_IMPLICIT))
	1065	{
	1066	t = strpos;
	1067	goto find_anchor;
	1068	}
	1069	DEBUG_EXECUTE_r( if (ml_anch)
	1070	PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
	1071	(long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
	1072	);
	1073	success_at_start:
	1074	if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
	1075	&& (utf8_target ? (
	1076	prog->check_utf8 /* Could be deleted already */
	1077	&& --BmUSEFUL(prog->check_utf8) < 0
	1078	&& (prog->check_utf8 == prog->float_utf8)
	1079	) : (
	1080	prog->check_substr /* Could be deleted already */
	1081	&& --BmUSEFUL(prog->check_substr) < 0
	1082	&& (prog->check_substr == prog->float_substr)
	1083	)))
	1084	{
	1085	/* If flags & SOMETHING - do not do it many times on the same match */
	1086	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
	1087	/* XXX Does the destruction order has to change with utf8_target? */
	1088	SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
	1089	SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
	1090	prog->check_substr = prog->check_utf8 = NULL; /* disable */
	1091	prog->float_substr = prog->float_utf8 = NULL; /* clear */
	1092	check = NULL; /* abort */
	1093	s = strpos;
	1094	/* XXXX If the check string was an implicit check MBOL, then we need to unset the relevant flag
	1095	see http://bugs.activestate.com/show_bug.cgi?id=87173 */
	1096	if (prog->intflags & PREGf_IMPLICIT)
	1097	prog->extflags &= ~RXf_ANCH_MBOL;
	1098	/* XXXX This is a remnant of the old implementation. It
	1099	looks wasteful, since now INTUIT can use many
	1100	other heuristics. */
	1101	prog->extflags &= ~RXf_USE_INTUIT;
	1102	/* XXXX What other flags might need to be cleared in this branch? */
	1103	}
	1104	else
	1105	s = strpos;
	1106	}
	1107
	1108	/* Last resort... */
	1109	/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
	1110	/* trie stclasses are too expensive to use here, we are better off to
	1111	leave it to regmatch itself */
	1112	if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
	1113	/* minlen == 0 is possible if regstclass is \b or \B,
	1114	and the fixed substr is ''$.
	1115	Since minlen is already taken into account, s+1 is before strend;
	1116	accidentally, minlen >= 1 guaranties no false positives at s + 1
	1117	even for \b or \B. But (minlen? 1 : 0) below assumes that
	1118	regstclass does not come from lookahead... */
	1119	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
	1120	This leaves EXACTF-ish only, which are dealt with in find_byclass(). */
	1121	const U8* const str = (U8*)STRING(progi->regstclass);
	1122	/* XXX this value could be pre-computed */
	1123	const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
	1124	? (reginfo->is_utf8_pat
	1125	? utf8_distance(str + STR_LEN(progi->regstclass), str)
	1126	: STR_LEN(progi->regstclass))
	1127	: 1);
	1128	char * endpos;
	1129	if (prog->anchored_substr \|\| prog->anchored_utf8 \|\| ml_anch)
	1130	endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
	1131	else if (prog->float_substr \|\| prog->float_utf8)
	1132	endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
	1133	else
	1134	endpos= strend;
	1135
	1136	if (checked_upto < s)
	1137	checked_upto = s;
	1138	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
	1139	(IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
	1140
	1141	t = s;
	1142	s = find_byclass(prog, progi->regstclass, checked_upto, endpos,
	1143	reginfo);
	1144	if (s) {
	1145	checked_upto = s;
	1146	} else {
	1147	#ifdef DEBUGGING
	1148	const char *what = NULL;
	1149	#endif
	1150	if (endpos == strend) {
	1151	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1152	"Could not match STCLASS...\n") );
	1153	goto fail;
	1154	}
	1155	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1156	"This position contradicts STCLASS...\n") );
	1157	if ((prog->extflags & RXf_ANCH) && !ml_anch)
	1158	goto fail;
	1159	checked_upto = HOPBACKc(endpos, start_shift);
	1160	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
	1161	(IV)start_shift, (IV)(check_at - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
	1162	/* Contradict one of substrings */
	1163	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	1164	if ((utf8_target ? prog->anchored_utf8 : prog->anchored_substr) == check) {
	1165	DEBUG_EXECUTE_r( what = "anchored" );
	1166	hop_and_restart:
	1167	s = HOP3c(t, 1, strend);
	1168	if (s + start_shift + end_shift > strend) {
	1169	/* XXXX Should be taken into account earlier? */
	1170	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1171	"Could not match STCLASS...\n") );
	1172	goto fail;
	1173	}
	1174	if (!check)
	1175	goto giveup;
	1176	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1177	"Looking for %s substr starting at offset %ld...\n",
	1178	what, (long)(s + start_shift - i_strpos)) );
	1179	goto restart;
	1180	}
	1181	/* Have both, check_string is floating */
	1182	if (t + start_shift >= check_at) /* Contradicts floating=check */
	1183	goto retry_floating_check;
	1184	/* Recheck anchored substring, but not floating... */
	1185	s = check_at;
	1186	if (!check)
	1187	goto giveup;
	1188	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1189	"Looking for anchored substr starting at offset %ld...\n",
	1190	(long)(other_last - i_strpos)) );
	1191	goto do_other_anchored;
	1192	}
	1193	/* Another way we could have checked stclass at the
	1194	current position only: */
	1195	if (ml_anch) {
	1196	s = t = t + 1;
	1197	if (!check)
	1198	goto giveup;
	1199	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1200	"Looking for /%s^%s/m starting at offset %ld...\n",
	1201	PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
	1202	goto try_at_offset;
	1203	}
	1204	if (!(utf8_target ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
	1205	goto fail;
	1206	/* Check is floating substring. */
	1207	retry_floating_check:
	1208	t = check_at - start_shift;
	1209	DEBUG_EXECUTE_r( what = "floating" );
	1210	goto hop_and_restart;
	1211	}
	1212	if (t != s) {
	1213	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1214	"By STCLASS: moving %ld --> %ld\n",
	1215	(long)(t - i_strpos), (long)(s - i_strpos))
	1216	);
	1217	}
	1218	else {
	1219	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1220	"Does not contradict STCLASS...\n");
	1221	);
	1222	}
	1223	}
	1224	giveup:
	1225	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
	1226	PL_colors[4], (check ? "Guessed" : "Giving up"),
	1227	PL_colors[5], (long)(s - i_strpos)) );
	1228	return s;
	1229
	1230	fail_finish: /* Substring not found */
	1231	if (prog->check_substr \|\| prog->check_utf8) /* could be removed already */
	1232	BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
	1233	fail:
	1234	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
	1235	PL_colors[4], PL_colors[5]));
	1236	return NULL;
	1237	}
	1238
	1239	#define DECL_TRIE_TYPE(scan) \
	1240	const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold, \
	1241	trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold } \
	1242	trie_type = ((scan->flags == EXACT) \
	1243	? (utf8_target ? trie_utf8 : trie_plain) \
	1244	: (scan->flags == EXACTFA) \
	1245	? (utf8_target ? trie_utf8_exactfa_fold : trie_latin_utf8_exactfa_fold) \
	1246	: (utf8_target ? trie_utf8_fold : trie_latin_utf8_fold))
	1247
	1248	#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
	1249	STMT_START { \
	1250	STRLEN skiplen; \
	1251	U8 flags = FOLD_FLAGS_FULL; \
	1252	switch (trie_type) { \
	1253	case trie_utf8_exactfa_fold: \
	1254	flags \|= FOLD_FLAGS_NOMIX_ASCII; \
	1255	/* FALL THROUGH */ \
	1256	case trie_utf8_fold: \
	1257	if ( foldlen>0 ) { \
	1258	uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	1259	foldlen -= len; \
	1260	uscan += len; \
	1261	len=0; \
	1262	} else { \
	1263	uvc = _to_utf8_fold_flags( (const U8*) uc, foldbuf, &foldlen, flags, NULL); \
	1264	len = UTF8SKIP(uc); \
	1265	skiplen = UNISKIP( uvc ); \
	1266	foldlen -= skiplen; \
	1267	uscan = foldbuf + skiplen; \
	1268	} \
	1269	break; \
	1270	case trie_latin_utf8_exactfa_fold: \
	1271	flags \|= FOLD_FLAGS_NOMIX_ASCII; \
	1272	/* FALL THROUGH */ \
	1273	case trie_latin_utf8_fold: \
	1274	if ( foldlen>0 ) { \
	1275	uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	1276	foldlen -= len; \
	1277	uscan += len; \
	1278	len=0; \
	1279	} else { \
	1280	len = 1; \
	1281	uvc = _to_fold_latin1( (U8) *uc, foldbuf, &foldlen, flags); \
	1282	skiplen = UNISKIP( uvc ); \
	1283	foldlen -= skiplen; \
	1284	uscan = foldbuf + skiplen; \
	1285	} \
	1286	break; \
	1287	case trie_utf8: \
	1288	uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \
	1289	break; \
	1290	case trie_plain: \
	1291	uvc = (UV)*uc; \
	1292	len = 1; \
	1293	} \
	1294	if (uvc < 256) { \
	1295	charid = trie->charmap[ uvc ]; \
	1296	} \
	1297	else { \
	1298	charid = 0; \
	1299	if (widecharmap) { \
	1300	SV** const svpp = hv_fetch(widecharmap, \
	1301	(char*)&uvc, sizeof(UV), 0); \
	1302	if (svpp) \
	1303	charid = (U16)SvIV(*svpp); \
	1304	} \
	1305	} \
	1306	} STMT_END
	1307
	1308	#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
	1309	STMT_START { \
	1310	while (s <= e) { \
	1311	if ( (CoNd) \
	1312	&& (ln == 1 \|\| folder(s, pat_string, ln)) \
	1313	&& (reginfo->intuit \|\| regtry(reginfo, &s)) )\
	1314	goto got_it; \
	1315	s++; \
	1316	} \
	1317	} STMT_END
	1318
	1319	#define REXEC_FBC_UTF8_SCAN(CoDe) \
	1320	STMT_START { \
	1321	while (s < strend) { \
	1322	CoDe \
	1323	s += UTF8SKIP(s); \
	1324	} \
	1325	} STMT_END
	1326
	1327	#define REXEC_FBC_SCAN(CoDe) \
	1328	STMT_START { \
	1329	while (s < strend) { \
	1330	CoDe \
	1331	s++; \
	1332	} \
	1333	} STMT_END
	1334
	1335	#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
	1336	REXEC_FBC_UTF8_SCAN( \
	1337	if (CoNd) { \
	1338	if (tmp && (reginfo->intuit \|\| regtry(reginfo, &s))) \
	1339	goto got_it; \
	1340	else \
	1341	tmp = doevery; \
	1342	} \
	1343	else \
	1344	tmp = 1; \
	1345	)
	1346
	1347	#define REXEC_FBC_CLASS_SCAN(CoNd) \
	1348	REXEC_FBC_SCAN( \
	1349	if (CoNd) { \
	1350	if (tmp && (reginfo->intuit \|\| regtry(reginfo, &s))) \
	1351	goto got_it; \
	1352	else \
	1353	tmp = doevery; \
	1354	} \
	1355	else \
	1356	tmp = 1; \
	1357	)
	1358
	1359	#define REXEC_FBC_TRYIT \
	1360	if ((reginfo->intuit \|\| regtry(reginfo, &s))) \
	1361	goto got_it
	1362
	1363	#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
	1364	if (utf8_target) { \
	1365	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1366	} \
	1367	else { \
	1368	REXEC_FBC_CLASS_SCAN(CoNd); \
	1369	}
	1370
	1371	#define DUMP_EXEC_POS(li,s,doutf8) \
	1372	dump_exec_pos(li,s,(reginfo->strend),(reginfo->strbeg), \
	1373	startpos, doutf8)
	1374
	1375
	1376	#define UTF8_NOLOAD(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1377	tmp = (s != reginfo->strbeg) ? UCHARAT(s - 1) : '\n'; \
	1378	tmp = TEST_NON_UTF8(tmp); \
	1379	REXEC_FBC_UTF8_SCAN( \
	1380	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1381	tmp = !tmp; \
	1382	IF_SUCCESS; \
	1383	} \
	1384	else { \
	1385	IF_FAIL; \
	1386	} \
	1387	); \
	1388
	1389	#define UTF8_LOAD(TeSt1_UtF8, TeSt2_UtF8, IF_SUCCESS, IF_FAIL) \
	1390	if (s == reginfo->strbeg) { \
	1391	tmp = '\n'; \
	1392	} \
	1393	else { \
	1394	U8 * const r = reghop3((U8)s, -1, (U8)reginfo->strbeg); \
	1395	tmp = utf8n_to_uvchr(r, (U8*) reginfo->strend - r, \
	1396	0, UTF8_ALLOW_DEFAULT); \
	1397	} \
	1398	tmp = TeSt1_UtF8; \
	1399	LOAD_UTF8_CHARCLASS_ALNUM(); \
	1400	REXEC_FBC_UTF8_SCAN( \
	1401	if (tmp == ! (TeSt2_UtF8)) { \
	1402	tmp = !tmp; \
	1403	IF_SUCCESS; \
	1404	} \
	1405	else { \
	1406	IF_FAIL; \
	1407	} \
	1408	); \
	1409
	1410	/* The only difference between the BOUND and NBOUND cases is that
	1411	* REXEC_FBC_TRYIT is called when matched in BOUND, and when non-matched in
	1412	* NBOUND. This is accomplished by passing it in either the if or else clause,
	1413	* with the other one being empty */
	1414	#define FBC_BOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1415	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1416
	1417	#define FBC_BOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1418	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1419
	1420	#define FBC_NBOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1421	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1422
	1423	#define FBC_NBOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1424	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1425
	1426
	1427	/* Common to the BOUND and NBOUND cases. Unfortunately the UTF8 tests need to
	1428	* be passed in completely with the variable name being tested, which isn't
	1429	* such a clean interface, but this is easier to read than it was before. We
	1430	* are looking for the boundary (or non-boundary between a word and non-word
	1431	* character. The utf8 and non-utf8 cases have the same logic, but the details
	1432	* must be different. Find the "wordness" of the character just prior to this
	1433	* one, and compare it with the wordness of this one. If they differ, we have
	1434	* a boundary. At the beginning of the string, pretend that the previous
	1435	* character was a new-line */
	1436	#define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1437	if (utf8_target) { \
	1438	UTF8_CODE \
	1439	} \
	1440	else { /* Not utf8 */ \
	1441	tmp = (s != reginfo->strbeg) ? UCHARAT(s - 1) : '\n'; \
	1442	tmp = TEST_NON_UTF8(tmp); \
	1443	REXEC_FBC_SCAN( \
	1444	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1445	tmp = !tmp; \
	1446	IF_SUCCESS; \
	1447	} \
	1448	else { \
	1449	IF_FAIL; \
	1450	} \
	1451	); \
	1452	} \
	1453	if ((!prog->minlen && tmp) && (reginfo->intuit \|\| regtry(reginfo, &s))) \
	1454	goto got_it;
	1455
	1456	/* We know what class REx starts with. Try to find this position... */
	1457	/* if reginfo->intuit, it's a dry run */
	1458	/* annoyingly all the vars in this routine have different names from their counterparts
	1459	in regmatch. /grrr */
	1460
	1461	STATIC char *
	1462	S_find_byclass(pTHX_ regexp * prog, const regnode c, char s,
	1463	const char strend, regmatch_info reginfo)
	1464	{
	1465	dVAR;
	1466	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
	1467	char pat_string; / The pattern's exactish string */
	1468	char pat_end; / ptr to end char of pat_string */
	1469	re_fold_t folder; /* Function for computing non-utf8 folds */
	1470	const U8 fold_array; / array for folding ords < 256 */
	1471	STRLEN ln;
	1472	STRLEN lnc;
	1473	U8 c1;
	1474	U8 c2;
	1475	char *e;
	1476	I32 tmp = 1; /* Scratch variable? */
	1477	const bool utf8_target = reginfo->is_utf8_target;
	1478	UV utf8_fold_flags = 0;
	1479	const bool is_utf8_pat = reginfo->is_utf8_pat;
	1480	bool to_complement = FALSE; /* Invert the result? Taking the xor of this
	1481	with a result inverts that result, as 0^1 =
	1482	1 and 1^1 = 0 */
	1483	_char_class_number classnum;
	1484
	1485	RXi_GET_DECL(prog,progi);
	1486
	1487	PERL_ARGS_ASSERT_FIND_BYCLASS;
	1488
	1489	/* We know what class it must start with. */
	1490	switch (OP(c)) {
	1491	case ANYOF:
	1492	case ANYOF_SYNTHETIC:
	1493	if (utf8_target) {
	1494	REXEC_FBC_UTF8_CLASS_SCAN(
	1495	reginclass(prog, c, (U8)s, (U8) strend, utf8_target));
	1496	}
	1497	else {
	1498	REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
	1499	}
	1500	break;
	1501	case CANY:
	1502	REXEC_FBC_SCAN(
	1503	if (tmp && (reginfo->intuit \|\| regtry(reginfo, &s)))
	1504	goto got_it;
	1505	else
	1506	tmp = doevery;
	1507	);
	1508	break;
	1509
	1510	case EXACTFA_NO_TRIE: /* This node only generated for non-utf8 patterns */
	1511	assert(! is_utf8_pat);
	1512	/* FALL THROUGH */
	1513	case EXACTFA:
	1514	if (is_utf8_pat \|\| utf8_target) {
	1515	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	1516	goto do_exactf_utf8;
	1517	}
	1518	fold_array = PL_fold_latin1; /* Latin1 folds are not affected by */
	1519	folder = foldEQ_latin1; /* /a, except the sharp s one which */
	1520	goto do_exactf_non_utf8; /* isn't dealt with by these */
	1521
	1522	case EXACTF: /* This node only generated for non-utf8 patterns */
	1523	assert(! is_utf8_pat);
	1524	if (utf8_target) {
	1525	utf8_fold_flags = 0;
	1526	goto do_exactf_utf8;
	1527	}
	1528	fold_array = PL_fold;
	1529	folder = foldEQ;
	1530	goto do_exactf_non_utf8;
	1531
	1532	case EXACTFL:
	1533	if (is_utf8_pat \|\| utf8_target) {
	1534	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	1535	goto do_exactf_utf8;
	1536	}
	1537	fold_array = PL_fold_locale;
	1538	folder = foldEQ_locale;
	1539	goto do_exactf_non_utf8;
	1540
	1541	case EXACTFU_SS:
	1542	if (is_utf8_pat) {
	1543	utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
	1544	}
	1545	goto do_exactf_utf8;
	1546
	1547	case EXACTFU:
	1548	if (is_utf8_pat \|\| utf8_target) {
	1549	utf8_fold_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	1550	goto do_exactf_utf8;
	1551	}
	1552
	1553	/* Any 'ss' in the pattern should have been replaced by regcomp,
	1554	* so we don't have to worry here about this single special case
	1555	* in the Latin1 range */
	1556	fold_array = PL_fold_latin1;
	1557	folder = foldEQ_latin1;
	1558
	1559	/* FALL THROUGH */
	1560
	1561	do_exactf_non_utf8: /* Neither pattern nor string are UTF8, and there
	1562	are no glitches with fold-length differences
	1563	between the target string and pattern */
	1564
	1565	/* The idea in the non-utf8 EXACTF* cases is to first find the
	1566	* first character of the EXACTF* node and then, if necessary,
	1567	* case-insensitively compare the full text of the node. c1 is the
	1568	* first character. c2 is its fold. This logic will not work for
	1569	* Unicode semantics and the german sharp ss, which hence should
	1570	* not be compiled into a node that gets here. */
	1571	pat_string = STRING(c);
	1572	ln = STR_LEN(c); /* length to match in octets/bytes */
	1573
	1574	/* We know that we have to match at least 'ln' bytes (which is the
	1575	* same as characters, since not utf8). If we have to match 3
	1576	* characters, and there are only 2 availabe, we know without
	1577	* trying that it will fail; so don't start a match past the
	1578	* required minimum number from the far end */
	1579	e = HOP3c(strend, -((SSize_t)ln), s);
	1580
	1581	if (reginfo->intuit && e < s) {
	1582	e = s; /* Due to minlen logic of intuit() */
	1583	}
	1584
	1585	c1 = *pat_string;
	1586	c2 = fold_array[c1];
	1587	if (c1 == c2) { /* If char and fold are the same */
	1588	REXEC_FBC_EXACTISH_SCAN((U8)s == c1);
	1589	}
	1590	else {
	1591	REXEC_FBC_EXACTISH_SCAN((U8)s == c1 \|\| (U8)s == c2);
	1592	}
	1593	break;
	1594
	1595	do_exactf_utf8:
	1596	{
	1597	unsigned expansion;
	1598
	1599	/* If one of the operands is in utf8, we can't use the simpler folding
	1600	* above, due to the fact that many different characters can have the
	1601	* same fold, or portion of a fold, or different- length fold */
	1602	pat_string = STRING(c);
	1603	ln = STR_LEN(c); /* length to match in octets/bytes */
	1604	pat_end = pat_string + ln;
	1605	lnc = is_utf8_pat /* length to match in characters */
	1606	? utf8_length((U8 ) pat_string, (U8 ) pat_end)
	1607	: ln;
	1608
	1609	/* We have 'lnc' characters to match in the pattern, but because of
	1610	* multi-character folding, each character in the target can match
	1611	* up to 3 characters (Unicode guarantees it will never exceed
	1612	* this) if it is utf8-encoded; and up to 2 if not (based on the
	1613	* fact that the Latin 1 folds are already determined, and the
	1614	* only multi-char fold in that range is the sharp-s folding to
	1615	* 'ss'. Thus, a pattern character can match as little as 1/3 of a
	1616	* string character. Adjust lnc accordingly, rounding up, so that
	1617	* if we need to match at least 4+1/3 chars, that really is 5. */
	1618	expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
	1619	lnc = (lnc + expansion - 1) / expansion;
	1620
	1621	/* As in the non-UTF8 case, if we have to match 3 characters, and
	1622	* only 2 are left, it's guaranteed to fail, so don't start a
	1623	* match that would require us to go beyond the end of the string
	1624	*/
	1625	e = HOP3c(strend, -((SSize_t)lnc), s);
	1626
	1627	if (reginfo->intuit && e < s) {
	1628	e = s; /* Due to minlen logic of intuit() */
	1629	}
	1630
	1631	/* XXX Note that we could recalculate e to stop the loop earlier,
	1632	* as the worst case expansion above will rarely be met, and as we
	1633	* go along we would usually find that e moves further to the left.
	1634	* This would happen only after we reached the point in the loop
	1635	* where if there were no expansion we should fail. Unclear if
	1636	* worth the expense */
	1637
	1638	while (s <= e) {
	1639	char my_strend= (char )strend;
	1640	if (foldEQ_utf8_flags(s, &my_strend, 0, utf8_target,
	1641	pat_string, NULL, ln, is_utf8_pat, utf8_fold_flags)
	1642	&& (reginfo->intuit \|\| regtry(reginfo, &s)) )
	1643	{
	1644	goto got_it;
	1645	}
	1646	s += (utf8_target) ? UTF8SKIP(s) : 1;
	1647	}
	1648	break;
	1649	}
	1650	case BOUNDL:
	1651	RXp_MATCH_TAINTED_on(prog);
	1652	FBC_BOUND(isWORDCHAR_LC,
	1653	isWORDCHAR_LC_uvchr(tmp),
	1654	isWORDCHAR_LC_utf8((U8*)s));
	1655	break;
	1656	case NBOUNDL:
	1657	RXp_MATCH_TAINTED_on(prog);
	1658	FBC_NBOUND(isWORDCHAR_LC,
	1659	isWORDCHAR_LC_uvchr(tmp),
	1660	isWORDCHAR_LC_utf8((U8*)s));
	1661	break;
	1662	case BOUND:
	1663	FBC_BOUND(isWORDCHAR,
	1664	isWORDCHAR_uni(tmp),
	1665	cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
	1666	break;
	1667	case BOUNDA:
	1668	FBC_BOUND_NOLOAD(isWORDCHAR_A,
	1669	isWORDCHAR_A(tmp),
	1670	isWORDCHAR_A((U8*)s));
	1671	break;
	1672	case NBOUND:
	1673	FBC_NBOUND(isWORDCHAR,
	1674	isWORDCHAR_uni(tmp),
	1675	cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
	1676	break;
	1677	case NBOUNDA:
	1678	FBC_NBOUND_NOLOAD(isWORDCHAR_A,
	1679	isWORDCHAR_A(tmp),
	1680	isWORDCHAR_A((U8*)s));
	1681	break;
	1682	case BOUNDU:
	1683	FBC_BOUND(isWORDCHAR_L1,
	1684	isWORDCHAR_uni(tmp),
	1685	cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
	1686	break;
	1687	case NBOUNDU:
	1688	FBC_NBOUND(isWORDCHAR_L1,
	1689	isWORDCHAR_uni(tmp),
	1690	cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
	1691	break;
	1692	case LNBREAK:
	1693	REXEC_FBC_CSCAN(is_LNBREAK_utf8_safe(s, strend),
	1694	is_LNBREAK_latin1_safe(s, strend)
	1695	);
	1696	break;
	1697
	1698	/* The argument to all the POSIX node types is the class number to pass to
	1699	* _generic_isCC() to build a mask for searching in PL_charclass[] */
	1700
	1701	case NPOSIXL:
	1702	to_complement = 1;
	1703	/* FALLTHROUGH */
	1704
	1705	case POSIXL:
	1706	RXp_MATCH_TAINTED_on(prog);
	1707	REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)),
	1708	to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
	1709	break;
	1710
	1711	case NPOSIXD:
	1712	to_complement = 1;
	1713	/* FALLTHROUGH */
	1714
	1715	case POSIXD:
	1716	if (utf8_target) {
	1717	goto posix_utf8;
	1718	}
	1719	goto posixa;
	1720
	1721	case NPOSIXA:
	1722	if (utf8_target) {
	1723	/* The complement of something that matches only ASCII matches all
	1724	* UTF-8 variant code points, plus everything in ASCII that isn't
	1725	* in the class */
	1726	REXEC_FBC_UTF8_CLASS_SCAN(! UTF8_IS_INVARIANT(*s)
	1727	\|\| ! _generic_isCC_A(*s, FLAGS(c)));
	1728	break;
	1729	}
	1730
	1731	to_complement = 1;
	1732	/* FALLTHROUGH */
	1733
	1734	case POSIXA:
	1735	posixa:
	1736	/* Don't need to worry about utf8, as it can match only a single
	1737	* byte invariant character. */
	1738	REXEC_FBC_CLASS_SCAN(
	1739	to_complement ^ cBOOL(_generic_isCC_A(*s, FLAGS(c))));
	1740	break;
	1741
	1742	case NPOSIXU:
	1743	to_complement = 1;
	1744	/* FALLTHROUGH */
	1745
	1746	case POSIXU:
	1747	if (! utf8_target) {
	1748	REXEC_FBC_CLASS_SCAN(to_complement ^ cBOOL(_generic_isCC(*s,
	1749	FLAGS(c))));
	1750	}
	1751	else {
	1752
	1753	posix_utf8:
	1754	classnum = (_char_class_number) FLAGS(c);
	1755	if (classnum < _FIRST_NON_SWASH_CC) {
	1756	while (s < strend) {
	1757
	1758	/* We avoid loading in the swash as long as possible, but
	1759	* should we have to, we jump to a separate loop. This
	1760	* extra 'if' statement is what keeps this code from being
	1761	* just a call to REXEC_FBC_UTF8_CLASS_SCAN() */
	1762	if (UTF8_IS_ABOVE_LATIN1(*s)) {
	1763	goto found_above_latin1;
	1764	}
	1765	if ((UTF8_IS_INVARIANT(*s)
	1766	&& to_complement ^ cBOOL(_generic_isCC((U8) *s,
	1767	classnum)))
	1768	\|\| (UTF8_IS_DOWNGRADEABLE_START(*s)
	1769	&& to_complement ^ cBOOL(
	1770	_generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(*s,
	1771	*(s + 1)),
	1772	classnum))))
	1773	{
	1774	if (tmp && (reginfo->intuit \|\| regtry(reginfo, &s)))
	1775	goto got_it;
	1776	else {
	1777	tmp = doevery;
	1778	}
	1779	}
	1780	else {
	1781	tmp = 1;
	1782	}
	1783	s += UTF8SKIP(s);
	1784	}
	1785	}
	1786	else switch (classnum) { /* These classes are implemented as
	1787	macros */
	1788	case _CC_ENUM_SPACE: /* XXX would require separate code if we
	1789	revert the change of \v matching this */
	1790	/* FALL THROUGH */
	1791
	1792	case _CC_ENUM_PSXSPC:
	1793	REXEC_FBC_UTF8_CLASS_SCAN(
	1794	to_complement ^ cBOOL(isSPACE_utf8(s)));
	1795	break;
	1796
	1797	case _CC_ENUM_BLANK:
	1798	REXEC_FBC_UTF8_CLASS_SCAN(
	1799	to_complement ^ cBOOL(isBLANK_utf8(s)));
	1800	break;
	1801
	1802	case _CC_ENUM_XDIGIT:
	1803	REXEC_FBC_UTF8_CLASS_SCAN(
	1804	to_complement ^ cBOOL(isXDIGIT_utf8(s)));
	1805	break;
	1806
	1807	case _CC_ENUM_VERTSPACE:
	1808	REXEC_FBC_UTF8_CLASS_SCAN(
	1809	to_complement ^ cBOOL(isVERTWS_utf8(s)));
	1810	break;
	1811
	1812	case _CC_ENUM_CNTRL:
	1813	REXEC_FBC_UTF8_CLASS_SCAN(
	1814	to_complement ^ cBOOL(isCNTRL_utf8(s)));
	1815	break;
	1816
	1817	default:
	1818	Perl_croak(aTHX_ "panic: find_byclass() node %d='%s' has an unexpected character class '%d'", OP(c), PL_reg_name[OP(c)], classnum);
	1819	assert(0); /* NOTREACHED */
	1820	}
	1821	}
	1822	break;
	1823
	1824	found_above_latin1: /* Here we have to load a swash to get the result
	1825	for the current code point */
	1826	if (! PL_utf8_swash_ptrs[classnum]) {
	1827	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	1828	PL_utf8_swash_ptrs[classnum] =
	1829	_core_swash_init("utf8", swash_property_names[classnum],
	1830	&PL_sv_undef, 1, 0, NULL, &flags);
	1831	}
	1832
	1833	/* This is a copy of the loop above for swash classes, though using the
	1834	* FBC macro instead of being expanded out. Since we've loaded the
	1835	* swash, we don't have to check for that each time through the loop */
	1836	REXEC_FBC_UTF8_CLASS_SCAN(
	1837	to_complement ^ cBOOL(_generic_utf8(
	1838	classnum,
	1839	s,
	1840	swash_fetch(PL_utf8_swash_ptrs[classnum],
	1841	(U8 *) s, TRUE))));
	1842	break;
	1843
	1844	case AHOCORASICKC:
	1845	case AHOCORASICK:
	1846	{
	1847	DECL_TRIE_TYPE(c);
	1848	/* what trie are we using right now */
	1849	reg_ac_data aho = (reg_ac_data)progi->data->data[ ARG( c ) ];
	1850	reg_trie_data trie = (reg_trie_data)progi->data->data[ aho->trie ];
	1851	HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
	1852
	1853	const char *last_start = strend - trie->minlen;
	1854	#ifdef DEBUGGING
	1855	const char *real_start = s;
	1856	#endif
	1857	STRLEN maxlen = trie->maxlen;
	1858	SV *sv_points;
	1859	U8 *points; / map of where we were in the input string
	1860	when reading a given char. For ASCII this
	1861	is unnecessary overhead as the relationship
	1862	is always 1:1, but for Unicode, especially
	1863	case folded Unicode this is not true. */
	1864	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1865	U8 *bitmap=NULL;
	1866
	1867
	1868	GET_RE_DEBUG_FLAGS_DECL;
	1869
	1870	/* We can't just allocate points here. We need to wrap it in
	1871	* an SV so it gets freed properly if there is a croak while
	1872	* running the match */
	1873	ENTER;
	1874	SAVETMPS;
	1875	sv_points=newSV(maxlen * sizeof(U8 *));
	1876	SvCUR_set(sv_points,
	1877	maxlen * sizeof(U8 *));
	1878	SvPOK_on(sv_points);
	1879	sv_2mortal(sv_points);
	1880	points=(U8**)SvPV_nolen(sv_points );
	1881	if ( trie_type != trie_utf8_fold
	1882	&& (trie->bitmap \|\| OP(c)==AHOCORASICKC) )
	1883	{
	1884	if (trie->bitmap)
	1885	bitmap=(U8*)trie->bitmap;
	1886	else
	1887	bitmap=(U8*)ANYOF_BITMAP(c);
	1888	}
	1889	/* this is the Aho-Corasick algorithm modified a touch
	1890	to include special handling for long "unknown char" sequences.
	1891	The basic idea being that we use AC as long as we are dealing
	1892	with a possible matching char, when we encounter an unknown char
	1893	(and we have not encountered an accepting state) we scan forward
	1894	until we find a legal starting char.
	1895	AC matching is basically that of trie matching, except that when
	1896	we encounter a failing transition, we fall back to the current
	1897	states "fail state", and try the current char again, a process
	1898	we repeat until we reach the root state, state 1, or a legal
	1899	transition. If we fail on the root state then we can either
	1900	terminate if we have reached an accepting state previously, or
	1901	restart the entire process from the beginning if we have not.
	1902
	1903	*/
	1904	while (s <= last_start) {
	1905	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1906	U8 uc = (U8)s;
	1907	U16 charid = 0;
	1908	U32 base = 1;
	1909	U32 state = 1;
	1910	UV uvc = 0;
	1911	STRLEN len = 0;
	1912	STRLEN foldlen = 0;
	1913	U8 uscan = (U8)NULL;
	1914	U8 *leftmost = NULL;
	1915	#ifdef DEBUGGING
	1916	U32 accepted_word= 0;
	1917	#endif
	1918	U32 pointpos = 0;
	1919
	1920	while ( state && uc <= (U8*)strend ) {
	1921	int failed=0;
	1922	U32 word = aho->states[ state ].wordnum;
	1923
	1924	if( state==1 ) {
	1925	if ( bitmap ) {
	1926	DEBUG_TRIE_EXECUTE_r(
	1927	if ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1928	dump_exec_pos( (char *)uc, c, strend, real_start,
	1929	(char *)uc, utf8_target );
	1930	PerlIO_printf( Perl_debug_log,
	1931	" Scanning for legal start char...\n");
	1932	}
	1933	);
	1934	if (utf8_target) {
	1935	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1936	uc += UTF8SKIP(uc);
	1937	}
	1938	} else {
	1939	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1940	uc++;
	1941	}
	1942	}
	1943	s= (char *)uc;
	1944	}
	1945	if (uc >(U8*)last_start) break;
	1946	}
	1947
	1948	if ( word ) {
	1949	U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
	1950	if (!leftmost \|\| lpos < leftmost) {
	1951	DEBUG_r(accepted_word=word);
	1952	leftmost= lpos;
	1953	}
	1954	if (base==0) break;
	1955
	1956	}
	1957	points[pointpos++ % maxlen]= uc;
	1958	if (foldlen \|\| uc < (U8*)strend) {
	1959	REXEC_TRIE_READ_CHAR(trie_type, trie,
	1960	widecharmap, uc,
	1961	uscan, len, uvc, charid, foldlen,
	1962	foldbuf, uniflags);
	1963	DEBUG_TRIE_EXECUTE_r({
	1964	dump_exec_pos( (char *)uc, c, strend,
	1965	real_start, s, utf8_target);
	1966	PerlIO_printf(Perl_debug_log,
	1967	" Charid:%3u CP:%4"UVxf" ",
	1968	charid, uvc);
	1969	});
	1970	}
	1971	else {
	1972	len = 0;
	1973	charid = 0;
	1974	}
	1975
	1976
	1977	do {
	1978	#ifdef DEBUGGING
	1979	word = aho->states[ state ].wordnum;
	1980	#endif
	1981	base = aho->states[ state ].trans.base;
	1982
	1983	DEBUG_TRIE_EXECUTE_r({
	1984	if (failed)
	1985	dump_exec_pos( (char *)uc, c, strend, real_start,
	1986	s, utf8_target );
	1987	PerlIO_printf( Perl_debug_log,
	1988	"%sState: %4"UVxf", word=%"UVxf,
	1989	failed ? " Fail transition to " : "",
	1990	(UV)state, (UV)word);
	1991	});
	1992	if ( base ) {
	1993	U32 tmp;
	1994	I32 offset;
	1995	if (charid &&
	1996	( ((offset = base + charid
	1997	- 1 - trie->uniquecharcount)) >= 0)
	1998	&& ((U32)offset < trie->lasttrans)
	1999	&& trie->trans[offset].check == state
	2000	&& (tmp=trie->trans[offset].next))
	2001	{
	2002	DEBUG_TRIE_EXECUTE_r(
	2003	PerlIO_printf( Perl_debug_log," - legal\n"));
	2004	state = tmp;
	2005	break;
	2006	}
	2007	else {
	2008	DEBUG_TRIE_EXECUTE_r(
	2009	PerlIO_printf( Perl_debug_log," - fail\n"));
	2010	failed = 1;
	2011	state = aho->fail[state];
	2012	}
	2013	}
	2014	else {
	2015	/* we must be accepting here */
	2016	DEBUG_TRIE_EXECUTE_r(
	2017	PerlIO_printf( Perl_debug_log," - accepting\n"));
	2018	failed = 1;
	2019	break;
	2020	}
	2021	} while(state);
	2022	uc += len;
	2023	if (failed) {
	2024	if (leftmost)
	2025	break;
	2026	if (!state) state = 1;
	2027	}
	2028	}
	2029	if ( aho->states[ state ].wordnum ) {
	2030	U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
	2031	if (!leftmost \|\| lpos < leftmost) {
	2032	DEBUG_r(accepted_word=aho->states[ state ].wordnum);
	2033	leftmost = lpos;
	2034	}
	2035	}
	2036	if (leftmost) {
	2037	s = (char*)leftmost;
	2038	DEBUG_TRIE_EXECUTE_r({
	2039	PerlIO_printf(
	2040	Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
	2041	(UV)accepted_word, (IV)(s - real_start)
	2042	);
	2043	});
	2044	if (reginfo->intuit \|\| regtry(reginfo, &s)) {
	2045	FREETMPS;
	2046	LEAVE;
	2047	goto got_it;
	2048	}
	2049	s = HOPc(s,1);
	2050	DEBUG_TRIE_EXECUTE_r({
	2051	PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
	2052	});
	2053	} else {
	2054	DEBUG_TRIE_EXECUTE_r(
	2055	PerlIO_printf( Perl_debug_log,"No match.\n"));
	2056	break;
	2057	}
	2058	}
	2059	FREETMPS;
	2060	LEAVE;
	2061	}
	2062	break;
	2063	default:
	2064	Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
	2065	break;
	2066	}
	2067	return 0;
	2068	got_it:
	2069	return s;
	2070	}
	2071
	2072	/* set RX_SAVED_COPY, RX_SUBBEG etc.
	2073	* flags have same meanings as with regexec_flags() */
	2074
	2075	static void
	2076	S_reg_set_capture_string(pTHX_ REGEXP * const rx,
	2077	char *strbeg,
	2078	char *strend,
	2079	SV *sv,
	2080	U32 flags,
	2081	bool utf8_target)
	2082	{
	2083	struct regexp *const prog = ReANY(rx);
	2084
	2085	if (flags & REXEC_COPY_STR) {
	2086	#ifdef PERL_ANY_COW
	2087	if (SvCANCOW(sv)) {
	2088	if (DEBUG_C_TEST) {
	2089	PerlIO_printf(Perl_debug_log,
	2090	"Copy on write: regexp capture, type %d\n",
	2091	(int) SvTYPE(sv));
	2092	}
	2093	/* Create a new COW SV to share the match string and store
	2094	* in saved_copy, unless the current COW SV in saved_copy
	2095	* is valid and suitable for our purpose */
	2096	if (( prog->saved_copy
	2097	&& SvIsCOW(prog->saved_copy)
	2098	&& SvPOKp(prog->saved_copy)
	2099	&& SvIsCOW(sv)
	2100	&& SvPOKp(sv)
	2101	&& SvPVX(sv) == SvPVX(prog->saved_copy)))
	2102	{
	2103	/* just reuse saved_copy SV */
	2104	if (RXp_MATCH_COPIED(prog)) {
	2105	Safefree(prog->subbeg);
	2106	RXp_MATCH_COPIED_off(prog);
	2107	}
	2108	}
	2109	else {
	2110	/* create new COW SV to share string */
	2111	RX_MATCH_COPY_FREE(rx);
	2112	prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
	2113	}
	2114	prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
	2115	assert (SvPOKp(prog->saved_copy));
	2116	prog->sublen = strend - strbeg;
	2117	prog->suboffset = 0;
	2118	prog->subcoffset = 0;
	2119	} else
	2120	#endif
	2121	{
	2122	SSize_t min = 0;
	2123	SSize_t max = strend - strbeg;
	2124	SSize_t sublen;
	2125
	2126	if ( (flags & REXEC_COPY_SKIP_POST)
	2127	&& !(prog->extflags & RXf_PMf_KEEPCOPY) /* //p */
	2128	&& !(PL_sawampersand & SAWAMPERSAND_RIGHT)
	2129	) { /* don't copy $' part of string */
	2130	U32 n = 0;
	2131	max = -1;
	2132	/* calculate the right-most part of the string covered
	2133	* by a capture. Due to look-ahead, this may be to
	2134	* the right of $&, so we have to scan all captures */
	2135	while (n <= prog->lastparen) {
	2136	if (prog->offs[n].end > max)
	2137	max = prog->offs[n].end;
	2138	n++;
	2139	}
	2140	if (max == -1)
	2141	max = (PL_sawampersand & SAWAMPERSAND_LEFT)
	2142	? prog->offs[0].start
	2143	: 0;
	2144	assert(max >= 0 && max <= strend - strbeg);
	2145	}
	2146
	2147	if ( (flags & REXEC_COPY_SKIP_PRE)
	2148	&& !(prog->extflags & RXf_PMf_KEEPCOPY) /* //p */
	2149	&& !(PL_sawampersand & SAWAMPERSAND_LEFT)
	2150	) { /* don't copy $` part of string */
	2151	U32 n = 0;
	2152	min = max;
	2153	/* calculate the left-most part of the string covered
	2154	* by a capture. Due to look-behind, this may be to
	2155	* the left of $&, so we have to scan all captures */
	2156	while (min && n <= prog->lastparen) {
	2157	if ( prog->offs[n].start != -1
	2158	&& prog->offs[n].start < min)
	2159	{
	2160	min = prog->offs[n].start;
	2161	}
	2162	n++;
	2163	}
	2164	if ((PL_sawampersand & SAWAMPERSAND_RIGHT)
	2165	&& min > prog->offs[0].end
	2166	)
	2167	min = prog->offs[0].end;
	2168
	2169	}
	2170
	2171	assert(min >= 0 && min <= max && min <= strend - strbeg);
	2172	sublen = max - min;
	2173
	2174	if (RX_MATCH_COPIED(rx)) {
	2175	if (sublen > prog->sublen)
	2176	prog->subbeg =
	2177	(char*)saferealloc(prog->subbeg, sublen+1);
	2178	}
	2179	else
	2180	prog->subbeg = (char*)safemalloc(sublen+1);
	2181	Copy(strbeg + min, prog->subbeg, sublen, char);
	2182	prog->subbeg[sublen] = '\0';
	2183	prog->suboffset = min;
	2184	prog->sublen = sublen;
	2185	RX_MATCH_COPIED_on(rx);
	2186	}
	2187	prog->subcoffset = prog->suboffset;
	2188	if (prog->suboffset && utf8_target) {
	2189	/* Convert byte offset to chars.
	2190	* XXX ideally should only compute this if @-/@+
	2191	* has been seen, a la PL_sawampersand ??? */
	2192
	2193	/* If there's a direct correspondence between the
	2194	* string which we're matching and the original SV,
	2195	* then we can use the utf8 len cache associated with
	2196	* the SV. In particular, it means that under //g,
	2197	* sv_pos_b2u() will use the previously cached
	2198	* position to speed up working out the new length of
	2199	* subcoffset, rather than counting from the start of
	2200	* the string each time. This stops
	2201	* $x = "\x{100}" x 1E6; 1 while $x =~ /(.)/g;
	2202	* from going quadratic */
	2203	if (SvPOKp(sv) && SvPVX(sv) == strbeg)
	2204	prog->subcoffset = sv_pos_b2u_flags(sv, prog->subcoffset,
	2205	SV_GMAGIC\|SV_CONST_RETURN);
	2206	else
	2207	prog->subcoffset = utf8_length((U8*)strbeg,
	2208	(U8*)(strbeg+prog->suboffset));
	2209	}
	2210	}
	2211	else {
	2212	RX_MATCH_COPY_FREE(rx);
	2213	prog->subbeg = strbeg;
	2214	prog->suboffset = 0;
	2215	prog->subcoffset = 0;
	2216	prog->sublen = strend - strbeg;
	2217	}
	2218	}
	2219
	2220
	2221
	2222
	2223	/*
	2224	- regexec_flags - match a regexp against a string
	2225	*/
	2226	I32
	2227	Perl_regexec_flags(pTHX_ REGEXP * const rx, char stringarg, char strend,
	2228	char strbeg, SSize_t minend, SV sv, void *data, U32 flags)
	2229	/* stringarg: the point in the string at which to begin matching */
	2230	/* strend: pointer to null at end of string */
	2231	/* strbeg: real beginning of string */
	2232	/* minend: end of match must be >= minend bytes after stringarg. */
	2233	/* sv: SV being matched: only used for utf8 flag, pos() etc; string
	2234	* itself is accessed via the pointers above */
	2235	/* data: May be used for some additional optimizations.
	2236	Currently unused. */
	2237	/* flags: For optimizations. See REXEC_* in regexp.h */
	2238
	2239	{
	2240	dVAR;
	2241	struct regexp *const prog = ReANY(rx);
	2242	char *s;
	2243	regnode *c;
	2244	char *startpos;
	2245	SSize_t minlen; /* must match at least this many chars */
	2246	SSize_t dontbother = 0; /* how many characters not to try at end */
	2247	const bool utf8_target = cBOOL(DO_UTF8(sv));
	2248	I32 multiline;
	2249	RXi_GET_DECL(prog,progi);
	2250	regmatch_info reginfo_buf; /* create some info to pass to regtry etc */
	2251	regmatch_info *const reginfo = &reginfo_buf;
	2252	regexp_paren_pair *swap = NULL;
	2253	I32 oldsave;
	2254	GET_RE_DEBUG_FLAGS_DECL;
	2255
	2256	PERL_ARGS_ASSERT_REGEXEC_FLAGS;
	2257	PERL_UNUSED_ARG(data);
	2258
	2259	/* Be paranoid... */
	2260	if (prog == NULL \|\| stringarg == NULL) {
	2261	Perl_croak(aTHX_ "NULL regexp parameter");
	2262	return 0;
	2263	}
	2264
	2265	DEBUG_EXECUTE_r(
	2266	debug_start_match(rx, utf8_target, stringarg, strend,
	2267	"Matching");
	2268	);
	2269
	2270	startpos = stringarg;
	2271
	2272	if (prog->extflags & RXf_GPOS_SEEN) {
	2273	MAGIC *mg;
	2274
	2275	/* set reginfo->ganch, the position where \G can match */
	2276
	2277	reginfo->ganch =
	2278	(flags & REXEC_IGNOREPOS)
	2279	? stringarg /* use start pos rather than pos() */
	2280	: (sv && (mg = mg_find_mglob(sv)) && mg->mg_len >= 0)
	2281	/* Defined pos(): */
	2282	? strbeg + MgBYTEPOS(mg, sv, strbeg, strend-strbeg)
	2283	: strbeg; /* pos() not defined; use start of string */
	2284
	2285	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2286	"GPOS ganch set to strbeg[%"IVdf"]\n", (IV)(reginfo->ganch - strbeg)));
	2287
	2288	/* in the presence of \G, we may need to start looking earlier in
	2289	* the string than the suggested start point of stringarg:
	2290	* if prog->gofs is set, then that's a known, fixed minimum
	2291	* offset, such as
	2292	* /..\G/: gofs = 2
	2293	* /ab\|c\G/: gofs = 1
	2294	* or if the minimum offset isn't known, then we have to go back
	2295	* to the start of the string, e.g. /w+\G/
	2296	*/
	2297
	2298	if (prog->extflags & RXf_ANCH_GPOS) {
	2299	startpos = reginfo->ganch - prog->gofs;
	2300	if (startpos <
	2301	((flags & REXEC_FAIL_ON_UNDERFLOW) ? stringarg : strbeg))
	2302	{
	2303	DEBUG_r(PerlIO_printf(Perl_debug_log,
	2304	"fail: ganch-gofs before earliest possible start\n"));
	2305	return 0;
	2306	}
	2307	}
	2308	else if (prog->gofs) {
	2309	if (startpos - prog->gofs < strbeg)
	2310	startpos = strbeg;
	2311	else
	2312	startpos -= prog->gofs;
	2313	}
	2314	else if (prog->extflags & RXf_GPOS_FLOAT)
	2315	startpos = strbeg;
	2316	}
	2317
	2318	minlen = prog->minlen;
	2319	if ((startpos + minlen) > strend \|\| startpos < strbeg) {
	2320	DEBUG_r(PerlIO_printf(Perl_debug_log,
	2321	"Regex match can't succeed, so not even tried\n"));
	2322	return 0;
	2323	}
	2324
	2325	/* at the end of this function, we'll do a LEAVE_SCOPE(oldsave),
	2326	* which will call destructors to reset PL_regmatch_state, free higher
	2327	* PL_regmatch_slabs, and clean up regmatch_info_aux and
	2328	* regmatch_info_aux_eval */
	2329
	2330	oldsave = PL_savestack_ix;
	2331
	2332	s = startpos;
	2333
	2334	if ((prog->extflags & RXf_USE_INTUIT)
	2335	&& !(flags & REXEC_CHECKED))
	2336	{
	2337	s = re_intuit_start(rx, sv, strbeg, startpos, strend,
	2338	flags, NULL);
	2339	if (!s)
	2340	return 0;
	2341
	2342	if (prog->extflags & RXf_CHECK_ALL) {
	2343	/* we can match based purely on the result of INTUIT.
	2344	* Set up captures etc just for $& and $-[0]
	2345	* (an intuit-only match wont have $1,$2,..) */
	2346	assert(!prog->nparens);
	2347
	2348	/* s/// doesn't like it if $& is earlier than where we asked it to
	2349	* start searching (which can happen on something like /.\G/) */
	2350	if ( (flags & REXEC_FAIL_ON_UNDERFLOW)
	2351	&& (s < stringarg))
	2352	{
	2353	/* this should only be possible under \G */
	2354	assert(prog->extflags & RXf_GPOS_SEEN);
	2355	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	2356	"matched, but failing for REXEC_FAIL_ON_UNDERFLOW\n"));
	2357	goto phooey;
	2358	}
	2359
	2360	/* match via INTUIT shouldn't have any captures.
	2361	* Let @-, @+, $^N know */
	2362	prog->lastparen = prog->lastcloseparen = 0;
	2363	RX_MATCH_UTF8_set(rx, utf8_target);
	2364	prog->offs[0].start = s - strbeg;
	2365	prog->offs[0].end = utf8_target
	2366	? (char)utf8_hop((U8)s, prog->minlenret) - strbeg
	2367	: s - strbeg + prog->minlenret;
	2368	if ( !(flags & REXEC_NOT_FIRST) )
	2369	S_reg_set_capture_string(aTHX_ rx,
	2370	strbeg, strend,
	2371	sv, flags, utf8_target);
	2372
	2373	return 1;
	2374	}
	2375	}
	2376
	2377	multiline = prog->extflags & RXf_PMf_MULTILINE;
	2378
	2379	if (strend - s < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
	2380	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	2381	"String too short [regexec_flags]...\n"));
	2382	goto phooey;
	2383	}
	2384
	2385	/* Check validity of program. */
	2386	if (UCHARAT(progi->program) != REG_MAGIC) {
	2387	Perl_croak(aTHX_ "corrupted regexp program");
	2388	}
	2389
	2390	RX_MATCH_TAINTED_off(rx);
	2391
	2392	reginfo->prog = rx; /* Yes, sorry that this is confusing. */
	2393	reginfo->intuit = 0;
	2394	reginfo->is_utf8_target = cBOOL(utf8_target);
	2395	reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx));
	2396	reginfo->warned = FALSE;
	2397	reginfo->strbeg = strbeg;
	2398	reginfo->sv = sv;
	2399	reginfo->poscache_maxiter = 0; /* not yet started a countdown */
	2400	reginfo->strend = strend;
	2401	/* see how far we have to get to not match where we matched before */
	2402	reginfo->till = stringarg + minend;
	2403
	2404	if (prog->extflags & RXf_EVAL_SEEN && SvPADTMP(sv) && !IS_PADGV(sv)) {
	2405	/* SAVEFREESV, not sv_mortalcopy, as this SV must last until after
	2406	S_cleanup_regmatch_info_aux has executed (registered by
	2407	SAVEDESTRUCTOR_X below). S_cleanup_regmatch_info_aux modifies
	2408	magic belonging to this SV.
	2409	Not newSVsv, either, as it does not COW.
	2410	*/
	2411	reginfo->sv = newSV(0);
	2412	SvSetSV_nosteal(reginfo->sv, sv);
	2413	SAVEFREESV(reginfo->sv);
	2414	}
	2415
	2416	/* reserve next 2 or 3 slots in PL_regmatch_state:
	2417	* slot N+0: may currently be in use: skip it
	2418	* slot N+1: use for regmatch_info_aux struct
	2419	* slot N+2: use for regmatch_info_aux_eval struct if we have (?{})'s
	2420	* slot N+3: ready for use by regmatch()
	2421	*/
	2422
	2423	{
	2424	regmatch_state *old_regmatch_state;
	2425	regmatch_slab *old_regmatch_slab;
	2426	int i, max = (prog->extflags & RXf_EVAL_SEEN) ? 2 : 1;
	2427
	2428	/* on first ever match, allocate first slab */
	2429	if (!PL_regmatch_slab) {
	2430	Newx(PL_regmatch_slab, 1, regmatch_slab);
	2431	PL_regmatch_slab->prev = NULL;
	2432	PL_regmatch_slab->next = NULL;
	2433	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
	2434	}
	2435
	2436	old_regmatch_state = PL_regmatch_state;
	2437	old_regmatch_slab = PL_regmatch_slab;
	2438
	2439	for (i=0; i <= max; i++) {
	2440	if (i == 1)
	2441	reginfo->info_aux = &(PL_regmatch_state->u.info_aux);
	2442	else if (i ==2)
	2443	reginfo->info_aux_eval =
	2444	reginfo->info_aux->info_aux_eval =
	2445	&(PL_regmatch_state->u.info_aux_eval);
	2446
	2447	if (++PL_regmatch_state > SLAB_LAST(PL_regmatch_slab))
	2448	PL_regmatch_state = S_push_slab(aTHX);
	2449	}
	2450
	2451	/* note initial PL_regmatch_state position; at end of match we'll
	2452	* pop back to there and free any higher slabs */
	2453
	2454	reginfo->info_aux->old_regmatch_state = old_regmatch_state;
	2455	reginfo->info_aux->old_regmatch_slab = old_regmatch_slab;
	2456	reginfo->info_aux->poscache = NULL;
	2457
	2458	SAVEDESTRUCTOR_X(S_cleanup_regmatch_info_aux, reginfo->info_aux);
	2459
	2460	if ((prog->extflags & RXf_EVAL_SEEN))
	2461	S_setup_eval_state(aTHX_ reginfo);
	2462	else
	2463	reginfo->info_aux_eval = reginfo->info_aux->info_aux_eval = NULL;
	2464	}
	2465
	2466	/* If there is a "must appear" string, look for it. */
	2467
	2468	if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
	2469	/* We have to be careful. If the previous successful match
	2470	was from this regex we don't want a subsequent partially
	2471	successful match to clobber the old results.
	2472	So when we detect this possibility we add a swap buffer
	2473	to the re, and switch the buffer each match. If we fail,
	2474	we switch it back; otherwise we leave it swapped.
	2475	*/
	2476	swap = prog->offs;
	2477	/* do we need a save destructor here for eval dies? */
	2478	Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
	2479	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	2480	"rex=0x%"UVxf" saving offs: orig=0x%"UVxf" new=0x%"UVxf"\n",
	2481	PTR2UV(prog),
	2482	PTR2UV(swap),
	2483	PTR2UV(prog->offs)
	2484	));
	2485	}
	2486
	2487	/* Simplest case: anchored match need be tried only once. */
	2488	/* [unless only anchor is BOL and multiline is set] */
	2489	if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
	2490	if (s == startpos && regtry(reginfo, &s))
	2491	goto got_it;
	2492	else if (multiline \|\| (prog->intflags & PREGf_IMPLICIT)
	2493	\|\| (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
	2494	{
	2495	char *end;
	2496
	2497	if (minlen)
	2498	dontbother = minlen - 1;
	2499	end = HOP3c(strend, -dontbother, strbeg) - 1;
	2500	/* for multiline we only have to try after newlines */
	2501	if (prog->check_substr \|\| prog->check_utf8) {
	2502	/* because of the goto we can not easily reuse the macros for bifurcating the
	2503	unicode/non-unicode match modes here like we do elsewhere - demerphq */
	2504	if (utf8_target) {
	2505	if (s == startpos)
	2506	goto after_try_utf8;
	2507	while (1) {
	2508	if (regtry(reginfo, &s)) {
	2509	goto got_it;
	2510	}
	2511	after_try_utf8:
	2512	if (s > end) {
	2513	goto phooey;
	2514	}
	2515	if (prog->extflags & RXf_USE_INTUIT) {
	2516	s = re_intuit_start(rx, sv, strbeg,
	2517	s + UTF8SKIP(s), strend, flags, NULL);
	2518	if (!s) {
	2519	goto phooey;
	2520	}
	2521	}
	2522	else {
	2523	s += UTF8SKIP(s);
	2524	}
	2525	}
	2526	} /* end search for check string in unicode */
	2527	else {
	2528	if (s == startpos) {
	2529	goto after_try_latin;
	2530	}
	2531	while (1) {
	2532	if (regtry(reginfo, &s)) {
	2533	goto got_it;
	2534	}
	2535	after_try_latin:
	2536	if (s > end) {
	2537	goto phooey;
	2538	}
	2539	if (prog->extflags & RXf_USE_INTUIT) {
	2540	s = re_intuit_start(rx, sv, strbeg,
	2541	s + 1, strend, flags, NULL);
	2542	if (!s) {
	2543	goto phooey;
	2544	}
	2545	}
	2546	else {
	2547	s++;
	2548	}
	2549	}
	2550	} /* end search for check string in latin*/
	2551	} /* end search for check string */
	2552	else { /* search for newline */
	2553	if (s > startpos) {
	2554	/* XXX: The s-- is almost definitely wrong here under unicode - demerphq */
	2555	s--;
	2556	}
	2557	/* We can use a more efficient search as newlines are the same in unicode as they are in latin */
	2558	while (s <= end) { /* note it could be possible to match at the end of the string */
	2559	if (s++ == '\n') { / don't need PL_utf8skip here */
	2560	if (regtry(reginfo, &s))
	2561	goto got_it;
	2562	}
	2563	}
	2564	} /* end search for newline */
	2565	} /* end anchored/multiline check string search */
	2566	goto phooey;
	2567	} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
	2568	{
	2569	/* For anchored \G, the only position it can match from is
	2570	* (ganch-gofs); we already set startpos to this above; if intuit
	2571	* moved us on from there, we can't possibly succeed */
	2572	assert(startpos == reginfo->ganch - prog->gofs);
	2573	if (s == startpos && regtry(reginfo, &s))
	2574	goto got_it;
	2575	goto phooey;
	2576	}
	2577
	2578	/* Messy cases: unanchored match. */
	2579	if ((prog->anchored_substr \|\| prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
	2580	/* we have /x+whatever/ */
	2581	/* it must be a one character string (XXXX Except is_utf8_pat?) */
	2582	char ch;
	2583	#ifdef DEBUGGING
	2584	int did_match = 0;
	2585	#endif
	2586	if (utf8_target) {
	2587	if (! prog->anchored_utf8) {
	2588	to_utf8_substr(prog);
	2589	}
	2590	ch = SvPVX_const(prog->anchored_utf8)[0];
	2591	REXEC_FBC_SCAN(
	2592	if (*s == ch) {
	2593	DEBUG_EXECUTE_r( did_match = 1 );
	2594	if (regtry(reginfo, &s)) goto got_it;
	2595	s += UTF8SKIP(s);
	2596	while (s < strend && *s == ch)
	2597	s += UTF8SKIP(s);
	2598	}
	2599	);
	2600
	2601	}
	2602	else {
	2603	if (! prog->anchored_substr) {
	2604	if (! to_byte_substr(prog)) {
	2605	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2606	}
	2607	}
	2608	ch = SvPVX_const(prog->anchored_substr)[0];
	2609	REXEC_FBC_SCAN(
	2610	if (*s == ch) {
	2611	DEBUG_EXECUTE_r( did_match = 1 );
	2612	if (regtry(reginfo, &s)) goto got_it;
	2613	s++;
	2614	while (s < strend && *s == ch)
	2615	s++;
	2616	}
	2617	);
	2618	}
	2619	DEBUG_EXECUTE_r(if (!did_match)
	2620	PerlIO_printf(Perl_debug_log,
	2621	"Did not find anchored character...\n")
	2622	);
	2623	}
	2624	else if (prog->anchored_substr != NULL
	2625	\|\| prog->anchored_utf8 != NULL
	2626	\|\| ((prog->float_substr != NULL \|\| prog->float_utf8 != NULL)
	2627	&& prog->float_max_offset < strend - s)) {
	2628	SV *must;
	2629	SSize_t back_max;
	2630	SSize_t back_min;
	2631	char *last;
	2632	char last1; / Last position checked before */
	2633	#ifdef DEBUGGING
	2634	int did_match = 0;
	2635	#endif
	2636	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	2637	if (utf8_target) {
	2638	if (! prog->anchored_utf8) {
	2639	to_utf8_substr(prog);
	2640	}
	2641	must = prog->anchored_utf8;
	2642	}
	2643	else {
	2644	if (! prog->anchored_substr) {
	2645	if (! to_byte_substr(prog)) {
	2646	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2647	}
	2648	}
	2649	must = prog->anchored_substr;
	2650	}
	2651	back_max = back_min = prog->anchored_offset;
	2652	} else {
	2653	if (utf8_target) {
	2654	if (! prog->float_utf8) {
	2655	to_utf8_substr(prog);
	2656	}
	2657	must = prog->float_utf8;
	2658	}
	2659	else {
	2660	if (! prog->float_substr) {
	2661	if (! to_byte_substr(prog)) {
	2662	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2663	}
	2664	}
	2665	must = prog->float_substr;
	2666	}
	2667	back_max = prog->float_max_offset;
	2668	back_min = prog->float_min_offset;
	2669	}
	2670
	2671	if (back_min<0) {
	2672	last = strend;
	2673	} else {
	2674	last = HOP3c(strend, /* Cannot start after this */
	2675	-(SSize_t)(CHR_SVLEN(must)
	2676	- (SvTAIL(must) != 0) + back_min), strbeg);
	2677	}
	2678	if (s > reginfo->strbeg)
	2679	last1 = HOPc(s, -1);
	2680	else
	2681	last1 = s - 1; /* bogus */
	2682
	2683	/* XXXX check_substr already used to find "s", can optimize if
	2684	check_substr==must. */
	2685	dontbother = 0;
	2686	strend = HOPc(strend, -dontbother);
	2687	while ( (s <= last) &&
	2688	(s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
	2689	(unsigned char*)strend, must,
	2690	multiline ? FBMrf_MULTILINE : 0)) ) {
	2691	DEBUG_EXECUTE_r( did_match = 1 );
	2692	if (HOPc(s, -back_max) > last1) {
	2693	last1 = HOPc(s, -back_min);
	2694	s = HOPc(s, -back_max);
	2695	}
	2696	else {
	2697	char * const t = (last1 >= reginfo->strbeg)
	2698	? HOPc(last1, 1) : last1 + 1;
	2699
	2700	last1 = HOPc(s, -back_min);
	2701	s = t;
	2702	}
	2703	if (utf8_target) {
	2704	while (s <= last1) {
	2705	if (regtry(reginfo, &s))
	2706	goto got_it;
	2707	if (s >= last1) {
	2708	s++; /* to break out of outer loop */
	2709	break;
	2710	}
	2711	s += UTF8SKIP(s);
	2712	}
	2713	}
	2714	else {
	2715	while (s <= last1) {
	2716	if (regtry(reginfo, &s))
	2717	goto got_it;
	2718	s++;
	2719	}
	2720	}
	2721	}
	2722	DEBUG_EXECUTE_r(if (!did_match) {
	2723	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	2724	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	2725	PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
	2726	((must == prog->anchored_substr \|\| must == prog->anchored_utf8)
	2727	? "anchored" : "floating"),
	2728	quoted, RE_SV_TAIL(must));
	2729	});
	2730	goto phooey;
	2731	}
	2732	else if ( (c = progi->regstclass) ) {
	2733	if (minlen) {
	2734	const OPCODE op = OP(progi->regstclass);
	2735	/* don't bother with what can't match */
	2736	if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
	2737	strend = HOPc(strend, -(minlen - 1));
	2738	}
	2739	DEBUG_EXECUTE_r({
	2740	SV * const prop = sv_newmortal();
	2741	regprop(prog, prop, c);
	2742	{
	2743	RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
	2744	s,strend-s,60);
	2745	PerlIO_printf(Perl_debug_log,
	2746	"Matching stclass %.*s against %s (%d bytes)\n",
	2747	(int)SvCUR(prop), SvPVX_const(prop),
	2748	quoted, (int)(strend - s));
	2749	}
	2750	});
	2751	if (find_byclass(prog, c, s, strend, reginfo))
	2752	goto got_it;
	2753	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
	2754	}
	2755	else {
	2756	dontbother = 0;
	2757	if (prog->float_substr != NULL \|\| prog->float_utf8 != NULL) {
	2758	/* Trim the end. */
	2759	char *last= NULL;
	2760	SV* float_real;
	2761	STRLEN len;
	2762	const char *little;
	2763
	2764	if (utf8_target) {
	2765	if (! prog->float_utf8) {
	2766	to_utf8_substr(prog);
	2767	}
	2768	float_real = prog->float_utf8;
	2769	}
	2770	else {
	2771	if (! prog->float_substr) {
	2772	if (! to_byte_substr(prog)) {
	2773	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	2774	}
	2775	}
	2776	float_real = prog->float_substr;
	2777	}
	2778
	2779	little = SvPV_const(float_real, len);
	2780	if (SvTAIL(float_real)) {
	2781	/* This means that float_real contains an artificial \n on
	2782	* the end due to the presence of something like this:
	2783	* /foo$/ where we can match both "foo" and "foo\n" at the
	2784	* end of the string. So we have to compare the end of the
	2785	* string first against the float_real without the \n and
	2786	* then against the full float_real with the string. We
	2787	* have to watch out for cases where the string might be
	2788	* smaller than the float_real or the float_real without
	2789	* the \n. */
	2790	char *checkpos= strend - len;
	2791	DEBUG_OPTIMISE_r(
	2792	PerlIO_printf(Perl_debug_log,
	2793	"%sChecking for float_real.%s\n",
	2794	PL_colors[4], PL_colors[5]));
	2795	if (checkpos + 1 < strbeg) {
	2796	/* can't match, even if we remove the trailing \n
	2797	* string is too short to match */
	2798	DEBUG_EXECUTE_r(
	2799	PerlIO_printf(Perl_debug_log,
	2800	"%sString shorter than required trailing substring, cannot match.%s\n",
	2801	PL_colors[4], PL_colors[5]));
	2802	goto phooey;
	2803	} else if (memEQ(checkpos + 1, little, len - 1)) {
	2804	/* can match, the end of the string matches without the
	2805	* "\n" */
	2806	last = checkpos + 1;
	2807	} else if (checkpos < strbeg) {
	2808	/* cant match, string is too short when the "\n" is
	2809	* included */
	2810	DEBUG_EXECUTE_r(
	2811	PerlIO_printf(Perl_debug_log,
	2812	"%sString does not contain required trailing substring, cannot match.%s\n",
	2813	PL_colors[4], PL_colors[5]));
	2814	goto phooey;
	2815	} else if (!multiline) {
	2816	/* non multiline match, so compare with the "\n" at the
	2817	* end of the string */
	2818	if (memEQ(checkpos, little, len)) {
	2819	last= checkpos;
	2820	} else {
	2821	DEBUG_EXECUTE_r(
	2822	PerlIO_printf(Perl_debug_log,
	2823	"%sString does not contain required trailing substring, cannot match.%s\n",
	2824	PL_colors[4], PL_colors[5]));
	2825	goto phooey;
	2826	}
	2827	} else {
	2828	/* multiline match, so we have to search for a place
	2829	* where the full string is located */
	2830	goto find_last;
	2831	}
	2832	} else {
	2833	find_last:
	2834	if (len)
	2835	last = rninstr(s, strend, little, little + len);
	2836	else
	2837	last = strend; /* matching "$" */
	2838	}
	2839	if (!last) {
	2840	/* at one point this block contained a comment which was
	2841	* probably incorrect, which said that this was a "should not
	2842	* happen" case. Even if it was true when it was written I am
	2843	* pretty sure it is not anymore, so I have removed the comment
	2844	* and replaced it with this one. Yves */
	2845	DEBUG_EXECUTE_r(
	2846	PerlIO_printf(Perl_debug_log,
	2847	"String does not contain required substring, cannot match.\n"
	2848	));
	2849	goto phooey;
	2850	}
	2851	dontbother = strend - last + prog->float_min_offset;
	2852	}
	2853	if (minlen && (dontbother < minlen))
	2854	dontbother = minlen - 1;
	2855	strend -= dontbother; /* this one's always in bytes! */
	2856	/* We don't know much -- general case. */
	2857	if (utf8_target) {
	2858	for (;;) {
	2859	if (regtry(reginfo, &s))
	2860	goto got_it;
	2861	if (s >= strend)
	2862	break;
	2863	s += UTF8SKIP(s);
	2864	};
	2865	}
	2866	else {
	2867	do {
	2868	if (regtry(reginfo, &s))
	2869	goto got_it;
	2870	} while (s++ < strend);
	2871	}
	2872	}
	2873
	2874	/* Failure. */
	2875	goto phooey;
	2876
	2877	got_it:
	2878	/* s/// doesn't like it if $& is earlier than where we asked it to
	2879	* start searching (which can happen on something like /.\G/) */
	2880	if ( (flags & REXEC_FAIL_ON_UNDERFLOW)
	2881	&& (prog->offs[0].start < stringarg - strbeg))
	2882	{
	2883	/* this should only be possible under \G */
	2884	assert(prog->extflags & RXf_GPOS_SEEN);
	2885	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	2886	"matched, but failing for REXEC_FAIL_ON_UNDERFLOW\n"));
	2887	goto phooey;
	2888	}
	2889
	2890	DEBUG_BUFFERS_r(
	2891	if (swap)
	2892	PerlIO_printf(Perl_debug_log,
	2893	"rex=0x%"UVxf" freeing offs: 0x%"UVxf"\n",
	2894	PTR2UV(prog),
	2895	PTR2UV(swap)
	2896	);
	2897	);
	2898	Safefree(swap);
	2899
	2900	/* clean up; this will trigger destructors that will free all slabs
	2901	* above the current one, and cleanup the regmatch_info_aux
	2902	* and regmatch_info_aux_eval structs */
	2903
	2904	LEAVE_SCOPE(oldsave);
	2905
	2906	if (RXp_PAREN_NAMES(prog))
	2907	(void)hv_iterinit(RXp_PAREN_NAMES(prog));
	2908
	2909	RX_MATCH_UTF8_set(rx, utf8_target);
	2910
	2911	/* make sure $`, $&, $', and $digit will work later */
	2912	if ( !(flags & REXEC_NOT_FIRST) )
	2913	S_reg_set_capture_string(aTHX_ rx,
	2914	strbeg, reginfo->strend,
	2915	sv, flags, utf8_target);
	2916
	2917	return 1;
	2918
	2919	phooey:
	2920	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
	2921	PL_colors[4], PL_colors[5]));
	2922
	2923	/* clean up; this will trigger destructors that will free all slabs
	2924	* above the current one, and cleanup the regmatch_info_aux
	2925	* and regmatch_info_aux_eval structs */
	2926
	2927	LEAVE_SCOPE(oldsave);
	2928
	2929	if (swap) {
	2930	/* we failed :-( roll it back */
	2931	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	2932	"rex=0x%"UVxf" rolling back offs: freeing=0x%"UVxf" restoring=0x%"UVxf"\n",
	2933	PTR2UV(prog),
	2934	PTR2UV(prog->offs),
	2935	PTR2UV(swap)
	2936	));
	2937	Safefree(prog->offs);
	2938	prog->offs = swap;
	2939	}
	2940	return 0;
	2941	}
	2942
	2943
	2944	/* Set which rex is pointed to by PL_reg_curpm, handling ref counting.
	2945	* Do inc before dec, in case old and new rex are the same.
		*
		* The macro reads a variable named 'reginfo' from the enclosing scope and
		* does nothing unless reginfo->info_aux_eval is set. It expands to a bare
		* 'if' block (there is no do/while wrapper), so take care when using it
		* directly in front of an 'else'. */
	2946	#define SET_reg_curpm(Re2) \
	2947	if (reginfo->info_aux_eval) { \
	2948	(void)ReREFCNT_inc(Re2); \
	2949	ReREFCNT_dec(PM_GETRE(PL_reg_curpm)); \
	2950	PM_SETRE((PL_reg_curpm), (Re2)); \
	2951	}
	2952
	2953
	2954	/*
	2955	- regtry - try match at specific point
		-
		- reginfo:   match context; supplies the regex (reginfo->prog), the start
		-            of the target string (reginfo->strbeg) and receives cutpoint.
		- startposp: in/out pointer to the position at which to attempt the match.
		-
		- Resets the capture state of the regex, records *startposp (relative to
		- reginfo->strbeg) in offs[0].start and runs regmatch() from the node at
		- progi->program + 1. On success the value returned by regmatch() is
		- stored in offs[0].end and 1 is returned. On failure, *startposp is
		- moved to reginfo->cutpoint if the attempt set one, REGCP_UNWIND() is
		- applied to the checkpoint taken before the attempt, and 0 is returned.
	2956	*/
	2957	STATIC I32 /* 0 failure, 1 success */
	2958	S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
	2959	{
	2960	dVAR;
	2961	CHECKPOINT lastcp;
	2962	REGEXP *const rx = reginfo->prog;
	2963	regexp *const prog = ReANY(rx);
	2964	SSize_t result;
	2965	RXi_GET_DECL(prog,progi);
	2966	GET_RE_DEBUG_FLAGS_DECL;
	2967
	2968	PERL_ARGS_ASSERT_REGTRY;
	2969
	2970	reginfo->cutpoint=NULL;
	2971
	2972	prog->offs[0].start = *startposp - reginfo->strbeg;
	2973	prog->lastparen = 0;
	2974	prog->lastcloseparen = 0;
	2975
	2976	/* XXXX What this code is doing here?!!! There should be no need
	2977	to do this again and again, prog->lastparen should take care of
	2978	this! --ilya*/
	2979
	2980	/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
	2981	* Actually, the code in regcppop() (which Ilya may be meaning by
	2982	* prog->lastparen), is not needed at all by the test suite
	2983	* (op/regexp, op/pat, op/split), but that code is needed otherwise
	2984	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	2985	* Meanwhile, this code is needed for the
	2986	* above-mentioned test suite tests to succeed. The common theme
	2987	* on those tests seems to be returning null fields from matches.
	2988	* --jhi updated by dapm */
	2989	#if 1
	2990	if (prog->nparens) {
	2991	regexp_paren_pair *pp = prog->offs;
	2992	I32 i;
		/* pp is advanced before each store, so offs[0] is left alone and
		 * pairs 1..nparens are marked as unset (-1) */
	2993	for (i = prog->nparens; i > (I32)prog->lastparen; i--) {
	2994	++pp;
	2995	pp->start = -1;
	2996	pp->end = -1;
	2997	}
	2998	}
	2999	#endif
	3000	REGCP_SET(lastcp);
	3001	result = regmatch(reginfo, *startposp, progi->program + 1);
	3002	if (result != -1) {
	3003	prog->offs[0].end = result;
	3004	return 1;
	3005	}
	3006	if (reginfo->cutpoint)
	3007	*startposp= reginfo->cutpoint;
	3008	REGCP_UNWIND(lastcp);
	3009	return 0;
	3010	}
	3011
	3012
		/* Shorthand jumps to the labels 'yes', 'no' and 'no_silent'; those labels
		 * must exist in whichever function uses these macros. */
	3013	#define sayYES goto yes
	3014	#define sayNO goto no
	3015	#define sayNO_SILENT goto no_silent
	3016
	3017	/* we dont use STMT_START/END here because it leads to
	3018	"unreachable code" warnings, which are bogus, but distracting. */
	3019	#define CACHEsayNO \
	3020	if (ST.cache_mask) \
	3021	reginfo->info_aux->poscache[ST.cache_offset] \|= ST.cache_mask; \
	3022	sayNO
	3023
	3024	/* this is used to determine how far from the left messages like
	3025	'failed...' are printed. It should be set such that messages
	3026	are inline with the regop output that created them.
	3027	*/
	3028	#define REPORT_CODE_OFF 32
	3029
	3030
		/* Special values for the c1/c2 "next char" tests. All are negative, so
		 * none of them is a valid code point. */
	3031	#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
	3032	#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
	3033	#define CHRTEST_NOT_A_CP_1 -999 /* placeholder for c1: not a code point */
	3034	#define CHRTEST_NOT_A_CP_2 -998 /* placeholder for c2: not a code point, and distinct from the c1 one */
	3035
	3036	/* grab a new slab and return the first slot in it.
		*
		* If the current slab (PL_regmatch_slab) already has a successor, that
		* one is reused; otherwise a new slab is allocated with Newx() and linked
		* after the current one. Either way PL_regmatch_slab is advanced to the
		* slab whose first slot is returned. */
	3037
	3038	STATIC regmatch_state *
	3039	S_push_slab(pTHX)
	3040	{
	3041	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	3042	dMY_CXT;
	3043	#endif
	3044	regmatch_slab *s = PL_regmatch_slab->next; /* successor slab, if one is already chained */
	3045	if (!s) {
	3046	Newx(s, 1, regmatch_slab);
	3047	s->prev = PL_regmatch_slab; /* link the new slab in both directions */
	3048	s->next = NULL;
	3049	PL_regmatch_slab->next = s;
	3050	}
	3051	PL_regmatch_slab = s; /* the returned slot's slab becomes the current one */
	3052	return SLAB_FIRST(s);
	3053	}
	3054
	3055
	3056	/* push a new state then goto it:
		* stores 'input' in pushinput, 'node' in scan and 'state' in
		* st->resume_state, then jumps to the push_state label. Expands to several
		* statements, so it must not be used as the unbraced body of an if/else. */
	3057
	3058	#define PUSH_STATE_GOTO(state, node, input) \
	3059	pushinput = input; \
	3060	scan = node; \
	3061	st->resume_state = state; \
	3062	goto push_state;
	3063
	3064	/* push a new state with success backtracking, then goto it:
		* same as PUSH_STATE_GOTO except that it jumps to the push_yes_state
		* label instead. */
	3065
	3066	#define PUSH_YES_STATE_GOTO(state, node, input) \
	3067	pushinput = input; \
	3068	scan = node; \
	3069	st->resume_state = state; \
	3070	goto push_yes_state;
	3071
	3072
	3073
	3074
	3075	/*
	3076
	3077	regmatch() - main matching routine
	3078
	3079	This is basically one big switch statement in a loop. We execute an op,
	3080	set 'next' to point the next op, and continue. If we come to a point which
	3081	we may need to backtrack to on failure such as (A|B|C), we push a
	3082	backtrack state onto the backtrack stack. On failure, we pop the top
	3083	state, and re-enter the loop at the state indicated. If there are no more
	3084	states to pop, we return failure.
	3085
	3086	Sometimes we also need to backtrack on success; for example /A+/, where
	3087	after successfully matching one A, we need to go back and try to
	3088	match another one; similarly for lookahead assertions: if the assertion
	3089	completes successfully, we backtrack to the state just before the assertion
	3090	and then carry on. In these cases, the pushed state is marked as
	3091	'backtrack on success too'. This marking is in fact done by a chain of
	3092	pointers, each pointing to the previous 'yes' state. On success, we pop to
	3093	the nearest yes state, discarding any intermediate failure-only states.
	3094	Sometimes a yes state is pushed just to force some cleanup code to be
	3095	called at the end of a successful match or submatch; e.g. (??{$re}) uses
	3096	it to free the inner regex.
	3097
	3098	Note that failure backtracking rewinds the cursor position, while
	3099	success backtracking leaves it alone.
	3100
	3101	A pattern is complete when the END op is executed, while a subpattern
	3102	such as (?=foo) is complete when the SUCCESS op is executed. Both of these
	3103	ops trigger the "pop to last yes state if any, otherwise return true"
	3104	behaviour.
	3105
	3106	A common convention in this function is to use A and B to refer to the two
	3107	subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
	3108	the subpattern to be matched possibly multiple times, while B is the entire
	3109	rest of the pattern. Variable and state names reflect this convention.
	3110
	3111	The states in the main switch are the union of ops and failure/success of
	3112	substates associated with that op. For example, IFMATCH is the op
	3113	that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
	3114	'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
	3115	successfully matched A and IFMATCH_A_fail is a state saying that we have
	3116	just failed to match A. Resume states always come in pairs. The backtrack
	3117	state we push is marked as 'IFMATCH_A', but when that is popped, we resume
	3118	at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
	3119	on success or failure.
	3120
	3121	The struct that holds a backtracking state is actually a big union, with
	3122	one variant for each major type of op. The variable st points to the
	3123	top-most backtrack struct. To make the code clearer, within each
	3124	block of code we #define ST to alias the relevant union.
	3125
	3126	Here's a concrete example of a (vastly oversimplified) IFMATCH
	3127	implementation:
	3128
	3129	switch (state) {
	3130	....
	3131
	3132	#define ST st->u.ifmatch
	3133
	3134	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	3135	ST.foo = ...; // some state we wish to save
	3136	...
	3137	// push a yes backtrack state with a resume value of
	3138	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
	3139	// first node of A:
	3140	PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput);
	3141	// NOTREACHED
	3142
	3143	case IFMATCH_A: // we have successfully executed A; now continue with B
	3144	next = B;
	3145	bar = ST.foo; // do something with the preserved value
	3146	break;
	3147
	3148	case IFMATCH_A_fail: // A failed, so the assertion failed
	3149	...; // do some housekeeping, then ...
	3150	sayNO; // propagate the failure
	3151
	3152	#undef ST
	3153
	3154	...
	3155	}
	3156
	3157	For any old-timers reading this who are familiar with the old recursive
	3158	approach, the code above is equivalent to:
	3159
	3160	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	3161	{
	3162	int foo = ...
	3163	...
	3164	if (regmatch(A)) {
	3165	next = B;
	3166	bar = foo;
	3167	break;
	3168	}
	3169	...; // do some housekeeping, then ...
	3170	sayNO; // propagate the failure
	3171	}
	3172
	3173	The topmost backtrack state, pointed to by st, is usually free. If you
	3174	want to claim it, populate any ST.foo fields in it with values you wish to
	3175	save, then do one of
	3176
	3177	PUSH_STATE_GOTO(resume_state, node, newinput);
	3178	PUSH_YES_STATE_GOTO(resume_state, node, newinput);
	3179
	3180	which sets that backtrack state's resume value to 'resume_state', pushes a
	3181	new free entry to the top of the backtrack stack, then goes to 'node'.
	3182	On backtracking, the free slot is popped, and the saved state becomes the
	3183	new free state. An ST.foo field in this new top state can be temporarily
	3184	accessed to retrieve values, but once the main loop is re-entered, it
	3185	becomes available for reuse.
	3186
	3187	Note that the depth of the backtrack stack constantly increases during the
	3188	left-to-right execution of the pattern, rather than going up and down with
	3189	the pattern nesting. For example the stack is at its maximum at Z at the
	3190	end of the pattern, rather than at X in the following:
	3191
	3192	/(((X)+)+)+....(Y)+....Z/
	3193
	3194	The only exceptions to this are lookahead/behind assertions and the cut,
	3195	(?>A), which pop all the backtrack states associated with A before
	3196	continuing.
	3197
	3198	Backtrack state structs are allocated in slabs of about 4K in size.
	3199	PL_regmatch_state and st always point to the currently active state,
	3200	and PL_regmatch_slab points to the slab currently containing
	3201	PL_regmatch_state. The first time regmatch() is called, the first slab is
	3202	allocated, and is never freed until interpreter destruction. When the slab
	3203	is full, a new one is allocated and chained to the end. At exit from
	3204	regmatch(), slabs allocated since entry are freed.
	3205
	3206	*/
	3207
	3208
	3209	#define DEBUG_STATE_pp(pp) \
	3210	DEBUG_STATE_r({ \
	3211	DUMP_EXEC_POS(locinput, scan, utf8_target); \
	3212	PerlIO_printf(Perl_debug_log, \
	3213	" %*s"pp" %s%s%s%s%s\n", \
	3214	depth*2, "", \
	3215	PL_reg_name[st->resume_state], \
	3216	((st==yes_state\|\|st==mark_state) ? "[" : ""), \
	3217	((st==yes_state) ? "Y" : ""), \
	3218	((st==mark_state) ? "M" : ""), \
	3219	((st==yes_state\|\|st==mark_state) ? "]" : "") \
	3220	); \
	3221	});
	3222
	3223
		/* Offset of node x from 'prog' (a variable taken from the enclosing
		 * scope) as an int, or -1 when x is NULL. */
	3224	#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
	3225
	3226	#ifdef DEBUGGING
	3227
	3228	STATIC void
	3229	S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
	3230	const char start, const char end, const char *blurb)
	3231	{
	3232	const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
	3233
	3234	PERL_ARGS_ASSERT_DEBUG_START_MATCH;
	3235
	3236	if (!PL_colorset)
	3237	reginitcolors();
	3238	{
	3239	RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
	3240	RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
	3241
	3242	RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
	3243	start, end - start, 60);
	3244
	3245	PerlIO_printf(Perl_debug_log,
	3246	"%s%s REx%s %s against %s\n",
	3247	PL_colors[4], blurb, PL_colors[5], s0, s1);
	3248
	3249	if (utf8_target\|\|utf8_pat)
	3250	PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
	3251	utf8_pat ? "pattern" : "",
	3252	utf8_pat && utf8_target ? " and " : "",
	3253	utf8_target ? "string" : ""
	3254	);
	3255	}
	3256	}
	3257
	3258	STATIC void
	3259	S_dump_exec_pos(pTHX_ const char *locinput,
	3260	const regnode *scan,
	3261	const char *loc_regeol,
	3262	const char *loc_bostr,
	3263	const char *loc_reg_starttry,
	3264	const bool utf8_target)
	3265	{
	3266	const int docolor = PL_colors[0] \|\| PL_colors[2] \|\| *PL_colors[4];
	3267	const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
	3268	int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
	3269	/* The part of the string before starttry has one color
	3270	(pref0_len chars), between starttry and current
	3271	position another one (pref_len - pref0_len chars),
	3272	after the current position the third one.
	3273	We assume that pref0_len <= pref_len, otherwise we
	3274	decrease pref0_len. */
	3275	int pref_len = (locinput - loc_bostr) > (5 + taill) - l
	3276	? (5 + taill) - l : locinput - loc_bostr;
	3277	int pref0_len;
	3278
	3279	PERL_ARGS_ASSERT_DUMP_EXEC_POS;
	3280
	3281	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput - pref_len)))
	3282	pref_len++;
	3283	pref0_len = pref_len - (locinput - loc_reg_starttry);
	3284	if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
	3285	l = ( loc_regeol - locinput > (5 + taill) - pref_len
	3286	? (5 + taill) - pref_len : loc_regeol - locinput);
	3287	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput + l)))
	3288	l--;
	3289	if (pref0_len < 0)
	3290	pref0_len = 0;
	3291	if (pref0_len > pref_len)
	3292	pref0_len = pref_len;
	3293	{
	3294	const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0;
	3295
	3296	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
	3297	(locinput - pref_len),pref0_len, 60, 4, 5);
	3298
	3299	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
	3300	(locinput - pref_len + pref0_len),
	3301	pref_len - pref0_len, 60, 2, 3);
	3302
	3303	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
	3304	locinput, loc_regeol - locinput, 10, 0, 1);
	3305
	3306	const STRLEN tlen=len0+len1+len2;
	3307	PerlIO_printf(Perl_debug_log,
	3308	"%4"IVdf" <%.s%.s%s%.s>%s\|",
	3309	(IV)(locinput - loc_bostr),
	3310	len0, s0,
	3311	len1, s1,
	3312	(docolor ? "" : "> <"),
	3313	len2, s2,
	3314	(int)(tlen > 19 ? 0 : 19 - tlen),
	3315	"");
	3316	}
	3317	}
	3318
	3319	#endif
	3320
	3321	/* reg_check_named_buff_matched()
	3322	* Checks to see if a named buffer has matched. The data array of
	3323	* buffer numbers corresponding to the buffer is expected to reside
	3324	* in the regexp->data->data array in the slot stored in the ARG() of
	3325	* node involved. Note that this routine doesn't actually care about the
	3326	* name, that information is not preserved from compilation to execution.
	3327	* Returns the index of the leftmost defined buffer with the given name
	3328	* or 0 if none of the buffers matched.
		*
		* rex:  the regexp whose capture offsets (offs[], lastparen) are consulted
		* scan: the node whose ARG() selects the data slot
	3329	*/
	3330	STATIC I32
	3331	S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
	3332	{
	3333	I32 n;
	3334	RXi_GET_DECL(rex,rexi);
	3335	SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
		/* the SV's string buffer is read as an array of I32 buffer numbers;
		 * its IV slot gives the number of entries */
	3336	I32 *nums=(I32*)SvPVX(sv_dat);
	3337
	3338	PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
	3339
	3340	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	3341	if ((I32)rex->lastparen >= nums[n] &&
	3342	rex->offs[nums[n]].end != -1)
	3343	{
	3344	return nums[n];
	3345	}
	3346	}
	3347	return 0;
	3348	}
	3349
	3350
	3351	static bool
	3352	S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
	3353	U8* c1_utf8, int *c2p, U8* c2_utf8, regmatch_info *reginfo)
	3354	{
	3355	/* This function determines if there are one or two characters that match
	3356	* the first character of the passed-in EXACTish node <text_node>, and if
	3357	* so, returns them in the passed-in pointers.
	3358	*
	3359	* If it determines that no possible character in the target string can
	3360	* match, it returns FALSE; otherwise TRUE. (The FALSE situation occurs if
	3361	* the first character in <text_node> requires UTF-8 to represent, and the
	3362	* target string isn't in UTF-8.)
	3363	*
	3364	* If there are more than two characters that could match the beginning of
	3365	* <text_node>, or if more context is required to determine a match or not,
	3366	* it sets both *<c1p> and *<c2p> to CHRTEST_VOID.
	3367	*
	3368	* The motivation behind this function is to allow the caller to set up
	3369	* tight loops for matching. If <text_node> is of type EXACT, there is
	3370	* only one possible character that can match its first character, and so
	3371	* the situation is quite simple. But things get much more complicated if
	3372	* folding is involved. It may be that the first character of an EXACTFish
	3373	* node doesn't participate in any possible fold, e.g., punctuation, so it
	3374	* can be matched only by itself. The vast majority of characters that are
	3375	* in folds match just two things, their lower and upper-case equivalents.
	3376	* But not all are like that; some have multiple possible matches, or match
	3377	* sequences of more than one character. This function sorts all that out.
	3378	*
	3379	* Consider the patterns AB or A?B where A and B are arbitrary. In a
	3380	* loop of trying to match A*, we know we can't exit where the thing
	3381	* following it isn't a B. And something can't be a B unless it is the
	3382	* beginning of B. By putting a quick test for that beginning in a tight
	3383	* loop, we can rule out things that can't possibly be B without having to
	3384	* break out of the loop, thus avoiding work. Similarly, if A is a single
	3385	* character, we can make a tight loop matching A*, using the outputs of
	3386	* this function.
	3387	*
	3388	* If the target string to match isn't in UTF-8, and there aren't
	3389	* complications which require CHRTEST_VOID, *<c1p> and *<c2p> are set to
	3390	* the one or two possible octets (which are characters in this situation)
	3391	* that can match. In all cases, if there is only one character that can
	3392	* match, *<c1p> and *<c2p> will be identical.
	3393	*
	3394	* If the target string is in UTF-8, the buffers pointed to by <c1_utf8>
	3395	* and <c2_utf8> will contain the one or two UTF-8 sequences of bytes that
	3396	* can match the beginning of <text_node>. They should be declared with at
	3397	* least length UTF8_MAXBYTES+1. (If the target string isn't in UTF-8, it is
	3398	* undefined what these contain.) If one or both of the buffers are
	3399	* invariant under UTF-8, *<c1p>, and *<c2p> will also be set to the
	3400	* corresponding invariant. If variant, the corresponding *<c1p> and/or
	3401	* *<c2p> will be set to a negative number(s) that shouldn't match any code
	3402	* point (unless inappropriately coerced to unsigned). *<c1p> will equal
	3403	* *<c2p> if and only if <c1_utf8> and <c2_utf8> are the same. */
	3404
	3405	const bool utf8_target = reginfo->is_utf8_target;
	3406
	3407	UV c1 = CHRTEST_NOT_A_CP_1;
	3408	UV c2 = CHRTEST_NOT_A_CP_2;
	3409	bool use_chrtest_void = FALSE;
	3410	const bool is_utf8_pat = reginfo->is_utf8_pat;
	3411
	3412	/* Used when we have both utf8 input and utf8 output, to avoid converting
	3413	* to/from code points */
	3414	bool utf8_has_been_setup = FALSE;
	3415
	3416	dVAR;
	3417
	3418	U8 *pat = (U8*)STRING(text_node);
	3419
	3420	if (OP(text_node) == EXACT) {
	3421
	3422	/* In an exact node, only one thing can be matched, that first
	3423	* character. If both the pat and the target are UTF-8, we can just
	3424	* copy the input to the output, avoiding finding the code point of
	3425	* that character */
	3426	if (!is_utf8_pat) {
	3427	c2 = c1 = *pat;
	3428	}
	3429	else if (utf8_target) {
	3430	Copy(pat, c1_utf8, UTF8SKIP(pat), U8);
	3431	Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
	3432	utf8_has_been_setup = TRUE;
	3433	}
	3434	else {
	3435	c2 = c1 = valid_utf8_to_uvchr(pat, NULL);
	3436	}
	3437	}
	3438	else /* an EXACTFish node */
	3439	if ((is_utf8_pat
	3440	&& is_MULTI_CHAR_FOLD_utf8_safe(pat,
	3441	pat + STR_LEN(text_node)))
	3442	|| (!is_utf8_pat
	3443	&& is_MULTI_CHAR_FOLD_latin1_safe(pat,
	3444	pat + STR_LEN(text_node))))
	3445	{
	3446	/* Multi-character folds require more context to sort out. Also
	3447	* PL_utf8_foldclosures used below doesn't handle them, so have to be
	3448	* handled outside this routine */
	3449	use_chrtest_void = TRUE;
	3450	}
	3451	else { /* an EXACTFish node which doesn't begin with a multi-char fold */
	3452	c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
		/* Everything above Latin-1 (i.e. 256 and up) goes through the fold
		 * closures hash. The else branch below indexes PL_fold*[c1], which
		 * is only meaningful for code points 0..255, so 256 itself must not
		 * fall through to it. */
	3453	if (c1 > 255) {
	3454	/* Load the folds hash, if not already done */
	3455	SV** listp;
	3456	if (! PL_utf8_foldclosures) {
	3457	if (! PL_utf8_tofold) {
	3458	U8 dummy[UTF8_MAXBYTES_CASE+1];
	3459
	3460	/* Force loading this by folding an above-Latin1 char */
	3461	to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
	3462	assert(PL_utf8_tofold); /* Verify that worked */
	3463	}
	3464	PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
	3465	}
	3466
	3467	/* The fold closures data structure is a hash with the keys being
	3468	* the UTF-8 of every character that is folded to, like 'k', and
	3469	* the values each an array of all code points that fold to its
	3470	* key. e.g. [ 'k', 'K', KELVIN_SIGN ]. Multi-character folds are
	3471	* not included */
	3472	if ((! (listp = hv_fetch(PL_utf8_foldclosures,
	3473	(char *) pat,
	3474	UTF8SKIP(pat),
	3475	FALSE))))
	3476	{
	3477	/* Not found in the hash, therefore there are no folds
	3478	* containing it, so there is only a single character that
	3479	* could match */
	3480	c2 = c1;
	3481	}
	3482	else { /* Does participate in folds */
	3483	AV* list = (AV*) *listp;
	3484	if (av_len(list) != 1) {
	3485
	3486	/* If there aren't exactly two folds to this, it is outside
	3487	* the scope of this function */
	3488	use_chrtest_void = TRUE;
	3489	}
	3490	else { /* There are two. Get them */
	3491	SV** c_p = av_fetch(list, 0, FALSE);
	3492	if (c_p == NULL) {
	3493	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	3494	}
	3495	c1 = SvUV(*c_p);
	3496
	3497	c_p = av_fetch(list, 1, FALSE);
	3498	if (c_p == NULL) {
	3499	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	3500	}
	3501	c2 = SvUV(*c_p);
	3502
	3503	/* Folds that cross the 255/256 boundary are forbidden if
	3504	* EXACTFL, or EXACTFA and one is ASCII. Since the
	3505	* pattern character is above 256, and its only other match
	3506	* is below 256, the only legal match will be to itself.
	3507	* We have thrown away the original, so have to compute
	3508	* which is the one above 255 */
	3509	if ((c1 < 256) != (c2 < 256)) {
	3510	if (OP(text_node) == EXACTFL
	3511	|| ((OP(text_node) == EXACTFA
	3512	|| OP(text_node) == EXACTFA_NO_TRIE)
	3513	&& (isASCII(c1) || isASCII(c2))))
	3514	{
	3515	if (c1 < 256) {
	3516	c1 = c2;
	3517	}
	3518	else {
	3519	c2 = c1;
	3520	}
	3521	}
	3522	}
	3523	}
	3524	}
	3525	}
	3526	else /* Here, c1 is <= 255 */
	3527	if (utf8_target
	3528	&& HAS_NONLATIN1_FOLD_CLOSURE(c1)
	3529	&& OP(text_node) != EXACTFL
	3530	&& ((OP(text_node) != EXACTFA
	3531	&& OP(text_node) != EXACTFA_NO_TRIE)
	3532	|| ! isASCII(c1)))
	3533	{
	3534	/* Here, there could be something above Latin1 in the target which
	3535	* folds to this character in the pattern. All such cases except
	3536	* LATIN SMALL LETTER Y WITH DIAERESIS have more than two characters
	3537	* involved in their folds, so are outside the scope of this
	3538	* function */
	3539	if (UNLIKELY(c1 == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
	3540	c2 = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
	3541	}
	3542	else {
	3543	use_chrtest_void = TRUE;
	3544	}
	3545	}
	3546	else { /* Here nothing above Latin1 can fold to the pattern character */
	3547	switch (OP(text_node)) {
	3548
	3549	case EXACTFL: /* /l rules */
	3550	c2 = PL_fold_locale[c1];
	3551	break;
	3552
	3553	case EXACTF: /* This node only generated for non-utf8
	3554	patterns */
	3555	assert(! is_utf8_pat);
	3556	if (! utf8_target) { /* /d rules */
	3557	c2 = PL_fold[c1];
	3558	break;
	3559	}
	3560	/* FALLTHROUGH */
	3561	/* /u rules for all these. This happens to work for
	3562	* EXACTFA as nothing in Latin1 folds to ASCII */
	3563	case EXACTFA_NO_TRIE: /* This node only generated for
	3564	non-utf8 patterns */
	3565	assert(! is_utf8_pat);
	3566	/* FALL THROUGH */
	3567	case EXACTFA:
	3568	case EXACTFU_SS:
	3569	case EXACTFU:
	3570	c2 = PL_fold_latin1[c1];
	3571	break;
	3572
	3573	default:
	3574	Perl_croak(aTHX_ "panic: Unexpected op %u", OP(text_node));
	3575	assert(0); /* NOTREACHED */
	3576	}
	3577	}
	3578	}
	3579
	3580	/* Here have figured things out. Set up the returns */
	3581	if (use_chrtest_void) {
	3582	*c2p = *c1p = CHRTEST_VOID;
	3583	}
	3584	else if (utf8_target) {
	3585	if (! utf8_has_been_setup) { /* Don't have the utf8; must get it */
	3586	uvchr_to_utf8(c1_utf8, c1);
	3587	uvchr_to_utf8(c2_utf8, c2);
	3588	}
	3589
	3590	/* Invariants are stored in both the utf8 and byte outputs; Use
	3591	* negative numbers otherwise for the byte ones. Make sure that the
	3592	* byte ones are the same iff the utf8 ones are the same */
	3593	*c1p = (UTF8_IS_INVARIANT(*c1_utf8)) ? *c1_utf8 : CHRTEST_NOT_A_CP_1;
	3594	*c2p = (UTF8_IS_INVARIANT(*c2_utf8))
	3595	? *c2_utf8
	3596	: (c1 == c2)
	3597	? CHRTEST_NOT_A_CP_1
	3598	: CHRTEST_NOT_A_CP_2;
	3599	}
	3600	else if (c1 > 255) {
	3601	if (c2 > 255) { /* both possibilities are above what a non-utf8 string
	3602	can represent */
	3603	return FALSE;
	3604	}
	3605
	3606	*c1p = *c2p = c2; /* c2 is the only representable value */
	3607	}
	3608	else { /* c1 is representable; see about c2 */
	3609	*c1p = c1;
	3610	*c2p = (c2 < 256) ? c2 : c1;
	3611	}
	3612
	3613	return TRUE;
	3614	}
	3615
	3616	/* returns -1 on failure, $+[0] on success */
	3617	STATIC SSize_t
	3618	S_regmatch(pTHX_ regmatch_info reginfo, char startpos, regnode *prog)
	3619	{
	3620	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	3621	dMY_CXT;
	3622	#endif
	3623	dVAR;
	3624	const bool utf8_target = reginfo->is_utf8_target;
	3625	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	3626	REGEXP *rex_sv = reginfo->prog;
	3627	regexp *rex = ReANY(rex_sv);
	3628	RXi_GET_DECL(rex,rexi);
	3629	/* the current state. This is a cached copy of PL_regmatch_state */
	3630	regmatch_state *st;
	3631	/* cache heavy used fields of st in registers */
	3632	regnode *scan;
	3633	regnode *next;
	3634	U32 n = 0; /* general value; init to avoid compiler warning */
	3635	SSize_t ln = 0; /* len or last; init to avoid compiler warning */
	3636	char *locinput = startpos;
	3637	char pushinput; / where to continue after a PUSH */
	3638	I32 nextchr; /* is always set to UCHARAT(locinput) */
	3639
	3640	bool result = 0; /* return value of S_regmatch */
	3641	int depth = 0; /* depth of backtrack stack */
	3642	U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
	3643	const U32 max_nochange_depth =
	3644	(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
	3645	3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
	3646	regmatch_state yes_state = NULL; / state to pop to on success of
	3647	subpattern */
	3648	/* mark_state piggy backs on the yes_state logic so that when we unwind
	3649	the stack on success we can update the mark_state as we go */
	3650	regmatch_state mark_state = NULL; / last mark state we have seen */
	3651	regmatch_state cur_eval = NULL; / most recent EVAL_AB state */
	3652	struct regmatch_state cur_curlyx = NULL; / most recent curlyx */
	3653	U32 state_num;
	3654	bool no_final = 0; /* prevent failure from backtracking? */
	3655	bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
	3656	char *startpoint = locinput;
	3657	SV popmark = NULL; / are we looking for a mark? */
	3658	SV sv_commit = NULL; / last mark name seen in failure */
	3659	SV sv_yes_mark = NULL; / last mark name we have seen
	3660	during a successful match */
	3661	U32 lastopen = 0; /* last open we saw */
	3662	bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
	3663	SV* const oreplsv = GvSVn(PL_replgv);
	3664	/* these three flags are set by various ops to signal information to
	3665	* the very next op. They have a useful lifetime of exactly one loop
	3666	* iteration, and are not preserved or restored by state pushes/pops
	3667	*/
	3668	bool sw = 0; /* the condition value in (?(cond)a\|b) */
	3669	bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
	3670	int logical = 0; /* the following EVAL is:
	3671	0: (?{...})
	3672	1: (?(?{...})X\|Y)
	3673	2: (??{...})
	3674	or the following IFMATCH/UNLESSM is:
	3675	false: plain (?=foo)
	3676	true: used as a condition: (?(?=foo))
	3677	*/
	3678	PAD* last_pad = NULL;
	3679	dMULTICALL;
	3680	I32 gimme = G_SCALAR;
	3681	CV caller_cv = NULL; / who called us */
	3682	CV last_pushed_cv = NULL; / most recently called (?{}) CV */
	3683	CHECKPOINT runops_cp; /* savestack position before executing EVAL */
	3684	U32 maxopenparen = 0; /* max '(' index seen so far */
	3685	int to_complement; /* Invert the result? */
	3686	_char_class_number classnum;
	3687	bool is_utf8_pat = reginfo->is_utf8_pat;
	3688
	3689	#ifdef DEBUGGING
	3690	GET_RE_DEBUG_FLAGS_DECL;
	3691	#endif
	3692
	3693	/* shut up 'may be used uninitialized' compiler warnings for dMULTICALL */
	3694	multicall_oldcatch = 0;
	3695	multicall_cv = NULL;
	3696	cx = NULL;
	3697	PERL_UNUSED_VAR(multicall_cop);
	3698	PERL_UNUSED_VAR(newsp);
	3699
	3700
	3701	PERL_ARGS_ASSERT_REGMATCH;
	3702
	3703	DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
	3704	PerlIO_printf(Perl_debug_log,"regmatch start\n");
	3705	}));
	3706
	3707	st = PL_regmatch_state;
	3708
	3709	/* Note that nextchr is a byte even in UTF */
	3710	SET_nextchr;
	3711	scan = prog;
	3712	while (scan != NULL) {
	3713
	3714	DEBUG_EXECUTE_r( {
	3715	SV * const prop = sv_newmortal();
	3716	regnode *rnext=regnext(scan);
	3717	DUMP_EXEC_POS( locinput, scan, utf8_target );
	3718	regprop(rex, prop, scan);
	3719
	3720	PerlIO_printf(Perl_debug_log,
	3721	"%3"IVdf":%*s%s(%"IVdf")\n",
	3722	(IV)(scan - rexi->program), depth*2, "",
	3723	SvPVX_const(prop),
	3724	(PL_regkind[OP(scan)] == END \|\| !rnext) ?
	3725	0 : (IV)(rnext - rexi->program));
	3726	});
	3727
	3728	next = scan + NEXT_OFF(scan);
	3729	if (next == scan)
	3730	next = NULL;
	3731	state_num = OP(scan);
	3732
	3733	reenter_switch:
	3734	to_complement = 0;
	3735
	3736	SET_nextchr;
	3737	assert(nextchr < 256 && (nextchr >= 0 \|\| nextchr == NEXTCHR_EOS));
	3738
	3739	switch (state_num) {
	3740	case BOL: /* /^../ */
	3741	if (locinput == reginfo->strbeg)
	3742	break;
	3743	sayNO;
	3744
	3745	case MBOL: /* /^../m */
	3746	if (locinput == reginfo->strbeg \|\|
	3747	(!NEXTCHR_IS_EOS && locinput[-1] == '\n'))
	3748	{
	3749	break;
	3750	}
	3751	sayNO;
	3752
	3753	case SBOL: /* /^../s */
	3754	if (locinput == reginfo->strbeg)
	3755	break;
	3756	sayNO;
	3757
	3758	case GPOS: /* \G */
	3759	if (locinput == reginfo->ganch)
	3760	break;
	3761	sayNO;
	3762
	3763	case KEEPS: /* \K */
	3764	/* update the startpoint */
	3765	st->u.keeper.val = rex->offs[0].start;
	3766	rex->offs[0].start = locinput - reginfo->strbeg;
	3767	PUSH_STATE_GOTO(KEEPS_next, next, locinput);
	3768	assert(0); /NOTREACHED/
	3769	case KEEPS_next_fail:
	3770	/* rollback the start point change */
	3771	rex->offs[0].start = st->u.keeper.val;
	3772	sayNO_SILENT;
	3773	assert(0); /NOTREACHED/
	3774
	3775	case MEOL: /* /..$/m */
	3776	if (!NEXTCHR_IS_EOS && nextchr != '\n')
	3777	sayNO;
	3778	break;
	3779
	3780	case EOL: /* /..$/ */
	3781	/* FALL THROUGH */
	3782	case SEOL: /* /..$/s */
	3783	if (!NEXTCHR_IS_EOS && nextchr != '\n')
	3784	sayNO;
	3785	if (reginfo->strend - locinput > 1)
	3786	sayNO;
	3787	break;
	3788
	3789	case EOS: /* \z */
	3790	if (!NEXTCHR_IS_EOS)
	3791	sayNO;
	3792	break;
	3793
	3794	case SANY: /* /./s */
	3795	if (NEXTCHR_IS_EOS)
	3796	sayNO;
	3797	goto increment_locinput;
	3798
	3799	case CANY: /* \C */
	3800	if (NEXTCHR_IS_EOS)
	3801	sayNO;
	3802	locinput++;
	3803	break;
	3804
	3805	case REG_ANY: /* /./ */
	3806	if ((NEXTCHR_IS_EOS) \|\| nextchr == '\n')
	3807	sayNO;
	3808	goto increment_locinput;
	3809
	3810
	3811	#undef ST
	3812	#define ST st->u.trie
	3813	case TRIEC: /* (ab\|cd) with known charclass */
	3814	/* In this case the charclass data is available inline so
	3815	we can fail fast without a lot of extra overhead.
	3816	*/
	3817	if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
	3818	DEBUG_EXECUTE_r(
	3819	PerlIO_printf(Perl_debug_log,
	3820	"%*s %sfailed to match trie start class...%s\n",
	3821	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3822	);
	3823	sayNO_SILENT;
	3824	assert(0); /* NOTREACHED */
	3825	}
	3826	/* FALL THROUGH */
	3827	case TRIE: /* (ab\|cd) */
	3828	/* the basic plan of execution of the trie is:
	3829	* At the beginning, run though all the states, and
	3830	* find the longest-matching word. Also remember the position
	3831	* of the shortest matching word. For example, this pattern:
	3832	* 1 2 3 4 5
	3833	* ab\|a\|x\|abcd\|abc
	3834	* when matched against the string "abcde", will generate
	3835	* accept states for all words except 3, with the longest
	3836	* matching word being 4, and the shortest being 2 (with
	3837	* the position being after char 1 of the string).
	3838	*
	3839	* Then for each matching word, in word order (i.e. 1,2,4,5),
	3840	* we run the remainder of the pattern; on each try setting
	3841	* the current position to the character following the word,
	3842	* returning to try the next word on failure.
	3843	*
	3844	* We avoid having to build a list of words at runtime by
	3845	* using a compile-time structure, wordinfo[].prev, which
	3846	* gives, for each word, the previous accepting word (if any).
	3847	* In the case above it would contain the mappings 1->2, 2->0,
	3848	* 3->0, 4->5, 5->1. We can use this table to generate, from
	3849	* the longest word (4 above), a list of all words, by
	3850	* following the list of prev pointers; this gives us the
	3851	* unordered list 4,5,1,2. Then given the current word we have
	3852	* just tried, we can go through the list and find the
	3853	* next-biggest word to try (so if we just failed on word 2,
	3854	* the next in the list is 4).
	3855	*
	3856	* Since at runtime we don't record the matching position in
	3857	* the string for each word, we have to work that out for
	3858	* each word we're about to process. The wordinfo table holds
	3859	* the character length of each word; given that we recorded
	3860	* at the start: the position of the shortest word and its
	3861	* length in chars, we just need to move the pointer the
	3862	* difference between the two char lengths. Depending on
	3863	* Unicode status and folding, that's cheap or expensive.
	3864	*
	3865	* This algorithm is optimised for the case where there are only a
	3866	* small number of accept states, i.e. 0,1, or maybe 2.
	3867	* With lots of accept states, and having to try all of them,
	3868	* it becomes quadratic on number of accept states to find all
	3869	* the next words.
	3870	*/
	3871
	3872	{
	3873	/* what type of TRIE am I? (utf8 makes this contextual) */
	3874	DECL_TRIE_TYPE(scan);
	3875
	3876	/* what trie are we using right now */
	3877	reg_trie_data * const trie
	3878	= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
	3879	HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
	3880	U32 state = trie->startstate;
	3881
	3882	if ( trie->bitmap
	3883	&& (NEXTCHR_IS_EOS \|\| !TRIE_BITMAP_TEST(trie, nextchr)))
	3884	{
	3885	if (trie->states[ state ].wordnum) {
	3886	DEBUG_EXECUTE_r(
	3887	PerlIO_printf(Perl_debug_log,
	3888	"%*s %smatched empty string...%s\n",
	3889	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3890	);
	3891	if (!trie->jump)
	3892	break;
	3893	} else {
	3894	DEBUG_EXECUTE_r(
	3895	PerlIO_printf(Perl_debug_log,
	3896	"%*s %sfailed to match trie start class...%s\n",
	3897	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3898	);
	3899	sayNO_SILENT;
	3900	}
	3901	}
	3902
	3903	{
	3904	U8 uc = ( U8 )locinput;
	3905
	3906	STRLEN len = 0;
	3907	STRLEN foldlen = 0;
	3908	U8 uscan = (U8)NULL;
	3909	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	3910	U32 charcount = 0; /* how many input chars we have matched */
	3911	U32 accepted = 0; /* have we seen any accepting states? */
	3912
	3913	ST.jump = trie->jump;
	3914	ST.me = scan;
	3915	ST.firstpos = NULL;
	3916	ST.longfold = FALSE; /* char longer if folded => it's harder */
	3917	ST.nextword = 0;
	3918
	3919	/* fully traverse the TRIE; note the position of the
	3920	shortest accept state and the wordnum of the longest
	3921	accept state */
	3922
	3923	while ( state && uc <= (U8*)(reginfo->strend) ) {
	3924	U32 base = trie->states[ state ].trans.base;
	3925	UV uvc = 0;
	3926	U16 charid = 0;
	3927	U16 wordnum;
	3928	wordnum = trie->states[ state ].wordnum;
	3929
	3930	if (wordnum) { /* it's an accept state */
	3931	if (!accepted) {
	3932	accepted = 1;
	3933	/* record first match position */
	3934	if (ST.longfold) {
	3935	ST.firstpos = (U8*)locinput;
	3936	ST.firstchars = 0;
	3937	}
	3938	else {
	3939	ST.firstpos = uc;
	3940	ST.firstchars = charcount;
	3941	}
	3942	}
	3943	if (!ST.nextword \|\| wordnum < ST.nextword)
	3944	ST.nextword = wordnum;
	3945	ST.topword = wordnum;
	3946	}
	3947
	3948	DEBUG_TRIE_EXECUTE_r({
	3949	DUMP_EXEC_POS( (char *)uc, scan, utf8_target );
	3950	PerlIO_printf( Perl_debug_log,
	3951	"%*s %sState: %4"UVxf" Accepted: %c ",
	3952	2+depth * 2, "", PL_colors[4],
	3953	(UV)state, (accepted ? 'Y' : 'N'));
	3954	});
	3955
	3956	/* read a char and goto next state */
	3957	if ( base && (foldlen \|\| uc < (U8*)(reginfo->strend))) {
	3958	I32 offset;
	3959	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	3960	uscan, len, uvc, charid, foldlen,
	3961	foldbuf, uniflags);
	3962	charcount++;
	3963	if (foldlen>0)
	3964	ST.longfold = TRUE;
	3965	if (charid &&
	3966	( ((offset =
	3967	base + charid - 1 - trie->uniquecharcount)) >= 0)
	3968
	3969	&& ((U32)offset < trie->lasttrans)
	3970	&& trie->trans[offset].check == state)
	3971	{
	3972	state = trie->trans[offset].next;
	3973	}
	3974	else {
	3975	state = 0;
	3976	}
	3977	uc += len;
	3978
	3979	}
	3980	else {
	3981	state = 0;
	3982	}
	3983	DEBUG_TRIE_EXECUTE_r(
	3984	PerlIO_printf( Perl_debug_log,
	3985	"Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
	3986	charid, uvc, (UV)state, PL_colors[5] );
	3987	);
	3988	}
	3989	if (!accepted)
	3990	sayNO;
	3991
	3992	/* calculate total number of accept states */
	3993	{
	3994	U16 w = ST.topword;
	3995	accepted = 0;
	3996	while (w) {
	3997	w = trie->wordinfo[w].prev;
	3998	accepted++;
	3999	}
	4000	ST.accepted = accepted;
	4001	}
	4002
	4003	DEBUG_EXECUTE_r(
	4004	PerlIO_printf( Perl_debug_log,
	4005	"%*s %sgot %"IVdf" possible matches%s\n",
	4006	REPORT_CODE_OFF + depth * 2, "",
	4007	PL_colors[4], (IV)ST.accepted, PL_colors[5] );
	4008	);
	4009	goto trie_first_try; /* jump into the fail handler */
	4010	}}
	4011	assert(0); /* NOTREACHED */
	4012
	4013	case TRIE_next_fail: /* we failed - try next alternative */
	4014	{
	4015	U8 *uc;
	4016	if ( ST.jump) {
	4017	REGCP_UNWIND(ST.cp);
	4018	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	4019	}
	4020	if (!--ST.accepted) {
	4021	DEBUG_EXECUTE_r({
	4022	PerlIO_printf( Perl_debug_log,
	4023	"%*s %sTRIE failed...%s\n",
	4024	REPORT_CODE_OFF+depth*2, "",
	4025	PL_colors[4],
	4026	PL_colors[5] );
	4027	});
	4028	sayNO_SILENT;
	4029	}
	4030	{
	4031	/* Find next-highest word to process. Note that this code
	4032	* is O(N^2) per trie run (O(N) per branch), so keep tight */
	4033	U16 min = 0;
	4034	U16 word;
	4035	U16 const nextword = ST.nextword;
	4036	reg_trie_wordinfo * const wordinfo
	4037	= ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
	4038	for (word=ST.topword; word; word=wordinfo[word].prev) {
	4039	if (word > nextword && (!min \|\| word < min))
	4040	min = word;
	4041	}
	4042	ST.nextword = min;
	4043	}
	4044
	4045	trie_first_try:
	4046	if (do_cutgroup) {
	4047	do_cutgroup = 0;
	4048	no_final = 0;
	4049	}
	4050
	4051	if ( ST.jump) {
	4052	ST.lastparen = rex->lastparen;
	4053	ST.lastcloseparen = rex->lastcloseparen;
	4054	REGCP_SET(ST.cp);
	4055	}
	4056
	4057	/* find start char of end of current word */
	4058	{
	4059	U32 chars; /* how many chars to skip */
	4060	reg_trie_data * const trie
	4061	= (reg_trie_data*)rexi->data->data[ARG(ST.me)];
	4062
	4063	assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
	4064	>= ST.firstchars);
	4065	chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
	4066	- ST.firstchars;
	4067	uc = ST.firstpos;
	4068
	4069	if (ST.longfold) {
	4070	/* the hard option - fold each char in turn and find
	4071	* its folded length (which may be different) */
	4072	U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
	4073	STRLEN foldlen;
	4074	STRLEN len;
	4075	UV uvc;
	4076	U8 *uscan;
	4077
	4078	while (chars) {
	4079	if (utf8_target) {
	4080	uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len,
	4081	uniflags);
	4082	uc += len;
	4083	}
	4084	else {
	4085	uvc = *uc;
	4086	uc++;
	4087	}
	4088	uvc = to_uni_fold(uvc, foldbuf, &foldlen);
	4089	uscan = foldbuf;
	4090	while (foldlen) {
	4091	if (!--chars)
	4092	break;
	4093	uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len,
	4094	uniflags);
	4095	uscan += len;
	4096	foldlen -= len;
	4097	}
	4098	}
	4099	}
	4100	else {
	4101	if (utf8_target)
	4102	while (chars--)
	4103	uc += UTF8SKIP(uc);
	4104	else
	4105	uc += chars;
	4106	}
	4107	}
	4108
	4109	scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
	4110	? ST.jump[ST.nextword]
	4111	: NEXT_OFF(ST.me));
	4112
	4113	DEBUG_EXECUTE_r({
	4114	PerlIO_printf( Perl_debug_log,
	4115	"%*s %sTRIE matched word #%d, continuing%s\n",
	4116	REPORT_CODE_OFF+depth*2, "",
	4117	PL_colors[4],
	4118	ST.nextword,
	4119	PL_colors[5]
	4120	);
	4121	});
	4122
	4123	if (ST.accepted > 1 \|\| has_cutgroup) {
	4124	PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc);
	4125	assert(0); /* NOTREACHED */
	4126	}
	4127	/* only one choice left - just continue */
	4128	DEBUG_EXECUTE_r({
	4129	AV *const trie_words
	4130	= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
	4131	SV ** const tmp = av_fetch( trie_words,
	4132	ST.nextword-1, 0 );
	4133	SV *sv= tmp ? sv_newmortal() : NULL;
	4134
	4135	PerlIO_printf( Perl_debug_log,
	4136	"%*s %sonly one match left, short-circuiting: #%d <%s>%s\n",
	4137	REPORT_CODE_OFF+depth*2, "", PL_colors[4],
	4138	ST.nextword,
	4139	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	4140	PL_colors[0], PL_colors[1],
	4141	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)\|PERL_PV_ESCAPE_NONASCII
	4142	)
	4143	: "not compiled under -Dr",
	4144	PL_colors[5] );
	4145	});
	4146
	4147	locinput = (char*)uc;
	4148	continue; /* execute rest of RE */
	4149	assert(0); /* NOTREACHED */
	4150	}
	4151	#undef ST
	4152
	4153	case EXACT: { /* /abc/ */
	4154	char *s = STRING(scan);
	4155	ln = STR_LEN(scan);
	4156	if (utf8_target != is_utf8_pat) {
	4157	/* The target and the pattern have differing utf8ness. */
	4158	char *l = locinput;
	4159	const char * const e = s + ln;
	4160
	4161	if (utf8_target) {
	4162	/* The target is utf8, the pattern is not utf8.
	4163	* Above-Latin1 code points can't match the pattern;
	4164	* invariants match exactly, and the other Latin1 ones need
	4165	* to be downgraded to a single byte in order to do the
	4166	* comparison. (If we could be confident that the target
	4167	* is not malformed, this could be refactored to have fewer
	4168	* tests by just assuming that if the first bytes match, it
	4169	* is an invariant, but there are tests in the test suite
	4170	* dealing with (??{...}) which violate this) */
	4171	while (s < e) {
	4172	if (l >= reginfo->strend
	4173	\|\| UTF8_IS_ABOVE_LATIN1(* (U8*) l))
	4174	{
	4175	sayNO;
	4176	}
	4177	if (UTF8_IS_INVARIANT((U8)l)) {
	4178	if (l != s) {
	4179	sayNO;
	4180	}
	4181	l++;
	4182	}
	4183	else {
	4184	if (TWO_BYTE_UTF8_TO_NATIVE(l, (l+1)) != * (U8*) s)
	4185	{
	4186	sayNO;
	4187	}
	4188	l += 2;
	4189	}
	4190	s++;
	4191	}
	4192	}
	4193	else {
	4194	/* The target is not utf8, the pattern is utf8. */
	4195	while (s < e) {
	4196	if (l >= reginfo->strend
	4197	\|\| UTF8_IS_ABOVE_LATIN1(* (U8*) s))
	4198	{
	4199	sayNO;
	4200	}
	4201	if (UTF8_IS_INVARIANT((U8)s)) {
	4202	if (s != l) {
	4203	sayNO;
	4204	}
	4205	s++;
	4206	}
	4207	else {
	4208	if (TWO_BYTE_UTF8_TO_NATIVE(s, (s+1)) != * (U8*) l)
	4209	{
	4210	sayNO;
	4211	}
	4212	s += 2;
	4213	}
	4214	l++;
	4215	}
	4216	}
	4217	locinput = l;
	4218	}
	4219	else {
	4220	/* The target and the pattern have the same utf8ness. */
	4221	/* Inline the first character, for speed. */
	4222	if (reginfo->strend - locinput < ln
	4223	\|\| UCHARAT(s) != nextchr
	4224	\|\| (ln > 1 && memNE(s, locinput, ln)))
	4225	{
	4226	sayNO;
	4227	}
	4228	locinput += ln;
	4229	}
	4230	break;
	4231	}
	4232
	4233	case EXACTFL: { /* /abc/il */
	4234	re_fold_t folder;
	4235	const U8 * fold_array;
	4236	const char * s;
	4237	U32 fold_utf8_flags;
	4238
	4239	RX_MATCH_TAINTED_on(reginfo->prog);
	4240	folder = foldEQ_locale;
	4241	fold_array = PL_fold_locale;
	4242	fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
	4243	goto do_exactf;
	4244
	4245	case EXACTFU_SS: /* /\x{df}/iu */
	4246	case EXACTFU: /* /abc/iu */
	4247	folder = foldEQ_latin1;
	4248	fold_array = PL_fold_latin1;
	4249	fold_utf8_flags = is_utf8_pat ? FOLDEQ_S1_ALREADY_FOLDED : 0;
	4250	goto do_exactf;
	4251
	4252	case EXACTFA_NO_TRIE: /* This node only generated for non-utf8
	4253	patterns */
	4254	assert(! is_utf8_pat);
	4255	/* FALL THROUGH */
	4256	case EXACTFA: /* /abc/iaa */
	4257	folder = foldEQ_latin1;
	4258	fold_array = PL_fold_latin1;
	4259	fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4260	goto do_exactf;
	4261
	4262	case EXACTF: /* /abc/i This node only generated for
	4263	non-utf8 patterns */
	4264	assert(! is_utf8_pat);
	4265	folder = foldEQ;
	4266	fold_array = PL_fold;
	4267	fold_utf8_flags = 0;
	4268
	4269	do_exactf:
	4270	s = STRING(scan);
	4271	ln = STR_LEN(scan);
	4272
	4273	if (utf8_target \|\| is_utf8_pat \|\| state_num == EXACTFU_SS) {
	4274	/* Either target or the pattern are utf8, or has the issue where
	4275	* the fold lengths may differ. */
	4276	const char * const l = locinput;
	4277	char *e = reginfo->strend;
	4278
	4279	if (! foldEQ_utf8_flags(s, 0, ln, is_utf8_pat,
	4280	l, &e, 0, utf8_target, fold_utf8_flags))
	4281	{
	4282	sayNO;
	4283	}
	4284	locinput = e;
	4285	break;
	4286	}
	4287
	4288	/* Neither the target nor the pattern are utf8 */
	4289	if (UCHARAT(s) != nextchr
	4290	&& !NEXTCHR_IS_EOS
	4291	&& UCHARAT(s) != fold_array[nextchr])
	4292	{
	4293	sayNO;
	4294	}
	4295	if (reginfo->strend - locinput < ln)
	4296	sayNO;
	4297	if (ln > 1 && ! folder(s, locinput, ln))
	4298	sayNO;
	4299	locinput += ln;
	4300	break;
	4301	}
	4302
	4303	/* XXX Could improve efficiency by separating these all out using a
	4304	* macro or in-line function. At that point regcomp.c would no longer
	4305	* have to set the FLAGS fields of these */
	4306	case BOUNDL: /* /\b/l */
	4307	case NBOUNDL: /* /\B/l */
	4308	RX_MATCH_TAINTED_on(reginfo->prog);
	4309	/* FALL THROUGH */
	4310	case BOUND: /* /\b/ */
	4311	case BOUNDU: /* /\b/u */
	4312	case BOUNDA: /* /\b/a */
	4313	case NBOUND: /* /\B/ */
	4314	case NBOUNDU: /* /\B/u */
	4315	case NBOUNDA: /* /\B/a */
	4316	/* was last char in word? */
	4317	if (utf8_target
	4318	&& FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET
	4319	&& FLAGS(scan) != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
	4320	{
	4321	if (locinput == reginfo->strbeg)
	4322	ln = '\n';
	4323	else {
	4324	const U8 * const r =
	4325	reghop3((U8)locinput, -1, (U8)(reginfo->strbeg));
	4326
	4327	ln = utf8n_to_uvchr(r, (U8*) reginfo->strend - r,
	4328	0, uniflags);
	4329	}
	4330	if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
	4331	ln = isWORDCHAR_uni(ln);
	4332	if (NEXTCHR_IS_EOS)
	4333	n = 0;
	4334	else {
	4335	LOAD_UTF8_CHARCLASS_ALNUM();
	4336	n = swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)locinput,
	4337	utf8_target);
	4338	}
	4339	}
	4340	else {
	4341	ln = isWORDCHAR_LC_uvchr(ln);
	4342	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_LC_utf8((U8*)locinput);
	4343	}
	4344	}
	4345	else {
	4346
	4347	/* Here the string isn't utf8, or is utf8 and only ascii
	4348	* characters are to match \w. In the latter case looking at
	4349	* the byte just prior to the current one may be just the final
	4350	* byte of a multi-byte character. This is ok. There are two
	4351	* cases:
	4352	* 1) it is a single byte character, and then the test is doing
	4353	* just what it's supposed to.
	4354	* 2) it is a multi-byte character, in which case the final
	4355	* byte is never mistakable for ASCII, and so the test
	4356	* will say it is not a word character, which is the
	4357	* correct answer. */
	4358	ln = (locinput != reginfo->strbeg) ?
	4359	UCHARAT(locinput - 1) : '\n';
	4360	switch (FLAGS(scan)) {
	4361	case REGEX_UNICODE_CHARSET:
	4362	ln = isWORDCHAR_L1(ln);
	4363	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_L1(nextchr);
	4364	break;
	4365	case REGEX_LOCALE_CHARSET:
	4366	ln = isWORDCHAR_LC(ln);
	4367	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_LC(nextchr);
	4368	break;
	4369	case REGEX_DEPENDS_CHARSET:
	4370	ln = isWORDCHAR(ln);
	4371	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR(nextchr);
	4372	break;
	4373	case REGEX_ASCII_RESTRICTED_CHARSET:
	4374	case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
	4375	ln = isWORDCHAR_A(ln);
	4376	n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_A(nextchr);
	4377	break;
	4378	default:
	4379	Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
	4380	break;
	4381	}
	4382	}
	4383	/* Note requires that all BOUNDs be lower than all NBOUNDs in
	4384	* regcomp.sym */
	4385	if (((!ln) == (!n)) == (OP(scan) < NBOUND))
	4386	sayNO;
	4387	break;
	4388
	4389	case ANYOF: /* /[abc]/ */
	4390	if (NEXTCHR_IS_EOS)
	4391	sayNO;
	4392	if (utf8_target) {
	4393	if (!reginclass(rex, scan, (U8)locinput, (U8)reginfo->strend,
	4394	utf8_target))
	4395	sayNO;
	4396	locinput += UTF8SKIP(locinput);
	4397	}
	4398	else {
	4399	if (!REGINCLASS(rex, scan, (U8*)locinput))
	4400	sayNO;
	4401	locinput++;
	4402	}
	4403	break;
	4404
	4405	/* The argument (FLAGS) to all the POSIX node types is the class number
	4406	* */
	4407
	4408	case NPOSIXL: /* \W or [:^punct:] etc. under /l */
	4409	to_complement = 1;
	4410	/* FALLTHROUGH */
	4411
	4412	case POSIXL: /* \w or [:punct:] etc. under /l */
	4413	if (NEXTCHR_IS_EOS)
	4414	sayNO;
	4415
	4416	/* The locale hasn't influenced the outcome before this, so defer
	4417	* tainting until now */
	4418	RX_MATCH_TAINTED_on(reginfo->prog);
	4419
	4420	/* Use isFOO_lc() for characters within Latin1. (Note that
	4421	* UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
	4422	* wouldn't be invariant) */
	4423	if (UTF8_IS_INVARIANT(nextchr) \|\| ! utf8_target) {
	4424	if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextchr)))) {
	4425	sayNO;
	4426	}
	4427	}
	4428	else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
	4429	if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan),
	4430	(U8) TWO_BYTE_UTF8_TO_NATIVE(nextchr,
	4431	*(locinput + 1))))))
	4432	{
	4433	sayNO;
	4434	}
	4435	}
	4436	else { /* Here, must be an above Latin-1 code point */
	4437	goto utf8_posix_not_eos;
	4438	}
	4439
	4440	/* Here, must be utf8 */
	4441	locinput += UTF8SKIP(locinput);
	4442	break;
	4443
	4444	case NPOSIXD: /* \W or [:^punct:] etc. under /d */
	4445	to_complement = 1;
	4446	/* FALLTHROUGH */
	4447
	4448	case POSIXD: /* \w or [:punct:] etc. under /d */
	4449	if (utf8_target) {
	4450	goto utf8_posix;
	4451	}
	4452	goto posixa;
	4453
	4454	case NPOSIXA: /* \W or [:^punct:] etc. under /a */
	4455
	4456	if (NEXTCHR_IS_EOS) {
	4457	sayNO;
	4458	}
	4459
	4460	/* All UTF-8 variants match */
	4461	if (! UTF8_IS_INVARIANT(nextchr)) {
	4462	goto increment_locinput;
	4463	}
	4464
	4465	to_complement = 1;
	4466	/* FALLTHROUGH */
	4467
	4468	case POSIXA: /* \w or [:punct:] etc. under /a */
	4469
	4470	posixa:
	4471	/* We get here through POSIXD, NPOSIXD, and NPOSIXA when not in
	4472	* UTF-8, and also from NPOSIXA even in UTF-8 when the current
	4473	* character is a single byte */
	4474
	4475	if (NEXTCHR_IS_EOS
	4476	\|\| ! (to_complement ^ cBOOL(_generic_isCC_A(nextchr,
	4477	FLAGS(scan)))))
	4478	{
	4479	sayNO;
	4480	}
	4481
	4482	/* Here we are either not in utf8, or we matched a utf8-invariant,
	4483	* so the next char is the next byte */
	4484	locinput++;
	4485	break;
	4486
	4487	case NPOSIXU: /* \W or [:^punct:] etc. under /u */
	4488	to_complement = 1;
	4489	/* FALLTHROUGH */
	4490
	4491	case POSIXU: /* \w or [:punct:] etc. under /u */
	4492	utf8_posix:
	4493	if (NEXTCHR_IS_EOS) {
	4494	sayNO;
	4495	}
	4496	utf8_posix_not_eos:
	4497
	4498	/* Use _generic_isCC() for characters within Latin1. (Note that
	4499	* UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
	4500	* wouldn't be invariant) */
	4501	if (UTF8_IS_INVARIANT(nextchr) \|\| ! utf8_target) {
	4502	if (! (to_complement ^ cBOOL(_generic_isCC(nextchr,
	4503	FLAGS(scan)))))
	4504	{
	4505	sayNO;
	4506	}
	4507	locinput++;
	4508	}
	4509	else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
	4510	if (! (to_complement
	4511	^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(nextchr,
	4512	*(locinput + 1)),
	4513	FLAGS(scan)))))
	4514	{
	4515	sayNO;
	4516	}
	4517	locinput += 2;
	4518	}
	4519	else { /* Handle above Latin-1 code points */
	4520	classnum = (_char_class_number) FLAGS(scan);
	4521	if (classnum < _FIRST_NON_SWASH_CC) {
	4522
	4523	/* Here, uses a swash to find such code points. Load it if
	4524	* not done already */
	4525	if (! PL_utf8_swash_ptrs[classnum]) {
	4526	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	4527	PL_utf8_swash_ptrs[classnum]
	4528	= _core_swash_init("utf8",
	4529	swash_property_names[classnum],
	4530	&PL_sv_undef, 1, 0, NULL, &flags);
	4531	}
	4532	if (! (to_complement
	4533	^ cBOOL(swash_fetch(PL_utf8_swash_ptrs[classnum],
	4534	(U8 *) locinput, TRUE))))
	4535	{
	4536	sayNO;
	4537	}
	4538	}
	4539	else { /* Here, uses macros to find above Latin-1 code points */
	4540	switch (classnum) {
	4541	case _CC_ENUM_SPACE: /* XXX would require separate
	4542	code if we revert the change
	4543	of \v matching this */
	4544	case _CC_ENUM_PSXSPC:
	4545	if (! (to_complement
	4546	^ cBOOL(is_XPERLSPACE_high(locinput))))
	4547	{
	4548	sayNO;
	4549	}
	4550	break;
	4551	case _CC_ENUM_BLANK:
	4552	if (! (to_complement
	4553	^ cBOOL(is_HORIZWS_high(locinput))))
	4554	{
	4555	sayNO;
	4556	}
	4557	break;
	4558	case _CC_ENUM_XDIGIT:
	4559	if (! (to_complement
	4560	^ cBOOL(is_XDIGIT_high(locinput))))
	4561	{
	4562	sayNO;
	4563	}
	4564	break;
	4565	case _CC_ENUM_VERTSPACE:
	4566	if (! (to_complement
	4567	^ cBOOL(is_VERTWS_high(locinput))))
	4568	{
	4569	sayNO;
	4570	}
	4571	break;
	4572	default: /* The rest, e.g. [:cntrl:], can't match
	4573	above Latin1 */
	4574	if (! to_complement) {
	4575	sayNO;
	4576	}
	4577	break;
	4578	}
	4579	}
	4580	locinput += UTF8SKIP(locinput);
	4581	}
	4582	break;
	4583
	4584	case CLUMP: /* Match \X: logical Unicode character. This is defined as
	4585	a Unicode extended Grapheme Cluster */
	4586	/* From http://www.unicode.org/reports/tr29 (5.2 version). An
	4587	extended Grapheme Cluster is:
	4588
	4589	CR LF
	4590	\| Prepend* Begin Extend*
	4591	\| .
	4592
	4593	Begin is: ( Special_Begin \| ! Control )
	4594	Special_Begin is: ( Regional-Indicator+ \| Hangul-syllable )
	4595	Extend is: ( Grapheme_Extend \| Spacing_Mark )
	4596	Control is: [ GCB_Control \| CR \| LF ]
	4597	Hangul-syllable is: ( T+ \| ( L* ( L \| ( LVT \| ( V \| LV ) V* ) T* ) ))
	4598
	4599	If we create a 'Regular_Begin' = Begin - Special_Begin, then
	4600	we can rewrite
	4601
	4602	Begin is ( Regular_Begin + Special Begin )
	4603
	4604	It turns out that 98.4% of all Unicode code points match
	4605	Regular_Begin. Doing it this way eliminates a table match in
	4606	the previous implementation for almost all Unicode code points.
	4607
	4608	There is a subtlety with Prepend* which showed up in testing.
	4609	Note that the Begin, and only the Begin is required in:
	4610	\| Prepend* Begin Extend*
	4611	Also, Begin contains '! Control'. A Prepend must be a
	4612	'! Control', which means it must also be a Begin. What it
	4613	comes down to is that if we match Prepend* and then find no
	4614	suitable Begin afterwards, that if we backtrack the last
	4615	Prepend, that one will be a suitable Begin.
	4616	*/
	4617
	4618	if (NEXTCHR_IS_EOS)
	4619	sayNO;
	4620	if (! utf8_target) {
	4621
	4622	/* Match either CR LF or '.', as all the other possibilities
	4623	* require utf8 */
	4624	locinput++; /* Match the . or CR */
	4625	if (nextchr == '\r' /* And if it was CR, and the next is LF,
	4626	match the LF */
	4627	&& locinput < reginfo->strend
	4628	&& UCHARAT(locinput) == '\n')
	4629	{
	4630	locinput++;
	4631	}
	4632	}
	4633	else {
	4634
	4635	/* Utf8: See if is ( CR LF ); already know that locinput <
	4636	* reginfo->strend, so locinput+1 is in bounds */
	4637	if ( nextchr == '\r' && locinput+1 < reginfo->strend
	4638	&& UCHARAT(locinput + 1) == '\n')
	4639	{
	4640	locinput += 2;
	4641	}
	4642	else {
	4643	STRLEN len;
	4644
	4645	/* In case have to backtrack to beginning, then match '.' */
	4646	char *starting = locinput;
	4647
	4648	/* In case have to backtrack the last prepend */
	4649	char *previous_prepend = NULL;
	4650
	4651	LOAD_UTF8_CHARCLASS_GCB();
	4652
	4653	/* Match (prepend)* */
	4654	while (locinput < reginfo->strend
	4655	&& (len = is_GCB_Prepend_utf8(locinput)))
	4656	{
	4657	previous_prepend = locinput;
	4658	locinput += len;
	4659	}
	4660
	4661	/* As noted above, if we matched a prepend character, but
	4662	* the next thing won't match, back off the last prepend we
	4663	* matched, as it is guaranteed to match the begin */
	4664	if (previous_prepend
	4665	&& (locinput >= reginfo->strend
	4666	\|\| (! swash_fetch(PL_utf8_X_regular_begin,
	4667	(U8*)locinput, utf8_target)
	4668	&& ! is_GCB_SPECIAL_BEGIN_START_utf8(locinput)))
	4669	)
	4670	{
	4671	locinput = previous_prepend;
	4672	}
	4673
	4674	/* Note that here we know reginfo->strend > locinput, as we
	4675	* tested that upon input to this switch case, and if we
	4676	* moved locinput forward, we tested the result just above
	4677	* and it either passed, or we backed off so that it will
	4678	* now pass */
	4679	if (swash_fetch(PL_utf8_X_regular_begin,
	4680	(U8*)locinput, utf8_target)) {
	4681	locinput += UTF8SKIP(locinput);
	4682	}
	4683	else if (! is_GCB_SPECIAL_BEGIN_START_utf8(locinput)) {
	4684
	4685	/* Here did not match the required 'Begin' in the
	4686	* second term. So just match the very first
	4687	* character, the '.' of the final term of the regex */
	4688	locinput = starting + UTF8SKIP(starting);
	4689	goto exit_utf8;
	4690	} else {
	4691
	4692	/* Here is a special begin. It can be composed of
	4693	* several individual characters. One possibility is
	4694	* RI+ */
	4695	if ((len = is_GCB_RI_utf8(locinput))) {
	4696	locinput += len;
	4697	while (locinput < reginfo->strend
	4698	&& (len = is_GCB_RI_utf8(locinput)))
	4699	{
	4700	locinput += len;
	4701	}
	4702	} else if ((len = is_GCB_T_utf8(locinput))) {
	4703	/* Another possibility is T+ */
	4704	locinput += len;
	4705	while (locinput < reginfo->strend
	4706	&& (len = is_GCB_T_utf8(locinput)))
	4707	{
	4708	locinput += len;
	4709	}
	4710	} else {
	4711
	4712	/* Here, neither RI+ nor T+; must be some other
	4713	* Hangul. That means it is one of the others: L,
	4714	* LV, LVT or V, and matches:
	4715	* L* (L \| LVT T* \| V * V* T* \| LV V* T*) */
	4716
	4717	/* Match L* */
	4718	while (locinput < reginfo->strend
	4719	&& (len = is_GCB_L_utf8(locinput)))
	4720	{
	4721	locinput += len;
	4722	}
	4723
	4724	/* Here, have exhausted L*. If the next character
	4725	* is not an LV, LVT nor V, it means we had to have
	4726	* at least one L, so matches L+ in the original
	4727	* equation, we have a complete hangul syllable.
	4728	* Are done. */
	4729
	4730	if (locinput < reginfo->strend
	4731	&& is_GCB_LV_LVT_V_utf8(locinput))
	4732	{
	4733	/* Otherwise keep going. Must be LV, LVT or V.
	4734	* See if LVT, by first ruling out V, then LV */
	4735	if (! is_GCB_V_utf8(locinput)
	4736	/* All but every TCount one is LV */
	4737	&& (valid_utf8_to_uvchr((U8 *) locinput,
	4738	NULL)
	4739	- SBASE)
	4740	% TCount != 0)
	4741	{
	4742	locinput += UTF8SKIP(locinput);
	4743	} else {
	4744
	4745	/* Must be V or LV. Take it, then match
	4746	* V* */
	4747	locinput += UTF8SKIP(locinput);
	4748	while (locinput < reginfo->strend
	4749	&& (len = is_GCB_V_utf8(locinput)))
	4750	{
	4751	locinput += len;
	4752	}
	4753	}
	4754
	4755	/* And any of LV, LVT, or V can be followed
	4756	* by T* */
	4757	while (locinput < reginfo->strend
	4758	&& (len = is_GCB_T_utf8(locinput)))
	4759	{
	4760	locinput += len;
	4761	}
	4762	}
	4763	}
	4764	}
	4765
	4766	/* Match any extender */
	4767	while (locinput < reginfo->strend
	4768	&& swash_fetch(PL_utf8_X_extend,
	4769	(U8*)locinput, utf8_target))
	4770	{
	4771	locinput += UTF8SKIP(locinput);
	4772	}
	4773	}
	4774	exit_utf8:
	4775	if (locinput > reginfo->strend) sayNO;
	4776	}
	4777	break;
	4778
	4779	case NREFFL: /* /\g{name}/il */
	4780	{ /* The capture buffer cases. The ones beginning with N for the
	4781	named buffers just convert to the equivalent numbered and
	4782	pretend they were called as the corresponding numbered buffer
	4783	op. */
	4784	/* don't initialize these in the declaration, it makes C++
	4785	unhappy */
	4786	const char *s;
	4787	char type;
	4788	re_fold_t folder;
	4789	const U8 *fold_array;
	4790	UV utf8_fold_flags;
	4791
	4792	RX_MATCH_TAINTED_on(reginfo->prog);
	4793	folder = foldEQ_locale;
	4794	fold_array = PL_fold_locale;
	4795	type = REFFL;
	4796	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4797	goto do_nref;
	4798
	4799	case NREFFA: /* /\g{name}/iaa */
	4800	folder = foldEQ_latin1;
	4801	fold_array = PL_fold_latin1;
	4802	type = REFFA;
	4803	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4804	goto do_nref;
	4805
	4806	case NREFFU: /* /\g{name}/iu */
	4807	folder = foldEQ_latin1;
	4808	fold_array = PL_fold_latin1;
	4809	type = REFFU;
	4810	utf8_fold_flags = 0;
	4811	goto do_nref;
	4812
	4813	case NREFF: /* /\g{name}/i */
	4814	folder = foldEQ;
	4815	fold_array = PL_fold;
	4816	type = REFF;
	4817	utf8_fold_flags = 0;
	4818	goto do_nref;
	4819
	4820	case NREF: /* /\g{name}/ */
	4821	type = REF;
	4822	folder = NULL;
	4823	fold_array = NULL;
	4824	utf8_fold_flags = 0;
	4825	do_nref:
	4826
	4827	/* For the named back references, find the corresponding buffer
	4828	* number */
	4829	n = reg_check_named_buff_matched(rex,scan);
	4830
	4831	if ( ! n ) {
	4832	sayNO;
	4833	}
	4834	goto do_nref_ref_common;
	4835
	4836	case REFFL: /* /\1/il */
	4837	RX_MATCH_TAINTED_on(reginfo->prog);
	4838	folder = foldEQ_locale;
	4839	fold_array = PL_fold_locale;
	4840	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4841	goto do_ref;
	4842
	4843	case REFFA: /* /\1/iaa */
	4844	folder = foldEQ_latin1;
	4845	fold_array = PL_fold_latin1;
	4846	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4847	goto do_ref;
	4848
	4849	case REFFU: /* /\1/iu */
	4850	folder = foldEQ_latin1;
	4851	fold_array = PL_fold_latin1;
	4852	utf8_fold_flags = 0;
	4853	goto do_ref;
	4854
	4855	case REFF: /* /\1/i */
	4856	folder = foldEQ;
	4857	fold_array = PL_fold;
	4858	utf8_fold_flags = 0;
	4859	goto do_ref;
	4860
	4861	case REF: /* /\1/ */
	4862	folder = NULL;
	4863	fold_array = NULL;
	4864	utf8_fold_flags = 0;
	4865
	4866	do_ref:
	4867	type = OP(scan);
	4868	n = ARG(scan); /* which paren pair */
	4869
	4870	do_nref_ref_common:
	4871	ln = rex->offs[n].start;
	4872	reginfo->poscache_iter = reginfo->poscache_maxiter; /* Void cache */
	4873	if (rex->lastparen < n \|\| ln == -1)
	4874	sayNO; /* Do not match unless seen CLOSEn. */
	4875	if (ln == rex->offs[n].end)
	4876	break;
	4877
	4878	s = reginfo->strbeg + ln;
	4879	if (type != REF /* REF can do byte comparison */
	4880	&& (utf8_target \|\| type == REFFU))
	4881	{ /* XXX handle REFFL better */
	4882	char * limit = reginfo->strend;
	4883
	4884	/* This call case insensitively compares the entire buffer
	4885	* at s, with the current input starting at locinput, but
	4886	* not going off the end given by reginfo->strend, and
	4887	* returns in <limit> upon success, how much of the
	4888	* current input was matched */
	4889	if (! foldEQ_utf8_flags(s, NULL, rex->offs[n].end - ln, utf8_target,
	4890	locinput, &limit, 0, utf8_target, utf8_fold_flags))
	4891	{
	4892	sayNO;
	4893	}
	4894	locinput = limit;
	4895	break;
	4896	}
	4897
	4898	/* Not utf8: Inline the first character, for speed. */
	4899	if (!NEXTCHR_IS_EOS &&
	4900	UCHARAT(s) != nextchr &&
	4901	(type == REF \|\|
	4902	UCHARAT(s) != fold_array[nextchr]))
	4903	sayNO;
	4904	ln = rex->offs[n].end - ln;
	4905	if (locinput + ln > reginfo->strend)
	4906	sayNO;
	4907	if (ln > 1 && (type == REF
	4908	? memNE(s, locinput, ln)
	4909	: ! folder(s, locinput, ln)))
	4910	sayNO;
	4911	locinput += ln;
	4912	break;
	4913	}
	4914
	4915	case NOTHING: /* null op; e.g. the 'nothing' following
	4916	* the '*' in m{(a+|b)*}' */
	4917	break;
	4918	case TAIL: /* placeholder while compiling (A\|B\|C) */
	4919	break;
	4920
	4921	case BACK: /* ??? doesn't appear to be used ??? */
	4922	break;
	4923
	4924	#undef ST
	4925	#define ST st->u.eval
	4926	{
	4927	SV *ret;
	4928	REGEXP *re_sv;
	4929	regexp *re;
	4930	regexp_internal *rei;
	4931	regnode *startpoint;
	4932
	4933	case GOSTART: /* (?R) */
	4934	case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
	4935	if (cur_eval && cur_eval->locinput==locinput) {
	4936	if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
	4937	Perl_croak(aTHX_ "Infinite recursion in regex");
	4938	if ( ++nochange_depth > max_nochange_depth )
	4939	Perl_croak(aTHX_
	4940	"Pattern subroutine nesting without pos change"
	4941	" exceeded limit in regex");
	4942	} else {
	4943	nochange_depth = 0;
	4944	}
	4945	re_sv = rex_sv;
	4946	re = rex;
	4947	rei = rexi;
	4948	if (OP(scan)==GOSUB) {
	4949	startpoint = scan + ARG2L(scan);
	4950	ST.close_paren = ARG(scan);
	4951	} else {
	4952	startpoint = rei->program+1;
	4953	ST.close_paren = 0;
	4954	}
	4955	goto eval_recurse_doit;
	4956	assert(0); /* NOTREACHED */
	4957
	4958	case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X\|Y)B/ */
	4959	if (cur_eval && cur_eval->locinput==locinput) {
	4960	if ( ++nochange_depth > max_nochange_depth )
	4961	Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
	4962	} else {
	4963	nochange_depth = 0;
	4964	}
	4965	{
	4966	/* execute the code in the {...} */
	4967
	4968	dSP;
	4969	IV before;
	4970	OP * const oop = PL_op;
	4971	COP * const ocurcop = PL_curcop;
	4972	OP *nop;
	4973	CV *newcv;
	4974
	4975	/* save all paren positions */
	4976	regcppush(rex, 0, maxopenparen);
	4977	REGCP_SET(runops_cp);
	4978
	4979	if (!caller_cv)
	4980	caller_cv = find_runcv(NULL);
	4981
	4982	n = ARG(scan);
	4983
	4984	if (rexi->data->what[n] == 'r') { /* code from an external qr */
	4985	newcv = (ReANY(
	4986	(REGEXP*)(rexi->data->data[n])
	4987	))->qr_anoncv
	4988	;
	4989	nop = (OP*)rexi->data->data[n+1];
	4990	}
	4991	else if (rexi->data->what[n] == 'l') { /* literal code */
	4992	newcv = caller_cv;
	4993	nop = (OP*)rexi->data->data[n];
	4994	assert(CvDEPTH(newcv));
	4995	}
	4996	else {
	4997	/* literal with own CV */
	4998	assert(rexi->data->what[n] == 'L');
	4999	newcv = rex->qr_anoncv;
	5000	nop = (OP*)rexi->data->data[n];
	5001	}
	5002
	5003	/* normally if we're about to execute code from the same
	5004	* CV that we used previously, we just use the existing
	5005	* CX stack entry. However, it's possible that in the
	5006	* meantime we may have backtracked, popped from the save
	5007	* stack, and undone the SAVECOMPPAD(s) associated with
	5008	* PUSH_MULTICALL; in which case PL_comppad no longer
	5009	* points to newcv's pad. */
	5010	if (newcv != last_pushed_cv \|\| PL_comppad != last_pad)
	5011	{
	5012	U8 flags = (CXp_SUB_RE \|
	5013	((newcv == caller_cv) ? CXp_SUB_RE_FAKE : 0));
	5014	if (last_pushed_cv) {
	5015	CHANGE_MULTICALL_FLAGS(newcv, flags);
	5016	}
	5017	else {
	5018	PUSH_MULTICALL_FLAGS(newcv, flags);
	5019	}
	5020	last_pushed_cv = newcv;
	5021	}
	5022	else {
	5023	/* these assignments are just to silence compiler
	5024	* warnings */
	5025	multicall_cop = NULL;
	5026	newsp = NULL;
	5027	}
	5028	last_pad = PL_comppad;
	5029
	5030	/* the initial nextstate you would normally execute
	5031	* at the start of an eval (which would cause error
	5032	* messages to come from the eval), may be optimised
	5033	* away from the execution path in the regex code blocks;
	5034	* so manually set PL_curcop to it initially */
	5035	{
	5036	OP *o = cUNOPx(nop)->op_first;
	5037	assert(o->op_type == OP_NULL);
	5038	if (o->op_targ == OP_SCOPE) {
	5039	o = cUNOPo->op_first;
	5040	}
	5041	else {
	5042	assert(o->op_targ == OP_LEAVE);
	5043	o = cUNOPo->op_first;
	5044	assert(o->op_type == OP_ENTER);
	5045	o = o->op_sibling;
	5046	}
	5047
	5048	if (o->op_type != OP_STUB) {
	5049	assert( o->op_type == OP_NEXTSTATE
	5050	\|\| o->op_type == OP_DBSTATE
	5051	\|\| (o->op_type == OP_NULL
	5052	&& ( o->op_targ == OP_NEXTSTATE
	5053	\|\| o->op_targ == OP_DBSTATE
	5054	)
	5055	)
	5056	);
	5057	PL_curcop = (COP*)o;
	5058	}
	5059	}
	5060	nop = nop->op_next;
	5061
	5062	DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
	5063	" re EVAL PL_op=0x%"UVxf"\n", PTR2UV(nop)) );
	5064
	5065	rex->offs[0].end = locinput - reginfo->strbeg;
	5066	if (reginfo->info_aux_eval->pos_magic)
	5067	MgBYTEPOS_set(reginfo->info_aux_eval->pos_magic,
	5068	reginfo->sv, reginfo->strbeg,
	5069	locinput - reginfo->strbeg);
	5070
	5071	if (sv_yes_mark) {
	5072	SV *sv_mrk = get_sv("REGMARK", 1);
	5073	sv_setsv(sv_mrk, sv_yes_mark);
	5074	}
	5075
	5076	/* we don't use MULTICALL here as we want to call the
	5077	* first op of the block of interest, rather than the
	5078	* first op of the sub */
	5079	before = (IV)(SP-PL_stack_base);
	5080	PL_op = nop;
	5081	CALLRUNOPS(aTHX); /* Scalar context. */
	5082	SPAGAIN;
	5083	if ((IV)(SP-PL_stack_base) == before)
	5084	ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
	5085	else {
	5086	ret = POPs;
	5087	PUTBACK;
	5088	}
	5089
	5090	/* before restoring everything, evaluate the returned
	5091	* value, so that 'uninit' warnings don't use the wrong
	5092	* PL_op or pad. Also need to process any magic vars
	5093	* (e.g. $1) before parentheses are restored */
	5094
	5095	PL_op = NULL;
	5096
	5097	re_sv = NULL;
	5098	if (logical == 0) /* (?{})/ */
	5099	sv_setsv(save_scalar(PL_replgv), ret); /* $^R */
	5100	else if (logical == 1) { /* /(?(?{...})X\|Y)/ */
	5101	sw = cBOOL(SvTRUE(ret));
	5102	logical = 0;
	5103	}
	5104	else { /* /(??{}) */
	5105	/* if it's overloaded, let the regex compiler handle
	5106	* it; otherwise extract regex, or stringify */
	5107	if (SvGMAGICAL(ret))
	5108	ret = sv_mortalcopy(ret);
	5109	if (!SvAMAGIC(ret)) {
	5110	SV *sv = ret;
	5111	if (SvROK(sv))
	5112	sv = SvRV(sv);
	5113	if (SvTYPE(sv) == SVt_REGEXP)
	5114	re_sv = (REGEXP*) sv;
	5115	else if (SvSMAGICAL(ret)) {
	5116	MAGIC *mg = mg_find(ret, PERL_MAGIC_qr);
	5117	if (mg)
	5118	re_sv = (REGEXP *) mg->mg_obj;
	5119	}
	5120
	5121	/* force any undef warnings here */
	5122	if (!re_sv && !SvPOK(ret) && !SvNIOK(ret)) {
	5123	ret = sv_mortalcopy(ret);
	5124	(void) SvPV_force_nolen(ret);
	5125	}
	5126	}
	5127
	5128	}
	5129
	5130	/* *** Note that at this point we don't restore
	5131	* PL_comppad, (or pop the CxSUB) on the assumption it may
	5132	* be used again soon. This is safe as long as nothing
	5133	* in the regexp code uses the pad ! */
	5134	PL_op = oop;
	5135	PL_curcop = ocurcop;
	5136	S_regcp_restore(aTHX_ rex, runops_cp, &maxopenparen);
	5137	PL_curpm = PL_reg_curpm;
	5138
	5139	if (logical != 2)
	5140	break;
	5141	}
	5142
	5143	/* only /(??{})/ from now on */
	5144	logical = 0;
	5145	{
	5146	/* extract RE object from returned value; compiling if
	5147	* necessary */
	5148
	5149	if (re_sv) {
	5150	re_sv = reg_temp_copy(NULL, re_sv);
	5151	}
	5152	else {
	5153	U32 pm_flags = 0;
	5154
	5155	if (SvUTF8(ret) && IN_BYTES) {
	5156	/* In use 'bytes': make a copy of the octet
	5157	* sequence, but without the flag on */
	5158	STRLEN len;
	5159	const char *const p = SvPV(ret, len);
	5160	ret = newSVpvn_flags(p, len, SVs_TEMP);
	5161	}
	5162	if (rex->intflags & PREGf_USE_RE_EVAL)
	5163	pm_flags \|= PMf_USE_RE_EVAL;
	5164
	5165	/* if we got here, it should be an engine which
	5166	* supports compiling code blocks and stuff */
	5167	assert(rex->engine && rex->engine->op_comp);
	5168	assert(!(scan->flags & ~RXf_PMf_COMPILETIME));
	5169	re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL,
	5170	rex->engine, NULL, NULL,
	5171	/* copy /msix etc to inner pattern */
	5172	scan->flags,
	5173	pm_flags);
	5174
	5175	if (!(SvFLAGS(ret)
	5176	& (SVs_TEMP \| SVs_GMG \| SVf_ROK))
	5177	&& (!SvPADTMP(ret) \|\| SvREADONLY(ret))) {
	5178	/* This isn't a first class regexp. Instead, it's
	5179	caching a regexp onto an existing, Perl visible
	5180	scalar. */
	5181	sv_magic(ret, MUTABLE_SV(re_sv), PERL_MAGIC_qr, 0, 0);
	5182	}
	5183	}
	5184	SAVEFREESV(re_sv);
	5185	re = ReANY(re_sv);
	5186	}
	5187	RXp_MATCH_COPIED_off(re);
	5188	re->subbeg = rex->subbeg;
	5189	re->sublen = rex->sublen;
	5190	re->suboffset = rex->suboffset;
	5191	re->subcoffset = rex->subcoffset;
	5192	rei = RXi_GET(re);
	5193	DEBUG_EXECUTE_r(
	5194	debug_start_match(re_sv, utf8_target, locinput,
	5195	reginfo->strend, "Matching embedded");
	5196	);
	5197	startpoint = rei->program + 1;
	5198	ST.close_paren = 0; /* only used for GOSUB */
	5199
	5200	eval_recurse_doit: /* Share code with GOSUB below this line */
	5201	/* run the pattern returned from (??{...}) */
	5202
	5203	/* Save all the positions. */
	5204	ST.cp = regcppush(rex, 0, maxopenparen);
	5205	REGCP_SET(ST.lastcp);
	5206
	5207	re->lastparen = 0;
	5208	re->lastcloseparen = 0;
	5209
	5210	maxopenparen = 0;
	5211
	5212	/* invalidate the S-L poscache. We're now executing a
	5213	* different set of WHILEM ops (and their associated
	5214	* indexes) against the same string, so the bits in the
	5215	* cache are meaningless. Setting maxiter to zero forces
	5216	* the cache to be invalidated and zeroed before reuse.
	5217	* XXX This is too dramatic a measure. Ideally we should
	5218	* save the old cache and restore when running the outer
	5219	* pattern again */
	5220	reginfo->poscache_maxiter = 0;
	5221
	5222	is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(re_sv));
	5223
	5224	ST.prev_rex = rex_sv;
	5225	ST.prev_curlyx = cur_curlyx;
	5226	rex_sv = re_sv;
	5227	SET_reg_curpm(rex_sv);
	5228	rex = re;
	5229	rexi = rei;
	5230	cur_curlyx = NULL;
	5231	ST.B = next;
	5232	ST.prev_eval = cur_eval;
	5233	cur_eval = st;
	5234	/* now continue from first node in postponed RE */
	5235	PUSH_YES_STATE_GOTO(EVAL_AB, startpoint, locinput);
	5236	assert(0); /* NOTREACHED */
	5237	}
	5238
	5239	case EVAL_AB: /* cleanup after a successful (??{A})B */
	5240	/* note: this is called twice; first after popping B, then A */
	5241	rex_sv = ST.prev_rex;
	5242	is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
	5243	SET_reg_curpm(rex_sv);
	5244	rex = ReANY(rex_sv);
	5245	rexi = RXi_GET(rex);
	5246	regcpblow(ST.cp);
	5247	cur_eval = ST.prev_eval;
	5248	cur_curlyx = ST.prev_curlyx;
	5249
	5250	/* Invalidate cache. See "invalidate" comment above. */
	5251	reginfo->poscache_maxiter = 0;
	5252	if ( nochange_depth )
	5253	nochange_depth--;
	5254	sayYES;
	5255
	5256
	5257	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
	5258	/* note: this is called twice; first after popping B, then A */
	5259	rex_sv = ST.prev_rex;
	5260	is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
	5261	SET_reg_curpm(rex_sv);
	5262	rex = ReANY(rex_sv);
	5263	rexi = RXi_GET(rex);
	5264
	5265	REGCP_UNWIND(ST.lastcp);
	5266	regcppop(rex, &maxopenparen);
	5267	cur_eval = ST.prev_eval;
	5268	cur_curlyx = ST.prev_curlyx;
	5269	/* Invalidate cache. See "invalidate" comment above. */
	5270	reginfo->poscache_maxiter = 0;
	5271	if ( nochange_depth )
	5272	nochange_depth--;
	5273	sayNO_SILENT;
	5274	#undef ST
	5275
	5276	case OPEN: /* ( */
	5277	n = ARG(scan); /* which paren pair */
	5278	rex->offs[n].start_tmp = locinput - reginfo->strbeg;
	5279	if (n > maxopenparen)
	5280	maxopenparen = n;
	5281	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	5282	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf" tmp; maxopenparen=%"UVuf"\n",
	5283	PTR2UV(rex),
	5284	PTR2UV(rex->offs),
	5285	(UV)n,
	5286	(IV)rex->offs[n].start_tmp,
	5287	(UV)maxopenparen
	5288	));
	5289	lastopen = n;
	5290	break;
	5291
	5292	/* CLOSE_CAPTURE: commit capture group n -- copies offs[n].start_tmp into offs[n].start, sets offs[n].end to the current input offset (locinput - reginfo->strbeg), then emits a DEBUG_BUFFERS_r trace line. Expands to several statements (no do/while wrapper) and relies on rex, n, locinput and reginfo being in scope where it is used. XXX really need to log other places start/end are set too */
	5293	#define CLOSE_CAPTURE \
	5294	rex->offs[n].start = rex->offs[n].start_tmp; \
	5295	rex->offs[n].end = locinput - reginfo->strbeg; \
	5296	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, \
	5297	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf"..%"IVdf"\n", \
	5298	PTR2UV(rex), \
	5299	PTR2UV(rex->offs), \
	5300	(UV)n, \
	5301	(IV)rex->offs[n].start, \
	5302	(IV)rex->offs[n].end \
	5303	))
	5304
	5305	case CLOSE: /* ) */
	5306	n = ARG(scan); /* which paren pair */
	5307	CLOSE_CAPTURE;
	5308	if (n > rex->lastparen)
	5309	rex->lastparen = n;
	5310	rex->lastcloseparen = n;
	5311	if (cur_eval && cur_eval->u.eval.close_paren == n) {
	5312	goto fake_end;
	5313	}
	5314	break;
	5315
	5316	case ACCEPT: /* (*ACCEPT) */
	5317	if (ARG(scan)){
	5318	regnode *cursor;
	5319	for (cursor=scan;
	5320	cursor && OP(cursor)!=END;
	5321	cursor=regnext(cursor))
	5322	{
	5323	if ( OP(cursor)==CLOSE ){
	5324	n = ARG(cursor);
	5325	if ( n <= lastopen ) {
	5326	CLOSE_CAPTURE;
	5327	if (n > rex->lastparen)
	5328	rex->lastparen = n;
	5329	rex->lastcloseparen = n;
	5330	if ( n == ARG(scan) \|\| (cur_eval &&
	5331	cur_eval->u.eval.close_paren == n))
	5332	break;
	5333	}
	5334	}
	5335	}
	5336	}
	5337	goto fake_end;
	5338	/*NOTREACHED*/
	5339
	5340	case GROUPP: /* (?(1)) */
	5341	n = ARG(scan); /* which paren pair */
	5342	sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
	5343	break;
	5344
	5345	case NGROUPP: /* (?(<name>)) */
	5346	/* reg_check_named_buff_matched returns 0 for no match */
	5347	sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
	5348	break;
	5349
	5350	case INSUBP: /* (?(R)) */
	5351	n = ARG(scan);
	5352	sw = (cur_eval && (!n \|\| cur_eval->u.eval.close_paren == n));
	5353	break;
	5354
	5355	case DEFINEP: /* (?(DEFINE)) */
	5356	sw = 0;
	5357	break;
	5358
	5359	case IFTHEN: /* (?(cond)A\|B) */
	5360	reginfo->poscache_iter = reginfo->poscache_maxiter; /* Void cache */
	5361	if (sw)
	5362	next = NEXTOPER(NEXTOPER(scan));
	5363	else {
	5364	next = scan + ARG(scan);
	5365	if (OP(next) == IFTHEN) /* Fake one. */
	5366	next = NEXTOPER(NEXTOPER(next));
	5367	}
	5368	break;
	5369
	5370	case LOGICAL: /* modifier for EVAL and IFMATCH */
	5371	logical = scan->flags;
	5372	break;
	5373
	5374	/*******************************************************************
	5375
	5376	The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
	5377	pattern, where A and B are subpatterns. (For simple A, CURLYM or
	5378	STAR/PLUS/CURLY/CURLYN are used instead.)
	5379
	5380	A*B is compiled as <CURLYX><A><WHILEM><B>
	5381
	5382	On entry to the subpattern, CURLYX is called. This pushes a CURLYX
	5383	state, which contains the current count, initialised to -1. It also sets
	5384	cur_curlyx to point to this state, with any previous value saved in the
	5385	state block.
	5386
	5387	CURLYX then jumps straight to the WHILEM op, rather than executing A,
	5388	since the pattern may possibly match zero times (i.e. it's a while {} loop
	5389	rather than a do {} while loop).
	5390
	5391	Each entry to WHILEM represents a successful match of A. The count in the
	5392	CURLYX block is incremented, another WHILEM state is pushed, and execution
	5393	passes to A or B depending on greediness and the current count.
	5394
	5395	For example, if matching against the string a1a2a3b (where the aN are
	5396	substrings that match /A/), then the match progresses as follows: (the
	5397	pushed states are interspersed with the bits of strings matched so far):
	5398
	5399	<CURLYX cnt=-1>
	5400	<CURLYX cnt=0><WHILEM>
	5401	<CURLYX cnt=1><WHILEM> a1 <WHILEM>
	5402	<CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
	5403	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
	5404	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
	5405
	5406	(Contrast this with something like CURLYM, which maintains only a single
	5407	backtrack state:
	5408
	5409	<CURLYM cnt=0> a1
	5410	a1 <CURLYM cnt=1> a2
	5411	a1 a2 <CURLYM cnt=2> a3
	5412	a1 a2 a3 <CURLYM cnt=3> b
	5413	)
	5414
	5415	Each WHILEM state block marks a point to backtrack to upon partial failure
	5416	of A or B, and also contains some minor state data related to that
	5417	iteration. The CURLYX block, pointed to by cur_curlyx, contains the
	5418	overall state, such as the count, and pointers to the A and B ops.
	5419
	5420	This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
	5421	must always point to the current CURLYX block, the rules are:
	5422
	5423	When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
	5424	and set cur_curlyx to point to the new block.
	5425
	5426	When popping the CURLYX block after a successful or unsuccessful match,
	5427	restore the previous cur_curlyx.
	5428
	5429	When WHILEM is about to execute B, save the current cur_curlyx, and set it
	5430	to the outer one saved in the CURLYX block.
	5431
	5432	When popping the WHILEM block after a successful or unsuccessful B match,
	5433	restore the previous cur_curlyx.
	5434
	5435	Here's an example for the pattern (AI* BI)*BO
	5436	I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
	5437
	5438	cur_
	5439	curlyx backtrack stack
	5440	------ ---------------
	5441	NULL
	5442	CO <CO prev=NULL> <WO>
	5443	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	5444	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	5445	NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
	5446
	5447	At this point the pattern succeeds, and we work back down the stack to
	5448	clean up, restoring as we go:
	5449
	5450	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	5451	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	5452	CO <CO prev=NULL> <WO>
	5453	NULL
	5454
	5455	*******************************************************************/
	5456
	5457	#define ST st->u.curlyx
	5458
	5459	case CURLYX: /* start of /A*B/ (for complex A) */
	5460	{
	5461	/* No need to save/restore up to this paren */
	5462	I32 parenfloor = scan->flags;
	5463
	5464	assert(next); /* keep Coverity happy */
	5465	if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
	5466	next += ARG(next);
	5467
	5468	/* XXXX Probably it is better to teach regpush to support
	5469	parenfloor > maxopenparen ... */
	5470	if (parenfloor > (I32)rex->lastparen)
	5471	parenfloor = rex->lastparen; /* Pessimization... */
	5472
	5473	ST.prev_curlyx= cur_curlyx;
	5474	cur_curlyx = st;
	5475	ST.cp = PL_savestack_ix;
	5476
	5477	/* these fields contain the state of the current curly.
	5478	* they are accessed by subsequent WHILEMs */
	5479	ST.parenfloor = parenfloor;
	5480	ST.me = scan;
	5481	ST.B = next;
	5482	ST.minmod = minmod;
	5483	minmod = 0;
	5484	ST.count = -1; /* this will be updated by WHILEM */
	5485	ST.lastloc = NULL; /* this will be updated by WHILEM */
	5486
	5487	PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput);
	5488	assert(0); /* NOTREACHED */
	5489	}
	5490
	5491	case CURLYX_end: /* just finished matching all of A*B */
	5492	cur_curlyx = ST.prev_curlyx;
	5493	sayYES;
	5494	assert(0); /* NOTREACHED */
	5495
	5496	case CURLYX_end_fail: /* just failed to match all of A*B */
	5497	regcpblow(ST.cp);
	5498	cur_curlyx = ST.prev_curlyx;
	5499	sayNO;
	5500	assert(0); /* NOTREACHED */
	5501
	5502
	5503	#undef ST
	5504	#define ST st->u.whilem
	5505
	5506	case WHILEM: /* just matched an A in /A*B/ (for complex A) */
	5507	{
	5508	/* see the discussion above about CURLYX/WHILEM */
	5509	I32 n;
	5510	int min = ARG1(cur_curlyx->u.curlyx.me);
	5511	int max = ARG2(cur_curlyx->u.curlyx.me);
	5512	regnode *A = NEXTOPER(cur_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS;
	5513
	5514	assert(cur_curlyx); /* keep Coverity happy */
	5515	n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
	5516	ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
	5517	ST.cache_offset = 0;
	5518	ST.cache_mask = 0;
	5519
	5520
	5521	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5522	"%*s whilem: matched %ld out of %d..%d\n",
	5523	REPORT_CODE_OFF+depth*2, "", (long)n, min, max)
	5524	);
	5525
	5526	/* First just match a string of min A's. */
	5527
	5528	if (n < min) {
	5529	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	5530	maxopenparen);
	5531	cur_curlyx->u.curlyx.lastloc = locinput;
	5532	REGCP_SET(ST.lastcp);
	5533
	5534	PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput);
	5535	assert(0); /* NOTREACHED */
	5536	}
	5537
	5538	/* If degenerate A matches "", assume A done. */
	5539
	5540	if (locinput == cur_curlyx->u.curlyx.lastloc) {
	5541	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5542	"%*s whilem: empty match detected, trying continuation...\n",
	5543	REPORT_CODE_OFF+depth*2, "")
	5544	);
	5545	goto do_whilem_B_max;
	5546	}
	5547
	5548	/* super-linear cache processing.
	5549	*
	5550	* The idea here is that for certain types of CURLYX/WHILEM -
	5551	* principally those whose upper bound is infinity (and
	5552	* excluding regexes that have things like \1 and other very
	5553	* non-regular expressiony things), then if a pattern like
	5554	* /....A*.../ fails and we backtrack to the WHILEM, then we
	5555	* make a note that this particular WHILEM op was at string
	5556	* position 47 (say) when the rest of pattern failed. Then, if
	5557	* we ever find ourselves back at that WHILEM, and at string
	5558	* position 47 again, we can just fail immediately rather than
	5559	* running the rest of the pattern again.
	5560	*
	5561	* This is very handy when patterns start to go
	5562	* 'super-linear', like in (a+)*(a+)*(a+)*, where you end up
	5563	* with a combinatorial explosion of backtracking.
	5564	*
	5565	* The cache is implemented as a bit array, with one bit per
	5566	* string byte position per WHILEM op (up to 16) - so it's
	5567	* between 0.25 and 2x the string size.
	5568	*
	5569	* To avoid allocating a poscache buffer every time, we do an
	5570	* initial countdown; only after we have executed a WHILEM
	5571	* op (string-length x #WHILEMs) times do we allocate the
	5572	* cache.
	5573	*
	5574	* The top 4 bits of scan->flags byte say how many different
	5575	* relevant CURLYX/WHILEM op pairs there are, while the
	5576	* bottom 4-bits is the identifying index number of this
	5577	* WHILEM.
	5578	*/
	5579
	5580	if (scan->flags) {
	5581
	5582	if (!reginfo->poscache_maxiter) {
	5583	/* start the countdown: Postpone detection until we
	5584	* know the match is not that much linear. */
	5585	reginfo->poscache_maxiter
	5586	= (reginfo->strend - reginfo->strbeg + 1)
	5587	* (scan->flags>>4);
	5588	/* possible overflow for long strings and many CURLYX's */
	5589	if (reginfo->poscache_maxiter < 0)
	5590	reginfo->poscache_maxiter = I32_MAX;
	5591	reginfo->poscache_iter = reginfo->poscache_maxiter;
	5592	}
	5593
	5594	if (reginfo->poscache_iter-- == 0) {
	5595	/* initialise cache */
	5596	const SSize_t size = (reginfo->poscache_maxiter + 7)/8;
	5597	regmatch_info_aux *const aux = reginfo->info_aux;
	5598	if (aux->poscache) {
	5599	if ((SSize_t)reginfo->poscache_size < size) {
	5600	Renew(aux->poscache, size, char);
	5601	reginfo->poscache_size = size;
	5602	}
	5603	Zero(aux->poscache, size, char);
	5604	}
	5605	else {
	5606	reginfo->poscache_size = size;
	5607	Newxz(aux->poscache, size, char);
	5608	}
	5609	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5610	"%swhilem: Detected a super-linear match, switching on caching%s...\n",
	5611	PL_colors[4], PL_colors[5])
	5612	);
	5613	}
	5614
	5615	if (reginfo->poscache_iter < 0) {
	5616	/* have we already failed at this position? */
	5617	SSize_t offset, mask;
	5618
	5619	reginfo->poscache_iter = -1; /* stop eventual underflow */
	5620	offset = (scan->flags & 0xf) - 1
	5621	+ (locinput - reginfo->strbeg)
	5622	* (scan->flags>>4);
	5623	mask = 1 << (offset % 8);
	5624	offset /= 8;
	5625	if (reginfo->info_aux->poscache[offset] & mask) {
	5626	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	5627	"%*s whilem: (cache) already tried at this position...\n",
	5628	REPORT_CODE_OFF+depth*2, "")
	5629	);
	5630	sayNO; /* cache records failure */
	5631	}
	5632	ST.cache_offset = offset;
	5633	ST.cache_mask = mask;
	5634	}
	5635	}
	5636
	5637	/* Prefer B over A for minimal matching. */
	5638
	5639	if (cur_curlyx->u.curlyx.minmod) {
	5640	ST.save_curlyx = cur_curlyx;
	5641	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	5642	ST.cp = regcppush(rex, ST.save_curlyx->u.curlyx.parenfloor,
	5643	maxopenparen);
	5644	REGCP_SET(ST.lastcp);
	5645	PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
	5646	locinput);
	5647	assert(0); /* NOTREACHED */
	5648	}
	5649
	5650	/* Prefer A over B for maximal matching. */
	5651
	5652	if (n < max) { /* More greed allowed? */
	5653	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	5654	maxopenparen);
	5655	cur_curlyx->u.curlyx.lastloc = locinput;
	5656	REGCP_SET(ST.lastcp);
	5657	PUSH_STATE_GOTO(WHILEM_A_max, A, locinput);
	5658	assert(0); /* NOTREACHED */
	5659	}
	5660	goto do_whilem_B_max;
	5661	}
	5662	assert(0); /* NOTREACHED */
	5663
	5664	case WHILEM_B_min: /* just matched B in a minimal match */
	5665	case WHILEM_B_max: /* just matched B in a maximal match */
	5666	cur_curlyx = ST.save_curlyx;
	5667	sayYES;
	5668	assert(0); /* NOTREACHED */
	5669
	5670	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
	5671	cur_curlyx = ST.save_curlyx;
	5672	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	5673	cur_curlyx->u.curlyx.count--;
	5674	CACHEsayNO;
	5675	assert(0); /* NOTREACHED */
	5676
	5677	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
	5678	/* FALL THROUGH */
	5679	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
	5680	REGCP_UNWIND(ST.lastcp);
	5681	regcppop(rex, &maxopenparen);
	5682	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	5683	cur_curlyx->u.curlyx.count--;
	5684	CACHEsayNO;
	5685	assert(0); /* NOTREACHED */
	5686
	5687	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
	5688	REGCP_UNWIND(ST.lastcp);
	5689	regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
	5690	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5691	"%*s whilem: failed, trying continuation...\n",
	5692	REPORT_CODE_OFF+depth*2, "")
	5693	);
	5694	do_whilem_B_max:
	5695	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	5696	&& ckWARN(WARN_REGEXP)
	5697	&& !reginfo->warned)
	5698	{
	5699	reginfo->warned = TRUE;
	5700	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	5701	"Complex regular subexpression recursion limit (%d) "
	5702	"exceeded",
	5703	REG_INFTY - 1);
	5704	}
	5705
	5706	/* now try B */
	5707	ST.save_curlyx = cur_curlyx;
	5708	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	5709	PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
	5710	locinput);
	5711	assert(0); /* NOTREACHED */
	5712
	5713	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
	5714	cur_curlyx = ST.save_curlyx;
	5715	REGCP_UNWIND(ST.lastcp);
	5716	regcppop(rex, &maxopenparen);
	5717
	5718	if (cur_curlyx->u.curlyx.count >= /*max*/ARG2(cur_curlyx->u.curlyx.me)) {
	5719	/* Maximum greed exceeded */
	5720	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	5721	&& ckWARN(WARN_REGEXP)
	5722	&& !reginfo->warned)
	5723	{
	5724	reginfo->warned = TRUE;
	5725	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	5726	"Complex regular subexpression recursion "
	5727	"limit (%d) exceeded",
	5728	REG_INFTY - 1);
	5729	}
	5730	cur_curlyx->u.curlyx.count--;
	5731	CACHEsayNO;
	5732	}
	5733
	5734	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5735	"%s trying longer...\n", REPORT_CODE_OFF+depth2, "")
	5736	);
	5737	/* Try grabbing another A and see if it helps. */
	5738	cur_curlyx->u.curlyx.lastloc = locinput;
	5739	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	5740	maxopenparen);
	5741	REGCP_SET(ST.lastcp);
	5742	PUSH_STATE_GOTO(WHILEM_A_min,
	5743	/A/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
	5744	locinput);
	5745	assert(0); /* NOTREACHED */
	5746
	5747	#undef ST
	5748	#define ST st->u.branch
	5749
	5750	case BRANCHJ: /* /(...\|A\|...)/ with long next pointer */
	5751	next = scan + ARG(scan);
	5752	if (next == scan)
	5753	next = NULL;
	5754	scan = NEXTOPER(scan);
	5755	/* FALL THROUGH */
	5756
	5757	case BRANCH: /* /(...\|A\|...)/ */
	5758	scan = NEXTOPER(scan); /* scan now points to inner node */
	5759	ST.lastparen = rex->lastparen;
	5760	ST.lastcloseparen = rex->lastcloseparen;
	5761	ST.next_branch = next;
	5762	REGCP_SET(ST.cp);
	5763
	5764	/* Now go into the branch */
	5765	if (has_cutgroup) {
	5766	PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput);
	5767	} else {
	5768	PUSH_STATE_GOTO(BRANCH_next, scan, locinput);
	5769	}
	5770	assert(0); /* NOTREACHED */
	5771
	5772	case CUTGROUP: /* /(THEN)/ /
	5773	sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
	5774	MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5775	PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
	5776	assert(0); /* NOTREACHED */
	5777
	5778	case CUTGROUP_next_fail:
	5779	do_cutgroup = 1;
	5780	no_final = 1;
	5781	if (st->u.mark.mark_name)
	5782	sv_commit = st->u.mark.mark_name;
	5783	sayNO;
	5784	assert(0); /* NOTREACHED */
	5785
	5786	case BRANCH_next:
	5787	sayYES;
	5788	assert(0); /* NOTREACHED */
	5789
	5790	case BRANCH_next_fail: /* that branch failed; try the next, if any */
	5791	if (do_cutgroup) {
	5792	do_cutgroup = 0;
	5793	no_final = 0;
	5794	}
	5795	REGCP_UNWIND(ST.cp);
	5796	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5797	scan = ST.next_branch;
	5798	/* no more branches? */
	5799	if (!scan \|\| (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
	5800	DEBUG_EXECUTE_r({
	5801	PerlIO_printf( Perl_debug_log,
	5802	"%*s %sBRANCH failed...%s\n",
	5803	REPORT_CODE_OFF+depth*2, "",
	5804	PL_colors[4],
	5805	PL_colors[5] );
	5806	});
	5807	sayNO_SILENT;
	5808	}
	5809	continue; /* execute next BRANCH[J] op */
	5810	assert(0); /* NOTREACHED */
	5811
	5812	case MINMOD: /* next op will be non-greedy, e.g. A? /
	5813	minmod = 1;
	5814	break;
	5815
	5816	#undef ST
	5817	#define ST st->u.curlym
	5818
	5819	case CURLYM: /* /A{m,n}B/ where A is fixed-length */
	5820
	5821	/* This is an optimisation of CURLYX that enables us to push
	5822	* only a single backtracking state, no matter how many matches
	5823	* there are in {m,n}. It relies on the pattern being constant
	5824	* length, with no parens to influence future backrefs
	5825	*/
	5826
	5827	ST.me = scan;
	5828	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5829
	5830	ST.lastparen = rex->lastparen;
	5831	ST.lastcloseparen = rex->lastcloseparen;
	5832
	5833	/* if paren positive, emulate an OPEN/CLOSE around A */
	5834	if (ST.me->flags) {
	5835	U32 paren = ST.me->flags;
	5836	if (paren > maxopenparen)
	5837	maxopenparen = paren;
	5838	scan += NEXT_OFF(scan); /* Skip former OPEN. */
	5839	}
	5840	ST.A = scan;
	5841	ST.B = next;
	5842	ST.alen = 0;
	5843	ST.count = 0;
	5844	ST.minmod = minmod;
	5845	minmod = 0;
	5846	ST.c1 = CHRTEST_UNINIT;
	5847	REGCP_SET(ST.cp);
	5848
	5849	if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
	5850	goto curlym_do_B;
	5851
	5852	curlym_do_A: /* execute the A in /A{m,n}B/ */
	5853	PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput); /* match A */
	5854	assert(0); /* NOTREACHED */
	5855
	5856	case CURLYM_A: /* we've just matched an A */
	5857	ST.count++;
	5858	/* after first match, determine A's length: u.curlym.alen */
	5859	if (ST.count == 1) {
	5860	if (reginfo->is_utf8_target) {
	5861	char *s = st->locinput;
	5862	while (s < locinput) {
	5863	ST.alen++;
	5864	s += UTF8SKIP(s);
	5865	}
	5866	}
	5867	else {
	5868	ST.alen = locinput - st->locinput;
	5869	}
	5870	if (ST.alen == 0)
	5871	ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
	5872	}
	5873	DEBUG_EXECUTE_r(
	5874	PerlIO_printf(Perl_debug_log,
	5875	"%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
	5876	(int)(REPORT_CODE_OFF+(depth*2)), "",
	5877	(IV) ST.count, (IV)ST.alen)
	5878	);
	5879
	5880	if (cur_eval && cur_eval->u.eval.close_paren &&
	5881	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5882	goto fake_end;
	5883
	5884	{
	5885	I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
	5886	if ( max == REG_INFTY \|\| ST.count < max )
	5887	goto curlym_do_A; /* try to match another A */
	5888	}
	5889	goto curlym_do_B; /* try to match B */
	5890
	5891	case CURLYM_A_fail: /* just failed to match an A */
	5892	REGCP_UNWIND(ST.cp);
	5893
	5894	if (ST.minmod \|\| ST.count < ARG1(ST.me) /* min*/
	5895	\|\| (cur_eval && cur_eval->u.eval.close_paren &&
	5896	cur_eval->u.eval.close_paren == (U32)ST.me->flags))
	5897	sayNO;
	5898
	5899	curlym_do_B: /* execute the B in /A{m,n}B/ */
	5900	if (ST.c1 == CHRTEST_UNINIT) {
	5901	/* calculate c1 and c2 for possible match of 1st char
	5902	* following curly */
	5903	ST.c1 = ST.c2 = CHRTEST_VOID;
	5904	if (HAS_TEXT(ST.B) \|\| JUMPABLE(ST.B)) {
	5905	regnode *text_node = ST.B;
	5906	if (! HAS_TEXT(text_node))
	5907	FIND_NEXT_IMPT(text_node);
	5908	/* this used to be
	5909
	5910	(HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
	5911
	5912	But the former is redundant in light of the latter.
	5913
	5914	if this changes back then the macro for
	5915	IS_TEXT and friends need to change.
	5916	*/
	5917	if (PL_regkind[OP(text_node)] == EXACT) {
	5918	if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
	5919	text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
	5920	reginfo))
	5921	{
	5922	sayNO;
	5923	}
	5924	}
	5925	}
	5926	}
	5927
	5928	DEBUG_EXECUTE_r(
	5929	PerlIO_printf(Perl_debug_log,
	5930	"%*s CURLYM trying tail with matches=%"IVdf"...\n",
	5931	(int)(REPORT_CODE_OFF+(depth*2)),
	5932	"", (IV)ST.count)
	5933	);
	5934	if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
	5935	if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
	5936	if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
	5937	&& memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
	5938	{
	5939	/* simulate B failing */
	5940	DEBUG_OPTIMISE_r(
	5941	PerlIO_printf(Perl_debug_log,
	5942	"%*s CURLYM Fast bail next target=0x%"UVXf" c1=0x%"UVXf" c2=0x%"UVXf"\n",
	5943	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5944	valid_utf8_to_uvchr((U8 *) locinput, NULL),
	5945	valid_utf8_to_uvchr(ST.c1_utf8, NULL),
	5946	valid_utf8_to_uvchr(ST.c2_utf8, NULL))
	5947	);
	5948	state_num = CURLYM_B_fail;
	5949	goto reenter_switch;
	5950	}
	5951	}
	5952	else if (nextchr != ST.c1 && nextchr != ST.c2) {
	5953	/* simulate B failing */
	5954	DEBUG_OPTIMISE_r(
	5955	PerlIO_printf(Perl_debug_log,
	5956	"%*s CURLYM Fast bail next target=0x%X c1=0x%X c2=0x%X\n",
	5957	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5958	(int) nextchr, ST.c1, ST.c2)
	5959	);
	5960	state_num = CURLYM_B_fail;
	5961	goto reenter_switch;
	5962	}
	5963	}
	5964
	5965	if (ST.me->flags) {
	5966	/* emulate CLOSE: mark current A as captured */
	5967	I32 paren = ST.me->flags;
	5968	if (ST.count) {
	5969	rex->offs[paren].start
	5970	= HOPc(locinput, -ST.alen) - reginfo->strbeg;
	5971	rex->offs[paren].end = locinput - reginfo->strbeg;
	5972	if ((U32)paren > rex->lastparen)
	5973	rex->lastparen = paren;
	5974	rex->lastcloseparen = paren;
	5975	}
	5976	else
	5977	rex->offs[paren].end = -1;
	5978	if (cur_eval && cur_eval->u.eval.close_paren &&
	5979	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5980	{
	5981	if (ST.count)
	5982	goto fake_end;
	5983	else
	5984	sayNO;
	5985	}
	5986	}
	5987
	5988	PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput); /* match B */
	5989	assert(0); /* NOTREACHED */
	5990
	5991	case CURLYM_B_fail: /* just failed to match a B */
	5992	REGCP_UNWIND(ST.cp);
	5993	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5994	if (ST.minmod) {
	5995	I32 max = ARG2(ST.me);
	5996	if (max != REG_INFTY && ST.count == max)
	5997	sayNO;
	5998	goto curlym_do_A; /* try to match a further A */
	5999	}
	6000	/* backtrack one A */
	6001	if (ST.count == ARG1(ST.me) /* min */)
	6002	sayNO;
	6003	ST.count--;
	6004	SET_locinput(HOPc(locinput, -ST.alen));
	6005	goto curlym_do_B; /* try to match B */
	6006
	6007	#undef ST
	6008	#define ST st->u.curly
	6009
	6010	#define CURLY_SETPAREN(paren, success) \
	6011	if (paren) { \
	6012	if (success) { \
	6013	rex->offs[paren].start = HOPc(locinput, -1) - reginfo->strbeg; \
	6014	rex->offs[paren].end = locinput - reginfo->strbeg; \
	6015	if (paren > rex->lastparen) \
	6016	rex->lastparen = paren; \
	6017	rex->lastcloseparen = paren; \
	6018	} \
	6019	else { \
	6020	rex->offs[paren].end = -1; \
	6021	rex->lastparen = ST.lastparen; \
	6022	rex->lastcloseparen = ST.lastcloseparen; \
	6023	} \
	6024	}
	6025
	6026	case STAR: /* /AB/ where A is width 1 char /
	6027	ST.paren = 0;
	6028	ST.min = 0;
	6029	ST.max = REG_INFTY;
	6030	scan = NEXTOPER(scan);
	6031	goto repeat;
	6032
	6033	case PLUS: /* /A+B/ where A is width 1 char */
	6034	ST.paren = 0;
	6035	ST.min = 1;
	6036	ST.max = REG_INFTY;
	6037	scan = NEXTOPER(scan);
	6038	goto repeat;
	6039
	6040	case CURLYN: /* /(A){m,n}B/ where A is width 1 char */
	6041	ST.paren = scan->flags; /* Which paren to set */
	6042	ST.lastparen = rex->lastparen;
	6043	ST.lastcloseparen = rex->lastcloseparen;
	6044	if (ST.paren > maxopenparen)
	6045	maxopenparen = ST.paren;
	6046	ST.min = ARG1(scan); /* min to match */
	6047	ST.max = ARG2(scan); /* max to match */
	6048	if (cur_eval && cur_eval->u.eval.close_paren &&
	6049	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6050	ST.min=1;
	6051	ST.max=1;
	6052	}
	6053	scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
	6054	goto repeat;
	6055
	6056	case CURLY: /* /A{m,n}B/ where A is width 1 char */
	6057	ST.paren = 0;
	6058	ST.min = ARG1(scan); /* min to match */
	6059	ST.max = ARG2(scan); /* max to match */
	6060	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	6061	repeat:
	6062	/*
	6063	* Lookahead to avoid useless match attempts
	6064	* when we know what character comes next.
	6065	*
	6066	* Used to only do .x and .?x, but now it allows
	6067	* for )'s, ('s and (?{ ... })'s to be in the way
	6068	* of the quantifier and the EXACT-like node. -- japhy
	6069	*/
	6070
	6071	assert(ST.min <= ST.max);
	6072	if (! HAS_TEXT(next) && ! JUMPABLE(next)) {
	6073	ST.c1 = ST.c2 = CHRTEST_VOID;
	6074	}
	6075	else {
	6076	regnode *text_node = next;
	6077
	6078	if (! HAS_TEXT(text_node))
	6079	FIND_NEXT_IMPT(text_node);
	6080
	6081	if (! HAS_TEXT(text_node))
	6082	ST.c1 = ST.c2 = CHRTEST_VOID;
	6083	else {
	6084	if ( PL_regkind[OP(text_node)] != EXACT ) {
	6085	ST.c1 = ST.c2 = CHRTEST_VOID;
	6086	}
	6087	else {
	6088
	6089	/* Currently we only get here when
	6090
	6091	PL_regkind[OP(text_node)] == EXACT
	6092
	6093	If this changes back, then the macros for IS_TEXT and
	6094	friends need to change. */
	6095	if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
	6096	text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
	6097	reginfo))
	6098	{
	6099	sayNO;
	6100	}
	6101	}
	6102	}
	6103	}
	6104
	6105	ST.A = scan;
	6106	ST.B = next;
	6107	if (minmod) {
	6108	char *li = locinput;
	6109	minmod = 0;
	6110	if (ST.min &&
	6111	regrepeat(rex, &li, ST.A, reginfo, ST.min, depth)
	6112	< ST.min)
	6113	sayNO;
	6114	SET_locinput(li);
	6115	ST.count = ST.min;
	6116	REGCP_SET(ST.cp);
	6117	if (ST.c1 == CHRTEST_VOID)
	6118	goto curly_try_B_min;
	6119
	6120	ST.oldloc = locinput;
	6121
	6122	/* set ST.maxpos to the furthest point along the
	6123	* string that could possibly match */
	6124	if (ST.max == REG_INFTY) {
	6125	ST.maxpos = reginfo->strend - 1;
	6126	if (utf8_target)
	6127	while (UTF8_IS_CONTINUATION((U8)ST.maxpos))
	6128	ST.maxpos--;
	6129	}
	6130	else if (utf8_target) {
	6131	int m = ST.max - ST.min;
	6132	for (ST.maxpos = locinput;
	6133	m >0 && ST.maxpos < reginfo->strend; m--)
	6134	ST.maxpos += UTF8SKIP(ST.maxpos);
	6135	}
	6136	else {
	6137	ST.maxpos = locinput + ST.max - ST.min;
	6138	if (ST.maxpos >= reginfo->strend)
	6139	ST.maxpos = reginfo->strend - 1;
	6140	}
	6141	goto curly_try_B_min_known;
	6142
	6143	}
	6144	else {
	6145	/* avoid taking address of locinput, so it can remain
	6146	* a register var */
	6147	char *li = locinput;
	6148	ST.count = regrepeat(rex, &li, ST.A, reginfo, ST.max, depth);
	6149	if (ST.count < ST.min)
	6150	sayNO;
	6151	SET_locinput(li);
	6152	if ((ST.count > ST.min)
	6153	&& (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
	6154	{
	6155	/* A{m,n} must come at the end of the string, there's
	6156	* no point in backing off ... */
	6157	ST.min = ST.count;
	6158	/* ...except that $ and \Z can match before and after
	6159	newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
	6160	We may back off by one in this case. */
	6161	if (UCHARAT(locinput - 1) == '\n' && OP(ST.B) != EOS)
	6162	ST.min--;
	6163	}
	6164	REGCP_SET(ST.cp);
	6165	goto curly_try_B_max;
	6166	}
	6167	assert(0); /* NOTREACHED */
	6168
	6169
	6170	case CURLY_B_min_known_fail:
	6171	/* failed to find B in a non-greedy match where c1,c2 valid */
	6172
	6173	REGCP_UNWIND(ST.cp);
	6174	if (ST.paren) {
	6175	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6176	}
	6177	/* Couldn't or didn't -- move forward. */
	6178	ST.oldloc = locinput;
	6179	if (utf8_target)
	6180	locinput += UTF8SKIP(locinput);
	6181	else
	6182	locinput++;
	6183	ST.count++;
	6184	curly_try_B_min_known:
	6185	/* find the next place where 'B' could work, then call B */
	6186	{
	6187	int n;
	6188	if (utf8_target) {
	6189	n = (ST.oldloc == locinput) ? 0 : 1;
	6190	if (ST.c1 == ST.c2) {
	6191	/* set n to utf8_distance(oldloc, locinput) */
	6192	while (locinput <= ST.maxpos
	6193	&& memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput)))
	6194	{
	6195	locinput += UTF8SKIP(locinput);
	6196	n++;
	6197	}
	6198	}
	6199	else {
	6200	/* set n to utf8_distance(oldloc, locinput) */
	6201	while (locinput <= ST.maxpos
	6202	&& memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
	6203	&& memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
	6204	{
	6205	locinput += UTF8SKIP(locinput);
	6206	n++;
	6207	}
	6208	}
	6209	}
	6210	else { /* Not utf8_target */
	6211	if (ST.c1 == ST.c2) {
	6212	while (locinput <= ST.maxpos &&
	6213	UCHARAT(locinput) != ST.c1)
	6214	locinput++;
	6215	}
	6216	else {
	6217	while (locinput <= ST.maxpos
	6218	&& UCHARAT(locinput) != ST.c1
	6219	&& UCHARAT(locinput) != ST.c2)
	6220	locinput++;
	6221	}
	6222	n = locinput - ST.oldloc;
	6223	}
	6224	if (locinput > ST.maxpos)
	6225	sayNO;
	6226	if (n) {
	6227	/* In /a{m,n}b/, ST.oldloc is at "a" x m, locinput is
	6228	* at b; check that everything between oldloc and
	6229	* locinput matches */
	6230	char *li = ST.oldloc;
	6231	ST.count += n;
	6232	if (regrepeat(rex, &li, ST.A, reginfo, n, depth) < n)
	6233	sayNO;
	6234	assert(n == REG_INFTY \|\| locinput == li);
	6235	}
	6236	CURLY_SETPAREN(ST.paren, ST.count);
	6237	if (cur_eval && cur_eval->u.eval.close_paren &&
	6238	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6239	goto fake_end;
	6240	}
	6241	PUSH_STATE_GOTO(CURLY_B_min_known, ST.B, locinput);
	6242	}
	6243	assert(0); /* NOTREACHED */
	6244
	6245
	6246	case CURLY_B_min_fail:
	6247	/* failed to find B in a non-greedy match where c1,c2 invalid */
	6248
	6249	REGCP_UNWIND(ST.cp);
	6250	if (ST.paren) {
	6251	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6252	}
	6253	/* failed -- move forward one */
	6254	{
	6255	char *li = locinput;
	6256	if (!regrepeat(rex, &li, ST.A, reginfo, 1, depth)) {
	6257	sayNO;
	6258	}
	6259	locinput = li;
	6260	}
	6261	{
	6262	ST.count++;
	6263	if (ST.count <= ST.max \|\| (ST.max == REG_INFTY &&
	6264	ST.count > 0)) /* count overflow ? */
	6265	{
	6266	curly_try_B_min:
	6267	CURLY_SETPAREN(ST.paren, ST.count);
	6268	if (cur_eval && cur_eval->u.eval.close_paren &&
	6269	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6270	goto fake_end;
	6271	}
	6272	PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
	6273	}
	6274	}
	6275	sayNO;
	6276	assert(0); /* NOTREACHED */
	6277
	6278
	6279	curly_try_B_max:
	6280	/* a successful greedy match: now try to match B */
	6281	if (cur_eval && cur_eval->u.eval.close_paren &&
	6282	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	6283	goto fake_end;
	6284	}
	6285	{
	6286	bool could_match = locinput < reginfo->strend;
	6287
	6288	/* If it could work, try it. */
	6289	if (ST.c1 != CHRTEST_VOID && could_match) {
	6290	if (! UTF8_IS_INVARIANT(UCHARAT(locinput)) && utf8_target)
	6291	{
	6292	could_match = memEQ(locinput,
	6293	ST.c1_utf8,
	6294	UTF8SKIP(locinput))
	6295	\|\| memEQ(locinput,
	6296	ST.c2_utf8,
	6297	UTF8SKIP(locinput));
	6298	}
	6299	else {
	6300	could_match = UCHARAT(locinput) == ST.c1
	6301	\|\| UCHARAT(locinput) == ST.c2;
	6302	}
	6303	}
	6304	if (ST.c1 == CHRTEST_VOID \|\| could_match) {
	6305	CURLY_SETPAREN(ST.paren, ST.count);
	6306	PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput);
	6307	assert(0); /* NOTREACHED */
	6308	}
	6309	}
	6310	/* FALL THROUGH */
	6311
	6312	case CURLY_B_max_fail:
	6313	/* failed to find B in a greedy match */
	6314
	6315	REGCP_UNWIND(ST.cp);
	6316	if (ST.paren) {
	6317	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	6318	}
	6319	/* back up. */
	6320	if (--ST.count < ST.min)
	6321	sayNO;
	6322	locinput = HOPc(locinput, -1);
	6323	goto curly_try_B_max;
	6324
	6325	#undef ST
	6326
	6327	case END: /* last op of main pattern */
	6328	fake_end:
	6329	if (cur_eval) {
	6330	/* we've just finished A in /(??{A})B/; now continue with B */
	6331
	6332	st->u.eval.prev_rex = rex_sv; /* inner */
	6333
	6334	/* Save all the positions. */
	6335	st->u.eval.cp = regcppush(rex, 0, maxopenparen);
	6336	rex_sv = cur_eval->u.eval.prev_rex;
	6337	is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
	6338	SET_reg_curpm(rex_sv);
	6339	rex = ReANY(rex_sv);
	6340	rexi = RXi_GET(rex);
	6341	cur_curlyx = cur_eval->u.eval.prev_curlyx;
	6342
	6343	REGCP_SET(st->u.eval.lastcp);
	6344
	6345	/* Restore parens of the outer rex without popping the
	6346	* savestack */
	6347	S_regcp_restore(aTHX_ rex, cur_eval->u.eval.lastcp,
	6348	&maxopenparen);
	6349
	6350	st->u.eval.prev_eval = cur_eval;
	6351	cur_eval = cur_eval->u.eval.prev_eval;
	6352	DEBUG_EXECUTE_r(
	6353	PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
	6354	REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
	6355	if ( nochange_depth )
	6356	nochange_depth--;
	6357
	6358	PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B,
	6359	locinput); /* match B */
	6360	}
	6361
	6362	if (locinput < reginfo->till) {
	6363	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	6364	"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
	6365	PL_colors[4],
	6366	(long)(locinput - startpos),
	6367	(long)(reginfo->till - startpos),
	6368	PL_colors[5]));
	6369
	6370	sayNO_SILENT; /* Cannot match: too short. */
	6371	}
	6372	sayYES; /* Success! */
	6373
	6374	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
	6375	DEBUG_EXECUTE_r(
	6376	PerlIO_printf(Perl_debug_log,
	6377	"%*s %ssubpattern success...%s\n",
	6378	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
	6379	sayYES; /* Success! */
	6380
	6381	#undef ST
	6382	#define ST st->u.ifmatch
	6383
	6384	{
	6385	char *newstart;
	6386
	6387	case SUSPEND: /* (?>A) */
	6388	ST.wanted = 1;
	6389	newstart = locinput;
	6390	goto do_ifmatch;
	6391
	6392	case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
	6393	ST.wanted = 0;
	6394	goto ifmatch_trivial_fail_test;
	6395
	6396	case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
	6397	ST.wanted = 1;
	6398	ifmatch_trivial_fail_test:
	6399	if (scan->flags) {
	6400	char * const s = HOPBACKc(locinput, scan->flags);
	6401	if (!s) {
	6402	/* trivial fail */
	6403	if (logical) {
	6404	logical = 0;
	6405	sw = 1 - cBOOL(ST.wanted);
	6406	}
	6407	else if (ST.wanted)
	6408	sayNO;
	6409	next = scan + ARG(scan);
	6410	if (next == scan)
	6411	next = NULL;
	6412	break;
	6413	}
	6414	newstart = s;
	6415	}
	6416	else
	6417	newstart = locinput;
	6418
	6419	do_ifmatch:
	6420	ST.me = scan;
	6421	ST.logical = logical;
	6422	logical = 0; /* XXX: reset state of logical once it has been saved into ST */
	6423
	6424	/* execute body of (?...A) */
	6425	PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), newstart);
	6426	assert(0); /* NOTREACHED */
	6427	}
	6428
	6429	case IFMATCH_A_fail: /* body of (?...A) failed */
	6430	ST.wanted = !ST.wanted;
	6431	/* FALL THROUGH */
	6432
	6433	case IFMATCH_A: /* body of (?...A) succeeded */
	6434	if (ST.logical) {
	6435	sw = cBOOL(ST.wanted);
	6436	}
	6437	else if (!ST.wanted)
	6438	sayNO;
	6439
	6440	if (OP(ST.me) != SUSPEND) {
	6441	/* restore old position except for (?>...) */
	6442	locinput = st->locinput;
	6443	}
	6444	scan = ST.me + ARG(ST.me);
	6445	if (scan == ST.me)
	6446	scan = NULL;
	6447	continue; /* execute B */
	6448
	6449	#undef ST
	6450
	6451	case LONGJMP: /* alternative with many branches compiles to
	6452	* (BRANCHJ; EXACT ...; LONGJMP ) x N */
	6453	next = scan + ARG(scan);
	6454	if (next == scan)
	6455	next = NULL;
	6456	break;
	6457
	6458	case COMMIT: /* (COMMIT) /
	6459	reginfo->cutpoint = reginfo->strend;
	6460	/* FALLTHROUGH */
	6461
	6462	case PRUNE: /* (PRUNE) /
	6463	if (!scan->flags)
	6464	sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6465	PUSH_STATE_GOTO(COMMIT_next, next, locinput);
	6466	assert(0); /* NOTREACHED */
	6467
	6468	case COMMIT_next_fail:
	6469	no_final = 1;
	6470	/* FALLTHROUGH */
	6471
	6472	case OPFAIL: /* (FAIL) /
	6473	sayNO;
	6474	assert(0); /* NOTREACHED */
	6475
	6476	#define ST st->u.mark
	6477	case MARKPOINT: /* (MARK:foo) /
	6478	ST.prev_mark = mark_state;
	6479	ST.mark_name = sv_commit = sv_yes_mark
	6480	= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6481	mark_state = st;
	6482	ST.mark_loc = locinput;
	6483	PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput);
	6484	assert(0); /* NOTREACHED */
	6485
	6486	case MARKPOINT_next:
	6487	mark_state = ST.prev_mark;
	6488	sayYES;
	6489	assert(0); /* NOTREACHED */
	6490
	6491	case MARKPOINT_next_fail:
	6492	if (popmark && sv_eq(ST.mark_name,popmark))
	6493	{
	6494	if (ST.mark_loc > startpoint)
	6495	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	6496	popmark = NULL; /* we found our mark */
	6497	sv_commit = ST.mark_name;
	6498
	6499	DEBUG_EXECUTE_r({
	6500	PerlIO_printf(Perl_debug_log,
	6501	"%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
	6502	REPORT_CODE_OFF+depth*2, "",
	6503	PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
	6504	});
	6505	}
	6506	mark_state = ST.prev_mark;
	6507	sv_yes_mark = mark_state ?
	6508	mark_state->u.mark.mark_name : NULL;
	6509	sayNO;
	6510	assert(0); /* NOTREACHED */
	6511
	6512	case SKIP: /* (SKIP) /
	6513	if (scan->flags) {
	6514	/* (*SKIP) : if we fail we cut here */
	6515	ST.mark_name = NULL;
	6516	ST.mark_loc = locinput;
	6517	PUSH_STATE_GOTO(SKIP_next,next, locinput);
	6518	} else {
	6519	/* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
	6520	otherwise do nothing. Meaning we need to scan
	6521	*/
	6522	regmatch_state *cur = mark_state;
	6523	SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	6524
	6525	while (cur) {
	6526	if ( sv_eq( cur->u.mark.mark_name,
	6527	find ) )
	6528	{
	6529	ST.mark_name = find;
	6530	PUSH_STATE_GOTO( SKIP_next, next, locinput);
	6531	}
	6532	cur = cur->u.mark.prev_mark;
	6533	}
	6534	}
	6535	/* Didn't find our (MARK:NAME) so ignore this (SKIP:NAME) */
	6536	break;
	6537
	6538	case SKIP_next_fail:
	6539	if (ST.mark_name) {
	6540	/* (*CUT:NAME) - Set up to search for the name as we
	6541	collapse the stack*/
	6542	popmark = ST.mark_name;
	6543	} else {
	6544	/* (*CUT) - No name, we cut here. */
	6545	if (ST.mark_loc > startpoint)
	6546	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	6547	/* but we set sv_commit to latest mark_name if there
	6548	is one so they can test to see how things lead to this
	6549	cut */
	6550	if (mark_state)
	6551	sv_commit=mark_state->u.mark.mark_name;
	6552	}
	6553	no_final = 1;
	6554	sayNO;
	6555	assert(0); /* NOTREACHED */
	6556	#undef ST
	6557
	6558	case LNBREAK: /* \R */
	6559	if ((n=is_LNBREAK_safe(locinput, reginfo->strend, utf8_target))) {
	6560	locinput += n;
	6561	} else
	6562	sayNO;
	6563	break;
	6564
	6565	default:
	6566	PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
	6567	PTR2UV(scan), OP(scan));
	6568	Perl_croak(aTHX_ "regexp memory corruption");
	6569
	6570	/* this is a point to jump to in order to increment
	6571	* locinput by one character */
	6572	increment_locinput:
	6573	assert(!NEXTCHR_IS_EOS);
	6574	if (utf8_target) {
	6575	locinput += PL_utf8skip[nextchr];
	6576	/* locinput is allowed to go 1 char off the end, but not 2+ */
	6577	if (locinput > reginfo->strend)
	6578	sayNO;
	6579	}
	6580	else
	6581	locinput++;
	6582	break;
	6583
	6584	} /* end switch */
	6585
	6586	/* switch break jumps here */
	6587	scan = next; /* prepare to execute the next op and ... */
	6588	continue; /* ... jump back to the top, reusing st */
	6589	assert(0); /* NOTREACHED */
	6590
	6591	push_yes_state:
	6592	/* push a state that backtracks on success */
	6593	st->u.yes.prev_yes_state = yes_state;
	6594	yes_state = st;
	6595	/* FALL THROUGH */
	6596	push_state:
	6597	/* push a new regex state, then continue at scan */
	6598	{
	6599	regmatch_state *newst;
	6600
	6601	DEBUG_STACK_r({
	6602	regmatch_state *cur = st;
	6603	regmatch_state *curyes = yes_state;
	6604	int curd = depth;
	6605	regmatch_slab *slab = PL_regmatch_slab;
	6606	for (;curd > -1;cur--,curd--) {
	6607	if (cur < SLAB_FIRST(slab)) {
	6608	slab = slab->prev;
	6609	cur = SLAB_LAST(slab);
	6610	}
	6611	PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
	6612	REPORT_CODE_OFF + 2 + depth * 2,"",
	6613	curd, PL_reg_name[cur->resume_state],
	6614	(curyes == cur) ? "yes" : ""
	6615	);
	6616	if (curyes == cur)
	6617	curyes = cur->u.yes.prev_yes_state;
	6618	}
	6619	} else
	6620	DEBUG_STATE_pp("push")
	6621	);
	6622	depth++;
	6623	st->locinput = locinput;
	6624	newst = st+1;
	6625	if (newst > SLAB_LAST(PL_regmatch_slab))
	6626	newst = S_push_slab(aTHX);
	6627	PL_regmatch_state = newst;
	6628
	6629	locinput = pushinput;
	6630	st = newst;
	6631	continue;
	6632	assert(0); /* NOTREACHED */
	6633	}
	6634	}
	6635
	6636	/*
	6637	* We get here only if there's trouble -- normally "case END" is
	6638	* the terminating point.
	6639	*/
	6640	Perl_croak(aTHX_ "corrupted regexp pointers");
	6641	/*NOTREACHED*/
	6642	sayNO;
	6643
	6644	yes:
	6645	if (yes_state) {
	6646	/* we have successfully completed a subexpression, but we must now
	6647	* pop to the state marked by yes_state and continue from there */
	6648	assert(st != yes_state);
	6649	#ifdef DEBUGGING
	6650	while (st != yes_state) {
	6651	st--;
	6652	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	6653	PL_regmatch_slab = PL_regmatch_slab->prev;
	6654	st = SLAB_LAST(PL_regmatch_slab);
	6655	}
	6656	DEBUG_STATE_r({
	6657	if (no_final) {
	6658	DEBUG_STATE_pp("pop (no final)");
	6659	} else {
	6660	DEBUG_STATE_pp("pop (yes)");
	6661	}
	6662	});
	6663	depth--;
	6664	}
	6665	#else
	6666	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
	6667	\|\| yes_state > SLAB_LAST(PL_regmatch_slab))
	6668	{
	6669	/* not in this slab, pop slab */
	6670	depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
	6671	PL_regmatch_slab = PL_regmatch_slab->prev;
	6672	st = SLAB_LAST(PL_regmatch_slab);
	6673	}
	6674	depth -= (st - yes_state);
	6675	#endif
	6676	st = yes_state;
	6677	yes_state = st->u.yes.prev_yes_state;
	6678	PL_regmatch_state = st;
	6679
	6680	if (no_final)
	6681	locinput= st->locinput;
	6682	state_num = st->resume_state + no_final;
	6683	goto reenter_switch;
	6684	}
	6685
	6686	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
	6687	PL_colors[4], PL_colors[5]));
	6688
	6689	if (reginfo->info_aux_eval) {
	6690	/* each successfully executed (?{...}) block does the equivalent of
	6691	* local $^R = do {...}
	6692	* When popping the save stack, all these locals would be undone;
	6693	* bypass this by setting the outermost saved $^R to the latest
	6694	* value */
	6695	if (oreplsv != GvSV(PL_replgv))
	6696	sv_setsv(oreplsv, GvSV(PL_replgv));
	6697	}
	6698	result = 1;
	6699	goto final_exit;
	6700
	6701	no:
	6702	DEBUG_EXECUTE_r(
	6703	PerlIO_printf(Perl_debug_log,
	6704	"%*s %sfailed...%s\n",
	6705	REPORT_CODE_OFF+depth*2, "",
	6706	PL_colors[4], PL_colors[5])
	6707	);
	6708
	6709	no_silent:
	6710	if (no_final) {
	6711	if (yes_state) {
	6712	goto yes;
	6713	} else {
	6714	goto final_exit;
	6715	}
	6716	}
	6717	if (depth) {
	6718	/* there's a previous state to backtrack to */
	6719	st--;
	6720	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	6721	PL_regmatch_slab = PL_regmatch_slab->prev;
	6722	st = SLAB_LAST(PL_regmatch_slab);
	6723	}
	6724	PL_regmatch_state = st;
	6725	locinput= st->locinput;
	6726
	6727	DEBUG_STATE_pp("pop");
	6728	depth--;
	6729	if (yes_state == st)
	6730	yes_state = st->u.yes.prev_yes_state;
	6731
	6732	state_num = st->resume_state + 1; /* failure = success + 1 */
	6733	goto reenter_switch;
	6734	}
	6735	result = 0;
	6736
	6737	final_exit:
	6738	if (rex->intflags & PREGf_VERBARG_SEEN) {
	6739	SV *sv_err = get_sv("REGERROR", 1);
	6740	SV *sv_mrk = get_sv("REGMARK", 1);
	6741	if (result) {
	6742	sv_commit = &PL_sv_no;
	6743	if (!sv_yes_mark)
	6744	sv_yes_mark = &PL_sv_yes;
	6745	} else {
	6746	if (!sv_commit)
	6747	sv_commit = &PL_sv_yes;
	6748	sv_yes_mark = &PL_sv_no;
	6749	}
	6750	sv_setsv(sv_err, sv_commit);
	6751	sv_setsv(sv_mrk, sv_yes_mark);
	6752	}
	6753
	6754
	6755	if (last_pushed_cv) {
	6756	dSP;
	6757	POP_MULTICALL;
	6758	PERL_UNUSED_VAR(SP);
	6759	}
	6760
	6761	assert(!result \|\| locinput - reginfo->strbeg >= 0);
	6762	return result ? locinput - reginfo->strbeg : -1;
	6763	}
	6764
	6765	/*
	6766	- regrepeat - repeatedly match something simple, report how many
	6767	*
	6768	* What 'simple' means is a node which can be the operand of a quantifier like
	6769	* '+', or {1,3}
	6770	*
	6771	* startposp - pointer to a pointer to the start position. This is updated
	6772	* to point to the byte following the highest successful
	6773	* match.
	6774	* p - the regnode to be repeatedly matched against.
	6775	* reginfo - struct holding match state, such as strend
	6776	* max - maximum number of things to match.
	6777	* depth - (for debugging) backtracking depth.
	6778	*/
	6779	STATIC I32
	6780	S_regrepeat(pTHX_ regexp prog, char startposp, const regnode p,
	6781	regmatch_info *const reginfo, I32 max, int depth)
	6782	{
	6783	dVAR;
	6784	char scan; / Pointer to current position in target string */
	6785	I32 c;
	6786	char loceol = reginfo->strend; / local version */
	6787	I32 hardcount = 0; /* How many matches so far */
	6788	bool utf8_target = reginfo->is_utf8_target;
	6789	int to_complement = 0; /* Invert the result? */
	6790	UV utf8_flags;
	6791	_char_class_number classnum;
	6792	#ifndef DEBUGGING
	6793	PERL_UNUSED_ARG(depth);
	6794	#endif
	6795
	6796	PERL_ARGS_ASSERT_REGREPEAT;
	6797
	6798	scan = *startposp;
	6799	if (max == REG_INFTY)
	6800	max = I32_MAX;
	6801	else if (! utf8_target && loceol - scan > max)
	6802	loceol = scan + max;
	6803
	6804	/* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
	6805	* to the maximum of how far we should go in it (leaving it set to the real
	6806	* end, if the maximum permissible would take us beyond that). This allows
	6807	* us to make the loop exit condition that we haven't gone past <loceol> to
	6808	* also mean that we haven't exceeded the max permissible count, saving a
	6809	* test each time through the loop. But it assumes that the OP matches a
	6810	* single byte, which is true for most of the OPs below when applied to a
	6811	* non-UTF-8 target. Those relatively few OPs that don't have this
	6812	* characteristic will have to compensate.
	6813	*
	6814	* There is no adjustment for UTF-8 targets, as the number of bytes per
	6815	* character varies. OPs will have to test both that the count is less
	6816	* than the max permissible (using <hardcount> to keep track), and that we
	6817	* are still within the bounds of the string (using <loceol>. A few OPs
	6818	* match a single byte no matter what the encoding. They can omit the max
	6819	* test if, for the UTF-8 case, they do the adjustment that was skipped
	6820	* above.
	6821	*
	6822	* Thus, the code above sets things up for the common case; and exceptional
	6823	* cases need extra work; the common case is to make sure <scan> doesn't
	6824	* go past <loceol>, and for UTF-8 to also use <hardcount> to make sure the
	6825	* count doesn't exceed the maximum permissible */
	6826
	6827	switch (OP(p)) {
	6828	case REG_ANY:
	6829	if (utf8_target) {
	6830	while (scan < loceol && hardcount < max && *scan != '\n') {
	6831	scan += UTF8SKIP(scan);
	6832	hardcount++;
	6833	}
	6834	} else {
	6835	while (scan < loceol && *scan != '\n')
	6836	scan++;
	6837	}
	6838	break;
	6839	case SANY:
	6840	if (utf8_target) {
	6841	while (scan < loceol && hardcount < max) {
	6842	scan += UTF8SKIP(scan);
	6843	hardcount++;
	6844	}
	6845	}
	6846	else
	6847	scan = loceol;
	6848	break;
	6849	case CANY: /* Move <scan> forward <max> bytes, unless goes off end */
	6850	if (utf8_target && loceol - scan > max) {
	6851
	6852	/* <loceol> hadn't been adjusted in the UTF-8 case */
	6853	scan += max;
	6854	}
	6855	else {
	6856	scan = loceol;
	6857	}
	6858	break;
	6859	case EXACT:
	6860	assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
	6861
	6862	c = (U8)*STRING(p);
	6863
	6864	/* Can use a simple loop if the pattern char to match on is invariant
	6865	* under UTF-8, or both target and pattern aren't UTF-8. Note that we
	6866	* can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
	6867	* true iff it doesn't matter if the argument is in UTF-8 or not */
	6868	if (UTF8_IS_INVARIANT(c) \|\| (! utf8_target && ! reginfo->is_utf8_pat)) {
	6869	if (utf8_target && loceol - scan > max) {
	6870	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	6871	* since here, to match at all, 1 char == 1 byte */
	6872	loceol = scan + max;
	6873	}
	6874	while (scan < loceol && UCHARAT(scan) == c) {
	6875	scan++;
	6876	}
	6877	}
	6878	else if (reginfo->is_utf8_pat) {
	6879	if (utf8_target) {
	6880	STRLEN scan_char_len;
	6881
	6882	/* When both target and pattern are UTF-8, we have to do
	6883	* string EQ */
	6884	while (hardcount < max
	6885	&& scan < loceol
	6886	&& (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
	6887	&& memEQ(scan, STRING(p), scan_char_len))
	6888	{
	6889	scan += scan_char_len;
	6890	hardcount++;
	6891	}
	6892	}
	6893	else if (! UTF8_IS_ABOVE_LATIN1(c)) {
	6894
	6895	/* Target isn't utf8; convert the character in the UTF-8
	6896	* pattern to non-UTF8, and do a simple loop */
	6897	c = TWO_BYTE_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
	6898	while (scan < loceol && UCHARAT(scan) == c) {
	6899	scan++;
	6900	}
	6901	} /* else pattern char is above Latin1, can't possibly match the
	6902	non-UTF-8 target */
	6903	}
	6904	else {
	6905
	6906	/* Here, the string must be utf8; pattern isn't, and <c> is
	6907	* different in utf8 than not, so can't compare them directly.
	6908	* Outside the loop, find the two utf8 bytes that represent c, and
	6909	* then look for those in sequence in the utf8 string */
	6910	U8 high = UTF8_TWO_BYTE_HI(c);
	6911	U8 low = UTF8_TWO_BYTE_LO(c);
	6912
	6913	while (hardcount < max
	6914	&& scan + 1 < loceol
	6915	&& UCHARAT(scan) == high
	6916	&& UCHARAT(scan + 1) == low)
	6917	{
	6918	scan += 2;
	6919	hardcount++;
	6920	}
	6921	}
	6922	break;
	6923
	6924	case EXACTFA_NO_TRIE: /* This node only generated for non-utf8 patterns */
	6925	assert(! reginfo->is_utf8_pat);
	6926	/* FALL THROUGH */
	6927	case EXACTFA:
	6928	utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	6929	goto do_exactf;
	6930
	6931	case EXACTFL:
	6932	RXp_MATCH_TAINTED_on(prog);
	6933	utf8_flags = FOLDEQ_UTF8_LOCALE;
	6934	goto do_exactf;
	6935
	6936	case EXACTF: /* This node only generated for non-utf8 patterns */
	6937	assert(! reginfo->is_utf8_pat);
	6938	utf8_flags = 0;
	6939	goto do_exactf;
	6940
	6941	case EXACTFU_SS:
	6942	case EXACTFU:
	6943	utf8_flags = reginfo->is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	6944
	6945	do_exactf: {
	6946	int c1, c2;
	6947	U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
	6948
	6949	assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
	6950
	6951	if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
	6952	reginfo))
	6953	{
	6954	if (c1 == CHRTEST_VOID) {
	6955	/* Use full Unicode fold matching */
	6956	char *tmpeol = reginfo->strend;
	6957	STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
	6958	while (hardcount < max
	6959	&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
	6960	STRING(p), NULL, pat_len,
	6961	reginfo->is_utf8_pat, utf8_flags))
	6962	{
	6963	scan = tmpeol;
	6964	tmpeol = reginfo->strend;
	6965	hardcount++;
	6966	}
	6967	}
	6968	else if (utf8_target) {
	6969	if (c1 == c2) {
	6970	while (scan < loceol
	6971	&& hardcount < max
	6972	&& memEQ(scan, c1_utf8, UTF8SKIP(scan)))
	6973	{
	6974	scan += UTF8SKIP(scan);
	6975	hardcount++;
	6976	}
	6977	}
	6978	else {
	6979	while (scan < loceol
	6980	&& hardcount < max
	6981	&& (memEQ(scan, c1_utf8, UTF8SKIP(scan))
	6982	\|\| memEQ(scan, c2_utf8, UTF8SKIP(scan))))
	6983	{
	6984	scan += UTF8SKIP(scan);
	6985	hardcount++;
	6986	}
	6987	}
	6988	}
	6989	else if (c1 == c2) {
	6990	while (scan < loceol && UCHARAT(scan) == c1) {
	6991	scan++;
	6992	}
	6993	}
	6994	else {
	6995	while (scan < loceol &&
	6996	(UCHARAT(scan) == c1 \|\| UCHARAT(scan) == c2))
	6997	{
	6998	scan++;
	6999	}
	7000	}
	7001	}
	7002	break;
	7003	}
	7004	case ANYOF:
	7005	if (utf8_target) {
	7006	while (hardcount < max
	7007	&& scan < loceol
	7008	&& reginclass(prog, p, (U8)scan, (U8) loceol, utf8_target))
	7009	{
	7010	scan += UTF8SKIP(scan);
	7011	hardcount++;
	7012	}
	7013	} else {
	7014	while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
	7015	scan++;
	7016	}
	7017	break;
	7018
	7019	/* The argument (FLAGS) to all the POSIX node types is the class number */
	7020
	7021	case NPOSIXL:
	7022	to_complement = 1;
	7023	/* FALLTHROUGH */
	7024
	7025	case POSIXL:
	7026	RXp_MATCH_TAINTED_on(prog);
	7027	if (! utf8_target) {
	7028	while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
	7029	*scan)))
	7030	{
	7031	scan++;
	7032	}
	7033	} else {
	7034	while (hardcount < max && scan < loceol
	7035	&& to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(p),
	7036	(U8 *) scan)))
	7037	{
	7038	scan += UTF8SKIP(scan);
	7039	hardcount++;
	7040	}
	7041	}
	7042	break;
	7043
	7044	case POSIXD:
	7045	if (utf8_target) {
	7046	goto utf8_posix;
	7047	}
	7048	/* FALLTHROUGH */
	7049
	7050	case POSIXA:
	7051	if (utf8_target && loceol - scan > max) {
	7052
	7053	/* We didn't adjust <loceol> at the beginning of this routine
	7054	* because is UTF-8, but it is actually ok to do so, since here, to
	7055	* match, 1 char == 1 byte. */
	7056	loceol = scan + max;
	7057	}
	7058	while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
	7059	scan++;
	7060	}
	7061	break;
	7062
	7063	case NPOSIXD:
	7064	if (utf8_target) {
	7065	to_complement = 1;
	7066	goto utf8_posix;
	7067	}
	7068	/* FALL THROUGH */
	7069
	7070	case NPOSIXA:
	7071	if (! utf8_target) {
	7072	while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
	7073	scan++;
	7074	}
	7075	}
	7076	else {
	7077
	7078	/* The complement of something that matches only ASCII matches all
	7079	* UTF-8 variant code points, plus everything in ASCII that isn't
	7080	* in the class. */
	7081	while (hardcount < max && scan < loceol
	7082	&& (! UTF8_IS_INVARIANT(*scan)
	7083	\|\| ! _generic_isCC_A((U8) *scan, FLAGS(p))))
	7084	{
	7085	scan += UTF8SKIP(scan);
	7086	hardcount++;
	7087	}
	7088	}
	7089	break;
	7090
	7091	case NPOSIXU:
	7092	to_complement = 1;
	7093	/* FALLTHROUGH */
	7094
	7095	case POSIXU:
	7096	if (! utf8_target) {
	7097	while (scan < loceol && to_complement
	7098	^ cBOOL(_generic_isCC((U8) *scan, FLAGS(p))))
	7099	{
	7100	scan++;
	7101	}
	7102	}
	7103	else {
	7104	utf8_posix:
	7105	classnum = (_char_class_number) FLAGS(p);
	7106	if (classnum < _FIRST_NON_SWASH_CC) {
	7107
	7108	/* Here, a swash is needed for above-Latin1 code points.
	7109	* Process as many Latin1 code points using the built-in rules.
	7110	* Go to another loop to finish processing upon encountering
	7111	* the first Latin1 code point. We could do that in this loop
	7112	* as well, but the other way saves having to test if the swash
	7113	* has been loaded every time through the loop: extra space to
	7114	* save a test. */
	7115	while (hardcount < max && scan < loceol) {
	7116	if (UTF8_IS_INVARIANT(*scan)) {
	7117	if (! (to_complement ^ cBOOL(_generic_isCC((U8) *scan,
	7118	classnum))))
	7119	{
	7120	break;
	7121	}
	7122	scan++;
	7123	}
	7124	else if (UTF8_IS_DOWNGRADEABLE_START(*scan)) {
	7125	if (! (to_complement
	7126	^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(*scan,
	7127	*(scan + 1)),
	7128	classnum))))
	7129	{
	7130	break;
	7131	}
	7132	scan += 2;
	7133	}
	7134	else {
	7135	goto found_above_latin1;
	7136	}
	7137
	7138	hardcount++;
	7139	}
	7140	}
	7141	else {
	7142	/* For these character classes, the knowledge of how to handle
	7143	* every code point is compiled in to Perl via a macro. This
	7144	* code is written for making the loops as tight as possible.
	7145	* It could be refactored to save space instead */
	7146	switch (classnum) {
	7147	case _CC_ENUM_SPACE: /* XXX would require separate code
	7148	if we revert the change of \v
	7149	matching this */
	7150	/* FALL THROUGH */
	7151	case _CC_ENUM_PSXSPC:
	7152	while (hardcount < max
	7153	&& scan < loceol
	7154	&& (to_complement ^ cBOOL(isSPACE_utf8(scan))))
	7155	{
	7156	scan += UTF8SKIP(scan);
	7157	hardcount++;
	7158	}
	7159	break;
	7160	case _CC_ENUM_BLANK:
	7161	while (hardcount < max
	7162	&& scan < loceol
	7163	&& (to_complement ^ cBOOL(isBLANK_utf8(scan))))
	7164	{
	7165	scan += UTF8SKIP(scan);
	7166	hardcount++;
	7167	}
	7168	break;
	7169	case _CC_ENUM_XDIGIT:
	7170	while (hardcount < max
	7171	&& scan < loceol
	7172	&& (to_complement ^ cBOOL(isXDIGIT_utf8(scan))))
	7173	{
	7174	scan += UTF8SKIP(scan);
	7175	hardcount++;
	7176	}
	7177	break;
	7178	case _CC_ENUM_VERTSPACE:
	7179	while (hardcount < max
	7180	&& scan < loceol
	7181	&& (to_complement ^ cBOOL(isVERTWS_utf8(scan))))
	7182	{
	7183	scan += UTF8SKIP(scan);
	7184	hardcount++;
	7185	}
	7186	break;
	7187	case _CC_ENUM_CNTRL:
	7188	while (hardcount < max
	7189	&& scan < loceol
	7190	&& (to_complement ^ cBOOL(isCNTRL_utf8(scan))))
	7191	{
	7192	scan += UTF8SKIP(scan);
	7193	hardcount++;
	7194	}
	7195	break;
	7196	default:
	7197	Perl_croak(aTHX_ "panic: regrepeat() node %d='%s' has an unexpected character class '%d'", OP(p), PL_reg_name[OP(p)], classnum);
	7198	}
	7199	}
	7200	}
	7201	break;
	7202
	7203	found_above_latin1: /* Continuation of POSIXU and NPOSIXU */
	7204
	7205	/* Load the swash if not already present */
	7206	if (! PL_utf8_swash_ptrs[classnum]) {
	7207	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	7208	PL_utf8_swash_ptrs[classnum] = _core_swash_init(
	7209	"utf8", swash_property_names[classnum],
	7210	&PL_sv_undef, 1, 0, NULL, &flags);
	7211	}
	7212
	7213	while (hardcount < max && scan < loceol
	7214	&& to_complement ^ cBOOL(_generic_utf8(
	7215	classnum,
	7216	scan,
	7217	swash_fetch(PL_utf8_swash_ptrs[classnum],
	7218	(U8 *) scan,
	7219	TRUE))))
	7220	{
	7221	scan += UTF8SKIP(scan);
	7222	hardcount++;
	7223	}
	7224	break;
	7225
	7226	case LNBREAK:
	7227	if (utf8_target) {
	7228	while (hardcount < max && scan < loceol &&
	7229	(c=is_LNBREAK_utf8_safe(scan, loceol))) {
	7230	scan += c;
	7231	hardcount++;
	7232	}
	7233	} else {
	7234	/* LNBREAK can match one or two latin chars, which is ok, but we
	7235	* have to use hardcount in this situation, and throw away the
	7236	* adjustment to <loceol> done before the switch statement */
	7237	loceol = reginfo->strend;
	7238	while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
	7239	scan+=c;
	7240	hardcount++;
	7241	}
	7242	}
	7243	break;
	7244
	7245	case BOUND:
	7246	case BOUNDA:
	7247	case BOUNDL:
	7248	case BOUNDU:
	7249	case EOS:
	7250	case GPOS:
	7251	case KEEPS:
	7252	case NBOUND:
	7253	case NBOUNDA:
	7254	case NBOUNDL:
	7255	case NBOUNDU:
	7256	case OPFAIL:
	7257	case SBOL:
	7258	case SEOL:
	7259	/* These are all 0 width, so match right here or not at all. */
	7260	break;
	7261
	7262	default:
	7263	Perl_croak(aTHX_ "panic: regrepeat() called with unrecognized node type %d='%s'", OP(p), PL_reg_name[OP(p)]);
	7264	assert(0); /* NOTREACHED */
	7265
	7266	}
	7267
	7268	if (hardcount)
	7269	c = hardcount;
	7270	else
	7271	c = scan - *startposp;
	7272	*startposp = scan;
	7273
	7274	DEBUG_r({
	7275	GET_RE_DEBUG_FLAGS_DECL;
	7276	DEBUG_EXECUTE_r({
	7277	SV * const prop = sv_newmortal();
	7278	regprop(prog, prop, p);
	7279	PerlIO_printf(Perl_debug_log,
	7280	"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
	7281	REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
	7282	});
	7283	});
	7284
	7285	return(c);
	7286	}
	7287
	7288
	7289	#if !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION)
	7290	/*
	7291	- regclass_swash - prepare the utf8 swash. Wraps the shared core version to
	7292	create a copy so that changes the caller makes won't change the shared one.
	7293	If <altsvp> is non-null, will return NULL in it, for back-compat.
	7294	*/
	7295	SV *
	7296	Perl_regclass_swash(pTHX_ const regexp prog, const regnode node, bool doinit, SV listsvp, SV altsvp)
	7297	{
	7298	PERL_ARGS_ASSERT_REGCLASS_SWASH;
	7299
	7300	if (altsvp) {
	7301	*altsvp = NULL;
	7302	}
	7303
	7304	return newSVsv(core_regclass_swash(prog, node, doinit, listsvp));
	7305	}
	7306	#endif
	7307
	7308	STATIC SV *
	7309	S_core_regclass_swash(pTHX_ const regexp prog, const regnode node, bool doinit, SV** listsvp)
	7310	{
	7311	/* Returns the swash for the input 'node' in the regex 'prog'.
	7312	* If <doinit> is 'true', will attempt to create the swash if not already
	7313	* done.
	7314	* If <listsvp> is non-null, will return the printable contents of the
	7315	* swash. This can be used to get debugging information even before the
	7316	* swash exists, by calling this function with 'doinit' set to false, in
	7317	* which case the components that will be used to eventually create the
	7318	* swash are returned (in a printable form).
	7319	* Tied intimately to how regcomp.c sets up the data structure */
	7320
	7321	dVAR;
	7322	SV *sw = NULL;
	7323	SV si = NULL; / Input swash initialization string */
	7324	SV* invlist = NULL;
	7325
	7326	RXi_GET_DECL(prog,progi);
	7327	const struct reg_data * const data = prog ? progi->data : NULL;
	7328
	7329	PERL_ARGS_ASSERT_CORE_REGCLASS_SWASH;
	7330
	7331	assert(ANYOF_NONBITMAP(node));
	7332
	7333	if (data && data->count) {
	7334	const U32 n = ARG(node);
	7335
	7336	if (data->what[n] == 's') {
	7337	SV * const rv = MUTABLE_SV(data->data[n]);
	7338	AV * const av = MUTABLE_AV(SvRV(rv));
	7339	SV **const ary = AvARRAY(av);
	7340	U8 swash_init_flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	7341
	7342	si = ary; / ary[0] = the string to initialize the swash with */
	7343
	7344	/* Elements 2 and 3 are either both present or both absent. [2] is
	7345	* any inversion list generated at compile time; [3] indicates if
	7346	* that inversion list has any user-defined properties in it. */
	7347	if (av_len(av) >= 2) {
	7348	invlist = ary[2];
	7349	if (SvUV(ary[3])) {
	7350	swash_init_flags \|= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
	7351	}
	7352	}
	7353	else {
	7354	invlist = NULL;
	7355	}
	7356
	7357	/* Element [1] is reserved for the set-up swash. If already there,
	7358	* return it; if not, create it and store it there */
	7359	if (ary[1] && SvROK(ary[1])) {
	7360	sw = ary[1];
	7361	}
	7362	else if (si && doinit) {
	7363
	7364	sw = _core_swash_init("utf8", /* the utf8 package */
	7365	"", /* nameless */
	7366	si,
	7367	1, /* binary */
	7368	0, /* not from tr/// */
	7369	invlist,
	7370	&swash_init_flags);
	7371	(void)av_store(av, 1, sw);
	7372	}
	7373	}
	7374	}
	7375
	7376	/* If requested, return a printable version of what this swash matches */
	7377	if (listsvp) {
	7378	SV* matches_string = newSVpvn("", 0);
	7379
	7380	/* The swash should be used, if possible, to get the data, as it
	7381	* contains the resolved data. But this function can be called at
	7382	* compile-time, before everything gets resolved, in which case we
	7383	* return the currently best available information, which is the string
	7384	* that will eventually be used to do that resolving, 'si' */
	7385	if ((! sw \|\| (invlist = _get_swash_invlist(sw)) == NULL)
	7386	&& (si && si != &PL_sv_undef))
	7387	{
	7388	sv_catsv(matches_string, si);
	7389	}
	7390
	7391	/* Add the inversion list to whatever we have. This may have come from
	7392	* the swash, or from an input parameter */
	7393	if (invlist) {
	7394	sv_catsv(matches_string, _invlist_contents(invlist));
	7395	}
	7396	*listsvp = matches_string;
	7397	}
	7398
	7399	return sw;
	7400	}
	7401
	7402	/*
	7403	- reginclass - determine if a character falls into a character class
	7404
	7405	n is the ANYOF regnode
	7406	p is the target string
	7407	p_end points to one byte beyond the end of the target string
	7408	utf8_target tells whether p is in UTF-8.
	7409
	7410	Returns true if matched; false otherwise.
	7411
	7412	Note that this can be a synthetic start class, a combination of various
	7413	nodes, so things you think might be mutually exclusive, such as locale,
	7414	aren't. It can match both locale and non-locale
	7415
	7416	*/
	7417
	7418	STATIC bool
	7419	S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const U8* const p_end, const bool utf8_target)
	7420	{
	7421	dVAR;
	7422	const char flags = ANYOF_FLAGS(n);
	7423	bool match = FALSE;
	7424	UV c = *p;
	7425
	7426	PERL_ARGS_ASSERT_REGINCLASS;
	7427
	7428	/* If c is not already the code point, get it. Note that
	7429	* UTF8_IS_INVARIANT() works even if not in UTF-8 */
	7430	if (! UTF8_IS_INVARIANT(c) && utf8_target) {
	7431	STRLEN c_len = 0;
	7432	c = utf8n_to_uvchr(p, p_end - p, &c_len,
	7433	(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
	7434	\| UTF8_ALLOW_FFFF \| UTF8_CHECK_ONLY);
	7435	/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
	7436	* UTF8_ALLOW_FFFF */
	7437	if (c_len == (STRLEN)-1)
	7438	Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
	7439	}
	7440
	7441	/* If this character is potentially in the bitmap, check it */
	7442	if (c < 256) {
	7443	if (ANYOF_BITMAP_TEST(n, c))
	7444	match = TRUE;
	7445	else if (flags & ANYOF_NON_UTF8_LATIN1_ALL
	7446	&& ! utf8_target
	7447	&& ! isASCII(c))
	7448	{
	7449	match = TRUE;
	7450	}
	7451	else if (flags & ANYOF_LOCALE) {
	7452	if (flags & ANYOF_LOC_FOLD) {
	7453	RXp_MATCH_TAINTED_on(prog);
	7454	if (ANYOF_BITMAP_TEST(n, PL_fold_locale[c])) {
	7455	match = TRUE;
	7456	}
	7457	}
	7458	else if (ANYOF_POSIXL_TEST_ANY_SET(n)) {
	7459
	7460	/* The data structure is arranged so bits 0, 2, 4, ... are set
	7461	* if the class includes the Posix character class given by
	7462	* bit/2; and 1, 3, 5, ... are set if the class includes the
	7463	* complemented Posix class given by int(bit/2). So we loop
	7464	* through the bits, each time changing whether we complement
	7465	* the result or not. Suppose for the sake of illustration
	7466	* that bits 0-3 mean respectively, \w, \W, \s, \S. If bit 0
	7467	* is set, it means there is a match for this ANYOF node if the
	7468	* character is in the class given by the expression (0 / 2 = 0
	7469	* = \w). If it is in that class, isFOO_lc() will return 1,
	7470	* and since 'to_complement' is 0, the result will stay TRUE,
	7471	* and we exit the loop. Suppose instead that bit 0 is 0, but
	7472	* bit 1 is 1. That means there is a match if the character
	7473	* matches \W. We won't bother to call isFOO_lc() on bit 0,
	7474	* but will on bit 1. On the second iteration 'to_complement'
	7475	* will be 1, so the exclusive or will reverse things, so we
	7476	* are testing for \W. On the third iteration, 'to_complement'
	7477	* will be 0, and we would be testing for \s; the fourth
	7478	* iteration would test for \S, etc.
	7479	*
	7480	* Note that this code assumes that all the classes are closed
	7481	* under folding. For example, if a character matches \w, then
	7482	* its fold does too; and vice versa. This should be true for
	7483	* any well-behaved locale for all the currently defined Posix
	7484	* classes, except for :lower: and :upper:, which are handled
	7485	* by the pseudo-class :cased: which matches if either of the
	7486	* other two does. To get rid of this assumption, an outer
	7487	* loop could be used below to iterate over both the source
	7488	* character, and its fold (if different) */
	7489
	7490	int count = 0;
	7491	int to_complement = 0;
	7492
	7493	RXp_MATCH_TAINTED_on(prog);
	7494	while (count < ANYOF_MAX) {
	7495	if (ANYOF_POSIXL_TEST(n, count)
	7496	&& to_complement ^ cBOOL(isFOO_lc(count/2, (U8) c)))
	7497	{
	7498	match = TRUE;
	7499	break;
	7500	}
	7501	count++;
	7502	to_complement ^= 1;
	7503	}
	7504	}
	7505	}
	7506	}
	7507
	7508	/* If the bitmap didn't (or couldn't) match, and something outside the
	7509	* bitmap could match, try that. Locale nodes specify completely the
	7510	* behavior of code points in the bit map (otherwise, a utf8 target would
	7511	* cause them to be treated as Unicode and not locale), except in
	7512	* the very unlikely event when this node is a synthetic start class, which
	7513	* could be a combination of locale and non-locale nodes. So allow locale
	7514	* to match for the synthetic start class, which will give a false
	7515	* positive that will be resolved when the match is done again as not part
	7516	* of the synthetic start class */
	7517	if (!match) {
	7518	if (c >= 256 && (flags & ANYOF_ABOVE_LATIN1_ALL)) {
	7519	match = TRUE; /* Everything above 255 matches */
	7520	}
	7521	else if (ANYOF_NONBITMAP(n)
	7522	&& ((flags & ANYOF_NONBITMAP_NON_UTF8)
	7523	\|\| (utf8_target
	7524	&& (c >=256
	7525	\|\| (! (flags & ANYOF_LOCALE))
	7526	\|\| OP(n) == ANYOF_SYNTHETIC))))
	7527	{
	7528	SV * const sw = core_regclass_swash(prog, n, TRUE, 0);
	7529	if (sw) {
	7530	U8 * utf8_p;
	7531	if (utf8_target) {
	7532	utf8_p = (U8 *) p;
	7533	} else { /* Convert to utf8 */
	7534	STRLEN len = 1;
	7535	utf8_p = bytes_to_utf8(p, &len);
	7536	}
	7537
	7538	if (swash_fetch(sw, utf8_p, TRUE)) {
	7539	match = TRUE;
	7540	}
	7541
	7542	/* If we allocated a string above, free it */
	7543	if (! utf8_target) Safefree(utf8_p);
	7544	}
	7545	}
	7546
	7547	if (UNICODE_IS_SUPER(c)
	7548	&& (flags & ANYOF_WARN_SUPER)
	7549	&& ckWARN_d(WARN_NON_UNICODE))
	7550	{
	7551	Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
	7552	"Matched non-Unicode code point 0x%04"UVXf" against Unicode property; may not be portable", c);
	7553	}
	7554	}
	7555
	7556	#if ANYOF_INVERT != 1
	7557	/* Depending on compiler optimization cBOOL takes time, so if don't have to
	7558	* use it, don't */
	7559	# error ANYOF_INVERT needs to be set to 1, or guarded with cBOOL below,
	7560	#endif
	7561
	7562	/* The xor complements the return if to invert: 1^1 = 0, 1^0 = 1 */
	7563	return (flags & ANYOF_INVERT) ^ match;
	7564	}
	7565
	7566	STATIC U8 *
	7567	S_reghop3(U8 s, SSize_t off, const U8 lim)
	7568	{
	7569	/* return the position 'off' UTF-8 characters away from 's', forward if
	7570	* 'off' >= 0, backwards if negative. But don't go outside of position
	7571	* 'lim', which better be < s if off < 0 */
	7572
	7573	dVAR;
	7574
	7575	PERL_ARGS_ASSERT_REGHOP3;
	7576
	7577	if (off >= 0) {
	7578	while (off-- && s < lim) {
	7579	/* XXX could check well-formedness here */
	7580	s += UTF8SKIP(s);
	7581	}
	7582	}
	7583	else {
	7584	while (off++ && s > lim) {
	7585	s--;
	7586	if (UTF8_IS_CONTINUED(*s)) {
	7587	while (s > lim && UTF8_IS_CONTINUATION(*s))
	7588	s--;
	7589	}
	7590	/* XXX could check well-formedness here */
	7591	}
	7592	}
	7593	return s;
	7594	}
	7595
	#ifdef XXX_dmq
	/* there are a bunch of places where we use two reghop3's that should
	   be replaced with this routine. but since thats not done yet
	   we ifdef it out - dmq
	*/
	STATIC U8 *
	S_reghop4(U8 *s, SSize_t off, const U8* llim, const U8* rlim)
	{
	    /* Like a hop of 'off' UTF-8 characters from 's', but with separate
	     * bounds: forward movement stops at 'rlim', backward movement stops at
	     * 'llim' */

	    dVAR;

	    PERL_ARGS_ASSERT_REGHOP4;

	    if (off >= 0) {
	        while (off-- && s < rlim) {
	            /* XXX could check well-formedness here */
	            s += UTF8SKIP(s);
	        }
	    }
	    else {
	        while (off++ && s > llim) {
	            s--;
	            if (UTF8_IS_CONTINUED(*s)) {
	                while (s > llim && UTF8_IS_CONTINUATION(*s))
	                    s--;
	            }
	            /* XXX could check well-formedness here */
	        }
	    }
	    return s;
	}
	#endif
	7627
	7628	STATIC U8 *
	7629	S_reghopmaybe3(U8* s, SSize_t off, const U8* lim)
	7630	{
	7631	dVAR;
	7632
	7633	PERL_ARGS_ASSERT_REGHOPMAYBE3;
	7634
	7635	if (off >= 0) {
	7636	while (off-- && s < lim) {
	7637	/* XXX could check well-formedness here */
	7638	s += UTF8SKIP(s);
	7639	}
	7640	if (off >= 0)
	7641	return NULL;
	7642	}
	7643	else {
	7644	while (off++ && s > lim) {
	7645	s--;
	7646	if (UTF8_IS_CONTINUED(*s)) {
	7647	while (s > lim && UTF8_IS_CONTINUATION(*s))
	7648	s--;
	7649	}
	7650	/* XXX could check well-formedness here */
	7651	}
	7652	if (off <= 0)
	7653	return NULL;
	7654	}
	7655	return s;
	7656	}
	7657
	7658
	7659	/* when executing a regex that may have (?{}), extra stuff needs setting
	7660	up that will be visible to the called code, even before the current
	7661	match has finished. In particular:
	7662
	7663	* $_ is localised to the SV currently being matched;
	7664	* pos($_) is created if necessary, ready to be updated on each call-out
	7665	to code;
	7666	* a fake PMOP is created that can be set to PL_curpm (normally PL_curpm
	7667	isn't set until the current pattern is successfully finished), so that
	7668	$1 etc of the match-so-far can be seen;
	7669	* save the old values of subbeg etc of the current regex, and set them
	7670	to the current string (again, this is normally only done at the end
	7671	of execution)
	7672	*/
	7673
	7674	static void
	7675	S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
	7676	{
	7677	MAGIC *mg;
	7678	regexp *const rex = ReANY(reginfo->prog);
	7679	regmatch_info_aux_eval *eval_state = reginfo->info_aux_eval;
	7680
	7681	eval_state->rex = rex;
	7682
	7683	if (reginfo->sv) {
	7684	/* Make $_ available to executed code. */
	7685	if (reginfo->sv != DEFSV) {
	7686	SAVE_DEFSV;
	7687	DEFSV_set(reginfo->sv);
	7688	}
	7689
	7690	if (!(mg = mg_find_mglob(reginfo->sv))) {
	7691	/* prepare for quick setting of pos */
	7692	mg = sv_magicext_mglob(reginfo->sv);
	7693	mg->mg_len = -1;
	7694	}
	7695	eval_state->pos_magic = mg;
	7696	eval_state->pos = mg->mg_len;
	7697	eval_state->pos_flags = mg->mg_flags;
	7698	}
	7699	else
	7700	eval_state->pos_magic = NULL;
	7701
	7702	if (!PL_reg_curpm) {
	7703	/* PL_reg_curpm is a fake PMOP that we can attach the current
	7704	* regex to and point PL_curpm at, so that $1 et al are visible
	7705	* within a /(?{})/. It's just allocated once per interpreter the
	7706	* first time its needed */
	7707	Newxz(PL_reg_curpm, 1, PMOP);
	7708	#ifdef USE_ITHREADS
	7709	{
	7710	SV* const repointer = &PL_sv_undef;
	7711	/* this regexp is also owned by the new PL_reg_curpm, which
	7712	will try to free it. */
	7713	av_push(PL_regex_padav, repointer);
	7714	PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
	7715	PL_regex_pad = AvARRAY(PL_regex_padav);
	7716	}
	7717	#endif
	7718	}
	7719	SET_reg_curpm(reginfo->prog);
	7720	eval_state->curpm = PL_curpm;
	7721	PL_curpm = PL_reg_curpm;
	7722	if (RXp_MATCH_COPIED(rex)) {
	7723	/* Here is a serious problem: we cannot rewrite subbeg,
	7724	since it may be needed if this match fails. Thus
	7725	$` inside (?{}) could fail... */
	7726	eval_state->subbeg = rex->subbeg;
	7727	eval_state->sublen = rex->sublen;
	7728	eval_state->suboffset = rex->suboffset;
	7729	eval_state->subcoffset = rex->subcoffset;
	7730	#ifdef PERL_ANY_COW
	7731	eval_state->saved_copy = rex->saved_copy;
	7732	#endif
	7733	RXp_MATCH_COPIED_off(rex);
	7734	}
	7735	else
	7736	eval_state->subbeg = NULL;
	7737	rex->subbeg = (char *)reginfo->strbeg;
	7738	rex->suboffset = 0;
	7739	rex->subcoffset = 0;
	7740	rex->sublen = reginfo->strend - reginfo->strbeg;
	7741	}
	7742
	7743
	7744	/* destructor to clear up regmatch_info_aux and regmatch_info_aux_eval */
	7745
	7746	static void
	7747	S_cleanup_regmatch_info_aux(pTHX_ void *arg)
	7748	{
	7749	dVAR;
	7750	regmatch_info_aux aux = (regmatch_info_aux ) arg;
	7751	regmatch_info_aux_eval *eval_state = aux->info_aux_eval;
	7752	regmatch_slab *s;
	7753
	7754	Safefree(aux->poscache);
	7755
	7756	if (eval_state) {
	7757
	7758	/* undo the effects of S_setup_eval_state() */
	7759
	7760	if (eval_state->subbeg) {
	7761	regexp * const rex = eval_state->rex;
	7762	rex->subbeg = eval_state->subbeg;
	7763	rex->sublen = eval_state->sublen;
	7764	rex->suboffset = eval_state->suboffset;
	7765	rex->subcoffset = eval_state->subcoffset;
	7766	#ifdef PERL_ANY_COW
	7767	rex->saved_copy = eval_state->saved_copy;
	7768	#endif
	7769	RXp_MATCH_COPIED_on(rex);
	7770	}
	7771	if (eval_state->pos_magic)
	7772	{
	7773	eval_state->pos_magic->mg_len = eval_state->pos;
	7774	eval_state->pos_magic->mg_flags =
	7775	(eval_state->pos_magic->mg_flags & ~MGf_BYTES)
	7776	\| (eval_state->pos_flags & MGf_BYTES);
	7777	}
	7778
	7779	PL_curpm = eval_state->curpm;
	7780	}
	7781
	7782	PL_regmatch_state = aux->old_regmatch_state;
	7783	PL_regmatch_slab = aux->old_regmatch_slab;
	7784
	7785	/* free all slabs above current one - this must be the last action
	7786	* of this function, as aux and eval_state are allocated within
	7787	* slabs and may be freed here */
	7788
	7789	s = PL_regmatch_slab->next;
	7790	if (s) {
	7791	PL_regmatch_slab->next = NULL;
	7792	while (s) {
	7793	regmatch_slab * const osl = s;
	7794	s = s->next;
	7795	Safefree(osl);
	7796	}
	7797	}
	7798	}
	7799
	7800
	7801	STATIC void
	7802	S_to_utf8_substr(pTHX_ regexp *prog)
	7803	{
	7804	/* Converts substr fields in prog from bytes to UTF-8, calling fbm_compile
	7805	* on the converted value */
	7806
	7807	int i = 1;
	7808
	7809	PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
	7810
	7811	do {
	7812	if (prog->substrs->data[i].substr
	7813	&& !prog->substrs->data[i].utf8_substr) {
	7814	SV* const sv = newSVsv(prog->substrs->data[i].substr);
	7815	prog->substrs->data[i].utf8_substr = sv;
	7816	sv_utf8_upgrade(sv);
	7817	if (SvVALID(prog->substrs->data[i].substr)) {
	7818	if (SvTAIL(prog->substrs->data[i].substr)) {
	7819	/* Trim the trailing \n that fbm_compile added last
	7820	time. */
	7821	SvCUR_set(sv, SvCUR(sv) - 1);
	7822	/* Whilst this makes the SV technically "invalid" (as its
	7823	buffer is no longer followed by "\0") when fbm_compile()
	7824	adds the "\n" back, a "\0" is restored. */
	7825	fbm_compile(sv, FBMcf_TAIL);
	7826	} else
	7827	fbm_compile(sv, 0);
	7828	}
	7829	if (prog->substrs->data[i].substr == prog->check_substr)
	7830	prog->check_utf8 = sv;
	7831	}
	7832	} while (i--);
	7833	}
	7834
	7835	STATIC bool
	7836	S_to_byte_substr(pTHX_ regexp *prog)
	7837	{
	7838	/* Converts substr fields in prog from UTF-8 to bytes, calling fbm_compile
	7839	* on the converted value; returns FALSE if can't be converted. */
	7840
	7841	dVAR;
	7842	int i = 1;
	7843
	7844	PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
	7845
	7846	do {
	7847	if (prog->substrs->data[i].utf8_substr
	7848	&& !prog->substrs->data[i].substr) {
	7849	SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
	7850	if (! sv_utf8_downgrade(sv, TRUE)) {
	7851	return FALSE;
	7852	}
	7853	if (SvVALID(prog->substrs->data[i].utf8_substr)) {
	7854	if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
	7855	/* Trim the trailing \n that fbm_compile added last
	7856	time. */
	7857	SvCUR_set(sv, SvCUR(sv) - 1);
	7858	fbm_compile(sv, FBMcf_TAIL);
	7859	} else
	7860	fbm_compile(sv, 0);
	7861	}
	7862	prog->substrs->data[i].substr = sv;
	7863	if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
	7864	prog->check_substr = sv;
	7865	}
	7866	} while (i--);
	7867
	7868	return TRUE;
	7869	}
	7870
	7871	/*
	7872	* Local variables:
	7873	* c-indentation-style: bsd
	7874	* c-basic-offset: 4
	7875	* indent-tabs-mode: nil
	7876	* End:
	7877	*
	7878	* ex: set ts=8 sts=4 sw=4 et:
	7879	*/