perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regexec.c
	2	*/
	3
	4	/*
	5	* One Ring to rule them all, One Ring to find them
	6	&
	7	* [p.v of _The Lord of the Rings_, opening poem]
	8	* [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
	9	* [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
	10	*/
	11
	12	/* This file contains functions for executing a regular expression. See
	13	* also regcomp.c which funnily enough, contains functions for compiling
	14	* a regular expression.
	15	*
	16	* This file is also copied at build time to ext/re/re_exec.c, where
	17	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	18	* This causes the main functions to be compiled under new names and with
	19	* debugging support added, which makes "use re 'debug'" work.
	20	*/
	21
	22	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	23	* be confused with the original package (see point 3 below). Thanks, Henry!
	24	*/
	25
	26	/* Additional note: this code is very heavily munged from Henry's version
	27	* in places. In some spots I've traded clarity for efficiency, so don't
	28	* blame Henry for some of the lack of readability.
	29	*/
	30
	31	/* The names of the functions have been changed from regcomp and
	32	* regexec to pregcomp and pregexec in order to avoid conflicts
	33	* with the POSIX routines of the same names.
	34	*/
	35
	36	#ifdef PERL_EXT_RE_BUILD
	37	#include "re_top.h"
	38	#endif
	39
	40	/*
	41	* pregcomp and pregexec -- regsub and regerror are not used in perl
	42	*
	43	* Copyright (c) 1986 by University of Toronto.
	44	* Written by Henry Spencer. Not derived from licensed software.
	45	*
	46	* Permission is granted to anyone to use this software for any
	47	* purpose on any computer system, and to redistribute it freely,
	48	* subject to the following restrictions:
	49	*
	50	* 1. The author is not responsible for the consequences of use of
	51	* this software, no matter how awful, even if they arise
	52	* from defects in it.
	53	*
	54	* 2. The origin of this software must not be misrepresented, either
	55	* by explicit claim or by omission.
	56	*
	57	* 3. Altered versions must be plainly marked as such, and must not
	58	* be misrepresented as being the original software.
	59	*
	60	**** Alterations to Henry's code are...
	61	****
	62	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	63	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
	64	**** by Larry Wall and others
	65	****
	66	**** You may distribute under the terms of either the GNU General Public
	67	**** License or the Artistic License, as specified in the README file.
	68	*
	69	* Beware that some of this code is subtly aware of the way operator
	70	* precedence is structured in regular expressions. Serious changes in
	71	* regular-expression syntax might require a total rethink.
	72	*/
	73	#include "EXTERN.h"
	74	#define PERL_IN_REGEXEC_C
	75	#include "perl.h"
	76
	77	#ifdef PERL_IN_XSUB_RE
	78	# include "re_comp.h"
	79	#else
	80	# include "regcomp.h"
	81	#endif
	82
		/* RF_* are bit values; the ones used in this part of the file are OR-ed into
		* and tested against PL_reg_flags. */
	83	#define RF_tainted 1 /* tainted information used? e.g. locale */
	84	#define RF_warned 2 /* warned about big count? */
	85
	86	#define RF_utf8 8 /* Pattern contains multibyte chars? */
	87
		/* True when the RF_utf8 bit is set in PL_reg_flags. */
	88	#define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
	89
		/* NOTE(review): RS_* are not referenced in the visible part of the file;
		* confirm where they are consumed before changing them. */
	90	#define RS_init 1 /* eval environment created */
	91	#define RS_set 2 /* replsv value is set */
	92
		/* Fallback definition, used only if STATIC has not been defined already. */
	93	#ifndef STATIC
	94	#define STATIC static
	95	#endif
	96
	97	/* Valid for non-utf8 strings, non-ANYOFV nodes only: avoids the reginclass
	98	* call if there are no complications: i.e., if everything matchable is
	99	* straightforward in the bitmap.
		*
		* When ANYOF_FLAGS(p) is non-zero this calls reginclass(prog,p,c,0,0);
		* otherwise it is just ANYOF_BITMAP_TEST on the byte that c points at.
		* c is therefore a pointer, not a character value. */
	100	#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) \
	101	: ANYOF_BITMAP_TEST(p,*(c)))
	102
	103	/*
	104	* Forwards.
	105	*/
	106
	107	#define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))
	108	#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
	109
	110	#define HOPc(pos,off) \
	111	(char *)(PL_reg_match_utf8 \
	112	? reghop3((U8)pos, off, (U8)(off >= 0 ? PL_regeol : PL_bostr)) \
	113	: (U8*)(pos + off))
	114	#define HOPBACKc(pos, off) \
	115	(char*)(PL_reg_match_utf8\
	116	? reghopmaybe3((U8)pos, -off, (U8)PL_bostr) \
	117	: (pos - off >= PL_bostr) \
	118	? (U8*)pos - off \
	119	: NULL)
	120
	121	#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8)(pos), off, (U8)(lim)) : (U8*)(pos + off))
	122	#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
	123
	124	/* these are unrolled below in the CCC_TRY_XXX defined */
	125	#ifdef EBCDIC
	126	/* Often 'str' is a hard-coded utf8 string instead of utfebcdic. so just
	127	* skip the check on EBCDIC platforms */
	128	# define LOAD_UTF8_CHARCLASS(class,str) LOAD_UTF8_CHARCLASS_NO_CHECK(class)
	129	#else
		/* If PL_utf8_<class> is not yet set, call is_utf8_<class>() on str between
		* ENTER/save_re_context() and LEAVE, then assert both that the call returned
		* true and that PL_utf8_<class> is now set.  str is expected to be a member
		* of the class so that the first assert holds. */
	130	# define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
	131	if (!CAT2(PL_utf8_,class)) { \
	132	bool ok; \
	133	ENTER; save_re_context(); \
	134	ok=CAT2(is_utf8_,class)((const U8*)str); \
	135	assert(ok); assert(CAT2(PL_utf8_,class)); LEAVE; } } STMT_END
	136	#endif
	137
	138	/* Doesn't do an assert to verify that it is correct: is_utf8_<class>() is
		* called on the fixed string " " and its result is discarded. */
	139	#define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
	140	if (!CAT2(PL_utf8_,class)) { \
	141	bool throw_away __attribute__unused__; \
	142	ENTER; save_re_context(); \
	143	throw_away = CAT2(is_utf8_,class)((const U8*)" "); \
	144	LEAVE; } } STMT_END
	145
		/* Wrappers for three classes, each passing a sample character of the class. */
	146	#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
	147	#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
	148	#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
	149
		/* Loads every X_* class in one go.  The expansion is a sequence of
		* statements without a trailing semicolon on the last one. */
	150	#define LOAD_UTF8_CHARCLASS_GCB() /* Grapheme cluster boundaries */ \
	151	LOAD_UTF8_CHARCLASS(X_begin, " "); \
	152	LOAD_UTF8_CHARCLASS(X_non_hangul, "A"); \
	153	/* These are utf8 constants, and not utf-ebcdic constants, so the \
	154	* assert should likely and hopefully fail on an EBCDIC machine */ \
	155	LOAD_UTF8_CHARCLASS(X_extend, "\xcc\x80"); /* U+0300 */ \
	156	\
	157	/* No asserts are done for these, in case called on an early \
	158	* Unicode version in which they map to nothing */ \
	159	LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* U+0E40 "\xe0\xb9\x80" */ \
	160	LOAD_UTF8_CHARCLASS_NO_CHECK(X_L); /* U+1100 "\xe1\x84\x80" */ \
	161	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV); /* U+AC00 "\xea\xb0\x80" */ \
	162	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LVT); /* U+AC01 "\xea\xb0\x81" */ \
	163	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV_LVT_V);/* U+AC01 "\xea\xb0\x81" */\
	164	LOAD_UTF8_CHARCLASS_NO_CHECK(X_T); /* U+11A8 "\xe1\x86\xa8" */ \
	165	LOAD_UTF8_CHARCLASS_NO_CHECK(X_V) /* U+1160 "\xe1\x85\xa0" */
	166
		/* Expands to nothing.  It is passed as POS_OR_NEG to _CCC_TRY_CODE when the
		* test result should not be negated. */
	167	#define PLACEHOLDER /* Something for the preprocessor to grab onto */
	168
	169	/* The actual code for CCC_TRY, which uses several variables from the routine
	170	* it's callable from. It is designed to be the bulk of a case statement.
	171	* FUNC is the macro or function to call on non-utf8 targets that indicate if
	172	* nextchr matches the class.
	173	* UTF8_TEST is the whole test string to use for utf8 targets
	174	* LOAD is what to use to test, and if not present to load in the swash for the
	175	* class
	176	* POS_OR_NEG is either empty or ! to complement the results of FUNC or
	177	* UTF8_TEST test.
	178	* The logic is: Fail if we're at the end-of-string; otherwise if the target is
	179	* utf8 and a variant, load the swash if necessary and test using the utf8
	180	* test. Advance to the next character if test is ok, otherwise fail; If not
	181	* utf8 or an invariant under utf8, use the non-utf8 test, and fail if it
	182	* fails, or advance to the next character */
		/* CLASS and STR are forwarded unchanged to LOAD_UTF8_CHARCLASS.  The names
		* the expansion relies on at its use site are locinput, nextchr and
		* utf8_target.  Failure goes through sayNO; both successful paths update
		* locinput and nextchr and then break. */
	183
	184	#define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR) \
	185	if (locinput >= PL_regeol) { \
	186	sayNO; \
	187	} \
	188	if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \
	189	LOAD_UTF8_CHARCLASS(CLASS, STR); \
	190	if (POS_OR_NEG (UTF8_TEST)) { \
	191	sayNO; \
	192	} \
	193	locinput += PL_utf8skip[nextchr]; \
	194	nextchr = UCHARAT(locinput); \
	195	break; \
	196	} \
	197	if (POS_OR_NEG (FUNC(nextchr))) { \
	198	sayNO; \
	199	} \
	200	nextchr = UCHARAT(++locinput); \
	201	break;
	202
	203	/* Handle the non-locale cases for a character class and its complement. It
	204	* calls _CCC_TRY_CODE with a ! to complement the test for the character class.
	205	* This is because that code fails when the test succeeds, so we want to have
	206	* the test fail so that the code succeeds. The swash is stored in a
	207	* predictable PL_ place */
		/* NAME is the case label for the class and NNAME the label for its
		* complement.  Both use the same utf8 test, swash_fetch() on PL_utf8_<CLASS>
		* at locinput; only the POS_OR_NEG argument differs (! for NAME, PLACEHOLDER
		* for NNAME).  The definition ends in a line continuation, so the line
		* directly after it is part of the macro. */
	208	#define _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, \
	209	CLASS, STR) \
	210	case NAME: \
	211	_CCC_TRY_CODE( !, FUNC, \
	212	cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	213	(U8*)locinput, TRUE)), \
	214	CLASS, STR) \
	215	case NNAME: \
	216	_CCC_TRY_CODE( PLACEHOLDER , FUNC, \
	217	cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
	218	(U8*)locinput, TRUE)), \
	219	CLASS, STR) \
	220
	221	/* Generate the case statements for both locale and non-locale character
	222	* classes in regmatch for classes that don't have special unicode semantics.
	223	* Locales don't use an immediate swash, but an intermediary special locale
	224	* function that is called on the pointer to the current place in the input
	225	* string. That function will resolve to needing the same swash. One might
	226	* think that because we don't know what the locale will match, we shouldn't
	227	* check with the swash loading function that it loaded properly; ie, that we
	228	* should use LOAD_UTF8_CHARCLASS_NO_CHECK for those, but what is passed to the
	229	* regular LOAD_UTF8_CHARCLASS is in non-locale terms, and so locale is
	230	* irrelevant here */
		/* Parameters:
		*   NAME, NNAME, FUNC       - non-locale labels and test, handed on to
		*                             _CCC_TRY_NONLOCALE.
		*   NAMEL, NNAMEL           - locale labels; both set RF_tainted in
		*                             PL_reg_flags before testing.
		*   LCFUNC, LCFUNC_utf8     - locale tests for a byte (nextchr) and for the
		*                             utf8 character at locinput.
		*   NAMEA, NNAMEA, FUNCA    - labels tested with FUNCA(nextchr) only.  NAMEA
		*                             advances one byte on a match; NNAMEA advances
		*                             by PL_utf8skip[nextchr] when utf8_target,
		*                             otherwise by one byte.
		*   CLASS, STR              - forwarded to LOAD_UTF8_CHARCLASS.
		* In each NAME/NNAME pair the N-prefixed label is the complemented class. */
	231	#define CCC_TRY(NAME, NNAME, FUNC, \
	232	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	233	NAMEA, NNAMEA, FUNCA, \
	234	CLASS, STR) \
	235	case NAMEL: \
	236	PL_reg_flags |= RF_tainted; \
	237	_CCC_TRY_CODE( !, LCFUNC, LCFUNC_utf8((U8*)locinput), CLASS, STR) \
	238	case NNAMEL: \
	239	PL_reg_flags |= RF_tainted; \
	240	_CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput), \
	241	CLASS, STR) \
	242	case NAMEA: \
	243	if (locinput >= PL_regeol || ! FUNCA(nextchr)) { \
	244	sayNO; \
	245	} \
	246	/* Matched a utf8-invariant, so don't have to worry about utf8 */ \
	247	nextchr = UCHARAT(++locinput); \
	248	break; \
	249	case NNAMEA: \
	250	if (locinput >= PL_regeol || FUNCA(nextchr)) { \
	251	sayNO; \
	252	} \
	253	if (utf8_target) { \
	254	locinput += PL_utf8skip[nextchr]; \
	255	nextchr = UCHARAT(locinput); \
	256	} \
	257	else { \
	258	nextchr = UCHARAT(++locinput); \
	259	} \
	260	break; \
	261	/* Generate the non-locale cases */ \
	262	_CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)
	263
	264	/* This is like CCC_TRY, but has an extra set of parameters for generating case
	265	* statements to handle separate Unicode semantics nodes */
		/* The extra set is NAMEU, NNAMEU and FUNCU.  The expansion is everything
		* CCC_TRY produces for the other arguments, followed by one more
		* _CCC_TRY_NONLOCALE pair for NAMEU/NNAMEU that tests with FUNCU and uses
		* the same CLASS and STR. */
	266	#define CCC_TRY_U(NAME, NNAME, FUNC, \
	267	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	268	NAMEU, NNAMEU, FUNCU, \
	269	NAMEA, NNAMEA, FUNCA, \
	270	CLASS, STR) \
	271	CCC_TRY(NAME, NNAME, FUNC, \
	272	NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
	273	NAMEA, NNAMEA, FUNCA, \
	274	CLASS, STR) \
	275	_CCC_TRY_NONLOCALE(NAMEU, NNAMEU, FUNCU, CLASS, STR)
	276
	277	/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
	278
	279	/* for use after a quantifier and before an EXACT-like node -- japhy */
	280	/* it would be nice to rework regcomp.sym to generate this stuff. sigh
	281	*
	282	* NOTE that nothing that affects backtracking should be in here, specifically
	283	* VERBS must NOT be included. JUMPABLE is used to determine if we can ignore a
	284	* node that is in between two EXACT like nodes when ascertaining what the required
	285	* "follow" character is. This should probably be moved to regex compile time
	286	* although it may be done at run time because of the REF possibility - more
	287	* investigation required. -- demerphq
	288	*/
	289	#define JUMPABLE(rn) ( \
	290	OP(rn) == OPEN \|\| \
	291	(OP(rn) == CLOSE && (!cur_eval \|\| cur_eval->u.eval.close_paren != ARG(rn))) \|\| \
	292	OP(rn) == EVAL \|\| \
	293	OP(rn) == SUSPEND \|\| OP(rn) == IFMATCH \|\| \
	294	OP(rn) == PLUS \|\| OP(rn) == MINMOD \|\| \
	295	OP(rn) == KEEPS \|\| \
	296	(PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
	297	)
	298	#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
	299
	300	#define HAS_TEXT(rn) ( IS_EXACT(rn) \|\| PL_regkind[OP(rn)] == REF )
	301
	302	#if 0
	303	/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
	304	we don't need this definition. */
	305	#define IS_TEXT(rn) ( OP(rn)==EXACT \|\| OP(rn)==REF \|\| OP(rn)==NREF )
	306	#define IS_TEXTF(rn) ( (OP(rn)==EXACTFU \|\| OP(rn)==EXACTFA \|\| OP(rn)==EXACTF) \|\| OP(rn)==REFF \|\| OP(rn)==NREFF )
	307	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL \|\| OP(rn)==REFFL \|\| OP(rn)==NREFFL )
	308
	309	#else
	310	/* ... so we use this as its faster. */
	311	#define IS_TEXT(rn) ( OP(rn)==EXACT )
	312	#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU \|\| OP(rn) == EXACTFA)
	313	#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
	314	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
	315
	316	#endif
	317
	318	/*
	319	Search for mandatory following text node; for lookahead, the text must
	320	follow but for lookbehind (rn->flags != 0) we skip to the next step.
	321	*/
		/* Advances rn in place while JUMPABLE(rn) holds:
		*   SUSPEND or CURLY-kind   -> two NEXTOPER steps
		*   PLUS                    -> one NEXTOPER step
		*   IFMATCH                 -> two NEXTOPER steps if rn->flags == 0,
		*                              otherwise rn + ARG(rn)
		*   anything else           -> rn + NEXT_OFF(rn)
		* rn must be a modifiable lvalue. */
	322	#define FIND_NEXT_IMPT(rn) STMT_START { \
	323	while (JUMPABLE(rn)) { \
	324	const OPCODE type = OP(rn); \
	325	if (type == SUSPEND || PL_regkind[type] == CURLY) \
	326	rn = NEXTOPER(NEXTOPER(rn)); \
	327	else if (type == PLUS) \
	328	rn = NEXTOPER(rn); \
	329	else if (type == IFMATCH) \
	330	rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
	331	else rn += NEXT_OFF(rn); \
	332	} \
	333	} STMT_END
	334
	335
		/* Forward declaration; the definition is not in this part of the file. */
	336	static void restore_pos(pTHX_ void *arg);
	337
		/* Savestack element counts shared by S_regcppush() and S_regcppop().
		* REGCP_PAREN_ELEMS: values pushed per capture group (end, start, start_tmp
		* and the group number).
		* REGCP_OTHER_ELEMS: values pushed once after the groups (PL_regoffs,
		* PL_regsize, *PL_reglastparen, *PL_reglastcloseparen, PL_reginput). */
	338	#define REGCP_PAREN_ELEMS 4
	339	#define REGCP_OTHER_ELEMS 5
	340	#define REGCP_FRAME_ELEMS 1
	341	/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
	342	* are needed for the regexp context stack bookkeeping. */
		/* NOTE(review): S_regcppush() grows the stack by the element total plus
		* REGCP_FRAME_ELEMS and pushes exactly one extra UV (the cookie), so the
		* frame element is presumably that cookie - confirm. */
	343
	344	STATIC CHECKPOINT
	345	S_regcppush(pTHX_ I32 parenfloor)
	346	{
	347	dVAR;
	348	const int retval = PL_savestack_ix;
	349	const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
	350	const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
	351	const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
	352	int p;
	353	GET_RE_DEBUG_FLAGS_DECL;
	354
	355	if (paren_elems_to_push < 0)
	356	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
	357
	358	if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
	359	Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
	360	" out of range (%lu-%ld)",
	361	total_elems, (unsigned long)PL_regsize, (long)parenfloor);
	362
	363	SSGROW(total_elems + REGCP_FRAME_ELEMS);
	364
	365	for (p = PL_regsize; p > parenfloor; p--) {
	366	/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
	367	SSPUSHINT(PL_regoffs[p].end);
	368	SSPUSHINT(PL_regoffs[p].start);
	369	SSPUSHPTR(PL_reg_start_tmp[p]);
	370	SSPUSHINT(p);
	371	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
	372	" saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
	373	(UV)p, (IV)PL_regoffs[p].start,
	374	(IV)(PL_reg_start_tmp[p] - PL_bostr),
	375	(IV)PL_regoffs[p].end
	376	));
	377	}
	378	/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
	379	SSPUSHPTR(PL_regoffs);
	380	SSPUSHINT(PL_regsize);
	381	SSPUSHINT(*PL_reglastparen);
	382	SSPUSHINT(*PL_reglastcloseparen);
	383	SSPUSHPTR(PL_reginput);
	384	SSPUSHUV(SAVEt_REGCONTEXT \| elems_shifted); /* Magic cookie. */
	385
	386	return retval;
	387	}
	388
	389	/* These are needed since we do not localize EVAL nodes: */
		/* REGCP_SET(cp): store the current PL_savestack_ix in cp, after logging it
		* under DEBUG_STATE_r.  The expansion is two statements and has no trailing
		* semicolon. */
	390	#define REGCP_SET(cp) \
	391	DEBUG_STATE_r( \
	392	PerlIO_printf(Perl_debug_log, \
	393	" Setting an EVAL scope, savestack=%"IVdf"\n", \
	394	(IV)PL_savestack_ix)); \
	395	cp = PL_savestack_ix
	396
		/* REGCP_UNWIND(cp): call regcpblow(cp); under DEBUG_STATE_r a message is
		* logged first, but only when cp differs from PL_savestack_ix. */
	397	#define REGCP_UNWIND(cp) \
	398	DEBUG_STATE_r( \
	399	if (cp != PL_savestack_ix) \
	400	PerlIO_printf(Perl_debug_log, \
	401	" Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
	402	(IV)(cp), (IV)PL_savestack_ix)); \
	403	regcpblow(cp)
	404
		/* Undo one S_regcppush(): pop the cookie, the REGCP_OTHER_ELEMS values and
		* then the per-group values from the savestack, in the reverse of the order
		* they were pushed, and write them back to the PL_reg* variables.
		*
		* rex is only consulted for nparens.
		* Returns the input pointer that was saved (PL_reginput at push time). */
	405	STATIC char *
	406	S_regcppop(pTHX_ const regexp *rex)
	407	{
	408	dVAR;
	409	UV i;
	410	char *input;
	411	GET_RE_DEBUG_FLAGS_DECL;
	412
	413	PERL_ARGS_ASSERT_REGCPPOP;
	414
	415	/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
	416	i = SSPOPUV;
	417	assert((i & SAVE_MASK) == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
	418	i >>= SAVE_TIGHT_SHIFT; /* Total elements pushed: parentheses elements plus REGCP_OTHER_ELEMS. */
	419	input = (char *) SSPOPPTR;
	420	*PL_reglastcloseparen = SSPOPINT;
	421	*PL_reglastparen = SSPOPINT;
	422	PL_regsize = SSPOPINT;
	423	PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
	424
		/* What is left in i is the number of parentheses elements. */
	425	i -= REGCP_OTHER_ELEMS;
	426	/* Now restore the parentheses context. */
	427	for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
	428	I32 tmps;
	429	U32 paren = (U32)SSPOPINT;
	430	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
	431	PL_regoffs[paren].start = SSPOPINT;
	432	tmps = SSPOPINT;
		/* The saved end is popped unconditionally but only written back for
		* groups up to the restored *PL_reglastparen. */
	433	if (paren <= *PL_reglastparen)
	434	PL_regoffs[paren].end = tmps;
	435	DEBUG_BUFFERS_r(
	436	PerlIO_printf(Perl_debug_log,
	437	" restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
	438	(UV)paren, (IV)PL_regoffs[paren].start,
	439	(IV)(PL_reg_start_tmp[paren] - PL_bostr),
	440	(IV)PL_regoffs[paren].end,
	441	(paren > *PL_reglastparen ? "(no)" : ""));
	442	);
	443	}
	444	DEBUG_BUFFERS_r(
	445	if (*PL_reglastparen + 1 <= rex->nparens) {
	446	PerlIO_printf(Perl_debug_log,
	447	" restoring \\%"IVdf"..\\%"IVdf" to undef\n",
	448	(IV)(*PL_reglastparen + 1), (IV)rex->nparens);
	449	}
	450	);
	451	#if 1
	452	/* It would seem that the similar code in regtry()
	453	* already takes care of this, and in fact it is in
	454	* a better location to since this code can be #if 0-ed out
	455	* but the code in regtry() is needed or otherwise tests
	456	* requiring null fields (pat.t#187 and split.t#{13,14}
	457	* (as of patchlevel 7877) will fail. Then again,
	458	* this code seems to be necessary or otherwise
	459	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	460	* --jhi updated by dapm */
		/* For every group above *PL_reglastparen the end offset is reset to -1;
		* the start offset is reset only for groups above PL_regsize. */
	461	for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
	462	if (i > PL_regsize)
	463	PL_regoffs[i].start = -1;
	464	PL_regoffs[i].end = -1;
	465	}
	466	#endif
	467	return input;
	468	}
	469
		/* regcpblow(cp) is LEAVE_SCOPE(cp); REGCP_UNWIND expands to it. */
	470	#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
	471
	472	/*
	473	* pregexec and friends
	474	*/
	475
	476	#ifndef PERL_IN_XSUB_RE
	477	/*
	478	- pregexec - match a regexp against a string
		*
		* Thin wrapper that returns the result of regexec_flags().  The first six
		* arguments are passed through unchanged, NULL is passed as the seventh,
		* and the flags argument is REXEC_COPY_STR unless nosave is non-zero, in
		* which case it is 0.
	479	*/
	480	I32
	481	Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
	482	char *strbeg, I32 minend, SV *screamer, U32 nosave)
	483	/* strend: pointer to null at end of string */
	484	/* strbeg: real beginning of string */
	485	/* minend: end of match must be >=minend after stringarg. */
	486	/* nosave: For optimizations. */
	487	{
	488	PERL_ARGS_ASSERT_PREGEXEC;
	489
	490	return
	491	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
	492	nosave ? 0 : REXEC_COPY_STR);
	493	}
	494	#endif
	495
	496	/*
	497	* Need to implement the following flags for reg_anch:
	498	*
	499	* USE_INTUIT_NOML - Useful to call re_intuit_start() first
	500	* USE_INTUIT_ML
	501	* INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
	502	* INTUIT_AUTORITATIVE_ML
	503	* INTUIT_ONCE_NOML - Intuit can match in one location only.
	504	* INTUIT_ONCE_ML
	505	*
	506	* Another flag for this function: SECOND_TIME (so that float substrs
	507	* with giant delta may be not rechecked).
	508	*/
	509
	510	/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
	511
	512	/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
	513	Otherwise, only SvCUR(sv) is used to get strbeg. */
	514
	515	/* XXXX We assume that strpos is strbeg unless sv. */
	516
	517	/* XXXX Some places assume that there is a fixed substring.
	518	An update may be needed if optimizer marks as "INTUITable"
	519	RExen without fixed substrings. Similarly, it is assumed that
	520	lengths of all the strings are no more than minlen, thus they
	521	cannot come from lookahead.
	522	(Or minlen should take into account lookahead.)
	523	NOTE: Some of this comment is not correct. minlen does now take account
	524	of lookahead/behind. Further research is required. -- demerphq
	525
	526	*/
	527
	528	/* A failure to find a constant substring means that there is no need to make
	529	an expensive call to REx engine, thus we celebrate a failure. Similarly,
	530	finding a substring too deep into the string means that less calls to
	531	regtry() should be needed.
	532
	533	REx compiler's optimizer found 4 possible hints:
	534	a) Anchored substring;
	535	b) Fixed substring;
	536	c) Whether we are anchored (beginning-of-line or \G);
	537	d) First node (of those at offset 0) which may distinguish positions;
	538	We use a)b)d) and multiline-part of c), and try to find a position in the
	539	string which does not contradict any of them.
	540	*/
	541
	542	/* Most of decisions we do here should have been done at compile time.
	543	The nodes of the REx which we used for the search should have been
	544	deleted from the finite automaton. */
	545
	546	char *
	547	Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV sv, char strpos,
	548	char strend, const U32 flags, re_scream_pos_data data)
	549	{
	550	dVAR;
	551	struct regexp const prog = (struct regexp )SvANY(rx);
	552	register I32 start_shift = 0;
	553	/* Should be nonnegative! */
	554	register I32 end_shift = 0;
	555	register char *s;
	556	register SV *check;
	557	char *strbeg;
	558	char *t;
	559	const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
	560	I32 ml_anch;
	561	register char other_last = NULL; / other substr checked before this */
	562	char check_at = NULL; / check substr found at this pos */
	563	const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
	564	RXi_GET_DECL(prog,progi);
	565	#ifdef DEBUGGING
	566	const char * const i_strpos = strpos;
	567	#endif
	568	GET_RE_DEBUG_FLAGS_DECL;
	569
	570	PERL_ARGS_ASSERT_RE_INTUIT_START;
	571
	572	RX_MATCH_UTF8_set(rx,utf8_target);
	573
	574	if (RX_UTF8(rx)) {
	575	PL_reg_flags \|= RF_utf8;
	576	}
	577	DEBUG_EXECUTE_r(
	578	debug_start_match(rx, utf8_target, strpos, strend,
	579	sv ? "Guessing start of match in sv for"
	580	: "Guessing start of match in string for");
	581	);
	582
	583	/* CHR_DIST() would be more correct here but it makes things slow. */
	584	if (prog->minlen > strend - strpos) {
	585	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	586	"String too short... [re_intuit_start]\n"));
	587	goto fail;
	588	}
	589
	590	strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
	591	PL_regeol = strend;
	592	if (utf8_target) {
	593	if (!prog->check_utf8 && prog->check_substr)
	594	to_utf8_substr(prog);
	595	check = prog->check_utf8;
	596	} else {
	597	if (!prog->check_substr && prog->check_utf8)
	598	to_byte_substr(prog);
	599	check = prog->check_substr;
	600	}
	601	if (check == &PL_sv_undef) {
	602	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	603	"Non-utf8 string cannot match utf8 check string\n"));
	604	goto fail;
	605	}
	606	if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
	607	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
	608	\|\| ( (prog->extflags & RXf_ANCH_BOL)
	609	&& !multiline ) ); /* Check after \n? */
	610
	611	if (!ml_anch) {
	612	if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
	613	&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
	614	/* SvCUR is not set on references: SvRV and SvPVX_const overlap */
	615	&& sv && !SvROK(sv)
	616	&& (strpos != strbeg)) {
	617	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
	618	goto fail;
	619	}
	620	if (prog->check_offset_min == prog->check_offset_max &&
	621	!(prog->extflags & RXf_CANY_SEEN)) {
	622	/* Substring at constant offset from beg-of-str... */
	623	I32 slen;
	624
	625	s = HOP3c(strpos, prog->check_offset_min, strend);
	626
	627	if (SvTAIL(check)) {
	628	slen = SvCUR(check); /* >= 1 */
	629
	630	if ( strend - s > slen \|\| strend - s < slen - 1
	631	\|\| (strend - s == slen && strend[-1] != '\n')) {
	632	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
	633	goto fail_finish;
	634	}
	635	/* Now should match s[0..slen-2] */
	636	slen--;
	637	if (slen && (SvPVX_const(check) != s
	638	\|\| (slen > 1
	639	&& memNE(SvPVX_const(check), s, slen)))) {
	640	report_neq:
	641	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
	642	goto fail_finish;
	643	}
	644	}
	645	else if (SvPVX_const(check) != s
	646	\|\| ((slen = SvCUR(check)) > 1
	647	&& memNE(SvPVX_const(check), s, slen)))
	648	goto report_neq;
	649	check_at = s;
	650	goto success_at_start;
	651	}
	652	}
	653	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
	654	s = strpos;
	655	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	656	end_shift = prog->check_end_shift;
	657
	658	if (!ml_anch) {
	659	const I32 end = prog->check_offset_max + CHR_SVLEN(check)
	660	- (SvTAIL(check) != 0);
	661	const I32 eshift = CHR_DIST((U8)strend, (U8)s) - end;
	662
	663	if (end_shift < eshift)
	664	end_shift = eshift;
	665	}
	666	}
	667	else { /* Can match at random position */
	668	ml_anch = 0;
	669	s = strpos;
	670	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
	671	end_shift = prog->check_end_shift;
	672
	673	/* end shift should be non negative here */
	674	}
	675
	676	#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
	677	if (end_shift < 0)
	678	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
	679	(IV)end_shift, RX_PRECOMP(prog));
	680	#endif
	681
	682	restart:
	683	/* Find a possible match in the region s..strend by looking for
	684	the "check" substring in the region corrected by start/end_shift. */
	685
	686	{
	687	I32 srch_start_shift = start_shift;
	688	I32 srch_end_shift = end_shift;
	689	if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
	690	srch_end_shift -= ((strbeg - s) - srch_start_shift);
	691	srch_start_shift = strbeg - s;
	692	}
	693	DEBUG_OPTIMISE_MORE_r({
	694	PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
	695	(IV)prog->check_offset_min,
	696	(IV)srch_start_shift,
	697	(IV)srch_end_shift,
	698	(IV)prog->check_end_shift);
	699	});
	700
	701	if ((flags & REXEC_SCREAM) && SvSCREAM(sv)) {
	702	I32 p = -1; /* Internal iterator of scream. */
	703	I32 * const pp = data ? data->scream_pos : &p;
	704	const MAGIC *mg;
	705	bool found = FALSE;
	706
	707	assert(SvMAGICAL(sv));
	708	mg = mg_find(sv, PERL_MAGIC_study);
	709	assert(mg);
	710
	711	if (mg->mg_private == 1) {
	712	found = ((U8 *)mg->mg_ptr)[BmRARE(check)] != (U8)~0;
	713	} else if (mg->mg_private == 2) {
	714	found = ((U16 *)mg->mg_ptr)[BmRARE(check)] != (U16)~0;
	715	} else {
	716	assert (mg->mg_private == 4);
	717	found = ((U32 *)mg->mg_ptr)[BmRARE(check)] != (U32)~0;
	718	}
	719
	720	if (found
	721	\|\| ( BmRARE(check) == '\n'
	722	&& (BmPREVIOUS(check) == SvCUR(check) - 1)
	723	&& SvTAIL(check) ))
	724	s = screaminstr(sv, check,
	725	srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
	726	else
	727	goto fail_finish;
	728	/* we may be pointing at the wrong string */
	729	if (s && RXp_MATCH_COPIED(prog))
	730	s = strbeg + (s - SvPVX_const(sv));
	731	if (data)
	732	*data->scream_olds = s;
	733	}
	734	else {
	735	U8* start_point;
	736	U8* end_point;
	737	if (prog->extflags & RXf_CANY_SEEN) {
	738	start_point= (U8*)(s + srch_start_shift);
	739	end_point= (U8*)(strend - srch_end_shift);
	740	} else {
	741	start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
	742	end_point= HOP3(strend, -srch_end_shift, strbeg);
	743	}
	744	DEBUG_OPTIMISE_MORE_r({
	745	PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
	746	(int)(end_point - start_point),
	747	(int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
	748	start_point);
	749	});
	750
	751	s = fbm_instr( start_point, end_point,
	752	check, multiline ? FBMrf_MULTILINE : 0);
	753	}
	754	}
	755	/* Update the count-of-usability, remove useless subpatterns,
	756	unshift s. */
	757
	758	DEBUG_EXECUTE_r({
	759	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	760	SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
	761	PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
	762	(s ? "Found" : "Did not find"),
	763	(check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
	764	? "anchored" : "floating"),
	765	quoted,
	766	RE_SV_TAIL(check),
	767	(s ? " at offset " : "...\n") );
	768	});
	769
	770	if (!s)
	771	goto fail_finish;
	772	/* Finish the diagnostic message */
	773	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
	774
	775	/* XXX dmq: first branch is for positive lookbehind...
	776	Our check string is offset from the beginning of the pattern.
	777	So we need to do any stclass tests offset forward from that
	778	point. I think. :-(
	779	*/
	780
	781
	782
	783	check_at=s;
	784
	785
	786	/* Got a candidate. Check MBOL anchoring, and the other substr.
	787	Start with the other substr.
	788	XXXX no SCREAM optimization yet - and a very coarse implementation
	789	XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
	790	always match. Probably should be marked during compile...
	791	Probably it is right to do no SCREAM here...
	792	*/
	793
	794	if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
	795	: (prog->float_substr && prog->anchored_substr))
	796	{
	797	/* Take into account the "other" substring. */
	798	/* XXXX May be hopelessly wrong for UTF... */
	799	if (!other_last)
	800	other_last = strpos;
	801	if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
	802	do_other_anchored:
	803	{
	804	char * const last = HOP3c(s, -start_shift, strbeg);
	805	char last1, last2;
	806	char * const saved_s = s;
	807	SV* must;
	808
	809	t = s - prog->check_offset_max;
	810	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	811	&& (!utf8_target
	812	\|\| ((t = (char)reghopmaybe3((U8)s, -(prog->check_offset_max), (U8*)strpos))
	813	&& t > strpos)))
	814	NOOP;
	815	else
	816	t = strpos;
	817	t = HOP3c(t, prog->anchored_offset, strend);
	818	if (t < other_last) /* These positions already checked */
	819	t = other_last;
	820	last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
	821	if (last < last1)
	822	last1 = last;
	823	/* XXXX It is not documented what units *_offsets are in.
	824	We assume bytes, but this is clearly wrong.
	825	Meaning this code needs to be carefully reviewed for errors.
	826	dmq.
	827	*/
	828
	829	/* On end-of-str: see comment below. */
	830	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	831	if (must == &PL_sv_undef) {
	832	s = (char*)NULL;
	833	DEBUG_r(must = prog->anchored_utf8); /* for debug */
	834	}
	835	else
	836	s = fbm_instr(
	837	(unsigned char*)t,
	838	HOP3(HOP3(last1, prog->anchored_offset, strend)
	839	+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
	840	must,
	841	multiline ? FBMrf_MULTILINE : 0
	842	);
	843	DEBUG_EXECUTE_r({
	844	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	845	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	846	PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
	847	(s ? "Found" : "Contradicts"),
	848	quoted, RE_SV_TAIL(must));
	849	});
	850
	851
	852	if (!s) {
	853	if (last1 >= last2) {
	854	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	855	", giving up...\n"));
	856	goto fail_finish;
	857	}
	858	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	859	", trying floating at offset %ld...\n",
	860	(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
	861	other_last = HOP3c(last1, prog->anchored_offset+1, strend);
	862	s = HOP3c(last, 1, strend);
	863	goto restart;
	864	}
	865	else {
	866	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	867	(long)(s - i_strpos)));
	868	t = HOP3c(s, -prog->anchored_offset, strbeg);
	869	other_last = HOP3c(s, 1, strend);
	870	s = saved_s;
	871	if (t == strpos)
	872	goto try_at_start;
	873	goto try_at_offset;
	874	}
	875	}
	876	}
	877	else { /* Take into account the floating substring. */
	878	char last, last1;
	879	char * const saved_s = s;
	880	SV* must;
	881
	882	t = HOP3c(s, -start_shift, strbeg);
	883	last1 = last =
	884	HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
	885	if (CHR_DIST((U8)last, (U8)t) > prog->float_max_offset)
	886	last = HOP3c(t, prog->float_max_offset, strend);
	887	s = HOP3c(t, prog->float_min_offset, strend);
	888	if (s < other_last)
	889	s = other_last;
	890	/* XXXX It is not documented what units _offsets are in. Assume bytes. /
	891	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	892	/* fbm_instr() takes into account exact value of end-of-str
	893	if the check is SvTAIL(ed). Since false positives are OK,
	894	and end-of-str is not later than strend we are OK. */
	895	if (must == &PL_sv_undef) {
	896	s = (char*)NULL;
	897	DEBUG_r(must = prog->float_utf8); /* for debug message */
	898	}
	899	else
	900	s = fbm_instr((unsigned char*)s,
	901	(unsigned char*)last + SvCUR(must)
	902	- (SvTAIL(must)!=0),
	903	must, multiline ? FBMrf_MULTILINE : 0);
	904	DEBUG_EXECUTE_r({
	905	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	906	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	907	PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
	908	(s ? "Found" : "Contradicts"),
	909	quoted, RE_SV_TAIL(must));
	910	});
	911	if (!s) {
	912	if (last1 == last) {
	913	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	914	", giving up...\n"));
	915	goto fail_finish;
	916	}
	917	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	918	", trying anchored starting at offset %ld...\n",
	919	(long)(saved_s + 1 - i_strpos)));
	920	other_last = last;
	921	s = HOP3c(t, 1, strend);
	922	goto restart;
	923	}
	924	else {
	925	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
	926	(long)(s - i_strpos)));
	927	other_last = s; /* Fix this later. --Hugo */
	928	s = saved_s;
	929	if (t == strpos)
	930	goto try_at_start;
	931	goto try_at_offset;
	932	}
	933	}
	934	}
	935
	936
	937	t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
	938
	939	DEBUG_OPTIMISE_MORE_r(
	940	PerlIO_printf(Perl_debug_log,
	941	"Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
	942	(IV)prog->check_offset_min,
	943	(IV)prog->check_offset_max,
	944	(IV)(s-strpos),
	945	(IV)(t-strpos),
	946	(IV)(t-s),
	947	(IV)(strend-strpos)
	948	)
	949	);
	950
	951	if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
	952	&& (!utf8_target
	953	\|\| ((t = (char)reghopmaybe3((U8)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
	954	&& t > strpos)))
	955	{
	956	/* Fixed substring is found far enough so that the match
	957	cannot start at strpos. */
	958	try_at_offset:
	959	if (ml_anch && t[-1] != '\n') {
	960	/* Eventually fbm_*() should handle this, but often
	961	anchored_offset is not 0, so this check will not be wasted. */
	962	/* XXXX In the code below we prefer to look for "^" even in
	963	presence of anchored substrings. And we search even
	964	beyond the found float position. These pessimizations
	965	are historical artefacts only. */
	966	find_anchor:
	967	while (t < strend - prog->minlen) {
	968	if (*t == '\n') {
	969	if (t < check_at - prog->check_offset_min) {
	970	if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
	971	/* Since we moved from the found position,
	972	we definitely contradict the found anchored
	973	substr. Due to the above check we do not
	974	contradict "check" substr.
	975	Thus we can arrive here only if check substr
	976	is float. Redo checking for "other"=="fixed".
	977	*/
	978	strpos = t + 1;
	979	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
	980	PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
	981	goto do_other_anchored;
	982	}
	983	/* We don't contradict the found floating substring. */
	984	/* XXXX Why not check for STCLASS? */
	985	s = t + 1;
	986	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
	987	PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
	988	goto set_useful;
	989	}
	990	/* Position contradicts check-string */
	991	/* XXXX probably better to look for check-string
	992	than for "\n", so one should lower the limit for t? */
	993	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
	994	PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
	995	other_last = strpos = s = t + 1;
	996	goto restart;
	997	}
	998	t++;
	999	}
	1000	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
	1001	PL_colors[0], PL_colors[1]));
	1002	goto fail_finish;
	1003	}
	1004	else {
	1005	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
	1006	PL_colors[0], PL_colors[1]));
	1007	}
	1008	s = t;
	1009	set_useful:
	1010	++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
	1011	}
	1012	else {
	1013	/* The found string does not prohibit matching at strpos,
	1014	- no optimization of calling REx engine can be performed,
	1015	unless it was an MBOL and we are not after MBOL,
	1016	or a future STCLASS check will fail this. */
	1017	try_at_start:
	1018	/* Even in this situation we may use MBOL flag if strpos is offset
	1019	wrt the start of the string. */
	1020	if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
	1021	&& (strpos != strbeg) && strpos[-1] != '\n'
	1022	/* May be due to an implicit anchor of m{.foo} /
	1023	&& !(prog->intflags & PREGf_IMPLICIT))
	1024	{
	1025	t = strpos;
	1026	goto find_anchor;
	1027	}
	1028	DEBUG_EXECUTE_r( if (ml_anch)
	1029	PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
	1030	(long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
	1031	);
	1032	success_at_start:
	1033	if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
	1034	&& (utf8_target ? (
	1035	prog->check_utf8 /* Could be deleted already */
	1036	&& --BmUSEFUL(prog->check_utf8) < 0
	1037	&& (prog->check_utf8 == prog->float_utf8)
	1038	) : (
	1039	prog->check_substr /* Could be deleted already */
	1040	&& --BmUSEFUL(prog->check_substr) < 0
	1041	&& (prog->check_substr == prog->float_substr)
	1042	)))
	1043	{
	1044	/* If flags & SOMETHING - do not do it many times on the same match */
	1045	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
	1046	/* XXX Does the destruction order has to change with utf8_target? */
	1047	SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
	1048	SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
	1049	prog->check_substr = prog->check_utf8 = NULL; /* disable */
	1050	prog->float_substr = prog->float_utf8 = NULL; /* clear */
	1051	check = NULL; /* abort */
	1052	s = strpos;
	1053	/* XXXX If the check string was an implicit check MBOL, then we need to unset the relevant flag
	1054	see http://bugs.activestate.com/show_bug.cgi?id=87173 */
	1055	if (prog->intflags & PREGf_IMPLICIT)
	1056	prog->extflags &= ~RXf_ANCH_MBOL;
	1057	/* XXXX This is a remnant of the old implementation. It
	1058	looks wasteful, since now INTUIT can use many
	1059	other heuristics. */
	1060	prog->extflags &= ~RXf_USE_INTUIT;
	1061	/* XXXX What other flags might need to be cleared in this branch? */
	1062	}
	1063	else
	1064	s = strpos;
	1065	}
	1066
	1067	/* Last resort... */
	1068	/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
	1069	/* trie stclasses are too expensive to use here, we are better off to
	1070	leave it to regmatch itself */
	1071	if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
	1072	/* minlen == 0 is possible if regstclass is \b or \B,
	1073	and the fixed substr is ''$.
	1074	Since minlen is already taken into account, s+1 is before strend;
	1075	accidentally, minlen >= 1 guaranties no false positives at s + 1
	1076	even for \b or \B. But (minlen? 1 : 0) below assumes that
	1077	regstclass does not come from lookahead... */
	1078	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
	1079	This leaves EXACTF-ish only, which are dealt with in find_byclass(). */
	1080	const U8* const str = (U8*)STRING(progi->regstclass);
	1081	const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
	1082	? CHR_DIST(str+STR_LEN(progi->regstclass), str)
	1083	: 1);
	1084	char * endpos;
	1085	if (prog->anchored_substr \|\| prog->anchored_utf8 \|\| ml_anch)
	1086	endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
	1087	else if (prog->float_substr \|\| prog->float_utf8)
	1088	endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
	1089	else
	1090	endpos= strend;
	1091
	1092	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
	1093	(IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
	1094
	1095	t = s;
	1096	s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
	1097	if (!s) {
	1098	#ifdef DEBUGGING
	1099	const char *what = NULL;
	1100	#endif
	1101	if (endpos == strend) {
	1102	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1103	"Could not match STCLASS...\n") );
	1104	goto fail;
	1105	}
	1106	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1107	"This position contradicts STCLASS...\n") );
	1108	if ((prog->extflags & RXf_ANCH) && !ml_anch)
	1109	goto fail;
	1110	/* Contradict one of substrings */
	1111	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	1112	if ((utf8_target ? prog->anchored_utf8 : prog->anchored_substr) == check) {
	1113	DEBUG_EXECUTE_r( what = "anchored" );
	1114	hop_and_restart:
	1115	s = HOP3c(t, 1, strend);
	1116	if (s + start_shift + end_shift > strend) {
	1117	/* XXXX Should be taken into account earlier? */
	1118	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1119	"Could not match STCLASS...\n") );
	1120	goto fail;
	1121	}
	1122	if (!check)
	1123	goto giveup;
	1124	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1125	"Looking for %s substr starting at offset %ld...\n",
	1126	what, (long)(s + start_shift - i_strpos)) );
	1127	goto restart;
	1128	}
	1129	/* Have both, check_string is floating */
	1130	if (t + start_shift >= check_at) /* Contradicts floating=check */
	1131	goto retry_floating_check;
	1132	/* Recheck anchored substring, but not floating... */
	1133	s = check_at;
	1134	if (!check)
	1135	goto giveup;
	1136	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1137	"Looking for anchored substr starting at offset %ld...\n",
	1138	(long)(other_last - i_strpos)) );
	1139	goto do_other_anchored;
	1140	}
	1141	/* Another way we could have checked stclass at the
	1142	current position only: */
	1143	if (ml_anch) {
	1144	s = t = t + 1;
	1145	if (!check)
	1146	goto giveup;
	1147	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	1148	"Looking for /%s^%s/m starting at offset %ld...\n",
	1149	PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
	1150	goto try_at_offset;
	1151	}
	1152	if (!(utf8_target ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
	1153	goto fail;
	1154	/* Check is floating substring. */
	1155	retry_floating_check:
	1156	t = check_at - start_shift;
	1157	DEBUG_EXECUTE_r( what = "floating" );
	1158	goto hop_and_restart;
	1159	}
	1160	if (t != s) {
	1161	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1162	"By STCLASS: moving %ld --> %ld\n",
	1163	(long)(t - i_strpos), (long)(s - i_strpos))
	1164	);
	1165	}
	1166	else {
	1167	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	1168	"Does not contradict STCLASS...\n");
	1169	);
	1170	}
	1171	}
	1172	giveup:
	1173	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
	1174	PL_colors[4], (check ? "Guessed" : "Giving up"),
	1175	PL_colors[5], (long)(s - i_strpos)) );
	1176	return s;
	1177
	1178	fail_finish: /* Substring not found */
	1179	if (prog->check_substr \|\| prog->check_utf8) /* could be removed already */
	1180	BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
	1181	fail:
	1182	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
	1183	PL_colors[4], PL_colors[5]));
	1184	return NULL;
	1185	}
	1186
	/* Declare a const local, trie_type, that tells REXEC_TRIE_READ_CHAR how to
	 * read target characters.  If scan->flags is not EXACT the value is
	 * trie_utf8_fold when utf8_target is set, else trie_latin_utf8_fold when
	 * UTF_PATTERN is set, else trie_plain.  If scan->flags is EXACT the value
	 * is trie_utf8 or trie_plain according to utf8_target.  utf8_target must be
	 * visible where the macro is expanded. */
	1187	#define DECL_TRIE_TYPE(scan) \
	1188	const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
	1189	trie_type = (scan->flags != EXACT) \
	1190	? (utf8_target ? trie_utf8_fold : (UTF_PATTERN ? trie_latin_utf8_fold : trie_plain)) \
	1191	: (utf8_target ? trie_utf8 : trie_plain)
	1192
	/* Read the next character for trie matching and translate it to a trie
	 * character id.
	 *
	 * trie_type selects how the character is obtained:
	 *   trie_utf8_fold, trie_latin_utf8_fold - when foldlen > 0 the character
	 *       is decoded from uscan (what is still pending in foldbuf), foldlen
	 *       and uscan are advanced and len is set to 0; otherwise the
	 *       character at uc is read, folded with to_uni_fold() into foldbuf,
	 *       and foldlen/uscan are set to cover what follows the first
	 *       UNISKIP(uvc) bytes of foldbuf.
	 *   trie_utf8  - decode one UTF-8 character at uc.
	 *   trie_plain - take the single byte at uc (len = 1).
	 *
	 * On exit uvc holds the value that was looked up and charid is
	 * trie->charmap[uvc] when uvc < 256, otherwise the value stored in
	 * widecharmap for uvc, or 0 when widecharmap is NULL or has no entry.
	 *
	 * uc is a pointer in every branch; in the trie_latin_utf8_fold branch the
	 * byte it points at is what gets folded, hence the dereference there. */
	1193	#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
	1194	uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
	1195	switch (trie_type) { \
	1196	case trie_utf8_fold: \
	1197	if ( foldlen>0 ) { \
	1198	uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
	1199	foldlen -= len; \
	1200	uscan += len; \
	1201	len=0; \
	1202	} else { \
	1203	uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
	1204	uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
	1205	foldlen -= UNISKIP( uvc ); \
	1206	uscan = foldbuf + UNISKIP( uvc ); \
	1207	} \
	1208	break; \
	1209	case trie_latin_utf8_fold: \
	1210	if ( foldlen>0 ) { \
	1211	uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
	1212	foldlen -= len; \
	1213	uscan += len; \
	1214	len=0; \
	1215	} else { \
	1216	len = 1; \
	1217	uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen ); \
	1218	foldlen -= UNISKIP( uvc ); \
	1219	uscan = foldbuf + UNISKIP( uvc ); \
	1220	} \
	1221	break; \
	1222	case trie_utf8: \
	1223	uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
	1224	break; \
	1225	case trie_plain: \
	1226	uvc = (UV)*uc; \
	1227	len = 1; \
	1228	} \
	1229	if (uvc < 256) { \
	1230	charid = trie->charmap[ uvc ]; \
	1231	} \
	1232	else { \
	1233	charid = 0; \
	1234	if (widecharmap) { \
	1235	SV** const svpp = hv_fetch(widecharmap, \
	1236	(char*)&uvc, sizeof(UV), 0); \
	1237	if (svpp) \
	1238	charid = (U16)SvIV(*svpp); \
	1239	} \
	1240	} \
	1241	} STMT_END
	1242
	/* Step s forward one byte at a time while s <= e.  A position is accepted
	 * when CoNd holds there, ln == 1 or folder(s, pat_string, ln) is true, and
	 * reginfo is NULL or regtry(reginfo, &s) succeeds; control then jumps to
	 * got_it.  Uses s, e, ln, folder, pat_string and reginfo from the
	 * expansion site. */
	1243	#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
	1244	STMT_START { \
	1245	while (s <= e) { \
	1246	if ( (CoNd) \
	1247	&& (ln == 1 || folder(s, pat_string, ln)) \
	1248	&& (!reginfo || regtry(reginfo, &s)) ) \
	1249	goto got_it; \
	1250	s++; \
	1251	} \
	1252	} STMT_END
	1253
	/* Run CoDe at successive positions of s, advancing s by
	 * uskip = UTF8SKIP(s) after each iteration.  The loop ends as soon as
	 * s + uskip would pass strend.  s, uskip and strend come from the
	 * expansion site; CoDe may leave the loop with a goto. */
	1254	#define REXEC_FBC_UTF8_SCAN(CoDe) \
	1255	STMT_START { \
	1256	while (s + (uskip = UTF8SKIP(s)) <= strend) { \
	1257	CoDe \
	1258	s += uskip; \
	1259	} \
	1260	} STMT_END
	1261
	/* Byte-wise counterpart of REXEC_FBC_UTF8_SCAN: run CoDe at every s from
	 * its current value while s < strend, incrementing s by one each time.
	 * s and strend come from the expansion site. */
	1262	#define REXEC_FBC_SCAN(CoDe) \
	1263	STMT_START { \
	1264	while (s < strend) { \
	1265	CoDe \
	1266	s++; \
	1267	} \
	1268	} STMT_END
	1269
	/* UTF-8 scan for positions where CoNd holds.  At such a position, if tmp
	 * is true and either reginfo is NULL or regtry(reginfo, &s) succeeds, jump
	 * to got_it; otherwise tmp is set to doevery.  At a position where CoNd
	 * does not hold tmp is reset to 1.  So when doevery is 0, the positions
	 * following a tried (and failed) one are not tried again until CoNd has
	 * been false in between. */
	1270	#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
	1271	REXEC_FBC_UTF8_SCAN( \
	1272	if (CoNd) { \
	1273	if (tmp && (!reginfo || regtry(reginfo, &s))) \
	1274	goto got_it; \
	1275	else \
	1276	tmp = doevery; \
	1277	} \
	1278	else \
	1279	tmp = 1; \
	1280	)
	1281
	/* Byte-wise scan for positions where CoNd holds.  At such a position, if
	 * tmp is true and either reginfo is NULL or regtry(reginfo, &s) succeeds,
	 * jump to got_it; otherwise tmp is set to doevery.  At a position where
	 * CoNd does not hold tmp is reset to 1.  So when doevery is 0, the
	 * positions following a tried (and failed) one are not tried again until
	 * CoNd has been false in between. */
	1282	#define REXEC_FBC_CLASS_SCAN(CoNd) \
	1283	REXEC_FBC_SCAN( \
	1284	if (CoNd) { \
	1285	if (tmp && (!reginfo || regtry(reginfo, &s))) \
	1286	goto got_it; \
	1287	else \
	1288	tmp = doevery; \
	1289	} \
	1290	else \
	1291	tmp = 1; \
	1292	)
	1293
	/* Jump to got_it if reginfo is NULL or regtry(reginfo, &s) succeeds.  The
	 * expansion has no trailing semicolon; the user supplies it. */
	1294	#define REXEC_FBC_TRYIT \
	1295	if ((!reginfo || regtry(reginfo, &s))) \
	1296	goto got_it
	1297
	/* Class scan that picks its condition by target encoding: CoNdUtF8 with
	 * REXEC_FBC_UTF8_CLASS_SCAN when utf8_target is set, CoNd with
	 * REXEC_FBC_CLASS_SCAN otherwise. */
	1298	#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
	1299	if (utf8_target) { \
	1300	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1301	} \
	1302	else { \
	1303	REXEC_FBC_CLASS_SCAN(CoNd); \
	1304	}
	1305
	/* Same as REXEC_FBC_CSCAN, except that UtFpReLoAd is executed once before
	 * the UTF-8 scan starts.  It is not executed on the non-UTF-8 path. */
	1306	#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
	1307	if (utf8_target) { \
	1308	UtFpReLoAd; \
	1309	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1310	} \
	1311	else { \
	1312	REXEC_FBC_CLASS_SCAN(CoNd); \
	1313	}
	1314
	/* Set RF_tainted in PL_reg_flags, then scan with CoNdUtF8 when utf8_target
	 * is set and with CoNd otherwise.  The expansion is more than one
	 * statement, so it must not be used as the unbraced body of an if, else or
	 * loop. */
	1315	#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
	1316	PL_reg_flags |= RF_tainted; \
	1317	if (utf8_target) { \
	1318	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
	1319	} \
	1320	else { \
	1321	REXEC_FBC_CLASS_SCAN(CoNd); \
	1322	}
	1323
	/* Shorthand for dump_exec_pos() that fills in PL_regeol, PL_bostr and
	 * PL_reg_starttry, so callers pass only li, s and doutf8. */
	1324	#define DUMP_EXEC_POS(li,s,doutf8) \
	1325	dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
	1326
	1327
	/* UTF-8 boundary scan that classifies by single byte only.
	 * tmp is initialised to TEST_NON_UTF8 applied to the byte before s, or to
	 * '\n' when s is at PL_bostr.  At each character position, if the result
	 * of TEST_NON_UTF8 on the first byte at s differs from tmp, tmp is flipped
	 * and IF_SUCCESS runs; otherwise IF_FAIL runs.  Note that the last line of
	 * the definition ends in a continuation backslash. */
	1328	#define UTF8_NOLOAD(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1329	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1330	tmp = TEST_NON_UTF8(tmp); \
	1331	REXEC_FBC_UTF8_SCAN( \
	1332	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1333	tmp = !tmp; \
	1334	IF_SUCCESS; \
	1335	} \
	1336	else { \
	1337	IF_FAIL; \
	1338	} \
	1339	); \
	1340
	/* UTF-8 boundary scan for tests that work on whole characters.
	 * tmp is first set to '\n' when s is at PL_bostr, otherwise to the code
	 * point of the character that ends just before s (found by hopping back
	 * one character with reghop3() and decoding it).  TeSt1_UtF8, an
	 * expression written in terms of tmp, then replaces tmp with its result.
	 * After LOAD_UTF8_CHARCLASS_ALNUM(), each character position is tested
	 * with TeSt2_UtF8 (written in terms of s): when the result differs from
	 * tmp, tmp is flipped and IF_SUCCESS runs; otherwise IF_FAIL runs.
	 * s and PL_bostr are pointers and are passed to reghop3() as U8 pointers.
	 * Note that the last line of the definition ends in a continuation
	 * backslash. */
	1341	#define UTF8_LOAD(TeSt1_UtF8, TeSt2_UtF8, IF_SUCCESS, IF_FAIL) \
	1342	if (s == PL_bostr) { \
	1343	tmp = '\n'; \
	1344	} \
	1345	else { \
	1346	U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr); \
	1347	tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT); \
	1348	} \
	1349	tmp = TeSt1_UtF8; \
	1350	LOAD_UTF8_CHARCLASS_ALNUM(); \
	1351	REXEC_FBC_UTF8_SCAN( \
	1352	if (tmp == ! (TeSt2_UtF8)) { \
	1353	tmp = !tmp; \
	1354	IF_SUCCESS; \
	1355	} \
	1356	else { \
	1357	IF_FAIL; \
	1358	} \
	1359	); \
	1360
	1361	/* The only difference between the BOUND and NBOUND cases is that
	1362	* REXEC_FBC_TRYIT is called when matched in BOUND, and when non-matched in
	1363	* NBOUND. This is accomplished by passing it in either the if or else clause,
	1364	* with the other one being empty */
	/* Boundary search using UTF8_LOAD on the UTF-8 path: REXEC_FBC_TRYIT is
	 * the success action on both paths, PLACEHOLDER the failure action. */
	1365	#define FBC_BOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1366	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1367
	/* Boundary search using UTF8_NOLOAD on the UTF-8 path, so TEST_NON_UTF8 is
	 * applied on both paths; TEST1_UTF8 and TEST2_UTF8 are accepted but not
	 * used in the expansion. */
	1368	#define FBC_BOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1369	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1370
	/* Non-boundary search using UTF8_LOAD on the UTF-8 path: the actions are
	 * swapped relative to FBC_BOUND, so REXEC_FBC_TRYIT is the failure action
	 * and PLACEHOLDER the success action. */
	1371	#define FBC_NBOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1372	FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1373
	/* Non-boundary search using UTF8_NOLOAD on the UTF-8 path: REXEC_FBC_TRYIT
	 * is the failure action; TEST1_UTF8 and TEST2_UTF8 are accepted but not
	 * used in the expansion. */
	1374	#define FBC_NBOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
	1375	FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1376
	1377
	1378	/* Common to the BOUND and NBOUND cases. Unfortunately the UTF8 tests need to
	1379	* be passed in completely with the variable name being tested, which isn't
	1380	* such a clean interface, but this is easier to read than it was before. We
	1381	* are looking for the boundary (or non-boundary) between a word and non-word
	1382	* character. The utf8 and non-utf8 cases have the same logic, but the details
	1383	* must be different. Find the "wordness" of the character just prior to this
	1384	* one, and compare it with the wordness of this one. If they differ, we have
	1385	* a boundary. At the beginning of the string, pretend that the previous
	1386	* character was a new-line */
	/* Parameters:
	 *   UTF8_CODE     - complete code for the UTF-8 path (a UTF8_LOAD or
	 *                   UTF8_NOLOAD expansion), run when utf8_target is set.
	 *   TEST_NON_UTF8 - one-argument test applied to single bytes on the
	 *                   non-UTF-8 path.
	 *   IF_SUCCESS    - action when the test result at s differs from tmp
	 *                   (tmp is flipped first).
	 *   IF_FAIL       - action when it does not differ.
	 * After the scan, if prog->minlen is 0 and tmp is true, one more attempt
	 * is made at the final value of s (reginfo NULL counts as success) and
	 * control jumps to got_it. */
	1387	#define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	1388	if (utf8_target) { \
	1389	UTF8_CODE \
	1390	} \
	1391	else { /* Not utf8 */ \
	1392	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; \
	1393	tmp = TEST_NON_UTF8(tmp); \
	1394	REXEC_FBC_SCAN( \
	1395	if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	1396	tmp = !tmp; \
	1397	IF_SUCCESS; \
	1398	} \
	1399	else { \
	1400	IF_FAIL; \
	1401	} \
	1402	); \
	1403	} \
	1404	if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s))) \
	1405	goto got_it;
	1406
	1407	/* We know what class REx starts with. Try to find this position... */
	1408	/* if reginfo is NULL, it's a dry run */
	1409	/* annoyingly all the vars in this routine have different names from their counterparts
	1410	in regmatch. /grrr */
	1411
	1412	STATIC char *
	1413	S_find_byclass(pTHX_ regexp * prog, const regnode c, char s,
	1414	const char strend, regmatch_info reginfo)
	1415	{
	1416	dVAR;
	1417	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
	1418	char pat_string; / The pattern's exactish string */
	1419	char pat_end; / ptr to end char of pat_string */
	1420	re_fold_t folder; /* Function for computing non-utf8 folds */
	1421	const U8 fold_array; / array for folding ords < 256 */
	1422	STRLEN ln;
	1423	STRLEN lnc;
	1424	register STRLEN uskip;
	1425	U8 c1;
	1426	U8 c2;
	1427	char *e;
	1428	register I32 tmp = 1; /* Scratch variable? */
	1429	register const bool utf8_target = PL_reg_match_utf8;
	1430	UV utf8_fold_flags = 0;
	1431	RXi_GET_DECL(prog,progi);
	1432
	1433	PERL_ARGS_ASSERT_FIND_BYCLASS;
	1434
	1435	/* We know what class it must start with. */
	1436	switch (OP(c)) {
	1437	case ANYOFV:
	1438	case ANYOF:
	1439	if (utf8_target \|\| OP(c) == ANYOFV) {
	1440	STRLEN inclasslen = strend - s;
	1441	REXEC_FBC_UTF8_CLASS_SCAN(
	1442	reginclass(prog, c, (U8*)s, &inclasslen, utf8_target));
	1443	}
	1444	else {
	1445	REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
	1446	}
	1447	break;
	1448	case CANY:
	1449	REXEC_FBC_SCAN(
	1450	if (tmp && (!reginfo \|\| regtry(reginfo, &s)))
	1451	goto got_it;
	1452	else
	1453	tmp = doevery;
	1454	);
	1455	break;
	1456
	1457	case EXACTFA:
	1458	if (UTF_PATTERN \|\| utf8_target) {
	1459	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	1460	goto do_exactf_utf8;
	1461	}
	1462	fold_array = PL_fold_latin1; /* Latin1 folds are not affected by */
	1463	folder = foldEQ_latin1; /* /a, except the sharp s one which */
	1464	goto do_exactf_non_utf8; /* isn't dealt with by these */
	1465
	1466	case EXACTF:
	1467	if (UTF_PATTERN \|\| utf8_target) {
	1468
	1469	/* regcomp.c already folded this if pattern is in UTF-8 */
	1470	utf8_fold_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	1471	goto do_exactf_utf8;
	1472	}
	1473	fold_array = PL_fold;
	1474	folder = foldEQ;
	1475	goto do_exactf_non_utf8;
	1476
	1477	case EXACTFL:
	1478	if (UTF_PATTERN \|\| utf8_target) {
	1479	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	1480	goto do_exactf_utf8;
	1481	}
	1482	fold_array = PL_fold_locale;
	1483	folder = foldEQ_locale;
	1484	goto do_exactf_non_utf8;
	1485
	1486	case EXACTFU:
	1487	if (UTF_PATTERN \|\| utf8_target) {
	1488	utf8_fold_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	1489	goto do_exactf_utf8;
	1490	}
	1491
	1492	/* Any 'ss' in the pattern should have been replaced by regcomp,
	1493	* so we don't have to worry here about this single special case
	1494	* in the Latin1 range */
	1495	fold_array = PL_fold_latin1;
	1496	folder = foldEQ_latin1;
	1497
	1498	/* FALL THROUGH */
	1499
	1500	do_exactf_non_utf8: /* Neither pattern nor string are UTF8 */
	1501
	1502	/* The idea in the non-utf8 EXACTF* cases is to first find the
	1503	* first character of the EXACTF* node and then, if necessary,
	1504	* case-insensitively compare the full text of the node. c1 is the
	1505	* first character. c2 is its fold. This logic will not work for
	1506	* Unicode semantics and the german sharp ss, which hence should
	1507	* not be compiled into a node that gets here. */
	1508	pat_string = STRING(c);
	1509	ln = STR_LEN(c); /* length to match in octets/bytes */
	1510
	1511	/* We know that we have to match at least 'ln' bytes (which is the
	1512	* same as characters, since not utf8). If we have to match 3
	1513	* characters, and there are only 2 availabe, we know without
	1514	* trying that it will fail; so don't start a match past the
	1515	* required minimum number from the far end */
	1516	e = HOP3c(strend, -((I32)ln), s);
	1517
	1518	if (!reginfo && e < s) {
	1519	e = s; /* Due to minlen logic of intuit() */
	1520	}
	1521
	1522	c1 = *pat_string;
	1523	c2 = fold_array[c1];
	1524	if (c1 == c2) { /* If char and fold are the same */
	1525	REXEC_FBC_EXACTISH_SCAN((U8)s == c1);
	1526	}
	1527	else {
	1528	REXEC_FBC_EXACTISH_SCAN((U8)s == c1 \|\| (U8)s == c2);
	1529	}
	1530	break;
	1531
	1532	do_exactf_utf8:
	1533	{
	1534	unsigned expansion;
	1535
	1536
	1537	/* If one of the operands is in utf8, we can't use the simpler
	1538	* folding above, due to the fact that many different characters
	1539	* can have the same fold, or portion of a fold, or different-
	1540	* length fold */
	1541	pat_string = STRING(c);
	1542	ln = STR_LEN(c); /* length to match in octets/bytes */
	1543	pat_end = pat_string + ln;
	1544	lnc = (UTF_PATTERN) /* length to match in characters */
	1545	? utf8_length((U8 ) pat_string, (U8 ) pat_end)
	1546	: ln;
	1547
	1548	/* We have 'lnc' characters to match in the pattern, but because of
	1549	* multi-character folding, each character in the target can match
	1550	* up to 3 characters (Unicode guarantees it will never exceed
	1551	* this) if it is utf8-encoded; and up to 2 if not (based on the
	1552	* fact that the Latin 1 folds are already determined, and the
	1553	* only multi-char fold in that range is the sharp-s folding to
	1554	* 'ss'. Thus, a pattern character can match as little as 1/3 of a
	1555	* string character. Adjust lnc accordingly, always matching at
	1556	* least 1 */
	1557	expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
	1558	lnc = (lnc < expansion) ? 1 : lnc / expansion;
	1559
	1560	/* As in the non-UTF8 case, if we have to match 3 characters, and
	1561	* only 2 are left, it's guaranteed to fail, so don't start a
	1562	* match that would require us to go beyond the end of the string
	1563	*/
	1564	e = HOP3c(strend, -((I32)lnc), s);
	1565
	1566	if (!reginfo && e < s) {
	1567	e = s; /* Due to minlen logic of intuit() */
	1568	}
	1569
	1570	/* XXX Note that we could recalculate e every so-often through the
	1571	* loop to stop earlier, as the worst case expansion above will
	1572	* rarely be met, and as we go along we would usually find that e
	1573	* moves further to the left. Unclear if worth the expense */
	1574
	1575	while (s <= e) {
	1576	char my_strend= (char )strend;
	1577	if (foldEQ_utf8_flags(s, &my_strend, 0, utf8_target,
	1578	pat_string, NULL, ln, cBOOL(UTF_PATTERN), utf8_fold_flags)
	1579	&& (!reginfo \|\| regtry(reginfo, &s)) )
	1580	{
	1581	goto got_it;
	1582	}
	1583	s += (utf8_target) ? UTF8SKIP(s) : 1;
	1584	}
	1585	break;
	1586	}
	1587	case BOUNDL:
	1588	PL_reg_flags \|= RF_tainted;
	1589	FBC_BOUND(isALNUM_LC,
	1590	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1591	isALNUM_LC_utf8((U8*)s));
	1592	break;
	1593	case NBOUNDL:
	1594	PL_reg_flags \|= RF_tainted;
	1595	FBC_NBOUND(isALNUM_LC,
	1596	isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
	1597	isALNUM_LC_utf8((U8*)s));
	1598	break;
	1599	case BOUND:
	1600	FBC_BOUND(isWORDCHAR,
	1601	isALNUM_uni(tmp),
	1602	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1603	break;
	1604	case BOUNDA:
	1605	FBC_BOUND_NOLOAD(isWORDCHAR_A,
	1606	isWORDCHAR_A(tmp),
	1607	isWORDCHAR_A((U8*)s));
	1608	break;
	1609	case NBOUND:
	1610	FBC_NBOUND(isWORDCHAR,
	1611	isALNUM_uni(tmp),
	1612	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1613	break;
	1614	case NBOUNDA:
	1615	FBC_NBOUND_NOLOAD(isWORDCHAR_A,
	1616	isWORDCHAR_A(tmp),
	1617	isWORDCHAR_A((U8*)s));
	1618	break;
	1619	case BOUNDU:
	1620	FBC_BOUND(isWORDCHAR_L1,
	1621	isALNUM_uni(tmp),
	1622	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1623	break;
	1624	case NBOUNDU:
	1625	FBC_NBOUND(isWORDCHAR_L1,
	1626	isALNUM_uni(tmp),
	1627	cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
	1628	break;
	1629	case ALNUML:
	1630	REXEC_FBC_CSCAN_TAINT(
	1631	isALNUM_LC_utf8((U8*)s),
	1632	isALNUM_LC(*s)
	1633	);
	1634	break;
	1635	case ALNUMU:
	1636	REXEC_FBC_CSCAN_PRELOAD(
	1637	LOAD_UTF8_CHARCLASS_ALNUM(),
	1638	swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1639	isWORDCHAR_L1((U8) *s)
	1640	);
	1641	break;
	1642	case ALNUM:
	1643	REXEC_FBC_CSCAN_PRELOAD(
	1644	LOAD_UTF8_CHARCLASS_ALNUM(),
	1645	swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1646	isWORDCHAR((U8) *s)
	1647	);
	1648	break;
	1649	case ALNUMA:
	1650	/* Don't need to worry about utf8, as it can match only a single
	1651	* byte invariant character */
	1652	REXEC_FBC_CLASS_SCAN( isWORDCHAR_A(*s));
	1653	break;
	1654	case NALNUMU:
	1655	REXEC_FBC_CSCAN_PRELOAD(
	1656	LOAD_UTF8_CHARCLASS_ALNUM(),
	1657	!swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
	1658	! isWORDCHAR_L1((U8) *s)
	1659	);
	1660	break;
	1661	case NALNUM:
	1662	REXEC_FBC_CSCAN_PRELOAD(
	1663	LOAD_UTF8_CHARCLASS_ALNUM(),
	1664	!swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target),
	1665	! isALNUM(*s)
	1666	);
	1667	break;
	1668	case NALNUMA:
	1669	REXEC_FBC_CSCAN(
	1670	!isWORDCHAR_A(*s),
	1671	!isWORDCHAR_A(*s)
	1672	);
	1673	break;
	1674	case NALNUML:
	1675	REXEC_FBC_CSCAN_TAINT(
	1676	!isALNUM_LC_utf8((U8*)s),
	1677	!isALNUM_LC(*s)
	1678	);
	1679	break;
	1680	case SPACEU:
	1681	REXEC_FBC_CSCAN_PRELOAD(
	1682	LOAD_UTF8_CHARCLASS_SPACE(),
	1683	s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target),
	1684	isSPACE_L1((U8) *s)
	1685	);
	1686	break;
	1687	case SPACE:
	1688	REXEC_FBC_CSCAN_PRELOAD(
	1689	LOAD_UTF8_CHARCLASS_SPACE(),
	1690	s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target),
	1691	isSPACE((U8) *s)
	1692	);
	1693	break;
	1694	case SPACEA:
	1695	/* Don't need to worry about utf8, as it can match only a single
	1696	* byte invariant character */
	1697	REXEC_FBC_CLASS_SCAN( isSPACE_A(*s));
	1698	break;
	1699	case SPACEL:
	1700	REXEC_FBC_CSCAN_TAINT(
	1701	isSPACE_LC_utf8((U8*)s),
	1702	isSPACE_LC(*s)
	1703	);
	1704	break;
	1705	case NSPACEU:
	1706	REXEC_FBC_CSCAN_PRELOAD(
	1707	LOAD_UTF8_CHARCLASS_SPACE(),
	1708	!( s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target)),
	1709	! isSPACE_L1((U8) *s)
	1710	);
	1711	break;
	1712	case NSPACE:
	1713	REXEC_FBC_CSCAN_PRELOAD(
	1714	LOAD_UTF8_CHARCLASS_SPACE(),
	1715	!(s == ' ' \|\| swash_fetch(PL_utf8_space,(U8)s, utf8_target)),
	1716	! isSPACE((U8) *s)
	1717	);
	1718	break;
	1719	case NSPACEA:
	1720	REXEC_FBC_CSCAN(
	1721	!isSPACE_A(*s),
	1722	!isSPACE_A(*s)
	1723	);
	1724	break;
	1725	case NSPACEL:
	1726	REXEC_FBC_CSCAN_TAINT(
	1727	!isSPACE_LC_utf8((U8*)s),
	1728	!isSPACE_LC(*s)
	1729	);
	1730	break;
	1731	case DIGIT:
	1732	REXEC_FBC_CSCAN_PRELOAD(
	1733	LOAD_UTF8_CHARCLASS_DIGIT(),
	1734	swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
	1735	isDIGIT(*s)
	1736	);
	1737	break;
	1738	case DIGITA:
	1739	/* Don't need to worry about utf8, as it can match only a single
	1740	* byte invariant character */
	1741	REXEC_FBC_CLASS_SCAN( isDIGIT_A(*s));
	1742	break;
	1743	case DIGITL:
	1744	REXEC_FBC_CSCAN_TAINT(
	1745	isDIGIT_LC_utf8((U8*)s),
	1746	isDIGIT_LC(*s)
	1747	);
	1748	break;
	1749	case NDIGIT:
	1750	REXEC_FBC_CSCAN_PRELOAD(
	1751	LOAD_UTF8_CHARCLASS_DIGIT(),
	1752	!swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
	1753	!isDIGIT(*s)
	1754	);
	1755	break;
	1756	case NDIGITA:
	1757	REXEC_FBC_CSCAN(
	1758	!isDIGIT_A(*s),
	1759	!isDIGIT_A(*s)
	1760	);
	1761	break;
	1762	case NDIGITL:
	1763	REXEC_FBC_CSCAN_TAINT(
	1764	!isDIGIT_LC_utf8((U8*)s),
	1765	!isDIGIT_LC(*s)
	1766	);
	1767	break;
	1768	case LNBREAK:
	1769	REXEC_FBC_CSCAN(
	1770	is_LNBREAK_utf8(s),
	1771	is_LNBREAK_latin1(s)
	1772	);
	1773	break;
	1774	case VERTWS:
	1775	REXEC_FBC_CSCAN(
	1776	is_VERTWS_utf8(s),
	1777	is_VERTWS_latin1(s)
	1778	);
	1779	break;
	1780	case NVERTWS:
	1781	REXEC_FBC_CSCAN(
	1782	!is_VERTWS_utf8(s),
	1783	!is_VERTWS_latin1(s)
	1784	);
	1785	break;
	1786	case HORIZWS:
	1787	REXEC_FBC_CSCAN(
	1788	is_HORIZWS_utf8(s),
	1789	is_HORIZWS_latin1(s)
	1790	);
	1791	break;
	1792	case NHORIZWS:
	1793	REXEC_FBC_CSCAN(
	1794	!is_HORIZWS_utf8(s),
	1795	!is_HORIZWS_latin1(s)
	1796	);
	1797	break;
	1798	case AHOCORASICKC:
	1799	case AHOCORASICK:
	1800	{
	1801	DECL_TRIE_TYPE(c);
	1802	/* what trie are we using right now */
	1803	reg_ac_data *aho
	1804	= (reg_ac_data*)progi->data->data[ ARG( c ) ];
	1805	reg_trie_data *trie
	1806	= (reg_trie_data*)progi->data->data[ aho->trie ];
	1807	HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
	1808
	1809	const char *last_start = strend - trie->minlen;
	1810	#ifdef DEBUGGING
	1811	const char *real_start = s;
	1812	#endif
	1813	STRLEN maxlen = trie->maxlen;
	1814	SV *sv_points;
	1815	U8 *points; / map of where we were in the input string
	1816	when reading a given char. For ASCII this
	1817	is unnecessary overhead as the relationship
	1818	is always 1:1, but for Unicode, especially
	1819	case folded Unicode this is not true. */
	1820	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1821	U8 *bitmap=NULL;
	1822
	1823
	1824	GET_RE_DEBUG_FLAGS_DECL;
	1825
	1826	/* We can't just allocate points here. We need to wrap it in
	1827	* an SV so it gets freed properly if there is a croak while
	1828	* running the match */
	1829	ENTER;
	1830	SAVETMPS;
	1831	sv_points=newSV(maxlen * sizeof(U8 *));
	1832	SvCUR_set(sv_points,
	1833	maxlen * sizeof(U8 *));
	1834	SvPOK_on(sv_points);
	1835	sv_2mortal(sv_points);
	1836	points=(U8**)SvPV_nolen(sv_points );
	1837	if ( trie_type != trie_utf8_fold
	1838	&& (trie->bitmap \|\| OP(c)==AHOCORASICKC) )
	1839	{
	1840	if (trie->bitmap)
	1841	bitmap=(U8*)trie->bitmap;
	1842	else
	1843	bitmap=(U8*)ANYOF_BITMAP(c);
	1844	}
	1845	/* this is the Aho-Corasick algorithm modified a touch
	1846	to include special handling for long "unknown char"
	1847	sequences. The basic idea being that we use AC as long
	1848	as we are dealing with a possible matching char, when
	1849	we encounter an unknown char (and we have not encountered
	1850	an accepting state) we scan forward until we find a legal
	1851	starting char.
	1852	AC matching is basically that of trie matching, except
	1853	that when we encounter a failing transition, we fall back
	1854	to the current states "fail state", and try the current char
	1855	again, a process we repeat until we reach the root state,
	1856	state 1, or a legal transition. If we fail on the root state
	1857	then we can either terminate if we have reached an accepting
	1858	state previously, or restart the entire process from the beginning
	1859	if we have not.
	1860
	1861	*/
	1862	while (s <= last_start) {
	1863	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1864	U8 uc = (U8)s;
	1865	U16 charid = 0;
	1866	U32 base = 1;
	1867	U32 state = 1;
	1868	UV uvc = 0;
	1869	STRLEN len = 0;
	1870	STRLEN foldlen = 0;
	1871	U8 uscan = (U8)NULL;
	1872	U8 *leftmost = NULL;
	1873	#ifdef DEBUGGING
	1874	U32 accepted_word= 0;
	1875	#endif
	1876	U32 pointpos = 0;
	1877
	1878	while ( state && uc <= (U8*)strend ) {
	1879	int failed=0;
	1880	U32 word = aho->states[ state ].wordnum;
	1881
	1882	if( state==1 ) {
	1883	if ( bitmap ) {
	1884	DEBUG_TRIE_EXECUTE_r(
	1885	if ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1886	dump_exec_pos( (char *)uc, c, strend, real_start,
	1887	(char *)uc, utf8_target );
	1888	PerlIO_printf( Perl_debug_log,
	1889	" Scanning for legal start char...\n");
	1890	}
	1891	);
	1892	if (utf8_target) {
	1893	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1894	uc += UTF8SKIP(uc);
	1895	}
	1896	} else {
	1897	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	1898	uc++;
	1899	}
	1900	}
	1901	s= (char *)uc;
	1902	}
	1903	if (uc >(U8*)last_start) break;
	1904	}
	1905
	1906	if ( word ) {
	1907	U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
	1908	if (!leftmost \|\| lpos < leftmost) {
	1909	DEBUG_r(accepted_word=word);
	1910	leftmost= lpos;
	1911	}
	1912	if (base==0) break;
	1913
	1914	}
	1915	points[pointpos++ % maxlen]= uc;
	1916	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	1917	uscan, len, uvc, charid, foldlen,
	1918	foldbuf, uniflags);
	1919	DEBUG_TRIE_EXECUTE_r({
	1920	dump_exec_pos( (char *)uc, c, strend, real_start,
	1921	s, utf8_target );
	1922	PerlIO_printf(Perl_debug_log,
	1923	" Charid:%3u CP:%4"UVxf" ",
	1924	charid, uvc);
	1925	});
	1926
	1927	do {
	1928	#ifdef DEBUGGING
	1929	word = aho->states[ state ].wordnum;
	1930	#endif
	1931	base = aho->states[ state ].trans.base;
	1932
	1933	DEBUG_TRIE_EXECUTE_r({
	1934	if (failed)
	1935	dump_exec_pos( (char *)uc, c, strend, real_start,
	1936	s, utf8_target );
	1937	PerlIO_printf( Perl_debug_log,
	1938	"%sState: %4"UVxf", word=%"UVxf,
	1939	failed ? " Fail transition to " : "",
	1940	(UV)state, (UV)word);
	1941	});
	1942	if ( base ) {
	1943	U32 tmp;
	1944	I32 offset;
	1945	if (charid &&
	1946	( ((offset = base + charid
	1947	- 1 - trie->uniquecharcount)) >= 0)
	1948	&& ((U32)offset < trie->lasttrans)
	1949	&& trie->trans[offset].check == state
	1950	&& (tmp=trie->trans[offset].next))
	1951	{
	1952	DEBUG_TRIE_EXECUTE_r(
	1953	PerlIO_printf( Perl_debug_log," - legal\n"));
	1954	state = tmp;
	1955	break;
	1956	}
	1957	else {
	1958	DEBUG_TRIE_EXECUTE_r(
	1959	PerlIO_printf( Perl_debug_log," - fail\n"));
	1960	failed = 1;
	1961	state = aho->fail[state];
	1962	}
	1963	}
	1964	else {
	1965	/* we must be accepting here */
	1966	DEBUG_TRIE_EXECUTE_r(
	1967	PerlIO_printf( Perl_debug_log," - accepting\n"));
	1968	failed = 1;
	1969	break;
	1970	}
	1971	} while(state);
	1972	uc += len;
	1973	if (failed) {
	1974	if (leftmost)
	1975	break;
	1976	if (!state) state = 1;
	1977	}
	1978	}
	1979	if ( aho->states[ state ].wordnum ) {
	1980	U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
	1981	if (!leftmost \|\| lpos < leftmost) {
	1982	DEBUG_r(accepted_word=aho->states[ state ].wordnum);
	1983	leftmost = lpos;
	1984	}
	1985	}
	1986	if (leftmost) {
	1987	s = (char*)leftmost;
	1988	DEBUG_TRIE_EXECUTE_r({
	1989	PerlIO_printf(
	1990	Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
	1991	(UV)accepted_word, (IV)(s - real_start)
	1992	);
	1993	});
	1994	if (!reginfo \|\| regtry(reginfo, &s)) {
	1995	FREETMPS;
	1996	LEAVE;
	1997	goto got_it;
	1998	}
	1999	s = HOPc(s,1);
	2000	DEBUG_TRIE_EXECUTE_r({
	2001	PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
	2002	});
	2003	} else {
	2004	DEBUG_TRIE_EXECUTE_r(
	2005	PerlIO_printf( Perl_debug_log,"No match.\n"));
	2006	break;
	2007	}
	2008	}
	2009	FREETMPS;
	2010	LEAVE;
	2011	}
	2012	break;
	2013	default:
	2014	Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
	2015	break;
	2016	}
	2017	return 0;
	2018	got_it:
	2019	return s;
	2020	}
	2021
	2022
	2023	/*
	2024	- regexec_flags - match a regexp against a string
	2025	*/
	2026	I32
	2027	Perl_regexec_flags(pTHX_ REGEXP * const rx, char stringarg, register char strend,
	2028	char strbeg, I32 minend, SV sv, void *data, U32 flags)
	2029	/* strend: pointer to null at end of string */
	2030	/* strbeg: real beginning of string */
	2031	/* minend: end of match must be >=minend after stringarg. */
	2032	/* data: May be used for some additional optimizations.
	2033	Currently its only used, with a U32 cast, for transmitting
	2034	the ganch offset when doing a /g match. This will change */
	2035	/* nosave: For optimizations. */
	2036	{
	2037	dVAR;
	2038	struct regexp const prog = (struct regexp )SvANY(rx);
	2039	/register/ char *s;
	2040	register regnode *c;
	2041	/register/ char *startpos = stringarg;
	2042	I32 minlen; /* must match at least this many chars */
	2043	I32 dontbother = 0; /* how many characters not to try at end */
	2044	I32 end_shift = 0; /* Same for the end. / / CC */
	2045	I32 scream_pos = -1; /* Internal iterator of scream. */
	2046	char *scream_olds = NULL;
	2047	const bool utf8_target = cBOOL(DO_UTF8(sv));
	2048	I32 multiline;
	2049	RXi_GET_DECL(prog,progi);
	2050	regmatch_info reginfo; /* create some info to pass to regtry etc */
	2051	regexp_paren_pair *swap = NULL;
	2052	GET_RE_DEBUG_FLAGS_DECL;
	2053
	2054	PERL_ARGS_ASSERT_REGEXEC_FLAGS;
	2055	PERL_UNUSED_ARG(data);
	2056
	2057	/* Be paranoid... */
	2058	if (prog == NULL \|\| startpos == NULL) {
	2059	Perl_croak(aTHX_ "NULL regexp parameter");
	2060	return 0;
	2061	}
	2062
	2063	multiline = prog->extflags & RXf_PMf_MULTILINE;
	2064	reginfo.prog = rx; /* Yes, sorry that this is confusing. */
	2065
	2066	RX_MATCH_UTF8_set(rx, utf8_target);
	2067	DEBUG_EXECUTE_r(
	2068	debug_start_match(rx, utf8_target, startpos, strend,
	2069	"Matching");
	2070	);
	2071
	2072	minlen = prog->minlen;
	2073
	2074	if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
	2075	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	2076	"String too short [regexec_flags]...\n"));
	2077	goto phooey;
	2078	}
	2079
	2080
	2081	/* Check validity of program. */
	2082	if (UCHARAT(progi->program) != REG_MAGIC) {
	2083	Perl_croak(aTHX_ "corrupted regexp program");
	2084	}
	2085
	2086	PL_reg_flags = 0;
	2087	PL_reg_eval_set = 0;
	2088	PL_reg_maxiter = 0;
	2089
	2090	if (RX_UTF8(rx))
	2091	PL_reg_flags \|= RF_utf8;
	2092
	2093	/* Mark beginning of line for ^ and lookbehind. */
	2094	reginfo.bol = startpos; /* XXX not used ??? */
	2095	PL_bostr = strbeg;
	2096	reginfo.sv = sv;
	2097
	2098	/* Mark end of line for $ (and such) */
	2099	PL_regeol = strend;
	2100
	2101	/* see how far we have to get to not match where we matched before */
	2102	reginfo.till = startpos+minend;
	2103
	2104	/* If there is a "must appear" string, look for it. */
	2105	s = startpos;
	2106
	2107	if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
	2108	MAGIC *mg;
	2109	if (flags & REXEC_IGNOREPOS){ /* Means: check only at start */
	2110	reginfo.ganch = startpos + prog->gofs;
	2111	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2112	"GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
	2113	} else if (sv && SvTYPE(sv) >= SVt_PVMG
	2114	&& SvMAGIC(sv)
	2115	&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
	2116	&& mg->mg_len >= 0) {
	2117	reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
	2118	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2119	"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
	2120
	2121	if (prog->extflags & RXf_ANCH_GPOS) {
	2122	if (s > reginfo.ganch)
	2123	goto phooey;
	2124	s = reginfo.ganch - prog->gofs;
	2125	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2126	"GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
	2127	if (s < strbeg)
	2128	goto phooey;
	2129	}
	2130	}
	2131	else if (data) {
	2132	reginfo.ganch = strbeg + PTR2UV(data);
	2133	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2134	"GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
	2135
	2136	} else { /* pos() not defined */
	2137	reginfo.ganch = strbeg;
	2138	DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
	2139	"GPOS: reginfo.ganch = strbeg\n"));
	2140	}
	2141	}
	2142	if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
	2143	/* We have to be careful. If the previous successful match
	2144	was from this regex we don't want a subsequent partially
	2145	successful match to clobber the old results.
	2146	So when we detect this possibility we add a swap buffer
	2147	to the re, and switch the buffer each match. If we fail
	2148	we switch it back, otherwise we leave it swapped.
	2149	*/
	2150	swap = prog->offs;
	2151	/* do we need a save destructor here for eval dies? */
	2152	Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
	2153	}
	2154	if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL \|\| prog->check_utf8 != NULL)) {
	2155	re_scream_pos_data d;
	2156
	2157	d.scream_olds = &scream_olds;
	2158	d.scream_pos = &scream_pos;
	2159	s = re_intuit_start(rx, sv, s, strend, flags, &d);
	2160	if (!s) {
	2161	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
	2162	goto phooey; /* not present */
	2163	}
	2164	}
	2165
	2166
	2167
	2168	/* Simplest case: anchored match need be tried only once. */
	2169	/* [unless only anchor is BOL and multiline is set] */
	2170	if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
	2171	if (s == startpos && regtry(&reginfo, &startpos))
	2172	goto got_it;
	2173	else if (multiline \|\| (prog->intflags & PREGf_IMPLICIT)
	2174	\|\| (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
	2175	{
	2176	char *end;
	2177
	2178	if (minlen)
	2179	dontbother = minlen - 1;
	2180	end = HOP3c(strend, -dontbother, strbeg) - 1;
	2181	/* for multiline we only have to try after newlines */
	2182	if (prog->check_substr \|\| prog->check_utf8) {
	2183	/* because of the goto we can not easily reuse the macros for bifurcating the
	2184	unicode/non-unicode match modes here like we do elsewhere - demerphq */
	2185	if (utf8_target) {
	2186	if (s == startpos)
	2187	goto after_try_utf8;
	2188	while (1) {
	2189	if (regtry(&reginfo, &s)) {
	2190	goto got_it;
	2191	}
	2192	after_try_utf8:
	2193	if (s > end) {
	2194	goto phooey;
	2195	}
	2196	if (prog->extflags & RXf_USE_INTUIT) {
	2197	s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
	2198	if (!s) {
	2199	goto phooey;
	2200	}
	2201	}
	2202	else {
	2203	s += UTF8SKIP(s);
	2204	}
	2205	}
	2206	} /* end search for check string in unicode */
	2207	else {
	2208	if (s == startpos) {
	2209	goto after_try_latin;
	2210	}
	2211	while (1) {
	2212	if (regtry(&reginfo, &s)) {
	2213	goto got_it;
	2214	}
	2215	after_try_latin:
	2216	if (s > end) {
	2217	goto phooey;
	2218	}
	2219	if (prog->extflags & RXf_USE_INTUIT) {
	2220	s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
	2221	if (!s) {
	2222	goto phooey;
	2223	}
	2224	}
	2225	else {
	2226	s++;
	2227	}
	2228	}
	2229	} /* end search for check string in latin*/
	2230	} /* end search for check string */
	2231	else { /* search for newline */
	2232	if (s > startpos) {
	2233	/XXX: The s-- is almost definitely wrong here under unicode - demeprhq/
	2234	s--;
	2235	}
	2236	/* We can use a more efficient search as newlines are the same in unicode as they are in latin */
	2237	while (s < end) {
	2238	if (s++ == '\n') { / don't need PL_utf8skip here */
	2239	if (regtry(&reginfo, &s))
	2240	goto got_it;
	2241	}
	2242	}
	2243	} /* end search for newline */
	2244	} /* end anchored/multiline check string search */
	2245	goto phooey;
	2246	} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
	2247	{
	2248	/* the warning about reginfo.ganch being used without initialization
	2249	is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
	2250	and we only enter this block when the same bit is set. */
	2251	char *tmp_s = reginfo.ganch - prog->gofs;
	2252
	2253	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
	2254	goto got_it;
	2255	goto phooey;
	2256	}
	2257
	2258	/* Messy cases: unanchored match. */
	2259	if ((prog->anchored_substr \|\| prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
	2260	/* we have /x+whatever/ */
	2261	/* it must be a one character string (XXXX Except UTF_PATTERN?) */
	2262	char ch;
	2263	#ifdef DEBUGGING
	2264	int did_match = 0;
	2265	#endif
	2266	if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
	2267	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2268	ch = SvPVX_const(utf8_target ? prog->anchored_utf8 : prog->anchored_substr)[0];
	2269
	2270	if (utf8_target) {
	2271	REXEC_FBC_SCAN(
	2272	if (*s == ch) {
	2273	DEBUG_EXECUTE_r( did_match = 1 );
	2274	if (regtry(&reginfo, &s)) goto got_it;
	2275	s += UTF8SKIP(s);
	2276	while (s < strend && *s == ch)
	2277	s += UTF8SKIP(s);
	2278	}
	2279	);
	2280	}
	2281	else {
	2282	REXEC_FBC_SCAN(
	2283	if (*s == ch) {
	2284	DEBUG_EXECUTE_r( did_match = 1 );
	2285	if (regtry(&reginfo, &s)) goto got_it;
	2286	s++;
	2287	while (s < strend && *s == ch)
	2288	s++;
	2289	}
	2290	);
	2291	}
	2292	DEBUG_EXECUTE_r(if (!did_match)
	2293	PerlIO_printf(Perl_debug_log,
	2294	"Did not find anchored character...\n")
	2295	);
	2296	}
	2297	else if (prog->anchored_substr != NULL
	2298	\|\| prog->anchored_utf8 != NULL
	2299	\|\| ((prog->float_substr != NULL \|\| prog->float_utf8 != NULL)
	2300	&& prog->float_max_offset < strend - s)) {
	2301	SV *must;
	2302	I32 back_max;
	2303	I32 back_min;
	2304	char *last;
	2305	char last1; / Last position checked before */
	2306	#ifdef DEBUGGING
	2307	int did_match = 0;
	2308	#endif
	2309	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	2310	if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
	2311	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2312	must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
	2313	back_max = back_min = prog->anchored_offset;
	2314	} else {
	2315	if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
	2316	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2317	must = utf8_target ? prog->float_utf8 : prog->float_substr;
	2318	back_max = prog->float_max_offset;
	2319	back_min = prog->float_min_offset;
	2320	}
	2321
	2322
	2323	if (must == &PL_sv_undef)
	2324	/* could not downgrade utf8 check substring, so must fail */
	2325	goto phooey;
	2326
	2327	if (back_min<0) {
	2328	last = strend;
	2329	} else {
	2330	last = HOP3c(strend, /* Cannot start after this */
	2331	-(I32)(CHR_SVLEN(must)
	2332	- (SvTAIL(must) != 0) + back_min), strbeg);
	2333	}
	2334	if (s > PL_bostr)
	2335	last1 = HOPc(s, -1);
	2336	else
	2337	last1 = s - 1; /* bogus */
	2338
	2339	/* XXXX check_substr already used to find "s", can optimize if
	2340	check_substr==must. */
	2341	scream_pos = -1;
	2342	dontbother = end_shift;
	2343	strend = HOPc(strend, -dontbother);
	2344	while ( (s <= last) &&
	2345	((flags & REXEC_SCREAM) && SvSCREAM(sv)
	2346	? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
	2347	end_shift, &scream_pos, 0))
	2348	: (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
	2349	(unsigned char*)strend, must,
	2350	multiline ? FBMrf_MULTILINE : 0))) ) {
	2351	/* we may be pointing at the wrong string */
	2352	if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
	2353	s = strbeg + (s - SvPVX_const(sv));
	2354	DEBUG_EXECUTE_r( did_match = 1 );
	2355	if (HOPc(s, -back_max) > last1) {
	2356	last1 = HOPc(s, -back_min);
	2357	s = HOPc(s, -back_max);
	2358	}
	2359	else {
	2360	char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
	2361
	2362	last1 = HOPc(s, -back_min);
	2363	s = t;
	2364	}
	2365	if (utf8_target) {
	2366	while (s <= last1) {
	2367	if (regtry(&reginfo, &s))
	2368	goto got_it;
	2369	s += UTF8SKIP(s);
	2370	}
	2371	}
	2372	else {
	2373	while (s <= last1) {
	2374	if (regtry(&reginfo, &s))
	2375	goto got_it;
	2376	s++;
	2377	}
	2378	}
	2379	}
	2380	DEBUG_EXECUTE_r(if (!did_match) {
	2381	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	2382	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	2383	PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
	2384	((must == prog->anchored_substr \|\| must == prog->anchored_utf8)
	2385	? "anchored" : "floating"),
	2386	quoted, RE_SV_TAIL(must));
	2387	});
	2388	goto phooey;
	2389	}
	2390	else if ( (c = progi->regstclass) ) {
	2391	if (minlen) {
	2392	const OPCODE op = OP(progi->regstclass);
	2393	/* don't bother with what can't match */
	2394	if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
	2395	strend = HOPc(strend, -(minlen - 1));
	2396	}
	2397	DEBUG_EXECUTE_r({
	2398	SV * const prop = sv_newmortal();
	2399	regprop(prog, prop, c);
	2400	{
	2401	RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
	2402	s,strend-s,60);
	2403	PerlIO_printf(Perl_debug_log,
	2404	"Matching stclass %.*s against %s (%d bytes)\n",
	2405	(int)SvCUR(prop), SvPVX_const(prop),
	2406	quoted, (int)(strend - s));
	2407	}
	2408	});
	2409	if (find_byclass(prog, c, s, strend, &reginfo))
	2410	goto got_it;
	2411	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
	2412	}
	2413	else {
	2414	dontbother = 0;
	2415	if (prog->float_substr != NULL \|\| prog->float_utf8 != NULL) {
	2416	/* Trim the end. */
	2417	char *last;
	2418	SV* float_real;
	2419
	2420	if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
	2421	utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
	2422	float_real = utf8_target ? prog->float_utf8 : prog->float_substr;
	2423
	2424	if ((flags & REXEC_SCREAM) && SvSCREAM(sv)) {
	2425	last = screaminstr(sv, float_real, s - strbeg,
	2426	end_shift, &scream_pos, 1); /* last one */
	2427	if (!last)
	2428	last = scream_olds; /* Only one occurrence. */
	2429	/* we may be pointing at the wrong string */
	2430	else if (RXp_MATCH_COPIED(prog))
	2431	s = strbeg + (s - SvPVX_const(sv));
	2432	}
	2433	else {
	2434	STRLEN len;
	2435	const char * const little = SvPV_const(float_real, len);
	2436
	2437	if (SvTAIL(float_real)) {
	2438	if (memEQ(strend - len + 1, little, len - 1))
	2439	last = strend - len + 1;
	2440	else if (!multiline)
	2441	last = memEQ(strend - len, little, len)
	2442	? strend - len : NULL;
	2443	else
	2444	goto find_last;
	2445	} else {
	2446	find_last:
	2447	if (len)
	2448	last = rninstr(s, strend, little, little + len);
	2449	else
	2450	last = strend; /* matching "$" */
	2451	}
	2452	}
	2453	if (last == NULL) {
	2454	DEBUG_EXECUTE_r(
	2455	PerlIO_printf(Perl_debug_log,
	2456	"%sCan't trim the tail, match fails (should not happen)%s\n",
	2457	PL_colors[4], PL_colors[5]));
	2458	goto phooey; /* Should not happen! */
	2459	}
	2460	dontbother = strend - last + prog->float_min_offset;
	2461	}
	2462	if (minlen && (dontbother < minlen))
	2463	dontbother = minlen - 1;
	2464	strend -= dontbother; /* this one's always in bytes! */
	2465	/* We don't know much -- general case. */
	2466	if (utf8_target) {
	2467	for (;;) {
	2468	if (regtry(&reginfo, &s))
	2469	goto got_it;
	2470	if (s >= strend)
	2471	break;
	2472	s += UTF8SKIP(s);
	2473	};
	2474	}
	2475	else {
	2476	do {
	2477	if (regtry(&reginfo, &s))
	2478	goto got_it;
	2479	} while (s++ < strend);
	2480	}
	2481	}
	2482
	2483	/* Failure. */
	2484	goto phooey;
	2485
	2486	got_it:
	2487	Safefree(swap);
	2488	RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
	2489
	2490	if (PL_reg_eval_set)
	2491	restore_pos(aTHX_ prog);
	2492	if (RXp_PAREN_NAMES(prog))
	2493	(void)hv_iterinit(RXp_PAREN_NAMES(prog));
	2494
	2495	/* make sure $`, $&, $', and $digit will work later */
	2496	if ( !(flags & REXEC_NOT_FIRST) ) {
	2497	RX_MATCH_COPY_FREE(rx);
	2498	if (flags & REXEC_COPY_STR) {
	2499	const I32 i = PL_regeol - startpos + (stringarg - strbeg);
	2500	#ifdef PERL_OLD_COPY_ON_WRITE
	2501	if ((SvIsCOW(sv)
	2502	\|\| (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
	2503	if (DEBUG_C_TEST) {
	2504	PerlIO_printf(Perl_debug_log,
	2505	"Copy on write: regexp capture, type %d\n",
	2506	(int) SvTYPE(sv));
	2507	}
	2508	prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
	2509	prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
	2510	assert (SvPOKp(prog->saved_copy));
	2511	} else
	2512	#endif
	2513	{
	2514	RX_MATCH_COPIED_on(rx);
	2515	s = savepvn(strbeg, i);
	2516	prog->subbeg = s;
	2517	}
	2518	prog->sublen = i;
	2519	}
	2520	else {
	2521	prog->subbeg = strbeg;
	2522	prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
	2523	}
	2524	}
	2525
	2526	return 1;
	2527
	2528	phooey:
	2529	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
	2530	PL_colors[4], PL_colors[5]));
	2531	if (PL_reg_eval_set)
	2532	restore_pos(aTHX_ prog);
	2533	if (swap) {
	2534	/* we failed :-( roll it back */
	2535	Safefree(prog->offs);
	2536	prog->offs = swap;
	2537	}
	2538
	2539	return 0;
	2540	}
	2541
	2542
	2543	/*
	2544	- regtry - try match at specific point
	2545	*/
	2546	STATIC I32 /* 0 failure, 1 success */
	2547	S_regtry(pTHX_ regmatch_info reginfo, char *startpos)
	2548	{
	2549	dVAR;
	2550	CHECKPOINT lastcp;
	2551	REGEXP *const rx = reginfo->prog;
	2552	regexp const prog = (struct regexp )SvANY(rx);
	2553	RXi_GET_DECL(prog,progi);
	2554	GET_RE_DEBUG_FLAGS_DECL;
	2555
	2556	PERL_ARGS_ASSERT_REGTRY;
	2557
	2558	reginfo->cutpoint=NULL;
	2559
	2560	if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
	2561	MAGIC *mg;
	2562
	2563	PL_reg_eval_set = RS_init;
	2564	DEBUG_EXECUTE_r(DEBUG_s(
	2565	PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
	2566	(IV)(PL_stack_sp - PL_stack_base));
	2567	));
	2568	SAVESTACK_CXPOS();
	2569	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
	2570	/* Otherwise OP_NEXTSTATE will free whatever on stack now. */
	2571	SAVETMPS;
	2572	/* Apparently this is not needed, judging by wantarray. */
	2573	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
	2574	cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
	2575
	2576	if (reginfo->sv) {
	2577	/* Make $_ available to executed code. */
	2578	if (reginfo->sv != DEFSV) {
	2579	SAVE_DEFSV;
	2580	DEFSV_set(reginfo->sv);
	2581	}
	2582
	2583	if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
	2584	&& (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
	2585	/* prepare for quick setting of pos */
	2586	#ifdef PERL_OLD_COPY_ON_WRITE
	2587	if (SvIsCOW(reginfo->sv))
	2588	sv_force_normal_flags(reginfo->sv, 0);
	2589	#endif
	2590	mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
	2591	&PL_vtbl_mglob, NULL, 0);
	2592	mg->mg_len = -1;
	2593	}
	2594	PL_reg_magic = mg;
	2595	PL_reg_oldpos = mg->mg_len;
	2596	SAVEDESTRUCTOR_X(restore_pos, prog);
	2597	}
	2598	if (!PL_reg_curpm) {
	2599	Newxz(PL_reg_curpm, 1, PMOP);
	2600	#ifdef USE_ITHREADS
	2601	{
	2602	SV* const repointer = &PL_sv_undef;
	2603	/* this regexp is also owned by the new PL_reg_curpm, which
	2604	will try to free it. */
	2605	av_push(PL_regex_padav, repointer);
	2606	PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
	2607	PL_regex_pad = AvARRAY(PL_regex_padav);
	2608	}
	2609	#endif
	2610	}
	2611	#ifdef USE_ITHREADS
	2612	/* It seems that non-ithreads works both with and without this code.
	2613	So for efficiency reasons it seems best not to have the code
	2614	compiled when it is not needed. */
	2615	/* This is safe against NULLs: */
	2616	ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
	2617	/* PM_reg_curpm owns a reference to this regexp. */
	2618	(void)ReREFCNT_inc(rx);
	2619	#endif
	2620	PM_SETRE(PL_reg_curpm, rx);
	2621	PL_reg_oldcurpm = PL_curpm;
	2622	PL_curpm = PL_reg_curpm;
	2623	if (RXp_MATCH_COPIED(prog)) {
	2624	/* Here is a serious problem: we cannot rewrite subbeg,
	2625	since it may be needed if this match fails. Thus
	2626	$` inside (?{}) could fail... */
	2627	PL_reg_oldsaved = prog->subbeg;
	2628	PL_reg_oldsavedlen = prog->sublen;
	2629	#ifdef PERL_OLD_COPY_ON_WRITE
	2630	PL_nrs = prog->saved_copy;
	2631	#endif
	2632	RXp_MATCH_COPIED_off(prog);
	2633	}
	2634	else
	2635	PL_reg_oldsaved = NULL;
	2636	prog->subbeg = PL_bostr;
	2637	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
	2638	}
	2639	DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
	2640	prog->offs[0].start = *startpos - PL_bostr;
	2641	PL_reginput = *startpos;
	2642	PL_reglastparen = &prog->lastparen;
	2643	PL_reglastcloseparen = &prog->lastcloseparen;
	2644	prog->lastparen = 0;
	2645	prog->lastcloseparen = 0;
	2646	PL_regsize = 0;
	2647	PL_regoffs = prog->offs;
	2648	if (PL_reg_start_tmpl <= prog->nparens) {
	2649	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
	2650	if(PL_reg_start_tmp)
	2651	Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	2652	else
	2653	Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	2654	}
	2655
	2656	/* XXXX What this code is doing here?!!! There should be no need
	2657	to do this again and again, PL_reglastparen should take care of
	2658	this! --ilya*/
	2659
	2660	/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
	2661	* Actually, the code in regcppop() (which Ilya may be meaning by
	2662	* PL_reglastparen), is not needed at all by the test suite
	2663	* (op/regexp, op/pat, op/split), but that code is needed otherwise
	2664	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	2665	* Meanwhile, this code is needed for the
	2666	* above-mentioned test suite tests to succeed. The common theme
	2667	* on those tests seems to be returning null fields from matches.
	2668	* --jhi updated by dapm */
	2669	#if 1
	2670	if (prog->nparens) {
	2671	regexp_paren_pair *pp = PL_regoffs;
	2672	register I32 i;
	2673	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
	2674	++pp;
	2675	pp->start = -1;
	2676	pp->end = -1;
	2677	}
	2678	}
	2679	#endif
	2680	REGCP_SET(lastcp);
	2681	if (regmatch(reginfo, progi->program + 1)) {
	2682	PL_regoffs[0].end = PL_reginput - PL_bostr;
	2683	return 1;
	2684	}
	2685	if (reginfo->cutpoint)
	2686	*startpos= reginfo->cutpoint;
	2687	REGCP_UNWIND(lastcp);
	2688	return 0;
	2689	}
	2690
	2691
	/* Shorthand for leaving the current op by jumping to the labels
	 * "yes", "no" and "no_silent" respectively. */
	#define sayYES goto yes
	#define sayNO goto no
	#define sayNO_SILENT goto no_silent

	/* Like sayNO, but first ORs ST.cache_mask into
	 * PL_reg_poscache[ST.cache_offset] when the mask is non-zero.
	 * We don't use STMT_START/END here because it leads to
	 * "unreachable code" warnings, which are bogus, but distracting. */
	#define CACHEsayNO \
	    if (ST.cache_mask) \
	       PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
	    sayNO

	/* this is used to determine how far from the left messages like
	   'failed...' are printed. It should be set such that messages
	   are inline with the regop output that created them.
	*/
	#define REPORT_CODE_OFF 32


	#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
	#define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */

	/* Addresses of the first and last state slots in slab s. */
	#define SLAB_FIRST(s) (&(s)->states[0])
	#define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
	2715
	2716	/* grab a new slab and return the first slot in it */
	2717
	2718	STATIC regmatch_state *
	2719	S_push_slab(pTHX)
	2720	{
	2721	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	2722	dMY_CXT;
	2723	#endif
	2724	regmatch_slab *s = PL_regmatch_slab->next;
	2725	if (!s) {
	2726	Newx(s, 1, regmatch_slab);
	2727	s->prev = PL_regmatch_slab;
	2728	s->next = NULL;
	2729	PL_regmatch_slab->next = s;
	2730	}
	2731	PL_regmatch_slab = s;
	2732	return SLAB_FIRST(s);
	2733	}
	2734
	2735
	/* Set scan to node, store state as the resume state of the current
	 * backtrack entry (st), then jump to the push_state label. */

	#define PUSH_STATE_GOTO(state, node) \
	    scan = node; \
	    st->resume_state = state; \
	    goto push_state;

	/* Same as PUSH_STATE_GOTO, but jumps to push_yes_state, so that the
	 * pushed state is also backtracked to on success. */

	#define PUSH_YES_STATE_GOTO(state, node) \
	    scan = node; \
	    st->resume_state = state; \
	    goto push_yes_state;
	2749
	2750
	2751
	2752	/*
	2753
	2754	regmatch() - main matching routine
	2755
	2756	This is basically one big switch statement in a loop. We execute an op,
	2757	set 'next' to point the next op, and continue. If we come to a point which
	2758	we may need to backtrack to on failure such as (A|B|C), we push a
	2759	backtrack state onto the backtrack stack. On failure, we pop the top
	2760	state, and re-enter the loop at the state indicated. If there are no more
	2761	states to pop, we return failure.
	2762
	2763	Sometimes we also need to backtrack on success; for example /A+/, where
	2764	after successfully matching one A, we need to go back and try to
	2765	match another one; similarly for lookahead assertions: if the assertion
	2766	completes successfully, we backtrack to the state just before the assertion
	2767	and then carry on. In these cases, the pushed state is marked as
	2768	'backtrack on success too'. This marking is in fact done by a chain of
	2769	pointers, each pointing to the previous 'yes' state. On success, we pop to
	2770	the nearest yes state, discarding any intermediate failure-only states.
	2771	Sometimes a yes state is pushed just to force some cleanup code to be
	2772	called at the end of a successful match or submatch; e.g. (??{$re}) uses
	2773	it to free the inner regex.
	2774
	2775	Note that failure backtracking rewinds the cursor position, while
	2776	success backtracking leaves it alone.
	2777
	2778	A pattern is complete when the END op is executed, while a subpattern
	2779	such as (?=foo) is complete when the SUCCESS op is executed. Both of these
	2780	ops trigger the "pop to last yes state if any, otherwise return true"
	2781	behaviour.
	2782
	2783	A common convention in this function is to use A and B to refer to the two
	2784	subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
	2785	the subpattern to be matched possibly multiple times, while B is the entire
	2786	rest of the pattern. Variable and state names reflect this convention.
	2787
	2788	The states in the main switch are the union of ops and failure/success of
	2789	substates associated with that op. For example, IFMATCH is the op
	2790	that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
	2791	'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
	2792	successfully matched A and IFMATCH_A_fail is a state saying that we have
	2793	just failed to match A. Resume states always come in pairs. The backtrack
	2794	state we push is marked as 'IFMATCH_A', but when that is popped, we resume
	2795	at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
	2796	on success or failure.
	2797
	2798	The struct that holds a backtracking state is actually a big union, with
	2799	one variant for each major type of op. The variable st points to the
	2800	top-most backtrack struct. To make the code clearer, within each
	2801	block of code we #define ST to alias the relevant union.
	2802
	2803	Here's a concrete example of a (vastly oversimplified) IFMATCH
	2804	implementation:
	2805
	2806	switch (state) {
	2807	....
	2808
	2809	#define ST st->u.ifmatch
	2810
	2811	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	2812	ST.foo = ...; // some state we wish to save
	2813	...
	2814	// push a yes backtrack state with a resume value of
	2815	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
	2816	// first node of A:
	2817	PUSH_YES_STATE_GOTO(IFMATCH_A, A);
	2818	// NOTREACHED
	2819
	2820	case IFMATCH_A: // we have successfully executed A; now continue with B
	2821	next = B;
	2822	bar = ST.foo; // do something with the preserved value
	2823	break;
	2824
	2825	case IFMATCH_A_fail: // A failed, so the assertion failed
	2826	...; // do some housekeeping, then ...
	2827	sayNO; // propagate the failure
	2828
	2829	#undef ST
	2830
	2831	...
	2832	}
	2833
	2834	For any old-timers reading this who are familiar with the old recursive
	2835	approach, the code above is equivalent to:
	2836
	2837	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	2838	{
	2839	int foo = ...
	2840	...
	2841	if (regmatch(A)) {
	2842	next = B;
	2843	bar = foo;
	2844	break;
	2845	}
	2846	...; // do some housekeeping, then ...
	2847	sayNO; // propagate the failure
	2848	}
	2849
	2850	The topmost backtrack state, pointed to by st, is usually free. If you
	2851	want to claim it, populate any ST.foo fields in it with values you wish to
	2852	save, then do one of
	2853
	2854	PUSH_STATE_GOTO(resume_state, node);
	2855	PUSH_YES_STATE_GOTO(resume_state, node);
	2856
	2857	which sets that backtrack state's resume value to 'resume_state', pushes a
	2858	new free entry to the top of the backtrack stack, then goes to 'node'.
	2859	On backtracking, the free slot is popped, and the saved state becomes the
	2860	new free state. An ST.foo field in this new top state can be temporarily
	2861	accessed to retrieve values, but once the main loop is re-entered, it
	2862	becomes available for reuse.
	2863
	2864	Note that the depth of the backtrack stack constantly increases during the
	2865	left-to-right execution of the pattern, rather than going up and down with
	2866	the pattern nesting. For example the stack is at its maximum at Z at the
	2867	end of the pattern, rather than at X in the following:
	2868
	2869	/(((X)+)+)+....(Y)+....Z/
	2870
	2871	The only exceptions to this are lookahead/behind assertions and the cut,
	2872	(?>A), which pop all the backtrack states associated with A before
	2873	continuing.
	2874
	2875	Backtrack state structs are allocated in slabs of about 4K in size.
	2876	PL_regmatch_state and st always point to the currently active state,
	2877	and PL_regmatch_slab points to the slab currently containing
	2878	PL_regmatch_state. The first time regmatch() is called, the first slab is
	2879	allocated, and is never freed until interpreter destruction. When the slab
	2880	is full, a new one is allocated and chained to the end. At exit from
	2881	regmatch(), slabs allocated since entry are freed.
	2882
	2883	*/
	2884
	2885
	2886	#define DEBUG_STATE_pp(pp) \
	2887	DEBUG_STATE_r({ \
	2888	DUMP_EXEC_POS(locinput, scan, utf8_target); \
	2889	PerlIO_printf(Perl_debug_log, \
	2890	" %*s"pp" %s%s%s%s%s\n", \
	2891	depth*2, "", \
	2892	PL_reg_name[st->resume_state], \
	2893	((st==yes_state\|\|st==mark_state) ? "[" : ""), \
	2894	((st==yes_state) ? "Y" : ""), \
	2895	((st==mark_state) ? "M" : ""), \
	2896	((st==yes_state\|\|st==mark_state) ? "]" : "") \
	2897	); \
	2898	});
	2899
	2900
	2901	#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
	2902
	2903	#ifdef DEBUGGING
	2904
	2905	STATIC void
	2906	S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
	2907	const char start, const char end, const char *blurb)
	2908	{
	2909	const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
	2910
	2911	PERL_ARGS_ASSERT_DEBUG_START_MATCH;
	2912
	2913	if (!PL_colorset)
	2914	reginitcolors();
	2915	{
	2916	RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
	2917	RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
	2918
	2919	RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
	2920	start, end - start, 60);
	2921
	2922	PerlIO_printf(Perl_debug_log,
	2923	"%s%s REx%s %s against %s\n",
	2924	PL_colors[4], blurb, PL_colors[5], s0, s1);
	2925
	2926	if (utf8_target\|\|utf8_pat)
	2927	PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
	2928	utf8_pat ? "pattern" : "",
	2929	utf8_pat && utf8_target ? " and " : "",
	2930	utf8_target ? "string" : ""
	2931	);
	2932	}
	2933	}
	2934
	2935	STATIC void
	2936	S_dump_exec_pos(pTHX_ const char *locinput,
	2937	const regnode *scan,
	2938	const char *loc_regeol,
	2939	const char *loc_bostr,
	2940	const char *loc_reg_starttry,
	2941	const bool utf8_target)
	2942	{
	2943	const int docolor = PL_colors[0] \|\| PL_colors[2] \|\| *PL_colors[4];
	2944	const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
	2945	int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
	2946	/* The part of the string before starttry has one color
	2947	(pref0_len chars), between starttry and current
	2948	position another one (pref_len - pref0_len chars),
	2949	after the current position the third one.
	2950	We assume that pref0_len <= pref_len, otherwise we
	2951	decrease pref0_len. */
	2952	int pref_len = (locinput - loc_bostr) > (5 + taill) - l
	2953	? (5 + taill) - l : locinput - loc_bostr;
	2954	int pref0_len;
	2955
	2956	PERL_ARGS_ASSERT_DUMP_EXEC_POS;
	2957
	2958	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput - pref_len)))
	2959	pref_len++;
	2960	pref0_len = pref_len - (locinput - loc_reg_starttry);
	2961	if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
	2962	l = ( loc_regeol - locinput > (5 + taill) - pref_len
	2963	? (5 + taill) - pref_len : loc_regeol - locinput);
	2964	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput + l)))
	2965	l--;
	2966	if (pref0_len < 0)
	2967	pref0_len = 0;
	2968	if (pref0_len > pref_len)
	2969	pref0_len = pref_len;
	2970	{
	2971	const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0;
	2972
	2973	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
	2974	(locinput - pref_len),pref0_len, 60, 4, 5);
	2975
	2976	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
	2977	(locinput - pref_len + pref0_len),
	2978	pref_len - pref0_len, 60, 2, 3);
	2979
	2980	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
	2981	locinput, loc_regeol - locinput, 10, 0, 1);
	2982
	2983	const STRLEN tlen=len0+len1+len2;
	2984	PerlIO_printf(Perl_debug_log,
	2985	"%4"IVdf" <%.s%.s%s%.s>%s\|",
	2986	(IV)(locinput - loc_bostr),
	2987	len0, s0,
	2988	len1, s1,
	2989	(docolor ? "" : "> <"),
	2990	len2, s2,
	2991	(int)(tlen > 19 ? 0 : 19 - tlen),
	2992	"");
	2993	}
	2994	}
	2995
	2996	#endif
	2997
	2998	/* reg_check_named_buff_matched()
	2999	* Checks to see if a named buffer has matched. The data array of
	3000	* buffer numbers corresponding to the buffer is expected to reside
	3001	* in the regexp->data->data array in the slot stored in the ARG() of
	3002	* node involved. Note that this routine doesn't actually care about the
	3003	* name, that information is not preserved from compilation to execution.
	3004	* Returns the index of the leftmost defined buffer with the given name
	3005	* or 0 if none of the buffers matched.
	3006	*/
	3007	STATIC I32
	3008	S_reg_check_named_buff_matched(pTHX_ const regexp rex, const regnode scan)
	3009	{
	3010	I32 n;
	3011	RXi_GET_DECL(rex,rexi);
	3012	SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	3013	I32 nums=(I32)SvPVX(sv_dat);
	3014
	3015	PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
	3016
	3017	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	3018	if ((I32)*PL_reglastparen >= nums[n] &&
	3019	PL_regoffs[nums[n]].end != -1)
	3020	{
	3021	return nums[n];
	3022	}
	3023	}
	3024	return 0;
	3025	}
	3026
	3027
	3028	/* free all slabs above current one - called during LEAVE_SCOPE */
	3029
	3030	STATIC void
	3031	S_clear_backtrack_stack(pTHX_ void *p)
	3032	{
	3033	regmatch_slab *s = PL_regmatch_slab->next;
	3034	PERL_UNUSED_ARG(p);
	3035
	3036	if (!s)
	3037	return;
	3038	PL_regmatch_slab->next = NULL;
	3039	while (s) {
	3040	regmatch_slab * const osl = s;
	3041	s = s->next;
	3042	Safefree(osl);
	3043	}
	3044	}
	3045
	3046
	3047	#define SETREX(Re1,Re2) \
	3048	if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
	3049	Re1 = (Re2)
	3050
	3051	STATIC I32 /* 0 failure, 1 success */
	3052	S_regmatch(pTHX_ regmatch_info reginfo, regnode prog)
	3053	{
	3054	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	3055	dMY_CXT;
	3056	#endif
	3057	dVAR;
	3058	register const bool utf8_target = PL_reg_match_utf8;
	3059	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	3060	REGEXP *rex_sv = reginfo->prog;
	3061	regexp rex = (struct regexp )SvANY(rex_sv);
	3062	RXi_GET_DECL(rex,rexi);
	3063	I32 oldsave;
	3064	/* the current state. This is a cached copy of PL_regmatch_state */
	3065	register regmatch_state *st;
	3066	/* cache heavy used fields of st in registers */
	3067	register regnode *scan;
	3068	register regnode *next;
	3069	register U32 n = 0; /* general value; init to avoid compiler warning */
	3070	register I32 ln = 0; /* len or last; init to avoid compiler warning */
	3071	register char *locinput = PL_reginput;
	3072	register I32 nextchr; /* is always set to UCHARAT(locinput) */
	3073
	3074	bool result = 0; /* return value of S_regmatch */
	3075	int depth = 0; /* depth of backtrack stack */
	3076	U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
	3077	const U32 max_nochange_depth =
	3078	(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
	3079	3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
	3080	regmatch_state yes_state = NULL; / state to pop to on success of
	3081	subpattern */
	3082	/* mark_state piggy backs on the yes_state logic so that when we unwind
	3083	the stack on success we can update the mark_state as we go */
	3084	regmatch_state mark_state = NULL; / last mark state we have seen */
	3085	regmatch_state cur_eval = NULL; / most recent EVAL_AB state */
	3086	struct regmatch_state cur_curlyx = NULL; / most recent curlyx */
	3087	U32 state_num;
	3088	bool no_final = 0; /* prevent failure from backtracking? */
	3089	bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
	3090	char *startpoint = PL_reginput;
	3091	SV popmark = NULL; / are we looking for a mark? */
	3092	SV sv_commit = NULL; / last mark name seen in failure */
	3093	SV sv_yes_mark = NULL; / last mark name we have seen
	3094	during a successful match */
	3095	U32 lastopen = 0; /* last open we saw */
	3096	bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
	3097	SV* const oreplsv = GvSV(PL_replgv);
	3098	/* these three flags are set by various ops to signal information to
	3099	* the very next op. They have a useful lifetime of exactly one loop
	3100	* iteration, and are not preserved or restored by state pushes/pops
	3101	*/
	3102	bool sw = 0; /* the condition value in (?(cond)a\|b) */
	3103	bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
	3104	int logical = 0; /* the following EVAL is:
	3105	0: (?{...})
	3106	1: (?(?{...})X\|Y)
	3107	2: (??{...})
	3108	or the following IFMATCH/UNLESSM is:
	3109	false: plain (?=foo)
	3110	true: used as a condition: (?(?=foo))
	3111	*/
	3112	#ifdef DEBUGGING
	3113	GET_RE_DEBUG_FLAGS_DECL;
	3114	#endif
	3115
	3116	PERL_ARGS_ASSERT_REGMATCH;
	3117
	3118	DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
	3119	PerlIO_printf(Perl_debug_log,"regmatch start\n");
	3120	}));
	3121	/* on first ever call to regmatch, allocate first slab */
	3122	if (!PL_regmatch_slab) {
	3123	Newx(PL_regmatch_slab, 1, regmatch_slab);
	3124	PL_regmatch_slab->prev = NULL;
	3125	PL_regmatch_slab->next = NULL;
	3126	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
	3127	}
	3128
	3129	oldsave = PL_savestack_ix;
	3130	SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
	3131	SAVEVPTR(PL_regmatch_slab);
	3132	SAVEVPTR(PL_regmatch_state);
	3133
	3134	/* grab next free state slot */
	3135	st = ++PL_regmatch_state;
	3136	if (st > SLAB_LAST(PL_regmatch_slab))
	3137	st = PL_regmatch_state = S_push_slab(aTHX);
	3138
	3139	/* Note that nextchr is a byte even in UTF */
	3140	nextchr = UCHARAT(locinput);
	3141	scan = prog;
	3142	while (scan != NULL) {
	3143
	3144	DEBUG_EXECUTE_r( {
	3145	SV * const prop = sv_newmortal();
	3146	regnode *rnext=regnext(scan);
	3147	DUMP_EXEC_POS( locinput, scan, utf8_target );
	3148	regprop(rex, prop, scan);
	3149
	3150	PerlIO_printf(Perl_debug_log,
	3151	"%3"IVdf":%*s%s(%"IVdf")\n",
	3152	(IV)(scan - rexi->program), depth*2, "",
	3153	SvPVX_const(prop),
	3154	(PL_regkind[OP(scan)] == END \|\| !rnext) ?
	3155	0 : (IV)(rnext - rexi->program));
	3156	});
	3157
	3158	next = scan + NEXT_OFF(scan);
	3159	if (next == scan)
	3160	next = NULL;
	3161	state_num = OP(scan);
	3162
	3163	reenter_switch:
	3164
	3165	assert(PL_reglastparen == &rex->lastparen);
	3166	assert(PL_reglastcloseparen == &rex->lastcloseparen);
	3167	assert(PL_regoffs == rex->offs);
	3168
	3169	switch (state_num) {
	3170	case BOL:
	3171	if (locinput == PL_bostr)
	3172	{
	3173	/* reginfo->till = reginfo->bol; */
	3174	break;
	3175	}
	3176	sayNO;
	3177	case MBOL:
	3178	if (locinput == PL_bostr \|\|
	3179	((nextchr \|\| locinput < PL_regeol) && locinput[-1] == '\n'))
	3180	{
	3181	break;
	3182	}
	3183	sayNO;
	3184	case SBOL:
	3185	if (locinput == PL_bostr)
	3186	break;
	3187	sayNO;
	3188	case GPOS:
	3189	if (locinput == reginfo->ganch)
	3190	break;
	3191	sayNO;
	3192
	3193	case KEEPS:
	3194	/* update the startpoint */
	3195	st->u.keeper.val = PL_regoffs[0].start;
	3196	PL_reginput = locinput;
	3197	PL_regoffs[0].start = locinput - PL_bostr;
	3198	PUSH_STATE_GOTO(KEEPS_next, next);
	3199	/NOT-REACHED/
	3200	case KEEPS_next_fail:
	3201	/* rollback the start point change */
	3202	PL_regoffs[0].start = st->u.keeper.val;
	3203	sayNO_SILENT;
	3204	/NOT-REACHED/
	3205	case EOL:
	3206	goto seol;
	3207	case MEOL:
	3208	if ((nextchr \|\| locinput < PL_regeol) && nextchr != '\n')
	3209	sayNO;
	3210	break;
	3211	case SEOL:
	3212	seol:
	3213	if ((nextchr \|\| locinput < PL_regeol) && nextchr != '\n')
	3214	sayNO;
	3215	if (PL_regeol - locinput > 1)
	3216	sayNO;
	3217	break;
	3218	case EOS:
	3219	if (PL_regeol != locinput)
	3220	sayNO;
	3221	break;
	3222	case SANY:
	3223	if (!nextchr && locinput >= PL_regeol)
	3224	sayNO;
	3225	if (utf8_target) {
	3226	locinput += PL_utf8skip[nextchr];
	3227	if (locinput > PL_regeol)
	3228	sayNO;
	3229	nextchr = UCHARAT(locinput);
	3230	}
	3231	else
	3232	nextchr = UCHARAT(++locinput);
	3233	break;
	3234	case CANY:
	3235	if (!nextchr && locinput >= PL_regeol)
	3236	sayNO;
	3237	nextchr = UCHARAT(++locinput);
	3238	break;
	3239	case REG_ANY:
	3240	if ((!nextchr && locinput >= PL_regeol) \|\| nextchr == '\n')
	3241	sayNO;
	3242	if (utf8_target) {
	3243	locinput += PL_utf8skip[nextchr];
	3244	if (locinput > PL_regeol)
	3245	sayNO;
	3246	nextchr = UCHARAT(locinput);
	3247	}
	3248	else
	3249	nextchr = UCHARAT(++locinput);
	3250	break;
	3251
	3252	#undef ST
	3253	#define ST st->u.trie
	3254	case TRIEC:
	3255	/* In this case the charclass data is available inline so
	3256	we can fail fast without a lot of extra overhead.
	3257	*/
	3258	if (scan->flags == EXACT \|\| !utf8_target) {
	3259	if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
	3260	DEBUG_EXECUTE_r(
	3261	PerlIO_printf(Perl_debug_log,
	3262	"%*s %sfailed to match trie start class...%s\n",
	3263	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3264	);
	3265	sayNO_SILENT;
	3266	/* NOTREACHED */
	3267	}
	3268	}
	3269	/* FALL THROUGH */
	3270	case TRIE:
	3271	/* the basic plan of execution of the trie is:
	3272	* At the beginning, run though all the states, and
	3273	* find the longest-matching word. Also remember the position
	3274	* of the shortest matching word. For example, this pattern:
	3275	* 1 2 3 4 5
	3276	* ab\|a\|x\|abcd\|abc
	3277	* when matched against the string "abcde", will generate
	3278	* accept states for all words except 3, with the longest
	3279	* matching word being 4, and the shortest being 1 (with
	3280	* the position being after char 1 of the string).
	3281	*
	3282	* Then for each matching word, in word order (i.e. 1,2,4,5),
	3283	* we run the remainder of the pattern; on each try setting
	3284	* the current position to the character following the word,
	3285	* returning to try the next word on failure.
	3286	*
	3287	* We avoid having to build a list of words at runtime by
	3288	* using a compile-time structure, wordinfo[].prev, which
	3289	* gives, for each word, the previous accepting word (if any).
	3290	* In the case above it would contain the mappings 1->2, 2->0,
	3291	* 3->0, 4->5, 5->1. We can use this table to generate, from
	3292	* the longest word (4 above), a list of all words, by
	3293	* following the list of prev pointers; this gives us the
	3294	* unordered list 4,5,1,2. Then given the current word we have
	3295	* just tried, we can go through the list and find the
	3296	* next-biggest word to try (so if we just failed on word 2,
	3297	* the next in the list is 4).
	3298	*
	3299	* Since at runtime we don't record the matching position in
	3300	* the string for each word, we have to work that out for
	3301	* each word we're about to process. The wordinfo table holds
	3302	* the character length of each word; given that we recorded
	3303	* at the start: the position of the shortest word and its
	3304	* length in chars, we just need to move the pointer the
	3305	* difference between the two char lengths. Depending on
	3306	* Unicode status and folding, that's cheap or expensive.
	3307	*
	3308	* This algorithm is optimised for the case where are only a
	3309	* small number of accept states, i.e. 0,1, or maybe 2.
	3310	* With lots of accepts states, and having to try all of them,
	3311	* it becomes quadratic on number of accept states to find all
	3312	* the next words.
	3313	*/
	3314
	3315	{
	3316	/* what type of TRIE am I? (utf8 makes this contextual) */
	3317	DECL_TRIE_TYPE(scan);
	3318
	3319	/* what trie are we using right now */
	3320	reg_trie_data * const trie
	3321	= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
	3322	HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
	3323	U32 state = trie->startstate;
	3324
	3325	if (trie->bitmap && trie_type != trie_utf8_fold &&
	3326	!TRIE_BITMAP_TEST(trie,*locinput)
	3327	) {
	3328	if (trie->states[ state ].wordnum) {
	3329	DEBUG_EXECUTE_r(
	3330	PerlIO_printf(Perl_debug_log,
	3331	"%*s %smatched empty string...%s\n",
	3332	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3333	);
	3334	if (!trie->jump)
	3335	break;
	3336	} else {
	3337	DEBUG_EXECUTE_r(
	3338	PerlIO_printf(Perl_debug_log,
	3339	"%*s %sfailed to match trie start class...%s\n",
	3340	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
	3341	);
	3342	sayNO_SILENT;
	3343	}
	3344	}
	3345
	3346	{
	3347	U8 uc = ( U8 )locinput;
	3348
	3349	STRLEN len = 0;
	3350	STRLEN foldlen = 0;
	3351	U8 uscan = (U8)NULL;
	3352	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	3353	U32 charcount = 0; /* how many input chars we have matched */
	3354	U32 accepted = 0; /* have we seen any accepting states? */
	3355
	3356	ST.B = next;
	3357	ST.jump = trie->jump;
	3358	ST.me = scan;
	3359	ST.firstpos = NULL;
	3360	ST.longfold = FALSE; /* char longer if folded => it's harder */
	3361	ST.nextword = 0;
	3362
	3363	/* fully traverse the TRIE; note the position of the
	3364	shortest accept state and the wordnum of the longest
	3365	accept state */
	3366
	3367	while ( state && uc <= (U8*)PL_regeol ) {
	3368	U32 base = trie->states[ state ].trans.base;
	3369	UV uvc = 0;
	3370	U16 charid = 0;
	3371	U16 wordnum;
	3372	wordnum = trie->states[ state ].wordnum;
	3373
	3374	if (wordnum) { /* it's an accept state */
	3375	if (!accepted) {
	3376	accepted = 1;
	3377	/* record first match position */
	3378	if (ST.longfold) {
	3379	ST.firstpos = (U8*)locinput;
	3380	ST.firstchars = 0;
	3381	}
	3382	else {
	3383	ST.firstpos = uc;
	3384	ST.firstchars = charcount;
	3385	}
	3386	}
	3387	if (!ST.nextword \|\| wordnum < ST.nextword)
	3388	ST.nextword = wordnum;
	3389	ST.topword = wordnum;
	3390	}
	3391
	3392	DEBUG_TRIE_EXECUTE_r({
	3393	DUMP_EXEC_POS( (char *)uc, scan, utf8_target );
	3394	PerlIO_printf( Perl_debug_log,
	3395	"%*s %sState: %4"UVxf" Accepted: %c ",
	3396	2+depth * 2, "", PL_colors[4],
	3397	(UV)state, (accepted ? 'Y' : 'N'));
	3398	});
	3399
	3400	/* read a char and goto next state */
	3401	if ( base ) {
	3402	I32 offset;
	3403	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	3404	uscan, len, uvc, charid, foldlen,
	3405	foldbuf, uniflags);
	3406	charcount++;
	3407	if (foldlen>0)
	3408	ST.longfold = TRUE;
	3409	if (charid &&
	3410	( ((offset =
	3411	base + charid - 1 - trie->uniquecharcount)) >= 0)
	3412
	3413	&& ((U32)offset < trie->lasttrans)
	3414	&& trie->trans[offset].check == state)
	3415	{
	3416	state = trie->trans[offset].next;
	3417	}
	3418	else {
	3419	state = 0;
	3420	}
	3421	uc += len;
	3422
	3423	}
	3424	else {
	3425	state = 0;
	3426	}
	3427	DEBUG_TRIE_EXECUTE_r(
	3428	PerlIO_printf( Perl_debug_log,
	3429	"Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
	3430	charid, uvc, (UV)state, PL_colors[5] );
	3431	);
	3432	}
	3433	if (!accepted)
	3434	sayNO;
	3435
	3436	/* calculate total number of accept states */
	3437	{
	3438	U16 w = ST.topword;
	3439	accepted = 0;
	3440	while (w) {
	3441	w = trie->wordinfo[w].prev;
	3442	accepted++;
	3443	}
	3444	ST.accepted = accepted;
	3445	}
	3446
	3447	DEBUG_EXECUTE_r(
	3448	PerlIO_printf( Perl_debug_log,
	3449	"%*s %sgot %"IVdf" possible matches%s\n",
	3450	REPORT_CODE_OFF + depth * 2, "",
	3451	PL_colors[4], (IV)ST.accepted, PL_colors[5] );
	3452	);
	3453	goto trie_first_try; /* jump into the fail handler */
	3454	}}
	3455	/* NOTREACHED */
	3456
	3457	case TRIE_next_fail: /* we failed - try next alternative */
	3458	if ( ST.jump) {
	3459	REGCP_UNWIND(ST.cp);
	3460	for (n = *PL_reglastparen; n > ST.lastparen; n--)
	3461	PL_regoffs[n].end = -1;
	3462	*PL_reglastparen = n;
	3463	}
	3464	if (!--ST.accepted) {
	3465	DEBUG_EXECUTE_r({
	3466	PerlIO_printf( Perl_debug_log,
	3467	"%*s %sTRIE failed...%s\n",
	3468	REPORT_CODE_OFF+depth*2, "",
	3469	PL_colors[4],
	3470	PL_colors[5] );
	3471	});
	3472	sayNO_SILENT;
	3473	}
	3474	{
	3475	/* Find next-highest word to process. Note that this code
	3476	* is O(N^2) per trie run (O(N) per branch), so keep tight */
	3477	register U16 min = 0;
	3478	register U16 word;
	3479	register U16 const nextword = ST.nextword;
	3480	register reg_trie_wordinfo * const wordinfo
	3481	= ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
	3482	for (word=ST.topword; word; word=wordinfo[word].prev) {
	3483	if (word > nextword && (!min \|\| word < min))
	3484	min = word;
	3485	}
	3486	ST.nextword = min;
	3487	}
	3488
	3489	trie_first_try:
	3490	if (do_cutgroup) {
	3491	do_cutgroup = 0;
	3492	no_final = 0;
	3493	}
	3494
	3495	if ( ST.jump) {
	3496	ST.lastparen = *PL_reglastparen;
	3497	REGCP_SET(ST.cp);
	3498	}
	3499
	3500	/* find start char of end of current word */
	3501	{
	3502	U32 chars; /* how many chars to skip */
	3503	U8 *uc = ST.firstpos;
	3504	reg_trie_data * const trie
	3505	= (reg_trie_data*)rexi->data->data[ARG(ST.me)];
	3506
	3507	assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3508	>= ST.firstchars);
	3509	chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
	3510	- ST.firstchars;
	3511
	3512	if (ST.longfold) {
	3513	/* the hard option - fold each char in turn and find
	3514	* its folded length (which may be different */
	3515	U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
	3516	STRLEN foldlen;
	3517	STRLEN len;
	3518	UV uvc;
	3519	U8 *uscan;
	3520
	3521	while (chars) {
	3522	if (utf8_target) {
	3523	uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len,
	3524	uniflags);
	3525	uc += len;
	3526	}
	3527	else {
	3528	uvc = *uc;
	3529	uc++;
	3530	}
	3531	uvc = to_uni_fold(uvc, foldbuf, &foldlen);
	3532	uscan = foldbuf;
	3533	while (foldlen) {
	3534	if (!--chars)
	3535	break;
	3536	uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len,
	3537	uniflags);
	3538	uscan += len;
	3539	foldlen -= len;
	3540	}
	3541	}
	3542	}
	3543	else {
	3544	if (utf8_target)
	3545	while (chars--)
	3546	uc += UTF8SKIP(uc);
	3547	else
	3548	uc += chars;
	3549	}
	3550	PL_reginput = (char *)uc;
	3551	}
	3552
	3553	scan = (ST.jump && ST.jump[ST.nextword])
	3554	? ST.me + ST.jump[ST.nextword]
	3555	: ST.B;
	3556
	3557	DEBUG_EXECUTE_r({
	3558	PerlIO_printf( Perl_debug_log,
	3559	"%*s %sTRIE matched word #%d, continuing%s\n",
	3560	REPORT_CODE_OFF+depth*2, "",
	3561	PL_colors[4],
	3562	ST.nextword,
	3563	PL_colors[5]
	3564	);
	3565	});
	3566
	3567	if (ST.accepted > 1 \|\| has_cutgroup) {
	3568	PUSH_STATE_GOTO(TRIE_next, scan);
	3569	/* NOTREACHED */
	3570	}
	3571	/* only one choice left - just continue */
	3572	DEBUG_EXECUTE_r({
	3573	AV *const trie_words
	3574	= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
	3575	SV ** const tmp = av_fetch( trie_words,
	3576	ST.nextword-1, 0 );
	3577	SV *sv= tmp ? sv_newmortal() : NULL;
	3578
	3579	PerlIO_printf( Perl_debug_log,
	3580	"%*s %sonly one match left, short-circuiting: #%d <%s>%s\n",
	3581	REPORT_CODE_OFF+depth*2, "", PL_colors[4],
	3582	ST.nextword,
	3583	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	3584	PL_colors[0], PL_colors[1],
	3585	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)\|PERL_PV_ESCAPE_NONASCII
	3586	)
	3587	: "not compiled under -Dr",
	3588	PL_colors[5] );
	3589	});
	3590
	3591	locinput = PL_reginput;
	3592	nextchr = UCHARAT(locinput);
	3593	continue; /* execute rest of RE */
	3594	/* NOTREACHED */
	3595	#undef ST
	3596
	3597	case EXACT: {
	3598	char *s = STRING(scan);
	3599	ln = STR_LEN(scan);
	3600	if (utf8_target != UTF_PATTERN) {
	3601	/* The target and the pattern have differing utf8ness. */
	3602	char *l = locinput;
	3603	const char * const e = s + ln;
	3604
	3605	if (utf8_target) {
	3606	/* The target is utf8, the pattern is not utf8. */
	3607	while (s < e) {
	3608	STRLEN ulen;
	3609	if (l >= PL_regeol)
	3610	sayNO;
	3611	if (NATIVE_TO_UNI((U8)s) !=
	3612	utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
	3613	uniflags))
	3614	sayNO;
	3615	l += ulen;
	3616	s ++;
	3617	}
	3618	}
	3619	else {
	3620	/* The target is not utf8, the pattern is utf8. */
	3621	while (s < e) {
	3622	STRLEN ulen;
	3623	if (l >= PL_regeol)
	3624	sayNO;
	3625	if (NATIVE_TO_UNI(((U8)l)) !=
	3626	utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
	3627	uniflags))
	3628	sayNO;
	3629	s += ulen;
	3630	l ++;
	3631	}
	3632	}
	3633	locinput = l;
	3634	nextchr = UCHARAT(locinput);
	3635	break;
	3636	}
	3637	/* The target and the pattern have the same utf8ness. */
	3638	/* Inline the first character, for speed. */
	3639	if (UCHARAT(s) != nextchr)
	3640	sayNO;
	3641	if (PL_regeol - locinput < ln)
	3642	sayNO;
	3643	if (ln > 1 && memNE(s, locinput, ln))
	3644	sayNO;
	3645	locinput += ln;
	3646	nextchr = UCHARAT(locinput);
	3647	break;
	3648	}
	3649	case EXACTFL: {
	3650	re_fold_t folder;
	3651	const U8 * fold_array;
	3652	const char * s;
	3653	U32 fold_utf8_flags;
	3654
	3655	PL_reg_flags \|= RF_tainted;
	3656	folder = foldEQ_locale;
	3657	fold_array = PL_fold_locale;
	3658	fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
	3659	goto do_exactf;
	3660
	3661	case EXACTFU:
	3662	folder = foldEQ_latin1;
	3663	fold_array = PL_fold_latin1;
	3664	fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
	3665	goto do_exactf;
	3666
	3667	case EXACTFA:
	3668	folder = foldEQ_latin1;
	3669	fold_array = PL_fold_latin1;
	3670	fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	3671	goto do_exactf;
	3672
	3673	case EXACTF:
	3674	folder = foldEQ;
	3675	fold_array = PL_fold;
	3676	fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
	3677
	3678	do_exactf:
	3679	s = STRING(scan);
	3680	ln = STR_LEN(scan);
	3681
	3682	if (utf8_target \|\| UTF_PATTERN) {
	3683	/* Either target or the pattern are utf8. */
	3684	const char * const l = locinput;
	3685	char *e = PL_regeol;
	3686
	3687	if (! foldEQ_utf8_flags(s, 0, ln, cBOOL(UTF_PATTERN),
	3688	l, &e, 0, utf8_target, fold_utf8_flags))
	3689	{
	3690	sayNO;
	3691	}
	3692	locinput = e;
	3693	nextchr = UCHARAT(locinput);
	3694	break;
	3695	}
	3696
	3697	/* Neither the target nor the pattern are utf8 */
	3698	if (UCHARAT(s) != nextchr &&
	3699	UCHARAT(s) != fold_array[nextchr])
	3700	{
	3701	sayNO;
	3702	}
	3703	if (PL_regeol - locinput < ln)
	3704	sayNO;
	3705	if (ln > 1 && ! folder(s, locinput, ln))
	3706	sayNO;
	3707	locinput += ln;
	3708	nextchr = UCHARAT(locinput);
	3709	break;
	3710	}
	3711
	3712	/* XXX Could improve efficiency by separating these all out using a
	3713	* macro or in-line function. At that point regcomp.c would no longer
	3714	* have to set the FLAGS fields of these */
	3715	case BOUNDL:
	3716	case NBOUNDL:
	3717	PL_reg_flags \|= RF_tainted;
	3718	/* FALL THROUGH */
	3719	case BOUND:
	3720	case BOUNDU:
	3721	case BOUNDA:
	3722	case NBOUND:
	3723	case NBOUNDU:
	3724	case NBOUNDA:
	3725	/* was last char in word? */
	3726	if (utf8_target
	3727	&& FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET
	3728	&& FLAGS(scan) != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
	3729	{
	3730	if (locinput == PL_bostr)
	3731	ln = '\n';
	3732	else {
	3733	const U8 * const r = reghop3((U8)locinput, -1, (U8)PL_bostr);
	3734
	3735	ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
	3736	}
	3737	if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
	3738	ln = isALNUM_uni(ln);
	3739	LOAD_UTF8_CHARCLASS_ALNUM();
	3740	n = swash_fetch(PL_utf8_alnum, (U8*)locinput, utf8_target);
	3741	}
	3742	else {
	3743	ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
	3744	n = isALNUM_LC_utf8((U8*)locinput);
	3745	}
	3746	}
	3747	else {
	3748
	3749	/* Here the string isn't utf8, or is utf8 and only ascii
	3750	* characters are to match \w. In the latter case looking at
	3751	* the byte just prior to the current one may be just the final
	3752	* byte of a multi-byte character. This is ok. There are two
	3753	* cases:
	3754	* 1) it is a single byte character, and then the test is doing
	3755	* just what it's supposed to.
	3756	* 2) it is a multi-byte character, in which case the final
	3757	* byte is never mistakable for ASCII, and so the test
	3758	* will say it is not a word character, which is the
	3759	* correct answer. */
	3760	ln = (locinput != PL_bostr) ?
	3761	UCHARAT(locinput - 1) : '\n';
	3762	switch (FLAGS(scan)) {
	3763	case REGEX_UNICODE_CHARSET:
	3764	ln = isWORDCHAR_L1(ln);
	3765	n = isWORDCHAR_L1(nextchr);
	3766	break;
	3767	case REGEX_LOCALE_CHARSET:
	3768	ln = isALNUM_LC(ln);
	3769	n = isALNUM_LC(nextchr);
	3770	break;
	3771	case REGEX_DEPENDS_CHARSET:
	3772	ln = isALNUM(ln);
	3773	n = isALNUM(nextchr);
	3774	break;
	3775	case REGEX_ASCII_RESTRICTED_CHARSET:
	3776	case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
	3777	ln = isWORDCHAR_A(ln);
	3778	n = isWORDCHAR_A(nextchr);
	3779	break;
	3780	default:
	3781	Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
	3782	break;
	3783	}
	3784	}
	3785	/* Note requires that all BOUNDs be lower than all NBOUNDs in
	3786	* regcomp.sym */
	3787	if (((!ln) == (!n)) == (OP(scan) < NBOUND))
	3788	sayNO;
	3789	break;
	3790	case ANYOFV:
	3791	case ANYOF:
	3792	if (utf8_target \|\| state_num == ANYOFV) {
	3793	STRLEN inclasslen = PL_regeol - locinput;
	3794	if (locinput >= PL_regeol)
	3795	sayNO;
	3796
	3797	if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, utf8_target))
	3798	sayNO;
	3799	locinput += inclasslen;
	3800	nextchr = UCHARAT(locinput);
	3801	break;
	3802	}
	3803	else {
	3804	if (nextchr < 0)
	3805	nextchr = UCHARAT(locinput);
	3806	if (!nextchr && locinput >= PL_regeol)
	3807	sayNO;
	3808	if (!REGINCLASS(rex, scan, (U8*)locinput))
	3809	sayNO;
	3810	nextchr = UCHARAT(++locinput);
	3811	break;
	3812	}
	3813	break;
	3814	/* Special char classes - The defines start on line 129 or so */
	3815	CCC_TRY_U(ALNUM, NALNUM, isWORDCHAR,
	3816	ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8,
	3817	ALNUMU, NALNUMU, isWORDCHAR_L1,
	3818	ALNUMA, NALNUMA, isWORDCHAR_A,
	3819	alnum, "a");
	3820
	3821	CCC_TRY_U(SPACE, NSPACE, isSPACE,
	3822	SPACEL, NSPACEL, isSPACE_LC, isSPACE_LC_utf8,
	3823	SPACEU, NSPACEU, isSPACE_L1,
	3824	SPACEA, NSPACEA, isSPACE_A,
	3825	space, " ");
	3826
	3827	CCC_TRY(DIGIT, NDIGIT, isDIGIT,
	3828	DIGITL, NDIGITL, isDIGIT_LC, isDIGIT_LC_utf8,
	3829	DIGITA, NDIGITA, isDIGIT_A,
	3830	digit, "0");
	3831
	3832	case CLUMP: /* Match \X: logical Unicode character. This is defined as
	3833	a Unicode extended Grapheme Cluster */
	3834	/* From http://www.unicode.org/reports/tr29 (5.2 version). An
	3835	extended Grapheme Cluster is:
	3836
	3837	CR LF
	3838	\| Prepend* Begin Extend*
	3839	\| .
	3840
	3841	Begin is (Hangul-syllable \| ! Control)
	3842	Extend is (Grapheme_Extend \| Spacing_Mark)
	3843	Control is [ GCB_Control CR LF ]
	3844
	3845	The discussion below shows how the code for CLUMP is derived
	3846	from this regex. Note that most of these concepts are from
	3847	property values of the Grapheme Cluster Boundary (GCB) property.
	3848	No code point can have multiple property values for a given
	3849	property. Thus a code point in Prepend can't be in Control, but
	3850	it must be in !Control. This is why Control above includes
	3851	GCB_Control plus CR plus LF. The latter two are used in the GCB
	3852	property separately, and so can't be in GCB_Control, even though
	3853	they logically are controls. Control is not the same as gc=cc,
	3854	but includes format and other characters as well.
	3855
	3856	The Unicode definition of Hangul-syllable is:
	3857	L+
	3858	\| (L* ( ( V \| LV ) V* \| LVT ) T*)
	3859	\| T+
	3860	)
	3861	Each of these is a value for the GCB property, and hence must be
	3862	disjoint, so the order they are tested is immaterial, so the
	3863	above can safely be changed to
	3864	T+
	3865	\| L+
	3866	\| (L* ( LVT \| ( V \| LV ) V*) T*)
	3867
	3868	The last two terms can be combined like this:
	3869	L* ( L
	3870	\| (( LVT \| ( V \| LV ) V*) T*))
	3871
	3872	And refactored into this:
	3873	L* (L \| LVT T* \| V V* T* \| LV V* T*)
	3874
	3875	That means that if we have seen any L's at all we can quit
	3876	there, but if the next character is an LVT, a V, or an LV we
	3877	should keep going.
	3878
	3879	There is a subtlety with Prepend* which showed up in testing.
	3880	Note that the Begin, and only the Begin is required in:
	3881	\| Prepend* Begin Extend*
	3882	Also, Begin contains '! Control'. A Prepend must be a
	3883	'! Control', which means it must also be a Begin. What it
	3884	comes down to is that if we match Prepend* and then find no
	3885	suitable Begin afterwards, that if we backtrack the last
	3886	Prepend, that one will be a suitable Begin.
	3887	*/
	3888
	3889	if (locinput >= PL_regeol)
	3890	sayNO;
	3891	if (! utf8_target) {
	3892
	3893	/* Match either CR LF or '.', as all the other possibilities
	3894	* require utf8 */
	3895	locinput++; /* Match the . or CR */
	3896	if (nextchr == '\r' /* And if it was CR, and the next is LF,
	3897	match the LF */
	3898	&& locinput < PL_regeol
	3899	&& UCHARAT(locinput) == '\n') locinput++;
	3900	}
	3901	else {
	3902
	3903	/* Utf8: See if is ( CR LF ); already know that locinput <
	3904	* PL_regeol, so locinput+1 is in bounds */
	3905	if (nextchr == '\r' && UCHARAT(locinput + 1) == '\n') {
	3906	locinput += 2;
	3907	}
	3908	else {
	3909	/* In case have to backtrack to beginning, then match '.' */
	3910	char *starting = locinput;
	3911
	3912	/* In case have to backtrack the last prepend */
	3913	char *previous_prepend = 0;
	3914
	3915	LOAD_UTF8_CHARCLASS_GCB();
	3916
	3917	/* Match (prepend)* */
	3918	while (locinput < PL_regeol
	3919	&& swash_fetch(PL_utf8_X_prepend,
	3920	(U8*)locinput, utf8_target))
	3921	{
	3922	previous_prepend = locinput;
	3923	locinput += UTF8SKIP(locinput);
	3924	}
	3925
	3926	/* As noted above, if we matched a prepend character, but
	3927	* the next thing won't match, back off the last prepend we
	3928	* matched, as it is guaranteed to match the begin */
	3929	if (previous_prepend
	3930	&& (locinput >= PL_regeol
	3931	\|\| ! swash_fetch(PL_utf8_X_begin,
	3932	(U8*)locinput, utf8_target)))
	3933	{
	3934	locinput = previous_prepend;
	3935	}
	3936
	3937	/* Note that here we know PL_regeol > locinput, as we
	3938	* tested that upon input to this switch case, and if we
	3939	* moved locinput forward, we tested the result just above
	3940	* and it either passed, or we backed off so that it will
	3941	* now pass */
	3942	if (! swash_fetch(PL_utf8_X_begin, (U8*)locinput, utf8_target)) {
	3943
	3944	/* Here did not match the required 'Begin' in the
	3945	* second term. So just match the very first
	3946	* character, the '.' of the final term of the regex */
	3947	locinput = starting + UTF8SKIP(starting);
	3948	} else {
	3949
	3950	/* Here is the beginning of a character that can have
	3951	* an extender. It is either a hangul syllable, or a
	3952	* non-control */
	3953	if (swash_fetch(PL_utf8_X_non_hangul,
	3954	(U8*)locinput, utf8_target))
	3955	{
	3956
	3957	/* Here not a Hangul syllable, must be a
	3958	* ('! * Control') */
	3959	locinput += UTF8SKIP(locinput);
	3960	} else {
	3961
	3962	/* Here is a Hangul syllable. It can be composed
	3963	* of several individual characters. One
	3964	* possibility is T+ */
	3965	if (swash_fetch(PL_utf8_X_T,
	3966	(U8*)locinput, utf8_target))
	3967	{
	3968	while (locinput < PL_regeol
	3969	&& swash_fetch(PL_utf8_X_T,
	3970	(U8*)locinput, utf8_target))
	3971	{
	3972	locinput += UTF8SKIP(locinput);
	3973	}
	3974	} else {
	3975
	3976	/* Here, not T+, but is a Hangul. That means
	3977	* it is one of the others: L, LV, LVT or V,
	3978	* and matches:
	3979	* L* (L \| LVT T* \| V V* T* \| LV V* T*) */
	3980
	3981	/* Match L* */
	3982	while (locinput < PL_regeol
	3983	&& swash_fetch(PL_utf8_X_L,
	3984	(U8*)locinput, utf8_target))
	3985	{
	3986	locinput += UTF8SKIP(locinput);
	3987	}
	3988
	3989	/* Here, have exhausted L*. If the next
	3990	* character is not an LV, LVT nor V, it means
	3991	* we had to have at least one L, so matches L+
	3992	* in the original equation, we have a complete
	3993	* hangul syllable. Are done. */
	3994
	3995	if (locinput < PL_regeol
	3996	&& swash_fetch(PL_utf8_X_LV_LVT_V,
	3997	(U8*)locinput, utf8_target))
	3998	{
	3999
	4000	/* Otherwise keep going. Must be LV, LVT
	4001	* or V. See if LVT */
	4002	if (swash_fetch(PL_utf8_X_LVT,
	4003	(U8*)locinput, utf8_target))
	4004	{
	4005	locinput += UTF8SKIP(locinput);
	4006	} else {
	4007
	4008	/* Must be V or LV. Take it, then
	4009	* match V* */
	4010	locinput += UTF8SKIP(locinput);
	4011	while (locinput < PL_regeol
	4012	&& swash_fetch(PL_utf8_X_V,
	4013	(U8*)locinput, utf8_target))
	4014	{
	4015	locinput += UTF8SKIP(locinput);
	4016	}
	4017	}
	4018
	4019	/* And any of LV, LVT, or V can be followed
	4020	* by T* */
	4021	while (locinput < PL_regeol
	4022	&& swash_fetch(PL_utf8_X_T,
	4023	(U8*)locinput,
	4024	utf8_target))
	4025	{
	4026	locinput += UTF8SKIP(locinput);
	4027	}
	4028	}
	4029	}
	4030	}
	4031
	4032	/* Match any extender */
	4033	while (locinput < PL_regeol
	4034	&& swash_fetch(PL_utf8_X_extend,
	4035	(U8*)locinput, utf8_target))
	4036	{
	4037	locinput += UTF8SKIP(locinput);
	4038	}
	4039	}
	4040	}
	4041	if (locinput > PL_regeol) sayNO;
	4042	}
	4043	nextchr = UCHARAT(locinput);
	4044	break;
	4045
	4046	case NREFFL:
	4047	{ /* The capture buffer cases. The ones beginning with N for the
	4048	named buffers just convert to the equivalent numbered and
	4049	pretend they were called as the corresponding numbered buffer
	4050	op. */
	4051	/* don't initialize these in the declaration, it makes C++
	4052	unhappy */
	4053	char *s;
	4054	char type;
	4055	re_fold_t folder;
	4056	const U8 *fold_array;
	4057	UV utf8_fold_flags;
	4058
	4059	PL_reg_flags \|= RF_tainted;
	4060	folder = foldEQ_locale;
	4061	fold_array = PL_fold_locale;
	4062	type = REFFL;
	4063	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4064	goto do_nref;
	4065
	4066	case NREFFA:
	4067	folder = foldEQ_latin1;
	4068	fold_array = PL_fold_latin1;
	4069	type = REFFA;
	4070	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4071	goto do_nref;
	4072
	4073	case NREFFU:
	4074	folder = foldEQ_latin1;
	4075	fold_array = PL_fold_latin1;
	4076	type = REFFU;
	4077	utf8_fold_flags = 0;
	4078	goto do_nref;
	4079
	4080	case NREFF:
	4081	folder = foldEQ;
	4082	fold_array = PL_fold;
	4083	type = REFF;
	4084	utf8_fold_flags = 0;
	4085	goto do_nref;
	4086
	4087	case NREF:
	4088	type = REF;
	4089	folder = NULL;
	4090	fold_array = NULL;
	4091	utf8_fold_flags = 0;
	4092	do_nref:
	4093
	4094	/* For the named back references, find the corresponding buffer
	4095	* number */
	4096	n = reg_check_named_buff_matched(rex,scan);
	4097
	4098	if ( ! n ) {
	4099	sayNO;
	4100	}
	4101	goto do_nref_ref_common;
	4102
	4103	case REFFL:
	4104	PL_reg_flags \|= RF_tainted;
	4105	folder = foldEQ_locale;
	4106	fold_array = PL_fold_locale;
	4107	utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
	4108	goto do_ref;
	4109
	4110	case REFFA:
	4111	folder = foldEQ_latin1;
	4112	fold_array = PL_fold_latin1;
	4113	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	4114	goto do_ref;
	4115
	4116	case REFFU:
	4117	folder = foldEQ_latin1;
	4118	fold_array = PL_fold_latin1;
	4119	utf8_fold_flags = 0;
	4120	goto do_ref;
	4121
	4122	case REFF:
	4123	folder = foldEQ;
	4124	fold_array = PL_fold;
	4125	utf8_fold_flags = 0;
	4126	goto do_ref;
	4127
	4128	case REF:
	4129	folder = NULL;
	4130	fold_array = NULL;
	4131	utf8_fold_flags = 0;
	4132
	4133	do_ref:
	4134	type = OP(scan);
	4135	n = ARG(scan); /* which paren pair */
	4136
	4137	do_nref_ref_common:
	4138	ln = PL_regoffs[n].start;
	4139	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4140	if (*PL_reglastparen < n \|\| ln == -1)
	4141	sayNO; /* Do not match unless seen CLOSEn. */
	4142	if (ln == PL_regoffs[n].end)
	4143	break;
	4144
	4145	s = PL_bostr + ln;
	4146	if (type != REF /* REF can do byte comparison */
	4147	&& (utf8_target \|\| type == REFFU))
	4148	{ /* XXX handle REFFL better */
	4149	char * limit = PL_regeol;
	4150
	4151	/* This call case insensitively compares the entire buffer
	4152	* at s, with the current input starting at locinput, but
	4153	* not going off the end given by PL_regeol, and returns in
	4154	* limit upon success, how much of the current input was
	4155	* matched */
	4156	if (! foldEQ_utf8_flags(s, NULL, PL_regoffs[n].end - ln, utf8_target,
	4157	locinput, &limit, 0, utf8_target, utf8_fold_flags))
	4158	{
	4159	sayNO;
	4160	}
	4161	locinput = limit;
	4162	nextchr = UCHARAT(locinput);
	4163	break;
	4164	}
	4165
	4166	/* Not utf8: Inline the first character, for speed. */
	4167	if (UCHARAT(s) != nextchr &&
	4168	(type == REF \|\|
	4169	UCHARAT(s) != fold_array[nextchr]))
	4170	sayNO;
	4171	ln = PL_regoffs[n].end - ln;
	4172	if (locinput + ln > PL_regeol)
	4173	sayNO;
	4174	if (ln > 1 && (type == REF
	4175	? memNE(s, locinput, ln)
	4176	: ! folder(s, locinput, ln)))
	4177	sayNO;
	4178	locinput += ln;
	4179	nextchr = UCHARAT(locinput);
	4180	break;
	4181	}
	4182	case NOTHING:
	4183	case TAIL:
	4184	break;
	4185	case BACK:
	4186	break;
	4187
	4188	#undef ST
	4189	#define ST st->u.eval
	4190	{
	4191	SV *ret;
	4192	REGEXP *re_sv;
	4193	regexp *re;
	4194	regexp_internal *rei;
	4195	regnode *startpoint;
	4196
	4197	case GOSTART:
	4198	case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
	4199	if (cur_eval && cur_eval->locinput==locinput) {
	4200	if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
	4201	Perl_croak(aTHX_ "Infinite recursion in regex");
	4202	if ( ++nochange_depth > max_nochange_depth )
	4203	Perl_croak(aTHX_
	4204	"Pattern subroutine nesting without pos change"
	4205	" exceeded limit in regex");
	4206	} else {
	4207	nochange_depth = 0;
	4208	}
	4209	re_sv = rex_sv;
	4210	re = rex;
	4211	rei = rexi;
	4212	(void)ReREFCNT_inc(rex_sv);
	4213	if (OP(scan)==GOSUB) {
	4214	startpoint = scan + ARG2L(scan);
	4215	ST.close_paren = ARG(scan);
	4216	} else {
	4217	startpoint = rei->program+1;
	4218	ST.close_paren = 0;
	4219	}
	4220	goto eval_recurse_doit;
	4221	/* NOTREACHED */
	4222	case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X\|Y)B/ */
	4223	if (cur_eval && cur_eval->locinput==locinput) {
	4224	if ( ++nochange_depth > max_nochange_depth )
	4225	Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
	4226	} else {
	4227	nochange_depth = 0;
	4228	}
	4229	{
	4230	/* execute the code in the {...} */
	4231	dSP;
	4232	SV ** const before = SP;
	4233	OP_4tree * const oop = PL_op;
	4234	COP * const ocurcop = PL_curcop;
	4235	PAD *old_comppad;
	4236	char *saved_regeol = PL_regeol;
	4237	struct re_save_state saved_state;
	4238
	4239	/* To not corrupt the existing regex state while executing the
	4240	* eval we would normally put it on the save stack, like with
	4241	* save_re_context. However, re-evals have a weird scoping so we
	4242	* can't just add ENTER/LEAVE here. With that, things like
	4243	*
	4244	* (?{$a=2})(a(?{local$a=$a+1}))aakc(?{$b=$a})
	4245	*
	4246	* would break, as they expect the localisation to be unwound
	4247	* only when the re-engine backtracks through the bit that
	4248	* localised it.
	4249	*
	4250	* What we do instead is just saving the state in a local c
	4251	* variable.
	4252	*/
	4253	Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
	4254
	4255	n = ARG(scan);
	4256	PL_op = (OP_4tree*)rexi->data->data[n];
	4257	DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
	4258	" re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
	4259	/* wrap the call in two SAVECOMPPADs. This ensures that
	4260	* when the save stack is eventually unwound, all the
	4261	* accumulated SAVEt_CLEARSV's will be processed with
	4262	* interspersed SAVEt_COMPPAD's to ensure that lexicals
	4263	* are cleared in the right pad */
	4264	SAVECOMPPAD();
	4265	PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
	4266	PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
	4267
	4268	if (sv_yes_mark) {
	4269	SV *sv_mrk = get_sv("REGMARK", 1);
	4270	sv_setsv(sv_mrk, sv_yes_mark);
	4271	}
	4272
	4273	CALLRUNOPS(aTHX); /* Scalar context. */
	4274	SPAGAIN;
	4275	if (SP == before)
	4276	ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
	4277	else {
	4278	ret = POPs;
	4279	PUTBACK;
	4280	}
	4281
	4282	Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
	4283
	4284	PL_op = oop;
	4285	SAVECOMPPAD();
	4286	PAD_RESTORE_LOCAL(old_comppad);
	4287	PL_curcop = ocurcop;
	4288	PL_regeol = saved_regeol;
	4289	if (!logical) {
	4290	/* /(?{...})/ */
	4291	sv_setsv(save_scalar(PL_replgv), ret);
	4292	break;
	4293	}
	4294	}
	4295	if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
	4296	logical = 0;
	4297	{
	4298	/* extract RE object from returned value; compiling if
	4299	* necessary */
	4300	MAGIC *mg = NULL;
	4301	REGEXP *rx = NULL;
	4302
	4303	if (SvROK(ret)) {
	4304	SV *const sv = SvRV(ret);
	4305
	4306	if (SvTYPE(sv) == SVt_REGEXP) {
	4307	rx = (REGEXP*) sv;
	4308	} else if (SvSMAGICAL(sv)) {
	4309	mg = mg_find(sv, PERL_MAGIC_qr);
	4310	assert(mg);
	4311	}
	4312	} else if (SvTYPE(ret) == SVt_REGEXP) {
	4313	rx = (REGEXP*) ret;
	4314	} else if (SvSMAGICAL(ret)) {
	4315	if (SvGMAGICAL(ret)) {
	4316	/* I don't believe that there is ever qr magic
	4317	here. */
	4318	assert(!mg_find(ret, PERL_MAGIC_qr));
	4319	sv_unmagic(ret, PERL_MAGIC_qr);
	4320	}
	4321	else {
	4322	mg = mg_find(ret, PERL_MAGIC_qr);
	4323	/* testing suggests mg only ends up non-NULL for
	4324	scalars who were upgraded and compiled in the
	4325	else block below. In turn, this is only
	4326	triggered in the "postponed utf8 string" tests
	4327	in t/op/pat.t */
	4328	}
	4329	}
	4330
	4331	if (mg) {
	4332	rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
	4333	assert(rx);
	4334	}
	4335	if (rx) {
	4336	rx = reg_temp_copy(NULL, rx);
	4337	}
	4338	else {
	4339	U32 pm_flags = 0;
	4340	const I32 osize = PL_regsize;
	4341
	4342	if (DO_UTF8(ret)) {
	4343	assert (SvUTF8(ret));
	4344	} else if (SvUTF8(ret)) {
	4345	/* Not doing UTF-8, despite what the SV says. Is
	4346	this only if we're trapped in use 'bytes'? */
	4347	/* Make a copy of the octet sequence, but without
	4348	the flag on, as the compiler now honours the
	4349	SvUTF8 flag on ret. */
	4350	STRLEN len;
	4351	const char *const p = SvPV(ret, len);
	4352	ret = newSVpvn_flags(p, len, SVs_TEMP);
	4353	}
	4354	rx = CALLREGCOMP(ret, pm_flags);
	4355	if (!(SvFLAGS(ret)
	4356	& (SVs_TEMP \| SVs_PADTMP \| SVf_READONLY
	4357	\| SVs_GMG))) {
	4358	/* This isn't a first class regexp. Instead, it's
	4359	caching a regexp onto an existing, Perl visible
	4360	scalar. */
	4361	sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
	4362	}
	4363	PL_regsize = osize;
	4364	}
	4365	re_sv = rx;
	4366	re = (struct regexp *)SvANY(rx);
	4367	}
	4368	RXp_MATCH_COPIED_off(re);
	4369	re->subbeg = rex->subbeg;
	4370	re->sublen = rex->sublen;
	4371	rei = RXi_GET(re);
	4372	DEBUG_EXECUTE_r(
	4373	debug_start_match(re_sv, utf8_target, locinput, PL_regeol,
	4374	"Matching embedded");
	4375	);
	4376	startpoint = rei->program + 1;
	4377	ST.close_paren = 0; /* only used for GOSUB */
	4378	/* borrowed from regtry */
	4379	if (PL_reg_start_tmpl <= re->nparens) {
	4380	PL_reg_start_tmpl = re->nparens*3/2 + 3;
	4381	if(PL_reg_start_tmp)
	4382	Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	4383	else
	4384	Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
	4385	}
	4386
	4387	eval_recurse_doit: /* Share code with GOSUB below this line */
	4388	/* run the pattern returned from (??{...}) */
	4389	ST.cp = regcppush(0); /* Save all the positions. */
	4390	REGCP_SET(ST.lastcp);
	4391
	4392	PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
	4393
	4394	/* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
	4395	PL_reglastparen = &re->lastparen;
	4396	PL_reglastcloseparen = &re->lastcloseparen;
	4397	re->lastparen = 0;
	4398	re->lastcloseparen = 0;
	4399
	4400	PL_reginput = locinput;
	4401	PL_regsize = 0;
	4402
	4403	/* XXXX This is too dramatic a measure... */
	4404	PL_reg_maxiter = 0;
	4405
	4406	ST.toggle_reg_flags = PL_reg_flags;
	4407	if (RX_UTF8(re_sv))
	4408	PL_reg_flags \|= RF_utf8;
	4409	else
	4410	PL_reg_flags &= ~RF_utf8;
	4411	ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
	4412
	4413	ST.prev_rex = rex_sv;
	4414	ST.prev_curlyx = cur_curlyx;
	4415	SETREX(rex_sv,re_sv);
	4416	rex = re;
	4417	rexi = rei;
	4418	cur_curlyx = NULL;
	4419	ST.B = next;
	4420	ST.prev_eval = cur_eval;
	4421	cur_eval = st;
	4422	/* now continue from first node in postponed RE */
	4423	PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
	4424	/* NOTREACHED */
	4425	}
	4426	/* logical is 1, /(?(?{...})X\|Y)/ */
	4427	sw = cBOOL(SvTRUE(ret));
	4428	logical = 0;
	4429	break;
	4430	}
	4431
	4432	case EVAL_AB: /* cleanup after a successful (??{A})B */
	4433	/* note: this is called twice; first after popping B, then A */
	4434	PL_reg_flags ^= ST.toggle_reg_flags;
	4435	ReREFCNT_dec(rex_sv);
	4436	SETREX(rex_sv,ST.prev_rex);
	4437	rex = (struct regexp *)SvANY(rex_sv);
	4438	rexi = RXi_GET(rex);
	4439	regcpblow(ST.cp);
	4440	cur_eval = ST.prev_eval;
	4441	cur_curlyx = ST.prev_curlyx;
	4442
	4443	/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
	4444	PL_reglastparen = &rex->lastparen;
	4445	PL_reglastcloseparen = &rex->lastcloseparen;
	4446	/* also update PL_regoffs */
	4447	PL_regoffs = rex->offs;
	4448
	4449	/* XXXX This is too dramatic a measure... */
	4450	PL_reg_maxiter = 0;
	4451	if ( nochange_depth )
	4452	nochange_depth--;
	4453	sayYES;
	4454
	4455
	4456	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
	4457	/* note: this is called twice; first after popping B, then A */
	4458	PL_reg_flags ^= ST.toggle_reg_flags;
	4459	ReREFCNT_dec(rex_sv);
	4460	SETREX(rex_sv,ST.prev_rex);
	4461	rex = (struct regexp *)SvANY(rex_sv);
	4462	rexi = RXi_GET(rex);
	4463	/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
	4464	PL_reglastparen = &rex->lastparen;
	4465	PL_reglastcloseparen = &rex->lastcloseparen;
	4466
	4467	PL_reginput = locinput;
	4468	REGCP_UNWIND(ST.lastcp);
	4469	regcppop(rex);
	4470	cur_eval = ST.prev_eval;
	4471	cur_curlyx = ST.prev_curlyx;
	4472	/* XXXX This is too dramatic a measure... */
	4473	PL_reg_maxiter = 0;
	4474	if ( nochange_depth )
	4475	nochange_depth--;
	4476	sayNO_SILENT;
	4477	#undef ST
	4478
	4479	case OPEN:
	4480	n = ARG(scan); /* which paren pair */
	4481	PL_reg_start_tmp[n] = locinput;
	4482	if (n > PL_regsize)
	4483	PL_regsize = n;
	4484	lastopen = n;
	4485	break;
	4486	case CLOSE:
	4487	n = ARG(scan); /* which paren pair */
	4488	PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
	4489	PL_regoffs[n].end = locinput - PL_bostr;
	4490	/*if (n > PL_regsize)
	4491	PL_regsize = n;*/
	4492	if (n > *PL_reglastparen)
	4493	*PL_reglastparen = n;
	4494	*PL_reglastcloseparen = n;
	4495	if (cur_eval && cur_eval->u.eval.close_paren == n) {
	4496	goto fake_end;
	4497	}
	4498	break;
	4499	case ACCEPT:
	4500	if (ARG(scan)){
	4501	regnode *cursor;
	4502	for (cursor=scan;
	4503	cursor && OP(cursor)!=END;
	4504	cursor=regnext(cursor))
	4505	{
	4506	if ( OP(cursor)==CLOSE ){
	4507	n = ARG(cursor);
	4508	if ( n <= lastopen ) {
	4509	PL_regoffs[n].start
	4510	= PL_reg_start_tmp[n] - PL_bostr;
	4511	PL_regoffs[n].end = locinput - PL_bostr;
	4512	/*if (n > PL_regsize)
	4513	PL_regsize = n;*/
	4514	if (n > *PL_reglastparen)
	4515	*PL_reglastparen = n;
	4516	*PL_reglastcloseparen = n;
	4517	if ( n == ARG(scan) \|\| (cur_eval &&
	4518	cur_eval->u.eval.close_paren == n))
	4519	break;
	4520	}
	4521	}
	4522	}
	4523	}
	4524	goto fake_end;
	4525	/*NOTREACHED*/
	4526	case GROUPP:
	4527	n = ARG(scan); /* which paren pair */
	4528	sw = cBOOL(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
	4529	break;
	4530	case NGROUPP:
	4531	/* reg_check_named_buff_matched returns 0 for no match */
	4532	sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
	4533	break;
	4534	case INSUBP:
	4535	n = ARG(scan);
	4536	sw = (cur_eval && (!n \|\| cur_eval->u.eval.close_paren == n));
	4537	break;
	4538	case DEFINEP:
	4539	sw = 0;
	4540	break;
	4541	case IFTHEN:
	4542	PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
	4543	if (sw)
	4544	next = NEXTOPER(NEXTOPER(scan));
	4545	else {
	4546	next = scan + ARG(scan);
	4547	if (OP(next) == IFTHEN) /* Fake one. */
	4548	next = NEXTOPER(NEXTOPER(next));
	4549	}
	4550	break;
	4551	case LOGICAL:
	4552	logical = scan->flags;
	4553	break;
	4554
	4555	/*******************************************************************
	4556
	4557	The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
	4558	pattern, where A and B are subpatterns. (For simple A, CURLYM or
	4559	STAR/PLUS/CURLY/CURLYN are used instead.)
	4560
	4561	A*B is compiled as <CURLYX><A><WHILEM><B>
	4562
	4563	On entry to the subpattern, CURLYX is called. This pushes a CURLYX
	4564	state, which contains the current count, initialised to -1. It also sets
	4565	cur_curlyx to point to this state, with any previous value saved in the
	4566	state block.
	4567
	4568	CURLYX then jumps straight to the WHILEM op, rather than executing A,
	4569	since the pattern may possibly match zero times (i.e. it's a while {} loop
	4570	rather than a do {} while loop).
	4571
	4572	Each entry to WHILEM represents a successful match of A. The count in the
	4573	CURLYX block is incremented, another WHILEM state is pushed, and execution
	4574	passes to A or B depending on greediness and the current count.
	4575
	4576	For example, if matching against the string a1a2a3b (where the aN are
	4577	substrings that match /A/), then the match progresses as follows: (the
	4578	pushed states are interspersed with the bits of strings matched so far):
	4579
	4580	<CURLYX cnt=-1>
	4581	<CURLYX cnt=0><WHILEM>
	4582	<CURLYX cnt=1><WHILEM> a1 <WHILEM>
	4583	<CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
	4584	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
	4585	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
	4586
	4587	(Contrast this with something like CURLYM, which maintains only a single
	4588	backtrack state:
	4589
	4590	<CURLYM cnt=0> a1
	4591	a1 <CURLYM cnt=1> a2
	4592	a1 a2 <CURLYM cnt=2> a3
	4593	a1 a2 a3 <CURLYM cnt=3> b
	4594	)
	4595
	4596	Each WHILEM state block marks a point to backtrack to upon partial failure
	4597	of A or B, and also contains some minor state data related to that
	4598	iteration. The CURLYX block, pointed to by cur_curlyx, contains the
	4599	overall state, such as the count, and pointers to the A and B ops.
	4600
	4601	This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
	4602	must always point to the current CURLYX block, the rules are:
	4603
	4604	When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
	4605	and set cur_curlyx to point the new block.
	4606
	4607	When popping the CURLYX block after a successful or unsuccessful match,
	4608	restore the previous cur_curlyx.
	4609
	4610	When WHILEM is about to execute B, save the current cur_curlyx, and set it
	4611	to the outer one saved in the CURLYX block.
	4612
	4613	When popping the WHILEM block after a successful or unsuccessful B match,
	4614	restore the previous cur_curlyx.
	4615
	4616	Here's an example for the pattern (AI* BI)*BO
	4617	I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
	4618
	4619	cur_
	4620	curlyx backtrack stack
	4621	------ ---------------
	4622	NULL
	4623	CO <CO prev=NULL> <WO>
	4624	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	4625	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	4626	NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
	4627
	4628	At this point the pattern succeeds, and we work back down the stack to
	4629	clean up, restoring as we go:
	4630
	4631	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	4632	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	4633	CO <CO prev=NULL> <WO>
	4634	NULL
	4635
	4636	*******************************************************************/
	4637
	4638	#define ST st->u.curlyx
	4639
	4640	case CURLYX: /* start of /A*B/ (for complex A) */
	4641	{
	4642	/* No need to save/restore up to this paren */
	4643	I32 parenfloor = scan->flags;
	4644
	4645	assert(next); /* keep Coverity happy */
	4646	if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
	4647	next += ARG(next);
	4648
	4649	/* XXXX Probably it is better to teach regpush to support
	4650	parenfloor > PL_regsize... */
	4651	if (parenfloor > (I32)*PL_reglastparen)
	4652	parenfloor = *PL_reglastparen; /* Pessimization... */
	4653
	4654	ST.prev_curlyx= cur_curlyx;
	4655	cur_curlyx = st;
	4656	ST.cp = PL_savestack_ix;
	4657
	4658	/* these fields contain the state of the current curly.
	4659	* they are accessed by subsequent WHILEMs */
	4660	ST.parenfloor = parenfloor;
	4661	ST.me = scan;
	4662	ST.B = next;
	4663	ST.minmod = minmod;
	4664	minmod = 0;
	4665	ST.count = -1; /* this will be updated by WHILEM */
	4666	ST.lastloc = NULL; /* this will be updated by WHILEM */
	4667
	4668	PL_reginput = locinput;
	4669	PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
	4670	/* NOTREACHED */
	4671	}
	4672
	4673	case CURLYX_end: /* just finished matching all of A*B */
	4674	cur_curlyx = ST.prev_curlyx;
	4675	sayYES;
	4676	/* NOTREACHED */
	4677
	4678	case CURLYX_end_fail: /* just failed to match all of A*B */
	4679	regcpblow(ST.cp);
	4680	cur_curlyx = ST.prev_curlyx;
	4681	sayNO;
	4682	/* NOTREACHED */
	4683
	4684
	4685	#undef ST
	4686	#define ST st->u.whilem
	4687
	4688	case WHILEM: /* just matched an A in /A*B/ (for complex A) */
	4689	{
	4690	/* see the discussion above about CURLYX/WHILEM */
	4691	I32 n;
	4692	int min = ARG1(cur_curlyx->u.curlyx.me);
	4693	int max = ARG2(cur_curlyx->u.curlyx.me);
	4694	regnode *A = NEXTOPER(cur_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS;
	4695
	4696	assert(cur_curlyx); /* keep Coverity happy */
	4697	n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
	4698	ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
	4699	ST.cache_offset = 0;
	4700	ST.cache_mask = 0;
	4701
	4702	PL_reginput = locinput;
	4703
	4704	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4705	"%*s whilem: matched %ld out of %d..%d\n",
	4706	REPORT_CODE_OFF+depth*2, "", (long)n, min, max)
	4707	);
	4708
	4709	/* First just match a string of min A's. */
	4710
	4711	if (n < min) {
	4712	ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
	4713	cur_curlyx->u.curlyx.lastloc = locinput;
	4714	REGCP_SET(ST.lastcp);
	4715
	4716	PUSH_STATE_GOTO(WHILEM_A_pre, A);
	4717	/* NOTREACHED */
	4718	}
	4719
	4720	/* If degenerate A matches "", assume A done. */
	4721
	4722	if (locinput == cur_curlyx->u.curlyx.lastloc) {
	4723	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4724	"%*s whilem: empty match detected, trying continuation...\n",
	4725	REPORT_CODE_OFF+depth*2, "")
	4726	);
	4727	goto do_whilem_B_max;
	4728	}
	4729
	4730	/* super-linear cache processing */
	4731
	4732	if (scan->flags) {
	4733
	4734	if (!PL_reg_maxiter) {
	4735	/* start the countdown: Postpone detection until we
	4736	* know the match is not that much linear. */
	4737	PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
	4738	/* possible overflow for long strings and many CURLYX's */
	4739	if (PL_reg_maxiter < 0)
	4740	PL_reg_maxiter = I32_MAX;
	4741	PL_reg_leftiter = PL_reg_maxiter;
	4742	}
	4743
	4744	if (PL_reg_leftiter-- == 0) {
	4745	/* initialise cache */
	4746	const I32 size = (PL_reg_maxiter + 7)/8;
	4747	if (PL_reg_poscache) {
	4748	if ((I32)PL_reg_poscache_size < size) {
	4749	Renew(PL_reg_poscache, size, char);
	4750	PL_reg_poscache_size = size;
	4751	}
	4752	Zero(PL_reg_poscache, size, char);
	4753	}
	4754	else {
	4755	PL_reg_poscache_size = size;
	4756	Newxz(PL_reg_poscache, size, char);
	4757	}
	4758	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4759	"%swhilem: Detected a super-linear match, switching on caching%s...\n",
	4760	PL_colors[4], PL_colors[5])
	4761	);
	4762	}
	4763
	4764	if (PL_reg_leftiter < 0) {
	4765	/* have we already failed at this position? */
	4766	I32 offset, mask;
	4767	offset = (scan->flags & 0xf) - 1
	4768	+ (locinput - PL_bostr) * (scan->flags>>4);
	4769	mask = 1 << (offset % 8);
	4770	offset /= 8;
	4771	if (PL_reg_poscache[offset] & mask) {
	4772	DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
	4773	"%*s whilem: (cache) already tried at this position...\n",
	4774	REPORT_CODE_OFF+depth*2, "")
	4775	);
	4776	sayNO; /* cache records failure */
	4777	}
	4778	ST.cache_offset = offset;
	4779	ST.cache_mask = mask;
	4780	}
	4781	}
	4782
	4783	/* Prefer B over A for minimal matching. */
	4784
	4785	if (cur_curlyx->u.curlyx.minmod) {
	4786	ST.save_curlyx = cur_curlyx;
	4787	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	4788	ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
	4789	REGCP_SET(ST.lastcp);
	4790	PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
	4791	/* NOTREACHED */
	4792	}
	4793
	4794	/* Prefer A over B for maximal matching. */
	4795
	4796	if (n < max) { /* More greed allowed? */
	4797	ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
	4798	cur_curlyx->u.curlyx.lastloc = locinput;
	4799	REGCP_SET(ST.lastcp);
	4800	PUSH_STATE_GOTO(WHILEM_A_max, A);
	4801	/* NOTREACHED */
	4802	}
	4803	goto do_whilem_B_max;
	4804	}
	4805	/* NOTREACHED */
	4806
	4807	case WHILEM_B_min: /* just matched B in a minimal match */
	4808	case WHILEM_B_max: /* just matched B in a maximal match */
	4809	cur_curlyx = ST.save_curlyx;
	4810	sayYES;
	4811	/* NOTREACHED */
	4812
	4813	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
	4814	cur_curlyx = ST.save_curlyx;
	4815	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	4816	cur_curlyx->u.curlyx.count--;
	4817	CACHEsayNO;
	4818	/* NOTREACHED */
	4819
	4820	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
	4821	/* FALL THROUGH */
	4822	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
	4823	REGCP_UNWIND(ST.lastcp);
	4824	regcppop(rex);
	4825	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	4826	cur_curlyx->u.curlyx.count--;
	4827	CACHEsayNO;
	4828	/* NOTREACHED */
	4829
	4830	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
	4831	REGCP_UNWIND(ST.lastcp);
	4832	regcppop(rex); /* Restore some previous $<digit>s? */
	4833	PL_reginput = locinput;
	4834	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	4835	"%*s whilem: failed, trying continuation...\n",
	4836	REPORT_CODE_OFF+depth*2, "")
	4837	);
	4838	do_whilem_B_max:
	4839	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	4840	&& ckWARN(WARN_REGEXP)
	4841	&& !(PL_reg_flags & RF_warned))
	4842	{
	4843	PL_reg_flags \|= RF_warned;
	4844	Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
	4845	"Complex regular subexpression recursion",
	4846	REG_INFTY - 1);
	4847	}
	4848
	4849	/* now try B */
	4850	ST.save_curlyx = cur_curlyx;
	4851	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	4852	PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
	4853	/* NOTREACHED */
	4854
	4855	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
	4856	cur_curlyx = ST.save_curlyx;
	4857	REGCP_UNWIND(ST.lastcp);
	4858	regcppop(rex);
	4859
	4860	if (cur_curlyx->u.curlyx.count >= /max/ARG2(cur_curlyx->u.curlyx.me)) {
	4861	/* Maximum greed exceeded */
	4862	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	4863	&& ckWARN(WARN_REGEXP)
	4864	&& !(PL_reg_flags & RF_warned))
	4865	{
	4866	PL_reg_flags \|= RF_warned;
	4867	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	4868	"%s limit (%d) exceeded",
	4869	"Complex regular subexpression recursion",
	4870	REG_INFTY - 1);
	4871	}
	4872	cur_curlyx->u.curlyx.count--;
	4873	CACHEsayNO;
	4874	}
	4875
	4876	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	4877	"%s trying longer...\n", REPORT_CODE_OFF+depth2, "")
	4878	);
	4879	/* Try grabbing another A and see if it helps. */
	4880	PL_reginput = locinput;
	4881	cur_curlyx->u.curlyx.lastloc = locinput;
	4882	ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
	4883	REGCP_SET(ST.lastcp);
	4884	PUSH_STATE_GOTO(WHILEM_A_min,
	4885	/A/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS);
	4886	/* NOTREACHED */
	4887
	4888	#undef ST
	4889	#define ST st->u.branch
	4890
	4891	case BRANCHJ: /* /(...\|A\|...)/ with long next pointer */
	4892	next = scan + ARG(scan);
	4893	if (next == scan)
	4894	next = NULL;
	4895	scan = NEXTOPER(scan);
	4896	/* FALL THROUGH */
	4897
	4898	case BRANCH: /* /(...\|A\|...)/ */
	4899	scan = NEXTOPER(scan); /* scan now points to inner node */
	4900	ST.lastparen = *PL_reglastparen;
	4901	ST.next_branch = next;
	4902	REGCP_SET(ST.cp);
	4903	PL_reginput = locinput;
	4904
	4905	/* Now go into the branch */
	4906	if (has_cutgroup) {
	4907	PUSH_YES_STATE_GOTO(BRANCH_next, scan);
	4908	} else {
	4909	PUSH_STATE_GOTO(BRANCH_next, scan);
	4910	}
	4911	/* NOTREACHED */
	4912	case CUTGROUP:
	4913	PL_reginput = locinput;
	4914	sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
	4915	MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	4916	PUSH_STATE_GOTO(CUTGROUP_next,next);
	4917	/* NOTREACHED */
	4918	case CUTGROUP_next_fail:
	4919	do_cutgroup = 1;
	4920	no_final = 1;
	4921	if (st->u.mark.mark_name)
	4922	sv_commit = st->u.mark.mark_name;
	4923	sayNO;
	4924	/* NOTREACHED */
	4925	case BRANCH_next:
	4926	sayYES;
	4927	/* NOTREACHED */
	4928	case BRANCH_next_fail: /* that branch failed; try the next, if any */
	4929	if (do_cutgroup) {
	4930	do_cutgroup = 0;
	4931	no_final = 0;
	4932	}
	4933	REGCP_UNWIND(ST.cp);
	4934	for (n = *PL_reglastparen; n > ST.lastparen; n--)
	4935	PL_regoffs[n].end = -1;
	4936	*PL_reglastparen = n;
	4937	/*dmq: *PL_reglastcloseparen = n; */
	4938	scan = ST.next_branch;
	4939	/* no more branches? */
	4940	if (!scan \|\| (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
	4941	DEBUG_EXECUTE_r({
	4942	PerlIO_printf( Perl_debug_log,
	4943	"%*s %sBRANCH failed...%s\n",
	4944	REPORT_CODE_OFF+depth*2, "",
	4945	PL_colors[4],
	4946	PL_colors[5] );
	4947	});
	4948	sayNO_SILENT;
	4949	}
	4950	continue; /* execute next BRANCH[J] op */
	4951	/* NOTREACHED */
	4952
	4953	case MINMOD:
	4954	minmod = 1;
	4955	break;
	4956
	4957	#undef ST
	4958	#define ST st->u.curlym
	4959
	4960	case CURLYM: /* /A{m,n}B/ where A is fixed-length */
	4961
	4962	/* This is an optimisation of CURLYX that enables us to push
	4963	* only a single backtracking state, no matter how many matches
	4964	* there are in {m,n}. It relies on the pattern being constant
	4965	* length, with no parens to influence future backrefs
	4966	*/
	4967
	4968	ST.me = scan;
	4969	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	4970
	4971	/* if paren positive, emulate an OPEN/CLOSE around A */
	4972	if (ST.me->flags) {
	4973	U32 paren = ST.me->flags;
	4974	if (paren > PL_regsize)
	4975	PL_regsize = paren;
	4976	if (paren > *PL_reglastparen)
	4977	*PL_reglastparen = paren;
	4978	scan += NEXT_OFF(scan); /* Skip former OPEN. */
	4979	}
	4980	ST.A = scan;
	4981	ST.B = next;
	4982	ST.alen = 0;
	4983	ST.count = 0;
	4984	ST.minmod = minmod;
	4985	minmod = 0;
	4986	ST.c1 = CHRTEST_UNINIT;
	4987	REGCP_SET(ST.cp);
	4988
	4989	if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
	4990	goto curlym_do_B;
	4991
	4992	curlym_do_A: /* execute the A in /A{m,n}B/ */
	4993	PL_reginput = locinput;
	4994	PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
	4995	/* NOTREACHED */
	4996
	4997	case CURLYM_A: /* we've just matched an A */
	4998	locinput = st->locinput;
	4999	nextchr = UCHARAT(locinput);
	5000
	5001	ST.count++;
	5002	/* after first match, determine A's length: u.curlym.alen */
	5003	if (ST.count == 1) {
	5004	if (PL_reg_match_utf8) {
	5005	char *s = locinput;
	5006	while (s < PL_reginput) {
	5007	ST.alen++;
	5008	s += UTF8SKIP(s);
	5009	}
	5010	}
	5011	else {
	5012	ST.alen = PL_reginput - locinput;
	5013	}
	5014	if (ST.alen == 0)
	5015	ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
	5016	}
	5017	DEBUG_EXECUTE_r(
	5018	PerlIO_printf(Perl_debug_log,
	5019	"%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
	5020	(int)(REPORT_CODE_OFF+(depth*2)), "",
	5021	(IV) ST.count, (IV)ST.alen)
	5022	);
	5023
	5024	locinput = PL_reginput;
	5025
	5026	if (cur_eval && cur_eval->u.eval.close_paren &&
	5027	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5028	goto fake_end;
	5029
	5030	{
	5031	I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
	5032	if ( max == REG_INFTY \|\| ST.count < max )
	5033	goto curlym_do_A; /* try to match another A */
	5034	}
	5035	goto curlym_do_B; /* try to match B */
	5036
	5037	case CURLYM_A_fail: /* just failed to match an A */
	5038	REGCP_UNWIND(ST.cp);
	5039
	5040	if (ST.minmod \|\| ST.count < ARG1(ST.me) /* min*/
	5041	\|\| (cur_eval && cur_eval->u.eval.close_paren &&
	5042	cur_eval->u.eval.close_paren == (U32)ST.me->flags))
	5043	sayNO;
	5044
	5045	curlym_do_B: /* execute the B in /A{m,n}B/ */
	5046	PL_reginput = locinput;
	5047	if (ST.c1 == CHRTEST_UNINIT) {
	5048	/* calculate c1 and c2 for possible match of 1st char
	5049	* following curly */
	5050	ST.c1 = ST.c2 = CHRTEST_VOID;
	5051	if (HAS_TEXT(ST.B) \|\| JUMPABLE(ST.B)) {
	5052	regnode *text_node = ST.B;
	5053	if (! HAS_TEXT(text_node))
	5054	FIND_NEXT_IMPT(text_node);
	5055	/* this used to be
	5056
	5057	(HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
	5058
	5059	But the former is redundant in light of the latter.
	5060
	5061	if this changes back then the macro for
	5062	IS_TEXT and friends need to change.
	5063	*/
	5064	if (PL_regkind[OP(text_node)] == EXACT)
	5065	{
	5066
	5067	ST.c1 = (U8)*STRING(text_node);
	5068	switch (OP(text_node)) {
	5069	case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
	5070	case EXACTFA:
	5071	case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
	5072	case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
	5073	default: ST.c2 = ST.c1;
	5074	}
	5075	}
	5076	}
	5077	}
	5078
	5079	DEBUG_EXECUTE_r(
	5080	PerlIO_printf(Perl_debug_log,
	5081	"%*s CURLYM trying tail with matches=%"IVdf"...\n",
	5082	(int)(REPORT_CODE_OFF+(depth*2)),
	5083	"", (IV)ST.count)
	5084	);
	5085	if (ST.c1 != CHRTEST_VOID
	5086	&& UCHARAT(PL_reginput) != ST.c1
	5087	&& UCHARAT(PL_reginput) != ST.c2)
	5088	{
	5089	/* simulate B failing */
	5090	DEBUG_OPTIMISE_r(
	5091	PerlIO_printf(Perl_debug_log,
	5092	"%*s CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
	5093	(int)(REPORT_CODE_OFF+(depth*2)),"",
	5094	(IV)ST.c1,(IV)ST.c2
	5095	));
	5096	state_num = CURLYM_B_fail;
	5097	goto reenter_switch;
	5098	}
	5099
	5100	if (ST.me->flags) {
	5101	/* mark current A as captured */
	5102	I32 paren = ST.me->flags;
	5103	if (ST.count) {
	5104	PL_regoffs[paren].start
	5105	= HOPc(PL_reginput, -ST.alen) - PL_bostr;
	5106	PL_regoffs[paren].end = PL_reginput - PL_bostr;
	5107	/*dmq: *PL_reglastcloseparen = paren; */
	5108	}
	5109	else
	5110	PL_regoffs[paren].end = -1;
	5111	if (cur_eval && cur_eval->u.eval.close_paren &&
	5112	cur_eval->u.eval.close_paren == (U32)ST.me->flags)
	5113	{
	5114	if (ST.count)
	5115	goto fake_end;
	5116	else
	5117	sayNO;
	5118	}
	5119	}
	5120
	5121	PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
	5122	/* NOTREACHED */
	5123
	5124	case CURLYM_B_fail: /* just failed to match a B */
	5125	REGCP_UNWIND(ST.cp);
	5126	if (ST.minmod) {
	5127	I32 max = ARG2(ST.me);
	5128	if (max != REG_INFTY && ST.count == max)
	5129	sayNO;
	5130	goto curlym_do_A; /* try to match a further A */
	5131	}
	5132	/* backtrack one A */
	5133	if (ST.count == ARG1(ST.me) /* min */)
	5134	sayNO;
	5135	ST.count--;
	5136	locinput = HOPc(locinput, -ST.alen);
	5137	goto curlym_do_B; /* try to match B */
	5138
	5139	#undef ST
	5140	#define ST st->u.curly
	5141
	5142	#define CURLY_SETPAREN(paren, success) \
	5143	if (paren) { \
	5144	if (success) { \
	5145	PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
	5146	PL_regoffs[paren].end = locinput - PL_bostr; \
	5147	*PL_reglastcloseparen = paren; \
	5148	} \
	5149	else \
	5150	PL_regoffs[paren].end = -1; \
	5151	}
	5152
	5153	case STAR: /* /A*B/ where A is width 1 */
	5154	ST.paren = 0;
	5155	ST.min = 0;
	5156	ST.max = REG_INFTY;
	5157	scan = NEXTOPER(scan);
	5158	goto repeat;
	5159	case PLUS: /* /A+B/ where A is width 1 */
	5160	ST.paren = 0;
	5161	ST.min = 1;
	5162	ST.max = REG_INFTY;
	5163	scan = NEXTOPER(scan);
	5164	goto repeat;
	5165	case CURLYN: /* /(A){m,n}B/ where A is width 1 */
	5166	ST.paren = scan->flags; /* Which paren to set */
	5167	if (ST.paren > PL_regsize)
	5168	PL_regsize = ST.paren;
	5169	if (ST.paren > *PL_reglastparen)
	5170	*PL_reglastparen = ST.paren;
	5171	ST.min = ARG1(scan); /* min to match */
	5172	ST.max = ARG2(scan); /* max to match */
	5173	if (cur_eval && cur_eval->u.eval.close_paren &&
	5174	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5175	ST.min=1;
	5176	ST.max=1;
	5177	}
	5178	scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
	5179	goto repeat;
	5180	case CURLY: /* /A{m,n}B/ where A is width 1 */
	5181	ST.paren = 0;
	5182	ST.min = ARG1(scan); /* min to match */
	5183	ST.max = ARG2(scan); /* max to match */
	5184	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	5185	repeat:
	5186	/*
	5187	* Lookahead to avoid useless match attempts
	5188	* when we know what character comes next.
	5189	*
	5190	* Used to only do .x and .?x, but now it allows
	5191	* for )'s, ('s and (?{ ... })'s to be in the way
	5192	* of the quantifier and the EXACT-like node. -- japhy
	5193	*/
	5194
	5195	if (ST.min > ST.max) /* XXX make this a compile-time check? */
	5196	sayNO;
	5197	if (HAS_TEXT(next) \|\| JUMPABLE(next)) {
	5198	U8 *s;
	5199	regnode *text_node = next;
	5200
	5201	if (! HAS_TEXT(text_node))
	5202	FIND_NEXT_IMPT(text_node);
	5203
	5204	if (! HAS_TEXT(text_node))
	5205	ST.c1 = ST.c2 = CHRTEST_VOID;
	5206	else {
	5207	if ( PL_regkind[OP(text_node)] != EXACT ) {
	5208	ST.c1 = ST.c2 = CHRTEST_VOID;
	5209	goto assume_ok_easy;
	5210	}
	5211	else
	5212	s = (U8*)STRING(text_node);
	5213
	5214	/* Currently we only get here when
	5215
	5216	PL_rekind[OP(text_node)] == EXACT
	5217
	5218	if this changes back then the macro for IS_TEXT and
	5219	friends need to change. */
	5220	if (!UTF_PATTERN) {
	5221	ST.c1 = *s;
	5222	switch (OP(text_node)) {
	5223	case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
	5224	case EXACTFA:
	5225	case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
	5226	case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
	5227	default: ST.c2 = ST.c1; break;
	5228	}
	5229	}
	5230	else { /* UTF_PATTERN */
	5231	if (IS_TEXTFU(text_node) \|\| IS_TEXTF(text_node)) {
	5232	STRLEN ulen1, ulen2;
	5233	U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
	5234	U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
	5235
	5236	to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
	5237	to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
	5238	#ifdef EBCDIC
	5239	ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
	5240	ckWARN(WARN_UTF8) ?
	5241	0 : UTF8_ALLOW_ANY);
	5242	ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
	5243	ckWARN(WARN_UTF8) ?
	5244	0 : UTF8_ALLOW_ANY);
	5245	#else
	5246	ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
	5247	uniflags);
	5248	ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
	5249	uniflags);
	5250	#endif
	5251	}
	5252	else {
	5253	ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
	5254	uniflags);
	5255	}
	5256	}
	5257	}
	5258	}
	5259	else
	5260	ST.c1 = ST.c2 = CHRTEST_VOID;
	5261	assume_ok_easy:
	5262
	5263	ST.A = scan;
	5264	ST.B = next;
	5265	PL_reginput = locinput;
	5266	if (minmod) {
	5267	minmod = 0;
	5268	if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
	5269	sayNO;
	5270	ST.count = ST.min;
	5271	locinput = PL_reginput;
	5272	REGCP_SET(ST.cp);
	5273	if (ST.c1 == CHRTEST_VOID)
	5274	goto curly_try_B_min;
	5275
	5276	ST.oldloc = locinput;
	5277
	5278	/* set ST.maxpos to the furthest point along the
	5279	* string that could possibly match */
	5280	if (ST.max == REG_INFTY) {
	5281	ST.maxpos = PL_regeol - 1;
	5282	if (utf8_target)
	5283	while (UTF8_IS_CONTINUATION((U8)ST.maxpos))
	5284	ST.maxpos--;
	5285	}
	5286	else if (utf8_target) {
	5287	int m = ST.max - ST.min;
	5288	for (ST.maxpos = locinput;
	5289	m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
	5290	ST.maxpos += UTF8SKIP(ST.maxpos);
	5291	}
	5292	else {
	5293	ST.maxpos = locinput + ST.max - ST.min;
	5294	if (ST.maxpos >= PL_regeol)
	5295	ST.maxpos = PL_regeol - 1;
	5296	}
	5297	goto curly_try_B_min_known;
	5298
	5299	}
	5300	else {
	5301	ST.count = regrepeat(rex, ST.A, ST.max, depth);
	5302	locinput = PL_reginput;
	5303	if (ST.count < ST.min)
	5304	sayNO;
	5305	if ((ST.count > ST.min)
	5306	&& (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
	5307	{
	5308	/* A{m,n} must come at the end of the string, there's
	5309	* no point in backing off ... */
	5310	ST.min = ST.count;
	5311	/* ...except that $ and \Z can match before and after
	5312	newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
	5313	We may back off by one in this case. */
	5314	if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
	5315	ST.min--;
	5316	}
	5317	REGCP_SET(ST.cp);
	5318	goto curly_try_B_max;
	5319	}
	5320	/* NOTREACHED */
	5321
	5322
	5323	case CURLY_B_min_known_fail:
	5324	/* failed to find B in a non-greedy match where c1,c2 valid */
	5325	if (ST.paren && ST.count)
	5326	PL_regoffs[ST.paren].end = -1;
	5327
	5328	PL_reginput = locinput; /* Could be reset... */
	5329	REGCP_UNWIND(ST.cp);
	5330	/* Couldn't or didn't -- move forward. */
	5331	ST.oldloc = locinput;
	5332	if (utf8_target)
	5333	locinput += UTF8SKIP(locinput);
	5334	else
	5335	locinput++;
	5336	ST.count++;
	5337	curly_try_B_min_known:
	5338	/* find the next place where 'B' could work, then call B */
	5339	{
	5340	int n;
	5341	if (utf8_target) {
	5342	n = (ST.oldloc == locinput) ? 0 : 1;
	5343	if (ST.c1 == ST.c2) {
	5344	STRLEN len;
	5345	/* set n to utf8_distance(oldloc, locinput) */
	5346	while (locinput <= ST.maxpos &&
	5347	utf8n_to_uvchr((U8*)locinput,
	5348	UTF8_MAXBYTES, &len,
	5349	uniflags) != (UV)ST.c1) {
	5350	locinput += len;
	5351	n++;
	5352	}
	5353	}
	5354	else {
	5355	/* set n to utf8_distance(oldloc, locinput) */
	5356	while (locinput <= ST.maxpos) {
	5357	STRLEN len;
	5358	const UV c = utf8n_to_uvchr((U8*)locinput,
	5359	UTF8_MAXBYTES, &len,
	5360	uniflags);
	5361	if (c == (UV)ST.c1 \|\| c == (UV)ST.c2)
	5362	break;
	5363	locinput += len;
	5364	n++;
	5365	}
	5366	}
	5367	}
	5368	else {
	5369	if (ST.c1 == ST.c2) {
	5370	while (locinput <= ST.maxpos &&
	5371	UCHARAT(locinput) != ST.c1)
	5372	locinput++;
	5373	}
	5374	else {
	5375	while (locinput <= ST.maxpos
	5376	&& UCHARAT(locinput) != ST.c1
	5377	&& UCHARAT(locinput) != ST.c2)
	5378	locinput++;
	5379	}
	5380	n = locinput - ST.oldloc;
	5381	}
	5382	if (locinput > ST.maxpos)
	5383	sayNO;
	5384	/* PL_reginput == oldloc now */
	5385	if (n) {
	5386	ST.count += n;
	5387	if (regrepeat(rex, ST.A, n, depth) < n)
	5388	sayNO;
	5389	}
	5390	PL_reginput = locinput;
	5391	CURLY_SETPAREN(ST.paren, ST.count);
	5392	if (cur_eval && cur_eval->u.eval.close_paren &&
	5393	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5394	goto fake_end;
	5395	}
	5396	PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
	5397	}
	5398	/* NOTREACHED */
	5399
	5400
	5401	case CURLY_B_min_fail:
	5402	/* failed to find B in a non-greedy match where c1,c2 invalid */
	5403	if (ST.paren && ST.count)
	5404	PL_regoffs[ST.paren].end = -1;
	5405
	5406	REGCP_UNWIND(ST.cp);
	5407	/* failed -- move forward one */
	5408	PL_reginput = locinput;
	5409	if (regrepeat(rex, ST.A, 1, depth)) {
	5410	ST.count++;
	5411	locinput = PL_reginput;
	5412	if (ST.count <= ST.max \|\| (ST.max == REG_INFTY &&
	5413	ST.count > 0)) /* count overflow ? */
	5414	{
	5415	curly_try_B_min:
	5416	CURLY_SETPAREN(ST.paren, ST.count);
	5417	if (cur_eval && cur_eval->u.eval.close_paren &&
	5418	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5419	goto fake_end;
	5420	}
	5421	PUSH_STATE_GOTO(CURLY_B_min, ST.B);
	5422	}
	5423	}
	5424	sayNO;
	5425	/* NOTREACHED */
	5426
	5427
	5428	curly_try_B_max:
	5429	/* a successful greedy match: now try to match B */
	5430	if (cur_eval && cur_eval->u.eval.close_paren &&
	5431	cur_eval->u.eval.close_paren == (U32)ST.paren) {
	5432	goto fake_end;
	5433	}
	5434	{
	5435	UV c = 0;
	5436	if (ST.c1 != CHRTEST_VOID)
	5437	c = utf8_target ? utf8n_to_uvchr((U8*)PL_reginput,
	5438	UTF8_MAXBYTES, 0, uniflags)
	5439	: (UV) UCHARAT(PL_reginput);
	5440	/* If it could work, try it. */
	5441	if (ST.c1 == CHRTEST_VOID \|\| c == (UV)ST.c1 \|\| c == (UV)ST.c2) {
	5442	CURLY_SETPAREN(ST.paren, ST.count);
	5443	PUSH_STATE_GOTO(CURLY_B_max, ST.B);
	5444	/* NOTREACHED */
	5445	}
	5446	}
	5447	/* FALL THROUGH */
	5448	case CURLY_B_max_fail:
	5449	/* failed to find B in a greedy match */
	5450	if (ST.paren && ST.count)
	5451	PL_regoffs[ST.paren].end = -1;
	5452
	5453	REGCP_UNWIND(ST.cp);
	5454	/* back up. */
	5455	if (--ST.count < ST.min)
	5456	sayNO;
	5457	PL_reginput = locinput = HOPc(locinput, -1);
	5458	goto curly_try_B_max;
	5459
	5460	#undef ST
	5461
	5462	case END:
	5463	fake_end:
	5464	if (cur_eval) {
	5465	/* we've just finished A in /(??{A})B/; now continue with B */
	5466	I32 tmpix;
	5467	st->u.eval.toggle_reg_flags
	5468	= cur_eval->u.eval.toggle_reg_flags;
	5469	PL_reg_flags ^= st->u.eval.toggle_reg_flags;
	5470
	5471	st->u.eval.prev_rex = rex_sv; /* inner */
	5472	SETREX(rex_sv,cur_eval->u.eval.prev_rex);
	5473	rex = (struct regexp *)SvANY(rex_sv);
	5474	rexi = RXi_GET(rex);
	5475	cur_curlyx = cur_eval->u.eval.prev_curlyx;
	5476	(void)ReREFCNT_inc(rex_sv);
	5477	st->u.eval.cp = regcppush(0); /* Save all the positions. */
	5478
	5479	/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
	5480	PL_reglastparen = &rex->lastparen;
	5481	PL_reglastcloseparen = &rex->lastcloseparen;
	5482
	5483	REGCP_SET(st->u.eval.lastcp);
	5484	PL_reginput = locinput;
	5485
	5486	/* Restore parens of the outer rex without popping the
	5487	* savestack */
	5488	tmpix = PL_savestack_ix;
	5489	PL_savestack_ix = cur_eval->u.eval.lastcp;
	5490	regcppop(rex);
	5491	PL_savestack_ix = tmpix;
	5492
	5493	st->u.eval.prev_eval = cur_eval;
	5494	cur_eval = cur_eval->u.eval.prev_eval;
	5495	DEBUG_EXECUTE_r(
	5496	PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
	5497	REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
	5498	if ( nochange_depth )
	5499	nochange_depth--;
	5500
	5501	PUSH_YES_STATE_GOTO(EVAL_AB,
	5502	st->u.eval.prev_eval->u.eval.B); /* match B */
	5503	}
	5504
	5505	if (locinput < reginfo->till) {
	5506	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
	5507	"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
	5508	PL_colors[4],
	5509	(long)(locinput - PL_reg_starttry),
	5510	(long)(reginfo->till - PL_reg_starttry),
	5511	PL_colors[5]));
	5512
	5513	sayNO_SILENT; /* Cannot match: too short. */
	5514	}
	5515	PL_reginput = locinput; /* put where regtry can find it */
	5516	sayYES; /* Success! */
	5517
	5518	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
	5519	DEBUG_EXECUTE_r(
	5520	PerlIO_printf(Perl_debug_log,
	5521	"%*s %ssubpattern success...%s\n",
	5522	REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
	5523	PL_reginput = locinput; /* put where regtry can find it */
	5524	sayYES; /* Success! */
	5525
	5526	#undef ST
	5527	#define ST st->u.ifmatch
	5528
	5529	case SUSPEND: /* (?>A) */
	5530	ST.wanted = 1;
	5531	PL_reginput = locinput;
	5532	goto do_ifmatch;
	5533
	5534	case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
	5535	ST.wanted = 0;
	5536	goto ifmatch_trivial_fail_test;
	5537
	5538	case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
	5539	ST.wanted = 1;
	5540	ifmatch_trivial_fail_test:
	5541	if (scan->flags) {
	5542	char * const s = HOPBACKc(locinput, scan->flags);
	5543	if (!s) {
	5544	/* trivial fail */
	5545	if (logical) {
	5546	logical = 0;
	5547	sw = 1 - cBOOL(ST.wanted);
	5548	}
	5549	else if (ST.wanted)
	5550	sayNO;
	5551	next = scan + ARG(scan);
	5552	if (next == scan)
	5553	next = NULL;
	5554	break;
	5555	}
	5556	PL_reginput = s;
	5557	}
	5558	else
	5559	PL_reginput = locinput;
	5560
	5561	do_ifmatch:
	5562	ST.me = scan;
	5563	ST.logical = logical;
	5564	logical = 0; /* XXX: reset state of logical once it has been saved into ST */
	5565
	5566	/* execute body of (?...A) */
	5567	PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
	5568	/* NOTREACHED */
	5569
	5570	case IFMATCH_A_fail: /* body of (?...A) failed */
	5571	ST.wanted = !ST.wanted;
	5572	/* FALL THROUGH */
	5573
	5574	case IFMATCH_A: /* body of (?...A) succeeded */
	5575	if (ST.logical) {
	5576	sw = cBOOL(ST.wanted);
	5577	}
	5578	else if (!ST.wanted)
	5579	sayNO;
	5580
	5581	if (OP(ST.me) == SUSPEND)
	5582	locinput = PL_reginput;
	5583	else {
	5584	locinput = PL_reginput = st->locinput;
	5585	nextchr = UCHARAT(locinput);
	5586	}
	5587	scan = ST.me + ARG(ST.me);
	5588	if (scan == ST.me)
	5589	scan = NULL;
	5590	continue; /* execute B */
	5591
	5592	#undef ST
	5593
	5594	case LONGJMP:
	5595	next = scan + ARG(scan);
	5596	if (next == scan)
	5597	next = NULL;
	5598	break;
	5599	case COMMIT:
	5600	reginfo->cutpoint = PL_regeol;
	5601	/* FALLTHROUGH */
	5602	case PRUNE:
	5603	PL_reginput = locinput;
	5604	if (!scan->flags)
	5605	sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5606	PUSH_STATE_GOTO(COMMIT_next,next);
	5607	/* NOTREACHED */
	5608	case COMMIT_next_fail:
	5609	no_final = 1;
	5610	/* FALLTHROUGH */
	5611	case OPFAIL:
	5612	sayNO;
	5613	/* NOTREACHED */
	5614
	5615	#define ST st->u.mark
	5616	case MARKPOINT:
	5617	ST.prev_mark = mark_state;
	5618	ST.mark_name = sv_commit = sv_yes_mark
	5619	= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5620	mark_state = st;
	5621	ST.mark_loc = PL_reginput = locinput;
	5622	PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
	5623	/* NOTREACHED */
	5624	case MARKPOINT_next:
	5625	mark_state = ST.prev_mark;
	5626	sayYES;
	5627	/* NOTREACHED */
	5628	case MARKPOINT_next_fail:
	5629	if (popmark && sv_eq(ST.mark_name,popmark))
	5630	{
	5631	if (ST.mark_loc > startpoint)
	5632	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	5633	popmark = NULL; /* we found our mark */
	5634	sv_commit = ST.mark_name;
	5635
	5636	DEBUG_EXECUTE_r({
	5637	PerlIO_printf(Perl_debug_log,
	5638	"%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
	5639	REPORT_CODE_OFF+depth*2, "",
	5640	PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
	5641	});
	5642	}
	5643	mark_state = ST.prev_mark;
	5644	sv_yes_mark = mark_state ?
	5645	mark_state->u.mark.mark_name : NULL;
	5646	sayNO;
	5647	/* NOTREACHED */
	5648	case SKIP:
	5649	PL_reginput = locinput;
	5650	if (scan->flags) {
	5651	/* (*SKIP) : if we fail we cut here */
	5652	ST.mark_name = NULL;
	5653	ST.mark_loc = locinput;
	5654	PUSH_STATE_GOTO(SKIP_next,next);
	5655	} else {
	5656	/* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
	5657	otherwise do nothing. Meaning we need to scan
	5658	*/
	5659	regmatch_state *cur = mark_state;
	5660	SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	5661
	5662	while (cur) {
	5663	if ( sv_eq( cur->u.mark.mark_name,
	5664	find ) )
	5665	{
	5666	ST.mark_name = find;
	5667	PUSH_STATE_GOTO( SKIP_next, next );
	5668	}
	5669	cur = cur->u.mark.prev_mark;
	5670	}
	5671	}
	5672	/* Didn't find our (MARK:NAME) so ignore this (SKIP:NAME) */
	5673	break;
	5674	case SKIP_next_fail:
	5675	if (ST.mark_name) {
	5676	/* (*CUT:NAME) - Set up to search for the name as we
	5677	collapse the stack*/
	5678	popmark = ST.mark_name;
	5679	} else {
	5680	/* (*CUT) - No name, we cut here. */
	5681	if (ST.mark_loc > startpoint)
	5682	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	5683	/* but we set sv_commit to latest mark_name if there
	5684	is one so they can test to see how things lead to this
	5685	cut */
	5686	if (mark_state)
	5687	sv_commit=mark_state->u.mark.mark_name;
	5688	}
	5689	no_final = 1;
	5690	sayNO;
	5691	/* NOTREACHED */
	5692	#undef ST
	5693	case FOLDCHAR:
	5694	n = ARG(scan);
	5695	if ( n == (U32)what_len_TRICKYFOLD(locinput,utf8_target,ln) ) {
	5696	locinput += ln;
	5697	} else if ( LATIN_SMALL_LETTER_SHARP_S == n && !utf8_target && !UTF_PATTERN ) {
	5698	sayNO;
	5699	} else {
	5700	U8 folded[UTF8_MAXBYTES_CASE+1];
	5701	STRLEN foldlen;
	5702	const char * const l = locinput;
	5703	char *e = PL_regeol;
	5704	to_uni_fold(n, folded, &foldlen);
	5705
	5706	if (! foldEQ_utf8((const char*) folded, 0, foldlen, 1,
	5707	l, &e, 0, utf8_target)) {
	5708	sayNO;
	5709	}
	5710	locinput = e;
	5711	}
	5712	nextchr = UCHARAT(locinput);
	5713	break;
	5714	case LNBREAK:
	5715	if ((n=is_LNBREAK(locinput,utf8_target))) {
	5716	locinput += n;
	5717	nextchr = UCHARAT(locinput);
	5718	} else
	5719	sayNO;
	5720	break;
	5721
	5722	#define CASE_CLASS(nAmE) \
	5723	case nAmE: \
	5724	if (locinput >= PL_regeol) \
	5725	sayNO; \
	5726	if ((n=is_##nAmE(locinput,utf8_target))) { \
	5727	locinput += n; \
	5728	nextchr = UCHARAT(locinput); \
	5729	} else \
	5730	sayNO; \
	5731	break; \
	5732	case N##nAmE: \
	5733	if (locinput >= PL_regeol) \
	5734	sayNO; \
	5735	if ((n=is_##nAmE(locinput,utf8_target))) { \
	5736	sayNO; \
	5737	} else { \
	5738	locinput += UTF8SKIP(locinput); \
	5739	nextchr = UCHARAT(locinput); \
	5740	} \
	5741	break
	5742
	5743	CASE_CLASS(VERTWS);
	5744	CASE_CLASS(HORIZWS);
	5745	#undef CASE_CLASS
	5746
	5747	default:
	5748	PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
	5749	PTR2UV(scan), OP(scan));
	5750	Perl_croak(aTHX_ "regexp memory corruption");
	5751
	5752	} /* end switch */
	5753
	5754	/* switch break jumps here */
	5755	scan = next; /* prepare to execute the next op and ... */
	5756	continue; /* ... jump back to the top, reusing st */
	5757	/* NOTREACHED */
	5758
	5759	push_yes_state:
	5760	/* push a state that backtracks on success */
	5761	st->u.yes.prev_yes_state = yes_state;
	5762	yes_state = st;
	5763	/* FALL THROUGH */
	5764	push_state:
	5765	/* push a new regex state, then continue at scan */
	5766	{
	5767	regmatch_state *newst;
	5768
	5769	DEBUG_STACK_r({
	5770	regmatch_state *cur = st;
	5771	regmatch_state *curyes = yes_state;
	5772	int curd = depth;
	5773	regmatch_slab *slab = PL_regmatch_slab;
	5774	for (;curd > -1;cur--,curd--) {
	5775	if (cur < SLAB_FIRST(slab)) {
	5776	slab = slab->prev;
	5777	cur = SLAB_LAST(slab);
	5778	}
	5779	PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
	5780	REPORT_CODE_OFF + 2 + depth * 2,"",
	5781	curd, PL_reg_name[cur->resume_state],
	5782	(curyes == cur) ? "yes" : ""
	5783	);
	5784	if (curyes == cur)
	5785	curyes = cur->u.yes.prev_yes_state;
	5786	}
	5787	} else
	5788	DEBUG_STATE_pp("push")
	5789	);
	5790	depth++;
	5791	st->locinput = locinput;
	5792	newst = st+1;
	5793	if (newst > SLAB_LAST(PL_regmatch_slab))
	5794	newst = S_push_slab(aTHX);
	5795	PL_regmatch_state = newst;
	5796
	5797	locinput = PL_reginput;
	5798	nextchr = UCHARAT(locinput);
	5799	st = newst;
	5800	continue;
	5801	/* NOTREACHED */
	5802	}
	5803	}
	5804
	5805	/*
	5806	* We get here only if there's trouble -- normally "case END" is
	5807	* the terminating point.
	5808	*/
	5809	Perl_croak(aTHX_ "corrupted regexp pointers");
	5810	/*NOTREACHED*/
	5811	sayNO;
	5812
	5813	yes:
	5814	if (yes_state) {
	5815	/* we have successfully completed a subexpression, but we must now
	5816	* pop to the state marked by yes_state and continue from there */
	5817	assert(st != yes_state);
	5818	#ifdef DEBUGGING
	5819	while (st != yes_state) {
	5820	st--;
	5821	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	5822	PL_regmatch_slab = PL_regmatch_slab->prev;
	5823	st = SLAB_LAST(PL_regmatch_slab);
	5824	}
	5825	DEBUG_STATE_r({
	5826	if (no_final) {
	5827	DEBUG_STATE_pp("pop (no final)");
	5828	} else {
	5829	DEBUG_STATE_pp("pop (yes)");
	5830	}
	5831	});
	5832	depth--;
	5833	}
	5834	#else
	5835	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
	5836	\|\| yes_state > SLAB_LAST(PL_regmatch_slab))
	5837	{
	5838	/* not in this slab, pop slab */
	5839	depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
	5840	PL_regmatch_slab = PL_regmatch_slab->prev;
	5841	st = SLAB_LAST(PL_regmatch_slab);
	5842	}
	5843	depth -= (st - yes_state);
	5844	#endif
	5845	st = yes_state;
	5846	yes_state = st->u.yes.prev_yes_state;
	5847	PL_regmatch_state = st;
	5848
	5849	if (no_final) {
	5850	locinput= st->locinput;
	5851	nextchr = UCHARAT(locinput);
	5852	}
	5853	state_num = st->resume_state + no_final;
	5854	goto reenter_switch;
	5855	}
	5856
	5857	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
	5858	PL_colors[4], PL_colors[5]));
	5859
	5860	if (PL_reg_eval_set) {
	5861	/* each successfully executed (?{...}) block does the equivalent of
	5862	* local $^R = do {...}
	5863	* When popping the save stack, all these locals would be undone;
	5864	* bypass this by setting the outermost saved $^R to the latest
	5865	* value */
	5866	if (oreplsv != GvSV(PL_replgv))
	5867	sv_setsv(oreplsv, GvSV(PL_replgv));
	5868	}
	5869	result = 1;
	5870	goto final_exit;
	5871
	5872	no:
	5873	DEBUG_EXECUTE_r(
	5874	PerlIO_printf(Perl_debug_log,
	5875	"%*s %sfailed...%s\n",
	5876	REPORT_CODE_OFF+depth*2, "",
	5877	PL_colors[4], PL_colors[5])
	5878	);
	5879
	5880	no_silent:
	5881	if (no_final) {
	5882	if (yes_state) {
	5883	goto yes;
	5884	} else {
	5885	goto final_exit;
	5886	}
	5887	}
	5888	if (depth) {
	5889	/* there's a previous state to backtrack to */
	5890	st--;
	5891	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	5892	PL_regmatch_slab = PL_regmatch_slab->prev;
	5893	st = SLAB_LAST(PL_regmatch_slab);
	5894	}
	5895	PL_regmatch_state = st;
	5896	locinput= st->locinput;
	5897	nextchr = UCHARAT(locinput);
	5898
	5899	DEBUG_STATE_pp("pop");
	5900	depth--;
	5901	if (yes_state == st)
	5902	yes_state = st->u.yes.prev_yes_state;
	5903
	5904	state_num = st->resume_state + 1; /* failure = success + 1 */
	5905	goto reenter_switch;
	5906	}
	5907	result = 0;
	5908
	5909	final_exit:
	5910	if (rex->intflags & PREGf_VERBARG_SEEN) {
	5911	SV *sv_err = get_sv("REGERROR", 1);
	5912	SV *sv_mrk = get_sv("REGMARK", 1);
	5913	if (result) {
	5914	sv_commit = &PL_sv_no;
	5915	if (!sv_yes_mark)
	5916	sv_yes_mark = &PL_sv_yes;
	5917	} else {
	5918	if (!sv_commit)
	5919	sv_commit = &PL_sv_yes;
	5920	sv_yes_mark = &PL_sv_no;
	5921	}
	5922	sv_setsv(sv_err, sv_commit);
	5923	sv_setsv(sv_mrk, sv_yes_mark);
	5924	}
	5925
	5926	/* clean up; in particular, free all slabs above current one */
	5927	LEAVE_SCOPE(oldsave);
	5928
	5929	return result;
	5930	}
	5931
	5932	/*
	5933	- regrepeat - repeatedly match something simple, report how many
	5934	*/
	5935	/*
	5936	* [This routine now assumes that it will only match on things of length 1.
	5937	* That was true before, but now we assume scan - reginput is the count,
	5938	* rather than incrementing count on every character. [Er, except utf8.]]
	5939	*/
	5940	STATIC I32
	5941	S_regrepeat(pTHX_ const regexp prog, const regnode p, I32 max, int depth)
	5942	{
	5943	dVAR;
	5944	register char *scan;
	5945	register I32 c;
	5946	register char *loceol = PL_regeol;
	5947	register I32 hardcount = 0;
	5948	register bool utf8_target = PL_reg_match_utf8;
	5949	UV utf8_flags;
	5950	#ifndef DEBUGGING
	5951	PERL_UNUSED_ARG(depth);
	5952	#endif
	5953
	5954	PERL_ARGS_ASSERT_REGREPEAT;
	5955
	5956	scan = PL_reginput;
	5957	if (max == REG_INFTY)
	5958	max = I32_MAX;
	5959	else if (max < loceol - scan)
	5960	loceol = scan + max;
	5961	switch (OP(p)) {
	5962	case REG_ANY:
	5963	if (utf8_target) {
	5964	loceol = PL_regeol;
	5965	while (scan < loceol && hardcount < max && *scan != '\n') {
	5966	scan += UTF8SKIP(scan);
	5967	hardcount++;
	5968	}
	5969	} else {
	5970	while (scan < loceol && *scan != '\n')
	5971	scan++;
	5972	}
	5973	break;
	5974	case SANY:
	5975	if (utf8_target) {
	5976	loceol = PL_regeol;
	5977	while (scan < loceol && hardcount < max) {
	5978	scan += UTF8SKIP(scan);
	5979	hardcount++;
	5980	}
	5981	}
	5982	else
	5983	scan = loceol;
	5984	break;
	5985	case CANY:
	5986	scan = loceol;
	5987	break;
	5988	case EXACT:
	5989	/* To get here, EXACTish nodes must have byte length == 1. That
	5990	* means they match only characters in the string that can be expressed
	5991	* as a single byte. For non-utf8 strings, that means a simple match.
	5992	* For utf8 strings, the character matched must be an invariant, or
	5993	* downgradable to a single byte. The pattern's utf8ness is
	5994	* irrelevant, as since it's a single byte, it either isn't utf8, or if
	5995	* it is, it's an invariant */
	5996
	5997	c = (U8)*STRING(p);
	5998	assert(! UTF_PATTERN \|\| UNI_IS_INVARIANT(c));
	5999
	6000	if (! utf8_target \|\| UNI_IS_INVARIANT(c)) {
	6001	while (scan < loceol && UCHARAT(scan) == c) {
	6002	scan++;
	6003	}
	6004	}
	6005	else {
	6006
	6007	/* Here, the string is utf8, and the pattern char is different
	6008	* in utf8 than not, so can't compare them directly. Outside the
	6009	* loop, find find the two utf8 bytes that represent c, and then
	6010	* look for those in sequence in the utf8 string */
	6011	U8 high = UTF8_TWO_BYTE_HI(c);
	6012	U8 low = UTF8_TWO_BYTE_LO(c);
	6013	loceol = PL_regeol;
	6014
	6015	while (hardcount < max
	6016	&& scan + 1 < loceol
	6017	&& UCHARAT(scan) == high
	6018	&& UCHARAT(scan + 1) == low)
	6019	{
	6020	scan += 2;
	6021	hardcount++;
	6022	}
	6023	}
	6024	break;
	6025	case EXACTFA:
	6026	utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	6027	goto do_exactf;
	6028
	6029	case EXACTFL:
	6030	PL_reg_flags \|= RF_tainted;
	6031	utf8_flags = FOLDEQ_UTF8_LOCALE;
	6032	goto do_exactf;
	6033
	6034	case EXACTF:
	6035	case EXACTFU:
	6036	utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	6037
	6038	/* The comments for the EXACT case above apply as well to these fold
	6039	* ones */
	6040
	6041	do_exactf:
	6042	c = (U8)*STRING(p);
	6043	assert(! UTF_PATTERN \|\| UNI_IS_INVARIANT(c));
	6044
	6045	if (utf8_target) { /* Use full Unicode fold matching */
	6046	char *tmpeol = loceol;
	6047	while (hardcount < max
	6048	&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
	6049	STRING(p), NULL, 1, cBOOL(UTF_PATTERN), utf8_flags))
	6050	{
	6051	scan = tmpeol;
	6052	tmpeol = loceol;
	6053	hardcount++;
	6054	}
	6055
	6056	/* XXX Note that the above handles properly the German sharp s in
	6057	* the pattern matching ss in the string. But it doesn't handle
	6058	* properly cases where the string contains say 'LIGATURE ff' and
	6059	* the pattern is 'f+'. This would require, say, a new function or
	6060	* revised interface to foldEQ_utf8(), in which the maximum number
	6061	* of characters to match could be passed and it would return how
	6062	* many actually did. This is just one of many cases where
	6063	* multi-char folds don't work properly, and so the fix is being
	6064	* deferred */
	6065	}
	6066	else {
	6067	U8 folded;
	6068
	6069	/* Here, the string isn't utf8 and c is a single byte; and either
	6070	* the pattern isn't utf8 or c is an invariant, so its utf8ness
	6071	* doesn't affect c. Can just do simple comparisons for exact or
	6072	* fold matching. */
	6073	switch (OP(p)) {
	6074	case EXACTF: folded = PL_fold[c]; break;
	6075	case EXACTFA:
	6076	case EXACTFU: folded = PL_fold_latin1[c]; break;
	6077	case EXACTFL: folded = PL_fold_locale[c]; break;
	6078	default: Perl_croak(aTHX_ "panic: Unexpected op %u", OP(p));
	6079	}
	6080	while (scan < loceol &&
	6081	(UCHARAT(scan) == c \|\| UCHARAT(scan) == folded))
	6082	{
	6083	scan++;
	6084	}
	6085	}
	6086	break;
	6087	case ANYOFV:
	6088	case ANYOF:
	6089	if (utf8_target \|\| OP(p) == ANYOFV) {
	6090	STRLEN inclasslen;
	6091	loceol = PL_regeol;
	6092	inclasslen = loceol - scan;
	6093	while (hardcount < max
	6094	&& ((inclasslen = loceol - scan) > 0)
	6095	&& reginclass(prog, p, (U8*)scan, &inclasslen, utf8_target))
	6096	{
	6097	scan += inclasslen;
	6098	hardcount++;
	6099	}
	6100	} else {
	6101	while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
	6102	scan++;
	6103	}
	6104	break;
	6105	case ALNUMU:
	6106	if (utf8_target) {
	6107	utf8_wordchar:
	6108	loceol = PL_regeol;
	6109	LOAD_UTF8_CHARCLASS_ALNUM();
	6110	while (hardcount < max && scan < loceol &&
	6111	swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6112	{
	6113	scan += UTF8SKIP(scan);
	6114	hardcount++;
	6115	}
	6116	} else {
	6117	while (scan < loceol && isWORDCHAR_L1((U8) *scan)) {
	6118	scan++;
	6119	}
	6120	}
	6121	break;
	6122	case ALNUM:
	6123	if (utf8_target)
	6124	goto utf8_wordchar;
	6125	while (scan < loceol && isALNUM((U8) *scan)) {
	6126	scan++;
	6127	}
	6128	break;
	6129	case ALNUMA:
	6130	while (scan < loceol && isWORDCHAR_A((U8) *scan)) {
	6131	scan++;
	6132	}
	6133	break;
	6134	case ALNUML:
	6135	PL_reg_flags \|= RF_tainted;
	6136	if (utf8_target) {
	6137	loceol = PL_regeol;
	6138	while (hardcount < max && scan < loceol &&
	6139	isALNUM_LC_utf8((U8*)scan)) {
	6140	scan += UTF8SKIP(scan);
	6141	hardcount++;
	6142	}
	6143	} else {
	6144	while (scan < loceol && isALNUM_LC(*scan))
	6145	scan++;
	6146	}
	6147	break;
	6148	case NALNUMU:
	6149	if (utf8_target) {
	6150
	6151	utf8_Nwordchar:
	6152
	6153	loceol = PL_regeol;
	6154	LOAD_UTF8_CHARCLASS_ALNUM();
	6155	while (hardcount < max && scan < loceol &&
	6156	! swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
	6157	{
	6158	scan += UTF8SKIP(scan);
	6159	hardcount++;
	6160	}
	6161	} else {
	6162	while (scan < loceol && ! isWORDCHAR_L1((U8) *scan)) {
	6163	scan++;
	6164	}
	6165	}
	6166	break;
	6167	case NALNUM:
	6168	if (utf8_target)
	6169	goto utf8_Nwordchar;
	6170	while (scan < loceol && ! isALNUM((U8) *scan)) {
	6171	scan++;
	6172	}
	6173	break;
	6174	case NALNUMA:
	6175	if (utf8_target) {
	6176	while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
	6177	scan += UTF8SKIP(scan);
	6178	}
	6179	}
	6180	else {
	6181	while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
	6182	scan++;
	6183	}
	6184	}
	6185	break;
	6186	case NALNUML:
	6187	PL_reg_flags \|= RF_tainted;
	6188	if (utf8_target) {
	6189	loceol = PL_regeol;
	6190	while (hardcount < max && scan < loceol &&
	6191	!isALNUM_LC_utf8((U8*)scan)) {
	6192	scan += UTF8SKIP(scan);
	6193	hardcount++;
	6194	}
	6195	} else {
	6196	while (scan < loceol && !isALNUM_LC(*scan))
	6197	scan++;
	6198	}
	6199	break;
	6200	case SPACEU:
	6201	if (utf8_target) {
	6202
	6203	utf8_space:
	6204
	6205	loceol = PL_regeol;
	6206	LOAD_UTF8_CHARCLASS_SPACE();
	6207	while (hardcount < max && scan < loceol &&
	6208	(*scan == ' ' \|\|
	6209	swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
	6210	{
	6211	scan += UTF8SKIP(scan);
	6212	hardcount++;
	6213	}
	6214	break;
	6215	}
	6216	else {
	6217	while (scan < loceol && isSPACE_L1((U8) *scan)) {
	6218	scan++;
	6219	}
	6220	break;
	6221	}
	6222	case SPACE:
	6223	if (utf8_target)
	6224	goto utf8_space;
	6225
	6226	while (scan < loceol && isSPACE((U8) *scan)) {
	6227	scan++;
	6228	}
	6229	break;
	6230	case SPACEA:
	6231	while (scan < loceol && isSPACE_A((U8) *scan)) {
	6232	scan++;
	6233	}
	6234	break;
	6235	case SPACEL:
	6236	PL_reg_flags \|= RF_tainted;
	6237	if (utf8_target) {
	6238	loceol = PL_regeol;
	6239	while (hardcount < max && scan < loceol &&
	6240	isSPACE_LC_utf8((U8*)scan)) {
	6241	scan += UTF8SKIP(scan);
	6242	hardcount++;
	6243	}
	6244	} else {
	6245	while (scan < loceol && isSPACE_LC(*scan))
	6246	scan++;
	6247	}
	6248	break;
	6249	case NSPACEU:
	6250	if (utf8_target) {
	6251
	6252	utf8_Nspace:
	6253
	6254	loceol = PL_regeol;
	6255	LOAD_UTF8_CHARCLASS_SPACE();
	6256	while (hardcount < max && scan < loceol &&
	6257	! (*scan == ' ' \|\|
	6258	swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
	6259	{
	6260	scan += UTF8SKIP(scan);
	6261	hardcount++;
	6262	}
	6263	break;
	6264	}
	6265	else {
	6266	while (scan < loceol && ! isSPACE_L1((U8) *scan)) {
	6267	scan++;
	6268	}
	6269	}
	6270	break;
	6271	case NSPACE:
	6272	if (utf8_target)
	6273	goto utf8_Nspace;
	6274
	6275	while (scan < loceol && ! isSPACE((U8) *scan)) {
	6276	scan++;
	6277	}
	6278	break;
	6279	case NSPACEA:
	6280	if (utf8_target) {
	6281	while (scan < loceol && ! isSPACE_A((U8) *scan)) {
	6282	scan += UTF8SKIP(scan);
	6283	}
	6284	}
	6285	else {
	6286	while (scan < loceol && ! isSPACE_A((U8) *scan)) {
	6287	scan++;
	6288	}
	6289	}
	6290	break;
	6291	case NSPACEL:
	6292	PL_reg_flags \|= RF_tainted;
	6293	if (utf8_target) {
	6294	loceol = PL_regeol;
	6295	while (hardcount < max && scan < loceol &&
	6296	!isSPACE_LC_utf8((U8*)scan)) {
	6297	scan += UTF8SKIP(scan);
	6298	hardcount++;
	6299	}
	6300	} else {
	6301	while (scan < loceol && !isSPACE_LC(*scan))
	6302	scan++;
	6303	}
	6304	break;
	6305	case DIGIT:
	6306	if (utf8_target) {
	6307	loceol = PL_regeol;
	6308	LOAD_UTF8_CHARCLASS_DIGIT();
	6309	while (hardcount < max && scan < loceol &&
	6310	swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	6311	scan += UTF8SKIP(scan);
	6312	hardcount++;
	6313	}
	6314	} else {
	6315	while (scan < loceol && isDIGIT(*scan))
	6316	scan++;
	6317	}
	6318	break;
	6319	case DIGITA:
	6320	while (scan < loceol && isDIGIT_A((U8) *scan)) {
	6321	scan++;
	6322	}
	6323	break;
	6324	case DIGITL:
	6325	PL_reg_flags \|= RF_tainted;
	6326	if (utf8_target) {
	6327	loceol = PL_regeol;
	6328	while (hardcount < max && scan < loceol &&
	6329	isDIGIT_LC_utf8((U8*)scan)) {
	6330	scan += UTF8SKIP(scan);
	6331	hardcount++;
	6332	}
	6333	} else {
	6334	while (scan < loceol && isDIGIT_LC(*scan))
	6335	scan++;
	6336	}
	6337	break;
	6338	case NDIGIT:
	6339	if (utf8_target) {
	6340	loceol = PL_regeol;
	6341	LOAD_UTF8_CHARCLASS_DIGIT();
	6342	while (hardcount < max && scan < loceol &&
	6343	!swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
	6344	scan += UTF8SKIP(scan);
	6345	hardcount++;
	6346	}
	6347	} else {
	6348	while (scan < loceol && !isDIGIT(*scan))
	6349	scan++;
	6350	}
	6351	break;
	6352	case NDIGITA:
	6353	if (utf8_target) {
	6354	while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
	6355	scan += UTF8SKIP(scan);
	6356	}
	6357	}
	6358	else {
	6359	while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
	6360	scan++;
	6361	}
	6362	}
	6363	break;
	6364	case NDIGITL:
	6365	PL_reg_flags \|= RF_tainted;
	6366	if (utf8_target) {
	6367	loceol = PL_regeol;
	6368	while (hardcount < max && scan < loceol &&
	6369	!isDIGIT_LC_utf8((U8*)scan)) {
	6370	scan += UTF8SKIP(scan);
	6371	hardcount++;
	6372	}
	6373	} else {
	6374	while (scan < loceol && !isDIGIT_LC(*scan))
	6375	scan++;
	6376	}
	6377	break;
	6378	case LNBREAK:
	6379	if (utf8_target) {
	6380	loceol = PL_regeol;
	6381	while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
	6382	scan += c;
	6383	hardcount++;
	6384	}
	6385	} else {
	6386	/*
	6387	LNBREAK can match two latin chars, which is ok,
	6388	because we have a null terminated string, but we
	6389	have to use hardcount in this situation
	6390	*/
	6391	while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
	6392	scan+=c;
	6393	hardcount++;
	6394	}
	6395	}
	6396	break;
	6397	case HORIZWS:
	6398	if (utf8_target) {
	6399	loceol = PL_regeol;
	6400	while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
	6401	scan += c;
	6402	hardcount++;
	6403	}
	6404	} else {
	6405	while (scan < loceol && is_HORIZWS_latin1(scan))
	6406	scan++;
	6407	}
	6408	break;
	6409	case NHORIZWS:
	6410	if (utf8_target) {
	6411	loceol = PL_regeol;
	6412	while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
	6413	scan += UTF8SKIP(scan);
	6414	hardcount++;
	6415	}
	6416	} else {
	6417	while (scan < loceol && !is_HORIZWS_latin1(scan))
	6418	scan++;
	6419
	6420	}
	6421	break;
	6422	case VERTWS:
	6423	if (utf8_target) {
	6424	loceol = PL_regeol;
	6425	while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
	6426	scan += c;
	6427	hardcount++;
	6428	}
	6429	} else {
	6430	while (scan < loceol && is_VERTWS_latin1(scan))
	6431	scan++;
	6432
	6433	}
	6434	break;
	6435	case NVERTWS:
	6436	if (utf8_target) {
	6437	loceol = PL_regeol;
	6438	while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
	6439	scan += UTF8SKIP(scan);
	6440	hardcount++;
	6441	}
	6442	} else {
	6443	while (scan < loceol && !is_VERTWS_latin1(scan))
	6444	scan++;
	6445
	6446	}
	6447	break;
	6448
	6449	default: /* Called on something of 0 width. */
	6450	break; /* So match right here or not at all. */
	6451	}
	6452
	6453	if (hardcount)
	6454	c = hardcount;
	6455	else
	6456	c = scan - PL_reginput;
	6457	PL_reginput = scan;
	6458
	6459	DEBUG_r({
	6460	GET_RE_DEBUG_FLAGS_DECL;
	6461	DEBUG_EXECUTE_r({
	6462	SV * const prop = sv_newmortal();
	6463	regprop(prog, prop, p);
	6464	PerlIO_printf(Perl_debug_log,
	6465	"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
	6466	REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
	6467	});
	6468	});
	6469
	6470	return(c);
	6471	}
	6472
	6473
	6474	#if !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION)
	6475	/*
	6476	- regclass_swash - prepare the utf8 swash
	6477	*/
	6478
	6479	SV *
	6480	Perl_regclass_swash(pTHX_ const regexp prog, register const regnode node, bool doinit, SV listsvp, SV altsvp)
	6481	{
	6482	dVAR;
	6483	SV *sw = NULL;
	6484	SV *si = NULL;
	6485	SV *alt = NULL;
	6486	RXi_GET_DECL(prog,progi);
	6487	const struct reg_data * const data = prog ? progi->data : NULL;
	6488
	6489	PERL_ARGS_ASSERT_REGCLASS_SWASH;
	6490
	6491	assert(ANYOF_NONBITMAP(node));
	6492
	6493	if (data && data->count) {
	6494	const U32 n = ARG(node);
	6495
	6496	if (data->what[n] == 's') {
	6497	SV * const rv = MUTABLE_SV(data->data[n]);
	6498	AV * const av = MUTABLE_AV(SvRV(rv));
	6499	SV **const ary = AvARRAY(av);
	6500	SV a, b;
	6501
	6502	/* See the end of regcomp.c:S_regclass() for
	6503	* documentation of these array elements. */
	6504
	6505	si = *ary;
	6506	a = SvROK(ary[1]) ? &ary[1] : NULL;
	6507	b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
	6508
	6509	if (a)
	6510	sw = *a;
	6511	else if (si && doinit) {
	6512	sw = swash_init("utf8", "", si, 1, 0);
	6513	(void)av_store(av, 1, sw);
	6514	}
	6515	if (b)
	6516	alt = *b;
	6517	}
	6518	}
	6519
	6520	if (listsvp)
	6521	*listsvp = si;
	6522	if (altsvp)
	6523	*altsvp = alt;
	6524
	6525	return sw;
	6526	}
	6527	#endif
	6528
	6529	/*
	6530	- reginclass - determine if a character falls into a character class
	6531
	6532	n is the ANYOF regnode
	6533	p is the target string
	6534	lenp is pointer to the maximum number of bytes of how far to go in p
	6535	(This is assumed wthout checking to always be at least the current
	6536	character's size)
	6537	utf8_target tells whether p is in UTF-8.
	6538
	6539	Returns true if matched; false otherwise. If lenp is not NULL, on return
	6540	from a successful match, the value it points to will be updated to how many
	6541	bytes in p were matched. If there was no match, the value is undefined,
	6542	possibly changed from the input.
	6543
	6544	Note that this can be a synthetic start class, a combination of various
	6545	nodes, so things you think might be mutually exclusive, such as locale,
	6546	aren't. It can match both locale and non-locale
	6547
	6548	*/
	6549
	6550	STATIC bool
	6551	S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, STRLEN* lenp, register const bool utf8_target)
	6552	{
	6553	dVAR;
	6554	const char flags = ANYOF_FLAGS(n);
	6555	bool match = FALSE;
	6556	UV c = *p;
	6557	STRLEN c_len = 0;
	6558	STRLEN maxlen;
	6559
	6560	PERL_ARGS_ASSERT_REGINCLASS;
	6561
	6562	/* If c is not already the code point, get it */
	6563	if (utf8_target && !UTF8_IS_INVARIANT(c)) {
	6564	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
	6565	(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
	6566	\| UTF8_ALLOW_FFFF \| UTF8_CHECK_ONLY);
	6567	/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
	6568	* UTF8_ALLOW_FFFF */
	6569	if (c_len == (STRLEN)-1)
	6570	Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
	6571	}
	6572	else {
	6573	c_len = 1;
	6574	}
	6575
	6576	/* Use passed in max length, or one character if none passed in or less
	6577	* than one character. And assume will match just one character. This is
	6578	* overwritten later if matched more. */
	6579	if (lenp) {
	6580	maxlen = (lenp > c_len) ? lenp : c_len;
	6581	*lenp = c_len;
	6582
	6583	}
	6584	else {
	6585	maxlen = c_len;
	6586	}
	6587
	6588	/* If this character is potentially in the bitmap, check it */
	6589	if (c < 256) {
	6590	if (ANYOF_BITMAP_TEST(n, c))
	6591	match = TRUE;
	6592	else if (flags & ANYOF_NON_UTF8_LATIN1_ALL
	6593	&& ! utf8_target
	6594	&& ! isASCII(c))
	6595	{
	6596	match = TRUE;
	6597	}
	6598
	6599	else if (flags & ANYOF_LOCALE) {
	6600	PL_reg_flags \|= RF_tainted;
	6601
	6602	if ((flags & ANYOF_LOC_NONBITMAP_FOLD)
	6603	&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
	6604	{
	6605	match = TRUE;
	6606	}
	6607	else if (ANYOF_CLASS_TEST_ANY_SET(n) &&
	6608	((ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) \|\|
	6609	(ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) \|\|
	6610	(ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) \|\|
	6611	(ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) \|\|
	6612	(ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) \|\|
	6613	(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) \|\|
	6614	(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) \|\|
	6615	(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) \|\|
	6616	(ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) \|\|
	6617	(ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) \|\|
	6618	(ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) \|\|
	6619	(ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) \|\|
	6620	(ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) \|\|
	6621	(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) \|\|
	6622	(ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) \|\|
	6623	(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) \|\|
	6624	(ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) \|\|
	6625	(ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) \|\|
	6626	(ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) \|\|
	6627	(ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) \|\|
	6628	(ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) \|\|
	6629	(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) \|\|
	6630	(ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) \|\|
	6631	(ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) \|\|
	6632	(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) \|\|
	6633	(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) \|\|
	6634	(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) \|\|
	6635	(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) \|\|
	6636	(ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) \|\|
	6637	(ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
	6638	) /* How's that for a conditional? */
	6639	) {
	6640	match = TRUE;
	6641	}
	6642	}
	6643	}
	6644
	6645	/* If the bitmap didn't (or couldn't) match, and something outside the
	6646	* bitmap could match, try that. Locale nodes specifiy completely the
	6647	* behavior of code points in the bit map (otherwise, a utf8 target would
	6648	* cause them to be treated as Unicode and not locale), except in
	6649	* the very unlikely event when this node is a synthetic start class, which
	6650	* could be a combination of locale and non-locale nodes. So allow locale
	6651	* to match for the synthetic start class, which will give a false
	6652	* positive that will be resolved when the match is done again as not part
	6653	* of the synthetic start class */
	6654	if (!match) {
	6655	if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
	6656	match = TRUE; /* Everything above 255 matches */
	6657	}
	6658	else if (ANYOF_NONBITMAP(n)
	6659	&& ((flags & ANYOF_NONBITMAP_NON_UTF8)
	6660	\|\| (utf8_target
	6661	&& (c >=256
	6662	\|\| (! (flags & ANYOF_LOCALE))
	6663	\|\| (flags & ANYOF_IS_SYNTHETIC)))))
	6664	{
	6665	AV *av;
	6666	SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
	6667
	6668	if (sw) {
	6669	U8 * utf8_p;
	6670	if (utf8_target) {
	6671	utf8_p = (U8 *) p;
	6672	} else {
	6673
	6674	/* Not utf8. Convert as much of the string as available up
	6675	* to the limit of how far the (single) character in the
	6676	* pattern can possibly match (no need to go further). If
	6677	* the node is a straight ANYOF or not folding, it can't
	6678	* match more than one. Otherwise, It can match up to how
	6679	* far a single char can fold to. Since not utf8, each
	6680	* character is a single byte, so the max it can be in
	6681	* bytes is the same as the max it can be in characters */
	6682	STRLEN len = (OP(n) == ANYOF
	6683	\|\| ! (flags & ANYOF_LOC_NONBITMAP_FOLD))
	6684	? 1
	6685	: (maxlen < UTF8_MAX_FOLD_CHAR_EXPAND)
	6686	? maxlen
	6687	: UTF8_MAX_FOLD_CHAR_EXPAND;
	6688	utf8_p = bytes_to_utf8(p, &len);
	6689	}
	6690
	6691	if (swash_fetch(sw, utf8_p, TRUE))
	6692	match = TRUE;
	6693	else if (flags & ANYOF_LOC_NONBITMAP_FOLD) {
	6694
	6695	/* Here, we need to test if the fold of the target string
	6696	* matches. The non-multi char folds have all been moved to
	6697	* the compilation phase, and the multi-char folds have
	6698	* been stored by regcomp into 'av'; we linearly check to
	6699	* see if any match the target string (folded). We know
	6700	* that the originals were each one character, but we don't
	6701	* currently know how many characters/bytes each folded to,
	6702	* except we do know that there are small limits imposed by
	6703	* Unicode. XXX A performance enhancement would be to have
	6704	* regcomp.c store the max number of chars/bytes that are
	6705	* in an av entry, as, say the 0th element. Even better
	6706	* would be to have a hash of the few characters that can
	6707	* start a multi-char fold to the max number of chars of
	6708	* those folds.
	6709	*
	6710	* If there is a match, we will need to advance (if lenp is
	6711	* specified) the match pointer in the target string. But
	6712	* what we are comparing here isn't that string directly,
	6713	* but its fold, whose length may differ from the original.
	6714	* As we go along in constructing the fold, therefore, we
	6715	* create a map so that we know how many bytes in the
	6716	* source to advance given that we have matched a certain
	6717	* number of bytes in the fold. This map is stored in
	6718	* 'map_fold_len_back'. Let n mean the number of bytes in
	6719	* the fold of the first character that we are folding.
	6720	* Then map_fold_len_back[n] is set to the number of bytes
	6721	* in that first character. Similarly let m be the
	6722	* corresponding number for the second character to be
	6723	* folded. Then map_fold_len_back[n+m] is set to the
	6724	* number of bytes occupied by the first two source
	6725	* characters. ... */
	6726	U8 map_fold_len_back[UTF8_MAXBYTES_CASE+1] = { 0 };
	6727	U8 folded[UTF8_MAXBYTES_CASE+1];
	6728	STRLEN foldlen = 0; /* num bytes in fold of 1st char */
	6729	STRLEN total_foldlen = 0; /* num bytes in fold of all
	6730	chars */
	6731
	6732	if (OP(n) == ANYOF \|\| maxlen == 1 \|\| ! lenp \|\| ! av) {
	6733
	6734	/* Here, only need to fold the first char of the target
	6735	* string. It the source wasn't utf8, is 1 byte long */
	6736	to_utf8_fold(utf8_p, folded, &foldlen);
	6737	total_foldlen = foldlen;
	6738	map_fold_len_back[foldlen] = (utf8_target)
	6739	? UTF8SKIP(utf8_p)
	6740	: 1;
	6741	}
	6742	else {
	6743
	6744	/* Here, need to fold more than the first char. Do so
	6745	* up to the limits */
	6746	U8* source_ptr = utf8_p; /* The source for the fold
	6747	is the regex target
	6748	string */
	6749	U8* folded_ptr = folded;
	6750	U8* e = utf8_p + maxlen; /* Can't go beyond last
	6751	available byte in the
	6752	target string */
	6753	U8 i;
	6754	for (i = 0;
	6755	i < UTF8_MAX_FOLD_CHAR_EXPAND && source_ptr < e;
	6756	i++)
	6757	{
	6758
	6759	/* Fold the next character */
	6760	U8 this_char_folded[UTF8_MAXBYTES_CASE+1];
	6761	STRLEN this_char_foldlen;
	6762	to_utf8_fold(source_ptr,
	6763	this_char_folded,
	6764	&this_char_foldlen);
	6765
	6766	/* Bail if it would exceed the byte limit for
	6767	* folding a single char. */
	6768	if (this_char_foldlen + folded_ptr - folded >
	6769	UTF8_MAXBYTES_CASE)
	6770	{
	6771	break;
	6772	}
	6773
	6774	/* Add the fold of this character */
	6775	Copy(this_char_folded,
	6776	folded_ptr,
	6777	this_char_foldlen,
	6778	U8);
	6779	source_ptr += UTF8SKIP(source_ptr);
	6780	folded_ptr += this_char_foldlen;
	6781	total_foldlen = folded_ptr - folded;
	6782
	6783	/* Create map from the number of bytes in the fold
	6784	* back to the number of bytes in the source. If
	6785	* the source isn't utf8, the byte count is just
	6786	* the number of characters so far */
	6787	map_fold_len_back[total_foldlen]
	6788	= (utf8_target)
	6789	? source_ptr - utf8_p
	6790	: i + 1;
	6791	}
	6792	*folded_ptr = '\0';
	6793	}
	6794
	6795
	6796	/* Do the linear search to see if the fold is in the list
	6797	* of multi-char folds. */
	6798	if (av) {
	6799	I32 i;
	6800	for (i = 0; i <= av_len(av); i++) {
	6801	SV* const sv = *av_fetch(av, i, FALSE);
	6802	STRLEN len;
	6803	const char * const s = SvPV_const(sv, len);
	6804
	6805	if (len <= total_foldlen
	6806	&& memEQ(s, (char*)folded, len)
	6807
	6808	/* If 0, means matched a partial char. See
	6809	* [perl #90536] */
	6810	&& map_fold_len_back[len])
	6811	{
	6812
	6813	/* Advance the target string ptr to account for
	6814	* this fold, but have to translate from the
	6815	* folded length to the corresponding source
	6816	* length. */
	6817	if (lenp) {
	6818	*lenp = map_fold_len_back[len];
	6819	}
	6820	match = TRUE;
	6821	break;
	6822	}
	6823	}
	6824	}
	6825	}
	6826
	6827	/* If we allocated a string above, free it */
	6828	if (! utf8_target) Safefree(utf8_p);
	6829	}
	6830	}
	6831	}
	6832
	6833	return (flags & ANYOF_INVERT) ? !match : match;
	6834	}
	6835
	6836	STATIC U8 *
	6837	S_reghop3(U8 s, I32 off, const U8 lim)
	6838	{
	6839	/* return the position 'off' UTF-8 characters away from 's', forward if
	6840	* 'off' >= 0, backwards if negative. But don't go outside of position
	6841	* 'lim', which better be < s if off < 0 */
	6842
	6843	dVAR;
	6844
	6845	PERL_ARGS_ASSERT_REGHOP3;
	6846
	6847	if (off >= 0) {
	6848	while (off-- && s < lim) {
	6849	/* XXX could check well-formedness here */
	6850	s += UTF8SKIP(s);
	6851	}
	6852	}
	6853	else {
	6854	while (off++ && s > lim) {
	6855	s--;
	6856	if (UTF8_IS_CONTINUED(*s)) {
	6857	while (s > lim && UTF8_IS_CONTINUATION(*s))
	6858	s--;
	6859	}
	6860	/* XXX could check well-formedness here */
	6861	}
	6862	}
	6863	return s;
	6864	}
	6865
	6866	#ifdef XXX_dmq
	6867	/* there are a bunch of places where we use two reghop3's that should
	6868	be replaced with this routine. but since thats not done yet
	6869	we ifdef it out - dmq
	6870	*/
	6871	STATIC U8 *
	6872	S_reghop4(U8 s, I32 off, const U8 llim, const U8* rlim)
	6873	{
	6874	dVAR;
	6875
	6876	PERL_ARGS_ASSERT_REGHOP4;
	6877
	6878	if (off >= 0) {
	6879	while (off-- && s < rlim) {
	6880	/* XXX could check well-formedness here */
	6881	s += UTF8SKIP(s);
	6882	}
	6883	}
	6884	else {
	6885	while (off++ && s > llim) {
	6886	s--;
	6887	if (UTF8_IS_CONTINUED(*s)) {
	6888	while (s > llim && UTF8_IS_CONTINUATION(*s))
	6889	s--;
	6890	}
	6891	/* XXX could check well-formedness here */
	6892	}
	6893	}
	6894	return s;
	6895	}
	6896	#endif
	6897
	6898	STATIC U8 *
	6899	S_reghopmaybe3(U8* s, I32 off, const U8* lim)
	6900	{
	6901	dVAR;
	6902
	6903	PERL_ARGS_ASSERT_REGHOPMAYBE3;
	6904
	6905	if (off >= 0) {
	6906	while (off-- && s < lim) {
	6907	/* XXX could check well-formedness here */
	6908	s += UTF8SKIP(s);
	6909	}
	6910	if (off >= 0)
	6911	return NULL;
	6912	}
	6913	else {
	6914	while (off++ && s > lim) {
	6915	s--;
	6916	if (UTF8_IS_CONTINUED(*s)) {
	6917	while (s > lim && UTF8_IS_CONTINUATION(*s))
	6918	s--;
	6919	}
	6920	/* XXX could check well-formedness here */
	6921	}
	6922	if (off <= 0)
	6923	return NULL;
	6924	}
	6925	return s;
	6926	}
	6927
	6928	static void
	6929	restore_pos(pTHX_ void *arg)
	6930	{
	6931	dVAR;
	6932	regexp * const rex = (regexp *)arg;
	6933	if (PL_reg_eval_set) {
	6934	if (PL_reg_oldsaved) {
	6935	rex->subbeg = PL_reg_oldsaved;
	6936	rex->sublen = PL_reg_oldsavedlen;
	6937	#ifdef PERL_OLD_COPY_ON_WRITE
	6938	rex->saved_copy = PL_nrs;
	6939	#endif
	6940	RXp_MATCH_COPIED_on(rex);
	6941	}
	6942	PL_reg_magic->mg_len = PL_reg_oldpos;
	6943	PL_reg_eval_set = 0;
	6944	PL_curpm = PL_reg_oldcurpm;
	6945	}
	6946	}
	6947
	6948	STATIC void
	6949	S_to_utf8_substr(pTHX_ register regexp *prog)
	6950	{
	6951	int i = 1;
	6952
	6953	PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
	6954
	6955	do {
	6956	if (prog->substrs->data[i].substr
	6957	&& !prog->substrs->data[i].utf8_substr) {
	6958	SV* const sv = newSVsv(prog->substrs->data[i].substr);
	6959	prog->substrs->data[i].utf8_substr = sv;
	6960	sv_utf8_upgrade(sv);
	6961	if (SvVALID(prog->substrs->data[i].substr)) {
	6962	if (SvTAIL(prog->substrs->data[i].substr)) {
	6963	/* Trim the trailing \n that fbm_compile added last
	6964	time. */
	6965	SvCUR_set(sv, SvCUR(sv) - 1);
	6966	/* Whilst this makes the SV technically "invalid" (as its
	6967	buffer is no longer followed by "\0") when fbm_compile()
	6968	adds the "\n" back, a "\0" is restored. */
	6969	fbm_compile(sv, FBMcf_TAIL);
	6970	} else
	6971	fbm_compile(sv, 0);
	6972	}
	6973	if (prog->substrs->data[i].substr == prog->check_substr)
	6974	prog->check_utf8 = sv;
	6975	}
	6976	} while (i--);
	6977	}
	6978
	6979	STATIC void
	6980	S_to_byte_substr(pTHX_ register regexp *prog)
	6981	{
	6982	dVAR;
	6983	int i = 1;
	6984
	6985	PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
	6986
	6987	do {
	6988	if (prog->substrs->data[i].utf8_substr
	6989	&& !prog->substrs->data[i].substr) {
	6990	SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
	6991	if (sv_utf8_downgrade(sv, TRUE)) {
	6992	if (SvVALID(prog->substrs->data[i].utf8_substr)) {
	6993	if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
	6994	/* Trim the trailing \n that fbm_compile added last
	6995	time. */
	6996	SvCUR_set(sv, SvCUR(sv) - 1);
	6997	fbm_compile(sv, FBMcf_TAIL);
	6998	} else
	6999	fbm_compile(sv, 0);
	7000	}
	7001	} else {
	7002	SvREFCNT_dec(sv);
	7003	sv = &PL_sv_undef;
	7004	}
	7005	prog->substrs->data[i].substr = sv;
	7006	if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
	7007	prog->check_substr = sv;
	7008	}
	7009	} while (i--);
	7010	}
	7011
	7012	/*
	7013	* Local variables:
	7014	* c-indentation-style: bsd
	7015	* c-basic-offset: 4
	7016	* indent-tabs-mode: t
	7017	* End:
	7018	*
	7019	* ex: set ts=8 sts=4 sw=4 noet:
	7020	*/