perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regexec.c
	2	*/
	3
	4	/*
	5	* One Ring to rule them all, One Ring to find them
	6	*
	7	* [p.v of _The Lord of the Rings_, opening poem]
	8	* [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
	9	* [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
	10	*/
	11
	12	/* This file contains functions for executing a regular expression. See
	13	* also regcomp.c which funnily enough, contains functions for compiling
	14	* a regular expression.
	15	*
	16	* This file is also copied at build time to ext/re/re_exec.c, where
	17	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	18	* This causes the main functions to be compiled under new names and with
	19	* debugging support added, which makes "use re 'debug'" work.
	20	*/
	21
	22	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	23	* be confused with the original package (see point 3 below). Thanks, Henry!
	24	*/
	25
	26	/* Additional note: this code is very heavily munged from Henry's version
	27	* in places. In some spots I've traded clarity for efficiency, so don't
	28	* blame Henry for some of the lack of readability.
	29	*/
	30
	31	/* The names of the functions have been changed from regcomp and
	32	* regexec to pregcomp and pregexec in order to avoid conflicts
	33	* with the POSIX routines of the same names.
	34	*/
	35
	36	#ifdef PERL_EXT_RE_BUILD
	37	#include "re_top.h"
	38	#endif
	39
	40	/*
	41	* pregcomp and pregexec -- regsub and regerror are not used in perl
	42	*
	43	* Copyright (c) 1986 by University of Toronto.
	44	* Written by Henry Spencer. Not derived from licensed software.
	45	*
	46	* Permission is granted to anyone to use this software for any
	47	* purpose on any computer system, and to redistribute it freely,
	48	* subject to the following restrictions:
	49	*
	50	* 1. The author is not responsible for the consequences of use of
	51	* this software, no matter how awful, even if they arise
	52	* from defects in it.
	53	*
	54	* 2. The origin of this software must not be misrepresented, either
	55	* by explicit claim or by omission.
	56	*
	57	* 3. Altered versions must be plainly marked as such, and must not
	58	* be misrepresented as being the original software.
	59	*
	60	**** Alterations to Henry's code are...
	61	****
	62	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	63	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
	64	**** by Larry Wall and others
	65	****
	66	**** You may distribute under the terms of either the GNU General Public
	67	**** License or the Artistic License, as specified in the README file.
	68	*
	69	* Beware that some of this code is subtly aware of the way operator
	70	* precedence is structured in regular expressions. Serious changes in
	71	* regular-expression syntax might require a total rethink.
	72	*/
	73	#include "EXTERN.h"
	74	#define PERL_IN_REGEXEC_C
	75	#include "perl.h"
	76
	77	#ifdef PERL_IN_XSUB_RE
	78	# include "re_comp.h"
	79	#else
	80	# include "regcomp.h"
	81	#endif
	82
	83	#include "invlist_inline.h"
	84	#include "unicode_constants.h"
	85
	86	#define B_ON_NON_UTF8_LOCALE_IS_WRONG \
	87	"Use of \\b{} or \\B{} for non-UTF-8 locale is wrong. Assuming a UTF-8 locale"
	88
	89	static const char utf8_locale_required[] =
	90	"Use of (?[ ]) for non-UTF-8 locale is wrong. Assuming a UTF-8 locale";
	91
	92	#ifdef DEBUGGING
	93	/* At least one required character in the target string is expressible only in
	94	* UTF-8. */
	95	static const char* const non_utf8_target_but_utf8_required
	96	= "Can't match, because target string needs to be in UTF-8\n";
	97	#endif
	98
	99	#define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target) STMT_START { \
	100	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ "%s", non_utf8_target_but_utf8_required));\
	101	goto target; \
	102	} STMT_END
	103
	104	#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
	105
	106	#ifndef STATIC
	107	#define STATIC static
	108	#endif
	109
	110	/* Valid only if 'c', the character being looke-up, is an invariant under
	111	* UTF-8: it avoids the reginclass call if there are no complications: i.e., if
	112	* everything matchable is straight forward in the bitmap */
	113	#define REGINCLASS(prog,p,c,u) (ANYOF_FLAGS(p) \
	114	? reginclass(prog,p,c,c+1,u) \
	115	: ANYOF_BITMAP_TEST(p,*(c)))
	116
	117	/*
	118	* Forwards.
	119	*/
	120
	121	#define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))
	122	#define CHR_DIST(a,b) (reginfo->is_utf8_target ? utf8_distance(a,b) : a - b)
	123
	124	#define HOPc(pos,off) \
	125	(char *)(reginfo->is_utf8_target \
	126	? reghop3((U8*)pos, off, \
	127	(U8*)(off >= 0 ? reginfo->strend : reginfo->strbeg)) \
	128	: (U8*)(pos + off))
	129
	130	#define HOPBACKc(pos, off) \
	131	(char*)(reginfo->is_utf8_target \
	132	? reghopmaybe3((U8)pos, (SSize_t)0-off, (U8)(reginfo->strbeg)) \
	133	: (pos - off >= reginfo->strbeg) \
	134	? (U8*)pos - off \
	135	: NULL)
	136
	137	#define HOP3(pos,off,lim) (reginfo->is_utf8_target ? reghop3((U8)(pos), off, (U8)(lim)) : (U8*)(pos + off))
	138	#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
	139
	140	/* lim must be +ve. Returns NULL on overshoot */
	141	#define HOPMAYBE3(pos,off,lim) \
	142	(reginfo->is_utf8_target \
	143	? reghopmaybe3((U8)pos, off, (U8)(lim)) \
	144	: ((U8*)pos + off <= lim) \
	145	? (U8*)pos + off \
	146	: NULL)
	147
	148	/* like HOP3, but limits the result to <= lim even for the non-utf8 case.
	149	* off must be >=0; args should be vars rather than expressions */
	150	#define HOP3lim(pos,off,lim) (reginfo->is_utf8_target \
	151	? reghop3((U8)(pos), off, (U8)(lim)) \
	152	: (U8*)((pos + off) > lim ? lim : (pos + off)))
	153
	154	#define HOP4(pos,off,llim, rlim) (reginfo->is_utf8_target \
	155	? reghop4((U8)(pos), off, (U8)(llim), (U8*)(rlim)) \
	156	: (U8*)(pos + off))
	157	#define HOP4c(pos,off,llim, rlim) ((char*)HOP4(pos,off,llim, rlim))
	158
	159	#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
	160	#define NEXTCHR_IS_EOS (nextchr < 0)
	161
	162	#define SET_nextchr \
	163	nextchr = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
	164
	165	#define SET_locinput(p) \
	166	locinput = (p); \
	167	SET_nextchr
	168
	169
	170	#define LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist) STMT_START { \
	171	if (!swash_ptr) { \
	172	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; \
	173	swash_ptr = _core_swash_init("utf8", property_name, &PL_sv_undef, \
	174	1, 0, invlist, &flags); \
	175	assert(swash_ptr); \
	176	} \
	177	} STMT_END
	178
	179	/* If in debug mode, we test that a known character properly matches */
	180	#ifdef DEBUGGING
	181	# define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr, \
	182	property_name, \
	183	invlist, \
	184	utf8_char_in_property) \
	185	LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist); \
	186	assert(swash_fetch(swash_ptr, (U8 *) utf8_char_in_property, TRUE));
	187	#else
	188	# define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr, \
	189	property_name, \
	190	invlist, \
	191	utf8_char_in_property) \
	192	LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist)
	193	#endif
	194
	195	#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS_DEBUG_TEST( \
	196	PL_utf8_swash_ptrs[_CC_WORDCHAR], \
	197	"", \
	198	PL_XPosix_ptrs[_CC_WORDCHAR], \
	199	LATIN_SMALL_LIGATURE_LONG_S_T_UTF8);
	200
	201	#define PLACEHOLDER /* Something for the preprocessor to grab onto */
	202	/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
	203
	204	/* for use after a quantifier and before an EXACT-like node -- japhy */
	205	/* it would be nice to rework regcomp.sym to generate this stuff. sigh
	206	*
	207	* NOTE that nothing that affects backtracking should be in here, specifically
	208	* VERBS must NOT be included. JUMPABLE is used to determine if we can ignore a
	209	* node that is in between two EXACT like nodes when ascertaining what the required
	210	* "follow" character is. This should probably be moved to regex compile time
	211	* although it may be done at run time beause of the REF possibility - more
	212	* investigation required. -- demerphq
	213	*/
	214	#define JUMPABLE(rn) ( \
	215	OP(rn) == OPEN \|\| \
	216	(OP(rn) == CLOSE && \
	217	!EVAL_CLOSE_PAREN_IS(cur_eval,ARG(rn)) ) \|\| \
	218	OP(rn) == EVAL \|\| \
	219	OP(rn) == SUSPEND \|\| OP(rn) == IFMATCH \|\| \
	220	OP(rn) == PLUS \|\| OP(rn) == MINMOD \|\| \
	221	OP(rn) == KEEPS \|\| \
	222	(PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
	223	)
	224	#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
	225
	226	#define HAS_TEXT(rn) ( IS_EXACT(rn) \|\| PL_regkind[OP(rn)] == REF )
	227
	228	#if 0
	229	/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
	230	we don't need this definition. XXX These are now out-of-sync*/
	231	#define IS_TEXT(rn) ( OP(rn)==EXACT \|\| OP(rn)==REF \|\| OP(rn)==NREF )
	232	#define IS_TEXTF(rn) ( OP(rn)==EXACTFU \|\| OP(rn)==EXACTFU_SS \|\| OP(rn)==EXACTFA \|\| OP(rn)==EXACTFA_NO_TRIE \|\| OP(rn)==EXACTF \|\| OP(rn)==REFF \|\| OP(rn)==NREFF )
	233	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL \|\| OP(rn)==REFFL \|\| OP(rn)==NREFFL )
	234
	235	#else
	236	/* ... so we use this as its faster. */
	237	#define IS_TEXT(rn) ( OP(rn)==EXACT \|\| OP(rn)==EXACTL )
	238	#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU \|\| OP(rn)==EXACTFLU8 \|\| OP(rn)==EXACTFU_SS \|\| OP(rn) == EXACTFA \|\| OP(rn) == EXACTFA_NO_TRIE)
	239	#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
	240	#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
	241
	242	#endif
	243
	244	/*
	245	Search for mandatory following text node; for lookahead, the text must
	246	follow but for lookbehind (rn->flags != 0) we skip to the next step.
	247	*/
	248	#define FIND_NEXT_IMPT(rn) STMT_START { \
	249	while (JUMPABLE(rn)) { \
	250	const OPCODE type = OP(rn); \
	251	if (type == SUSPEND \|\| PL_regkind[type] == CURLY) \
	252	rn = NEXTOPER(NEXTOPER(rn)); \
	253	else if (type == PLUS) \
	254	rn = NEXTOPER(rn); \
	255	else if (type == IFMATCH) \
	256	rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
	257	else rn += NEXT_OFF(rn); \
	258	} \
	259	} STMT_END
	260
	261	#define SLAB_FIRST(s) (&(s)->states[0])
	262	#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
	263
	264	static void S_setup_eval_state(pTHX_ regmatch_info *const reginfo);
	265	static void S_cleanup_regmatch_info_aux(pTHX_ void *arg);
	266	static regmatch_state * S_push_slab(pTHX);
	267
	268	#define REGCP_PAREN_ELEMS 3
	269	#define REGCP_OTHER_ELEMS 3
	270	#define REGCP_FRAME_ELEMS 1
	271	/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
	272	* are needed for the regexp context stack bookkeeping. */
	273
	274	STATIC CHECKPOINT
	275	S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen)
	276	{
	277	const int retval = PL_savestack_ix;
	278	const int paren_elems_to_push =
	279	(maxopenparen - parenfloor) * REGCP_PAREN_ELEMS;
	280	const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
	281	const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
	282	I32 p;
	283	GET_RE_DEBUG_FLAGS_DECL;
	284
	285	PERL_ARGS_ASSERT_REGCPPUSH;
	286
	287	if (paren_elems_to_push < 0)
	288	Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0, maxopenparen: %i parenfloor: %i REGCP_PAREN_ELEMS: %u",
	289	(int)paren_elems_to_push, (int)maxopenparen,
	290	(int)parenfloor, (unsigned)REGCP_PAREN_ELEMS);
	291
	292	if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
	293	Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
	294	" out of range (%lu-%ld)",
	295	total_elems,
	296	(unsigned long)maxopenparen,
	297	(long)parenfloor);
	298
	299	SSGROW(total_elems + REGCP_FRAME_ELEMS);
	300
	301	DEBUG_BUFFERS_r(
	302	if ((int)maxopenparen > (int)parenfloor)
	303	Perl_re_printf( aTHX_
	304	"rex=0x%"UVxf" offs=0x%"UVxf": saving capture indices:\n",
	305	PTR2UV(rex),
	306	PTR2UV(rex->offs)
	307	);
	308	);
	309	for (p = parenfloor+1; p <= (I32)maxopenparen; p++) {
	310	/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
	311	SSPUSHIV(rex->offs[p].end);
	312	SSPUSHIV(rex->offs[p].start);
	313	SSPUSHINT(rex->offs[p].start_tmp);
	314	DEBUG_BUFFERS_r(Perl_re_printf( aTHX_
	315	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"\n",
	316	(UV)p,
	317	(IV)rex->offs[p].start,
	318	(IV)rex->offs[p].start_tmp,
	319	(IV)rex->offs[p].end
	320	));
	321	}
	322	/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
	323	SSPUSHINT(maxopenparen);
	324	SSPUSHINT(rex->lastparen);
	325	SSPUSHINT(rex->lastcloseparen);
	326	SSPUSHUV(SAVEt_REGCONTEXT \| elems_shifted); /* Magic cookie. */
	327
	328	return retval;
	329	}
	330
	331	/* These are needed since we do not localize EVAL nodes: */
	332	#define REGCP_SET(cp) \
	333	DEBUG_STATE_r( \
	334	Perl_re_exec_indentf( aTHX_ \
	335	"Setting an EVAL scope, savestack=%"IVdf",\n", \
	336	depth, (IV)PL_savestack_ix \
	337	) \
	338	); \
	339	cp = PL_savestack_ix
	340
	341	#define REGCP_UNWIND(cp) \
	342	DEBUG_STATE_r( \
	343	if (cp != PL_savestack_ix) \
	344	Perl_re_exec_indentf( aTHX_ \
	345	"Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n",\
	346	depth, (IV)(cp), (IV)PL_savestack_ix \
	347	) \
	348	); \
	349	regcpblow(cp)
	350
	351	#define UNWIND_PAREN(lp, lcp) \
	352	for (n = rex->lastparen; n > lp; n--) \
	353	rex->offs[n].end = -1; \
	354	rex->lastparen = n; \
	355	rex->lastcloseparen = lcp;
	356
	357
	358	STATIC void
	359	S_regcppop(pTHX_ regexp rex, U32 maxopenparen_p)
	360	{
	361	UV i;
	362	U32 paren;
	363	GET_RE_DEBUG_FLAGS_DECL;
	364
	365	PERL_ARGS_ASSERT_REGCPPOP;
	366
	367	/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
	368	i = SSPOPUV;
	369	assert((i & SAVE_MASK) == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
	370	i >>= SAVE_TIGHT_SHIFT; /* Parentheses elements to pop. */
	371	rex->lastcloseparen = SSPOPINT;
	372	rex->lastparen = SSPOPINT;
	373	*maxopenparen_p = SSPOPINT;
	374
	375	i -= REGCP_OTHER_ELEMS;
	376	/* Now restore the parentheses context. */
	377	DEBUG_BUFFERS_r(
	378	if (i \|\| rex->lastparen + 1 <= rex->nparens)
	379	Perl_re_printf( aTHX_
	380	"rex=0x%"UVxf" offs=0x%"UVxf": restoring capture indices to:\n",
	381	PTR2UV(rex),
	382	PTR2UV(rex->offs)
	383	);
	384	);
	385	paren = *maxopenparen_p;
	386	for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
	387	SSize_t tmps;
	388	rex->offs[paren].start_tmp = SSPOPINT;
	389	rex->offs[paren].start = SSPOPIV;
	390	tmps = SSPOPIV;
	391	if (paren <= rex->lastparen)
	392	rex->offs[paren].end = tmps;
	393	DEBUG_BUFFERS_r( Perl_re_printf( aTHX_
	394	" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"%s\n",
	395	(UV)paren,
	396	(IV)rex->offs[paren].start,
	397	(IV)rex->offs[paren].start_tmp,
	398	(IV)rex->offs[paren].end,
	399	(paren > rex->lastparen ? "(skipped)" : ""));
	400	);
	401	paren--;
	402	}
	403	#if 1
	404	/* It would seem that the similar code in regtry()
	405	* already takes care of this, and in fact it is in
	406	* a better location to since this code can #if 0-ed out
	407	* but the code in regtry() is needed or otherwise tests
	408	* requiring null fields (pat.t#187 and split.t#{13,14}
	409	* (as of patchlevel 7877) will fail. Then again,
	410	* this code seems to be necessary or otherwise
	411	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	412	* --jhi updated by dapm */
	413	for (i = rex->lastparen + 1; i <= rex->nparens; i++) {
	414	if (i > *maxopenparen_p)
	415	rex->offs[i].start = -1;
	416	rex->offs[i].end = -1;
	417	DEBUG_BUFFERS_r( Perl_re_printf( aTHX_
	418	" \\%"UVuf": %s ..-1 undeffing\n",
	419	(UV)i,
	420	(i > *maxopenparen_p) ? "-1" : " "
	421	));
	422	}
	423	#endif
	424	}
	425
	426	/* restore the parens and associated vars at savestack position ix,
	427	* but without popping the stack */
	428
	429	STATIC void
	430	S_regcp_restore(pTHX_ regexp rex, I32 ix, U32 maxopenparen_p)
	431	{
	432	I32 tmpix = PL_savestack_ix;
	433	PL_savestack_ix = ix;
	434	regcppop(rex, maxopenparen_p);
	435	PL_savestack_ix = tmpix;
	436	}
	437
	438	#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
	439
	440	STATIC bool
	441	S_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
	442	{
	443	/* Returns a boolean as to whether or not 'character' is a member of the
	444	* Posix character class given by 'classnum' that should be equivalent to a
	445	* value in the typedef '_char_class_number'.
	446	*
	447	* Ideally this could be replaced by a just an array of function pointers
	448	* to the C library functions that implement the macros this calls.
	449	* However, to compile, the precise function signatures are required, and
	450	* these may vary from platform to to platform. To avoid having to figure
	451	* out what those all are on each platform, I (khw) am using this method,
	452	* which adds an extra layer of function call overhead (unless the C
	453	* optimizer strips it away). But we don't particularly care about
	454	* performance with locales anyway. */
	455
	456	switch ((_char_class_number) classnum) {
	457	case _CC_ENUM_ALPHANUMERIC: return isALPHANUMERIC_LC(character);
	458	case _CC_ENUM_ALPHA: return isALPHA_LC(character);
	459	case _CC_ENUM_ASCII: return isASCII_LC(character);
	460	case _CC_ENUM_BLANK: return isBLANK_LC(character);
	461	case _CC_ENUM_CASED: return isLOWER_LC(character)
	462	\|\| isUPPER_LC(character);
	463	case _CC_ENUM_CNTRL: return isCNTRL_LC(character);
	464	case _CC_ENUM_DIGIT: return isDIGIT_LC(character);
	465	case _CC_ENUM_GRAPH: return isGRAPH_LC(character);
	466	case _CC_ENUM_LOWER: return isLOWER_LC(character);
	467	case _CC_ENUM_PRINT: return isPRINT_LC(character);
	468	case _CC_ENUM_PUNCT: return isPUNCT_LC(character);
	469	case _CC_ENUM_SPACE: return isSPACE_LC(character);
	470	case _CC_ENUM_UPPER: return isUPPER_LC(character);
	471	case _CC_ENUM_WORDCHAR: return isWORDCHAR_LC(character);
	472	case _CC_ENUM_XDIGIT: return isXDIGIT_LC(character);
	473	default: /* VERTSPACE should never occur in locales */
	474	Perl_croak(aTHX_ "panic: isFOO_lc() has an unexpected character class '%d'", classnum);
	475	}
	476
	477	NOT_REACHED; /* NOTREACHED */
	478	return FALSE;
	479	}
	480
	481	STATIC bool
	482	S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
	483	{
	484	/* Returns a boolean as to whether or not the (well-formed) UTF-8-encoded
	485	* 'character' is a member of the Posix character class given by 'classnum'
	486	* that should be equivalent to a value in the typedef
	487	* '_char_class_number'.
	488	*
	489	* This just calls isFOO_lc on the code point for the character if it is in
	490	* the range 0-255. Outside that range, all characters use Unicode
	491	* rules, ignoring any locale. So use the Unicode function if this class
	492	* requires a swash, and use the Unicode macro otherwise. */
	493
	494	PERL_ARGS_ASSERT_ISFOO_UTF8_LC;
	495
	496	if (UTF8_IS_INVARIANT(*character)) {
	497	return isFOO_lc(classnum, *character);
	498	}
	499	else if (UTF8_IS_DOWNGRADEABLE_START(*character)) {
	500	return isFOO_lc(classnum,
	501	EIGHT_BIT_UTF8_TO_NATIVE(character, (character + 1)));
	502	}
	503
	504	_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(character, character + UTF8SKIP(character));
	505
	506	if (classnum < _FIRST_NON_SWASH_CC) {
	507
	508	/* Initialize the swash unless done already */
	509	if (! PL_utf8_swash_ptrs[classnum]) {
	510	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	511	PL_utf8_swash_ptrs[classnum] =
	512	_core_swash_init("utf8",
	513	"",
	514	&PL_sv_undef, 1, 0,
	515	PL_XPosix_ptrs[classnum], &flags);
	516	}
	517
	518	return cBOOL(swash_fetch(PL_utf8_swash_ptrs[classnum], (U8 *)
	519	character,
	520	TRUE /* is UTF */ ));
	521	}
	522
	523	switch ((_char_class_number) classnum) {
	524	case _CC_ENUM_SPACE: return is_XPERLSPACE_high(character);
	525	case _CC_ENUM_BLANK: return is_HORIZWS_high(character);
	526	case _CC_ENUM_XDIGIT: return is_XDIGIT_high(character);
	527	case _CC_ENUM_VERTSPACE: return is_VERTWS_high(character);
	528	default: break;
	529	}
	530
	531	return FALSE; /* Things like CNTRL are always below 256 */
	532	}
	533
	534	/*
	535	* pregexec and friends
	536	*/
	537
	538	#ifndef PERL_IN_XSUB_RE
	539	/*
	540	- pregexec - match a regexp against a string
	541	*/
	542	I32
	543	Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
	544	char strbeg, SSize_t minend, SV screamer, U32 nosave)
	545	/* stringarg: the point in the string at which to begin matching */
	546	/* strend: pointer to null at end of string */
	547	/* strbeg: real beginning of string */
	548	/* minend: end of match must be >= minend bytes after stringarg. */
	549	/* screamer: SV being matched: only used for utf8 flag, pos() etc; string
	550	* itself is accessed via the pointers above */
	551	/* nosave: For optimizations. */
	552	{
	553	PERL_ARGS_ASSERT_PREGEXEC;
	554
	555	return
	556	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
	557	nosave ? 0 : REXEC_COPY_STR);
	558	}
	559	#endif
	560
	561
	562
	563	/* re_intuit_start():
	564	*
	565	* Based on some optimiser hints, try to find the earliest position in the
	566	* string where the regex could match.
	567	*
	568	* rx: the regex to match against
	569	* sv: the SV being matched: only used for utf8 flag; the string
	570	* itself is accessed via the pointers below. Note that on
	571	* something like an overloaded SV, SvPOK(sv) may be false
	572	* and the string pointers may point to something unrelated to
	573	* the SV itself.
	574	* strbeg: real beginning of string
	575	* strpos: the point in the string at which to begin matching
	576	* strend: pointer to the byte following the last char of the string
	577	* flags currently unused; set to 0
	578	* data: currently unused; set to NULL
	579	*
	580	* The basic idea of re_intuit_start() is to use some known information
	581	* about the pattern, namely:
	582	*
	583	* a) the longest known anchored substring (i.e. one that's at a
	584	* constant offset from the beginning of the pattern; but not
	585	* necessarily at a fixed offset from the beginning of the
	586	* string);
	587	* b) the longest floating substring (i.e. one that's not at a constant
	588	* offset from the beginning of the pattern);
	589	* c) Whether the pattern is anchored to the string; either
	590	* an absolute anchor: /^../, or anchored to \n: /^.../m,
	591	* or anchored to pos(): /\G/;
	592	* d) A start class: a real or synthetic character class which
	593	* represents which characters are legal at the start of the pattern;
	594	*
	595	* to either quickly reject the match, or to find the earliest position
	596	* within the string at which the pattern might match, thus avoiding
	597	* running the full NFA engine at those earlier locations, only to
	598	* eventually fail and retry further along.
	599	*
	600	* Returns NULL if the pattern can't match, or returns the address within
	601	* the string which is the earliest place the match could occur.
	602	*
	603	* The longest of the anchored and floating substrings is called 'check'
	604	* and is checked first. The other is called 'other' and is checked
	605	* second. The 'other' substring may not be present. For example,
	606	*
	607	* /(abc\|xyz)ABC\d{0,3}DEFG/
	608	*
	609	* will have
	610	*
	611	* check substr (float) = "DEFG", offset 6..9 chars
	612	* other substr (anchored) = "ABC", offset 3..3 chars
	613	* stclass = [ax]
	614	*
	615	* Be aware that during the course of this function, sometimes 'anchored'
	616	* refers to a substring being anchored relative to the start of the
	617	* pattern, and sometimes to the pattern itself being anchored relative to
	618	* the string. For example:
	619	*
	620	* /\dabc/: "abc" is anchored to the pattern;
	621	* /^\dabc/: "abc" is anchored to the pattern and the string;
	622	* /\d+abc/: "abc" is anchored to neither the pattern nor the string;
	623	* /^\d+abc/: "abc" is anchored to neither the pattern nor the string,
	624	* but the pattern is anchored to the string.
	625	*/
	626
	627	char *
	628	Perl_re_intuit_start(pTHX_
	629	REGEXP * const rx,
	630	SV *sv,
	631	const char * const strbeg,
	632	char *strpos,
	633	char *strend,
	634	const U32 flags,
	635	re_scream_pos_data *data)
	636	{
	637	struct regexp *const prog = ReANY(rx);
	638	SSize_t start_shift = prog->check_offset_min;
	639	/* Should be nonnegative! */
	640	SSize_t end_shift = 0;
	641	/* current lowest pos in string where the regex can start matching */
	642	char *rx_origin = strpos;
	643	SV *check;
	644	const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
	645	U8 other_ix = 1 - prog->substrs->check_ix;
	646	bool ml_anch = 0;
	647	char other_last = strpos;/ latest pos 'other' substr already checked to */
	648	char check_at = NULL; / check substr found at this pos */
	649	const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
	650	RXi_GET_DECL(prog,progi);
	651	regmatch_info reginfo_buf; /* create some info to pass to find_byclass */
	652	regmatch_info *const reginfo = &reginfo_buf;
	653	GET_RE_DEBUG_FLAGS_DECL;
	654
	655	PERL_ARGS_ASSERT_RE_INTUIT_START;
	656	PERL_UNUSED_ARG(flags);
	657	PERL_UNUSED_ARG(data);
	658
	659	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	660	"Intuit: trying to determine minimum start position...\n"));
	661
	662	/* for now, assume that all substr offsets are positive. If at some point
	663	* in the future someone wants to do clever things with lookbehind and
	664	* -ve offsets, they'll need to fix up any code in this function
	665	* which uses these offsets. See the thread beginning
	666	* <20140113145929.GF27210@iabyn.com>
	667	*/
	668	assert(prog->substrs->data[0].min_offset >= 0);
	669	assert(prog->substrs->data[0].max_offset >= 0);
	670	assert(prog->substrs->data[1].min_offset >= 0);
	671	assert(prog->substrs->data[1].max_offset >= 0);
	672	assert(prog->substrs->data[2].min_offset >= 0);
	673	assert(prog->substrs->data[2].max_offset >= 0);
	674
	675	/* for now, assume that if both present, that the floating substring
	676	* doesn't start before the anchored substring.
	677	* If you break this assumption (e.g. doing better optimisations
	678	* with lookahead/behind), then you'll need to audit the code in this
	679	* function carefully first
	680	*/
	681	assert(
	682	! ( (prog->anchored_utf8 \|\| prog->anchored_substr)
	683	&& (prog->float_utf8 \|\| prog->float_substr))
	684	\|\| (prog->float_min_offset >= prog->anchored_offset));
	685
	686	/* byte rather than char calculation for efficiency. It fails
	687	* to quickly reject some cases that can't match, but will reject
	688	* them later after doing full char arithmetic */
	689	if (prog->minlen > strend - strpos) {
	690	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	691	" String too short...\n"));
	692	goto fail;
	693	}
	694
	695	RX_MATCH_UTF8_set(rx,utf8_target);
	696	reginfo->is_utf8_target = cBOOL(utf8_target);
	697	reginfo->info_aux = NULL;
	698	reginfo->strbeg = strbeg;
	699	reginfo->strend = strend;
	700	reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx));
	701	reginfo->intuit = 1;
	702	/* not actually used within intuit, but zero for safety anyway */
	703	reginfo->poscache_maxiter = 0;
	704
	705	if (utf8_target) {
	706	if (!prog->check_utf8 && prog->check_substr)
	707	to_utf8_substr(prog);
	708	check = prog->check_utf8;
	709	} else {
	710	if (!prog->check_substr && prog->check_utf8) {
	711	if (! to_byte_substr(prog)) {
	712	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
	713	}
	714	}
	715	check = prog->check_substr;
	716	}
	717
	718	/* dump the various substring data */
	719	DEBUG_OPTIMISE_MORE_r({
	720	int i;
	721	for (i=0; i<=2; i++) {
	722	SV *sv = (utf8_target ? prog->substrs->data[i].utf8_substr
	723	: prog->substrs->data[i].substr);
	724	if (!sv)
	725	continue;
	726
	727	Perl_re_printf( aTHX_
	728	" substrs[%d]: min=%"IVdf" max=%"IVdf" end shift=%"IVdf
	729	" useful=%"IVdf" utf8=%d [%s]\n",
	730	i,
	731	(IV)prog->substrs->data[i].min_offset,
	732	(IV)prog->substrs->data[i].max_offset,
	733	(IV)prog->substrs->data[i].end_shift,
	734	BmUSEFUL(sv),
	735	utf8_target ? 1 : 0,
	736	SvPEEK(sv));
	737	}
	738	});
	739
	740	if (prog->intflags & PREGf_ANCH) { /* Match at \G, beg-of-str or after \n */
	741
	742	/* ml_anch: check after \n?
	743	*
	744	* A note about PREGf_IMPLICIT: on an un-anchored pattern beginning
	745	* with /.*.../, these flags will have been added by the
	746	* compiler:
	747	* /.abc/, /.abc/m: PREGf_IMPLICIT \| PREGf_ANCH_MBOL
	748	* /.*abc/s: PREGf_IMPLICIT \| PREGf_ANCH_SBOL
	749	*/
	750	ml_anch = (prog->intflags & PREGf_ANCH_MBOL)
	751	&& !(prog->intflags & PREGf_IMPLICIT);
	752
	753	if (!ml_anch && !(prog->intflags & PREGf_IMPLICIT)) {
	754	/* we are only allowed to match at BOS or \G */
	755
	756	/* trivially reject if there's a BOS anchor and we're not at BOS.
	757	*
	758	* Note that we don't try to do a similar quick reject for
	759	* \G, since generally the caller will have calculated strpos
	760	* based on pos() and gofs, so the string is already correctly
	761	* anchored by definition; and handling the exceptions would
	762	* be too fiddly (e.g. REXEC_IGNOREPOS).
	763	*/
	764	if ( strpos != strbeg
	765	&& (prog->intflags & PREGf_ANCH_SBOL))
	766	{
	767	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	768	" Not at start...\n"));
	769	goto fail;
	770	}
	771
	772	/* in the presence of an anchor, the anchored (relative to the
	773	* start of the regex) substr must also be anchored relative
	774	* to strpos. So quickly reject if substr isn't found there.
	775	* This works for \G too, because the caller will already have
	776	* subtracted gofs from pos, and gofs is the offset from the
	777	* \G to the start of the regex. For example, in /.abc\Gdef/,
	778	* where substr="abcdef", pos()=3, gofs=4, offset_min=1:
	779	* caller will have set strpos=pos()-4; we look for the substr
	780	* at position pos()-4+1, which lines up with the "a" */
	781
	782	if (prog->check_offset_min == prog->check_offset_max) {
	783	/* Substring at constant offset from beg-of-str... */
	784	SSize_t slen = SvCUR(check);
	785	char *s = HOP3c(strpos, prog->check_offset_min, strend);
	786
	787	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	788	" Looking for check substr at fixed offset %"IVdf"...\n",
	789	(IV)prog->check_offset_min));
	790
	791	if (SvTAIL(check)) {
	792	/* In this case, the regex is anchored at the end too.
	793	* Unless it's a multiline match, the lengths must match
	794	* exactly, give or take a \n. NB: slen >= 1 since
	795	* the last char of check is \n */
	796	if (!multiline
	797	&& ( strend - s > slen
	798	\|\| strend - s < slen - 1
	799	\|\| (strend - s == slen && strend[-1] != '\n')))
	800	{
	801	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	802	" String too long...\n"));
	803	goto fail_finish;
	804	}
	805	/* Now should match s[0..slen-2] */
	806	slen--;
	807	}
	808	if (slen && (SvPVX_const(check) != s
	809	\|\| (slen > 1 && memNE(SvPVX_const(check), s, slen))))
	810	{
	811	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	812	" String not equal...\n"));
	813	goto fail_finish;
	814	}
	815
	816	check_at = s;
	817	goto success_at_start;
	818	}
	819	}
	820	}
	821
	822	end_shift = prog->check_end_shift;
	823
	824	#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
	825	if (end_shift < 0)
	826	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
	827	(IV)end_shift, RX_PRECOMP(prog));
	828	#endif
	829
	830	restart:
	831
	832	/* This is the (re)entry point of the main loop in this function.
	833	* The goal of this loop is to:
	834	* 1) find the "check" substring in the region rx_origin..strend
	835	* (adjusted by start_shift / end_shift). If not found, reject
	836	* immediately.
	837	* 2) If it exists, look for the "other" substr too if defined; for
	838	* example, if the check substr maps to the anchored substr, then
	839	* check the floating substr, and vice-versa. If not found, go
	840	* back to (1) with rx_origin suitably incremented.
	841	* 3) If we find an rx_origin position that doesn't contradict
	842	* either of the substrings, then check the possible additional
	843	* constraints on rx_origin of /^.../m or a known start class.
	844	* If these fail, then depending on which constraints fail, jump
	845	* back to here, or to various other re-entry points further along
	846	* that skip some of the first steps.
	847	* 4) If we pass all those tests, update the BmUSEFUL() count on the
	848	* substring. If the start position was determined to be at the
	849	* beginning of the string - so, not rejected, but not optimised,
	850	* since we have to run regmatch from position 0 - decrement the
	851	* BmUSEFUL() count. Otherwise increment it.
	852	*/
	853
	854
	855	/* first, look for the 'check' substring */
	856
	857	{
	858	U8* start_point;
	859	U8* end_point;
	860
	861	DEBUG_OPTIMISE_MORE_r({
	862	Perl_re_printf( aTHX_
	863	" At restart: rx_origin=%"IVdf" Check offset min: %"IVdf
	864	" Start shift: %"IVdf" End shift %"IVdf
	865	" Real end Shift: %"IVdf"\n",
	866	(IV)(rx_origin - strbeg),
	867	(IV)prog->check_offset_min,
	868	(IV)start_shift,
	869	(IV)end_shift,
	870	(IV)prog->check_end_shift);
	871	});
	872
	873	end_point = HOP3(strend, -end_shift, strbeg);
	874	start_point = HOPMAYBE3(rx_origin, start_shift, end_point);
	875	if (!start_point)
	876	goto fail_finish;
	877
	878
	879	/* If the regex is absolutely anchored to either the start of the
	880	* string (SBOL) or to pos() (ANCH_GPOS), then
	881	* check_offset_max represents an upper bound on the string where
	882	* the substr could start. For the ANCH_GPOS case, we assume that
	883	* the caller of intuit will have already set strpos to
	884	* pos()-gofs, so in this case strpos + offset_max will still be
	885	* an upper bound on the substr.
	886	*/
	887	if (!ml_anch
	888	&& prog->intflags & PREGf_ANCH
	889	&& prog->check_offset_max != SSize_t_MAX)
	890	{
	891	SSize_t len = SvCUR(check) - !!SvTAIL(check);
	892	const char * const anchor =
	893	(prog->intflags & PREGf_ANCH_GPOS ? strpos : strbeg);
	894
	895	/* do a bytes rather than chars comparison. It's conservative;
	896	* so it skips doing the HOP if the result can't possibly end
	897	* up earlier than the old value of end_point.
	898	*/
	899	if ((char*)end_point - anchor > prog->check_offset_max) {
	900	end_point = HOP3lim((U8*)anchor,
	901	prog->check_offset_max,
	902	end_point -len)
	903	+ len;
	904	}
	905	}
	906
	907	check_at = fbm_instr( start_point, end_point,
	908	check, multiline ? FBMrf_MULTILINE : 0);
	909
	910	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	911	" doing 'check' fbm scan, [%"IVdf"..%"IVdf"] gave %"IVdf"\n",
	912	(IV)((char*)start_point - strbeg),
	913	(IV)((char*)end_point - strbeg),
	914	(IV)(check_at ? check_at - strbeg : -1)
	915	));
	916
	917	/* Update the count-of-usability, remove useless subpatterns,
	918	unshift s. */
	919
	920	DEBUG_EXECUTE_r({
	921	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	922	SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
	923	Perl_re_printf( aTHX_ " %s %s substr %s%s%s",
	924	(check_at ? "Found" : "Did not find"),
	925	(check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
	926	? "anchored" : "floating"),
	927	quoted,
	928	RE_SV_TAIL(check),
	929	(check_at ? " at offset " : "...\n") );
	930	});
	931
	932	if (!check_at)
	933	goto fail_finish;
	934	/* set rx_origin to the minimum position where the regex could start
	935	* matching, given the constraint of the just-matched check substring.
	936	* But don't set it lower than previously.
	937	*/
	938
	939	if (check_at - rx_origin > prog->check_offset_max)
	940	rx_origin = HOP3c(check_at, -prog->check_offset_max, rx_origin);
	941	/* Finish the diagnostic message */
	942	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	943	"%ld (rx_origin now %"IVdf")...\n",
	944	(long)(check_at - strbeg),
	945	(IV)(rx_origin - strbeg)
	946	));
	947	}
	948
	949
	950	/* now look for the 'other' substring if defined */
	951
	952	if (utf8_target ? prog->substrs->data[other_ix].utf8_substr
	953	: prog->substrs->data[other_ix].substr)
	954	{
	955	/* Take into account the "other" substring. */
	956	char last, last1;
	957	char *s;
	958	SV* must;
	959	struct reg_substr_datum *other;
	960
	961	do_other_substr:
	962	other = &prog->substrs->data[other_ix];
	963
	964	/* if "other" is anchored:
	965	* we've previously found a floating substr starting at check_at.
	966	* This means that the regex origin must lie somewhere
	967	* between min (rx_origin): HOP3(check_at, -check_offset_max)
	968	* and max: HOP3(check_at, -check_offset_min)
	969	* (except that min will be >= strpos)
	970	* So the fixed substr must lie somewhere between
	971	* HOP3(min, anchored_offset)
	972	* HOP3(max, anchored_offset) + SvCUR(substr)
	973	*/
	974
	975	/* if "other" is floating
	976	* Calculate last1, the absolute latest point where the
	977	* floating substr could start in the string, ignoring any
	978	* constraints from the earlier fixed match. It is calculated
	979	* as follows:
	980	*
	981	* strend - prog->minlen (in chars) is the absolute latest
	982	* position within the string where the origin of the regex
	983	* could appear. The latest start point for the floating
	984	* substr is float_min_offset(*) on from the start of the
	985	* regex. last1 simply combines these two offsets.
	986	*
	987	* (*) You might think the latest start point should be
	988	* float_max_offset from the regex origin, and technically
	989	* you'd be correct. However, consider
	990	* /a\d{2,4}bcd\w/
	991	* Here, float min, max are 3,5 and minlen is 7.
	992	* This can match either
	993	* /a\d\dbcd\w/
	994	* /a\d\d\dbcd\w/
	995	* /a\d\d\d\dbcd\w/
	996	* In the first case, the regex matches minlen chars; in the
	997	* second, minlen+1, in the third, minlen+2.
	998	* In the first case, the floating offset is 3 (which equals
	999	* float_min), in the second, 4, and in the third, 5 (which
	1000	* equals float_max). In all cases, the floating string bcd
	1001	* can never start more than 4 chars from the end of the
	1002	* string, which equals minlen - float_min. As the substring
	1003	* starts to match more than float_min from the start of the
	1004	* regex, it makes the regex match more than minlen chars,
	1005	* and the two cancel each other out. So we can always use
	1006	* float_min - minlen, rather than float_max - minlen for the
	1007	* latest position in the string.
	1008	*
	1009	* Note that -minlen + float_min_offset is equivalent (AFAICT)
	1010	* to CHR_SVLEN(must) - !!SvTAIL(must) + prog->float_end_shift
	1011	*/
	1012
	1013	assert(prog->minlen >= other->min_offset);
	1014	last1 = HOP3c(strend,
	1015	other->min_offset - prog->minlen, strbeg);
	1016
	1017	if (other_ix) {/* i.e. if (other-is-float) */
	1018	/* last is the latest point where the floating substr could
	1019	* start, given any constraints from the earlier fixed
	1020	* match. This constraint is that the floating string starts
	1021	* <= float_max_offset chars from the regex origin (rx_origin).
	1022	* If this value is less than last1, use it instead.
	1023	*/
	1024	assert(rx_origin <= last1);
	1025	last =
	1026	/* this condition handles the offset==infinity case, and
	1027	* is a short-cut otherwise. Although it's comparing a
	1028	* byte offset to a char length, it does so in a safe way,
	1029	* since 1 char always occupies 1 or more bytes,
	1030	* so if a string range is (last1 - rx_origin) bytes,
	1031	* it will be less than or equal to (last1 - rx_origin)
	1032	* chars; meaning it errs towards doing the accurate HOP3
	1033	* rather than just using last1 as a short-cut */
	1034	(last1 - rx_origin) < other->max_offset
	1035	? last1
	1036	: (char*)HOP3lim(rx_origin, other->max_offset, last1);
	1037	}
	1038	else {
	1039	assert(strpos + start_shift <= check_at);
	1040	last = HOP4c(check_at, other->min_offset - start_shift,
	1041	strbeg, strend);
	1042	}
	1043
	1044	s = HOP3c(rx_origin, other->min_offset, strend);
	1045	if (s < other_last) /* These positions already checked */
	1046	s = other_last;
	1047
	1048	must = utf8_target ? other->utf8_substr : other->substr;
	1049	assert(SvPOK(must));
	1050	{
	1051	char *from = s;
	1052	char *to = last + SvCUR(must) - (SvTAIL(must)!=0);
	1053
	1054	if (from > to) {
	1055	s = NULL;
	1056	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1057	" skipping 'other' fbm scan: %"IVdf" > %"IVdf"\n",
	1058	(IV)(from - strbeg),
	1059	(IV)(to - strbeg)
	1060	));
	1061	}
	1062	else {
	1063	s = fbm_instr(
	1064	(unsigned char*)from,
	1065	(unsigned char*)to,
	1066	must,
	1067	multiline ? FBMrf_MULTILINE : 0
	1068	);
	1069	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1070	" doing 'other' fbm scan, [%"IVdf"..%"IVdf"] gave %"IVdf"\n",
	1071	(IV)(from - strbeg),
	1072	(IV)(to - strbeg),
	1073	(IV)(s ? s - strbeg : -1)
	1074	));
	1075	}
	1076	}
	1077
	1078	DEBUG_EXECUTE_r({
	1079	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	1080	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	1081	Perl_re_printf( aTHX_ " %s %s substr %s%s",
	1082	s ? "Found" : "Contradicts",
	1083	other_ix ? "floating" : "anchored",
	1084	quoted, RE_SV_TAIL(must));
	1085	});
	1086
	1087
	1088	if (!s) {
	1089	/* last1 is latest possible substr location. If we didn't
	1090	* find it before there, we never will */
	1091	if (last >= last1) {
	1092	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1093	"; giving up...\n"));
	1094	goto fail_finish;
	1095	}
	1096
	1097	/* try to find the check substr again at a later
	1098	* position. Maybe next time we'll find the "other" substr
	1099	* in range too */
	1100	other_last = HOP3c(last, 1, strend) /* highest failure */;
	1101	rx_origin =
	1102	other_ix /* i.e. if other-is-float */
	1103	? HOP3c(rx_origin, 1, strend)
	1104	: HOP4c(last, 1 - other->min_offset, strbeg, strend);
	1105	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1106	"; about to retry %s at offset %ld (rx_origin now %"IVdf")...\n",
	1107	(other_ix ? "floating" : "anchored"),
	1108	(long)(HOP3c(check_at, 1, strend) - strbeg),
	1109	(IV)(rx_origin - strbeg)
	1110	));
	1111	goto restart;
	1112	}
	1113	else {
	1114	if (other_ix) { /* if (other-is-float) */
	1115	/* other_last is set to s, not s+1, since its possible for
	1116	* a floating substr to fail first time, then succeed
	1117	* second time at the same floating position; e.g.:
	1118	* "-AB--AABZ" =~ /\wAB\d*Z/
	1119	* The first time round, anchored and float match at
	1120	* "-(AB)--AAB(Z)" then fail on the initial \w character
	1121	* class. Second time round, they match at "-AB--A(AB)(Z)".
	1122	*/
	1123	other_last = s;
	1124	}
	1125	else {
	1126	rx_origin = HOP3c(s, -other->min_offset, strbeg);
	1127	other_last = HOP3c(s, 1, strend);
	1128	}
	1129	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1130	" at offset %ld (rx_origin now %"IVdf")...\n",
	1131	(long)(s - strbeg),
	1132	(IV)(rx_origin - strbeg)
	1133	));
	1134
	1135	}
	1136	}
	1137	else {
	1138	DEBUG_OPTIMISE_MORE_r(
	1139	Perl_re_printf( aTHX_
	1140	" Check-only match: offset min:%"IVdf" max:%"IVdf
	1141	" check_at:%"IVdf" rx_origin:%"IVdf" rx_origin-check_at:%"IVdf
	1142	" strend:%"IVdf"\n",
	1143	(IV)prog->check_offset_min,
	1144	(IV)prog->check_offset_max,
	1145	(IV)(check_at-strbeg),
	1146	(IV)(rx_origin-strbeg),
	1147	(IV)(rx_origin-check_at),
	1148	(IV)(strend-strbeg)
	1149	)
	1150	);
	1151	}
	1152
	1153	postprocess_substr_matches:
	1154
	1155	/* handle the extra constraint of /^.../m if present */
	1156
	1157	if (ml_anch && rx_origin != strbeg && rx_origin[-1] != '\n') {
	1158	char *s;
	1159
	1160	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1161	" looking for /^/m anchor"));
	1162
	1163	/* we have failed the constraint of a \n before rx_origin.
	1164	* Find the next \n, if any, even if it's beyond the current
	1165	* anchored and/or floating substrings. Whether we should be
	1166	* scanning ahead for the next \n or the next substr is debatable.
	1167	* On the one hand you'd expect rare substrings to appear less
	1168	* often than \n's. On the other hand, searching for \n means
	1169	* we're effectively flipping between check_substr and "\n" on each
	1170	* iteration as the current "rarest" string candidate, which
	1171	* means for example that we'll quickly reject the whole string if
	1172	* it hasn't got a \n, rather than trying every substr position
	1173	* first
	1174	*/
	1175
	1176	s = HOP3c(strend, - prog->minlen, strpos);
	1177	if (s <= rx_origin \|\|
	1178	! ( rx_origin = (char *)memchr(rx_origin, '\n', s - rx_origin)))
	1179	{
	1180	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1181	" Did not find /%s^%s/m...\n",
	1182	PL_colors[0], PL_colors[1]));
	1183	goto fail_finish;
	1184	}
	1185
	1186	/* earliest possible origin is 1 char after the \n.
	1187	* (since *rx_origin == '\n', it's safe to ++ here rather than
	1188	* HOP(rx_origin, 1)) */
	1189	rx_origin++;
	1190
	1191	if (prog->substrs->check_ix == 0 /* check is anchored */
	1192	\|\| rx_origin >= HOP3c(check_at, - prog->check_offset_min, strpos))
	1193	{
	1194	/* Position contradicts check-string; either because
	1195	* check was anchored (and thus has no wiggle room),
	1196	* or check was float and rx_origin is above the float range */
	1197	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1198	" Found /%s^%s/m, about to restart lookup for check-string with rx_origin %ld...\n",
	1199	PL_colors[0], PL_colors[1], (long)(rx_origin - strbeg)));
	1200	goto restart;
	1201	}
	1202
	1203	/* if we get here, the check substr must have been float,
	1204	* is in range, and we may or may not have had an anchored
	1205	* "other" substr which still contradicts */
	1206	assert(prog->substrs->check_ix); /* check is float */
	1207
	1208	if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
	1209	/* whoops, the anchored "other" substr exists, so we still
	1210	* contradict. On the other hand, the float "check" substr
	1211	* didn't contradict, so just retry the anchored "other"
	1212	* substr */
	1213	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1214	" Found /%s^%s/m, rescanning for anchored from offset %"IVdf" (rx_origin now %"IVdf")...\n",
	1215	PL_colors[0], PL_colors[1],
	1216	(IV)(rx_origin - strbeg + prog->anchored_offset),
	1217	(IV)(rx_origin - strbeg)
	1218	));
	1219	goto do_other_substr;
	1220	}
	1221
	1222	/* success: we don't contradict the found floating substring
	1223	* (and there's no anchored substr). */
	1224	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1225	" Found /%s^%s/m with rx_origin %ld...\n",
	1226	PL_colors[0], PL_colors[1], (long)(rx_origin - strbeg)));
	1227	}
	1228	else {
	1229	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1230	" (multiline anchor test skipped)\n"));
	1231	}
	1232
	1233	success_at_start:
	1234
	1235
	1236	/* if we have a starting character class, then test that extra constraint.
	1237	* (trie stclasses are too expensive to use here, we are better off to
	1238	* leave it to regmatch itself) */
	1239
	1240	if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
	1241	const U8* const str = (U8*)STRING(progi->regstclass);
	1242
	1243	/* XXX this value could be pre-computed */
	1244	const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
	1245	? (reginfo->is_utf8_pat
	1246	? utf8_distance(str + STR_LEN(progi->regstclass), str)
	1247	: STR_LEN(progi->regstclass))
	1248	: 1);
	1249	char * endpos;
	1250	char *s;
	1251	/* latest pos that a matching float substr constrains rx start to */
	1252	char *rx_max_float = NULL;
	1253
	1254	/* if the current rx_origin is anchored, either by satisfying an
	1255	* anchored substring constraint, or a /^.../m constraint, then we
	1256	* can reject the current origin if the start class isn't found
	1257	* at the current position. If we have a float-only match, then
	1258	* rx_origin is constrained to a range; so look for the start class
	1259	* in that range. if neither, then look for the start class in the
	1260	* whole rest of the string */
	1261
	1262	/* XXX DAPM it's not clear what the minlen test is for, and why
	1263	* it's not used in the floating case. Nothing in the test suite
	1264	* causes minlen == 0 here. See <20140313134639.GS12844@iabyn.com>.
	1265	* Here are some old comments, which may or may not be correct:
	1266	*
	1267	* minlen == 0 is possible if regstclass is \b or \B,
	1268	* and the fixed substr is ''$.
	1269	* Since minlen is already taken into account, rx_origin+1 is
	1270	* before strend; accidentally, minlen >= 1 guarantees no false
	1271	* positives at rx_origin + 1 even for \b or \B. But (minlen? 1 :
	1272	* 0) below assumes that regstclass does not come from lookahead...
	1273	* If regstclass takes bytelength more than 1: If charlength==1, OK.
	1274	* This leaves EXACTF-ish only, which are dealt with in
	1275	* find_byclass().
	1276	*/
	1277
	1278	if (prog->anchored_substr \|\| prog->anchored_utf8 \|\| ml_anch)
	1279	endpos= HOP3c(rx_origin, (prog->minlen ? cl_l : 0), strend);
	1280	else if (prog->float_substr \|\| prog->float_utf8) {
	1281	rx_max_float = HOP3c(check_at, -start_shift, strbeg);
	1282	endpos= HOP3c(rx_max_float, cl_l, strend);
	1283	}
	1284	else
	1285	endpos= strend;
	1286
	1287	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1288	" looking for class: start_shift: %"IVdf" check_at: %"IVdf
	1289	" rx_origin: %"IVdf" endpos: %"IVdf"\n",
	1290	(IV)start_shift, (IV)(check_at - strbeg),
	1291	(IV)(rx_origin - strbeg), (IV)(endpos - strbeg)));
	1292
	1293	s = find_byclass(prog, progi->regstclass, rx_origin, endpos,
	1294	reginfo);
	1295	if (!s) {
	1296	if (endpos == strend) {
	1297	DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
	1298	" Could not match STCLASS...\n") );
	1299	goto fail;
	1300	}
	1301	DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
	1302	" This position contradicts STCLASS...\n") );
	1303	if ((prog->intflags & PREGf_ANCH) && !ml_anch
	1304	&& !(prog->intflags & PREGf_IMPLICIT))
	1305	goto fail;
	1306
	1307	/* Contradict one of substrings */
	1308	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	1309	if (prog->substrs->check_ix == 1) { /* check is float */
	1310	/* Have both, check_string is floating */
	1311	assert(rx_origin + start_shift <= check_at);
	1312	if (rx_origin + start_shift != check_at) {
	1313	/* not at latest position float substr could match:
	1314	* Recheck anchored substring, but not floating.
	1315	* The condition above is in bytes rather than
	1316	* chars for efficiency. It's conservative, in
	1317	* that it errs on the side of doing 'goto
	1318	* do_other_substr'. In this case, at worst,
	1319	* an extra anchored search may get done, but in
	1320	* practice the extra fbm_instr() is likely to
	1321	* get skipped anyway. */
	1322	DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
	1323	" about to retry anchored at offset %ld (rx_origin now %"IVdf")...\n",
	1324	(long)(other_last - strbeg),
	1325	(IV)(rx_origin - strbeg)
	1326	));
	1327	goto do_other_substr;
	1328	}
	1329	}
	1330	}
	1331	else {
	1332	/* float-only */
	1333
	1334	if (ml_anch) {
	1335	/* In the presence of ml_anch, we might be able to
	1336	* find another \n without breaking the current float
	1337	* constraint. */
	1338
	1339	/* strictly speaking this should be HOP3c(..., 1, ...),
	1340	* but since we goto a block of code that's going to
	1341	* search for the next \n if any, its safe here */
	1342	rx_origin++;
	1343	DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
	1344	" about to look for /%s^%s/m starting at rx_origin %ld...\n",
	1345	PL_colors[0], PL_colors[1],
	1346	(long)(rx_origin - strbeg)) );
	1347	goto postprocess_substr_matches;
	1348	}
	1349
	1350	/* strictly speaking this can never be true; but might
	1351	* be if we ever allow intuit without substrings */
	1352	if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
	1353	goto fail;
	1354
	1355	rx_origin = rx_max_float;
	1356	}
	1357
	1358	/* at this point, any matching substrings have been
	1359	* contradicted. Start again... */
	1360
	1361	rx_origin = HOP3c(rx_origin, 1, strend);
	1362
	1363	/* uses bytes rather than char calculations for efficiency.
	1364	* It's conservative: it errs on the side of doing 'goto restart',
	1365	* where there is code that does a proper char-based test */
	1366	if (rx_origin + start_shift + end_shift > strend) {
	1367	DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
	1368	" Could not match STCLASS...\n") );
	1369	goto fail;
	1370	}
	1371	DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
	1372	" about to look for %s substr starting at offset %ld (rx_origin now %"IVdf")...\n",
	1373	(prog->substrs->check_ix ? "floating" : "anchored"),
	1374	(long)(rx_origin + start_shift - strbeg),
	1375	(IV)(rx_origin - strbeg)
	1376	));
	1377	goto restart;
	1378	}
	1379
	1380	/* Success !!! */
	1381
	1382	if (rx_origin != s) {
	1383	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1384	" By STCLASS: moving %ld --> %ld\n",
	1385	(long)(rx_origin - strbeg), (long)(s - strbeg))
	1386	);
	1387	}
	1388	else {
	1389	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1390	" Does not contradict STCLASS...\n");
	1391	);
	1392	}
	1393	}
	1394
	1395	/* Decide whether using the substrings helped */
	1396
	1397	if (rx_origin != strpos) {
	1398	/* Fixed substring is found far enough so that the match
	1399	cannot start at strpos. */
	1400
	1401	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ " try at offset...\n"));
	1402	++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
	1403	}
	1404	else {
	1405	/* The found rx_origin position does not prohibit matching at
	1406	* strpos, so calling intuit didn't gain us anything. Decrement
	1407	* the BmUSEFUL() count on the check substring, and if we reach
	1408	* zero, free it. */
	1409	if (!(prog->intflags & PREGf_NAUGHTY)
	1410	&& (utf8_target ? (
	1411	prog->check_utf8 /* Could be deleted already */
	1412	&& --BmUSEFUL(prog->check_utf8) < 0
	1413	&& (prog->check_utf8 == prog->float_utf8)
	1414	) : (
	1415	prog->check_substr /* Could be deleted already */
	1416	&& --BmUSEFUL(prog->check_substr) < 0
	1417	&& (prog->check_substr == prog->float_substr)
	1418	)))
	1419	{
	1420	/* If flags & SOMETHING - do not do it many times on the same match */
	1421	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ " ... Disabling check substring...\n"));
	1422	/* XXX Does the destruction order have to change with utf8_target? */
	1423	SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
	1424	SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
	1425	prog->check_substr = prog->check_utf8 = NULL; /* disable */
	1426	prog->float_substr = prog->float_utf8 = NULL; /* clear */
	1427	check = NULL; /* abort */
	1428	/* XXXX This is a remnant of the old implementation. It
	1429	looks wasteful, since now INTUIT can use many
	1430	other heuristics. */
	1431	prog->extflags &= ~RXf_USE_INTUIT;
	1432	}
	1433	}
	1434
	1435	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	1436	"Intuit: %sSuccessfully guessed:%s match at offset %ld\n",
	1437	PL_colors[4], PL_colors[5], (long)(rx_origin - strbeg)) );
	1438
	1439	return rx_origin;
	1440
	1441	fail_finish: /* Substring not found */
	1442	if (prog->check_substr \|\| prog->check_utf8) /* could be removed already */
	1443	BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
	1444	fail:
	1445	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ "%sMatch rejected by optimizer%s\n",
	1446	PL_colors[4], PL_colors[5]));
	1447	return NULL;
	1448	}
	1449
	1450
	/* Declare `trie_type`, a const local of anonymous enum type, at the point of
	 * expansion.  Its value is chosen from (scan)->flags together with the
	 * `utf8_target` variable that must be in scope at the expansion site:
	 *
	 *   EXACT      -> trie_utf8  or trie_plain
	 *   EXACTL     -> trie_utf8l or trie_plain
	 *   EXACTFA    -> trie_utf8_exactfa_fold or trie_latin_utf8_exactfa_fold
	 *   EXACTFLU8  -> trie_flu8 (regardless of utf8_target)
	 *   otherwise  -> trie_utf8_fold or trie_latin_utf8_fold
	 *
	 * `scan` is parenthesized at every use so that any pointer-valued
	 * expression (e.g. `base + 1`) may be passed, not just a plain identifier.
	 * Because the expansion is a declaration, it introduces the enumerator
	 * names into the enclosing block scope. */
	#define DECL_TRIE_TYPE(scan) \
	    const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold, \
	                 trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold, \
	                 trie_utf8l, trie_flu8 } \
	        trie_type = (((scan)->flags == EXACT) \
	                     ? (utf8_target ? trie_utf8 : trie_plain) \
	                     : ((scan)->flags == EXACTL) \
	                       ? (utf8_target ? trie_utf8l : trie_plain) \
	                       : ((scan)->flags == EXACTFA) \
	                         ? (utf8_target \
	                            ? trie_utf8_exactfa_fold \
	                            : trie_latin_utf8_exactfa_fold) \
	                         : ((scan)->flags == EXACTFLU8 \
	                            ? trie_flu8 \
	                            : (utf8_target \
	                               ? trie_utf8_fold \
	                               : trie_latin_utf8_fold)))
	1468
	/* Read the next character to feed to a trie and translate it into the trie's
	 * character id.
	 *
	 *  trie_type    one of the DECL_TRIE_TYPE enumerators; selects the decoder
	 *  trie         supplies charmap[], used for code points below 256
	 *  widecharmap  if non-NULL, a hash looked up (keyed on the raw bytes of
	 *               uvc) for code points of 256 and above
	 *  uc           pointer to the current input position
	 *  uscan        cursor into foldbuf, advanced while a fold result that is
	 *               longer than one character is being handed out
	 *  len          set by utf8n_to_uvchr(), or to UTF8SKIP(uc), or to 1,
	 *               depending on the case; reset to 0 whenever the character
	 *               was taken from foldbuf rather than from uc
	 *  uvc          receives the code point that was read
	 *  charid       receives the trie char id, or 0 if the code point has no
	 *               mapping
	 *  foldlen      bytes of fold output still pending in foldbuf; > 0 means
	 *               the next character comes from uscan instead of uc
	 *  foldbuf      buffer that receives the folded form
	 *  uniflags     flags handed through to utf8n_to_uvchr()
	 *
	 * Also relies on `utf8_target` being in scope.  The expansion defines the
	 * label do_trie_utf8_fold, and labels have function scope in C, so the macro
	 * can be expanded at most once per function. */
	#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
	    STMT_START { \
	        STRLEN skiplen; \
	        U8 flags = FOLD_FLAGS_FULL; \
	        switch (trie_type) { \
	        case trie_flu8: \
	            _CHECK_AND_WARN_PROBLEMATIC_LOCALE; \
	            if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) { \
	                _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc)); \
	            } \
	            goto do_trie_utf8_fold; \
	        case trie_utf8_exactfa_fold: \
	            flags |= FOLD_FLAGS_NOMIX_ASCII; \
	            /* FALLTHROUGH */ \
	        case trie_utf8_fold: \
	          do_trie_utf8_fold: \
	            if ( foldlen>0 ) { \
	                /* still draining the previous fold result */ \
	                uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	                foldlen -= len; \
	                uscan += len; \
	                len=0; \
	            } else { \
	                uvc = _to_utf8_fold_flags( (const U8*) uc, foldbuf, &foldlen, flags); \
	                len = UTF8SKIP(uc); \
	                skiplen = UVCHR_SKIP( uvc ); \
	                foldlen -= skiplen; \
	                uscan = foldbuf + skiplen; \
	            } \
	            break; \
	        case trie_latin_utf8_exactfa_fold: \
	            flags |= FOLD_FLAGS_NOMIX_ASCII; \
	            /* FALLTHROUGH */ \
	        case trie_latin_utf8_fold: \
	            if ( foldlen>0 ) { \
	                /* still draining the previous fold result */ \
	                uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
	                foldlen -= len; \
	                uscan += len; \
	                len=0; \
	            } else { \
	                len = 1; \
	                uvc = _to_fold_latin1( (U8) *uc, foldbuf, &foldlen, flags); \
	                skiplen = UVCHR_SKIP( uvc ); \
	                foldlen -= skiplen; \
	                uscan = foldbuf + skiplen; \
	            } \
	            break; \
	        case trie_utf8l: \
	            _CHECK_AND_WARN_PROBLEMATIC_LOCALE; \
	            if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) { \
	                _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc)); \
	            } \
	            /* FALLTHROUGH */ \
	        case trie_utf8: \
	            uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \
	            break; \
	        case trie_plain: \
	            uvc = (UV)*uc; \
	            len = 1; \
	        } \
	        if (uvc < 256) { \
	            charid = trie->charmap[ uvc ]; \
	        } \
	        else { \
	            charid = 0; \
	            if (widecharmap) { \
	                SV** const svpp = hv_fetch(widecharmap, \
	                                           (char*)&uvc, sizeof(UV), 0); \
	                if (svpp) \
	                    charid = (U16)SvIV(*svpp); \
	            } \
	        } \
	    } STMT_END
	1541
	/* Shorthand for dump_exec_pos(): the string end and beginning are taken from
	 * reginfo, and the `startpos` variable in scope at the expansion site is
	 * passed along; only li, s, doutf8 and depth are given by the caller. */
	#define DUMP_EXEC_POS(li,s,doutf8,depth) \
	    dump_exec_pos(li, s, \
	                  (reginfo->strend), (reginfo->strbeg), \
	                  startpos, doutf8, depth)
	1545
	/* Scan s forward one byte at a time up to and including e, looking for a
	 * position that passes three tests in order:
	 *   1. COND (the caller's cheap pre-check on the current position),
	 *   2. the pattern has length 1, or folder(s, pat_string, ln) accepts it,
	 *   3. we are running under intuit, or regtry() succeeds at s.
	 * On the first such position control jumps to the got_it label with s left
	 * pointing there; otherwise the loop ends with s == e + 1.
	 * Uses s, e, ln, folder, pat_string and reginfo from the expansion site. */
	#define REXEC_FBC_EXACTISH_SCAN(COND) \
	    STMT_START { \
	        while (s <= e) { \
	            if (    (COND) \
	                 && (ln == 1 || folder(s, pat_string, ln)) \
	                 && (reginfo->intuit || regtry(reginfo, &s)) ) \
	                goto got_it; \
	            s++; \
	        } \
	    } STMT_END
	1556
	/* Run CODE at each position of s while s < strend, advancing s by
	 * UTF8SKIP(s) bytes after every iteration.  CODE is pasted verbatim ahead of
	 * the advance, so it must supply its own terminating ';' or braces, and it
	 * may leave the loop early (for instance with a goto).  Uses the s and
	 * strend variables of the expansion site. */
	#define REXEC_FBC_UTF8_SCAN(CODE) \
	    STMT_START { \
	        while (s < strend) { \
	            CODE \
	            s += UTF8SKIP(s); \
	        } \
	    } STMT_END
	1564
	/* Byte-wise counterpart of REXEC_FBC_UTF8_SCAN: run CODE at each position of
	 * s while s < strend, advancing s by exactly one byte per iteration.  CODE
	 * is pasted verbatim ahead of the increment, so it must supply its own
	 * terminating ';' or braces.  Uses the s and strend variables of the
	 * expansion site. */
	#define REXEC_FBC_SCAN(CODE) \
	    STMT_START { \
	        while (s < strend) { \
	            CODE \
	            s++; \
	        } \
	    } STMT_END
	1572
	/* UTF-8 scan for positions satisfying COND.  At a position where COND holds
	 * and tmp is true, the position is accepted immediately under intuit, or
	 * else tried with regtry(); success jumps to got_it.  After a failed try (or
	 * when tmp was false) tmp becomes `doevery`, so with doevery == 0 the
	 * following positions of the same run of COND-matching characters are not
	 * tried again.  Any position where COND fails resets tmp to 1.
	 * Uses s, strend, tmp, doevery and reginfo from the expansion site. */
	#define REXEC_FBC_UTF8_CLASS_SCAN(COND) \
	    REXEC_FBC_UTF8_SCAN( /* Loops while (s < strend) */ \
	        if (COND) { \
	            if (tmp && (reginfo->intuit || regtry(reginfo, &s))) \
	                goto got_it; \
	            else \
	                tmp = doevery; \
	        } \
	        else \
	            tmp = 1; \
	    )
	1584
	/* Byte-wise scan for positions satisfying COND; same logic as
	 * REXEC_FBC_UTF8_CLASS_SCAN but advancing one byte at a time.  At a
	 * position where COND holds and tmp is true, the position is accepted
	 * immediately under intuit, or else tried with regtry(); success jumps to
	 * got_it.  After a failed try (or when tmp was false) tmp becomes
	 * `doevery`; any position where COND fails resets tmp to 1.
	 * Uses s, strend, tmp, doevery and reginfo from the expansion site. */
	#define REXEC_FBC_CLASS_SCAN(COND) \
	    REXEC_FBC_SCAN( /* Loops while (s < strend) */ \
	        if (COND) { \
	            if (tmp && (reginfo->intuit || regtry(reginfo, &s))) \
	                goto got_it; \
	            else \
	                tmp = doevery; \
	        } \
	        else \
	            tmp = 1; \
	    )
	1596
	/* Dispatch on utf8_target: run the UTF-8 class scan with CONDUTF8 when the
	 * target is UTF-8, otherwise the byte-wise class scan with COND.
	 * NOTE: this expands to a bare if/else (no STMT_START/STMT_END wrapper), so
	 * it must not be used as the unbraced body of another if. */
	#define REXEC_FBC_CSCAN(CONDUTF8,COND) \
	    if (utf8_target) { \
	        REXEC_FBC_UTF8_CLASS_SCAN(CONDUTF8); \
	    } \
	    else { \
	        REXEC_FBC_CLASS_SCAN(COND); \
	    }
	1604
	1605	/* The three macros below are slightly different versions of the same logic.
	1606	*
	1607	* The first is for /a and /aa when the target string is UTF-8. This can only
	1608	* match ascii, but it must advance based on UTF-8. The other two handle the
	1609	* non-UTF-8 and the more generic UTF-8 cases. In all three, we are looking
	1610	* for the boundary (or non-boundary) between a word and non-word character.
	1611	* The utf8 and non-utf8 cases have the same logic, but the details must be
	1612	* different. Find the "wordness" of the character just prior to this one, and
	1613	* compare it with the wordness of this one. If they differ, we have a
	1614	* boundary. At the beginning of the string, pretend that the previous
	1615	* character was a new-line.
	1616	*
	1617	* All these macros uncleanly have side-effects with each other and outside
	1618	* variables. So far it's been too much trouble to clean-up
	1619	*
	1620	* TEST_NON_UTF8 is the macro or function to call to test if its byte input is
	1621	* a word character or not.
	1622	* IF_SUCCESS is code to do if it finds that we are at a boundary between
	1623	* word/non-word
	1624	* IF_FAIL is code to do if we aren't at a boundary between word/non-word
	1625	*
	1626	* Exactly one of the two IF_FOO parameters is a no-op, depending on whether we
	1627	* are looking for a boundary or for a non-boundary. If we are looking for a
	1628	* boundary, we want IF_FAIL to be the no-op, and for IF_SUCCESS to go out and
	1629	* see if this tentative match actually works, and if so, to quit the loop
	1630	* here. And vice-versa if we are looking for a non-boundary.
	1631	*
	1632	* 'tmp' below in the next three macros in the REXEC_FBC_SCAN and
	1633	* REXEC_FBC_UTF8_SCAN loops is a loop invariant, a bool giving the return of
	1634	* TEST_NON_UTF8(s-1). To see this, note that that's what it is defined to be
	1635	* at entry to the loop, and to get to the IF_FAIL branch, tmp must equal
	1636	* TEST_NON_UTF8(s), and in the opposite branch, IF_SUCCESS, tmp is the
	1637	* complement of that. But in that branch we complement tmp, meaning that at the
	1638	* bottom of the loop tmp is always going to be equal to TEST_NON_UTF8(s),
	1639	* which means at the top of the loop in the next iteration, it is
	1640	* TEST_NON_UTF8(s-1) */
	/* Word-boundary scan for a UTF-8 target where only the byte-level test
	 * TEST_NON_UTF8 is applied.  tmp starts as TEST_NON_UTF8 of the byte before
	 * s (or of '\n' when s is at reginfo->strbeg).  At each position, if the
	 * test result for *s differs from tmp, tmp is flipped and IF_SUCCESS runs;
	 * otherwise IF_FAIL runs.  s advances by whole UTF-8 characters.  The
	 * expansion already ends in ';'. */
	#define FBC_UTF8_A(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	    tmp = (s != reginfo->strbeg) ? UCHARAT(s - 1) : '\n'; \
	    tmp = TEST_NON_UTF8(tmp); \
	    REXEC_FBC_UTF8_SCAN( /* advances s while s < strend */ \
	        if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	            tmp = !tmp; \
	            IF_SUCCESS; /* Is a boundary if values for s-1 and s differ */ \
	        } \
	        else { \
	            IF_FAIL; \
	        } \
	    );
	1653
	/* Like FBC_UTF8_A, but TEST_UV is a macro which takes a UV as its input, and
	 * TEST_UTF8 is a macro that for the same input code points returns identically
	 * to TEST_UV, but takes a pointer to a UTF-8 encoded string instead.
	 *
	 * tmp is seeded from the character before s: '\n' when s is at
	 * reginfo->strbeg, otherwise the code point decoded at the position that
	 * reghop3() finds one character back (never before strbeg).  Both pointer
	 * arguments to reghop3() are passed as U8* -- they must stay pointers, not
	 * be narrowed to a single byte.  The expansion already ends in ';'. */
	#define FBC_UTF8(TEST_UV, TEST_UTF8, IF_SUCCESS, IF_FAIL) \
	    if (s == reginfo->strbeg) { \
	        tmp = '\n'; \
	    } \
	    else { /* Back-up to the start of the previous character */ \
	        U8 * const r = reghop3((U8*)s, -1, (U8*)reginfo->strbeg); \
	        tmp = utf8n_to_uvchr(r, (U8*) reginfo->strend - r, \
	                             0, UTF8_ALLOW_DEFAULT); \
	    } \
	    tmp = TEST_UV(tmp); \
	    LOAD_UTF8_CHARCLASS_ALNUM(); \
	    REXEC_FBC_UTF8_SCAN( /* advances s while s < strend */ \
	        if (tmp == ! (TEST_UTF8((U8 *) s))) { \
	            tmp = !tmp; \
	            IF_SUCCESS; \
	        } \
	        else { \
	            IF_FAIL; \
	        } \
	    );
	1677
	/* Shared driver for the BOUND and NBOUND scans, set up by the FBC_BOUND etc.
	 * macros.  For a UTF-8 target the complete scanning code is supplied by the
	 * caller as UTF8_CODE; otherwise a byte-wise scan is done here using
	 * TEST_NON_UTF8.  In both cases a final test is made against end-of-string,
	 * which is treated as a '\n'.  IF_SUCCESS runs at each boundary found,
	 * IF_FAIL at each non-boundary.  Expands to bare statements (no wrapper). */
	#define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
	    if (utf8_target) { \
	        UTF8_CODE \
	    } \
	    else { /* Not utf8 */ \
	        tmp = (s != reginfo->strbeg) ? UCHARAT(s - 1) : '\n'; \
	        tmp = TEST_NON_UTF8(tmp); \
	        REXEC_FBC_SCAN( /* advances s while s < strend */ \
	            if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
	                IF_SUCCESS; \
	                tmp = !tmp; \
	            } \
	            else { \
	                IF_FAIL; \
	            } \
	        ); \
	    } \
	    /* Here, things have been set up by the previous code so that tmp is \
	     * the return of TEST_NON_UTF8(s-1) or TEST_UTF8(s-1) (depending on \
	     * the utf8ness of the target).  We also have to check if this matches \
	     * against the EOS, which we treat as a \n (which is the same value in \
	     * both UTF-8 and non-UTF-8, so we can use the non-utf8 test condition \
	     * even for a UTF-8 string) */ \
	    if (tmp == ! TEST_NON_UTF8('\n')) { \
	        IF_SUCCESS; \
	    } \
	    else { \
	        IF_FAIL; \
	    }
	1710
	1711	/* This is the macro to use when we want to see if something that looks like it
	1712	* could match, actually does, and if so exits the loop. The candidate at 's'
		* is accepted outright when reginfo->intuit is set; otherwise regtry() must
		* succeed there. Expands to an unterminated 'goto got_it', so the
		* expansion site supplies the label and the trailing semicolon. */
	1713	#define REXEC_FBC_TRYIT \
	1714	if ((reginfo->intuit || regtry(reginfo, &s))) \
	1715	goto got_it
	1716
	1717	/* The only difference between the BOUND and NBOUND cases is that
	1718	* REXEC_FBC_TRYIT is called when matched in BOUND, and when non-matched in
	1719	* NBOUND. This is accomplished by passing it as either the if or else clause,
	1720	* with the other one being empty (PLACEHOLDER is defined as empty).
	1721	*
	1722	* The TEST_FOO parameters are for operating on different forms of input, but
	1723	* all should be ones that return identically for the same underlying code
	1724	* points.
		*
		* FBC_BOUND: try a match at every position where the test result changes;
		* FBC_UTF8 supplies the code for UTF-8 targets. */
	1725	#define FBC_BOUND(TEST_NON_UTF8, TEST_UV, TEST_UTF8) \
	1726	FBC_BOUND_COMMON( \
	1727	FBC_UTF8(TEST_UV, TEST_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), \
	1728	TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1729
		/* As FBC_BOUND, but the UTF-8 branch is FBC_UTF8_A driven by the same
		* TEST_NON_UTF8 macro that is used for non-UTF-8 targets */
	1730	#define FBC_BOUND_A(TEST_NON_UTF8) \
	1731	FBC_BOUND_COMMON( \
	1732	FBC_UTF8_A(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), \
	1733	TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
	1734
		/* Complement of FBC_BOUND: REXEC_FBC_TRYIT is passed as the IF_FAIL
		* clause, so a match is tried at every position where the test result
		* does not change */
	1735	#define FBC_NBOUND(TEST_NON_UTF8, TEST_UV, TEST_UTF8) \
	1736	FBC_BOUND_COMMON( \
	1737	FBC_UTF8(TEST_UV, TEST_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), \
	1738	TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1739
		/* Complement of FBC_BOUND_A: REXEC_FBC_TRYIT is passed as the IF_FAIL
		* clause, with FBC_UTF8_A supplying the UTF-8 branch */
	1740	#define FBC_NBOUND_A(TEST_NON_UTF8) \
	1741	FBC_BOUND_COMMON( \
	1742	FBC_UTF8_A(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), \
	1743	TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
	1744
	1745	#ifdef DEBUGGING
		/* DEBUGGING-only helper: searches 'invlist' for 'cp_in' and asserts that
		* the search result is non-negative before handing it back for use as an
		* index into the matching inversion map. */
	1746	static IV
	1747	S_get_break_val_cp_checked(SV* const invlist, const UV cp_in) {
	1748	IV cp_out = Perl__invlist_search(invlist, cp_in);
	1749	assert(cp_out >= 0);
	1750	return cp_out;
	1751	}
		/* Both variants index 'invmap' with the result of searching 'invlist' for
		* 'cp'; only the DEBUGGING one asserts on the search result. Arguments
		* and the expansion are parenthesized so that any expression may be
		* passed safely. */
	1752	# define _generic_GET_BREAK_VAL_CP_CHECKED(invlist, invmap, cp) \
	1753	((invmap)[S_get_break_val_cp_checked((invlist), (cp))])
	1754	#else
	1755	# define _generic_GET_BREAK_VAL_CP_CHECKED(invlist, invmap, cp) \
	1756	((invmap)[_invlist_search((invlist), (cp))])
	1757	#endif
	1758
	1759	/* Takes a pointer to an inversion list, a pointer to its corresponding
	1760	* inversion map, and a code point, and returns the code point's value
	1761	* according to the two arrays. It assumes that all code points have a value.
	1762	* This is used as the base macro for macros for particular properties.
		* It simply forwards to _generic_GET_BREAK_VAL_CP_CHECKED, whose definition
		* depends on whether DEBUGGING is defined. */
	1763	#define _generic_GET_BREAK_VAL_CP(invlist, invmap, cp) \
	1764	_generic_GET_BREAK_VAL_CP_CHECKED(invlist, invmap, cp)
	1765
	1766	/* Same as above, but takes begin, end ptrs to a UTF-8 encoded string instead
	1767	* of a code point, returning the value for the first code point in the string.
	1768	* And it takes the particular macro name that finds the desired value given a
	1769	* code point. Merely convert the UTF-8 to code point and call the cp macro.
		* 'pos' must be strictly less than 'strend'; this is asserted. */
	1770	#define _generic_GET_BREAK_VAL_UTF8(cp_macro, pos, strend) \
	1771	(__ASSERT_((pos) < (strend)) \
	1772	/* Note assumes is valid UTF-8 */ \
	1773	(cp_macro(utf8_to_uvchr_buf((pos), (strend), NULL))))
	1774
	1775	/* Returns the GCB value for the input code point, looked up through
		* PL_GCB_invlist and _Perl_GCB_invmap */
	1776	#define getGCB_VAL_CP(cp) \
	1777	_generic_GET_BREAK_VAL_CP( \
	1778	PL_GCB_invlist, \
	1779	_Perl_GCB_invmap, \
	1780	(cp))
	1781
	1782	/* Returns the GCB value for the first code point in the UTF-8 encoded string
	1783	* bounded by pos and strend (pos must be before strend) */
	1784	#define getGCB_VAL_UTF8(pos, strend) \
	1785	_generic_GET_BREAK_VAL_UTF8(getGCB_VAL_CP, pos, strend)
	1786
	1787	/* Returns the LB value for the input code point, looked up through
		* PL_LB_invlist and _Perl_LB_invmap */
	1788	#define getLB_VAL_CP(cp) \
	1789	_generic_GET_BREAK_VAL_CP( \
	1790	PL_LB_invlist, \
	1791	_Perl_LB_invmap, \
	1792	(cp))
	1793
	1794	/* Returns the LB value for the first code point in the UTF-8 encoded string
	1795	* bounded by pos and strend (pos must be before strend) */
	1796	#define getLB_VAL_UTF8(pos, strend) \
	1797	_generic_GET_BREAK_VAL_UTF8(getLB_VAL_CP, pos, strend)
	1798
	1799
	1800	/* Returns the SB value for the input code point, looked up through
		* PL_SB_invlist and _Perl_SB_invmap */
	1801	#define getSB_VAL_CP(cp) \
	1802	_generic_GET_BREAK_VAL_CP( \
	1803	PL_SB_invlist, \
	1804	_Perl_SB_invmap, \
	1805	(cp))
	1806
	1807	/* Returns the SB value for the first code point in the UTF-8 encoded string
	1808	* bounded by pos and strend (pos must be before strend) */
	1809	#define getSB_VAL_UTF8(pos, strend) \
	1810	_generic_GET_BREAK_VAL_UTF8(getSB_VAL_CP, pos, strend)
	1811
	1812	/* Returns the WB value for the input code point, looked up through
		* PL_WB_invlist and _Perl_WB_invmap */
	1813	#define getWB_VAL_CP(cp) \
	1814	_generic_GET_BREAK_VAL_CP( \
	1815	PL_WB_invlist, \
	1816	_Perl_WB_invmap, \
	1817	(cp))
	1818
	1819	/* Returns the WB value for the first code point in the UTF-8 encoded string
	1820	* bounded by pos and strend (pos must be before strend) */
	1821	#define getWB_VAL_UTF8(pos, strend) \
	1822	_generic_GET_BREAK_VAL_UTF8(getWB_VAL_CP, pos, strend)
	1823
	1824	/* We know what class REx starts with. Try to find this position... */
	1825	/* if reginfo->intuit, its a dryrun */
	1826	/* annoyingly all the vars in this routine have different names from their counterparts
	1827	in regmatch. /grrr */
	1828	STATIC char *
	1829	S_find_byclass(pTHX_ regexp * prog, const regnode c, char s,
	1830	const char strend, regmatch_info reginfo)
	1831	{
	1832	dVAR;
	1833	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
	1834	char pat_string; / The pattern's exactish string */
	1835	char pat_end; / ptr to end char of pat_string */
	1836	re_fold_t folder; /* Function for computing non-utf8 folds */
	1837	const U8 fold_array; / array for folding ords < 256 */
	1838	STRLEN ln;
	1839	STRLEN lnc;
	1840	U8 c1;
	1841	U8 c2;
	1842	char *e;
	1843	I32 tmp = 1; /* Scratch variable? */
	1844	const bool utf8_target = reginfo->is_utf8_target;
	1845	UV utf8_fold_flags = 0;
	1846	const bool is_utf8_pat = reginfo->is_utf8_pat;
	1847	bool to_complement = FALSE; /* Invert the result? Taking the xor of this
	1848	with a result inverts that result, as 0^1 =
	1849	1 and 1^1 = 0 */
	1850	_char_class_number classnum;
	1851
	1852	RXi_GET_DECL(prog,progi);
	1853
	1854	PERL_ARGS_ASSERT_FIND_BYCLASS;
	1855
	1856	/* We know what class it must start with. */
	1857	switch (OP(c)) {
	1858	case ANYOFL:
	1859	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	1860
	1861	if (ANYOFL_UTF8_LOCALE_REQD(FLAGS(c)) && ! IN_UTF8_CTYPE_LOCALE) {
	1862	Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required);
	1863	}
	1864
	1865	/* FALLTHROUGH */
	1866	case ANYOFD:
	1867	case ANYOF:
	1868	if (utf8_target) {
	1869	REXEC_FBC_UTF8_CLASS_SCAN(
	1870	reginclass(prog, c, (U8)s, (U8) strend, utf8_target));
	1871	}
	1872	else {
	1873	REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s, 0));
	1874	}
	1875	break;
	1876
	1877	case EXACTFA_NO_TRIE: /* This node only generated for non-utf8 patterns */
	1878	assert(! is_utf8_pat);
	1879	/* FALLTHROUGH */
	1880	case EXACTFA:
	1881	if (is_utf8_pat \|\| utf8_target) {
	1882	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	1883	goto do_exactf_utf8;
	1884	}
	1885	fold_array = PL_fold_latin1; /* Latin1 folds are not affected by */
	1886	folder = foldEQ_latin1; /* /a, except the sharp s one which */
	1887	goto do_exactf_non_utf8; /* isn't dealt with by these */
	1888
	1889	case EXACTF: /* This node only generated for non-utf8 patterns */
	1890	assert(! is_utf8_pat);
	1891	if (utf8_target) {
	1892	utf8_fold_flags = 0;
	1893	goto do_exactf_utf8;
	1894	}
	1895	fold_array = PL_fold;
	1896	folder = foldEQ;
	1897	goto do_exactf_non_utf8;
	1898
	1899	case EXACTFL:
	1900	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	1901	if (is_utf8_pat \|\| utf8_target \|\| IN_UTF8_CTYPE_LOCALE) {
	1902	utf8_fold_flags = FOLDEQ_LOCALE;
	1903	goto do_exactf_utf8;
	1904	}
	1905	fold_array = PL_fold_locale;
	1906	folder = foldEQ_locale;
	1907	goto do_exactf_non_utf8;
	1908
	1909	case EXACTFU_SS:
	1910	if (is_utf8_pat) {
	1911	utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
	1912	}
	1913	goto do_exactf_utf8;
	1914
	1915	case EXACTFLU8:
	1916	if (! utf8_target) { /* All code points in this node require
	1917	UTF-8 to express. */
	1918	break;
	1919	}
	1920	utf8_fold_flags = FOLDEQ_LOCALE \| FOLDEQ_S2_ALREADY_FOLDED
	1921	\| FOLDEQ_S2_FOLDS_SANE;
	1922	goto do_exactf_utf8;
	1923
	1924	case EXACTFU:
	1925	if (is_utf8_pat \|\| utf8_target) {
	1926	utf8_fold_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	1927	goto do_exactf_utf8;
	1928	}
	1929
	1930	/* Any 'ss' in the pattern should have been replaced by regcomp,
	1931	* so we don't have to worry here about this single special case
	1932	* in the Latin1 range */
	1933	fold_array = PL_fold_latin1;
	1934	folder = foldEQ_latin1;
	1935
	1936	/* FALLTHROUGH */
	1937
	1938	do_exactf_non_utf8: /* Neither pattern nor string are UTF8, and there
	1939	are no glitches with fold-length differences
	1940	between the target string and pattern */
	1941
	1942	/* The idea in the non-utf8 EXACTF* cases is to first find the
	1943	* first character of the EXACTF* node and then, if necessary,
	1944	* case-insensitively compare the full text of the node. c1 is the
	1945	* first character. c2 is its fold. This logic will not work for
	1946	* Unicode semantics and the german sharp ss, which hence should
	1947	* not be compiled into a node that gets here. */
	1948	pat_string = STRING(c);
	1949	ln = STR_LEN(c); /* length to match in octets/bytes */
	1950
	1951	/* We know that we have to match at least 'ln' bytes (which is the
	1952	* same as characters, since not utf8). If we have to match 3
	1953	* characters, and there are only 2 availabe, we know without
	1954	* trying that it will fail; so don't start a match past the
	1955	* required minimum number from the far end */
	1956	e = HOP3c(strend, -((SSize_t)ln), s);
	1957
	1958	if (reginfo->intuit && e < s) {
	1959	e = s; /* Due to minlen logic of intuit() */
	1960	}
	1961
	1962	c1 = *pat_string;
	1963	c2 = fold_array[c1];
	1964	if (c1 == c2) { /* If char and fold are the same */
	1965	REXEC_FBC_EXACTISH_SCAN((U8)s == c1);
	1966	}
	1967	else {
	1968	REXEC_FBC_EXACTISH_SCAN((U8)s == c1 \|\| (U8)s == c2);
	1969	}
	1970	break;
	1971
	1972	do_exactf_utf8:
	1973	{
	1974	unsigned expansion;
	1975
	1976	/* If one of the operands is in utf8, we can't use the simpler folding
	1977	* above, due to the fact that many different characters can have the
	1978	* same fold, or portion of a fold, or different- length fold */
	1979	pat_string = STRING(c);
	1980	ln = STR_LEN(c); /* length to match in octets/bytes */
	1981	pat_end = pat_string + ln;
	1982	lnc = is_utf8_pat /* length to match in characters */
	1983	? utf8_length((U8 ) pat_string, (U8 ) pat_end)
	1984	: ln;
	1985
	1986	/* We have 'lnc' characters to match in the pattern, but because of
	1987	* multi-character folding, each character in the target can match
	1988	* up to 3 characters (Unicode guarantees it will never exceed
	1989	* this) if it is utf8-encoded; and up to 2 if not (based on the
	1990	* fact that the Latin 1 folds are already determined, and the
	1991	* only multi-char fold in that range is the sharp-s folding to
	1992	* 'ss'. Thus, a pattern character can match as little as 1/3 of a
	1993	* string character. Adjust lnc accordingly, rounding up, so that
	1994	* if we need to match at least 4+1/3 chars, that really is 5. */
	1995	expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
	1996	lnc = (lnc + expansion - 1) / expansion;
	1997
	1998	/* As in the non-UTF8 case, if we have to match 3 characters, and
	1999	* only 2 are left, it's guaranteed to fail, so don't start a
	2000	* match that would require us to go beyond the end of the string
	2001	*/
	2002	e = HOP3c(strend, -((SSize_t)lnc), s);
	2003
	2004	if (reginfo->intuit && e < s) {
	2005	e = s; /* Due to minlen logic of intuit() */
	2006	}
	2007
	2008	/* XXX Note that we could recalculate e to stop the loop earlier,
	2009	* as the worst case expansion above will rarely be met, and as we
	2010	* go along we would usually find that e moves further to the left.
	2011	* This would happen only after we reached the point in the loop
	2012	* where if there were no expansion we should fail. Unclear if
	2013	* worth the expense */
	2014
	2015	while (s <= e) {
	2016	char my_strend= (char )strend;
	2017	if (foldEQ_utf8_flags(s, &my_strend, 0, utf8_target,
	2018	pat_string, NULL, ln, is_utf8_pat, utf8_fold_flags)
	2019	&& (reginfo->intuit \|\| regtry(reginfo, &s)) )
	2020	{
	2021	goto got_it;
	2022	}
	2023	s += (utf8_target) ? UTF8SKIP(s) : 1;
	2024	}
	2025	break;
	2026	}
	2027
	2028	case BOUNDL:
	2029	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	2030	if (FLAGS(c) != TRADITIONAL_BOUND) {
	2031	if (! IN_UTF8_CTYPE_LOCALE) {
	2032	Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
	2033	B_ON_NON_UTF8_LOCALE_IS_WRONG);
	2034	}
	2035	goto do_boundu;
	2036	}
	2037
	2038	FBC_BOUND(isWORDCHAR_LC, isWORDCHAR_LC_uvchr, isWORDCHAR_LC_utf8);
	2039	break;
	2040
	2041	case NBOUNDL:
	2042	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	2043	if (FLAGS(c) != TRADITIONAL_BOUND) {
	2044	if (! IN_UTF8_CTYPE_LOCALE) {
	2045	Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
	2046	B_ON_NON_UTF8_LOCALE_IS_WRONG);
	2047	}
	2048	goto do_nboundu;
	2049	}
	2050
	2051	FBC_NBOUND(isWORDCHAR_LC, isWORDCHAR_LC_uvchr, isWORDCHAR_LC_utf8);
	2052	break;
	2053
	2054	case BOUND: /* regcomp.c makes sure that this only has the traditional \b
	2055	meaning */
	2056	assert(FLAGS(c) == TRADITIONAL_BOUND);
	2057
	2058	FBC_BOUND(isWORDCHAR, isWORDCHAR_uni, isWORDCHAR_utf8);
	2059	break;
	2060
	2061	case BOUNDA: /* regcomp.c makes sure that this only has the traditional \b
	2062	meaning */
	2063	assert(FLAGS(c) == TRADITIONAL_BOUND);
	2064
	2065	FBC_BOUND_A(isWORDCHAR_A);
	2066	break;
	2067
	2068	case NBOUND: /* regcomp.c makes sure that this only has the traditional \b
	2069	meaning */
	2070	assert(FLAGS(c) == TRADITIONAL_BOUND);
	2071
	2072	FBC_NBOUND(isWORDCHAR, isWORDCHAR_uni, isWORDCHAR_utf8);
	2073	break;
	2074
	2075	case NBOUNDA: /* regcomp.c makes sure that this only has the traditional \b
	2076	meaning */
	2077	assert(FLAGS(c) == TRADITIONAL_BOUND);
	2078
	2079	FBC_NBOUND_A(isWORDCHAR_A);
	2080	break;
	2081
	2082	case NBOUNDU:
	2083	if ((bound_type) FLAGS(c) == TRADITIONAL_BOUND) {
	2084	FBC_NBOUND(isWORDCHAR_L1, isWORDCHAR_uni, isWORDCHAR_utf8);
	2085	break;
	2086	}
	2087
	2088	do_nboundu:
	2089
	2090	to_complement = 1;
	2091	/* FALLTHROUGH */
	2092
	2093	case BOUNDU:
	2094	do_boundu:
	2095	switch((bound_type) FLAGS(c)) {
	2096	case TRADITIONAL_BOUND:
	2097	FBC_BOUND(isWORDCHAR_L1, isWORDCHAR_uni, isWORDCHAR_utf8);
	2098	break;
	2099	case GCB_BOUND:
	2100	if (s == reginfo->strbeg) {
	2101	if (reginfo->intuit \|\| regtry(reginfo, &s))
	2102	{
	2103	goto got_it;
	2104	}
	2105
	2106	/* Didn't match. Try at the next position (if there is one) */
	2107	s += (utf8_target) ? UTF8SKIP(s) : 1;
	2108	if (UNLIKELY(s >= reginfo->strend)) {
	2109	break;
	2110	}
	2111	}
	2112
	2113	if (utf8_target) {
	2114	GCB_enum before = getGCB_VAL_UTF8(
	2115	reghop3((U8*)s, -1,
	2116	(U8*)(reginfo->strbeg)),
	2117	(U8*) reginfo->strend);
	2118	while (s < strend) {
	2119	GCB_enum after = getGCB_VAL_UTF8((U8*) s,
	2120	(U8*) reginfo->strend);
	2121	if ( (to_complement ^ isGCB(before, after))
	2122	&& (reginfo->intuit \|\| regtry(reginfo, &s)))
	2123	{
	2124	goto got_it;
	2125	}
	2126	before = after;
	2127	s += UTF8SKIP(s);
	2128	}
	2129	}
	2130	else { /* Not utf8. Everything is a GCB except between CR and
	2131	LF */
	2132	while (s < strend) {
	2133	if ((to_complement ^ ( UCHARAT(s - 1) != '\r'
	2134	\|\| UCHARAT(s) != '\n'))
	2135	&& (reginfo->intuit \|\| regtry(reginfo, &s)))
	2136	{
	2137	goto got_it;
	2138	}
	2139	s++;
	2140	}
	2141	}
	2142
	2143	/* And, since this is a bound, it can match after the final
	2144	* character in the string */
	2145	if ((reginfo->intuit \|\| regtry(reginfo, &s))) {
	2146	goto got_it;
	2147	}
	2148	break;
	2149
	2150	case LB_BOUND:
	2151	if (s == reginfo->strbeg) {
	2152	if (reginfo->intuit \|\| regtry(reginfo, &s)) {
	2153	goto got_it;
	2154	}
	2155	s += (utf8_target) ? UTF8SKIP(s) : 1;
	2156	if (UNLIKELY(s >= reginfo->strend)) {
	2157	break;
	2158	}
	2159	}
	2160
	2161	if (utf8_target) {
	2162	LB_enum before = getLB_VAL_UTF8(reghop3((U8*)s,
	2163	-1,
	2164	(U8*)(reginfo->strbeg)),
	2165	(U8*) reginfo->strend);
	2166	while (s < strend) {
	2167	LB_enum after = getLB_VAL_UTF8((U8) s, (U8) reginfo->strend);
	2168	if (to_complement ^ isLB(before,
	2169	after,
	2170	(U8*) reginfo->strbeg,
	2171	(U8*) s,
	2172	(U8*) reginfo->strend,
	2173	utf8_target)
	2174	&& (reginfo->intuit \|\| regtry(reginfo, &s)))
	2175	{
	2176	goto got_it;
	2177	}
	2178	before = after;
	2179	s += UTF8SKIP(s);
	2180	}
	2181	}
	2182	else { /* Not utf8. */
	2183	LB_enum before = getLB_VAL_CP((U8) *(s -1));
	2184	while (s < strend) {
	2185	LB_enum after = getLB_VAL_CP((U8) *s);
	2186	if (to_complement ^ isLB(before,
	2187	after,
	2188	(U8*) reginfo->strbeg,
	2189	(U8*) s,
	2190	(U8*) reginfo->strend,
	2191	utf8_target)
	2192	&& (reginfo->intuit \|\| regtry(reginfo, &s)))
	2193	{
	2194	goto got_it;
	2195	}
	2196	before = after;
	2197	s++;
	2198	}
	2199	}
	2200
	2201	if (reginfo->intuit \|\| regtry(reginfo, &s)) {
	2202	goto got_it;
	2203	}
	2204
	2205	break;
	2206
	2207	case SB_BOUND:
	2208	if (s == reginfo->strbeg) {
	2209	if (reginfo->intuit \|\| regtry(reginfo, &s)) {
	2210	goto got_it;
	2211	}
	2212	s += (utf8_target) ? UTF8SKIP(s) : 1;
	2213	if (UNLIKELY(s >= reginfo->strend)) {
	2214	break;
	2215	}
	2216	}
	2217
	2218	if (utf8_target) {
	2219	SB_enum before = getSB_VAL_UTF8(reghop3((U8*)s,
	2220	-1,
	2221	(U8*)(reginfo->strbeg)),
	2222	(U8*) reginfo->strend);
	2223	while (s < strend) {
	2224	SB_enum after = getSB_VAL_UTF8((U8*) s,
	2225	(U8*) reginfo->strend);
	2226	if ((to_complement ^ isSB(before,
	2227	after,
	2228	(U8*) reginfo->strbeg,
	2229	(U8*) s,
	2230	(U8*) reginfo->strend,
	2231	utf8_target))
	2232	&& (reginfo->intuit \|\| regtry(reginfo, &s)))
	2233	{
	2234	goto got_it;
	2235	}
	2236	before = after;
	2237	s += UTF8SKIP(s);
	2238	}
	2239	}
	2240	else { /* Not utf8. */
	2241	SB_enum before = getSB_VAL_CP((U8) *(s -1));
	2242	while (s < strend) {
	2243	SB_enum after = getSB_VAL_CP((U8) *s);
	2244	if ((to_complement ^ isSB(before,
	2245	after,
	2246	(U8*) reginfo->strbeg,
	2247	(U8*) s,
	2248	(U8*) reginfo->strend,
	2249	utf8_target))
	2250	&& (reginfo->intuit \|\| regtry(reginfo, &s)))
	2251	{
	2252	goto got_it;
	2253	}
	2254	before = after;
	2255	s++;
	2256	}
	2257	}
	2258
	2259	/* Here are at the final position in the target string. The SB
	2260	* value is always true here, so matches, depending on other
	2261	* constraints */
	2262	if (reginfo->intuit \|\| regtry(reginfo, &s)) {
	2263	goto got_it;
	2264	}
	2265
	2266	break;
	2267
	2268	case WB_BOUND:
	2269	if (s == reginfo->strbeg) {
	2270	if (reginfo->intuit \|\| regtry(reginfo, &s)) {
	2271	goto got_it;
	2272	}
	2273	s += (utf8_target) ? UTF8SKIP(s) : 1;
	2274	if (UNLIKELY(s >= reginfo->strend)) {
	2275	break;
	2276	}
	2277	}
	2278
	2279	if (utf8_target) {
	2280	/* We are at a boundary between char_sub_0 and char_sub_1.
	2281	* We also keep track of the value for char_sub_-1 as we
	2282	* loop through the line. Context may be needed to make a
	2283	* determination, and if so, this can save having to
	2284	* recalculate it */
	2285	WB_enum previous = WB_UNKNOWN;
	2286	WB_enum before = getWB_VAL_UTF8(
	2287	reghop3((U8*)s,
	2288	-1,
	2289	(U8*)(reginfo->strbeg)),
	2290	(U8*) reginfo->strend);
	2291	while (s < strend) {
	2292	WB_enum after = getWB_VAL_UTF8((U8*) s,
	2293	(U8*) reginfo->strend);
	2294	if ((to_complement ^ isWB(previous,
	2295	before,
	2296	after,
	2297	(U8*) reginfo->strbeg,
	2298	(U8*) s,
	2299	(U8*) reginfo->strend,
	2300	utf8_target))
	2301	&& (reginfo->intuit \|\| regtry(reginfo, &s)))
	2302	{
	2303	goto got_it;
	2304	}
	2305	previous = before;
	2306	before = after;
	2307	s += UTF8SKIP(s);
	2308	}
	2309	}
	2310	else { /* Not utf8. */
	2311	WB_enum previous = WB_UNKNOWN;
	2312	WB_enum before = getWB_VAL_CP((U8) *(s -1));
	2313	while (s < strend) {
	2314	WB_enum after = getWB_VAL_CP((U8) *s);
	2315	if ((to_complement ^ isWB(previous,
	2316	before,
	2317	after,
	2318	(U8*) reginfo->strbeg,
	2319	(U8*) s,
	2320	(U8*) reginfo->strend,
	2321	utf8_target))
	2322	&& (reginfo->intuit \|\| regtry(reginfo, &s)))
	2323	{
	2324	goto got_it;
	2325	}
	2326	previous = before;
	2327	before = after;
	2328	s++;
	2329	}
	2330	}
	2331
	2332	if (reginfo->intuit \|\| regtry(reginfo, &s)) {
	2333	goto got_it;
	2334	}
	2335	}
	2336	break;
	2337
	2338	case LNBREAK:
	2339	REXEC_FBC_CSCAN(is_LNBREAK_utf8_safe(s, strend),
	2340	is_LNBREAK_latin1_safe(s, strend)
	2341	);
	2342	break;
	2343
	2344	/* The argument to all the POSIX node types is the class number to pass to
	2345	* _generic_isCC() to build a mask for searching in PL_charclass[] */
	2346
	2347	case NPOSIXL:
	2348	to_complement = 1;
	2349	/* FALLTHROUGH */
	2350
	2351	case POSIXL:
	2352	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	2353	REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)),
	2354	to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
	2355	break;
	2356
	2357	case NPOSIXD:
	2358	to_complement = 1;
	2359	/* FALLTHROUGH */
	2360
	2361	case POSIXD:
	2362	if (utf8_target) {
	2363	goto posix_utf8;
	2364	}
	2365	goto posixa;
	2366
	2367	case NPOSIXA:
	2368	if (utf8_target) {
	2369	/* The complement of something that matches only ASCII matches all
	2370	* non-ASCII, plus everything in ASCII that isn't in the class. */
	2371	REXEC_FBC_UTF8_CLASS_SCAN(! isASCII_utf8(s)
	2372	\|\| ! _generic_isCC_A(*s, FLAGS(c)));
	2373	break;
	2374	}
	2375
	2376	to_complement = 1;
	2377	/* FALLTHROUGH */
	2378
	2379	case POSIXA:
	2380	posixa:
	2381	/* Don't need to worry about utf8, as it can match only a single
	2382	* byte invariant character. */
	2383	REXEC_FBC_CLASS_SCAN(
	2384	to_complement ^ cBOOL(_generic_isCC_A(*s, FLAGS(c))));
	2385	break;
	2386
	2387	case NPOSIXU:
	2388	to_complement = 1;
	2389	/* FALLTHROUGH */
	2390
	2391	case POSIXU:
	2392	if (! utf8_target) {
	2393	REXEC_FBC_CLASS_SCAN(to_complement ^ cBOOL(_generic_isCC(*s,
	2394	FLAGS(c))));
	2395	}
	2396	else {
	2397
	2398	posix_utf8:
	2399	classnum = (_char_class_number) FLAGS(c);
	2400	if (classnum < _FIRST_NON_SWASH_CC) {
	2401	while (s < strend) {
	2402
	2403	/* We avoid loading in the swash as long as possible, but
	2404	* should we have to, we jump to a separate loop. This
	2405	* extra 'if' statement is what keeps this code from being
	2406	* just a call to REXEC_FBC_UTF8_CLASS_SCAN() */
	2407	if (UTF8_IS_ABOVE_LATIN1(*s)) {
	2408	goto found_above_latin1;
	2409	}
	2410	if ((UTF8_IS_INVARIANT(*s)
	2411	&& to_complement ^ cBOOL(_generic_isCC((U8) *s,
	2412	classnum)))
	2413	\|\| (UTF8_IS_DOWNGRADEABLE_START(*s)
	2414	&& to_complement ^ cBOOL(
	2415	_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*s,
	2416	*(s + 1)),
	2417	classnum))))
	2418	{
	2419	if (tmp && (reginfo->intuit \|\| regtry(reginfo, &s)))
	2420	goto got_it;
	2421	else {
	2422	tmp = doevery;
	2423	}
	2424	}
	2425	else {
	2426	tmp = 1;
	2427	}
	2428	s += UTF8SKIP(s);
	2429	}
	2430	}
	2431	else switch (classnum) { /* These classes are implemented as
	2432	macros */
	2433	case _CC_ENUM_SPACE:
	2434	REXEC_FBC_UTF8_CLASS_SCAN(
	2435	to_complement ^ cBOOL(isSPACE_utf8(s)));
	2436	break;
	2437
	2438	case _CC_ENUM_BLANK:
	2439	REXEC_FBC_UTF8_CLASS_SCAN(
	2440	to_complement ^ cBOOL(isBLANK_utf8(s)));
	2441	break;
	2442
	2443	case _CC_ENUM_XDIGIT:
	2444	REXEC_FBC_UTF8_CLASS_SCAN(
	2445	to_complement ^ cBOOL(isXDIGIT_utf8(s)));
	2446	break;
	2447
	2448	case _CC_ENUM_VERTSPACE:
	2449	REXEC_FBC_UTF8_CLASS_SCAN(
	2450	to_complement ^ cBOOL(isVERTWS_utf8(s)));
	2451	break;
	2452
	2453	case _CC_ENUM_CNTRL:
	2454	REXEC_FBC_UTF8_CLASS_SCAN(
	2455	to_complement ^ cBOOL(isCNTRL_utf8(s)));
	2456	break;
	2457
	2458	default:
	2459	Perl_croak(aTHX_ "panic: find_byclass() node %d='%s' has an unexpected character class '%d'", OP(c), PL_reg_name[OP(c)], classnum);
	2460	NOT_REACHED; /* NOTREACHED */
	2461	}
	2462	}
	2463	break;
	2464
	2465	found_above_latin1: /* Here we have to load a swash to get the result
	2466	for the current code point */
	2467	if (! PL_utf8_swash_ptrs[classnum]) {
	2468	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	2469	PL_utf8_swash_ptrs[classnum] =
	2470	_core_swash_init("utf8",
	2471	"",
	2472	&PL_sv_undef, 1, 0,
	2473	PL_XPosix_ptrs[classnum], &flags);
	2474	}
	2475
	2476	/* This is a copy of the loop above for swash classes, though using the
	2477	* FBC macro instead of being expanded out. Since we've loaded the
	2478	* swash, we don't have to check for that each time through the loop */
	2479	REXEC_FBC_UTF8_CLASS_SCAN(
	2480	to_complement ^ cBOOL(_generic_utf8(
	2481	classnum,
	2482	s,
	2483	swash_fetch(PL_utf8_swash_ptrs[classnum],
	2484	(U8 *) s, TRUE))));
	2485	break;
	2486
	2487	case AHOCORASICKC:
	2488	case AHOCORASICK:
	2489	{
	2490	DECL_TRIE_TYPE(c);
	2491	/* what trie are we using right now */
	2492	reg_ac_data aho = (reg_ac_data)progi->data->data[ ARG( c ) ];
	2493	reg_trie_data trie = (reg_trie_data)progi->data->data[ aho->trie ];
	2494	HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
	2495
	2496	const char *last_start = strend - trie->minlen;
	2497	#ifdef DEBUGGING
	2498	const char *real_start = s;
	2499	#endif
	2500	STRLEN maxlen = trie->maxlen;
	2501	SV *sv_points;
	2502	U8 *points; / map of where we were in the input string
	2503	when reading a given char. For ASCII this
	2504	is unnecessary overhead as the relationship
	2505	is always 1:1, but for Unicode, especially
	2506	case folded Unicode this is not true. */
	2507	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	2508	U8 *bitmap=NULL;
	2509
	2510
	2511	GET_RE_DEBUG_FLAGS_DECL;
	2512
	2513	/* We can't just allocate points here. We need to wrap it in
	2514	* an SV so it gets freed properly if there is a croak while
	2515	* running the match */
	2516	ENTER;
	2517	SAVETMPS;
	2518	sv_points=newSV(maxlen * sizeof(U8 *));
	2519	SvCUR_set(sv_points,
	2520	maxlen * sizeof(U8 *));
	2521	SvPOK_on(sv_points);
	2522	sv_2mortal(sv_points);
	2523	points=(U8**)SvPV_nolen(sv_points );
	2524	if ( trie_type != trie_utf8_fold
	2525	&& (trie->bitmap \|\| OP(c)==AHOCORASICKC) )
	2526	{
	2527	if (trie->bitmap)
	2528	bitmap=(U8*)trie->bitmap;
	2529	else
	2530	bitmap=(U8*)ANYOF_BITMAP(c);
	2531	}
	2532	/* this is the Aho-Corasick algorithm modified a touch
	2533	to include special handling for long "unknown char" sequences.
	2534	The basic idea being that we use AC as long as we are dealing
	2535	with a possible matching char, when we encounter an unknown char
	2536	(and we have not encountered an accepting state) we scan forward
	2537	until we find a legal starting char.
	2538	AC matching is basically that of trie matching, except that when
	2539	we encounter a failing transition, we fall back to the current
	2540	states "fail state", and try the current char again, a process
	2541	we repeat until we reach the root state, state 1, or a legal
	2542	transition. If we fail on the root state then we can either
	2543	terminate if we have reached an accepting state previously, or
	2544	restart the entire process from the beginning if we have not.
	2545
	2546	*/
	2547	while (s <= last_start) {
	2548	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	2549	U8 uc = (U8)s;
	2550	U16 charid = 0;
	2551	U32 base = 1;
	2552	U32 state = 1;
	2553	UV uvc = 0;
	2554	STRLEN len = 0;
	2555	STRLEN foldlen = 0;
	2556	U8 uscan = (U8)NULL;
	2557	U8 *leftmost = NULL;
	2558	#ifdef DEBUGGING
	2559	U32 accepted_word= 0;
	2560	#endif
	2561	U32 pointpos = 0;
	2562
	2563	while ( state && uc <= (U8*)strend ) {
	2564	int failed=0;
	2565	U32 word = aho->states[ state ].wordnum;
	2566
	2567	if( state==1 ) {
	2568	if ( bitmap ) {
	2569	DEBUG_TRIE_EXECUTE_r(
	2570	if ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	2571	dump_exec_pos( (char *)uc, c, strend, real_start,
	2572	(char *)uc, utf8_target, 0 );
	2573	Perl_re_printf( aTHX_
	2574	" Scanning for legal start char...\n");
	2575	}
	2576	);
	2577	if (utf8_target) {
	2578	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	2579	uc += UTF8SKIP(uc);
	2580	}
	2581	} else {
	2582	while ( uc <= (U8)last_start && !BITMAP_TEST(bitmap,uc) ) {
	2583	uc++;
	2584	}
	2585	}
	2586	s= (char *)uc;
	2587	}
	2588	if (uc >(U8*)last_start) break;
	2589	}
	2590
	2591	if ( word ) {
	2592	U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
	2593	if (!leftmost \|\| lpos < leftmost) {
	2594	DEBUG_r(accepted_word=word);
	2595	leftmost= lpos;
	2596	}
	2597	if (base==0) break;
	2598
	2599	}
	2600	points[pointpos++ % maxlen]= uc;
	2601	if (foldlen \|\| uc < (U8*)strend) {
	2602	REXEC_TRIE_READ_CHAR(trie_type, trie,
	2603	widecharmap, uc,
	2604	uscan, len, uvc, charid, foldlen,
	2605	foldbuf, uniflags);
	2606	DEBUG_TRIE_EXECUTE_r({
	2607	dump_exec_pos( (char *)uc, c, strend,
	2608	real_start, s, utf8_target, 0);
	2609	Perl_re_printf( aTHX_
	2610	" Charid:%3u CP:%4"UVxf" ",
	2611	charid, uvc);
	2612	});
	2613	}
	2614	else {
	2615	len = 0;
	2616	charid = 0;
	2617	}
	2618
	2619
	2620	do {
	2621	#ifdef DEBUGGING
	2622	word = aho->states[ state ].wordnum;
	2623	#endif
	2624	base = aho->states[ state ].trans.base;
	2625
	2626	DEBUG_TRIE_EXECUTE_r({
	2627	if (failed)
	2628	dump_exec_pos( (char *)uc, c, strend, real_start,
	2629	s, utf8_target, 0 );
	2630	Perl_re_printf( aTHX_
	2631	"%sState: %4"UVxf", word=%"UVxf,
	2632	failed ? " Fail transition to " : "",
	2633	(UV)state, (UV)word);
	2634	});
	2635	if ( base ) {
	2636	U32 tmp;
	2637	I32 offset;
	2638	if (charid &&
	2639	( ((offset = base + charid
	2640	- 1 - trie->uniquecharcount)) >= 0)
	2641	&& ((U32)offset < trie->lasttrans)
	2642	&& trie->trans[offset].check == state
	2643	&& (tmp=trie->trans[offset].next))
	2644	{
	2645	DEBUG_TRIE_EXECUTE_r(
	2646	Perl_re_printf( aTHX_ " - legal\n"));
	2647	state = tmp;
	2648	break;
	2649	}
	2650	else {
	2651	DEBUG_TRIE_EXECUTE_r(
	2652	Perl_re_printf( aTHX_ " - fail\n"));
	2653	failed = 1;
	2654	state = aho->fail[state];
	2655	}
	2656	}
	2657	else {
	2658	/* we must be accepting here */
	2659	DEBUG_TRIE_EXECUTE_r(
	2660	Perl_re_printf( aTHX_ " - accepting\n"));
	2661	failed = 1;
	2662	break;
	2663	}
	2664	} while(state);
	2665	uc += len;
	2666	if (failed) {
	2667	if (leftmost)
	2668	break;
	2669	if (!state) state = 1;
	2670	}
	2671	}
	2672	if ( aho->states[ state ].wordnum ) {
	2673	U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
	2674	if (!leftmost \|\| lpos < leftmost) {
	2675	DEBUG_r(accepted_word=aho->states[ state ].wordnum);
	2676	leftmost = lpos;
	2677	}
	2678	}
	2679	if (leftmost) {
	2680	s = (char*)leftmost;
	2681	DEBUG_TRIE_EXECUTE_r({
	2682	Perl_re_printf( aTHX_ "Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
	2683	(UV)accepted_word, (IV)(s - real_start)
	2684	);
	2685	});
	2686	if (reginfo->intuit \|\| regtry(reginfo, &s)) {
	2687	FREETMPS;
	2688	LEAVE;
	2689	goto got_it;
	2690	}
	2691	s = HOPc(s,1);
	2692	DEBUG_TRIE_EXECUTE_r({
	2693	Perl_re_printf( aTHX_ "Pattern failed. Looking for new start point...\n");
	2694	});
	2695	} else {
	2696	DEBUG_TRIE_EXECUTE_r(
	2697	Perl_re_printf( aTHX_ "No match.\n"));
	2698	break;
	2699	}
	2700	}
	2701	FREETMPS;
	2702	LEAVE;
	2703	}
	2704	break;
	2705	default:
	2706	Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
	2707	}
	2708	return 0;
	2709	got_it:
	2710	return s;
	2711	}
	2712
	2713	/* set RX_SAVED_COPY, RX_SUBBEG etc.
	2714	* flags have same meanings as with regexec_flags() */
	2715
	2716	static void
	2717	S_reg_set_capture_string(pTHX_ REGEXP * const rx,
	2718	char *strbeg,
	2719	char *strend,
	2720	SV *sv,
	2721	U32 flags,
	2722	bool utf8_target)
	2723	{
	2724	struct regexp *const prog = ReANY(rx);
	2725
	2726	if (flags & REXEC_COPY_STR) {
	2727	#ifdef PERL_ANY_COW
	2728	if (SvCANCOW(sv)) {
	2729	DEBUG_C(Perl_re_printf( aTHX_
	2730	"Copy on write: regexp capture, type %d\n",
	2731	(int) SvTYPE(sv)));
	2732	/* Create a new COW SV to share the match string and store
	2733	* in saved_copy, unless the current COW SV in saved_copy
	2734	* is valid and suitable for our purpose */
	2735	if (( prog->saved_copy
	2736	&& SvIsCOW(prog->saved_copy)
	2737	&& SvPOKp(prog->saved_copy)
	2738	&& SvIsCOW(sv)
	2739	&& SvPOKp(sv)
	2740	&& SvPVX(sv) == SvPVX(prog->saved_copy)))
	2741	{
	2742	/* just reuse saved_copy SV */
	2743	if (RXp_MATCH_COPIED(prog)) {
	2744	Safefree(prog->subbeg);
	2745	RXp_MATCH_COPIED_off(prog);
	2746	}
	2747	}
	2748	else {
	2749	/* create new COW SV to share string */
	2750	RX_MATCH_COPY_FREE(rx);
	2751	prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
	2752	}
	2753	prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
	2754	assert (SvPOKp(prog->saved_copy));
	2755	prog->sublen = strend - strbeg;
	2756	prog->suboffset = 0;
	2757	prog->subcoffset = 0;
	2758	} else
	2759	#endif
	2760	{
	2761	SSize_t min = 0;
	2762	SSize_t max = strend - strbeg;
	2763	SSize_t sublen;
	2764
	2765	if ( (flags & REXEC_COPY_SKIP_POST)
	2766	&& !(prog->extflags & RXf_PMf_KEEPCOPY) /* //p */
	2767	&& !(PL_sawampersand & SAWAMPERSAND_RIGHT)
	2768	) { /* don't copy $' part of string */
	2769	U32 n = 0;
	2770	max = -1;
	2771	/* calculate the right-most part of the string covered
	2772	* by a capture. Due to lookahead, this may be to
	2773	* the right of $&, so we have to scan all captures */
	2774	while (n <= prog->lastparen) {
	2775	if (prog->offs[n].end > max)
	2776	max = prog->offs[n].end;
	2777	n++;
	2778	}
	2779	if (max == -1)
	2780	max = (PL_sawampersand & SAWAMPERSAND_LEFT)
	2781	? prog->offs[0].start
	2782	: 0;
	2783	assert(max >= 0 && max <= strend - strbeg);
	2784	}
	2785
	2786	if ( (flags & REXEC_COPY_SKIP_PRE)
	2787	&& !(prog->extflags & RXf_PMf_KEEPCOPY) /* //p */
	2788	&& !(PL_sawampersand & SAWAMPERSAND_LEFT)
	2789	) { /* don't copy $` part of string */
	2790	U32 n = 0;
	2791	min = max;
	2792	/* calculate the left-most part of the string covered
	2793	* by a capture. Due to lookbehind, this may be to
	2794	* the left of $&, so we have to scan all captures */
	2795	while (min && n <= prog->lastparen) {
	2796	if ( prog->offs[n].start != -1
	2797	&& prog->offs[n].start < min)
	2798	{
	2799	min = prog->offs[n].start;
	2800	}
	2801	n++;
	2802	}
	2803	if ((PL_sawampersand & SAWAMPERSAND_RIGHT)
	2804	&& min > prog->offs[0].end
	2805	)
	2806	min = prog->offs[0].end;
	2807
	2808	}
	2809
	2810	assert(min >= 0 && min <= max && min <= strend - strbeg);
	2811	sublen = max - min;
	2812
	2813	if (RX_MATCH_COPIED(rx)) {
	2814	if (sublen > prog->sublen)
	2815	prog->subbeg =
	2816	(char*)saferealloc(prog->subbeg, sublen+1);
	2817	}
	2818	else
	2819	prog->subbeg = (char*)safemalloc(sublen+1);
	2820	Copy(strbeg + min, prog->subbeg, sublen, char);
	2821	prog->subbeg[sublen] = '\0';
	2822	prog->suboffset = min;
	2823	prog->sublen = sublen;
	2824	RX_MATCH_COPIED_on(rx);
	2825	}
	2826	prog->subcoffset = prog->suboffset;
	2827	if (prog->suboffset && utf8_target) {
	2828	/* Convert byte offset to chars.
	2829	* XXX ideally should only compute this if @-/@+
	2830	* has been seen, a la PL_sawampersand ??? */
	2831
	2832	/* If there's a direct correspondence between the
	2833	* string which we're matching and the original SV,
	2834	* then we can use the utf8 len cache associated with
	2835	* the SV. In particular, it means that under //g,
	2836	* sv_pos_b2u() will use the previously cached
	2837	* position to speed up working out the new length of
	2838	* subcoffset, rather than counting from the start of
	2839	* the string each time. This stops
	2840	* $x = "\x{100}" x 1E6; 1 while $x =~ /(.)/g;
	2841	* from going quadratic */
	2842	if (SvPOKp(sv) && SvPVX(sv) == strbeg)
	2843	prog->subcoffset = sv_pos_b2u_flags(sv, prog->subcoffset,
	2844	SV_GMAGIC\|SV_CONST_RETURN);
	2845	else
	2846	prog->subcoffset = utf8_length((U8*)strbeg,
	2847	(U8*)(strbeg+prog->suboffset));
	2848	}
	2849	}
	2850	else {
	2851	RX_MATCH_COPY_FREE(rx);
	2852	prog->subbeg = strbeg;
	2853	prog->suboffset = 0;
	2854	prog->subcoffset = 0;
	2855	prog->sublen = strend - strbeg;
	2856	}
	2857	}
	2858
	2859
	2860
	2861
	2862	/*
	2863	- regexec_flags - match a regexp against a string
	2864	*/
	2865	I32
	2866	Perl_regexec_flags(pTHX_ REGEXP * const rx, char stringarg, char strend,
	2867	char strbeg, SSize_t minend, SV sv, void *data, U32 flags)
	2868	/* stringarg: the point in the string at which to begin matching */
	2869	/* strend: pointer to null at end of string */
	2870	/* strbeg: real beginning of string */
	2871	/* minend: end of match must be >= minend bytes after stringarg. */
	2872	/* sv: SV being matched: only used for utf8 flag, pos() etc; string
	2873	* itself is accessed via the pointers above */
	2874	/* data: May be used for some additional optimizations.
	2875	Currently unused. */
	2876	/* flags: For optimizations. See REXEC_* in regexp.h */
	2877
	2878	{
	2879	struct regexp *const prog = ReANY(rx);
	2880	char *s;
	2881	regnode *c;
	2882	char *startpos;
	2883	SSize_t minlen; /* must match at least this many chars */
	2884	SSize_t dontbother = 0; /* how many characters not to try at end */
	2885	const bool utf8_target = cBOOL(DO_UTF8(sv));
	2886	I32 multiline;
	2887	RXi_GET_DECL(prog,progi);
	2888	regmatch_info reginfo_buf; /* create some info to pass to regtry etc */
	2889	regmatch_info *const reginfo = &reginfo_buf;
	2890	regexp_paren_pair *swap = NULL;
	2891	I32 oldsave;
	2892	GET_RE_DEBUG_FLAGS_DECL;
	2893
	2894	PERL_ARGS_ASSERT_REGEXEC_FLAGS;
	2895	PERL_UNUSED_ARG(data);
	2896
	2897	/* Be paranoid... */
	2898	if (prog == NULL) {
	2899	Perl_croak(aTHX_ "NULL regexp parameter");
	2900	}
	2901
	2902	DEBUG_EXECUTE_r(
	2903	debug_start_match(rx, utf8_target, stringarg, strend,
	2904	"Matching");
	2905	);
	2906
	2907	startpos = stringarg;
	2908
	2909	/* set these early as they may be used by the HOP macros below */
	2910	reginfo->strbeg = strbeg;
	2911	reginfo->strend = strend;
	2912	reginfo->is_utf8_target = cBOOL(utf8_target);
	2913
	2914	if (prog->intflags & PREGf_GPOS_SEEN) {
	2915	MAGIC *mg;
	2916
	2917	/* set reginfo->ganch, the position where \G can match */
	2918
	2919	reginfo->ganch =
	2920	(flags & REXEC_IGNOREPOS)
	2921	? stringarg /* use start pos rather than pos() */
	2922	: ((mg = mg_find_mglob(sv)) && mg->mg_len >= 0)
	2923	/* Defined pos(): */
	2924	? strbeg + MgBYTEPOS(mg, sv, strbeg, strend-strbeg)
	2925	: strbeg; /* pos() not defined; use start of string */
	2926
	2927	DEBUG_GPOS_r(Perl_re_printf( aTHX_
	2928	"GPOS ganch set to strbeg[%"IVdf"]\n", (IV)(reginfo->ganch - strbeg)));
	2929
	2930	/* in the presence of \G, we may need to start looking earlier in
	2931	* the string than the suggested start point of stringarg:
	2932	* if prog->gofs is set, then that's a known, fixed minimum
	2933	* offset, such as
	2934	* /..\G/: gofs = 2
	2935	* /ab\|c\G/: gofs = 1
	2936	* or if the minimum offset isn't known, then we have to go back
	2937	* to the start of the string, e.g. /w+\G/
	2938	*/
	2939
	2940	if (prog->intflags & PREGf_ANCH_GPOS) {
	2941	if (prog->gofs) {
	2942	startpos = HOPBACKc(reginfo->ganch, prog->gofs);
	2943	if (!startpos \|\|
	2944	((flags & REXEC_FAIL_ON_UNDERFLOW) && startpos < stringarg))
	2945	{
	2946	DEBUG_r(Perl_re_printf( aTHX_
	2947	"fail: ganch-gofs before earliest possible start\n"));
	2948	return 0;
	2949	}
	2950	}
	2951	else
	2952	startpos = reginfo->ganch;
	2953	}
	2954	else if (prog->gofs) {
	2955	startpos = HOPBACKc(startpos, prog->gofs);
	2956	if (!startpos)
	2957	startpos = strbeg;
	2958	}
	2959	else if (prog->intflags & PREGf_GPOS_FLOAT)
	2960	startpos = strbeg;
	2961	}
	2962
	2963	minlen = prog->minlen;
	2964	if ((startpos + minlen) > strend \|\| startpos < strbeg) {
	2965	DEBUG_r(Perl_re_printf( aTHX_
	2966	"Regex match can't succeed, so not even tried\n"));
	2967	return 0;
	2968	}
	2969
	2970	/* at the end of this function, we'll do a LEAVE_SCOPE(oldsave),
	2971	* which will call destuctors to reset PL_regmatch_state, free higher
	2972	* PL_regmatch_slabs, and clean up regmatch_info_aux and
	2973	* regmatch_info_aux_eval */
	2974
	2975	oldsave = PL_savestack_ix;
	2976
	2977	s = startpos;
	2978
	2979	if ((prog->extflags & RXf_USE_INTUIT)
	2980	&& !(flags & REXEC_CHECKED))
	2981	{
	2982	s = re_intuit_start(rx, sv, strbeg, startpos, strend,
	2983	flags, NULL);
	2984	if (!s)
	2985	return 0;
	2986
	2987	if (prog->extflags & RXf_CHECK_ALL) {
	2988	/* we can match based purely on the result of INTUIT.
	2989	* Set up captures etc just for $& and $-[0]
	2990	* (an intuit-only match wont have $1,$2,..) */
	2991	assert(!prog->nparens);
	2992
	2993	/* s/// doesn't like it if $& is earlier than where we asked it to
	2994	* start searching (which can happen on something like /.\G/) */
	2995	if ( (flags & REXEC_FAIL_ON_UNDERFLOW)
	2996	&& (s < stringarg))
	2997	{
	2998	/* this should only be possible under \G */
	2999	assert(prog->intflags & PREGf_GPOS_SEEN);
	3000	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	3001	"matched, but failing for REXEC_FAIL_ON_UNDERFLOW\n"));
	3002	goto phooey;
	3003	}
	3004
	3005	/* match via INTUIT shouldn't have any captures.
	3006	* Let @-, @+, $^N know */
	3007	prog->lastparen = prog->lastcloseparen = 0;
	3008	RX_MATCH_UTF8_set(rx, utf8_target);
	3009	prog->offs[0].start = s - strbeg;
	3010	prog->offs[0].end = utf8_target
	3011	? (char)utf8_hop((U8)s, prog->minlenret) - strbeg
	3012	: s - strbeg + prog->minlenret;
	3013	if ( !(flags & REXEC_NOT_FIRST) )
	3014	S_reg_set_capture_string(aTHX_ rx,
	3015	strbeg, strend,
	3016	sv, flags, utf8_target);
	3017
	3018	return 1;
	3019	}
	3020	}
	3021
	3022	multiline = prog->extflags & RXf_PMf_MULTILINE;
	3023
	3024	if (strend - s < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
	3025	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	3026	"String too short [regexec_flags]...\n"));
	3027	goto phooey;
	3028	}
	3029
	3030	/* Check validity of program. */
	3031	if (UCHARAT(progi->program) != REG_MAGIC) {
	3032	Perl_croak(aTHX_ "corrupted regexp program");
	3033	}
	3034
	3035	RX_MATCH_TAINTED_off(rx);
	3036	RX_MATCH_UTF8_set(rx, utf8_target);
	3037
	3038	reginfo->prog = rx; /* Yes, sorry that this is confusing. */
	3039	reginfo->intuit = 0;
	3040	reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx));
	3041	reginfo->warned = FALSE;
	3042	reginfo->sv = sv;
	3043	reginfo->poscache_maxiter = 0; /* not yet started a countdown */
	3044	/* see how far we have to get to not match where we matched before */
	3045	reginfo->till = stringarg + minend;
	3046
	3047	if (prog->extflags & RXf_EVAL_SEEN && SvPADTMP(sv)) {
	3048	/* SAVEFREESV, not sv_mortalcopy, as this SV must last until after
	3049	S_cleanup_regmatch_info_aux has executed (registered by
	3050	SAVEDESTRUCTOR_X below). S_cleanup_regmatch_info_aux modifies
	3051	magic belonging to this SV.
	3052	Not newSVsv, either, as it does not COW.
	3053	*/
	3054	reginfo->sv = newSV(0);
	3055	SvSetSV_nosteal(reginfo->sv, sv);
	3056	SAVEFREESV(reginfo->sv);
	3057	}
	3058
	3059	/* reserve next 2 or 3 slots in PL_regmatch_state:
	3060	* slot N+0: may currently be in use: skip it
	3061	* slot N+1: use for regmatch_info_aux struct
	3062	* slot N+2: use for regmatch_info_aux_eval struct if we have (?{})'s
	3063	* slot N+3: ready for use by regmatch()
	3064	*/
	3065
	3066	{
	3067	regmatch_state *old_regmatch_state;
	3068	regmatch_slab *old_regmatch_slab;
	3069	int i, max = (prog->extflags & RXf_EVAL_SEEN) ? 2 : 1;
	3070
	3071	/* on first ever match, allocate first slab */
	3072	if (!PL_regmatch_slab) {
	3073	Newx(PL_regmatch_slab, 1, regmatch_slab);
	3074	PL_regmatch_slab->prev = NULL;
	3075	PL_regmatch_slab->next = NULL;
	3076	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
	3077	}
	3078
	3079	old_regmatch_state = PL_regmatch_state;
	3080	old_regmatch_slab = PL_regmatch_slab;
	3081
	3082	for (i=0; i <= max; i++) {
	3083	if (i == 1)
	3084	reginfo->info_aux = &(PL_regmatch_state->u.info_aux);
	3085	else if (i ==2)
	3086	reginfo->info_aux_eval =
	3087	reginfo->info_aux->info_aux_eval =
	3088	&(PL_regmatch_state->u.info_aux_eval);
	3089
	3090	if (++PL_regmatch_state > SLAB_LAST(PL_regmatch_slab))
	3091	PL_regmatch_state = S_push_slab(aTHX);
	3092	}
	3093
	3094	/* note initial PL_regmatch_state position; at end of match we'll
	3095	* pop back to there and free any higher slabs */
	3096
	3097	reginfo->info_aux->old_regmatch_state = old_regmatch_state;
	3098	reginfo->info_aux->old_regmatch_slab = old_regmatch_slab;
	3099	reginfo->info_aux->poscache = NULL;
	3100
	3101	SAVEDESTRUCTOR_X(S_cleanup_regmatch_info_aux, reginfo->info_aux);
	3102
	3103	if ((prog->extflags & RXf_EVAL_SEEN))
	3104	S_setup_eval_state(aTHX_ reginfo);
	3105	else
	3106	reginfo->info_aux_eval = reginfo->info_aux->info_aux_eval = NULL;
	3107	}
	3108
	3109	/* If there is a "must appear" string, look for it. */
	3110
	3111	if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
	3112	/* We have to be careful. If the previous successful match
	3113	was from this regex we don't want a subsequent partially
	3114	successful match to clobber the old results.
	3115	So when we detect this possibility we add a swap buffer
	3116	to the re, and switch the buffer each match. If we fail,
	3117	we switch it back; otherwise we leave it swapped.
	3118	*/
	3119	swap = prog->offs;
	3120	/* do we need a save destructor here for eval dies? */
	3121	Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
	3122	DEBUG_BUFFERS_r(Perl_re_printf( aTHX_
	3123	"rex=0x%"UVxf" saving offs: orig=0x%"UVxf" new=0x%"UVxf"\n",
	3124	PTR2UV(prog),
	3125	PTR2UV(swap),
	3126	PTR2UV(prog->offs)
	3127	));
	3128	}
	3129
	3130	if (prog->recurse_locinput)
	3131	Zero(prog->recurse_locinput,prog->nparens + 1, char *);
	3132
	3133	/* Simplest case: anchored match need be tried only once, or with
	3134	* MBOL, only at the beginning of each line.
	3135	*
	3136	* Note that /..../ sets PREGf_IMPLICIT\|MBOL, while /..../s sets
	3137	* PREGf_IMPLICIT\|SBOL. The idea is that with /.*.../s, if it doesn't
	3138	* match at the start of the string then it won't match anywhere else
	3139	* either; while with /.*.../, if it doesn't match at the beginning,
	3140	* the earliest it could match is at the start of the next line */
	3141
	3142	if (prog->intflags & (PREGf_ANCH & ~PREGf_ANCH_GPOS)) {
	3143	char *end;
	3144
	3145	if (regtry(reginfo, &s))
	3146	goto got_it;
	3147
	3148	if (!(prog->intflags & PREGf_ANCH_MBOL))
	3149	goto phooey;
	3150
	3151	/* didn't match at start, try at other newline positions */
	3152
	3153	if (minlen)
	3154	dontbother = minlen - 1;
	3155	end = HOP3c(strend, -dontbother, strbeg) - 1;
	3156
	3157	/* skip to next newline */
	3158
	3159	while (s <= end) { /* note it could be possible to match at the end of the string */
	3160	/* NB: newlines are the same in unicode as they are in latin */
	3161	if (*s++ != '\n')
	3162	continue;
	3163	if (prog->check_substr \|\| prog->check_utf8) {
	3164	/* note that with PREGf_IMPLICIT, intuit can only fail
	3165	* or return the start position, so it's of limited utility.
	3166	* Nevertheless, I made the decision that the potential for
	3167	* quick fail was still worth it - DAPM */
	3168	s = re_intuit_start(rx, sv, strbeg, s, strend, flags, NULL);
	3169	if (!s)
	3170	goto phooey;
	3171	}
	3172	if (regtry(reginfo, &s))
	3173	goto got_it;
	3174	}
	3175	goto phooey;
	3176	} /* end anchored search */
	3177
	3178	if (prog->intflags & PREGf_ANCH_GPOS)
	3179	{
	3180	/* PREGf_ANCH_GPOS should never be true if PREGf_GPOS_SEEN is not true */
	3181	assert(prog->intflags & PREGf_GPOS_SEEN);
	3182	/* For anchored \G, the only position it can match from is
	3183	* (ganch-gofs); we already set startpos to this above; if intuit
	3184	* moved us on from there, we can't possibly succeed */
	3185	assert(startpos == HOPBACKc(reginfo->ganch, prog->gofs));
	3186	if (s == startpos && regtry(reginfo, &s))
	3187	goto got_it;
	3188	goto phooey;
	3189	}
	3190
	3191	/* Messy cases: unanchored match. */
	3192	if ((prog->anchored_substr \|\| prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
	3193	/* we have /x+whatever/ */
	3194	/* it must be a one character string (XXXX Except is_utf8_pat?) */
	3195	char ch;
	3196	#ifdef DEBUGGING
	3197	int did_match = 0;
	3198	#endif
	3199	if (utf8_target) {
	3200	if (! prog->anchored_utf8) {
	3201	to_utf8_substr(prog);
	3202	}
	3203	ch = SvPVX_const(prog->anchored_utf8)[0];
	3204	REXEC_FBC_SCAN(
	3205	if (*s == ch) {
	3206	DEBUG_EXECUTE_r( did_match = 1 );
	3207	if (regtry(reginfo, &s)) goto got_it;
	3208	s += UTF8SKIP(s);
	3209	while (s < strend && *s == ch)
	3210	s += UTF8SKIP(s);
	3211	}
	3212	);
	3213
	3214	}
	3215	else {
	3216	if (! prog->anchored_substr) {
	3217	if (! to_byte_substr(prog)) {
	3218	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	3219	}
	3220	}
	3221	ch = SvPVX_const(prog->anchored_substr)[0];
	3222	REXEC_FBC_SCAN(
	3223	if (*s == ch) {
	3224	DEBUG_EXECUTE_r( did_match = 1 );
	3225	if (regtry(reginfo, &s)) goto got_it;
	3226	s++;
	3227	while (s < strend && *s == ch)
	3228	s++;
	3229	}
	3230	);
	3231	}
	3232	DEBUG_EXECUTE_r(if (!did_match)
	3233	Perl_re_printf( aTHX_
	3234	"Did not find anchored character...\n")
	3235	);
	3236	}
	3237	else if (prog->anchored_substr != NULL
	3238	\|\| prog->anchored_utf8 != NULL
	3239	\|\| ((prog->float_substr != NULL \|\| prog->float_utf8 != NULL)
	3240	&& prog->float_max_offset < strend - s)) {
	3241	SV *must;
	3242	SSize_t back_max;
	3243	SSize_t back_min;
	3244	char *last;
	3245	char last1; / Last position checked before */
	3246	#ifdef DEBUGGING
	3247	int did_match = 0;
	3248	#endif
	3249	if (prog->anchored_substr \|\| prog->anchored_utf8) {
	3250	if (utf8_target) {
	3251	if (! prog->anchored_utf8) {
	3252	to_utf8_substr(prog);
	3253	}
	3254	must = prog->anchored_utf8;
	3255	}
	3256	else {
	3257	if (! prog->anchored_substr) {
	3258	if (! to_byte_substr(prog)) {
	3259	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	3260	}
	3261	}
	3262	must = prog->anchored_substr;
	3263	}
	3264	back_max = back_min = prog->anchored_offset;
	3265	} else {
	3266	if (utf8_target) {
	3267	if (! prog->float_utf8) {
	3268	to_utf8_substr(prog);
	3269	}
	3270	must = prog->float_utf8;
	3271	}
	3272	else {
	3273	if (! prog->float_substr) {
	3274	if (! to_byte_substr(prog)) {
	3275	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	3276	}
	3277	}
	3278	must = prog->float_substr;
	3279	}
	3280	back_max = prog->float_max_offset;
	3281	back_min = prog->float_min_offset;
	3282	}
	3283
	3284	if (back_min<0) {
	3285	last = strend;
	3286	} else {
	3287	last = HOP3c(strend, /* Cannot start after this */
	3288	-(SSize_t)(CHR_SVLEN(must)
	3289	- (SvTAIL(must) != 0) + back_min), strbeg);
	3290	}
	3291	if (s > reginfo->strbeg)
	3292	last1 = HOPc(s, -1);
	3293	else
	3294	last1 = s - 1; /* bogus */
	3295
	3296	/* XXXX check_substr already used to find "s", can optimize if
	3297	check_substr==must. */
	3298	dontbother = 0;
	3299	strend = HOPc(strend, -dontbother);
	3300	while ( (s <= last) &&
	3301	(s = fbm_instr((unsigned char*)HOP4c(s, back_min, strbeg, strend),
	3302	(unsigned char*)strend, must,
	3303	multiline ? FBMrf_MULTILINE : 0)) ) {
	3304	DEBUG_EXECUTE_r( did_match = 1 );
	3305	if (HOPc(s, -back_max) > last1) {
	3306	last1 = HOPc(s, -back_min);
	3307	s = HOPc(s, -back_max);
	3308	}
	3309	else {
	3310	char * const t = (last1 >= reginfo->strbeg)
	3311	? HOPc(last1, 1) : last1 + 1;
	3312
	3313	last1 = HOPc(s, -back_min);
	3314	s = t;
	3315	}
	3316	if (utf8_target) {
	3317	while (s <= last1) {
	3318	if (regtry(reginfo, &s))
	3319	goto got_it;
	3320	if (s >= last1) {
	3321	s++; /* to break out of outer loop */
	3322	break;
	3323	}
	3324	s += UTF8SKIP(s);
	3325	}
	3326	}
	3327	else {
	3328	while (s <= last1) {
	3329	if (regtry(reginfo, &s))
	3330	goto got_it;
	3331	s++;
	3332	}
	3333	}
	3334	}
	3335	DEBUG_EXECUTE_r(if (!did_match) {
	3336	RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
	3337	SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
	3338	Perl_re_printf( aTHX_ "Did not find %s substr %s%s...\n",
	3339	((must == prog->anchored_substr \|\| must == prog->anchored_utf8)
	3340	? "anchored" : "floating"),
	3341	quoted, RE_SV_TAIL(must));
	3342	});
	3343	goto phooey;
	3344	}
	3345	else if ( (c = progi->regstclass) ) {
	3346	if (minlen) {
	3347	const OPCODE op = OP(progi->regstclass);
	3348	/* don't bother with what can't match */
	3349	if (PL_regkind[op] != EXACT && PL_regkind[op] != TRIE)
	3350	strend = HOPc(strend, -(minlen - 1));
	3351	}
	3352	DEBUG_EXECUTE_r({
	3353	SV * const prop = sv_newmortal();
	3354	regprop(prog, prop, c, reginfo, NULL);
	3355	{
	3356	RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
	3357	s,strend-s,60);
	3358	Perl_re_printf( aTHX_
	3359	"Matching stclass %.*s against %s (%d bytes)\n",
	3360	(int)SvCUR(prop), SvPVX_const(prop),
	3361	quoted, (int)(strend - s));
	3362	}
	3363	});
	3364	if (find_byclass(prog, c, s, strend, reginfo))
	3365	goto got_it;
	3366	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ "Contradicts stclass... [regexec_flags]\n"));
	3367	}
	3368	else {
	3369	dontbother = 0;
	3370	if (prog->float_substr != NULL \|\| prog->float_utf8 != NULL) {
	3371	/* Trim the end. */
	3372	char *last= NULL;
	3373	SV* float_real;
	3374	STRLEN len;
	3375	const char *little;
	3376
	3377	if (utf8_target) {
	3378	if (! prog->float_utf8) {
	3379	to_utf8_substr(prog);
	3380	}
	3381	float_real = prog->float_utf8;
	3382	}
	3383	else {
	3384	if (! prog->float_substr) {
	3385	if (! to_byte_substr(prog)) {
	3386	NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
	3387	}
	3388	}
	3389	float_real = prog->float_substr;
	3390	}
	3391
	3392	little = SvPV_const(float_real, len);
	3393	if (SvTAIL(float_real)) {
	3394	/* This means that float_real contains an artificial \n on
	3395	* the end due to the presence of something like this:
	3396	* /foo$/ where we can match both "foo" and "foo\n" at the
	3397	* end of the string. So we have to compare the end of the
	3398	* string first against the float_real without the \n and
	3399	* then against the full float_real with the string. We
	3400	* have to watch out for cases where the string might be
	3401	* smaller than the float_real or the float_real without
	3402	* the \n. */
	3403	char *checkpos= strend - len;
	3404	DEBUG_OPTIMISE_r(
	3405	Perl_re_printf( aTHX_
	3406	"%sChecking for float_real.%s\n",
	3407	PL_colors[4], PL_colors[5]));
	3408	if (checkpos + 1 < strbeg) {
	3409	/* can't match, even if we remove the trailing \n
	3410	* string is too short to match */
	3411	DEBUG_EXECUTE_r(
	3412	Perl_re_printf( aTHX_
	3413	"%sString shorter than required trailing substring, cannot match.%s\n",
	3414	PL_colors[4], PL_colors[5]));
	3415	goto phooey;
	3416	} else if (memEQ(checkpos + 1, little, len - 1)) {
	3417	/* can match, the end of the string matches without the
	3418	* "\n" */
	3419	last = checkpos + 1;
	3420	} else if (checkpos < strbeg) {
	3421	/* cant match, string is too short when the "\n" is
	3422	* included */
	3423	DEBUG_EXECUTE_r(
	3424	Perl_re_printf( aTHX_
	3425	"%sString does not contain required trailing substring, cannot match.%s\n",
	3426	PL_colors[4], PL_colors[5]));
	3427	goto phooey;
	3428	} else if (!multiline) {
	3429	/* non multiline match, so compare with the "\n" at the
	3430	* end of the string */
	3431	if (memEQ(checkpos, little, len)) {
	3432	last= checkpos;
	3433	} else {
	3434	DEBUG_EXECUTE_r(
	3435	Perl_re_printf( aTHX_
	3436	"%sString does not contain required trailing substring, cannot match.%s\n",
	3437	PL_colors[4], PL_colors[5]));
	3438	goto phooey;
	3439	}
	3440	} else {
	3441	/* multiline match, so we have to search for a place
	3442	* where the full string is located */
	3443	goto find_last;
	3444	}
	3445	} else {
	3446	find_last:
	3447	if (len)
	3448	last = rninstr(s, strend, little, little + len);
	3449	else
	3450	last = strend; /* matching "$" */
	3451	}
	3452	if (!last) {
	3453	/* at one point this block contained a comment which was
	3454	* probably incorrect, which said that this was a "should not
	3455	* happen" case. Even if it was true when it was written I am
	3456	* pretty sure it is not anymore, so I have removed the comment
	3457	* and replaced it with this one. Yves */
	3458	DEBUG_EXECUTE_r(
	3459	Perl_re_printf( aTHX_
	3460	"%sString does not contain required substring, cannot match.%s\n",
	3461	PL_colors[4], PL_colors[5]
	3462	));
	3463	goto phooey;
	3464	}
	3465	dontbother = strend - last + prog->float_min_offset;
	3466	}
	3467	if (minlen && (dontbother < minlen))
	3468	dontbother = minlen - 1;
	3469	strend -= dontbother; /* this one's always in bytes! */
	3470	/* We don't know much -- general case. */
	3471	if (utf8_target) {
	3472	for (;;) {
	3473	if (regtry(reginfo, &s))
	3474	goto got_it;
	3475	if (s >= strend)
	3476	break;
	3477	s += UTF8SKIP(s);
	3478	};
	3479	}
	3480	else {
	3481	do {
	3482	if (regtry(reginfo, &s))
	3483	goto got_it;
	3484	} while (s++ < strend);
	3485	}
	3486	}
	3487
	3488	/* Failure. */
	3489	goto phooey;
	3490
	3491	got_it:
	3492	/* s/// doesn't like it if $& is earlier than where we asked it to
	3493	* start searching (which can happen on something like /.\G/) */
	3494	if ( (flags & REXEC_FAIL_ON_UNDERFLOW)
	3495	&& (prog->offs[0].start < stringarg - strbeg))
	3496	{
	3497	/* this should only be possible under \G */
	3498	assert(prog->intflags & PREGf_GPOS_SEEN);
	3499	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	3500	"matched, but failing for REXEC_FAIL_ON_UNDERFLOW\n"));
	3501	goto phooey;
	3502	}
	3503
	3504	DEBUG_BUFFERS_r(
	3505	if (swap)
	3506	Perl_re_printf( aTHX_
	3507	"rex=0x%"UVxf" freeing offs: 0x%"UVxf"\n",
	3508	PTR2UV(prog),
	3509	PTR2UV(swap)
	3510	);
	3511	);
	3512	Safefree(swap);
	3513
	3514	/* clean up; this will trigger destructors that will free all slabs
	3515	* above the current one, and cleanup the regmatch_info_aux
	3516	* and regmatch_info_aux_eval sructs */
	3517
	3518	LEAVE_SCOPE(oldsave);
	3519
	3520	if (RXp_PAREN_NAMES(prog))
	3521	(void)hv_iterinit(RXp_PAREN_NAMES(prog));
	3522
	3523	/* make sure $`, $&, $', and $digit will work later */
	3524	if ( !(flags & REXEC_NOT_FIRST) )
	3525	S_reg_set_capture_string(aTHX_ rx,
	3526	strbeg, reginfo->strend,
	3527	sv, flags, utf8_target);
	3528
	3529	return 1;
	3530
	3531	phooey:
	3532	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ "%sMatch failed%s\n",
	3533	PL_colors[4], PL_colors[5]));
	3534
	3535	/* clean up; this will trigger destructors that will free all slabs
	3536	* above the current one, and cleanup the regmatch_info_aux
	3537	* and regmatch_info_aux_eval sructs */
	3538
	3539	LEAVE_SCOPE(oldsave);
	3540
	3541	if (swap) {
	3542	/* we failed :-( roll it back */
	3543	DEBUG_BUFFERS_r(Perl_re_printf( aTHX_
	3544	"rex=0x%"UVxf" rolling back offs: freeing=0x%"UVxf" restoring=0x%"UVxf"\n",
	3545	PTR2UV(prog),
	3546	PTR2UV(prog->offs),
	3547	PTR2UV(swap)
	3548	));
	3549	Safefree(prog->offs);
	3550	prog->offs = swap;
	3551	}
	3552	return 0;
	3553	}
	3554
	3555
	3556	/* Set which rex is pointed to by PL_reg_curpm, handling ref counting.
	3557	* Do inc before dec, in case old and new rex are the same */
	3558	#define SET_reg_curpm(Re2) \
	3559	if (reginfo->info_aux_eval) { \
	3560	(void)ReREFCNT_inc(Re2); \
	3561	ReREFCNT_dec(PM_GETRE(PL_reg_curpm)); \
	3562	PM_SETRE((PL_reg_curpm), (Re2)); \
	3563	}
	3564
	3565
	3566	/*
	3567	- regtry - try match at specific point
	3568	*/
	3569	STATIC bool /* 0 failure, 1 success */
	3570	S_regtry(pTHX_ regmatch_info reginfo, char *startposp)
	3571	{
	3572	CHECKPOINT lastcp;
	3573	REGEXP *const rx = reginfo->prog;
	3574	regexp *const prog = ReANY(rx);
	3575	SSize_t result;
	3576	#ifdef DEBUGGING
	3577	U32 depth = 0; /* used by REGCP_SET */
	3578	#endif
	3579	RXi_GET_DECL(prog,progi);
	3580	GET_RE_DEBUG_FLAGS_DECL;
	3581
	3582	PERL_ARGS_ASSERT_REGTRY;
	3583
	3584	reginfo->cutpoint=NULL;
	3585
	3586	prog->offs[0].start = *startposp - reginfo->strbeg;
	3587	prog->lastparen = 0;
	3588	prog->lastcloseparen = 0;
	3589
	3590	/* XXXX What this code is doing here?!!! There should be no need
	3591	to do this again and again, prog->lastparen should take care of
	3592	this! --ilya*/
	3593
	3594	/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
	3595	* Actually, the code in regcppop() (which Ilya may be meaning by
	3596	* prog->lastparen), is not needed at all by the test suite
	3597	* (op/regexp, op/pat, op/split), but that code is needed otherwise
	3598	* this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
	3599	* Meanwhile, this code is needed for the
	3600	* above-mentioned test suite tests to succeed. The common theme
	3601	* on those tests seems to be returning null fields from matches.
	3602	* --jhi updated by dapm */
	3603	#if 1
	3604	if (prog->nparens) {
	3605	regexp_paren_pair *pp = prog->offs;
	3606	I32 i;
	3607	for (i = prog->nparens; i > (I32)prog->lastparen; i--) {
	3608	++pp;
	3609	pp->start = -1;
	3610	pp->end = -1;
	3611	}
	3612	}
	3613	#endif
	3614	REGCP_SET(lastcp);
	3615	result = regmatch(reginfo, *startposp, progi->program + 1);
	3616	if (result != -1) {
	3617	prog->offs[0].end = result;
	3618	return 1;
	3619	}
	3620	if (reginfo->cutpoint)
	3621	*startposp= reginfo->cutpoint;
	3622	REGCP_UNWIND(lastcp);
	3623	return 0;
	3624	}
	3625
	3626
	3627	#define sayYES goto yes
	3628	#define sayNO goto no
	3629	#define sayNO_SILENT goto no_silent
	3630
	3631	/* we dont use STMT_START/END here because it leads to
	3632	"unreachable code" warnings, which are bogus, but distracting. */
	3633	#define CACHEsayNO \
	3634	if (ST.cache_mask) \
	3635	reginfo->info_aux->poscache[ST.cache_offset] \|= ST.cache_mask; \
	3636	sayNO
	3637
	3638	/* this is used to determine how far from the left messages like
	3639	'failed...' are printed in regexec.c. It should be set such that
	3640	messages are inline with the regop output that created them.
	3641	*/
	3642	#define REPORT_CODE_OFF 29
	3643	#define INDENT_CHARS(depth) ((int)(depth) % 20)
	3644	#ifdef DEBUGGING
	3645	int
	3646	Perl_re_exec_indentf(pTHX_ const char *fmt, U32 depth, ...)
	3647	{
	3648	va_list ap;
	3649	int result;
	3650	PerlIO *f= Perl_debug_log;
	3651	PERL_ARGS_ASSERT_RE_EXEC_INDENTF;
	3652	va_start(ap, depth);
	3653	PerlIO_printf(f, "%s\|%4"UVuf"\| %s", REPORT_CODE_OFF, "", (UV)depth, INDENT_CHARS(depth), "" );
	3654	result = PerlIO_vprintf(f, fmt, ap);
	3655	va_end(ap);
	3656	return result;
	3657	}
	3658	#endif /* DEBUGGING */
	3659
	3660
	3661	#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
	3662	#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
	3663	#define CHRTEST_NOT_A_CP_1 -999
	3664	#define CHRTEST_NOT_A_CP_2 -998
	3665
	3666	/* grab a new slab and return the first slot in it */
	3667
	3668	STATIC regmatch_state *
	3669	S_push_slab(pTHX)
	3670	{
	3671	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	3672	dMY_CXT;
	3673	#endif
	3674	regmatch_slab *s = PL_regmatch_slab->next;
	3675	if (!s) {
	3676	Newx(s, 1, regmatch_slab);
	3677	s->prev = PL_regmatch_slab;
	3678	s->next = NULL;
	3679	PL_regmatch_slab->next = s;
	3680	}
	3681	PL_regmatch_slab = s;
	3682	return SLAB_FIRST(s);
	3683	}
	3684
	3685
	3686	/* push a new state then goto it */
	3687
	3688	#define PUSH_STATE_GOTO(state, node, input) \
	3689	pushinput = input; \
	3690	scan = node; \
	3691	st->resume_state = state; \
	3692	goto push_state;
	3693
	3694	/* push a new state with success backtracking, then goto it */
	3695
	3696	#define PUSH_YES_STATE_GOTO(state, node, input) \
	3697	pushinput = input; \
	3698	scan = node; \
	3699	st->resume_state = state; \
	3700	goto push_yes_state;
	3701
	3702
	3703
	3704
	3705	/*
	3706
	3707	regmatch() - main matching routine
	3708
	3709	This is basically one big switch statement in a loop. We execute an op,
	3710	set 'next' to point the next op, and continue. If we come to a point which
	3711	we may need to backtrack to on failure such as (A\|B\|C), we push a
	3712	backtrack state onto the backtrack stack. On failure, we pop the top
	3713	state, and re-enter the loop at the state indicated. If there are no more
	3714	states to pop, we return failure.
	3715
	3716	Sometimes we also need to backtrack on success; for example /A+/, where
	3717	after successfully matching one A, we need to go back and try to
	3718	match another one; similarly for lookahead assertions: if the assertion
	3719	completes successfully, we backtrack to the state just before the assertion
	3720	and then carry on. In these cases, the pushed state is marked as
	3721	'backtrack on success too'. This marking is in fact done by a chain of
	3722	pointers, each pointing to the previous 'yes' state. On success, we pop to
	3723	the nearest yes state, discarding any intermediate failure-only states.
	3724	Sometimes a yes state is pushed just to force some cleanup code to be
	3725	called at the end of a successful match or submatch; e.g. (??{$re}) uses
	3726	it to free the inner regex.
	3727
	3728	Note that failure backtracking rewinds the cursor position, while
	3729	success backtracking leaves it alone.
	3730
	3731	A pattern is complete when the END op is executed, while a subpattern
	3732	such as (?=foo) is complete when the SUCCESS op is executed. Both of these
	3733	ops trigger the "pop to last yes state if any, otherwise return true"
	3734	behaviour.
	3735
	3736	A common convention in this function is to use A and B to refer to the two
	3737	subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
	3738	the subpattern to be matched possibly multiple times, while B is the entire
	3739	rest of the pattern. Variable and state names reflect this convention.
	3740
	3741	The states in the main switch are the union of ops and failure/success of
	3742	substates associated with that op. For example, IFMATCH is the op
	3743	that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
	3744	'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
	3745	successfully matched A and IFMATCH_A_fail is a state saying that we have
	3746	just failed to match A. Resume states always come in pairs. The backtrack
	3747	state we push is marked as 'IFMATCH_A', but when that is popped, we resume
	3748	at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
	3749	on success or failure.
	3750
	3751	The struct that holds a backtracking state is actually a big union, with
	3752	one variant for each major type of op. The variable st points to the
	3753	top-most backtrack struct. To make the code clearer, within each
	3754	block of code we #define ST to alias the relevant union.
	3755
	3756	Here's a concrete example of a (vastly oversimplified) IFMATCH
	3757	implementation:
	3758
	3759	switch (state) {
	3760	....
	3761
	3762	#define ST st->u.ifmatch
	3763
	3764	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	3765	ST.foo = ...; // some state we wish to save
	3766	...
	3767	// push a yes backtrack state with a resume value of
	3768	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
	3769	// first node of A:
	3770	PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput);
	3771	// NOTREACHED
	3772
	3773	case IFMATCH_A: // we have successfully executed A; now continue with B
	3774	next = B;
	3775	bar = ST.foo; // do something with the preserved value
	3776	break;
	3777
	3778	case IFMATCH_A_fail: // A failed, so the assertion failed
	3779	...; // do some housekeeping, then ...
	3780	sayNO; // propagate the failure
	3781
	3782	#undef ST
	3783
	3784	...
	3785	}
	3786
	3787	For any old-timers reading this who are familiar with the old recursive
	3788	approach, the code above is equivalent to:
	3789
	3790	case IFMATCH: // we are executing the IFMATCH op, (?=A)B
	3791	{
	3792	int foo = ...
	3793	...
	3794	if (regmatch(A)) {
	3795	next = B;
	3796	bar = foo;
	3797	break;
	3798	}
	3799	...; // do some housekeeping, then ...
	3800	sayNO; // propagate the failure
	3801	}
	3802
	3803	The topmost backtrack state, pointed to by st, is usually free. If you
	3804	want to claim it, populate any ST.foo fields in it with values you wish to
	3805	save, then do one of
	3806
	3807	PUSH_STATE_GOTO(resume_state, node, newinput);
	3808	PUSH_YES_STATE_GOTO(resume_state, node, newinput);
	3809
	3810	which sets that backtrack state's resume value to 'resume_state', pushes a
	3811	new free entry to the top of the backtrack stack, then goes to 'node'.
	3812	On backtracking, the free slot is popped, and the saved state becomes the
	3813	new free state. An ST.foo field in this new top state can be temporarily
	3814	accessed to retrieve values, but once the main loop is re-entered, it
	3815	becomes available for reuse.
	3816
	3817	Note that the depth of the backtrack stack constantly increases during the
	3818	left-to-right execution of the pattern, rather than going up and down with
	3819	the pattern nesting. For example the stack is at its maximum at Z at the
	3820	end of the pattern, rather than at X in the following:
	3821
	3822	/(((X)+)+)+....(Y)+....Z/
	3823
	3824	The only exceptions to this are lookahead/behind assertions and the cut,
	3825	(?>A), which pop all the backtrack states associated with A before
	3826	continuing.
	3827
	3828	Backtrack state structs are allocated in slabs of about 4K in size.
	3829	PL_regmatch_state and st always point to the currently active state,
	3830	and PL_regmatch_slab points to the slab currently containing
	3831	PL_regmatch_state. The first time regmatch() is called, the first slab is
	3832	allocated, and is never freed until interpreter destruction. When the slab
	3833	is full, a new one is allocated and chained to the end. At exit from
	3834	regmatch(), slabs allocated since entry are freed.
	3835
	3836	*/
	3837
	3838
	3839	#define DEBUG_STATE_pp(pp) \
	3840	DEBUG_STATE_r({ \
	3841	DUMP_EXEC_POS(locinput, scan, utf8_target,depth); \
	3842	Perl_re_printf( aTHX_ \
	3843	"%*s" pp " %s%s%s%s%s\n", \
	3844	INDENT_CHARS(depth), "", \
	3845	PL_reg_name[st->resume_state], \
	3846	((st==yes_state\|\|st==mark_state) ? "[" : ""), \
	3847	((st==yes_state) ? "Y" : ""), \
	3848	((st==mark_state) ? "M" : ""), \
	3849	((st==yes_state\|\|st==mark_state) ? "]" : "") \
	3850	); \
	3851	});
	3852
	3853
	3854	#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
	3855
	3856	#ifdef DEBUGGING
	3857
	3858	STATIC void
	3859	S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
	3860	const char start, const char end, const char *blurb)
	3861	{
	3862	const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
	3863
	3864	PERL_ARGS_ASSERT_DEBUG_START_MATCH;
	3865
	3866	if (!PL_colorset)
	3867	reginitcolors();
	3868	{
	3869	RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
	3870	RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
	3871
	3872	RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
	3873	start, end - start, 60);
	3874
	3875	Perl_re_printf( aTHX_
	3876	"%s%s REx%s %s against %s\n",
	3877	PL_colors[4], blurb, PL_colors[5], s0, s1);
	3878
	3879	if (utf8_target\|\|utf8_pat)
	3880	Perl_re_printf( aTHX_ "UTF-8 %s%s%s...\n",
	3881	utf8_pat ? "pattern" : "",
	3882	utf8_pat && utf8_target ? " and " : "",
	3883	utf8_target ? "string" : ""
	3884	);
	3885	}
	3886	}
	3887
	3888	STATIC void
	3889	S_dump_exec_pos(pTHX_ const char *locinput,
	3890	const regnode *scan,
	3891	const char *loc_regeol,
	3892	const char *loc_bostr,
	3893	const char *loc_reg_starttry,
	3894	const bool utf8_target,
	3895	const U32 depth
	3896	)
	3897	{
	3898	const int docolor = PL_colors[0] \|\| PL_colors[2] \|\| *PL_colors[4];
	3899	const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
	3900	int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
	3901	/* The part of the string before starttry has one color
	3902	(pref0_len chars), between starttry and current
	3903	position another one (pref_len - pref0_len chars),
	3904	after the current position the third one.
	3905	We assume that pref0_len <= pref_len, otherwise we
	3906	decrease pref0_len. */
	3907	int pref_len = (locinput - loc_bostr) > (5 + taill) - l
	3908	? (5 + taill) - l : locinput - loc_bostr;
	3909	int pref0_len;
	3910
	3911	PERL_ARGS_ASSERT_DUMP_EXEC_POS;
	3912
	3913	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput - pref_len)))
	3914	pref_len++;
	3915	pref0_len = pref_len - (locinput - loc_reg_starttry);
	3916	if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
	3917	l = ( loc_regeol - locinput > (5 + taill) - pref_len
	3918	? (5 + taill) - pref_len : loc_regeol - locinput);
	3919	while (utf8_target && UTF8_IS_CONTINUATION((U8)(locinput + l)))
	3920	l--;
	3921	if (pref0_len < 0)
	3922	pref0_len = 0;
	3923	if (pref0_len > pref_len)
	3924	pref0_len = pref_len;
	3925	{
	3926	const int is_uni = utf8_target ? 1 : 0;
	3927
	3928	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
	3929	(locinput - pref_len),pref0_len, 60, 4, 5);
	3930
	3931	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
	3932	(locinput - pref_len + pref0_len),
	3933	pref_len - pref0_len, 60, 2, 3);
	3934
	3935	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
	3936	locinput, loc_regeol - locinput, 10, 0, 1);
	3937
	3938	const STRLEN tlen=len0+len1+len2;
	3939	Perl_re_printf( aTHX_
	3940	"%4"IVdf" <%.s%.s%s%.s>%s\|%4u\| ",
	3941	(IV)(locinput - loc_bostr),
	3942	len0, s0,
	3943	len1, s1,
	3944	(docolor ? "" : "> <"),
	3945	len2, s2,
	3946	(int)(tlen > 19 ? 0 : 19 - tlen),
	3947	"",
	3948	depth);
	3949	}
	3950	}
	3951
	3952	#endif
	3953
	3954	/* reg_check_named_buff_matched()
	3955	* Checks to see if a named buffer has matched. The data array of
	3956	* buffer numbers corresponding to the buffer is expected to reside
	3957	* in the regexp->data->data array in the slot stored in the ARG() of
	3958	* node involved. Note that this routine doesn't actually care about the
	3959	* name, that information is not preserved from compilation to execution.
	3960	* Returns the index of the leftmost defined buffer with the given name
	3961	* or 0 if none of the buffers matched.
	3962	*/
	3963	STATIC I32
	3964	S_reg_check_named_buff_matched(const regexp rex, const regnode scan)
	3965	{
	3966	I32 n;
	3967	RXi_GET_DECL(rex,rexi);
	3968	SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	3969	I32 nums=(I32)SvPVX(sv_dat);
	3970
	3971	PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
	3972
	3973	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	3974	if ((I32)rex->lastparen >= nums[n] &&
	3975	rex->offs[nums[n]].end != -1)
	3976	{
	3977	return nums[n];
	3978	}
	3979	}
	3980	return 0;
	3981	}
	3982
	3983
	3984	static bool
	3985	S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
	3986	U8* c1_utf8, int c2p, U8 c2_utf8, regmatch_info *reginfo)
	3987	{
	3988	/* This function determines if there are one or two characters that match
	3989	* the first character of the passed-in EXACTish node <text_node>, and if
	3990	* so, returns them in the passed-in pointers.
	3991	*
	3992	* If it determines that no possible character in the target string can
	3993	* match, it returns FALSE; otherwise TRUE. (The FALSE situation occurs if
	3994	* the first character in <text_node> requires UTF-8 to represent, and the
	3995	* target string isn't in UTF-8.)
	3996	*
	3997	* If there are more than two characters that could match the beginning of
	3998	* <text_node>, or if more context is required to determine a match or not,
	3999	* it sets both <c1p> and <c2p> to CHRTEST_VOID.
	4000	*
	4001	* The motiviation behind this function is to allow the caller to set up
	4002	* tight loops for matching. If <text_node> is of type EXACT, there is
	4003	* only one possible character that can match its first character, and so
	4004	* the situation is quite simple. But things get much more complicated if
	4005	* folding is involved. It may be that the first character of an EXACTFish
	4006	* node doesn't participate in any possible fold, e.g., punctuation, so it
	4007	* can be matched only by itself. The vast majority of characters that are
	4008	* in folds match just two things, their lower and upper-case equivalents.
	4009	* But not all are like that; some have multiple possible matches, or match
	4010	* sequences of more than one character. This function sorts all that out.
	4011	*
	4012	* Consider the patterns AB or A?B where A and B are arbitrary. In a
	4013	* loop of trying to match A*, we know we can't exit where the thing
	4014	* following it isn't a B. And something can't be a B unless it is the
	4015	* beginning of B. By putting a quick test for that beginning in a tight
	4016	* loop, we can rule out things that can't possibly be B without having to
	4017	* break out of the loop, thus avoiding work. Similarly, if A is a single
	4018	* character, we can make a tight loop matching A*, using the outputs of
	4019	* this function.
	4020	*
	4021	* If the target string to match isn't in UTF-8, and there aren't
	4022	* complications which require CHRTEST_VOID, <c1p> and <c2p> are set to
	4023	* the one or two possible octets (which are characters in this situation)
	4024	* that can match. In all cases, if there is only one character that can
	4025	* match, <c1p> and <c2p> will be identical.
	4026	*
	4027	* If the target string is in UTF-8, the buffers pointed to by <c1_utf8>
	4028	* and <c2_utf8> will contain the one or two UTF-8 sequences of bytes that
	4029	* can match the beginning of <text_node>. They should be declared with at
	4030	* least length UTF8_MAXBYTES+1. (If the target string isn't in UTF-8, it is
	4031	* undefined what these contain.) If one or both of the buffers are
	4032	* invariant under UTF-8, <c1p>, and <c2p> will also be set to the
	4033	* corresponding invariant. If variant, the corresponding *<c1p> and/or
	4034	* *<c2p> will be set to a negative number(s) that shouldn't match any code
	4035	* point (unless inappropriately coerced to unsigned). *<c1p> will equal
	4036	* <c2p> if and only if <c1_utf8> and <c2_utf8> are the same. /
	4037
	4038	const bool utf8_target = reginfo->is_utf8_target;
	4039
	4040	UV c1 = (UV)CHRTEST_NOT_A_CP_1;
	4041	UV c2 = (UV)CHRTEST_NOT_A_CP_2;
	4042	bool use_chrtest_void = FALSE;
	4043	const bool is_utf8_pat = reginfo->is_utf8_pat;
	4044
	4045	/* Used when we have both utf8 input and utf8 output, to avoid converting
	4046	* to/from code points */
	4047	bool utf8_has_been_setup = FALSE;
	4048
	4049	dVAR;
	4050
	4051	U8 pat = (U8)STRING(text_node);
	4052	U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
	4053
	4054	if (OP(text_node) == EXACT \|\| OP(text_node) == EXACTL) {
	4055
	4056	/* In an exact node, only one thing can be matched, that first
	4057	* character. If both the pat and the target are UTF-8, we can just
	4058	* copy the input to the output, avoiding finding the code point of
	4059	* that character */
	4060	if (!is_utf8_pat) {
	4061	c2 = c1 = *pat;
	4062	}
	4063	else if (utf8_target) {
	4064	Copy(pat, c1_utf8, UTF8SKIP(pat), U8);
	4065	Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
	4066	utf8_has_been_setup = TRUE;
	4067	}
	4068	else {
	4069	c2 = c1 = valid_utf8_to_uvchr(pat, NULL);
	4070	}
	4071	}
	4072	else { /* an EXACTFish node */
	4073	U8 *pat_end = pat + STR_LEN(text_node);
	4074
	4075	/* An EXACTFL node has at least some characters unfolded, because what
	4076	* they match is not known until now. So, now is the time to fold
	4077	* the first few of them, as many as are needed to determine 'c1' and
	4078	* 'c2' later in the routine. If the pattern isn't UTF-8, we only need
	4079	* to fold if in a UTF-8 locale, and then only the Sharp S; everything
	4080	* else is 1-1 and isn't assumed to be folded. In a UTF-8 pattern, we
	4081	* need to fold as many characters as a single character can fold to,
	4082	* so that later we can check if the first ones are such a multi-char
	4083	* fold. But, in such a pattern only locale-problematic characters
	4084	* aren't folded, so we can skip this completely if the first character
	4085	* in the node isn't one of the tricky ones */
	4086	if (OP(text_node) == EXACTFL) {
	4087
	4088	if (! is_utf8_pat) {
	4089	if (IN_UTF8_CTYPE_LOCALE && *pat == LATIN_SMALL_LETTER_SHARP_S)
	4090	{
	4091	folded[0] = folded[1] = 's';
	4092	pat = folded;
	4093	pat_end = folded + 2;
	4094	}
	4095	}
	4096	else if (is_PROBLEMATIC_LOCALE_FOLDEDS_START_utf8(pat)) {
	4097	U8 *s = pat;
	4098	U8 *d = folded;
	4099	int i;
	4100
	4101	for (i = 0; i < UTF8_MAX_FOLD_CHAR_EXPAND && s < pat_end; i++) {
	4102	if (isASCII(*s)) {
	4103	(d++) = (U8) toFOLD_LC(s);
	4104	s++;
	4105	}
	4106	else {
	4107	STRLEN len;
	4108	_to_utf8_fold_flags(s,
	4109	d,
	4110	&len,
	4111	FOLD_FLAGS_FULL \| FOLD_FLAGS_LOCALE);
	4112	d += len;
	4113	s += UTF8SKIP(s);
	4114	}
	4115	}
	4116
	4117	pat = folded;
	4118	pat_end = d;
	4119	}
	4120	}
	4121
	4122	if ((is_utf8_pat && is_MULTI_CHAR_FOLD_utf8_safe(pat, pat_end))
	4123	\|\| (!is_utf8_pat && is_MULTI_CHAR_FOLD_latin1_safe(pat, pat_end)))
	4124	{
	4125	/* Multi-character folds require more context to sort out. Also
	4126	* PL_utf8_foldclosures used below doesn't handle them, so have to
	4127	* be handled outside this routine */
	4128	use_chrtest_void = TRUE;
	4129	}
	4130	else { /* an EXACTFish node which doesn't begin with a multi-char fold */
	4131	c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
	4132	if (c1 > 255) {
	4133	/* Load the folds hash, if not already done */
	4134	SV** listp;
	4135	if (! PL_utf8_foldclosures) {
	4136	_load_PL_utf8_foldclosures();
	4137	}
	4138
	4139	/* The fold closures data structure is a hash with the keys
	4140	* being the UTF-8 of every character that is folded to, like
	4141	* 'k', and the values each an array of all code points that
	4142	* fold to its key. e.g. [ 'k', 'K', KELVIN_SIGN ].
	4143	* Multi-character folds are not included */
	4144	if ((! (listp = hv_fetch(PL_utf8_foldclosures,
	4145	(char *) pat,
	4146	UTF8SKIP(pat),
	4147	FALSE))))
	4148	{
	4149	/* Not found in the hash, therefore there are no folds
	4150	* containing it, so there is only a single character that
	4151	* could match */
	4152	c2 = c1;
	4153	}
	4154	else { /* Does participate in folds */
	4155	AV* list = (AV) listp;
	4156	if (av_tindex_nomg(list) != 1) {
	4157
	4158	/* If there aren't exactly two folds to this, it is
	4159	* outside the scope of this function */
	4160	use_chrtest_void = TRUE;
	4161	}
	4162	else { /* There are two. Get them */
	4163	SV** c_p = av_fetch(list, 0, FALSE);
	4164	if (c_p == NULL) {
	4165	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	4166	}
	4167	c1 = SvUV(*c_p);
	4168
	4169	c_p = av_fetch(list, 1, FALSE);
	4170	if (c_p == NULL) {
	4171	Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
	4172	}
	4173	c2 = SvUV(*c_p);
	4174
	4175	/* Folds that cross the 255/256 boundary are forbidden
	4176	* if EXACTFL (and isnt a UTF8 locale), or EXACTFA and
	4177	* one is ASCIII. Since the pattern character is above
	4178	* 255, and its only other match is below 256, the only
	4179	* legal match will be to itself. We have thrown away
	4180	* the original, so have to compute which is the one
	4181	* above 255. */
	4182	if ((c1 < 256) != (c2 < 256)) {
	4183	if ((OP(text_node) == EXACTFL
	4184	&& ! IN_UTF8_CTYPE_LOCALE)
	4185	\|\| ((OP(text_node) == EXACTFA
	4186	\|\| OP(text_node) == EXACTFA_NO_TRIE)
	4187	&& (isASCII(c1) \|\| isASCII(c2))))
	4188	{
	4189	if (c1 < 256) {
	4190	c1 = c2;
	4191	}
	4192	else {
	4193	c2 = c1;
	4194	}
	4195	}
	4196	}
	4197	}
	4198	}
	4199	}
	4200	else /* Here, c1 is <= 255 */
	4201	if (utf8_target
	4202	&& HAS_NONLATIN1_FOLD_CLOSURE(c1)
	4203	&& ( ! (OP(text_node) == EXACTFL && ! IN_UTF8_CTYPE_LOCALE))
	4204	&& ((OP(text_node) != EXACTFA
	4205	&& OP(text_node) != EXACTFA_NO_TRIE)
	4206	\|\| ! isASCII(c1)))
	4207	{
	4208	/* Here, there could be something above Latin1 in the target
	4209	* which folds to this character in the pattern. All such
	4210	* cases except LATIN SMALL LETTER Y WITH DIAERESIS have more
	4211	* than two characters involved in their folds, so are outside
	4212	* the scope of this function */
	4213	if (UNLIKELY(c1 == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
	4214	c2 = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
	4215	}
	4216	else {
	4217	use_chrtest_void = TRUE;
	4218	}
	4219	}
	4220	else { /* Here nothing above Latin1 can fold to the pattern
	4221	character */
	4222	switch (OP(text_node)) {
	4223
	4224	case EXACTFL: /* /l rules */
	4225	c2 = PL_fold_locale[c1];
	4226	break;
	4227
	4228	case EXACTF: /* This node only generated for non-utf8
	4229	patterns */
	4230	assert(! is_utf8_pat);
	4231	if (! utf8_target) { /* /d rules */
	4232	c2 = PL_fold[c1];
	4233	break;
	4234	}
	4235	/* FALLTHROUGH */
	4236	/* /u rules for all these. This happens to work for
	4237	* EXACTFA as nothing in Latin1 folds to ASCII */
	4238	case EXACTFA_NO_TRIE: /* This node only generated for
	4239	non-utf8 patterns */
	4240	assert(! is_utf8_pat);
	4241	/* FALLTHROUGH */
	4242	case EXACTFA:
	4243	case EXACTFU_SS:
	4244	case EXACTFU:
	4245	c2 = PL_fold_latin1[c1];
	4246	break;
	4247
	4248	default:
	4249	Perl_croak(aTHX_ "panic: Unexpected op %u", OP(text_node));
	4250	NOT_REACHED; /* NOTREACHED */
	4251	}
	4252	}
	4253	}
	4254	}
	4255
	4256	/* Here have figured things out. Set up the returns */
	4257	if (use_chrtest_void) {
	4258	c2p = c1p = CHRTEST_VOID;
	4259	}
	4260	else if (utf8_target) {
	4261	if (! utf8_has_been_setup) { /* Don't have the utf8; must get it */
	4262	uvchr_to_utf8(c1_utf8, c1);
	4263	uvchr_to_utf8(c2_utf8, c2);
	4264	}
	4265
	4266	/* Invariants are stored in both the utf8 and byte outputs; Use
	4267	* negative numbers otherwise for the byte ones. Make sure that the
	4268	* byte ones are the same iff the utf8 ones are the same */
	4269	c1p = (UTF8_IS_INVARIANT(c1_utf8)) ? *c1_utf8 : CHRTEST_NOT_A_CP_1;
	4270	c2p = (UTF8_IS_INVARIANT(c2_utf8))
	4271	? *c2_utf8
	4272	: (c1 == c2)
	4273	? CHRTEST_NOT_A_CP_1
	4274	: CHRTEST_NOT_A_CP_2;
	4275	}
	4276	else if (c1 > 255) {
	4277	if (c2 > 255) { /* both possibilities are above what a non-utf8 string
	4278	can represent */
	4279	return FALSE;
	4280	}
	4281
	4282	c1p = c2p = c2; /* c2 is the only representable value */
	4283	}
	4284	else { /* c1 is representable; see about c2 */
	4285	*c1p = c1;
	4286	*c2p = (c2 < 256) ? c2 : c1;
	4287	}
	4288
	4289	return TRUE;
	4290	}
	4291
	4292	PERL_STATIC_INLINE bool
	4293	S_isGCB(const GCB_enum before, const GCB_enum after)
	4294	{
	4295	/* returns a boolean indicating if there is a Grapheme Cluster Boundary
	4296	* between the inputs. See http://www.unicode.org/reports/tr29/ */
	4297
	4298	return GCB_table[before][after];
	4299	}
	4300
	4301	/* Combining marks attach to most classes that precede them, but this defines
	4302	* the exceptions (from TR14) */
	4303	#define LB_CM_ATTACHES_TO(prev) ( ! ( prev == LB_EDGE \
	4304	\|\| prev == LB_Mandatory_Break \
	4305	\|\| prev == LB_Carriage_Return \
	4306	\|\| prev == LB_Line_Feed \
	4307	\|\| prev == LB_Next_Line \
	4308	\|\| prev == LB_Space \
	4309	\|\| prev == LB_ZWSpace))
	4310
	4311	STATIC bool
	4312	S_isLB(pTHX_ LB_enum before,
	4313	LB_enum after,
	4314	const U8 * const strbeg,
	4315	const U8 * const curpos,
	4316	const U8 * const strend,
	4317	const bool utf8_target)
	4318	{
	4319	U8 * temp_pos = (U8 *) curpos;
	4320	LB_enum prev = before;
	4321
	4322	/* Is the boundary between 'before' and 'after' line-breakable?
	4323	* Most of this is just a table lookup of a generated table from Unicode
	4324	* rules. But some rules require context to decide, and so have to be
	4325	* implemented in code */
	4326
	4327	PERL_ARGS_ASSERT_ISLB;
	4328
	4329	/* Rule numbers in the comments below are as of Unicode 8.0 */
	4330
	4331	redo:
	4332	before = prev;
	4333	switch (LB_table[before][after]) {
	4334	case LB_BREAKABLE:
	4335	return TRUE;
	4336
	4337	case LB_NOBREAK:
	4338	case LB_NOBREAK_EVEN_WITH_SP_BETWEEN:
	4339	return FALSE;
	4340
	4341	case LB_SP_foo + LB_BREAKABLE:
	4342	case LB_SP_foo + LB_NOBREAK:
	4343	case LB_SP_foo + LB_NOBREAK_EVEN_WITH_SP_BETWEEN:
	4344
	4345	/* When we have something following a SP, we have to look at the
	4346	* context in order to know what to do.
	4347	*
	4348	* SP SP should not reach here because LB7: Do not break before
	4349	* spaces. (For two spaces in a row there is nothing that
	4350	* overrides that) */
	4351	assert(after != LB_Space);
	4352
	4353	/* Here we have a space followed by a non-space. Mostly this is a
	4354	* case of LB18: "Break after spaces". But there are complications
	4355	* as the handling of spaces is somewhat tricky. They are in a
	4356	* number of rules, which have to be applied in priority order, but
	4357	* something earlier in the string can cause a rule to be skipped
	4358	* and a lower priority rule invoked. A prime example is LB7 which
	4359	* says don't break before a space. But rule LB8 (lower priority)
	4360	* says that the first break opportunity after a ZW is after any
	4361	* span of spaces immediately after it. If a ZW comes before a SP
	4362	* in the input, rule LB8 applies, and not LB7. Other such rules
	4363	* involve combining marks which are rules 9 and 10, but they may
	4364	* override higher priority rules if they come earlier in the
	4365	* string. Since we're doing random access into the middle of the
	4366	* string, we have to look for rules that should get applied based
	4367	* on both string position and priority. Combining marks do not
	4368	* attach to either ZW nor SP, so we don't have to consider them
	4369	* until later.
	4370	*
	4371	* To check for LB8, we have to find the first non-space character
	4372	* before this span of spaces */
	4373	do {
	4374	prev = backup_one_LB(strbeg, &temp_pos, utf8_target);
	4375	}
	4376	while (prev == LB_Space);
	4377
	4378	/* LB8 Break before any character following a zero-width space,
	4379	* even if one or more spaces intervene.
	4380	* ZW SP* ÷
	4381	* So if we have a ZW just before this span, and to get here this
	4382	* is the final space in the span. */
	4383	if (prev == LB_ZWSpace) {
	4384	return TRUE;
	4385	}
	4386
	4387	/* Here, not ZW SP+. There are several rules that have higher
	4388	* priority than LB18 and can be resolved now, as they don't depend
	4389	* on anything earlier in the string (except ZW, which we have
	4390	* already handled). One of these rules is LB11 Do not break
	4391	* before Word joiner, but we have specially encoded that in the
	4392	* lookup table so it is caught by the single test below which
	4393	* catches the other ones. */
	4394	if (LB_table[LB_Space][after] - LB_SP_foo
	4395	== LB_NOBREAK_EVEN_WITH_SP_BETWEEN)
	4396	{
	4397	return FALSE;
	4398	}
	4399
	4400	/* If we get here, we have to XXX consider combining marks. */
	4401	if (prev == LB_Combining_Mark) {
	4402
	4403	/* What happens with these depends on the character they
	4404	* follow. */
	4405	do {
	4406	prev = backup_one_LB(strbeg, &temp_pos, utf8_target);
	4407	}
	4408	while (prev == LB_Combining_Mark);
	4409
	4410	/* Most times these attach to and inherit the characteristics
	4411	* of that character, but not always, and when not, they are to
	4412	* be treated as AL by rule LB10. */
	4413	if (! LB_CM_ATTACHES_TO(prev)) {
	4414	prev = LB_Alphabetic;
	4415	}
	4416	}
	4417
	4418	/* Here, we have the character preceding the span of spaces all set
	4419	* up. We follow LB18: "Break after spaces" unless the table shows
	4420	* that is overriden */
	4421	return LB_table[prev][after] != LB_NOBREAK_EVEN_WITH_SP_BETWEEN;
	4422
	4423	case LB_CM_foo:
	4424
	4425	/* We don't know how to treat the CM except by looking at the first
	4426	* non-CM character preceding it */
	4427	do {
	4428	prev = backup_one_LB(strbeg, &temp_pos, utf8_target);
	4429	}
	4430	while (prev == LB_Combining_Mark);
	4431
	4432	/* Here, 'prev' is that first earlier non-CM character. If the CM
	4433	* attatches to it, then it inherits the behavior of 'prev'. If it
	4434	* doesn't attach, it is to be treated as an AL */
	4435	if (! LB_CM_ATTACHES_TO(prev)) {
	4436	prev = LB_Alphabetic;
	4437	}
	4438
	4439	goto redo;
	4440
	4441	case LB_HY_or_BA_then_foo + LB_BREAKABLE:
	4442	case LB_HY_or_BA_then_foo + LB_NOBREAK:
	4443
	4444	/* LB21a Don't break after Hebrew + Hyphen.
	4445	* HL (HY \| BA) × */
	4446
	4447	if (backup_one_LB(strbeg, &temp_pos, utf8_target)
	4448	== LB_Hebrew_Letter)
	4449	{
	4450	return FALSE;
	4451	}
	4452
	4453	return LB_table[prev][after] - LB_HY_or_BA_then_foo == LB_BREAKABLE;
	4454
	4455	case LB_PR_or_PO_then_OP_or_HY + LB_BREAKABLE:
	4456	case LB_PR_or_PO_then_OP_or_HY + LB_NOBREAK:
	4457
	4458	/* LB25a (PR \| PO) × ( OP \| HY )? NU */
	4459	if (advance_one_LB(&temp_pos, strend, utf8_target) == LB_Numeric) {
	4460	return FALSE;
	4461	}
	4462
	4463	return LB_table[prev][after] - LB_PR_or_PO_then_OP_or_HY
	4464	== LB_BREAKABLE;
	4465
	4466	case LB_SY_or_IS_then_various + LB_BREAKABLE:
	4467	case LB_SY_or_IS_then_various + LB_NOBREAK:
	4468	{
	4469	/* LB25d NU (SY \| IS)* × (NU \| SY \| IS \| CL \| CP ) */
	4470
	4471	LB_enum temp = prev;
	4472	do {
	4473	temp = backup_one_LB(strbeg, &temp_pos, utf8_target);
	4474	}
	4475	while (temp == LB_Break_Symbols \|\| temp == LB_Infix_Numeric);
	4476	if (temp == LB_Numeric) {
	4477	return FALSE;
	4478	}
	4479
	4480	return LB_table[prev][after] - LB_SY_or_IS_then_various
	4481	== LB_BREAKABLE;
	4482	}
	4483
	4484	case LB_various_then_PO_or_PR + LB_BREAKABLE:
	4485	case LB_various_then_PO_or_PR + LB_NOBREAK:
	4486	{
	4487	/* LB25e NU (SY \| IS)* (CL \| CP)? × (PO \| PR) */
	4488
	4489	LB_enum temp = prev;
	4490	if (temp == LB_Close_Punctuation \|\| temp == LB_Close_Parenthesis)
	4491	{
	4492	temp = backup_one_LB(strbeg, &temp_pos, utf8_target);
	4493	}
	4494	while (temp == LB_Break_Symbols \|\| temp == LB_Infix_Numeric) {
	4495	temp = backup_one_LB(strbeg, &temp_pos, utf8_target);
	4496	}
	4497	if (temp == LB_Numeric) {
	4498	return FALSE;
	4499	}
	4500	return LB_various_then_PO_or_PR;
	4501	}
	4502
	4503	default:
	4504	break;
	4505	}
	4506
	4507	#ifdef DEBUGGING
	4508	Perl_re_printf( aTHX_ "Unhandled LB pair: LB_table[%d, %d] = %d\n",
	4509	before, after, LB_table[before][after]);
	4510	assert(0);
	4511	#endif
	4512	return TRUE;
	4513	}
	4514
	4515	STATIC LB_enum
	4516	S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
	4517	{
	4518	LB_enum lb;
	4519
	4520	PERL_ARGS_ASSERT_ADVANCE_ONE_LB;
	4521
	4522	if (*curpos >= strend) {
	4523	return LB_EDGE;
	4524	}
	4525
	4526	if (utf8_target) {
	4527	curpos += UTF8SKIP(curpos);
	4528	if (*curpos >= strend) {
	4529	return LB_EDGE;
	4530	}
	4531	lb = getLB_VAL_UTF8(*curpos, strend);
	4532	}
	4533	else {
	4534	(*curpos)++;
	4535	if (*curpos >= strend) {
	4536	return LB_EDGE;
	4537	}
	4538	lb = getLB_VAL_CP(**curpos);
	4539	}
	4540
	4541	return lb;
	4542	}
	4543
	4544	STATIC LB_enum
	4545	S_backup_one_LB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
	4546	{
	4547	LB_enum lb;
	4548
	4549	PERL_ARGS_ASSERT_BACKUP_ONE_LB;
	4550
	4551	if (*curpos < strbeg) {
	4552	return LB_EDGE;
	4553	}
	4554
	4555	if (utf8_target) {
	4556	U8 * prev_char_pos = reghopmaybe3(*curpos, -1, strbeg);
	4557	U8 * prev_prev_char_pos;
	4558
	4559	if (! prev_char_pos) {
	4560	return LB_EDGE;
	4561	}
	4562
	4563	if ((prev_prev_char_pos = reghopmaybe3((U8 *) prev_char_pos, -1, strbeg))) {
	4564	lb = getLB_VAL_UTF8(prev_prev_char_pos, prev_char_pos);
	4565	*curpos = prev_char_pos;
	4566	prev_char_pos = prev_prev_char_pos;
	4567	}
	4568	else {
	4569	curpos = (U8 ) strbeg;
	4570	return LB_EDGE;
	4571	}
	4572	}
	4573	else {
	4574	if (*curpos - 2 < strbeg) {
	4575	curpos = (U8 ) strbeg;
	4576	return LB_EDGE;
	4577	}
	4578	(*curpos)--;
	4579	lb = getLB_VAL_CP((curpos - 1));
	4580	}
	4581
	4582	return lb;
	4583	}
	4584
	4585	STATIC bool
	4586	S_isSB(pTHX_ SB_enum before,
	4587	SB_enum after,
	4588	const U8 * const strbeg,
	4589	const U8 * const curpos,
	4590	const U8 * const strend,
	4591	const bool utf8_target)
	4592	{
	4593	/* returns a boolean indicating if there is a Sentence Boundary Break
	4594	* between the inputs. See http://www.unicode.org/reports/tr29/ */
	4595
	4596	U8 * lpos = (U8 *) curpos;
	4597	bool has_para_sep = FALSE;
	4598	bool has_sp = FALSE;
	4599
	4600	PERL_ARGS_ASSERT_ISSB;
	4601
	4602	/* Break at the start and end of text.
	4603	SB1. sot ÷
	4604	SB2. ÷ eot
	4605	But unstated in Unicode is don't break if the text is empty */
	4606	if (before == SB_EDGE \|\| after == SB_EDGE) {
	4607	return before != after;
	4608	}
	4609
	4610	/* SB 3: Do not break within CRLF. */
	4611	if (before == SB_CR && after == SB_LF) {
	4612	return FALSE;
	4613	}
	4614
	4615	/* Break after paragraph separators. CR and LF are considered
	4616	* so because Unicode views text as like word processing text where there
	4617	* are no newlines except between paragraphs, and the word processor takes
	4618	* care of wrapping without there being hard line-breaks in the text *./
	4619	SB4. Sep \| CR \| LF ÷ */
	4620	if (before == SB_Sep \|\| before == SB_CR \|\| before == SB_LF) {
	4621	return TRUE;
	4622	}
	4623
	4624	/* Ignore Format and Extend characters, except after sot, Sep, CR, or LF.
	4625	* (See Section 6.2, Replacing Ignore Rules.)
	4626	SB5. X (Extend \| Format)* → X */
	4627	if (after == SB_Extend \|\| after == SB_Format) {
	4628
	4629	/* Implied is that the these characters attach to everything
	4630	* immediately prior to them except for those separator-type
	4631	* characters. And the rules earlier have already handled the case
	4632	* when one of those immediately precedes the extend char */
	4633	return FALSE;
	4634	}
	4635
	4636	if (before == SB_Extend \|\| before == SB_Format) {
	4637	U8 * temp_pos = lpos;
	4638	const SB_enum backup = backup_one_SB(strbeg, &temp_pos, utf8_target);
	4639	if ( backup != SB_EDGE
	4640	&& backup != SB_Sep
	4641	&& backup != SB_CR
	4642	&& backup != SB_LF)
	4643	{
	4644	before = backup;
	4645	lpos = temp_pos;
	4646	}
	4647
	4648	/* Here, both 'before' and 'backup' are these types; implied is that we
	4649	* don't break between them */
	4650	if (backup == SB_Extend \|\| backup == SB_Format) {
	4651	return FALSE;
	4652	}
	4653	}
	4654
	4655	/* Do not break after ambiguous terminators like period, if they are
	4656	* immediately followed by a number or lowercase letter, if they are
	4657	* between uppercase letters, if the first following letter (optionally
	4658	* after certain punctuation) is lowercase, or if they are followed by
	4659	* "continuation" punctuation such as comma, colon, or semicolon. For
	4660	* example, a period may be an abbreviation or numeric period, and thus may
	4661	* not mark the end of a sentence.
	4662
	4663	* SB6. ATerm × Numeric */
	4664	if (before == SB_ATerm && after == SB_Numeric) {
	4665	return FALSE;
	4666	}
	4667
	4668	/* SB7. (Upper \| Lower) ATerm × Upper */
	4669	if (before == SB_ATerm && after == SB_Upper) {
	4670	U8 * temp_pos = lpos;
	4671	SB_enum backup = backup_one_SB(strbeg, &temp_pos, utf8_target);
	4672	if (backup == SB_Upper \|\| backup == SB_Lower) {
	4673	return FALSE;
	4674	}
	4675	}
	4676
	4677	/* The remaining rules that aren't the final one, all require an STerm or
	4678	* an ATerm after having backed up over some Close* Sp*, and in one case an
	4679	* optional Paragraph separator, although one rule doesn't have any Sp's in it.
	4680	* So do that backup now, setting flags if either Sp or a paragraph
	4681	* separator are found */
	4682
	4683	if (before == SB_Sep \|\| before == SB_CR \|\| before == SB_LF) {
	4684	has_para_sep = TRUE;
	4685	before = backup_one_SB(strbeg, &lpos, utf8_target);
	4686	}
	4687
	4688	if (before == SB_Sp) {
	4689	has_sp = TRUE;
	4690	do {
	4691	before = backup_one_SB(strbeg, &lpos, utf8_target);
	4692	}
	4693	while (before == SB_Sp);
	4694	}
	4695
	4696	while (before == SB_Close) {
	4697	before = backup_one_SB(strbeg, &lpos, utf8_target);
	4698	}
	4699
	4700	/* The next few rules apply only when the backed-up-to is an ATerm, and in
	4701	* most cases an STerm */
	4702	if (before == SB_STerm \|\| before == SB_ATerm) {
	4703
	4704	/* So, here the lhs matches
	4705	* (STerm \| ATerm) Close* Sp* (Sep \| CR \| LF)?
	4706	* and we have set flags if we found an Sp, or the optional Sep,CR,LF.
	4707	* The rules that apply here are:
	4708	*
	4709	* SB8 ATerm Close* Sp* × ( ¬(OLetter \| Upper \| Lower \| Sep \| CR
	4710	\| LF \| STerm \| ATerm) )* Lower
	4711	SB8a (STerm \| ATerm) Close* Sp* × (SContinue \| STerm \| ATerm)
	4712	SB9 (STerm \| ATerm) Close* × (Close \| Sp \| Sep \| CR \| LF)
	4713	SB10 (STerm \| ATerm) Close* Sp* × (Sp \| Sep \| CR \| LF)
	4714	SB11 (STerm \| ATerm) Close* Sp* (Sep \| CR \| LF)? ÷
	4715	*/
	4716
	4717	/* And all but SB11 forbid having seen a paragraph separator */
	4718	if (! has_para_sep) {
	4719	if (before == SB_ATerm) { /* SB8 */
	4720	U8 * rpos = (U8 *) curpos;
	4721	SB_enum later = after;
	4722
	4723	while ( later != SB_OLetter
	4724	&& later != SB_Upper
	4725	&& later != SB_Lower
	4726	&& later != SB_Sep
	4727	&& later != SB_CR
	4728	&& later != SB_LF
	4729	&& later != SB_STerm
	4730	&& later != SB_ATerm
	4731	&& later != SB_EDGE)
	4732	{
	4733	later = advance_one_SB(&rpos, strend, utf8_target);
	4734	}
	4735	if (later == SB_Lower) {
	4736	return FALSE;
	4737	}
	4738	}
	4739
	4740	if ( after == SB_SContinue /* SB8a */
	4741	\|\| after == SB_STerm
	4742	\|\| after == SB_ATerm)
	4743	{
	4744	return FALSE;
	4745	}
	4746
	4747	if (! has_sp) { /* SB9 applies only if there was no Sp* */
	4748	if ( after == SB_Close
	4749	\|\| after == SB_Sp
	4750	\|\| after == SB_Sep
	4751	\|\| after == SB_CR
	4752	\|\| after == SB_LF)
	4753	{
	4754	return FALSE;
	4755	}
	4756	}
	4757
	4758	/* SB10. This and SB9 could probably be combined some way, but khw
	4759	* has decided to follow the Unicode rule book precisely for
	4760	* simplified maintenance */
	4761	if ( after == SB_Sp
	4762	\|\| after == SB_Sep
	4763	\|\| after == SB_CR
	4764	\|\| after == SB_LF)
	4765	{
	4766	return FALSE;
	4767	}
	4768	}
	4769
	4770	/* SB11. */
	4771	return TRUE;
	4772	}
	4773
	4774	/* Otherwise, do not break.
	4775	SB12. Any × Any */
	4776
	4777	return FALSE;
	4778	}
	4779
	4780	STATIC SB_enum
	4781	S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
	4782	{
	4783	SB_enum sb;
	4784
	4785	PERL_ARGS_ASSERT_ADVANCE_ONE_SB;
	4786
	4787	if (*curpos >= strend) {
	4788	return SB_EDGE;
	4789	}
	4790
	4791	if (utf8_target) {
	4792	do {
	4793	curpos += UTF8SKIP(curpos);
	4794	if (*curpos >= strend) {
	4795	return SB_EDGE;
	4796	}
	4797	sb = getSB_VAL_UTF8(*curpos, strend);
	4798	} while (sb == SB_Extend \|\| sb == SB_Format);
	4799	}
	4800	else {
	4801	do {
	4802	(*curpos)++;
	4803	if (*curpos >= strend) {
	4804	return SB_EDGE;
	4805	}
	4806	sb = getSB_VAL_CP(**curpos);
	4807	} while (sb == SB_Extend \|\| sb == SB_Format);
	4808	}
	4809
	4810	return sb;
	4811	}
	4812
	4813	STATIC SB_enum
	4814	S_backup_one_SB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
	4815	{
	4816	SB_enum sb;
	4817
	4818	PERL_ARGS_ASSERT_BACKUP_ONE_SB;
	4819
	4820	if (*curpos < strbeg) {
	4821	return SB_EDGE;
	4822	}
	4823
	4824	if (utf8_target) {
	4825	U8 * prev_char_pos = reghopmaybe3(*curpos, -1, strbeg);
	4826	if (! prev_char_pos) {
	4827	return SB_EDGE;
	4828	}
	4829
	4830	/* Back up over Extend and Format. curpos is always just to the right
	4831	* of the characater whose value we are getting */
	4832	do {
	4833	U8 * prev_prev_char_pos;
	4834	if ((prev_prev_char_pos = reghopmaybe3((U8 *) prev_char_pos, -1,
	4835	strbeg)))
	4836	{
	4837	sb = getSB_VAL_UTF8(prev_prev_char_pos, prev_char_pos);
	4838	*curpos = prev_char_pos;
	4839	prev_char_pos = prev_prev_char_pos;
	4840	}
	4841	else {
	4842	curpos = (U8 ) strbeg;
	4843	return SB_EDGE;
	4844	}
	4845	} while (sb == SB_Extend \|\| sb == SB_Format);
	4846	}
	4847	else {
	4848	do {
	4849	if (*curpos - 2 < strbeg) {
	4850	curpos = (U8 ) strbeg;
	4851	return SB_EDGE;
	4852	}
	4853	(*curpos)--;
	4854	sb = getSB_VAL_CP((curpos - 1));
	4855	} while (sb == SB_Extend \|\| sb == SB_Format);
	4856	}
	4857
	4858	return sb;
	4859	}
	4860
	4861	STATIC bool
	4862	S_isWB(pTHX_ WB_enum previous,
	4863	WB_enum before,
	4864	WB_enum after,
	4865	const U8 * const strbeg,
	4866	const U8 * const curpos,
	4867	const U8 * const strend,
	4868	const bool utf8_target)
	4869	{
	4870	/* Return a boolean as to if the boundary between 'before' and 'after' is
	4871	* a Unicode word break, using their published algorithm, but tailored for
	4872	* Perl by treating spans of white space as one unit. Context may be
	4873	* needed to make this determination. If the value for the character
	4874	* before 'before' is known, it is passed as 'previous'; otherwise that
	4875	* should be set to WB_UNKNOWN. The other input parameters give the
	4876	* boundaries and current position in the matching of the string. That
	4877	* is, 'curpos' marks the position where the character whose wb value is
	4878	* 'after' begins. See http://www.unicode.org/reports/tr29/ */
	4879
	4880	U8 * before_pos = (U8 *) curpos;
	4881	U8 * after_pos = (U8 *) curpos;
	4882	WB_enum prev = before;
	4883	WB_enum next;
	4884
	4885	PERL_ARGS_ASSERT_ISWB;
	4886
	4887	/* Rule numbers in the comments below are as of Unicode 8.0 */
	4888
	4889	redo:
	4890	before = prev;
	4891	switch (WB_table[before][after]) {
	4892	case WB_BREAKABLE:
	4893	return TRUE;
	4894
	4895	case WB_NOBREAK:
	4896	return FALSE;
	4897
	4898	case WB_hs_then_hs: /* 2 horizontal spaces in a row */
	4899	next = advance_one_WB(&after_pos, strend, utf8_target,
	4900	FALSE /* Don't skip Extend nor Format */ );
	4901	/* A space immediately preceeding an Extend or Format is attached
	4902	* to by them, and hence gets separated from previous spaces.
	4903	* Otherwise don't break between horizontal white space */
	4904	return next == WB_Extend \|\| next == WB_Format;
	4905
	4906	/* WB4 Ignore Format and Extend characters, except when they appear at
	4907	* the beginning of a region of text. This code currently isn't
	4908	* general purpose, but it works as the rules are currently and likely
	4909	* to be laid out. The reason it works is that when 'they appear at
	4910	* the beginning of a region of text', the rule is to break before
	4911	* them, just like any other character. Therefore, the default rule
	4912	* applies and we don't have to look in more depth. Should this ever
	4913	* change, we would have to have 2 'case' statements, like in the
	4914	* rules below, and backup a single character (not spacing over the
	4915	* extend ones) and then see if that is one of the region-end
	4916	* characters and go from there */
	4917	case WB_Ex_or_FO_then_foo:
	4918	prev = backup_one_WB(&previous, strbeg, &before_pos, utf8_target);
	4919	goto redo;
	4920
	4921	case WB_DQ_then_HL + WB_BREAKABLE:
	4922	case WB_DQ_then_HL + WB_NOBREAK:
	4923
	4924	/* WB7c Hebrew_Letter Double_Quote × Hebrew_Letter */
	4925
	4926	if (backup_one_WB(&previous, strbeg, &before_pos, utf8_target)
	4927	== WB_Hebrew_Letter)
	4928	{
	4929	return FALSE;
	4930	}
	4931
	4932	return WB_table[before][after] - WB_DQ_then_HL == WB_BREAKABLE;
	4933
	4934	case WB_HL_then_DQ + WB_BREAKABLE:
	4935	case WB_HL_then_DQ + WB_NOBREAK:
	4936
	4937	/* WB7b Hebrew_Letter × Double_Quote Hebrew_Letter */
	4938
	4939	if (advance_one_WB(&after_pos, strend, utf8_target,
	4940	TRUE /* Do skip Extend and Format */ )
	4941	== WB_Hebrew_Letter)
	4942	{
	4943	return FALSE;
	4944	}
	4945
	4946	return WB_table[before][after] - WB_HL_then_DQ == WB_BREAKABLE;
	4947
	4948	case WB_LE_or_HL_then_MB_or_ML_or_SQ + WB_NOBREAK:
	4949	case WB_LE_or_HL_then_MB_or_ML_or_SQ + WB_BREAKABLE:
	4950
	4951	/* WB6 (ALetter \| Hebrew_Letter) × (MidLetter \| MidNumLet
	4952	* \| Single_Quote) (ALetter \| Hebrew_Letter) */
	4953
	4954	next = advance_one_WB(&after_pos, strend, utf8_target,
	4955	TRUE /* Do skip Extend and Format */ );
	4956
	4957	if (next == WB_ALetter \|\| next == WB_Hebrew_Letter)
	4958	{
	4959	return FALSE;
	4960	}
	4961
	4962	return WB_table[before][after]
	4963	- WB_LE_or_HL_then_MB_or_ML_or_SQ == WB_BREAKABLE;
	4964
	4965	case WB_MB_or_ML_or_SQ_then_LE_or_HL + WB_NOBREAK:
	4966	case WB_MB_or_ML_or_SQ_then_LE_or_HL + WB_BREAKABLE:
	4967
	4968	/* WB7 (ALetter \| Hebrew_Letter) (MidLetter \| MidNumLet
	4969	* \| Single_Quote) × (ALetter \| Hebrew_Letter) */
	4970
	4971	prev = backup_one_WB(&previous, strbeg, &before_pos, utf8_target);
	4972	if (prev == WB_ALetter \|\| prev == WB_Hebrew_Letter)
	4973	{
	4974	return FALSE;
	4975	}
	4976
	4977	return WB_table[before][after]
	4978	- WB_MB_or_ML_or_SQ_then_LE_or_HL == WB_BREAKABLE;
	4979
	4980	case WB_MB_or_MN_or_SQ_then_NU + WB_NOBREAK:
	4981	case WB_MB_or_MN_or_SQ_then_NU + WB_BREAKABLE:
	4982
	4983	/* WB11 Numeric (MidNum \| (MidNumLet \| Single_Quote)) × Numeric
	4984	* */
	4985
	4986	if (backup_one_WB(&previous, strbeg, &before_pos, utf8_target)
	4987	== WB_Numeric)
	4988	{
	4989	return FALSE;
	4990	}
	4991
	4992	return WB_table[before][after]
	4993	- WB_MB_or_MN_or_SQ_then_NU == WB_BREAKABLE;
	4994
	4995	case WB_NU_then_MB_or_MN_or_SQ + WB_NOBREAK:
	4996	case WB_NU_then_MB_or_MN_or_SQ + WB_BREAKABLE:
	4997
	4998	/* WB12 Numeric × (MidNum \| MidNumLet \| Single_Quote) Numeric */
	4999
	5000	if (advance_one_WB(&after_pos, strend, utf8_target,
	5001	TRUE /* Do skip Extend and Format */ )
	5002	== WB_Numeric)
	5003	{
	5004	return FALSE;
	5005	}
	5006
	5007	return WB_table[before][after]
	5008	- WB_NU_then_MB_or_MN_or_SQ == WB_BREAKABLE;
	5009
	5010	default:
	5011	break;
	5012	}
	5013
	5014	#ifdef DEBUGGING
	5015	Perl_re_printf( aTHX_ "Unhandled WB pair: WB_table[%d, %d] = %d\n",
	5016	before, after, WB_table[before][after]);
	5017	assert(0);
	5018	#endif
	5019	return TRUE;
	5020	}
	5021
	5022	STATIC WB_enum
	5023	S_advance_one_WB(pTHX_ U8 ** curpos,
	5024	const U8 * const strend,
	5025	const bool utf8_target,
	5026	const bool skip_Extend_Format)
	5027	{
	5028	WB_enum wb;
	5029
	5030	PERL_ARGS_ASSERT_ADVANCE_ONE_WB;
	5031
	5032	if (*curpos >= strend) {
	5033	return WB_EDGE;
	5034	}
	5035
	5036	if (utf8_target) {
	5037
	5038	/* Advance over Extend and Format */
	5039	do {
	5040	curpos += UTF8SKIP(curpos);
	5041	if (*curpos >= strend) {
	5042	return WB_EDGE;
	5043	}
	5044	wb = getWB_VAL_UTF8(*curpos, strend);
	5045	} while ( skip_Extend_Format
	5046	&& (wb == WB_Extend \|\| wb == WB_Format));
	5047	}
	5048	else {
	5049	do {
	5050	(*curpos)++;
	5051	if (*curpos >= strend) {
	5052	return WB_EDGE;
	5053	}
	5054	wb = getWB_VAL_CP(**curpos);
	5055	} while ( skip_Extend_Format
	5056	&& (wb == WB_Extend \|\| wb == WB_Format));
	5057	}
	5058
	5059	return wb;
	5060	}
	5061
	5062	STATIC WB_enum
	5063	S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
	5064	{
	5065	WB_enum wb;
	5066
	5067	PERL_ARGS_ASSERT_BACKUP_ONE_WB;
	5068
	5069	/* If we know what the previous character's break value is, don't have
	5070	* to look it up */
	5071	if (*previous != WB_UNKNOWN) {
	5072	wb = *previous;
	5073
	5074	/* But we need to move backwards by one */
	5075	if (utf8_target) {
	5076	curpos = reghopmaybe3(curpos, -1, strbeg);
	5077	if (! *curpos) {
	5078	*previous = WB_EDGE;
	5079	curpos = (U8 ) strbeg;
	5080	}
	5081	else {
	5082	*previous = WB_UNKNOWN;
	5083	}
	5084	}
	5085	else {
	5086	(*curpos)--;
	5087	previous = (curpos <= strbeg) ? WB_EDGE : WB_UNKNOWN;
	5088	}
	5089
	5090	/* And we always back up over these two types */
	5091	if (wb != WB_Extend && wb != WB_Format) {
	5092	return wb;
	5093	}
	5094	}
	5095
	5096	if (*curpos < strbeg) {
	5097	return WB_EDGE;
	5098	}
	5099
	5100	if (utf8_target) {
	5101	U8 * prev_char_pos = reghopmaybe3(*curpos, -1, strbeg);
	5102	if (! prev_char_pos) {
	5103	return WB_EDGE;
	5104	}
	5105
	5106	/* Back up over Extend and Format. curpos is always just to the right
	5107	* of the characater whose value we are getting */
	5108	do {
	5109	U8 * prev_prev_char_pos;
	5110	if ((prev_prev_char_pos = reghopmaybe3((U8 *) prev_char_pos,
	5111	-1,
	5112	strbeg)))
	5113	{
	5114	wb = getWB_VAL_UTF8(prev_prev_char_pos, prev_char_pos);
	5115	*curpos = prev_char_pos;
	5116	prev_char_pos = prev_prev_char_pos;
	5117	}
	5118	else {
	5119	curpos = (U8 ) strbeg;
	5120	return WB_EDGE;
	5121	}
	5122	} while (wb == WB_Extend \|\| wb == WB_Format);
	5123	}
	5124	else {
	5125	do {
	5126	if (*curpos - 2 < strbeg) {
	5127	curpos = (U8 ) strbeg;
	5128	return WB_EDGE;
	5129	}
	5130	(*curpos)--;
	5131	wb = getWB_VAL_CP((curpos - 1));
	5132	} while (wb == WB_Extend \|\| wb == WB_Format);
	5133	}
	5134
	5135	return wb;
	5136	}
	5137
	5138	#define EVAL_CLOSE_PAREN_IS(st,expr) \
	5139	( \
	5140	( ( st ) ) && \
	5141	( ( st )->u.eval.close_paren ) && \
	5142	( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) ) \
	5143	)
	5144
	5145	#define EVAL_CLOSE_PAREN_IS_TRUE(st,expr) \
	5146	( \
	5147	( ( st ) ) && \
	5148	( ( st )->u.eval.close_paren ) && \
	5149	( ( expr ) ) && \
	5150	( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) ) \
	5151	)
	5152
	5153
	5154	#define EVAL_CLOSE_PAREN_SET(st,expr) \
	5155	(st)->u.eval.close_paren = ( (expr) + 1 )
	5156
	5157	#define EVAL_CLOSE_PAREN_CLEAR(st) \
	5158	(st)->u.eval.close_paren = 0
	5159
	5160	/* returns -1 on failure, $+[0] on success */
	5161	STATIC SSize_t
	5162	S_regmatch(pTHX_ regmatch_info reginfo, char startpos, regnode *prog)
	5163	{
	5164
	5165	#if PERL_VERSION < 9 && !defined(PERL_CORE)
	5166	dMY_CXT;
	5167	#endif
	5168	dVAR;
	5169	const bool utf8_target = reginfo->is_utf8_target;
	5170	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	5171	REGEXP *rex_sv = reginfo->prog;
	5172	regexp *rex = ReANY(rex_sv);
	5173	RXi_GET_DECL(rex,rexi);
	5174	/* the current state. This is a cached copy of PL_regmatch_state */
	5175	regmatch_state *st;
	5176	/* cache heavy used fields of st in registers */
	5177	regnode *scan;
	5178	regnode *next;
	5179	U32 n = 0; /* general value; init to avoid compiler warning */
	5180	SSize_t ln = 0; /* len or last; init to avoid compiler warning */
	5181	char *locinput = startpos;
	5182	char pushinput; / where to continue after a PUSH */
	5183	I32 nextchr; /* is always set to UCHARAT(locinput), or -1 at EOS */
	5184
	5185	bool result = 0; /* return value of S_regmatch */
	5186	int depth = 0; /* depth of backtrack stack */
	5187	U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
	5188	const U32 max_nochange_depth =
	5189	(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
	5190	3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
	5191	regmatch_state yes_state = NULL; / state to pop to on success of
	5192	subpattern */
	5193	/* mark_state piggy backs on the yes_state logic so that when we unwind
	5194	the stack on success we can update the mark_state as we go */
	5195	regmatch_state mark_state = NULL; / last mark state we have seen */
	5196	regmatch_state cur_eval = NULL; / most recent EVAL_AB state */
	5197	struct regmatch_state cur_curlyx = NULL; / most recent curlyx */
	5198	U32 state_num;
	5199	bool no_final = 0; /* prevent failure from backtracking? */
	5200	bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
	5201	char *startpoint = locinput;
	5202	SV popmark = NULL; / are we looking for a mark? */
	5203	SV sv_commit = NULL; / last mark name seen in failure */
	5204	SV sv_yes_mark = NULL; / last mark name we have seen
	5205	during a successful match */
	5206	U32 lastopen = 0; /* last open we saw */
	5207	bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
	5208	SV* const oreplsv = GvSVn(PL_replgv);
	5209	/* these three flags are set by various ops to signal information to
	5210	* the very next op. They have a useful lifetime of exactly one loop
	5211	* iteration, and are not preserved or restored by state pushes/pops
	5212	*/
	5213	bool sw = 0; /* the condition value in (?(cond)a\|b) */
	5214	bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
	5215	int logical = 0; /* the following EVAL is:
	5216	0: (?{...})
	5217	1: (?(?{...})X\|Y)
	5218	2: (??{...})
	5219	or the following IFMATCH/UNLESSM is:
	5220	false: plain (?=foo)
	5221	true: used as a condition: (?(?=foo))
	5222	*/
	5223	PAD* last_pad = NULL;
	5224	dMULTICALL;
	5225	U8 gimme = G_SCALAR;
	5226	CV caller_cv = NULL; / who called us */
	5227	CV last_pushed_cv = NULL; / most recently called (?{}) CV */
	5228	CHECKPOINT runops_cp; /* savestack position before executing EVAL */
	5229	U32 maxopenparen = 0; /* max '(' index seen so far */
	5230	int to_complement; /* Invert the result? */
	5231	_char_class_number classnum;
	5232	bool is_utf8_pat = reginfo->is_utf8_pat;
	5233	bool match = FALSE;
	5234
	5235	/* Solaris Studio 12.3 messes up fetching PL_charclass['\n'] */
	5236	#if (defined(__SUNPRO_C) && (__SUNPRO_C == 0x5120) && defined(__x86_64) && defined(USE_64_BIT_ALL))
	5237	# define SOLARIS_BAD_OPTIMIZER
	5238	const U32 *pl_charclass_dup = PL_charclass;
	5239	# define PL_charclass pl_charclass_dup
	5240	#endif
	5241
	5242	#ifdef DEBUGGING
	5243	GET_RE_DEBUG_FLAGS_DECL;
	5244	#endif
	5245
	5246	/* protect against undef(^R) /
	5247	SAVEFREESV(SvREFCNT_inc_simple_NN(oreplsv));
	5248
	5249	/* shut up 'may be used uninitialized' compiler warnings for dMULTICALL */
	5250	multicall_oldcatch = 0;
	5251	PERL_UNUSED_VAR(multicall_cop);
	5252
	5253	PERL_ARGS_ASSERT_REGMATCH;
	5254
	5255	DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
	5256	Perl_re_printf( aTHX_ "regmatch start\n");
	5257	}));
	5258
	5259	st = PL_regmatch_state;
	5260
	5261	/* Note that nextchr is a byte even in UTF */
	5262	SET_nextchr;
	5263	scan = prog;
	5264	while (scan != NULL) {
	5265
	5266
	5267	next = scan + NEXT_OFF(scan);
	5268	if (next == scan)
	5269	next = NULL;
	5270	state_num = OP(scan);
	5271
	5272	reenter_switch:
	5273	DEBUG_EXECUTE_r(
	5274	if (state_num <= REGNODE_MAX) {
	5275	SV * const prop = sv_newmortal();
	5276	regnode *rnext = regnext(scan);
	5277
	5278	DUMP_EXEC_POS( locinput, scan, utf8_target, depth );
	5279	regprop(rex, prop, scan, reginfo, NULL);
	5280	Perl_re_printf( aTHX_
	5281	"%*s%"IVdf":%s(%"IVdf")\n",
	5282	INDENT_CHARS(depth), "",
	5283	(IV)(scan - rexi->program),
	5284	SvPVX_const(prop),
	5285	(PL_regkind[OP(scan)] == END \|\| !rnext) ?
	5286	0 : (IV)(rnext - rexi->program));
	5287	}
	5288	);
	5289
	5290	to_complement = 0;
	5291
	5292	SET_nextchr;
	5293	assert(nextchr < 256 && (nextchr >= 0 \|\| nextchr == NEXTCHR_EOS));
	5294
	5295	switch (state_num) {
	5296	case SBOL: /* /^../ and /\A../ */
	5297	if (locinput == reginfo->strbeg)
	5298	break;
	5299	sayNO;
	5300
	5301	case MBOL: /* /^../m */
	5302	if (locinput == reginfo->strbeg \|\|
	5303	(!NEXTCHR_IS_EOS && locinput[-1] == '\n'))
	5304	{
	5305	break;
	5306	}
	5307	sayNO;
	5308
	5309	case GPOS: /* \G */
	5310	if (locinput == reginfo->ganch)
	5311	break;
	5312	sayNO;
	5313
	5314	case KEEPS: /* \K */
	5315	/* update the startpoint */
	5316	st->u.keeper.val = rex->offs[0].start;
	5317	rex->offs[0].start = locinput - reginfo->strbeg;
	5318	PUSH_STATE_GOTO(KEEPS_next, next, locinput);
	5319	NOT_REACHED; /* NOTREACHED */
	5320
	5321	case KEEPS_next_fail:
	5322	/* rollback the start point change */
	5323	rex->offs[0].start = st->u.keeper.val;
	5324	sayNO_SILENT;
	5325	NOT_REACHED; /* NOTREACHED */
	5326
	5327	case MEOL: /* /..$/m */
	5328	if (!NEXTCHR_IS_EOS && nextchr != '\n')
	5329	sayNO;
	5330	break;
	5331
	5332	case SEOL: /* /..$/ */
	5333	if (!NEXTCHR_IS_EOS && nextchr != '\n')
	5334	sayNO;
	5335	if (reginfo->strend - locinput > 1)
	5336	sayNO;
	5337	break;
	5338
	5339	case EOS: /* \z */
	5340	if (!NEXTCHR_IS_EOS)
	5341	sayNO;
	5342	break;
	5343
	5344	case SANY: /* /./s */
	5345	if (NEXTCHR_IS_EOS)
	5346	sayNO;
	5347	goto increment_locinput;
	5348
	5349	case REG_ANY: /* /./ */
	5350	if ((NEXTCHR_IS_EOS) \|\| nextchr == '\n')
	5351	sayNO;
	5352	goto increment_locinput;
	5353
	5354
	5355	#undef ST
	5356	#define ST st->u.trie
	5357	case TRIEC: /* (ab\|cd) with known charclass */
	5358	/* In this case the charclass data is available inline so
	5359	we can fail fast without a lot of extra overhead.
	5360	*/
	5361	if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
	5362	DEBUG_EXECUTE_r(
	5363	Perl_re_exec_indentf( aTHX_ "%sfailed to match trie start class...%s\n",
	5364	depth, PL_colors[4], PL_colors[5])
	5365	);
	5366	sayNO_SILENT;
	5367	NOT_REACHED; /* NOTREACHED */
	5368	}
	5369	/* FALLTHROUGH */
	5370	case TRIE: /* (ab\|cd) */
	5371	/* the basic plan of execution of the trie is:
	5372	* At the beginning, run though all the states, and
	5373	* find the longest-matching word. Also remember the position
	5374	* of the shortest matching word. For example, this pattern:
	5375	* 1 2 3 4 5
	5376	* ab\|a\|x\|abcd\|abc
	5377	* when matched against the string "abcde", will generate
	5378	* accept states for all words except 3, with the longest
	5379	* matching word being 4, and the shortest being 2 (with
	5380	* the position being after char 1 of the string).
	5381	*
	5382	* Then for each matching word, in word order (i.e. 1,2,4,5),
	5383	* we run the remainder of the pattern; on each try setting
	5384	* the current position to the character following the word,
	5385	* returning to try the next word on failure.
	5386	*
	5387	* We avoid having to build a list of words at runtime by
	5388	* using a compile-time structure, wordinfo[].prev, which
	5389	* gives, for each word, the previous accepting word (if any).
	5390	* In the case above it would contain the mappings 1->2, 2->0,
	5391	* 3->0, 4->5, 5->1. We can use this table to generate, from
	5392	* the longest word (4 above), a list of all words, by
	5393	* following the list of prev pointers; this gives us the
	5394	* unordered list 4,5,1,2. Then given the current word we have
	5395	* just tried, we can go through the list and find the
	5396	* next-biggest word to try (so if we just failed on word 2,
	5397	* the next in the list is 4).
	5398	*
	5399	* Since at runtime we don't record the matching position in
	5400	* the string for each word, we have to work that out for
	5401	* each word we're about to process. The wordinfo table holds
	5402	* the character length of each word; given that we recorded
	5403	* at the start: the position of the shortest word and its
	5404	* length in chars, we just need to move the pointer the
	5405	* difference between the two char lengths. Depending on
	5406	* Unicode status and folding, that's cheap or expensive.
	5407	*
	5408	* This algorithm is optimised for the case where are only a
	5409	* small number of accept states, i.e. 0,1, or maybe 2.
	5410	* With lots of accepts states, and having to try all of them,
	5411	* it becomes quadratic on number of accept states to find all
	5412	* the next words.
	5413	*/
	5414
	5415	{
	5416	/* what type of TRIE am I? (utf8 makes this contextual) */
	5417	DECL_TRIE_TYPE(scan);
	5418
	5419	/* what trie are we using right now */
	5420	reg_trie_data * const trie
	5421	= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
	5422	HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
	5423	U32 state = trie->startstate;
	5424
	5425	if (scan->flags == EXACTL \|\| scan->flags == EXACTFLU8) {
	5426	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	5427	if (utf8_target
	5428	&& UTF8_IS_ABOVE_LATIN1(nextchr)
	5429	&& scan->flags == EXACTL)
	5430	{
	5431	/* We only output for EXACTL, as we let the folder
	5432	* output this message for EXACTFLU8 to avoid
	5433	* duplication */
	5434	_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput,
	5435	reginfo->strend);
	5436	}
	5437	}
	5438	if ( trie->bitmap
	5439	&& (NEXTCHR_IS_EOS \|\| !TRIE_BITMAP_TEST(trie, nextchr)))
	5440	{
	5441	if (trie->states[ state ].wordnum) {
	5442	DEBUG_EXECUTE_r(
	5443	Perl_re_exec_indentf( aTHX_ "%smatched empty string...%s\n",
	5444	depth, PL_colors[4], PL_colors[5])
	5445	);
	5446	if (!trie->jump)
	5447	break;
	5448	} else {
	5449	DEBUG_EXECUTE_r(
	5450	Perl_re_exec_indentf( aTHX_ "%sfailed to match trie start class...%s\n",
	5451	depth, PL_colors[4], PL_colors[5])
	5452	);
	5453	sayNO_SILENT;
	5454	}
	5455	}
	5456
	5457	{
	5458	U8 uc = ( U8 )locinput;
	5459
	5460	STRLEN len = 0;
	5461	STRLEN foldlen = 0;
	5462	U8 uscan = (U8)NULL;
	5463	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	5464	U32 charcount = 0; /* how many input chars we have matched */
	5465	U32 accepted = 0; /* have we seen any accepting states? */
	5466
	5467	ST.jump = trie->jump;
	5468	ST.me = scan;
	5469	ST.firstpos = NULL;
	5470	ST.longfold = FALSE; /* char longer if folded => it's harder */
	5471	ST.nextword = 0;
	5472
	5473	/* fully traverse the TRIE; note the position of the
	5474	shortest accept state and the wordnum of the longest
	5475	accept state */
	5476
	5477	while ( state && uc <= (U8*)(reginfo->strend) ) {
	5478	U32 base = trie->states[ state ].trans.base;
	5479	UV uvc = 0;
	5480	U16 charid = 0;
	5481	U16 wordnum;
	5482	wordnum = trie->states[ state ].wordnum;
	5483
	5484	if (wordnum) { /* it's an accept state */
	5485	if (!accepted) {
	5486	accepted = 1;
	5487	/* record first match position */
	5488	if (ST.longfold) {
	5489	ST.firstpos = (U8*)locinput;
	5490	ST.firstchars = 0;
	5491	}
	5492	else {
	5493	ST.firstpos = uc;
	5494	ST.firstchars = charcount;
	5495	}
	5496	}
	5497	if (!ST.nextword \|\| wordnum < ST.nextword)
	5498	ST.nextword = wordnum;
	5499	ST.topword = wordnum;
	5500	}
	5501
	5502	DEBUG_TRIE_EXECUTE_r({
	5503	DUMP_EXEC_POS( (char *)uc, scan, utf8_target, depth );
	5504	Perl_re_exec_indentf( aTHX_
	5505	"%sState: %4"UVxf" Accepted: %c ",
	5506	depth, PL_colors[4],
	5507	(UV)state, (accepted ? 'Y' : 'N'));
	5508	});
	5509
	5510	/* read a char and goto next state */
	5511	if ( base && (foldlen \|\| uc < (U8*)(reginfo->strend))) {
	5512	I32 offset;
	5513	REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
	5514	uscan, len, uvc, charid, foldlen,
	5515	foldbuf, uniflags);
	5516	charcount++;
	5517	if (foldlen>0)
	5518	ST.longfold = TRUE;
	5519	if (charid &&
	5520	( ((offset =
	5521	base + charid - 1 - trie->uniquecharcount)) >= 0)
	5522
	5523	&& ((U32)offset < trie->lasttrans)
	5524	&& trie->trans[offset].check == state)
	5525	{
	5526	state = trie->trans[offset].next;
	5527	}
	5528	else {
	5529	state = 0;
	5530	}
	5531	uc += len;
	5532
	5533	}
	5534	else {
	5535	state = 0;
	5536	}
	5537	DEBUG_TRIE_EXECUTE_r(
	5538	Perl_re_printf( aTHX_
	5539	"Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
	5540	charid, uvc, (UV)state, PL_colors[5] );
	5541	);
	5542	}
	5543	if (!accepted)
	5544	sayNO;
	5545
	5546	/* calculate total number of accept states */
	5547	{
	5548	U16 w = ST.topword;
	5549	accepted = 0;
	5550	while (w) {
	5551	w = trie->wordinfo[w].prev;
	5552	accepted++;
	5553	}
	5554	ST.accepted = accepted;
	5555	}
	5556
	5557	DEBUG_EXECUTE_r(
	5558	Perl_re_exec_indentf( aTHX_ "%sgot %"IVdf" possible matches%s\n",
	5559	depth,
	5560	PL_colors[4], (IV)ST.accepted, PL_colors[5] );
	5561	);
	5562	goto trie_first_try; /* jump into the fail handler */
	5563	}}
	5564	NOT_REACHED; /* NOTREACHED */
	5565
	5566	case TRIE_next_fail: /* we failed - try next alternative */
	5567	{
	5568	U8 *uc;
	5569	if ( ST.jump) {
	5570	REGCP_UNWIND(ST.cp);
	5571	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	5572	}
	5573	if (!--ST.accepted) {
	5574	DEBUG_EXECUTE_r({
	5575	Perl_re_exec_indentf( aTHX_ "%sTRIE failed...%s\n",
	5576	depth,
	5577	PL_colors[4],
	5578	PL_colors[5] );
	5579	});
	5580	sayNO_SILENT;
	5581	}
	5582	{
	5583	/* Find next-highest word to process. Note that this code
	5584	* is O(N^2) per trie run (O(N) per branch), so keep tight */
	5585	U16 min = 0;
	5586	U16 word;
	5587	U16 const nextword = ST.nextword;
	5588	reg_trie_wordinfo * const wordinfo
	5589	= ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
	5590	for (word=ST.topword; word; word=wordinfo[word].prev) {
	5591	if (word > nextword && (!min \|\| word < min))
	5592	min = word;
	5593	}
	5594	ST.nextword = min;
	5595	}
	5596
	5597	trie_first_try:
	5598	if (do_cutgroup) {
	5599	do_cutgroup = 0;
	5600	no_final = 0;
	5601	}
	5602
	5603	if ( ST.jump) {
	5604	ST.lastparen = rex->lastparen;
	5605	ST.lastcloseparen = rex->lastcloseparen;
	5606	REGCP_SET(ST.cp);
	5607	}
	5608
	5609	/* find start char of end of current word */
	5610	{
	5611	U32 chars; /* how many chars to skip */
	5612	reg_trie_data * const trie
	5613	= (reg_trie_data*)rexi->data->data[ARG(ST.me)];
	5614
	5615	assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
	5616	>= ST.firstchars);
	5617	chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
	5618	- ST.firstchars;
	5619	uc = ST.firstpos;
	5620
	5621	if (ST.longfold) {
	5622	/* the hard option - fold each char in turn and find
	5623	* its folded length (which may be different) */
	5624	U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
	5625	STRLEN foldlen;
	5626	STRLEN len;
	5627	UV uvc;
	5628	U8 *uscan;
	5629
	5630	while (chars) {
	5631	if (utf8_target) {
	5632	uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len,
	5633	uniflags);
	5634	uc += len;
	5635	}
	5636	else {
	5637	uvc = *uc;
	5638	uc++;
	5639	}
	5640	uvc = to_uni_fold(uvc, foldbuf, &foldlen);
	5641	uscan = foldbuf;
	5642	while (foldlen) {
	5643	if (!--chars)
	5644	break;
	5645	uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len,
	5646	uniflags);
	5647	uscan += len;
	5648	foldlen -= len;
	5649	}
	5650	}
	5651	}
	5652	else {
	5653	if (utf8_target)
	5654	while (chars--)
	5655	uc += UTF8SKIP(uc);
	5656	else
	5657	uc += chars;
	5658	}
	5659	}
	5660
	5661	scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
	5662	? ST.jump[ST.nextword]
	5663	: NEXT_OFF(ST.me));
	5664
	5665	DEBUG_EXECUTE_r({
	5666	Perl_re_exec_indentf( aTHX_ "%sTRIE matched word #%d, continuing%s\n",
	5667	depth,
	5668	PL_colors[4],
	5669	ST.nextword,
	5670	PL_colors[5]
	5671	);
	5672	});
	5673
	5674	if (ST.accepted > 1 \|\| has_cutgroup) {
	5675	PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc);
	5676	NOT_REACHED; /* NOTREACHED */
	5677	}
	5678	/* only one choice left - just continue */
	5679	DEBUG_EXECUTE_r({
	5680	AV *const trie_words
	5681	= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
	5682	SV ** const tmp = trie_words
	5683	? av_fetch(trie_words, ST.nextword - 1, 0) : NULL;
	5684	SV *sv= tmp ? sv_newmortal() : NULL;
	5685
	5686	Perl_re_exec_indentf( aTHX_ "%sonly one match left, short-circuiting: #%d <%s>%s\n",
	5687	depth, PL_colors[4],
	5688	ST.nextword,
	5689	tmp ? pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 0,
	5690	PL_colors[0], PL_colors[1],
	5691	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)\|PERL_PV_ESCAPE_NONASCII
	5692	)
	5693	: "not compiled under -Dr",
	5694	PL_colors[5] );
	5695	});
	5696
	5697	locinput = (char*)uc;
	5698	continue; /* execute rest of RE */
	5699	/* NOTREACHED */
	5700	}
	5701	#undef ST
	5702
	5703	case EXACTL: /* /abc/l */
	5704	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	5705
	5706	/* Complete checking would involve going through every character
	5707	* matched by the string to see if any is above latin1. But the
	5708	* comparison otherwise might very well be a fast assembly
	5709	* language routine, and I (khw) don't think slowing things down
	5710	* just to check for this warning is worth it. So this just checks
	5711	* the first character */
	5712	if (utf8_target && UTF8_IS_ABOVE_LATIN1(*locinput)) {
	5713	_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
	5714	}
	5715	/* FALLTHROUGH */
	5716	case EXACT: { /* /abc/ */
	5717	char *s = STRING(scan);
	5718	ln = STR_LEN(scan);
	5719	if (utf8_target != is_utf8_pat) {
	5720	/* The target and the pattern have differing utf8ness. */
	5721	char *l = locinput;
	5722	const char * const e = s + ln;
	5723
	5724	if (utf8_target) {
	5725	/* The target is utf8, the pattern is not utf8.
	5726	* Above-Latin1 code points can't match the pattern;
	5727	* invariants match exactly, and the other Latin1 ones need
	5728	* to be downgraded to a single byte in order to do the
	5729	* comparison. (If we could be confident that the target
	5730	* is not malformed, this could be refactored to have fewer
	5731	* tests by just assuming that if the first bytes match, it
	5732	* is an invariant, but there are tests in the test suite
	5733	* dealing with (??{...}) which violate this) */
	5734	while (s < e) {
	5735	if (l >= reginfo->strend
	5736	\|\| UTF8_IS_ABOVE_LATIN1(* (U8*) l))
	5737	{
	5738	sayNO;
	5739	}
	5740	if (UTF8_IS_INVARIANT((U8)l)) {
	5741	if (l != s) {
	5742	sayNO;
	5743	}
	5744	l++;
	5745	}
	5746	else {
	5747	if (EIGHT_BIT_UTF8_TO_NATIVE(l, (l+1)) != * (U8*) s)
	5748	{
	5749	sayNO;
	5750	}
	5751	l += 2;
	5752	}
	5753	s++;
	5754	}
	5755	}
	5756	else {
	5757	/* The target is not utf8, the pattern is utf8. */
	5758	while (s < e) {
	5759	if (l >= reginfo->strend
	5760	\|\| UTF8_IS_ABOVE_LATIN1(* (U8*) s))
	5761	{
	5762	sayNO;
	5763	}
	5764	if (UTF8_IS_INVARIANT((U8)s)) {
	5765	if (s != l) {
	5766	sayNO;
	5767	}
	5768	s++;
	5769	}
	5770	else {
	5771	if (EIGHT_BIT_UTF8_TO_NATIVE(s, (s+1)) != * (U8*) l)
	5772	{
	5773	sayNO;
	5774	}
	5775	s += 2;
	5776	}
	5777	l++;
	5778	}
	5779	}
	5780	locinput = l;
	5781	}
	5782	else {
	5783	/* The target and the pattern have the same utf8ness. */
	5784	/* Inline the first character, for speed. */
	5785	if (reginfo->strend - locinput < ln
	5786	\|\| UCHARAT(s) != nextchr
	5787	\|\| (ln > 1 && memNE(s, locinput, ln)))
	5788	{
	5789	sayNO;
	5790	}
	5791	locinput += ln;
	5792	}
	5793	break;
	5794	}
	5795
	5796	case EXACTFL: { /* /abc/il */
	5797	re_fold_t folder;
	5798	const U8 * fold_array;
	5799	const char * s;
	5800	U32 fold_utf8_flags;
	5801
	5802	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	5803	folder = foldEQ_locale;
	5804	fold_array = PL_fold_locale;
	5805	fold_utf8_flags = FOLDEQ_LOCALE;
	5806	goto do_exactf;
	5807
	5808	case EXACTFLU8: /* /abc/il; but all 'abc' are above 255, so
	5809	is effectively /u; hence to match, target
	5810	must be UTF-8. */
	5811	if (! utf8_target) {
	5812	sayNO;
	5813	}
	5814	fold_utf8_flags = FOLDEQ_LOCALE \| FOLDEQ_S1_ALREADY_FOLDED
	5815	\| FOLDEQ_S1_FOLDS_SANE;
	5816	folder = foldEQ_latin1;
	5817	fold_array = PL_fold_latin1;
	5818	goto do_exactf;
	5819
	5820	case EXACTFU_SS: /* /\x{df}/iu */
	5821	case EXACTFU: /* /abc/iu */
	5822	folder = foldEQ_latin1;
	5823	fold_array = PL_fold_latin1;
	5824	fold_utf8_flags = is_utf8_pat ? FOLDEQ_S1_ALREADY_FOLDED : 0;
	5825	goto do_exactf;
	5826
	5827	case EXACTFA_NO_TRIE: /* This node only generated for non-utf8
	5828	patterns */
	5829	assert(! is_utf8_pat);
	5830	/* FALLTHROUGH */
	5831	case EXACTFA: /* /abc/iaa */
	5832	folder = foldEQ_latin1;
	5833	fold_array = PL_fold_latin1;
	5834	fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	5835	goto do_exactf;
	5836
	5837	case EXACTF: /* /abc/i This node only generated for
	5838	non-utf8 patterns */
	5839	assert(! is_utf8_pat);
	5840	folder = foldEQ;
	5841	fold_array = PL_fold;
	5842	fold_utf8_flags = 0;
	5843
	5844	do_exactf:
	5845	s = STRING(scan);
	5846	ln = STR_LEN(scan);
	5847
	5848	if (utf8_target
	5849	\|\| is_utf8_pat
	5850	\|\| state_num == EXACTFU_SS
	5851	\|\| (state_num == EXACTFL && IN_UTF8_CTYPE_LOCALE))
	5852	{
	5853	/* Either target or the pattern are utf8, or has the issue where
	5854	* the fold lengths may differ. */
	5855	const char * const l = locinput;
	5856	char *e = reginfo->strend;
	5857
	5858	if (! foldEQ_utf8_flags(s, 0, ln, is_utf8_pat,
	5859	l, &e, 0, utf8_target, fold_utf8_flags))
	5860	{
	5861	sayNO;
	5862	}
	5863	locinput = e;
	5864	break;
	5865	}
	5866
	5867	/* Neither the target nor the pattern are utf8 */
	5868	if (UCHARAT(s) != nextchr
	5869	&& !NEXTCHR_IS_EOS
	5870	&& UCHARAT(s) != fold_array[nextchr])
	5871	{
	5872	sayNO;
	5873	}
	5874	if (reginfo->strend - locinput < ln)
	5875	sayNO;
	5876	if (ln > 1 && ! folder(s, locinput, ln))
	5877	sayNO;
	5878	locinput += ln;
	5879	break;
	5880	}
	5881
	5882	case NBOUNDL: /* /\B/l */
	5883	to_complement = 1;
	5884	/* FALLTHROUGH */
	5885
	5886	case BOUNDL: /* /\b/l */
	5887	{
	5888	bool b1, b2;
	5889	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	5890
	5891	if (FLAGS(scan) != TRADITIONAL_BOUND) {
	5892	if (! IN_UTF8_CTYPE_LOCALE) {
	5893	Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
	5894	B_ON_NON_UTF8_LOCALE_IS_WRONG);
	5895	}
	5896	goto boundu;
	5897	}
	5898
	5899	if (utf8_target) {
	5900	if (locinput == reginfo->strbeg)
	5901	b1 = isWORDCHAR_LC('\n');
	5902	else {
	5903	b1 = isWORDCHAR_LC_utf8(reghop3((U8*)locinput, -1,
	5904	(U8*)(reginfo->strbeg)));
	5905	}
	5906	b2 = (NEXTCHR_IS_EOS)
	5907	? isWORDCHAR_LC('\n')
	5908	: isWORDCHAR_LC_utf8((U8*)locinput);
	5909	}
	5910	else { /* Here the string isn't utf8 */
	5911	b1 = (locinput == reginfo->strbeg)
	5912	? isWORDCHAR_LC('\n')
	5913	: isWORDCHAR_LC(UCHARAT(locinput - 1));
	5914	b2 = (NEXTCHR_IS_EOS)
	5915	? isWORDCHAR_LC('\n')
	5916	: isWORDCHAR_LC(nextchr);
	5917	}
	5918	if (to_complement ^ (b1 == b2)) {
	5919	sayNO;
	5920	}
	5921	break;
	5922	}
	5923
	5924	case NBOUND: /* /\B/ */
	5925	to_complement = 1;
	5926	/* FALLTHROUGH */
	5927
	5928	case BOUND: /* /\b/ */
	5929	if (utf8_target) {
	5930	goto bound_utf8;
	5931	}
	5932	goto bound_ascii_match_only;
	5933
	5934	case NBOUNDA: /* /\B/a */
	5935	to_complement = 1;
	5936	/* FALLTHROUGH */
	5937
	5938	case BOUNDA: /* /\b/a */
	5939	{
	5940	bool b1, b2;
	5941
	5942	bound_ascii_match_only:
	5943	/* Here the string isn't utf8, or is utf8 and only ascii characters
	5944	* are to match \w. In the latter case looking at the byte just
	5945	* prior to the current one may be just the final byte of a
	5946	* multi-byte character. This is ok. There are two cases:
	5947	* 1) it is a single byte character, and then the test is doing
	5948	* just what it's supposed to.
	5949	* 2) it is a multi-byte character, in which case the final byte is
	5950	* never mistakable for ASCII, and so the test will say it is
	5951	* not a word character, which is the correct answer. */
	5952	b1 = (locinput == reginfo->strbeg)
	5953	? isWORDCHAR_A('\n')
	5954	: isWORDCHAR_A(UCHARAT(locinput - 1));
	5955	b2 = (NEXTCHR_IS_EOS)
	5956	? isWORDCHAR_A('\n')
	5957	: isWORDCHAR_A(nextchr);
	5958	if (to_complement ^ (b1 == b2)) {
	5959	sayNO;
	5960	}
	5961	break;
	5962	}
	5963
	5964	case NBOUNDU: /* /\B/u */
	5965	to_complement = 1;
	5966	/* FALLTHROUGH */
	5967
	5968	case BOUNDU: /* /\b/u */
	5969
	5970	boundu:
	5971	if (UNLIKELY(reginfo->strbeg >= reginfo->strend)) {
	5972	match = FALSE;
	5973	}
	5974	else if (utf8_target) {
	5975	bound_utf8:
	5976	switch((bound_type) FLAGS(scan)) {
	5977	case TRADITIONAL_BOUND:
	5978	{
	5979	bool b1, b2;
	5980	b1 = (locinput == reginfo->strbeg)
	5981	? 0 /* isWORDCHAR_L1('\n') */
	5982	: isWORDCHAR_utf8(reghop3((U8*)locinput, -1,
	5983	(U8*)(reginfo->strbeg)));
	5984	b2 = (NEXTCHR_IS_EOS)
	5985	? 0 /* isWORDCHAR_L1('\n') */
	5986	: isWORDCHAR_utf8((U8*)locinput);
	5987	match = cBOOL(b1 != b2);
	5988	break;
	5989	}
	5990	case GCB_BOUND:
	5991	if (locinput == reginfo->strbeg \|\| NEXTCHR_IS_EOS) {
	5992	match = TRUE; /* GCB always matches at begin and
	5993	end */
	5994	}
	5995	else {
	5996	/* Find the gcb values of previous and current
	5997	* chars, then see if is a break point */
	5998	match = isGCB(getGCB_VAL_UTF8(
	5999	reghop3((U8*)locinput,
	6000	-1,
	6001	(U8*)(reginfo->strbeg)),
	6002	(U8*) reginfo->strend),
	6003	getGCB_VAL_UTF8((U8*) locinput,
	6004	(U8*) reginfo->strend));
	6005	}
	6006	break;
	6007
	6008	case LB_BOUND:
	6009	if (locinput == reginfo->strbeg) {
	6010	match = FALSE;
	6011	}
	6012	else if (NEXTCHR_IS_EOS) {
	6013	match = TRUE;
	6014	}
	6015	else {
	6016	match = isLB(getLB_VAL_UTF8(
	6017	reghop3((U8*)locinput,
	6018	-1,
	6019	(U8*)(reginfo->strbeg)),
	6020	(U8*) reginfo->strend),
	6021	getLB_VAL_UTF8((U8*) locinput,
	6022	(U8*) reginfo->strend),
	6023	(U8*) reginfo->strbeg,
	6024	(U8*) locinput,
	6025	(U8*) reginfo->strend,
	6026	utf8_target);
	6027	}
	6028	break;
	6029
	6030	case SB_BOUND: /* Always matches at begin and end */
	6031	if (locinput == reginfo->strbeg \|\| NEXTCHR_IS_EOS) {
	6032	match = TRUE;
	6033	}
	6034	else {
	6035	match = isSB(getSB_VAL_UTF8(
	6036	reghop3((U8*)locinput,
	6037	-1,
	6038	(U8*)(reginfo->strbeg)),
	6039	(U8*) reginfo->strend),
	6040	getSB_VAL_UTF8((U8*) locinput,
	6041	(U8*) reginfo->strend),
	6042	(U8*) reginfo->strbeg,
	6043	(U8*) locinput,
	6044	(U8*) reginfo->strend,
	6045	utf8_target);
	6046	}
	6047	break;
	6048
	6049	case WB_BOUND:
	6050	if (locinput == reginfo->strbeg \|\| NEXTCHR_IS_EOS) {
	6051	match = TRUE;
	6052	}
	6053	else {
	6054	match = isWB(WB_UNKNOWN,
	6055	getWB_VAL_UTF8(
	6056	reghop3((U8*)locinput,
	6057	-1,
	6058	(U8*)(reginfo->strbeg)),
	6059	(U8*) reginfo->strend),
	6060	getWB_VAL_UTF8((U8*) locinput,
	6061	(U8*) reginfo->strend),
	6062	(U8*) reginfo->strbeg,
	6063	(U8*) locinput,
	6064	(U8*) reginfo->strend,
	6065	utf8_target);
	6066	}
	6067	break;
	6068	}
	6069	}
	6070	else { /* Not utf8 target */
	6071	switch((bound_type) FLAGS(scan)) {
	6072	case TRADITIONAL_BOUND:
	6073	{
	6074	bool b1, b2;
	6075	b1 = (locinput == reginfo->strbeg)
	6076	? 0 /* isWORDCHAR_L1('\n') */
	6077	: isWORDCHAR_L1(UCHARAT(locinput - 1));
	6078	b2 = (NEXTCHR_IS_EOS)
	6079	? 0 /* isWORDCHAR_L1('\n') */
	6080	: isWORDCHAR_L1(nextchr);
	6081	match = cBOOL(b1 != b2);
	6082	break;
	6083	}
	6084
	6085	case GCB_BOUND:
	6086	if (locinput == reginfo->strbeg \|\| NEXTCHR_IS_EOS) {
	6087	match = TRUE; /* GCB always matches at begin and
	6088	end */
	6089	}
	6090	else { /* Only CR-LF combo isn't a GCB in 0-255
	6091	range */
	6092	match = UCHARAT(locinput - 1) != '\r'
	6093	\|\| UCHARAT(locinput) != '\n';
	6094	}
	6095	break;
	6096
	6097	case LB_BOUND:
	6098	if (locinput == reginfo->strbeg) {
	6099	match = FALSE;
	6100	}
	6101	else if (NEXTCHR_IS_EOS) {
	6102	match = TRUE;
	6103	}
	6104	else {
	6105	match = isLB(getLB_VAL_CP(UCHARAT(locinput -1)),
	6106	getLB_VAL_CP(UCHARAT(locinput)),
	6107	(U8*) reginfo->strbeg,
	6108	(U8*) locinput,
	6109	(U8*) reginfo->strend,
	6110	utf8_target);
	6111	}
	6112	break;
	6113
	6114	case SB_BOUND: /* Always matches at begin and end */
	6115	if (locinput == reginfo->strbeg \|\| NEXTCHR_IS_EOS) {
	6116	match = TRUE;
	6117	}
	6118	else {
	6119	match = isSB(getSB_VAL_CP(UCHARAT(locinput -1)),
	6120	getSB_VAL_CP(UCHARAT(locinput)),
	6121	(U8*) reginfo->strbeg,
	6122	(U8*) locinput,
	6123	(U8*) reginfo->strend,
	6124	utf8_target);
	6125	}
	6126	break;
	6127
	6128	case WB_BOUND:
	6129	if (locinput == reginfo->strbeg \|\| NEXTCHR_IS_EOS) {
	6130	match = TRUE;
	6131	}
	6132	else {
	6133	match = isWB(WB_UNKNOWN,
	6134	getWB_VAL_CP(UCHARAT(locinput -1)),
	6135	getWB_VAL_CP(UCHARAT(locinput)),
	6136	(U8*) reginfo->strbeg,
	6137	(U8*) locinput,
	6138	(U8*) reginfo->strend,
	6139	utf8_target);
	6140	}
	6141	break;
	6142	}
	6143	}
	6144
	6145	if (to_complement ^ ! match) {
	6146	sayNO;
	6147	}
	6148	break;
	6149
	6150	case ANYOFL: /* /[abc]/l */
	6151	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	6152
	6153	if (ANYOFL_UTF8_LOCALE_REQD(FLAGS(scan)) && ! IN_UTF8_CTYPE_LOCALE)
	6154	{
	6155	Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required);
	6156	}
	6157	/* FALLTHROUGH */
	6158	case ANYOFD: /* /[abc]/d */
	6159	case ANYOF: /* /[abc]/ */
	6160	if (NEXTCHR_IS_EOS)
	6161	sayNO;
	6162	if (utf8_target && ! UTF8_IS_INVARIANT(*locinput)) {
	6163	if (!reginclass(rex, scan, (U8)locinput, (U8)reginfo->strend,
	6164	utf8_target))
	6165	sayNO;
	6166	locinput += UTF8SKIP(locinput);
	6167	}
	6168	else {
	6169	if (!REGINCLASS(rex, scan, (U8*)locinput, utf8_target))
	6170	sayNO;
	6171	locinput++;
	6172	}
	6173	break;
	6174
	6175	/* The argument (FLAGS) to all the POSIX node types is the class number
	6176	* */
	6177
	6178	case NPOSIXL: /* \W or [:^punct:] etc. under /l */
	6179	to_complement = 1;
	6180	/* FALLTHROUGH */
	6181
	6182	case POSIXL: /* \w or [:punct:] etc. under /l */
	6183	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	6184	if (NEXTCHR_IS_EOS)
	6185	sayNO;
	6186
	6187	/* Use isFOO_lc() for characters within Latin1. (Note that
	6188	* UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
	6189	* wouldn't be invariant) */
	6190	if (UTF8_IS_INVARIANT(nextchr) \|\| ! utf8_target) {
	6191	if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextchr)))) {
	6192	sayNO;
	6193	}
	6194	}
	6195	else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
	6196	if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan),
	6197	EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
	6198	*(locinput + 1))))))
	6199	{
	6200	sayNO;
	6201	}
	6202	}
	6203	else { /* Here, must be an above Latin-1 code point */
	6204	_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
	6205	goto utf8_posix_above_latin1;
	6206	}
	6207
	6208	/* Here, must be utf8 */
	6209	locinput += UTF8SKIP(locinput);
	6210	break;
	6211
	6212	case NPOSIXD: /* \W or [:^punct:] etc. under /d */
	6213	to_complement = 1;
	6214	/* FALLTHROUGH */
	6215
	6216	case POSIXD: /* \w or [:punct:] etc. under /d */
	6217	if (utf8_target) {
	6218	goto utf8_posix;
	6219	}
	6220	goto posixa;
	6221
	6222	case NPOSIXA: /* \W or [:^punct:] etc. under /a */
	6223
	6224	if (NEXTCHR_IS_EOS) {
	6225	sayNO;
	6226	}
	6227
	6228	/* All UTF-8 variants match */
	6229	if (! UTF8_IS_INVARIANT(nextchr)) {
	6230	goto increment_locinput;
	6231	}
	6232
	6233	to_complement = 1;
	6234	goto join_nposixa;
	6235
	6236	case POSIXA: /* \w or [:punct:] etc. under /a */
	6237
	6238	posixa:
	6239	/* We get here through POSIXD, NPOSIXD, and NPOSIXA when not in
	6240	* UTF-8, and also from NPOSIXA even in UTF-8 when the current
	6241	* character is a single byte */
	6242
	6243	if (NEXTCHR_IS_EOS) {
	6244	sayNO;
	6245	}
	6246
	6247	join_nposixa:
	6248
	6249	if (! (to_complement ^ cBOOL(_generic_isCC_A(nextchr,
	6250	FLAGS(scan)))))
	6251	{
	6252	sayNO;
	6253	}
	6254
	6255	/* Here we are either not in utf8, or we matched a utf8-invariant,
	6256	* so the next char is the next byte */
	6257	locinput++;
	6258	break;
	6259
	6260	case NPOSIXU: /* \W or [:^punct:] etc. under /u */
	6261	to_complement = 1;
	6262	/* FALLTHROUGH */
	6263
	6264	case POSIXU: /* \w or [:punct:] etc. under /u */
	6265	utf8_posix:
	6266	if (NEXTCHR_IS_EOS) {
	6267	sayNO;
	6268	}
	6269
	6270	/* Use _generic_isCC() for characters within Latin1. (Note that
	6271	* UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
	6272	* wouldn't be invariant) */
	6273	if (UTF8_IS_INVARIANT(nextchr) \|\| ! utf8_target) {
	6274	if (! (to_complement ^ cBOOL(_generic_isCC(nextchr,
	6275	FLAGS(scan)))))
	6276	{
	6277	sayNO;
	6278	}
	6279	locinput++;
	6280	}
	6281	else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
	6282	if (! (to_complement
	6283	^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
	6284	*(locinput + 1)),
	6285	FLAGS(scan)))))
	6286	{
	6287	sayNO;
	6288	}
	6289	locinput += 2;
	6290	}
	6291	else { /* Handle above Latin-1 code points */
	6292	utf8_posix_above_latin1:
	6293	classnum = (_char_class_number) FLAGS(scan);
	6294	if (classnum < _FIRST_NON_SWASH_CC) {
	6295
	6296	/* Here, uses a swash to find such code points. Load it if
	6297	* not done already */
	6298	if (! PL_utf8_swash_ptrs[classnum]) {
	6299	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	6300	PL_utf8_swash_ptrs[classnum]
	6301	= _core_swash_init("utf8",
	6302	"",
	6303	&PL_sv_undef, 1, 0,
	6304	PL_XPosix_ptrs[classnum], &flags);
	6305	}
	6306	if (! (to_complement
	6307	^ cBOOL(swash_fetch(PL_utf8_swash_ptrs[classnum],
	6308	(U8 *) locinput, TRUE))))
	6309	{
	6310	sayNO;
	6311	}
	6312	}
	6313	else { /* Here, uses macros to find above Latin-1 code points */
	6314	switch (classnum) {
	6315	case _CC_ENUM_SPACE:
	6316	if (! (to_complement
	6317	^ cBOOL(is_XPERLSPACE_high(locinput))))
	6318	{
	6319	sayNO;
	6320	}
	6321	break;
	6322	case _CC_ENUM_BLANK:
	6323	if (! (to_complement
	6324	^ cBOOL(is_HORIZWS_high(locinput))))
	6325	{
	6326	sayNO;
	6327	}
	6328	break;
	6329	case _CC_ENUM_XDIGIT:
	6330	if (! (to_complement
	6331	^ cBOOL(is_XDIGIT_high(locinput))))
	6332	{
	6333	sayNO;
	6334	}
	6335	break;
	6336	case _CC_ENUM_VERTSPACE:
	6337	if (! (to_complement
	6338	^ cBOOL(is_VERTWS_high(locinput))))
	6339	{
	6340	sayNO;
	6341	}
	6342	break;
	6343	default: /* The rest, e.g. [:cntrl:], can't match
	6344	above Latin1 */
	6345	if (! to_complement) {
	6346	sayNO;
	6347	}
	6348	break;
	6349	}
	6350	}
	6351	locinput += UTF8SKIP(locinput);
	6352	}
	6353	break;
	6354
	6355	case CLUMP: /* Match \X: logical Unicode character. This is defined as
	6356	a Unicode extended Grapheme Cluster */
	6357	if (NEXTCHR_IS_EOS)
	6358	sayNO;
	6359	if (! utf8_target) {
	6360
	6361	/* Match either CR LF or '.', as all the other possibilities
	6362	* require utf8 */
	6363	locinput++; /* Match the . or CR */
	6364	if (nextchr == '\r' /* And if it was CR, and the next is LF,
	6365	match the LF */
	6366	&& locinput < reginfo->strend
	6367	&& UCHARAT(locinput) == '\n')
	6368	{
	6369	locinput++;
	6370	}
	6371	}
	6372	else {
	6373
	6374	/* Get the gcb type for the current character */
	6375	GCB_enum prev_gcb = getGCB_VAL_UTF8((U8*) locinput,
	6376	(U8*) reginfo->strend);
	6377
	6378	/* Then scan through the input until we get to the first
	6379	* character whose type is supposed to be a gcb with the
	6380	* current character. (There is always a break at the
	6381	* end-of-input) */
	6382	locinput += UTF8SKIP(locinput);
	6383	while (locinput < reginfo->strend) {
	6384	GCB_enum cur_gcb = getGCB_VAL_UTF8((U8*) locinput,
	6385	(U8*) reginfo->strend);
	6386	if (isGCB(prev_gcb, cur_gcb)) {
	6387	break;
	6388	}
	6389
	6390	prev_gcb = cur_gcb;
	6391	locinput += UTF8SKIP(locinput);
	6392	}
	6393
	6394
	6395	}
	6396	break;
	6397
	6398	case NREFFL: /* /\g{name}/il */
	6399	{ /* The capture buffer cases. The ones beginning with N for the
	6400	named buffers just convert to the equivalent numbered and
	6401	pretend they were called as the corresponding numbered buffer
	6402	op. */
	6403	/* don't initialize these in the declaration, it makes C++
	6404	unhappy */
	6405	const char *s;
	6406	char type;
	6407	re_fold_t folder;
	6408	const U8 *fold_array;
	6409	UV utf8_fold_flags;
	6410
	6411	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	6412	folder = foldEQ_locale;
	6413	fold_array = PL_fold_locale;
	6414	type = REFFL;
	6415	utf8_fold_flags = FOLDEQ_LOCALE;
	6416	goto do_nref;
	6417
	6418	case NREFFA: /* /\g{name}/iaa */
	6419	folder = foldEQ_latin1;
	6420	fold_array = PL_fold_latin1;
	6421	type = REFFA;
	6422	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	6423	goto do_nref;
	6424
	6425	case NREFFU: /* /\g{name}/iu */
	6426	folder = foldEQ_latin1;
	6427	fold_array = PL_fold_latin1;
	6428	type = REFFU;
	6429	utf8_fold_flags = 0;
	6430	goto do_nref;
	6431
	6432	case NREFF: /* /\g{name}/i */
	6433	folder = foldEQ;
	6434	fold_array = PL_fold;
	6435	type = REFF;
	6436	utf8_fold_flags = 0;
	6437	goto do_nref;
	6438
	6439	case NREF: /* /\g{name}/ */
	6440	type = REF;
	6441	folder = NULL;
	6442	fold_array = NULL;
	6443	utf8_fold_flags = 0;
	6444	do_nref:
	6445
	6446	/* For the named back references, find the corresponding buffer
	6447	* number */
	6448	n = reg_check_named_buff_matched(rex,scan);
	6449
	6450	if ( ! n ) {
	6451	sayNO;
	6452	}
	6453	goto do_nref_ref_common;
	6454
	6455	case REFFL: /* /\1/il */
	6456	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	6457	folder = foldEQ_locale;
	6458	fold_array = PL_fold_locale;
	6459	utf8_fold_flags = FOLDEQ_LOCALE;
	6460	goto do_ref;
	6461
	6462	case REFFA: /* /\1/iaa */
	6463	folder = foldEQ_latin1;
	6464	fold_array = PL_fold_latin1;
	6465	utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	6466	goto do_ref;
	6467
	6468	case REFFU: /* /\1/iu */
	6469	folder = foldEQ_latin1;
	6470	fold_array = PL_fold_latin1;
	6471	utf8_fold_flags = 0;
	6472	goto do_ref;
	6473
	6474	case REFF: /* /\1/i */
	6475	folder = foldEQ;
	6476	fold_array = PL_fold;
	6477	utf8_fold_flags = 0;
	6478	goto do_ref;
	6479
	6480	case REF: /* /\1/ */
	6481	folder = NULL;
	6482	fold_array = NULL;
	6483	utf8_fold_flags = 0;
	6484
	6485	do_ref:
	6486	type = OP(scan);
	6487	n = ARG(scan); /* which paren pair */
	6488
	6489	do_nref_ref_common:
	6490	ln = rex->offs[n].start;
	6491	reginfo->poscache_iter = reginfo->poscache_maxiter; /* Void cache */
	6492	if (rex->lastparen < n \|\| ln == -1)
	6493	sayNO; /* Do not match unless seen CLOSEn. */
	6494	if (ln == rex->offs[n].end)
	6495	break;
	6496
	6497	s = reginfo->strbeg + ln;
	6498	if (type != REF /* REF can do byte comparison */
	6499	&& (utf8_target \|\| type == REFFU \|\| type == REFFL))
	6500	{
	6501	char * limit = reginfo->strend;
	6502
	6503	/* This call case insensitively compares the entire buffer
	6504	* at s, with the current input starting at locinput, but
	6505	* not going off the end given by reginfo->strend, and
	6506	* returns in <limit> upon success, how much of the
	6507	* current input was matched */
	6508	if (! foldEQ_utf8_flags(s, NULL, rex->offs[n].end - ln, utf8_target,
	6509	locinput, &limit, 0, utf8_target, utf8_fold_flags))
	6510	{
	6511	sayNO;
	6512	}
	6513	locinput = limit;
	6514	break;
	6515	}
	6516
	6517	/* Not utf8: Inline the first character, for speed. */
	6518	if (!NEXTCHR_IS_EOS &&
	6519	UCHARAT(s) != nextchr &&
	6520	(type == REF \|\|
	6521	UCHARAT(s) != fold_array[nextchr]))
	6522	sayNO;
	6523	ln = rex->offs[n].end - ln;
	6524	if (locinput + ln > reginfo->strend)
	6525	sayNO;
	6526	if (ln > 1 && (type == REF
	6527	? memNE(s, locinput, ln)
	6528	: ! folder(s, locinput, ln)))
	6529	sayNO;
	6530	locinput += ln;
	6531	break;
	6532	}
	6533
	6534	case NOTHING: /* null op; e.g. the 'nothing' following
	6535	* the '*' in m{(a+\|b)*}' */
	6536	break;
	6537	case TAIL: /* placeholder while compiling (A\|B\|C) */
	6538	break;
	6539
	6540	#undef ST
	6541	#define ST st->u.eval
	6542	#define CUR_EVAL cur_eval->u.eval
	6543
	6544	{
	6545	SV *ret;
	6546	REGEXP *re_sv;
	6547	regexp *re;
	6548	regexp_internal *rei;
	6549	regnode *startpoint;
	6550	U32 arg;
	6551
	6552	case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
	6553	arg= (U32)ARG(scan);
	6554	if (cur_eval && cur_eval->locinput == locinput) {
	6555	if ( ++nochange_depth > max_nochange_depth )
	6556	Perl_croak(aTHX_
	6557	"Pattern subroutine nesting without pos change"
	6558	" exceeded limit in regex");
	6559	} else {
	6560	nochange_depth = 0;
	6561	}
	6562	re_sv = rex_sv;
	6563	re = rex;
	6564	rei = rexi;
	6565	startpoint = scan + ARG2L(scan);
	6566	EVAL_CLOSE_PAREN_SET( st, arg );
	6567	/* Detect infinite recursion
	6568	*
	6569	* A pattern like /(?R)foo/ or /(?<x>(?&y)foo)(?<y>(?&x)bar)/
	6570	* or "a"=~/(.(?2))((?<=(?=(?1)).))/ could recurse forever.
	6571	* So we track the position in the string we are at each time
	6572	* we recurse and if we try to enter the same routine twice from
	6573	* the same position we throw an error.
	6574	*/
	6575	if ( rex->recurse_locinput[arg] == locinput ) {
	6576	/* FIXME: we should show the regop that is failing as part
	6577	* of the error message. */
	6578	Perl_croak(aTHX_ "Infinite recursion in regex");
	6579	} else {
	6580	ST.prev_recurse_locinput= rex->recurse_locinput[arg];
	6581	rex->recurse_locinput[arg]= locinput;
	6582
	6583	DEBUG_r({
	6584	GET_RE_DEBUG_FLAGS_DECL;
	6585	DEBUG_STACK_r({
	6586	Perl_re_exec_indentf( aTHX_
	6587	"entering GOSUB, prev_recurse_locinput=%p recurse_locinput[%d]=%p\n",
	6588	depth, ST.prev_recurse_locinput, arg, rex->recurse_locinput[arg]
	6589	);
	6590	});
	6591	});
	6592	}
	6593
	6594	/* Save all the positions seen so far. */
	6595	ST.cp = regcppush(rex, 0, maxopenparen);
	6596	REGCP_SET(ST.lastcp);
	6597
	6598	/* and then jump to the code we share with EVAL */
	6599	goto eval_recurse_doit;
	6600	/* NOTREACHED */
	6601
	6602	case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X\|Y)B/ */
	6603	if (cur_eval && cur_eval->locinput==locinput) {
	6604	if ( ++nochange_depth > max_nochange_depth )
	6605	Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
	6606	} else {
	6607	nochange_depth = 0;
	6608	}
	6609	{
	6610	/* execute the code in the {...} */
	6611
	6612	dSP;
	6613	IV before;
	6614	OP * const oop = PL_op;
	6615	COP * const ocurcop = PL_curcop;
	6616	OP *nop;
	6617	CV *newcv;
	6618
	6619	/* save all paren positions */
	6620	regcppush(rex, 0, maxopenparen);
	6621	REGCP_SET(runops_cp);
	6622
	6623	if (!caller_cv)
	6624	caller_cv = find_runcv(NULL);
	6625
	6626	n = ARG(scan);
	6627
	6628	if (rexi->data->what[n] == 'r') { /* code from an external qr */
	6629	newcv = (ReANY(
	6630	(REGEXP*)(rexi->data->data[n])
	6631	))->qr_anoncv;
	6632	nop = (OP*)rexi->data->data[n+1];
	6633	}
	6634	else if (rexi->data->what[n] == 'l') { /* literal code */
	6635	newcv = caller_cv;
	6636	nop = (OP*)rexi->data->data[n];
	6637	assert(CvDEPTH(newcv));
	6638	}
	6639	else {
	6640	/* literal with own CV */
	6641	assert(rexi->data->what[n] == 'L');
	6642	newcv = rex->qr_anoncv;
	6643	nop = (OP*)rexi->data->data[n];
	6644	}
	6645
	6646	/* normally if we're about to execute code from the same
	6647	* CV that we used previously, we just use the existing
	6648	* CX stack entry. However, it's possible that in the
	6649	* meantime we may have backtracked, popped from the save
	6650	* stack, and undone the SAVECOMPPAD(s) associated with
	6651	* PUSH_MULTICALL; in which case PL_comppad no longer
	6652	* points to newcv's pad. */
	6653	if (newcv != last_pushed_cv \|\| PL_comppad != last_pad)
	6654	{
	6655	U8 flags = (CXp_SUB_RE \|
	6656	((newcv == caller_cv) ? CXp_SUB_RE_FAKE : 0));
	6657	if (last_pushed_cv) {
	6658	/* PUSH/POP_MULTICALL save and restore the
	6659	* caller's PL_comppad; if we call multiple subs
	6660	* using the same CX block, we have to save and
	6661	* unwind the varying PL_comppad's ourselves,
	6662	* especially restoring the right PL_comppad on
	6663	* backtrack - so save it on the save stack */
	6664	SAVECOMPPAD();
	6665	CHANGE_MULTICALL_FLAGS(newcv, flags);
	6666	}
	6667	else {
	6668	PUSH_MULTICALL_FLAGS(newcv, flags);
	6669	}
	6670	last_pushed_cv = newcv;
	6671	}
	6672	else {
	6673	/* these assignments are just to silence compiler
	6674	* warnings */
	6675	multicall_cop = NULL;
	6676	}
	6677	last_pad = PL_comppad;
	6678
	6679	/* the initial nextstate you would normally execute
	6680	* at the start of an eval (which would cause error
	6681	* messages to come from the eval), may be optimised
	6682	* away from the execution path in the regex code blocks;
	6683	* so manually set PL_curcop to it initially */
	6684	{
	6685	OP *o = cUNOPx(nop)->op_first;
	6686	assert(o->op_type == OP_NULL);
	6687	if (o->op_targ == OP_SCOPE) {
	6688	o = cUNOPo->op_first;
	6689	}
	6690	else {
	6691	assert(o->op_targ == OP_LEAVE);
	6692	o = cUNOPo->op_first;
	6693	assert(o->op_type == OP_ENTER);
	6694	o = OpSIBLING(o);
	6695	}
	6696
	6697	if (o->op_type != OP_STUB) {
	6698	assert( o->op_type == OP_NEXTSTATE
	6699	\|\| o->op_type == OP_DBSTATE
	6700	\|\| (o->op_type == OP_NULL
	6701	&& ( o->op_targ == OP_NEXTSTATE
	6702	\|\| o->op_targ == OP_DBSTATE
	6703	)
	6704	)
	6705	);
	6706	PL_curcop = (COP*)o;
	6707	}
	6708	}
	6709	nop = nop->op_next;
	6710
	6711	DEBUG_STATE_r( Perl_re_printf( aTHX_
	6712	" re EVAL PL_op=0x%"UVxf"\n", PTR2UV(nop)) );
	6713
	6714	rex->offs[0].end = locinput - reginfo->strbeg;
	6715	if (reginfo->info_aux_eval->pos_magic)
	6716	MgBYTEPOS_set(reginfo->info_aux_eval->pos_magic,
	6717	reginfo->sv, reginfo->strbeg,
	6718	locinput - reginfo->strbeg);
	6719
	6720	if (sv_yes_mark) {
	6721	SV *sv_mrk = get_sv("REGMARK", 1);
	6722	sv_setsv(sv_mrk, sv_yes_mark);
	6723	}
	6724
	6725	/* we don't use MULTICALL here as we want to call the
	6726	* first op of the block of interest, rather than the
	6727	* first op of the sub. Also, we don't want to free
	6728	* the savestack frame */
	6729	before = (IV)(SP-PL_stack_base);
	6730	PL_op = nop;
	6731	CALLRUNOPS(aTHX); /* Scalar context. */
	6732	SPAGAIN;
	6733	if ((IV)(SP-PL_stack_base) == before)
	6734	ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
	6735	else {
	6736	ret = POPs;
	6737	PUTBACK;
	6738	}
	6739
	6740	/* before restoring everything, evaluate the returned
	6741	* value, so that 'uninit' warnings don't use the wrong
	6742	* PL_op or pad. Also need to process any magic vars
	6743	* (e.g. $1) before parentheses are restored */
	6744
	6745	PL_op = NULL;
	6746
	6747	re_sv = NULL;
	6748	if (logical == 0) /* (?{})/ */
	6749	sv_setsv(save_scalar(PL_replgv), ret); /* $^R */
	6750	else if (logical == 1) { /* /(?(?{...})X\|Y)/ */
	6751	sw = cBOOL(SvTRUE(ret));
	6752	logical = 0;
	6753	}
	6754	else { /* /(??{}) */
	6755	/* if it's overloaded, let the regex compiler handle
	6756	* it; otherwise extract regex, or stringify */
	6757	if (SvGMAGICAL(ret))
	6758	ret = sv_mortalcopy(ret);
	6759	if (!SvAMAGIC(ret)) {
	6760	SV *sv = ret;
	6761	if (SvROK(sv))
	6762	sv = SvRV(sv);
	6763	if (SvTYPE(sv) == SVt_REGEXP)
	6764	re_sv = (REGEXP*) sv;
	6765	else if (SvSMAGICAL(ret)) {
	6766	MAGIC *mg = mg_find(ret, PERL_MAGIC_qr);
	6767	if (mg)
	6768	re_sv = (REGEXP *) mg->mg_obj;
	6769	}
	6770
	6771	/* force any undef warnings here */
	6772	if (!re_sv && !SvPOK(ret) && !SvNIOK(ret)) {
	6773	ret = sv_mortalcopy(ret);
	6774	(void) SvPV_force_nolen(ret);
	6775	}
	6776	}
	6777
	6778	}
	6779
	6780	/* *** Note that at this point we don't restore
	6781	* PL_comppad, (or pop the CxSUB) on the assumption it may
	6782	* be used again soon. This is safe as long as nothing
	6783	* in the regexp code uses the pad ! */
	6784	PL_op = oop;
	6785	PL_curcop = ocurcop;
	6786	S_regcp_restore(aTHX_ rex, runops_cp, &maxopenparen);
	6787	PL_curpm = PL_reg_curpm;
	6788
	6789	if (logical != 2)
	6790	break;
	6791	}
	6792
	6793	/* only /(??{})/ from now on */
	6794	logical = 0;
	6795	{
	6796	/* extract RE object from returned value; compiling if
	6797	* necessary */
	6798
	6799	if (re_sv) {
	6800	re_sv = reg_temp_copy(NULL, re_sv);
	6801	}
	6802	else {
	6803	U32 pm_flags = 0;
	6804
	6805	if (SvUTF8(ret) && IN_BYTES) {
	6806	/* In use 'bytes': make a copy of the octet
	6807	* sequence, but without the flag on */
	6808	STRLEN len;
	6809	const char *const p = SvPV(ret, len);
	6810	ret = newSVpvn_flags(p, len, SVs_TEMP);
	6811	}
	6812	if (rex->intflags & PREGf_USE_RE_EVAL)
	6813	pm_flags \|= PMf_USE_RE_EVAL;
	6814
	6815	/* if we got here, it should be an engine which
	6816	* supports compiling code blocks and stuff */
	6817	assert(rex->engine && rex->engine->op_comp);
	6818	assert(!(scan->flags & ~RXf_PMf_COMPILETIME));
	6819	re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL,
	6820	rex->engine, NULL, NULL,
	6821	/* copy /msixn etc to inner pattern */
	6822	ARG2L(scan),
	6823	pm_flags);
	6824
	6825	if (!(SvFLAGS(ret)
	6826	& (SVs_TEMP \| SVs_GMG \| SVf_ROK))
	6827	&& (!SvPADTMP(ret) \|\| SvREADONLY(ret))) {
	6828	/* This isn't a first class regexp. Instead, it's
	6829	caching a regexp onto an existing, Perl visible
	6830	scalar. */
	6831	sv_magic(ret, MUTABLE_SV(re_sv), PERL_MAGIC_qr, 0, 0);
	6832	}
	6833	}
	6834	SAVEFREESV(re_sv);
	6835	re = ReANY(re_sv);
	6836	}
	6837	RXp_MATCH_COPIED_off(re);
	6838	re->subbeg = rex->subbeg;
	6839	re->sublen = rex->sublen;
	6840	re->suboffset = rex->suboffset;
	6841	re->subcoffset = rex->subcoffset;
	6842	re->lastparen = 0;
	6843	re->lastcloseparen = 0;
	6844	rei = RXi_GET(re);
	6845	DEBUG_EXECUTE_r(
	6846	debug_start_match(re_sv, utf8_target, locinput,
	6847	reginfo->strend, "Matching embedded");
	6848	);
	6849	startpoint = rei->program + 1;
	6850	EVAL_CLOSE_PAREN_CLEAR(st); /* ST.close_paren = 0;
	6851	* close_paren only for GOSUB */
	6852	ST.prev_recurse_locinput= NULL; /* only used for GOSUB */
	6853	/* Save all the seen positions so far. */
	6854	ST.cp = regcppush(rex, 0, maxopenparen);
	6855	REGCP_SET(ST.lastcp);
	6856	/* and set maxopenparen to 0, since we are starting a "fresh" match */
	6857	maxopenparen = 0;
	6858	/* run the pattern returned from (??{...}) */
	6859
	6860	eval_recurse_doit: /* Share code with GOSUB below this line
	6861	* At this point we expect the stack context to be
	6862	* set up correctly */
	6863
	6864	/* invalidate the S-L poscache. We're now executing a
	6865	* different set of WHILEM ops (and their associated
	6866	* indexes) against the same string, so the bits in the
	6867	* cache are meaningless. Setting maxiter to zero forces
	6868	* the cache to be invalidated and zeroed before reuse.
	6869	* XXX This is too dramatic a measure. Ideally we should
	6870	* save the old cache and restore when running the outer
	6871	* pattern again */
	6872	reginfo->poscache_maxiter = 0;
	6873
	6874	/* the new regexp might have a different is_utf8_pat than we do */
	6875	is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(re_sv));
	6876
	6877	ST.prev_rex = rex_sv;
	6878	ST.prev_curlyx = cur_curlyx;
	6879	rex_sv = re_sv;
	6880	SET_reg_curpm(rex_sv);
	6881	rex = re;
	6882	rexi = rei;
	6883	cur_curlyx = NULL;
	6884	ST.B = next;
	6885	ST.prev_eval = cur_eval;
	6886	cur_eval = st;
	6887	/* now continue from first node in postponed RE */
	6888	PUSH_YES_STATE_GOTO(EVAL_AB, startpoint, locinput);
	6889	NOT_REACHED; /* NOTREACHED */
	6890	}
	6891
	6892	case EVAL_AB: /* cleanup after a successful (??{A})B */
	6893	/* note: this is called twice; first after popping B, then A */
	6894	DEBUG_STACK_r({
	6895	Perl_re_exec_indentf( aTHX_ "EVAL_AB cur_eval=%p prev_eval=%p\n",
	6896	depth, cur_eval, ST.prev_eval);
	6897	});
	6898
	6899	#define SET_RECURSE_LOCINPUT(STR,VAL)\
	6900	if ( cur_eval && CUR_EVAL.close_paren ) {\
	6901	DEBUG_STACK_r({ \
	6902	Perl_re_exec_indentf( aTHX_ STR " GOSUB%d ce=%p recurse_locinput=%p\n",\
	6903	depth, \
	6904	CUR_EVAL.close_paren - 1,\
	6905	cur_eval, \
	6906	VAL); \
	6907	}); \
	6908	rex->recurse_locinput[CUR_EVAL.close_paren - 1] = VAL;\
	6909	}
	6910
	6911	SET_RECURSE_LOCINPUT("EVAL_AB[before]", CUR_EVAL.prev_recurse_locinput);
	6912
	6913	rex_sv = ST.prev_rex;
	6914	is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
	6915	SET_reg_curpm(rex_sv);
	6916	rex = ReANY(rex_sv);
	6917	rexi = RXi_GET(rex);
	6918	{
	6919	/* preserve $^R across LEAVE's. See Bug 121070. */
	6920	SV *save_sv= GvSV(PL_replgv);
	6921	SvREFCNT_inc(save_sv);
	6922	regcpblow(ST.cp); /* LEAVE in disguise */
	6923	sv_setsv(GvSV(PL_replgv), save_sv);
	6924	SvREFCNT_dec(save_sv);
	6925	}
	6926	cur_eval = ST.prev_eval;
	6927	cur_curlyx = ST.prev_curlyx;
	6928
	6929	/* Invalidate cache. See "invalidate" comment above. */
	6930	reginfo->poscache_maxiter = 0;
	6931	if ( nochange_depth )
	6932	nochange_depth--;
	6933
	6934	SET_RECURSE_LOCINPUT("EVAL_AB[after]", cur_eval->locinput);
	6935	sayYES;
	6936
	6937
	6938	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
	6939	/* note: this is called twice; first after popping B, then A */
	6940	DEBUG_STACK_r({
	6941	Perl_re_exec_indentf( aTHX_ "EVAL_AB_fail cur_eval=%p prev_eval=%p\n",
	6942	depth, cur_eval, ST.prev_eval);
	6943	});
	6944
	6945	SET_RECURSE_LOCINPUT("EVAL_AB_fail[before]", CUR_EVAL.prev_recurse_locinput);
	6946
	6947	rex_sv = ST.prev_rex;
	6948	is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
	6949	SET_reg_curpm(rex_sv);
	6950	rex = ReANY(rex_sv);
	6951	rexi = RXi_GET(rex);
	6952
	6953	REGCP_UNWIND(ST.lastcp);
	6954	regcppop(rex, &maxopenparen);
	6955	cur_eval = ST.prev_eval;
	6956	cur_curlyx = ST.prev_curlyx;
	6957
	6958	/* Invalidate cache. See "invalidate" comment above. */
	6959	reginfo->poscache_maxiter = 0;
	6960	if ( nochange_depth )
	6961	nochange_depth--;
	6962
	6963	SET_RECURSE_LOCINPUT("EVAL_AB_fail[after]", cur_eval->locinput);
	6964	sayNO_SILENT;
	6965	#undef ST
	6966
	6967	case OPEN: /* ( */
	6968	n = ARG(scan); /* which paren pair */
	6969	rex->offs[n].start_tmp = locinput - reginfo->strbeg;
	6970	if (n > maxopenparen)
	6971	maxopenparen = n;
	6972	DEBUG_BUFFERS_r(Perl_re_printf( aTHX_
	6973	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf" tmp; maxopenparen=%"UVuf"\n",
	6974	PTR2UV(rex),
	6975	PTR2UV(rex->offs),
	6976	(UV)n,
	6977	(IV)rex->offs[n].start_tmp,
	6978	(UV)maxopenparen
	6979	));
	6980	lastopen = n;
	6981	break;
	6982
	6983	/* XXX really need to log other places start/end are set too */
	6984	#define CLOSE_CAPTURE \
	6985	rex->offs[n].start = rex->offs[n].start_tmp; \
	6986	rex->offs[n].end = locinput - reginfo->strbeg; \
	6987	DEBUG_BUFFERS_r(Perl_re_printf( aTHX_ \
	6988	"rex=0x%"UVxf" offs=0x%"UVxf": \\%"UVuf": set %"IVdf"..%"IVdf"\n", \
	6989	PTR2UV(rex), \
	6990	PTR2UV(rex->offs), \
	6991	(UV)n, \
	6992	(IV)rex->offs[n].start, \
	6993	(IV)rex->offs[n].end \
	6994	))
	6995
	6996	case CLOSE: /* ) */
	6997	n = ARG(scan); /* which paren pair */
	6998	CLOSE_CAPTURE;
	6999	if (n > rex->lastparen)
	7000	rex->lastparen = n;
	7001	rex->lastcloseparen = n;
	7002	if ( EVAL_CLOSE_PAREN_IS( cur_eval, n ) )
	7003	goto fake_end;
	7004
	7005	break;
	7006
	7007	case ACCEPT: /* (*ACCEPT) */
	7008	if (scan->flags)
	7009	sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	7010	if (ARG2L(scan)){
	7011	regnode *cursor;
	7012	for (cursor=scan;
	7013	cursor && OP(cursor)!=END;
	7014	cursor=regnext(cursor))
	7015	{
	7016	if ( OP(cursor)==CLOSE ){
	7017	n = ARG(cursor);
	7018	if ( n <= lastopen ) {
	7019	CLOSE_CAPTURE;
	7020	if (n > rex->lastparen)
	7021	rex->lastparen = n;
	7022	rex->lastcloseparen = n;
	7023	if ( n == ARG(scan) \|\| EVAL_CLOSE_PAREN_IS(cur_eval, n) )
	7024	break;
	7025	}
	7026	}
	7027	}
	7028	}
	7029	goto fake_end;
	7030	/* NOTREACHED */
	7031
	7032	case GROUPP: /* (?(1)) */
	7033	n = ARG(scan); /* which paren pair */
	7034	sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
	7035	break;
	7036
	7037	case NGROUPP: /* (?(<name>)) */
	7038	/* reg_check_named_buff_matched returns 0 for no match */
	7039	sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
	7040	break;
	7041
	7042	case INSUBP: /* (?(R)) */
	7043	n = ARG(scan);
	7044	/* this does not need to use EVAL_CLOSE_PAREN macros, as the arg
	7045	* of SCAN is already set up to match an eval.close_paren */
	7046	sw = cur_eval && (n == 0 \|\| CUR_EVAL.close_paren == n);
	7047	break;
	7048
	7049	case DEFINEP: /* (?(DEFINE)) */
	7050	sw = 0;
	7051	break;
	7052
	7053	case IFTHEN: /* (?(cond)A|B) */
	7054	reginfo->poscache_iter = reginfo->poscache_maxiter; /* Void cache */
	7055	if (sw)
	7056	next = NEXTOPER(NEXTOPER(scan));
	7057	else {
	7058	next = scan + ARG(scan);
	7059	if (OP(next) == IFTHEN) /* Fake one. */
	7060	next = NEXTOPER(NEXTOPER(next));
	7061	}
	7062	break;
	7063
	7064	case LOGICAL: /* modifier for EVAL and IFMATCH */
	7065	logical = scan->flags;
	7066	break;
	7067
	7068	/*******************************************************************
	7069
	7070	The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
	7071	pattern, where A and B are subpatterns. (For simple A, CURLYM or
	7072	STAR/PLUS/CURLY/CURLYN are used instead.)
	7073
	7074	A*B is compiled as <CURLYX><A><WHILEM><B>
	7075
	7076	On entry to the subpattern, CURLYX is called. This pushes a CURLYX
	7077	state, which contains the current count, initialised to -1. It also sets
	7078	cur_curlyx to point to this state, with any previous value saved in the
	7079	state block.
	7080
	7081	CURLYX then jumps straight to the WHILEM op, rather than executing A,
	7082	since the pattern may possibly match zero times (i.e. it's a while {} loop
	7083	rather than a do {} while loop).
	7084
	7085	Each entry to WHILEM represents a successful match of A. The count in the
	7086	CURLYX block is incremented, another WHILEM state is pushed, and execution
	7087	passes to A or B depending on greediness and the current count.
	7088
	7089	For example, if matching against the string a1a2a3b (where the aN are
	7090	substrings that match /A/), then the match progresses as follows: (the
	7091	pushed states are interspersed with the bits of strings matched so far):
	7092
	7093	<CURLYX cnt=-1>
	7094	<CURLYX cnt=0><WHILEM>
	7095	<CURLYX cnt=1><WHILEM> a1 <WHILEM>
	7096	<CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
	7097	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
	7098	<CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
	7099
	7100	(Contrast this with something like CURLYM, which maintains only a single
	7101	backtrack state:
	7102
	7103	<CURLYM cnt=0> a1
	7104	a1 <CURLYM cnt=1> a2
	7105	a1 a2 <CURLYM cnt=2> a3
	7106	a1 a2 a3 <CURLYM cnt=3> b
	7107	)
	7108
	7109	Each WHILEM state block marks a point to backtrack to upon partial failure
	7110	of A or B, and also contains some minor state data related to that
	7111	iteration. The CURLYX block, pointed to by cur_curlyx, contains the
	7112	overall state, such as the count, and pointers to the A and B ops.
	7113
	7114	This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
	7115	must always point to the current CURLYX block, the rules are:
	7116
	7117	When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
	7118	and set cur_curlyx to point to the new block.
	7119
	7120	When popping the CURLYX block after a successful or unsuccessful match,
	7121	restore the previous cur_curlyx.
	7122
	7123	When WHILEM is about to execute B, save the current cur_curlyx, and set it
	7124	to the outer one saved in the CURLYX block.
	7125
	7126	When popping the WHILEM block after a successful or unsuccessful B match,
	7127	restore the previous cur_curlyx.
	7128
	7129	Here's an example for the pattern (AI* BI)*BO
	7130	I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
	7131
	7132	cur_
	7133	curlyx backtrack stack
	7134	------ ---------------
	7135	NULL
	7136	CO <CO prev=NULL> <WO>
	7137	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	7138	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	7139	NULL <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
	7140
	7141	At this point the pattern succeeds, and we work back down the stack to
	7142	clean up, restoring as we go:
	7143
	7144	CO <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
	7145	CI <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
	7146	CO <CO prev=NULL> <WO>
	7147	NULL
	7148
	7149	*******************************************************************/
	7150
	7151	#define ST st->u.curlyx
	7152
	7153	case CURLYX: /* start of /A*B/ (for complex A) */
	7154	{
	7155	/* No need to save/restore up to this paren */
	7156	I32 parenfloor = scan->flags;
	7157
	7158	assert(next); /* keep Coverity happy */
	7159	if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
	7160	next += ARG(next);
	7161
	7162	/* XXXX Probably it is better to teach regpush to support
	7163	parenfloor > maxopenparen ... */
	7164	if (parenfloor > (I32)rex->lastparen)
	7165	parenfloor = rex->lastparen; /* Pessimization... */
	7166
	7167	ST.prev_curlyx= cur_curlyx;
	7168	cur_curlyx = st;
	7169	ST.cp = PL_savestack_ix;
	7170
	7171	/* these fields contain the state of the current curly.
	7172	* they are accessed by subsequent WHILEMs */
	7173	ST.parenfloor = parenfloor;
	7174	ST.me = scan;
	7175	ST.B = next;
	7176	ST.minmod = minmod;
	7177	minmod = 0;
	7178	ST.count = -1; /* this will be updated by WHILEM */
	7179	ST.lastloc = NULL; /* this will be updated by WHILEM */
	7180
	7181	PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput);
	7182	NOT_REACHED; /* NOTREACHED */
	7183	}
	7184
	7185	case CURLYX_end: /* just finished matching all of A*B */
	7186	cur_curlyx = ST.prev_curlyx;
	7187	sayYES;
	7188	NOT_REACHED; /* NOTREACHED */
	7189
	7190	case CURLYX_end_fail: /* just failed to match all of A*B */
	7191	regcpblow(ST.cp);
	7192	cur_curlyx = ST.prev_curlyx;
	7193	sayNO;
	7194	NOT_REACHED; /* NOTREACHED */
	7195
	7196
	7197	#undef ST
	7198	#define ST st->u.whilem
	7199
	7200	case WHILEM: /* just matched an A in /A*B/ (for complex A) */
	7201	{
	7202	/* see the discussion above about CURLYX/WHILEM */
	7203	I32 n;
	7204	int min, max;
	7205	regnode *A;
	7206
	7207	assert(cur_curlyx); /* keep Coverity happy */
	7208
	7209	min = ARG1(cur_curlyx->u.curlyx.me);
	7210	max = ARG2(cur_curlyx->u.curlyx.me);
	7211	A = NEXTOPER(cur_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS;
	7212	n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
	7213	ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
	7214	ST.cache_offset = 0;
	7215	ST.cache_mask = 0;
	7216
	7217
	7218	DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_ "whilem: matched %ld out of %d..%d\n",
	7219	depth, (long)n, min, max)
	7220	);
	7221
	7222	/* First just match a string of min A's. */
	7223
	7224	if (n < min) {
	7225	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	7226	maxopenparen);
	7227	cur_curlyx->u.curlyx.lastloc = locinput;
	7228	REGCP_SET(ST.lastcp);
	7229
	7230	PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput);
	7231	NOT_REACHED; /* NOTREACHED */
	7232	}
	7233
	7234	/* If degenerate A matches "", assume A done. */
	7235
	7236	if (locinput == cur_curlyx->u.curlyx.lastloc) {
	7237	DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_ "whilem: empty match detected, trying continuation...\n",
	7238	depth)
	7239	);
	7240	goto do_whilem_B_max;
	7241	}
	7242
	7243	/* super-linear cache processing.
	7244	*
	7245	* The idea here is that for certain types of CURLYX/WHILEM -
	7246	* principally those whose upper bound is infinity (and
	7247	* excluding regexes that have things like \1 and other very
	7248	* non-regular expressiony things), then if a pattern like
	7249	* /....A*.../ fails and we backtrack to the WHILEM, then we
	7250	* make a note that this particular WHILEM op was at string
	7251	* position 47 (say) when the rest of pattern failed. Then, if
	7252	* we ever find ourselves back at that WHILEM, and at string
	7253	* position 47 again, we can just fail immediately rather than
	7254	* running the rest of the pattern again.
	7255	*
	7256	* This is very handy when patterns start to go
	7257	* 'super-linear', like in (a+)*(a+)*(a+)*, where you end up
	7258	* with a combinatorial explosion of backtracking.
	7259	*
	7260	* The cache is implemented as a bit array, with one bit per
	7261	* string byte position per WHILEM op (up to 16) - so it's
	7262	* between 0.25 and 2x the string size.
	7263	*
	7264	* To avoid allocating a poscache buffer every time, we do an
	7265	* initial countdown; only after we have executed a WHILEM
	7266	* op (string-length x #WHILEMs) times do we allocate the
	7267	* cache.
	7268	*
	7269	* The top 4 bits of scan->flags byte say how many different
	7270	* relevant CURLYX/WHILEM op pairs there are, while the
	7271	* bottom 4-bits is the identifying index number of this
	7272	* WHILEM.
	7273	*/
	7274
	7275	if (scan->flags) {
	7276
	7277	if (!reginfo->poscache_maxiter) {
	7278	/* start the countdown: Postpone detection until we
	7279	* know the match is not *that* much linear. */
	7280	reginfo->poscache_maxiter
	7281	= (reginfo->strend - reginfo->strbeg + 1)
	7282	* (scan->flags>>4);
	7283	/* possible overflow for long strings and many CURLYX's */
	7284	if (reginfo->poscache_maxiter < 0)
	7285	reginfo->poscache_maxiter = I32_MAX;
	7286	reginfo->poscache_iter = reginfo->poscache_maxiter;
	7287	}
	7288
	7289	if (reginfo->poscache_iter-- == 0) {
	7290	/* initialise cache */
	7291	const SSize_t size = (reginfo->poscache_maxiter + 7)/8;
	7292	regmatch_info_aux *const aux = reginfo->info_aux;
	7293	if (aux->poscache) {
	7294	if ((SSize_t)reginfo->poscache_size < size) {
	7295	Renew(aux->poscache, size, char);
	7296	reginfo->poscache_size = size;
	7297	}
	7298	Zero(aux->poscache, size, char);
	7299	}
	7300	else {
	7301	reginfo->poscache_size = size;
	7302	Newxz(aux->poscache, size, char);
	7303	}
	7304	DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
	7305	"%swhilem: Detected a super-linear match, switching on caching%s...\n",
	7306	PL_colors[4], PL_colors[5])
	7307	);
	7308	}
	7309
	7310	if (reginfo->poscache_iter < 0) {
	7311	/* have we already failed at this position? */
	7312	SSize_t offset, mask;
	7313
	7314	reginfo->poscache_iter = -1; /* stop eventual underflow */
	7315	offset = (scan->flags & 0xf) - 1
	7316	+ (locinput - reginfo->strbeg)
	7317	* (scan->flags>>4);
	7318	mask = 1 << (offset % 8);
	7319	offset /= 8;
	7320	if (reginfo->info_aux->poscache[offset] & mask) {
	7321	DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_ "whilem: (cache) already tried at this position...\n",
	7322	depth)
	7323	);
	7324	sayNO; /* cache records failure */
	7325	}
	7326	ST.cache_offset = offset;
	7327	ST.cache_mask = mask;
	7328	}
	7329	}
	7330
	7331	/* Prefer B over A for minimal matching. */
	7332
	7333	if (cur_curlyx->u.curlyx.minmod) {
	7334	ST.save_curlyx = cur_curlyx;
	7335	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	7336	ST.cp = regcppush(rex, ST.save_curlyx->u.curlyx.parenfloor,
	7337	maxopenparen);
	7338	REGCP_SET(ST.lastcp);
	7339	PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
	7340	locinput);
	7341	NOT_REACHED; /* NOTREACHED */
	7342	}
	7343
	7344	/* Prefer A over B for maximal matching. */
	7345
	7346	if (n < max) { /* More greed allowed? */
	7347	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	7348	maxopenparen);
	7349	cur_curlyx->u.curlyx.lastloc = locinput;
	7350	REGCP_SET(ST.lastcp);
	7351	PUSH_STATE_GOTO(WHILEM_A_max, A, locinput);
	7352	NOT_REACHED; /* NOTREACHED */
	7353	}
	7354	goto do_whilem_B_max;
	7355	}
	7356	NOT_REACHED; /* NOTREACHED */
	7357
	7358	case WHILEM_B_min: /* just matched B in a minimal match */
	7359	case WHILEM_B_max: /* just matched B in a maximal match */
	7360	cur_curlyx = ST.save_curlyx;
	7361	sayYES;
	7362	NOT_REACHED; /* NOTREACHED */
	7363
	7364	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
	7365	cur_curlyx = ST.save_curlyx;
	7366	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	7367	cur_curlyx->u.curlyx.count--;
	7368	CACHEsayNO;
	7369	NOT_REACHED; /* NOTREACHED */
	7370
	7371	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
	7372	/* FALLTHROUGH */
	7373	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
	7374	REGCP_UNWIND(ST.lastcp);
	7375	regcppop(rex, &maxopenparen);
	7376	cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
	7377	cur_curlyx->u.curlyx.count--;
	7378	CACHEsayNO;
	7379	NOT_REACHED; /* NOTREACHED */
	7380
	7381	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
	7382	REGCP_UNWIND(ST.lastcp);
	7383	regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
	7384	DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_ "whilem: failed, trying continuation...\n",
	7385	depth)
	7386	);
	7387	do_whilem_B_max:
	7388	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	7389	&& ckWARN(WARN_REGEXP)
	7390	&& !reginfo->warned)
	7391	{
	7392	reginfo->warned = TRUE;
	7393	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	7394	"Complex regular subexpression recursion limit (%d) "
	7395	"exceeded",
	7396	REG_INFTY - 1);
	7397	}
	7398
	7399	/* now try B */
	7400	ST.save_curlyx = cur_curlyx;
	7401	cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
	7402	PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
	7403	locinput);
	7404	NOT_REACHED; /* NOTREACHED */
	7405
	7406	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
	7407	cur_curlyx = ST.save_curlyx;
	7408	REGCP_UNWIND(ST.lastcp);
	7409	regcppop(rex, &maxopenparen);
	7410
	7411	if (cur_curlyx->u.curlyx.count >= /*max*/ARG2(cur_curlyx->u.curlyx.me)) {
	7412	/* Maximum greed exceeded */
	7413	if (cur_curlyx->u.curlyx.count >= REG_INFTY
	7414	&& ckWARN(WARN_REGEXP)
	7415	&& !reginfo->warned)
	7416	{
	7417	reginfo->warned = TRUE;
	7418	Perl_warner(aTHX_ packWARN(WARN_REGEXP),
	7419	"Complex regular subexpression recursion "
	7420	"limit (%d) exceeded",
	7421	REG_INFTY - 1);
	7422	}
	7423	cur_curlyx->u.curlyx.count--;
	7424	CACHEsayNO;
	7425	}
	7426
	7427	DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_ "trying longer...\n", depth)
	7428	);
	7429	/* Try grabbing another A and see if it helps. */
	7430	cur_curlyx->u.curlyx.lastloc = locinput;
	7431	ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
	7432	maxopenparen);
	7433	REGCP_SET(ST.lastcp);
	7434	PUSH_STATE_GOTO(WHILEM_A_min,
	7435	/*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
	7436	locinput);
	7437	NOT_REACHED; /* NOTREACHED */
	7438
	7439	#undef ST
	7440	#define ST st->u.branch
	7441
	7442	case BRANCHJ: /* /(...|A|...)/ with long next pointer */
	7443	next = scan + ARG(scan);
	7444	if (next == scan)
	7445	next = NULL;
	7446	scan = NEXTOPER(scan);
	7447	/* FALLTHROUGH */
	7448
	7449	case BRANCH: /* /(...|A|...)/ */
	7450	scan = NEXTOPER(scan); /* scan now points to inner node */
	7451	ST.lastparen = rex->lastparen;
	7452	ST.lastcloseparen = rex->lastcloseparen;
	7453	ST.next_branch = next;
	7454	REGCP_SET(ST.cp);
	7455
	7456	/* Now go into the branch */
	7457	if (has_cutgroup) {
	7458	PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput);
	7459	} else {
	7460	PUSH_STATE_GOTO(BRANCH_next, scan, locinput);
	7461	}
	7462	NOT_REACHED; /* NOTREACHED */
	7463
	7464	case CUTGROUP: /* /(*THEN)/ */
	7465	sv_yes_mark = st->u.mark.mark_name = scan->flags
	7466	? MUTABLE_SV(rexi->data->data[ ARG( scan ) ])
	7467	: NULL;
	7468	PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
	7469	NOT_REACHED; /* NOTREACHED */
	7470
	7471	case CUTGROUP_next_fail:
	7472	do_cutgroup = 1;
	7473	no_final = 1;
	7474	if (st->u.mark.mark_name)
	7475	sv_commit = st->u.mark.mark_name;
	7476	sayNO;
	7477	NOT_REACHED; /* NOTREACHED */
	7478
	7479	case BRANCH_next:
	7480	sayYES;
	7481	NOT_REACHED; /* NOTREACHED */
	7482
	7483	case BRANCH_next_fail: /* that branch failed; try the next, if any */
	7484	if (do_cutgroup) {
	7485	do_cutgroup = 0;
	7486	no_final = 0;
	7487	}
	7488	REGCP_UNWIND(ST.cp);
	7489	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	7490	scan = ST.next_branch;
	7491	/* no more branches? */
	7492	if (!scan \|\| (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
	7493	DEBUG_EXECUTE_r({
	7494	Perl_re_exec_indentf( aTHX_ "%sBRANCH failed...%s\n",
	7495	depth,
	7496	PL_colors[4],
	7497	PL_colors[5] );
	7498	});
	7499	sayNO_SILENT;
	7500	}
	7501	continue; /* execute next BRANCH[J] op */
	7502	/* NOTREACHED */
	7503
	7504	case MINMOD: /* next op will be non-greedy, e.g. A*? */
	7505	minmod = 1;
	7506	break;
	7507
	7508	#undef ST
	7509	#define ST st->u.curlym
	7510
	7511	case CURLYM: /* /A{m,n}B/ where A is fixed-length */
	7512
	7513	/* This is an optimisation of CURLYX that enables us to push
	7514	* only a single backtracking state, no matter how many matches
	7515	* there are in {m,n}. It relies on the pattern being constant
	7516	* length, with no parens to influence future backrefs
	7517	*/
	7518
	7519	ST.me = scan;
	7520	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	7521
	7522	ST.lastparen = rex->lastparen;
	7523	ST.lastcloseparen = rex->lastcloseparen;
	7524
	7525	/* if paren positive, emulate an OPEN/CLOSE around A */
	7526	if (ST.me->flags) {
	7527	U32 paren = ST.me->flags;
	7528	if (paren > maxopenparen)
	7529	maxopenparen = paren;
	7530	scan += NEXT_OFF(scan); /* Skip former OPEN. */
	7531	}
	7532	ST.A = scan;
	7533	ST.B = next;
	7534	ST.alen = 0;
	7535	ST.count = 0;
	7536	ST.minmod = minmod;
	7537	minmod = 0;
	7538	ST.c1 = CHRTEST_UNINIT;
	7539	REGCP_SET(ST.cp);
	7540
	7541	if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
	7542	goto curlym_do_B;
	7543
	7544	curlym_do_A: /* execute the A in /A{m,n}B/ */
	7545	PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput); /* match A */
	7546	NOT_REACHED; /* NOTREACHED */
	7547
	7548	case CURLYM_A: /* we've just matched an A */
	7549	ST.count++;
	7550	/* after first match, determine A's length: u.curlym.alen */
	7551	if (ST.count == 1) {
	7552	if (reginfo->is_utf8_target) {
	7553	char *s = st->locinput;
	7554	while (s < locinput) {
	7555	ST.alen++;
	7556	s += UTF8SKIP(s);
	7557	}
	7558	}
	7559	else {
	7560	ST.alen = locinput - st->locinput;
	7561	}
	7562	if (ST.alen == 0)
	7563	ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
	7564	}
	7565	DEBUG_EXECUTE_r(
	7566	Perl_re_exec_indentf( aTHX_ "CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
	7567	depth, (IV) ST.count, (IV)ST.alen)
	7568	);
	7569
	7570	if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
	7571	goto fake_end;
	7572
	7573	{
	7574	I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
	7575	if ( max == REG_INFTY \|\| ST.count < max )
	7576	goto curlym_do_A; /* try to match another A */
	7577	}
	7578	goto curlym_do_B; /* try to match B */
	7579
	7580	case CURLYM_A_fail: /* just failed to match an A */
	7581	REGCP_UNWIND(ST.cp);
	7582
	7583
	7584	if (ST.minmod \|\| ST.count < ARG1(ST.me) /* min*/
	7585	\|\| EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
	7586	sayNO;
	7587
	7588	curlym_do_B: /* execute the B in /A{m,n}B/ */
	7589	if (ST.c1 == CHRTEST_UNINIT) {
	7590	/* calculate c1 and c2 for possible match of 1st char
	7591	* following curly */
	7592	ST.c1 = ST.c2 = CHRTEST_VOID;
	7593	assert(ST.B);
	7594	if (HAS_TEXT(ST.B) \|\| JUMPABLE(ST.B)) {
	7595	regnode *text_node = ST.B;
	7596	if (! HAS_TEXT(text_node))
	7597	FIND_NEXT_IMPT(text_node);
	7598	/* this used to be
	7599
	7600	(HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
	7601
	7602	But the former is redundant in light of the latter.
	7603
	7604	if this changes back then the macro for
	7605	IS_TEXT and friends need to change.
	7606	*/
	7607	if (PL_regkind[OP(text_node)] == EXACT) {
	7608	if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
	7609	text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
	7610	reginfo))
	7611	{
	7612	sayNO;
	7613	}
	7614	}
	7615	}
	7616	}
	7617
	7618	DEBUG_EXECUTE_r(
	7619	Perl_re_exec_indentf( aTHX_ "CURLYM trying tail with matches=%"IVdf"...\n",
	7620	depth, (IV)ST.count)
	7621	);
	7622	if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
	7623	if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
	7624	if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
	7625	&& memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
	7626	{
	7627	/* simulate B failing */
	7628	DEBUG_OPTIMISE_r(
	7629	Perl_re_exec_indentf( aTHX_ "CURLYM Fast bail next target=0x%"UVXf" c1=0x%"UVXf" c2=0x%"UVXf"\n",
	7630	depth,
	7631	valid_utf8_to_uvchr((U8 *) locinput, NULL),
	7632	valid_utf8_to_uvchr(ST.c1_utf8, NULL),
	7633	valid_utf8_to_uvchr(ST.c2_utf8, NULL))
	7634	);
	7635	state_num = CURLYM_B_fail;
	7636	goto reenter_switch;
	7637	}
	7638	}
	7639	else if (nextchr != ST.c1 && nextchr != ST.c2) {
	7640	/* simulate B failing */
	7641	DEBUG_OPTIMISE_r(
	7642	Perl_re_exec_indentf( aTHX_ "CURLYM Fast bail next target=0x%X c1=0x%X c2=0x%X\n",
	7643	depth,
	7644	(int) nextchr, ST.c1, ST.c2)
	7645	);
	7646	state_num = CURLYM_B_fail;
	7647	goto reenter_switch;
	7648	}
	7649	}
	7650
	7651	if (ST.me->flags) {
	7652	/* emulate CLOSE: mark current A as captured */
	7653	I32 paren = ST.me->flags;
	7654	if (ST.count) {
	7655	rex->offs[paren].start
	7656	= HOPc(locinput, -ST.alen) - reginfo->strbeg;
	7657	rex->offs[paren].end = locinput - reginfo->strbeg;
	7658	if ((U32)paren > rex->lastparen)
	7659	rex->lastparen = paren;
	7660	rex->lastcloseparen = paren;
	7661	}
	7662	else
	7663	rex->offs[paren].end = -1;
	7664
	7665	if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
	7666	{
	7667	if (ST.count)
	7668	goto fake_end;
	7669	else
	7670	sayNO;
	7671	}
	7672	}
	7673
	7674	PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput); /* match B */
	7675	NOT_REACHED; /* NOTREACHED */
	7676
	7677	case CURLYM_B_fail: /* just failed to match a B */
	7678	REGCP_UNWIND(ST.cp);
	7679	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	7680	if (ST.minmod) {
	7681	I32 max = ARG2(ST.me);
	7682	if (max != REG_INFTY && ST.count == max)
	7683	sayNO;
	7684	goto curlym_do_A; /* try to match a further A */
	7685	}
	7686	/* backtrack one A */
	7687	if (ST.count == ARG1(ST.me) /* min */)
	7688	sayNO;
	7689	ST.count--;
	7690	SET_locinput(HOPc(locinput, -ST.alen));
	7691	goto curlym_do_B; /* try to match B */
	7692
	7693	#undef ST
	7694	#define ST st->u.curly
	7695
	7696	#define CURLY_SETPAREN(paren, success) \
	7697	if (paren) { \
	7698	if (success) { \
	7699	rex->offs[paren].start = HOPc(locinput, -1) - reginfo->strbeg; \
	7700	rex->offs[paren].end = locinput - reginfo->strbeg; \
	7701	if (paren > rex->lastparen) \
	7702	rex->lastparen = paren; \
	7703	rex->lastcloseparen = paren; \
	7704	} \
	7705	else { \
	7706	rex->offs[paren].end = -1; \
	7707	rex->lastparen = ST.lastparen; \
	7708	rex->lastcloseparen = ST.lastcloseparen; \
	7709	} \
	7710	}
	7711
	7712	case STAR: /* /A*B/ where A is width 1 char */
	7713	ST.paren = 0;
	7714	ST.min = 0;
	7715	ST.max = REG_INFTY;
	7716	scan = NEXTOPER(scan);
	7717	goto repeat;
	7718
	7719	case PLUS: /* /A+B/ where A is width 1 char */
	7720	ST.paren = 0;
	7721	ST.min = 1;
	7722	ST.max = REG_INFTY;
	7723	scan = NEXTOPER(scan);
	7724	goto repeat;
	7725
	7726	case CURLYN: /* /(A){m,n}B/ where A is width 1 char */
	7727	ST.paren = scan->flags; /* Which paren to set */
	7728	ST.lastparen = rex->lastparen;
	7729	ST.lastcloseparen = rex->lastcloseparen;
	7730	if (ST.paren > maxopenparen)
	7731	maxopenparen = ST.paren;
	7732	ST.min = ARG1(scan); /* min to match */
	7733	ST.max = ARG2(scan); /* max to match */
	7734	if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
	7735	{
	7736	ST.min=1;
	7737	ST.max=1;
	7738	}
	7739	scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
	7740	goto repeat;
	7741
	7742	case CURLY: /* /A{m,n}B/ where A is width 1 char */
	7743	ST.paren = 0;
	7744	ST.min = ARG1(scan); /* min to match */
	7745	ST.max = ARG2(scan); /* max to match */
	7746	scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
	7747	repeat:
	7748	/*
	7749	* Lookahead to avoid useless match attempts
	7750	* when we know what character comes next.
	7751	*
	7752	* Used to only do .x and .?x, but now it allows
	7753	* for )'s, ('s and (?{ ... })'s to be in the way
	7754	* of the quantifier and the EXACT-like node. -- japhy
	7755	*/
	7756
	7757	assert(ST.min <= ST.max);
	7758	if (! HAS_TEXT(next) && ! JUMPABLE(next)) {
	7759	ST.c1 = ST.c2 = CHRTEST_VOID;
	7760	}
	7761	else {
	7762	regnode *text_node = next;
	7763
	7764	if (! HAS_TEXT(text_node))
	7765	FIND_NEXT_IMPT(text_node);
	7766
	7767	if (! HAS_TEXT(text_node))
	7768	ST.c1 = ST.c2 = CHRTEST_VOID;
	7769	else {
	7770	if ( PL_regkind[OP(text_node)] != EXACT ) {
	7771	ST.c1 = ST.c2 = CHRTEST_VOID;
	7772	}
	7773	else {
	7774
	7775	/* Currently we only get here when
	7776
	7777	PL_regkind[OP(text_node)] == EXACT
	7778
	7779	if this changes back then the macro for IS_TEXT and
	7780	friends need to change. */
	7781	if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
	7782	text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
	7783	reginfo))
	7784	{
	7785	sayNO;
	7786	}
	7787	}
	7788	}
	7789	}
	7790
	7791	ST.A = scan;
	7792	ST.B = next;
	7793	if (minmod) {
	7794	char *li = locinput;
	7795	minmod = 0;
	7796	if (ST.min &&
	7797	regrepeat(rex, &li, ST.A, reginfo, ST.min, depth)
	7798	< ST.min)
	7799	sayNO;
	7800	SET_locinput(li);
	7801	ST.count = ST.min;
	7802	REGCP_SET(ST.cp);
	7803	if (ST.c1 == CHRTEST_VOID)
	7804	goto curly_try_B_min;
	7805
	7806	ST.oldloc = locinput;
	7807
	7808	/* set ST.maxpos to the furthest point along the
	7809	* string that could possibly match */
	7810	if (ST.max == REG_INFTY) {
	7811	ST.maxpos = reginfo->strend - 1;
	7812	if (utf8_target)
	7813	while (UTF8_IS_CONTINUATION((U8)ST.maxpos))
	7814	ST.maxpos--;
	7815	}
	7816	else if (utf8_target) {
	7817	int m = ST.max - ST.min;
	7818	for (ST.maxpos = locinput;
	7819	m >0 && ST.maxpos < reginfo->strend; m--)
	7820	ST.maxpos += UTF8SKIP(ST.maxpos);
	7821	}
	7822	else {
	7823	ST.maxpos = locinput + ST.max - ST.min;
	7824	if (ST.maxpos >= reginfo->strend)
	7825	ST.maxpos = reginfo->strend - 1;
	7826	}
	7827	goto curly_try_B_min_known;
	7828
	7829	}
	7830	else {
	7831	/* avoid taking address of locinput, so it can remain
	7832	* a register var */
	7833	char *li = locinput;
	7834	ST.count = regrepeat(rex, &li, ST.A, reginfo, ST.max, depth);
	7835	if (ST.count < ST.min)
	7836	sayNO;
	7837	SET_locinput(li);
	7838	if ((ST.count > ST.min)
	7839	&& (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
	7840	{
	7841	/* A{m,n} must come at the end of the string, there's
	7842	* no point in backing off ... */
	7843	ST.min = ST.count;
	7844	/* ...except that $ and \Z can match before and after
	7845	newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
	7846	We may back off by one in this case. */
	7847	if (UCHARAT(locinput - 1) == '\n' && OP(ST.B) != EOS)
	7848	ST.min--;
	7849	}
	7850	REGCP_SET(ST.cp);
	7851	goto curly_try_B_max;
	7852	}
	7853	NOT_REACHED; /* NOTREACHED */
	7854
	7855	case CURLY_B_min_known_fail:
	7856	/* failed to find B in a non-greedy match where c1,c2 valid */
	7857
	7858	REGCP_UNWIND(ST.cp);
	7859	if (ST.paren) {
	7860	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	7861	}
	7862	/* Couldn't or didn't -- move forward. */
	7863	ST.oldloc = locinput;
	7864	if (utf8_target)
	7865	locinput += UTF8SKIP(locinput);
	7866	else
	7867	locinput++;
	7868	ST.count++;
	7869	curly_try_B_min_known:
	7870	/* find the next place where 'B' could work, then call B */
	7871	{
	7872	int n;
	7873	if (utf8_target) {
	7874	n = (ST.oldloc == locinput) ? 0 : 1;
	7875	if (ST.c1 == ST.c2) {
	7876	/* set n to utf8_distance(oldloc, locinput) */
	7877	while (locinput <= ST.maxpos
	7878	&& memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput)))
	7879	{
	7880	locinput += UTF8SKIP(locinput);
	7881	n++;
	7882	}
	7883	}
	7884	else {
	7885	/* set n to utf8_distance(oldloc, locinput) */
	7886	while (locinput <= ST.maxpos
	7887	&& memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
	7888	&& memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
	7889	{
	7890	locinput += UTF8SKIP(locinput);
	7891	n++;
	7892	}
	7893	}
	7894	}
	7895	else { /* Not utf8_target */
	7896	if (ST.c1 == ST.c2) {
	7897	while (locinput <= ST.maxpos &&
	7898	UCHARAT(locinput) != ST.c1)
	7899	locinput++;
	7900	}
	7901	else {
	7902	while (locinput <= ST.maxpos
	7903	&& UCHARAT(locinput) != ST.c1
	7904	&& UCHARAT(locinput) != ST.c2)
	7905	locinput++;
	7906	}
	7907	n = locinput - ST.oldloc;
	7908	}
	7909	if (locinput > ST.maxpos)
	7910	sayNO;
	7911	if (n) {
	7912	/* In /a{m,n}b/, ST.oldloc is at "a" x m, locinput is
	7913	* at b; check that everything between oldloc and
	7914	* locinput matches */
	7915	char *li = ST.oldloc;
	7916	ST.count += n;
	7917	if (regrepeat(rex, &li, ST.A, reginfo, n, depth) < n)
	7918	sayNO;
	7919	assert(n == REG_INFTY \|\| locinput == li);
	7920	}
	7921	CURLY_SETPAREN(ST.paren, ST.count);
	7922	if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
	7923	goto fake_end;
	7924	PUSH_STATE_GOTO(CURLY_B_min_known, ST.B, locinput);
	7925	}
	7926	NOT_REACHED; /* NOTREACHED */
	7927
	7928	case CURLY_B_min_fail:
	7929	/* failed to find B in a non-greedy match where c1,c2 invalid */
	7930
	7931	REGCP_UNWIND(ST.cp);
	7932	if (ST.paren) {
	7933	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	7934	}
	7935	/* failed -- move forward one */
	7936	{
	7937	char *li = locinput;
	7938	if (!regrepeat(rex, &li, ST.A, reginfo, 1, depth)) {
	7939	sayNO;
	7940	}
	7941	locinput = li;
	7942	}
	7943	{
	7944	ST.count++;
	7945	if (ST.count <= ST.max \|\| (ST.max == REG_INFTY &&
	7946	ST.count > 0)) /* count overflow ? */
	7947	{
	7948	curly_try_B_min:
	7949	CURLY_SETPAREN(ST.paren, ST.count);
	7950	if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
	7951	goto fake_end;
	7952	PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
	7953	}
	7954	}
	7955	sayNO;
	7956	NOT_REACHED; /* NOTREACHED */
	7957
	7958	curly_try_B_max:
	7959	/* a successful greedy match: now try to match B */
	7960	if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
	7961	goto fake_end;
	7962	{
	7963	bool could_match = locinput < reginfo->strend;
	7964
	7965	/* If it could work, try it. */
	7966	if (ST.c1 != CHRTEST_VOID && could_match) {
	7967	if (! UTF8_IS_INVARIANT(UCHARAT(locinput)) && utf8_target)
	7968	{
	7969	could_match = memEQ(locinput,
	7970	ST.c1_utf8,
	7971	UTF8SKIP(locinput))
	7972	\|\| memEQ(locinput,
	7973	ST.c2_utf8,
	7974	UTF8SKIP(locinput));
	7975	}
	7976	else {
	7977	could_match = UCHARAT(locinput) == ST.c1
	7978	\|\| UCHARAT(locinput) == ST.c2;
	7979	}
	7980	}
	7981	if (ST.c1 == CHRTEST_VOID \|\| could_match) {
	7982	CURLY_SETPAREN(ST.paren, ST.count);
	7983	PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput);
	7984	NOT_REACHED; /* NOTREACHED */
	7985	}
	7986	}
	7987	/* FALLTHROUGH */
	7988
	7989	case CURLY_B_max_fail:
	7990	/* failed to find B in a greedy match */
	7991
	7992	REGCP_UNWIND(ST.cp);
	7993	if (ST.paren) {
	7994	UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
	7995	}
	7996	/* back up. */
	7997	if (--ST.count < ST.min)
	7998	sayNO;
	7999	locinput = HOPc(locinput, -1);
	8000	goto curly_try_B_max;
	8001
	8002	#undef ST
	8003
	8004	case END: /* last op of main pattern */
	8005	fake_end:
	8006	if (cur_eval) {
	8007	/* we've just finished A in /(??{A})B/; now continue with B */
	8008	SET_RECURSE_LOCINPUT("FAKE-END[before]", CUR_EVAL.prev_recurse_locinput);
	8009	st->u.eval.prev_rex = rex_sv; /* inner */
	8010
	8011	/* Save all the positions. */
	8012	st->u.eval.cp = regcppush(rex, 0, maxopenparen);
	8013	rex_sv = CUR_EVAL.prev_rex;
	8014	is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
	8015	SET_reg_curpm(rex_sv);
	8016	rex = ReANY(rex_sv);
	8017	rexi = RXi_GET(rex);
	8018
	8019	st->u.eval.prev_curlyx = cur_curlyx;
	8020	cur_curlyx = CUR_EVAL.prev_curlyx;
	8021
	8022	REGCP_SET(st->u.eval.lastcp);
	8023
	8024	/* Restore parens of the outer rex without popping the
	8025	* savestack */
	8026	S_regcp_restore(aTHX_ rex, CUR_EVAL.lastcp,
	8027	&maxopenparen);
	8028
	8029	st->u.eval.prev_eval = cur_eval;
	8030	cur_eval = CUR_EVAL.prev_eval;
	8031	DEBUG_EXECUTE_r(
	8032	Perl_re_exec_indentf( aTHX_ "EVAL trying tail ... (cur_eval=%p)\n",
	8033	depth, cur_eval););
	8034	if ( nochange_depth )
	8035	nochange_depth--;
	8036
	8037	SET_RECURSE_LOCINPUT("FAKE-END[after]", cur_eval->locinput);
	8038
	8039	PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B,
	8040	locinput); /* match B */
	8041	}
	8042
	8043	if (locinput < reginfo->till) {
	8044	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
	8045	"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
	8046	PL_colors[4],
	8047	(long)(locinput - startpos),
	8048	(long)(reginfo->till - startpos),
	8049	PL_colors[5]));
	8050
	8051	sayNO_SILENT; /* Cannot match: too short. */
	8052	}
	8053	sayYES; /* Success! */
	8054
	8055	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
	8056	DEBUG_EXECUTE_r(
	8057	Perl_re_exec_indentf( aTHX_ "%ssubpattern success...%s\n",
	8058	depth, PL_colors[4], PL_colors[5]));
	8059	sayYES; /* Success! */
	8060
	8061	#undef ST
	8062	#define ST st->u.ifmatch
	8063
	8064	{
	8065	char *newstart;
	8066
	8067	case SUSPEND: /* (?>A) */
	8068	ST.wanted = 1;
	8069	newstart = locinput;
	8070	goto do_ifmatch;
	8071
	8072	case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
	8073	ST.wanted = 0;
	8074	goto ifmatch_trivial_fail_test;
	8075
	8076	case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
	8077	ST.wanted = 1;
	8078	ifmatch_trivial_fail_test:
	8079	if (scan->flags) {
	8080	char * const s = HOPBACKc(locinput, scan->flags);
	8081	if (!s) {
	8082	/* trivial fail */
	8083	if (logical) {
	8084	logical = 0;
	8085	sw = 1 - cBOOL(ST.wanted);
	8086	}
	8087	else if (ST.wanted)
	8088	sayNO;
	8089	next = scan + ARG(scan);
	8090	if (next == scan)
	8091	next = NULL;
	8092	break;
	8093	}
	8094	newstart = s;
	8095	}
	8096	else
	8097	newstart = locinput;
	8098
	8099	do_ifmatch:
	8100	ST.me = scan;
	8101	ST.logical = logical;
	8102	logical = 0; /* XXX: reset state of logical once it has been saved into ST */
	8103
	8104	/* execute body of (?...A) */
	8105	PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), newstart);
	8106	NOT_REACHED; /* NOTREACHED */
	8107	}
	8108
	8109	case IFMATCH_A_fail: /* body of (?...A) failed */
	8110	ST.wanted = !ST.wanted;
	8111	/* FALLTHROUGH */
	8112
	8113	case IFMATCH_A: /* body of (?...A) succeeded */
	8114	if (ST.logical) {
	8115	sw = cBOOL(ST.wanted);
	8116	}
	8117	else if (!ST.wanted)
	8118	sayNO;
	8119
	8120	if (OP(ST.me) != SUSPEND) {
	8121	/* restore old position except for (?>...) */
	8122	locinput = st->locinput;
	8123	}
	8124	scan = ST.me + ARG(ST.me);
	8125	if (scan == ST.me)
	8126	scan = NULL;
	8127	continue; /* execute B */
	8128
	8129	#undef ST
	8130
	8131	case LONGJMP: /* alternative with many branches compiles to
	8132	* (BRANCHJ; EXACT ...; LONGJMP ) x N */
	8133	next = scan + ARG(scan);
	8134	if (next == scan)
	8135	next = NULL;
	8136	break;
	8137
	8138	case COMMIT: /* (COMMIT) /
	8139	reginfo->cutpoint = reginfo->strend;
	8140	/* FALLTHROUGH */
	8141
	8142	case PRUNE: /* (PRUNE) /
	8143	if (scan->flags)
	8144	sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	8145	PUSH_STATE_GOTO(COMMIT_next, next, locinput);
	8146	NOT_REACHED; /* NOTREACHED */
	8147
	8148	case COMMIT_next_fail:
	8149	no_final = 1;
	8150	/* FALLTHROUGH */
	8151	sayNO;
	8152	NOT_REACHED; /* NOTREACHED */
	8153
	8154	case OPFAIL: /* (FAIL) /
	8155	if (scan->flags)
	8156	sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	8157	if (logical) {
	8158	/* deal with (?(?!)X\|Y) properly,
	8159	* make sure we trigger the no branch
	8160	* of the trailing IFTHEN structure*/
	8161	sw= 0;
	8162	break;
	8163	} else {
	8164	sayNO;
	8165	}
	8166	NOT_REACHED; /* NOTREACHED */
	8167
	8168	#define ST st->u.mark
	8169	case MARKPOINT: /* (MARK:foo) /
	8170	ST.prev_mark = mark_state;
	8171	ST.mark_name = sv_commit = sv_yes_mark
	8172	= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	8173	mark_state = st;
	8174	ST.mark_loc = locinput;
	8175	PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput);
	8176	NOT_REACHED; /* NOTREACHED */
	8177
	8178	case MARKPOINT_next:
	8179	mark_state = ST.prev_mark;
	8180	sayYES;
	8181	NOT_REACHED; /* NOTREACHED */
	8182
	8183	case MARKPOINT_next_fail:
	8184	if (popmark && sv_eq(ST.mark_name,popmark))
	8185	{
	8186	if (ST.mark_loc > startpoint)
	8187	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	8188	popmark = NULL; /* we found our mark */
	8189	sv_commit = ST.mark_name;
	8190
	8191	DEBUG_EXECUTE_r({
	8192	Perl_re_exec_indentf( aTHX_ "%ssetting cutpoint to mark:%"SVf"...%s\n",
	8193	depth,
	8194	PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
	8195	});
	8196	}
	8197	mark_state = ST.prev_mark;
	8198	sv_yes_mark = mark_state ?
	8199	mark_state->u.mark.mark_name : NULL;
	8200	sayNO;
	8201	NOT_REACHED; /* NOTREACHED */
	8202
	8203	case SKIP: /* (SKIP) /
	8204	if (!scan->flags) {
	8205	/* (SKIP) : if we fail we cut here/
	8206	ST.mark_name = NULL;
	8207	ST.mark_loc = locinput;
	8208	PUSH_STATE_GOTO(SKIP_next,next, locinput);
	8209	} else {
	8210	/* (SKIP:NAME) : if there is a (MARK:NAME) fail where it was,
	8211	otherwise do nothing. Meaning we need to scan
	8212	*/
	8213	regmatch_state *cur = mark_state;
	8214	SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
	8215
	8216	while (cur) {
	8217	if ( sv_eq( cur->u.mark.mark_name,
	8218	find ) )
	8219	{
	8220	ST.mark_name = find;
	8221	PUSH_STATE_GOTO( SKIP_next, next, locinput);
	8222	}
	8223	cur = cur->u.mark.prev_mark;
	8224	}
	8225	}
	8226	/* Didn't find our (MARK:NAME) so ignore this (SKIP:NAME) */
	8227	break;
	8228
	8229	case SKIP_next_fail:
	8230	if (ST.mark_name) {
	8231	/* (*CUT:NAME) - Set up to search for the name as we
	8232	collapse the stack*/
	8233	popmark = ST.mark_name;
	8234	} else {
	8235	/* (CUT) - No name, we cut here./
	8236	if (ST.mark_loc > startpoint)
	8237	reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
	8238	/* but we set sv_commit to latest mark_name if there
	8239	is one so they can test to see how things lead to this
	8240	cut */
	8241	if (mark_state)
	8242	sv_commit=mark_state->u.mark.mark_name;
	8243	}
	8244	no_final = 1;
	8245	sayNO;
	8246	NOT_REACHED; /* NOTREACHED */
	8247	#undef ST
	8248
	8249	case LNBREAK: /* \R */
	8250	if ((n=is_LNBREAK_safe(locinput, reginfo->strend, utf8_target))) {
	8251	locinput += n;
	8252	} else
	8253	sayNO;
	8254	break;
	8255
	8256	default:
	8257	PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
	8258	PTR2UV(scan), OP(scan));
	8259	Perl_croak(aTHX_ "regexp memory corruption");
	8260
	8261	/* this is a point to jump to in order to increment
	8262	* locinput by one character */
	8263	increment_locinput:
	8264	assert(!NEXTCHR_IS_EOS);
	8265	if (utf8_target) {
	8266	locinput += PL_utf8skip[nextchr];
	8267	/* locinput is allowed to go 1 char off the end, but not 2+ */
	8268	if (locinput > reginfo->strend)
	8269	sayNO;
	8270	}
	8271	else
	8272	locinput++;
	8273	break;
	8274
	8275	} /* end switch */
	8276
	8277	/* switch break jumps here */
	8278	scan = next; /* prepare to execute the next op and ... */
	8279	continue; /* ... jump back to the top, reusing st */
	8280	/* NOTREACHED */
	8281
	8282	push_yes_state:
	8283	/* push a state that backtracks on success */
	8284	st->u.yes.prev_yes_state = yes_state;
	8285	yes_state = st;
	8286	/* FALLTHROUGH */
	8287	push_state:
	8288	/* push a new regex state, then continue at scan */
	8289	{
	8290	regmatch_state *newst;
	8291
	8292	DEBUG_STACK_r({
	8293	regmatch_state *cur = st;
	8294	regmatch_state *curyes = yes_state;
	8295	int curd = depth;
	8296	regmatch_slab *slab = PL_regmatch_slab;
	8297	for (;curd > -1 && (depth-curd < 3);cur--,curd--) {
	8298	if (cur < SLAB_FIRST(slab)) {
	8299	slab = slab->prev;
	8300	cur = SLAB_LAST(slab);
	8301	}
	8302	Perl_re_exec_indentf( aTHX_ "#%-3d %-10s %s\n",
	8303	depth,
	8304	curd, PL_reg_name[cur->resume_state],
	8305	(curyes == cur) ? "yes" : ""
	8306	);
	8307	if (curyes == cur)
	8308	curyes = cur->u.yes.prev_yes_state;
	8309	}
	8310	} else
	8311	DEBUG_STATE_pp("push")
	8312	);
	8313	depth++;
	8314	st->locinput = locinput;
	8315	newst = st+1;
	8316	if (newst > SLAB_LAST(PL_regmatch_slab))
	8317	newst = S_push_slab(aTHX);
	8318	PL_regmatch_state = newst;
	8319
	8320	locinput = pushinput;
	8321	st = newst;
	8322	continue;
	8323	/* NOTREACHED */
	8324	}
	8325	}
	8326	#ifdef SOLARIS_BAD_OPTIMIZER
	8327	# undef PL_charclass
	8328	#endif
	8329
	8330	/*
	8331	* We get here only if there's trouble -- normally "case END" is
	8332	* the terminating point.
	8333	*/
	8334	Perl_croak(aTHX_ "corrupted regexp pointers");
	8335	NOT_REACHED; /* NOTREACHED */
	8336
	8337	yes:
	8338	if (yes_state) {
	8339	/* we have successfully completed a subexpression, but we must now
	8340	* pop to the state marked by yes_state and continue from there */
	8341	assert(st != yes_state);
	8342	#ifdef DEBUGGING
	8343	while (st != yes_state) {
	8344	st--;
	8345	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	8346	PL_regmatch_slab = PL_regmatch_slab->prev;
	8347	st = SLAB_LAST(PL_regmatch_slab);
	8348	}
	8349	DEBUG_STATE_r({
	8350	if (no_final) {
	8351	DEBUG_STATE_pp("pop (no final)");
	8352	} else {
	8353	DEBUG_STATE_pp("pop (yes)");
	8354	}
	8355	});
	8356	depth--;
	8357	}
	8358	#else
	8359	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
	8360	\|\| yes_state > SLAB_LAST(PL_regmatch_slab))
	8361	{
	8362	/* not in this slab, pop slab */
	8363	depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
	8364	PL_regmatch_slab = PL_regmatch_slab->prev;
	8365	st = SLAB_LAST(PL_regmatch_slab);
	8366	}
	8367	depth -= (st - yes_state);
	8368	#endif
	8369	st = yes_state;
	8370	yes_state = st->u.yes.prev_yes_state;
	8371	PL_regmatch_state = st;
	8372
	8373	if (no_final)
	8374	locinput= st->locinput;
	8375	state_num = st->resume_state + no_final;
	8376	goto reenter_switch;
	8377	}
	8378
	8379	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ "%sMatch successful!%s\n",
	8380	PL_colors[4], PL_colors[5]));
	8381
	8382	if (reginfo->info_aux_eval) {
	8383	/* each successfully executed (?{...}) block does the equivalent of
	8384	* local $^R = do {...}
	8385	* When popping the save stack, all these locals would be undone;
	8386	* bypass this by setting the outermost saved $^R to the latest
	8387	* value */
	8388	/* I dont know if this is needed or works properly now.
	8389	* see code related to PL_replgv elsewhere in this file.
	8390	* Yves
	8391	*/
	8392	if (oreplsv != GvSV(PL_replgv))
	8393	sv_setsv(oreplsv, GvSV(PL_replgv));
	8394	}
	8395	result = 1;
	8396	goto final_exit;
	8397
	8398	no:
	8399	DEBUG_EXECUTE_r(
	8400	Perl_re_exec_indentf( aTHX_ "%sfailed...%s\n",
	8401	depth,
	8402	PL_colors[4], PL_colors[5])
	8403	);
	8404
	8405	no_silent:
	8406	if (no_final) {
	8407	if (yes_state) {
	8408	goto yes;
	8409	} else {
	8410	goto final_exit;
	8411	}
	8412	}
	8413	if (depth) {
	8414	/* there's a previous state to backtrack to */
	8415	st--;
	8416	if (st < SLAB_FIRST(PL_regmatch_slab)) {
	8417	PL_regmatch_slab = PL_regmatch_slab->prev;
	8418	st = SLAB_LAST(PL_regmatch_slab);
	8419	}
	8420	PL_regmatch_state = st;
	8421	locinput= st->locinput;
	8422
	8423	DEBUG_STATE_pp("pop");
	8424	depth--;
	8425	if (yes_state == st)
	8426	yes_state = st->u.yes.prev_yes_state;
	8427
	8428	state_num = st->resume_state + 1; /* failure = success + 1 */
	8429	PERL_ASYNC_CHECK();
	8430	goto reenter_switch;
	8431	}
	8432	result = 0;
	8433
	8434	final_exit:
	8435	if (rex->intflags & PREGf_VERBARG_SEEN) {
	8436	SV *sv_err = get_sv("REGERROR", 1);
	8437	SV *sv_mrk = get_sv("REGMARK", 1);
	8438	if (result) {
	8439	sv_commit = &PL_sv_no;
	8440	if (!sv_yes_mark)
	8441	sv_yes_mark = &PL_sv_yes;
	8442	} else {
	8443	if (!sv_commit)
	8444	sv_commit = &PL_sv_yes;
	8445	sv_yes_mark = &PL_sv_no;
	8446	}
	8447	assert(sv_err);
	8448	assert(sv_mrk);
	8449	sv_setsv(sv_err, sv_commit);
	8450	sv_setsv(sv_mrk, sv_yes_mark);
	8451	}
	8452
	8453
	8454	if (last_pushed_cv) {
	8455	dSP;
	8456	POP_MULTICALL;
	8457	PERL_UNUSED_VAR(SP);
	8458	}
	8459
	8460	assert(!result \|\| locinput - reginfo->strbeg >= 0);
	8461	return result ? locinput - reginfo->strbeg : -1;
	8462	}
	8463
	8464	/*
	8465	- regrepeat - repeatedly match something simple, report how many
	8466	*
	8467	* What 'simple' means is a node which can be the operand of a quantifier like
	8468	* '+', or {1,3}
	8469	*
	8470	* startposp - pointer to a pointer to the start position. This is updated
	8471	* to point to the byte following the highest successful
	8472	* match.
	8473	* p - the regnode to be repeatedly matched against.
	8474	* reginfo - struct holding match state, such as strend
	8475	* max - maximum number of things to match.
	8476	* depth - (for debugging) backtracking depth.
	8477	*/
	8478	STATIC I32
	8479	S_regrepeat(pTHX_ regexp prog, char startposp, const regnode p,
	8480	regmatch_info *const reginfo, I32 max, int depth)
	8481	{
	8482	char scan; / Pointer to current position in target string */
	8483	I32 c;
	8484	char loceol = reginfo->strend; / local version */
	8485	I32 hardcount = 0; /* How many matches so far */
	8486	bool utf8_target = reginfo->is_utf8_target;
	8487	unsigned int to_complement = 0; /* Invert the result? */
	8488	UV utf8_flags;
	8489	_char_class_number classnum;
	8490	#ifndef DEBUGGING
	8491	PERL_UNUSED_ARG(depth);
	8492	#endif
	8493
	8494	PERL_ARGS_ASSERT_REGREPEAT;
	8495
	8496	scan = *startposp;
	8497	if (max == REG_INFTY)
	8498	max = I32_MAX;
	8499	else if (! utf8_target && loceol - scan > max)
	8500	loceol = scan + max;
	8501
	8502	/* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
	8503	* to the maximum of how far we should go in it (leaving it set to the real
	8504	* end, if the maximum permissible would take us beyond that). This allows
	8505	* us to make the loop exit condition that we haven't gone past <loceol> to
	8506	* also mean that we haven't exceeded the max permissible count, saving a
	8507	* test each time through the loop. But it assumes that the OP matches a
	8508	* single byte, which is true for most of the OPs below when applied to a
	8509	* non-UTF-8 target. Those relatively few OPs that don't have this
	8510	* characteristic will have to compensate.
	8511	*
	8512	* There is no adjustment for UTF-8 targets, as the number of bytes per
	8513	* character varies. OPs will have to test both that the count is less
	8514	* than the max permissible (using <hardcount> to keep track), and that we
	8515	* are still within the bounds of the string (using <loceol>). A few OPs
	8516	* match a single byte no matter what the encoding. They can omit the max
	8517	* test if, for the UTF-8 case, they do the adjustment that was skipped
	8518	* above.
	8519	*
	8520	* Thus, the code above sets things up for the common case; and exceptional
	8521	* cases need extra work; the common case is to make sure <scan> doesn't
	8522	* go past <loceol>, and for UTF-8 to also use <hardcount> to make sure the
	8523	* count doesn't exceed the maximum permissible */
	8524
	8525	switch (OP(p)) {
	8526	case REG_ANY:
	8527	if (utf8_target) {
	8528	while (scan < loceol && hardcount < max && *scan != '\n') {
	8529	scan += UTF8SKIP(scan);
	8530	hardcount++;
	8531	}
	8532	} else {
	8533	while (scan < loceol && *scan != '\n')
	8534	scan++;
	8535	}
	8536	break;
	8537	case SANY:
	8538	if (utf8_target) {
	8539	while (scan < loceol && hardcount < max) {
	8540	scan += UTF8SKIP(scan);
	8541	hardcount++;
	8542	}
	8543	}
	8544	else
	8545	scan = loceol;
	8546	break;
	8547	case EXACTL:
	8548	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	8549	if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) {
	8550	_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(scan, loceol);
	8551	}
	8552	/* FALLTHROUGH */
	8553	case EXACT:
	8554	assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
	8555
	8556	c = (U8)*STRING(p);
	8557
	8558	/* Can use a simple loop if the pattern char to match on is invariant
	8559	* under UTF-8, or both target and pattern aren't UTF-8. Note that we
	8560	* can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
	8561	* true iff it doesn't matter if the argument is in UTF-8 or not */
	8562	if (UTF8_IS_INVARIANT(c) \|\| (! utf8_target && ! reginfo->is_utf8_pat)) {
	8563	if (utf8_target && loceol - scan > max) {
	8564	/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
	8565	* since here, to match at all, 1 char == 1 byte */
	8566	loceol = scan + max;
	8567	}
	8568	while (scan < loceol && UCHARAT(scan) == c) {
	8569	scan++;
	8570	}
	8571	}
	8572	else if (reginfo->is_utf8_pat) {
	8573	if (utf8_target) {
	8574	STRLEN scan_char_len;
	8575
	8576	/* When both target and pattern are UTF-8, we have to do
	8577	* string EQ */
	8578	while (hardcount < max
	8579	&& scan < loceol
	8580	&& (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
	8581	&& memEQ(scan, STRING(p), scan_char_len))
	8582	{
	8583	scan += scan_char_len;
	8584	hardcount++;
	8585	}
	8586	}
	8587	else if (! UTF8_IS_ABOVE_LATIN1(c)) {
	8588
	8589	/* Target isn't utf8; convert the character in the UTF-8
	8590	* pattern to non-UTF8, and do a simple loop */
	8591	c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
	8592	while (scan < loceol && UCHARAT(scan) == c) {
	8593	scan++;
	8594	}
	8595	} /* else pattern char is above Latin1, can't possibly match the
	8596	non-UTF-8 target */
	8597	}
	8598	else {
	8599
	8600	/* Here, the string must be utf8; pattern isn't, and <c> is
	8601	* different in utf8 than not, so can't compare them directly.
	8602	* Outside the loop, find the two utf8 bytes that represent c, and
	8603	* then look for those in sequence in the utf8 string */
	8604	U8 high = UTF8_TWO_BYTE_HI(c);
	8605	U8 low = UTF8_TWO_BYTE_LO(c);
	8606
	8607	while (hardcount < max
	8608	&& scan + 1 < loceol
	8609	&& UCHARAT(scan) == high
	8610	&& UCHARAT(scan + 1) == low)
	8611	{
	8612	scan += 2;
	8613	hardcount++;
	8614	}
	8615	}
	8616	break;
	8617
	8618	case EXACTFA_NO_TRIE: /* This node only generated for non-utf8 patterns */
	8619	assert(! reginfo->is_utf8_pat);
	8620	/* FALLTHROUGH */
	8621	case EXACTFA:
	8622	utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
	8623	goto do_exactf;
	8624
	8625	case EXACTFL:
	8626	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	8627	utf8_flags = FOLDEQ_LOCALE;
	8628	goto do_exactf;
	8629
	8630	case EXACTF: /* This node only generated for non-utf8 patterns */
	8631	assert(! reginfo->is_utf8_pat);
	8632	utf8_flags = 0;
	8633	goto do_exactf;
	8634
	8635	case EXACTFLU8:
	8636	if (! utf8_target) {
	8637	break;
	8638	}
	8639	utf8_flags = FOLDEQ_LOCALE \| FOLDEQ_S2_ALREADY_FOLDED
	8640	\| FOLDEQ_S2_FOLDS_SANE;
	8641	goto do_exactf;
	8642
	8643	case EXACTFU_SS:
	8644	case EXACTFU:
	8645	utf8_flags = reginfo->is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
	8646
	8647	do_exactf: {
	8648	int c1, c2;
	8649	U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
	8650
	8651	assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
	8652
	8653	if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
	8654	reginfo))
	8655	{
	8656	if (c1 == CHRTEST_VOID) {
	8657	/* Use full Unicode fold matching */
	8658	char *tmpeol = reginfo->strend;
	8659	STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
	8660	while (hardcount < max
	8661	&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
	8662	STRING(p), NULL, pat_len,
	8663	reginfo->is_utf8_pat, utf8_flags))
	8664	{
	8665	scan = tmpeol;
	8666	tmpeol = reginfo->strend;
	8667	hardcount++;
	8668	}
	8669	}
	8670	else if (utf8_target) {
	8671	if (c1 == c2) {
	8672	while (scan < loceol
	8673	&& hardcount < max
	8674	&& memEQ(scan, c1_utf8, UTF8SKIP(scan)))
	8675	{
	8676	scan += UTF8SKIP(scan);
	8677	hardcount++;
	8678	}
	8679	}
	8680	else {
	8681	while (scan < loceol
	8682	&& hardcount < max
	8683	&& (memEQ(scan, c1_utf8, UTF8SKIP(scan))
	8684	\|\| memEQ(scan, c2_utf8, UTF8SKIP(scan))))
	8685	{
	8686	scan += UTF8SKIP(scan);
	8687	hardcount++;
	8688	}
	8689	}
	8690	}
	8691	else if (c1 == c2) {
	8692	while (scan < loceol && UCHARAT(scan) == c1) {
	8693	scan++;
	8694	}
	8695	}
	8696	else {
	8697	while (scan < loceol &&
	8698	(UCHARAT(scan) == c1 \|\| UCHARAT(scan) == c2))
	8699	{
	8700	scan++;
	8701	}
	8702	}
	8703	}
	8704	break;
	8705	}
	8706	case ANYOFL:
	8707	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	8708
	8709	if (ANYOFL_UTF8_LOCALE_REQD(FLAGS(p)) && ! IN_UTF8_CTYPE_LOCALE) {
	8710	Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required);
	8711	}
	8712	/* FALLTHROUGH */
	8713	case ANYOFD:
	8714	case ANYOF:
	8715	if (utf8_target) {
	8716	while (hardcount < max
	8717	&& scan < loceol
	8718	&& reginclass(prog, p, (U8)scan, (U8) loceol, utf8_target))
	8719	{
	8720	scan += UTF8SKIP(scan);
	8721	hardcount++;
	8722	}
	8723	} else {
	8724	while (scan < loceol && REGINCLASS(prog, p, (U8*)scan, 0))
	8725	scan++;
	8726	}
	8727	break;
	8728
	8729	/* The argument (FLAGS) to all the POSIX node types is the class number */
	8730
	8731	case NPOSIXL:
	8732	to_complement = 1;
	8733	/* FALLTHROUGH */
	8734
	8735	case POSIXL:
	8736	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	8737	if (! utf8_target) {
	8738	while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
	8739	*scan)))
	8740	{
	8741	scan++;
	8742	}
	8743	} else {
	8744	while (hardcount < max && scan < loceol
	8745	&& to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(p),
	8746	(U8 *) scan)))
	8747	{
	8748	scan += UTF8SKIP(scan);
	8749	hardcount++;
	8750	}
	8751	}
	8752	break;
	8753
	8754	case POSIXD:
	8755	if (utf8_target) {
	8756	goto utf8_posix;
	8757	}
	8758	/* FALLTHROUGH */
	8759
	8760	case POSIXA:
	8761	if (utf8_target && loceol - scan > max) {
	8762
	8763	/* We didn't adjust <loceol> at the beginning of this routine
	8764	* because is UTF-8, but it is actually ok to do so, since here, to
	8765	* match, 1 char == 1 byte. */
	8766	loceol = scan + max;
	8767	}
	8768	while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
	8769	scan++;
	8770	}
	8771	break;
	8772
	8773	case NPOSIXD:
	8774	if (utf8_target) {
	8775	to_complement = 1;
	8776	goto utf8_posix;
	8777	}
	8778	/* FALLTHROUGH */
	8779
	8780	case NPOSIXA:
	8781	if (! utf8_target) {
	8782	while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
	8783	scan++;
	8784	}
	8785	}
	8786	else {
	8787
	8788	/* The complement of something that matches only ASCII matches all
	8789	* non-ASCII, plus everything in ASCII that isn't in the class. */
	8790	while (hardcount < max && scan < loceol
	8791	&& (! isASCII_utf8(scan)
	8792	\|\| ! _generic_isCC_A((U8) *scan, FLAGS(p))))
	8793	{
	8794	scan += UTF8SKIP(scan);
	8795	hardcount++;
	8796	}
	8797	}
	8798	break;
	8799
	8800	case NPOSIXU:
	8801	to_complement = 1;
	8802	/* FALLTHROUGH */
	8803
	8804	case POSIXU:
	8805	if (! utf8_target) {
	8806	while (scan < loceol && to_complement
	8807	^ cBOOL(_generic_isCC((U8) *scan, FLAGS(p))))
	8808	{
	8809	scan++;
	8810	}
	8811	}
	8812	else {
	8813	utf8_posix:
	8814	classnum = (_char_class_number) FLAGS(p);
	8815	if (classnum < _FIRST_NON_SWASH_CC) {
	8816
	8817	/* Here, a swash is needed for above-Latin1 code points.
	8818	* Process as many Latin1 code points using the built-in rules.
	8819	* Go to another loop to finish processing upon encountering
	8820	* the first Latin1 code point. We could do that in this loop
	8821	* as well, but the other way saves having to test if the swash
	8822	* has been loaded every time through the loop: extra space to
	8823	* save a test. */
	8824	while (hardcount < max && scan < loceol) {
	8825	if (UTF8_IS_INVARIANT(*scan)) {
	8826	if (! (to_complement ^ cBOOL(_generic_isCC((U8) *scan,
	8827	classnum))))
	8828	{
	8829	break;
	8830	}
	8831	scan++;
	8832	}
	8833	else if (UTF8_IS_DOWNGRADEABLE_START(*scan)) {
	8834	if (! (to_complement
	8835	^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*scan,
	8836	*(scan + 1)),
	8837	classnum))))
	8838	{
	8839	break;
	8840	}
	8841	scan += 2;
	8842	}
	8843	else {
	8844	goto found_above_latin1;
	8845	}
	8846
	8847	hardcount++;
	8848	}
	8849	}
	8850	else {
	8851	/* For these character classes, the knowledge of how to handle
	8852	* every code point is compiled in to Perl via a macro. This
	8853	* code is written for making the loops as tight as possible.
	8854	* It could be refactored to save space instead */
	8855	switch (classnum) {
	8856	case _CC_ENUM_SPACE:
	8857	while (hardcount < max
	8858	&& scan < loceol
	8859	&& (to_complement ^ cBOOL(isSPACE_utf8(scan))))
	8860	{
	8861	scan += UTF8SKIP(scan);
	8862	hardcount++;
	8863	}
	8864	break;
	8865	case _CC_ENUM_BLANK:
	8866	while (hardcount < max
	8867	&& scan < loceol
	8868	&& (to_complement ^ cBOOL(isBLANK_utf8(scan))))
	8869	{
	8870	scan += UTF8SKIP(scan);
	8871	hardcount++;
	8872	}
	8873	break;
	8874	case _CC_ENUM_XDIGIT:
	8875	while (hardcount < max
	8876	&& scan < loceol
	8877	&& (to_complement ^ cBOOL(isXDIGIT_utf8(scan))))
	8878	{
	8879	scan += UTF8SKIP(scan);
	8880	hardcount++;
	8881	}
	8882	break;
	8883	case _CC_ENUM_VERTSPACE:
	8884	while (hardcount < max
	8885	&& scan < loceol
	8886	&& (to_complement ^ cBOOL(isVERTWS_utf8(scan))))
	8887	{
	8888	scan += UTF8SKIP(scan);
	8889	hardcount++;
	8890	}
	8891	break;
	8892	case _CC_ENUM_CNTRL:
	8893	while (hardcount < max
	8894	&& scan < loceol
	8895	&& (to_complement ^ cBOOL(isCNTRL_utf8(scan))))
	8896	{
	8897	scan += UTF8SKIP(scan);
	8898	hardcount++;
	8899	}
	8900	break;
	8901	default:
	8902	Perl_croak(aTHX_ "panic: regrepeat() node %d='%s' has an unexpected character class '%d'", OP(p), PL_reg_name[OP(p)], classnum);
	8903	}
	8904	}
	8905	}
	8906	break;
	8907
	8908	found_above_latin1: /* Continuation of POSIXU and NPOSIXU */
	8909
	8910	/* Load the swash if not already present */
	8911	if (! PL_utf8_swash_ptrs[classnum]) {
	8912	U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
	8913	PL_utf8_swash_ptrs[classnum] = _core_swash_init(
	8914	"utf8",
	8915	"",
	8916	&PL_sv_undef, 1, 0,
	8917	PL_XPosix_ptrs[classnum], &flags);
	8918	}
	8919
	8920	while (hardcount < max && scan < loceol
	8921	&& to_complement ^ cBOOL(_generic_utf8(
	8922	classnum,
	8923	scan,
	8924	swash_fetch(PL_utf8_swash_ptrs[classnum],
	8925	(U8 *) scan,
	8926	TRUE))))
	8927	{
	8928	scan += UTF8SKIP(scan);
	8929	hardcount++;
	8930	}
	8931	break;
	8932
	8933	case LNBREAK:
	8934	if (utf8_target) {
	8935	while (hardcount < max && scan < loceol &&
	8936	(c=is_LNBREAK_utf8_safe(scan, loceol))) {
	8937	scan += c;
	8938	hardcount++;
	8939	}
	8940	} else {
	8941	/* LNBREAK can match one or two latin chars, which is ok, but we
	8942	* have to use hardcount in this situation, and throw away the
	8943	* adjustment to <loceol> done before the switch statement */
	8944	loceol = reginfo->strend;
	8945	while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
	8946	scan+=c;
	8947	hardcount++;
	8948	}
	8949	}
	8950	break;
	8951
	8952	case BOUNDL:
	8953	case NBOUNDL:
	8954	_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
	8955	/* FALLTHROUGH */
	8956	case BOUND:
	8957	case BOUNDA:
	8958	case BOUNDU:
	8959	case EOS:
	8960	case GPOS:
	8961	case KEEPS:
	8962	case NBOUND:
	8963	case NBOUNDA:
	8964	case NBOUNDU:
	8965	case OPFAIL:
	8966	case SBOL:
	8967	case SEOL:
	8968	/* These are all 0 width, so match right here or not at all. */
	8969	break;
	8970
	8971	default:
	8972	Perl_croak(aTHX_ "panic: regrepeat() called with unrecognized node type %d='%s'", OP(p), PL_reg_name[OP(p)]);
	8973	NOT_REACHED; /* NOTREACHED */
	8974
	8975	}
	8976
	8977	if (hardcount)
	8978	c = hardcount;
	8979	else
	8980	c = scan - *startposp;
	8981	*startposp = scan;
	8982
	8983	DEBUG_r({
	8984	GET_RE_DEBUG_FLAGS_DECL;
	8985	DEBUG_EXECUTE_r({
	8986	SV * const prop = sv_newmortal();
	8987	regprop(prog, prop, p, reginfo, NULL);
	8988	Perl_re_exec_indentf( aTHX_ "%s can match %"IVdf" times out of %"IVdf"...\n",
	8989	depth, SvPVX_const(prop),(IV)c,(IV)max);
	8990	});
	8991	});
	8992
	8993	return(c);
	8994	}
	8995
	8996
	8997	#if !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION)
	8998	/*
	8999	- regclass_swash - prepare the utf8 swash. Wraps the shared core version to
	9000	create a copy so that changes the caller makes won't change the shared one.
	9001	If <altsvp> is non-null, will return NULL in it, for back-compat.
	9002	*/
	9003	SV *
	9004	Perl_regclass_swash(pTHX_ const regexp prog, const regnode node, bool doinit, SV listsvp, SV altsvp)
	9005	{
	9006	PERL_ARGS_ASSERT_REGCLASS_SWASH;
	9007
	9008	if (altsvp) {
	9009	*altsvp = NULL;
	9010	}
	9011
	9012	return newSVsv(_get_regclass_nonbitmap_data(prog, node, doinit, listsvp, NULL, NULL));
	9013	}
	9014
	9015	#endif /* !defined(PERL_IN_XSUB_RE) \|\| defined(PLUGGABLE_RE_EXTENSION) */
	9016
	9017	/*
	9018	- reginclass - determine if a character falls into a character class
	9019
	9020	n is the ANYOF-type regnode
	9021	p is the target string
	9022	p_end points to one byte beyond the end of the target string
	9023	utf8_target tells whether p is in UTF-8.
	9024
	9025	Returns true if matched; false otherwise.
	9026
	9027	Note that this can be a synthetic start class, a combination of various
	9028	nodes, so things you think might be mutually exclusive, such as locale,
	9029	aren't. It can match both locale and non-locale
	9030
	9031	*/
	9032
	9033	STATIC bool
	9034	S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const U8* const p_end, const bool utf8_target)
	9035	{
	9036	dVAR;
	9037	const char flags = ANYOF_FLAGS(n);
	9038	bool match = FALSE;
	9039	UV c = *p;
	9040
	9041	PERL_ARGS_ASSERT_REGINCLASS;
	9042
	9043	/* If c is not already the code point, get it. Note that
	9044	* UTF8_IS_INVARIANT() works even if not in UTF-8 */
	9045	if (! UTF8_IS_INVARIANT(c) && utf8_target) {
	9046	STRLEN c_len = 0;
	9047	c = utf8n_to_uvchr(p, p_end - p, &c_len,
	9048	(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
	9049	\| UTF8_ALLOW_FFFF \| UTF8_CHECK_ONLY);
	9050	/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
	9051	* UTF8_ALLOW_FFFF */
	9052	if (c_len == (STRLEN)-1)
	9053	Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
	9054	if (c > 255 && OP(n) == ANYOFL && ! ANYOFL_UTF8_LOCALE_REQD(flags)) {
	9055	_CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(c);
	9056	}
	9057	}
	9058
	9059	/* If this character is potentially in the bitmap, check it */
	9060	if (c < NUM_ANYOF_CODE_POINTS) {
	9061	if (ANYOF_BITMAP_TEST(n, c))
	9062	match = TRUE;
	9063	else if ((flags
	9064	& ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
	9065	&& OP(n) == ANYOFD
	9066	&& ! utf8_target
	9067	&& ! isASCII(c))
	9068	{
	9069	match = TRUE;
	9070	}
	9071	else if (flags & ANYOF_LOCALE_FLAGS) {
	9072	if ((flags & ANYOFL_FOLD)
	9073	&& c < 256
	9074	&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
	9075	{
	9076	match = TRUE;
	9077	}
	9078	else if (ANYOF_POSIXL_TEST_ANY_SET(n)
	9079	&& c < 256
	9080	) {
	9081
	9082	/* The data structure is arranged so bits 0, 2, 4, ... are set
	9083	* if the class includes the Posix character class given by
	9084	* bit/2; and 1, 3, 5, ... are set if the class includes the
	9085	* complemented Posix class given by int(bit/2). So we loop
	9086	* through the bits, each time changing whether we complement
	9087	* the result or not. Suppose for the sake of illustration
	9088	* that bits 0-3 mean respectively, \w, \W, \s, \S. If bit 0
	9089	* is set, it means there is a match for this ANYOF node if the
	9090	* character is in the class given by the expression (0 / 2 = 0
	9091	* = \w). If it is in that class, isFOO_lc() will return 1,
	9092	* and since 'to_complement' is 0, the result will stay TRUE,
	9093	* and we exit the loop. Suppose instead that bit 0 is 0, but
	9094	* bit 1 is 1. That means there is a match if the character
	9095	* matches \W. We won't bother to call isFOO_lc() on bit 0,
	9096	* but will on bit 1. On the second iteration 'to_complement'
	9097	* will be 1, so the exclusive or will reverse things, so we
	9098	* are testing for \W. On the third iteration, 'to_complement'
	9099	* will be 0, and we would be testing for \s; the fourth
	9100	* iteration would test for \S, etc.
	9101	*
	9102	* Note that this code assumes that all the classes are closed
	9103	* under folding. For example, if a character matches \w, then
	9104	* its fold does too; and vice versa. This should be true for
	9105	* any well-behaved locale for all the currently defined Posix
	9106	* classes, except for :lower: and :upper:, which are handled
	9107	* by the pseudo-class :cased: which matches if either of the
	9108	* other two does. To get rid of this assumption, an outer
	9109	* loop could be used below to iterate over both the source
	9110	* character, and its fold (if different) */
	9111
	9112	int count = 0;
	9113	int to_complement = 0;
	9114
	9115	while (count < ANYOF_MAX) {
	9116	if (ANYOF_POSIXL_TEST(n, count)
	9117	&& to_complement ^ cBOOL(isFOO_lc(count/2, (U8) c)))
	9118	{
	9119	match = TRUE;
	9120	break;
	9121	}
	9122	count++;
	9123	to_complement ^= 1;
	9124	}
	9125	}
	9126	}
	9127	}
	9128
	9129
	9130	/* If the bitmap didn't (or couldn't) match, and something outside the
	9131	* bitmap could match, try that. */
	9132	if (!match) {
	9133	if (c >= NUM_ANYOF_CODE_POINTS
	9134	&& (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP))
	9135	{
	9136	match = TRUE; /* Everything above the bitmap matches */
	9137	}
	9138	/* Here doesn't match everything above the bitmap. If there is
	9139	* some information available beyond the bitmap, we may find a
	9140	* match in it. If so, this is most likely because the code point
	9141	* is outside the bitmap range. But rarely, it could be because of
	9142	* some other reason. If so, various flags are set to indicate
	9143	* this possibility. On ANYOFD nodes, there may be matches that
	9144	* happen only when the target string is UTF-8; or for other node
	9145	* types, because runtime lookup is needed, regardless of the
	9146	* UTF-8ness of the target string. Finally, under /il, there may
	9147	* be some matches only possible if the locale is a UTF-8 one. */
	9148	else if ( ARG(n) != ANYOF_ONLY_HAS_BITMAP
	9149	&& ( c >= NUM_ANYOF_CODE_POINTS
	9150	\|\| ( (flags & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP)
	9151	&& ( UNLIKELY(OP(n) != ANYOFD)
	9152	\|\| (utf8_target && ! isASCII_uni(c)
	9153	# if NUM_ANYOF_CODE_POINTS > 256
	9154	&& c < 256
	9155	# endif
	9156	)))
	9157	\|\| ( ANYOFL_SOME_FOLDS_ONLY_IN_UTF8_LOCALE(flags)
	9158	&& IN_UTF8_CTYPE_LOCALE)))
	9159	{
	9160	SV* only_utf8_locale = NULL;
	9161	SV * const sw = _get_regclass_nonbitmap_data(prog, n, TRUE, 0,
	9162	&only_utf8_locale, NULL);
	9163	if (sw) {
	9164	U8 utf8_buffer[2];
	9165	U8 * utf8_p;
	9166	if (utf8_target) {
	9167	utf8_p = (U8 *) p;
	9168	} else { /* Convert to utf8 */
	9169	utf8_p = utf8_buffer;
	9170	append_utf8_from_native_byte(*p, &utf8_p);
	9171	utf8_p = utf8_buffer;
	9172	}
	9173
	9174	if (swash_fetch(sw, utf8_p, TRUE)) {
	9175	match = TRUE;
	9176	}
	9177	}
	9178	if (! match && only_utf8_locale && IN_UTF8_CTYPE_LOCALE) {
	9179	match = _invlist_contains_cp(only_utf8_locale, c);
	9180	}
	9181	}
	9182
	9183	if (UNICODE_IS_SUPER(c)
	9184	&& (flags
	9185	& ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
	9186	&& OP(n) != ANYOFD
	9187	&& ckWARN_d(WARN_NON_UNICODE))
	9188	{
	9189	Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
	9190	"Matched non-Unicode code point 0x%04"UVXf" against Unicode property; may not be portable", c);
	9191	}
	9192	}
	9193
	9194	#if ANYOF_INVERT != 1
	9195	/* Depending on compiler optimization cBOOL takes time, so if don't have to
	9196	* use it, don't */
	9197	# error ANYOF_INVERT needs to be set to 1, or guarded with cBOOL below,
	9198	#endif
	9199
	9200	/* The xor complements the return if to invert: 1^1 = 0, 1^0 = 1 */
	9201	return (flags & ANYOF_INVERT) ^ match;
	9202	}
	9203
	9204	STATIC U8 *
	9205	S_reghop3(U8 s, SSize_t off, const U8 lim)
	9206	{
	9207	/* return the position 'off' UTF-8 characters away from 's', forward if
	9208	* 'off' >= 0, backwards if negative. But don't go outside of position
	9209	* 'lim', which better be < s if off < 0 */
	9210
	9211	PERL_ARGS_ASSERT_REGHOP3;
	9212
	9213	if (off >= 0) {
	9214	while (off-- && s < lim) {
	9215	/* XXX could check well-formedness here */
	9216	s += UTF8SKIP(s);
	9217	}
	9218	}
	9219	else {
	9220	while (off++ && s > lim) {
	9221	s--;
	9222	if (UTF8_IS_CONTINUED(*s)) {
	9223	while (s > lim && UTF8_IS_CONTINUATION(*s))
	9224	s--;
	9225	if (! UTF8_IS_START(*s)) {
	9226	Perl_croak_nocontext("Malformed UTF-8 character (fatal)");
	9227	}
	9228	}
	9229	/* XXX could check well-formedness here */
	9230	}
	9231	}
	9232	return s;
	9233	}
	9234
	9235	STATIC U8 *
	9236	S_reghop4(U8 s, SSize_t off, const U8 llim, const U8* rlim)
	9237	{
	9238	PERL_ARGS_ASSERT_REGHOP4;
	9239
	9240	if (off >= 0) {
	9241	while (off-- && s < rlim) {
	9242	/* XXX could check well-formedness here */
	9243	s += UTF8SKIP(s);
	9244	}
	9245	}
	9246	else {
	9247	while (off++ && s > llim) {
	9248	s--;
	9249	if (UTF8_IS_CONTINUED(*s)) {
	9250	while (s > llim && UTF8_IS_CONTINUATION(*s))
	9251	s--;
	9252	if (! UTF8_IS_START(*s)) {
	9253	Perl_croak_nocontext("Malformed UTF-8 character (fatal)");
	9254	}
	9255	}
	9256	/* XXX could check well-formedness here */
	9257	}
	9258	}
	9259	return s;
	9260	}
	9261
	9262	/* like reghop3, but returns NULL on overrun, rather than returning last
	9263	* char pos */
	9264
	9265	STATIC U8 *
	9266	S_reghopmaybe3(U8* s, SSize_t off, const U8* lim)
	9267	{
	9268	PERL_ARGS_ASSERT_REGHOPMAYBE3;
	9269
	9270	if (off >= 0) {
	9271	while (off-- && s < lim) {
	9272	/* XXX could check well-formedness here */
	9273	s += UTF8SKIP(s);
	9274	}
	9275	if (off >= 0)
	9276	return NULL;
	9277	}
	9278	else {
	9279	while (off++ && s > lim) {
	9280	s--;
	9281	if (UTF8_IS_CONTINUED(*s)) {
	9282	while (s > lim && UTF8_IS_CONTINUATION(*s))
	9283	s--;
	9284	if (! UTF8_IS_START(*s)) {
	9285	Perl_croak_nocontext("Malformed UTF-8 character (fatal)");
	9286	}
	9287	}
	9288	/* XXX could check well-formedness here */
	9289	}
	9290	if (off <= 0)
	9291	return NULL;
	9292	}
	9293	return s;
	9294	}
	9295
	9296
	9297	/* when executing a regex that may have (?{}), extra stuff needs setting
	9298	up that will be visible to the called code, even before the current
	9299	match has finished. In particular:
	9300
	9301	* $_ is localised to the SV currently being matched;
	9302	* pos($_) is created if necessary, ready to be updated on each call-out
	9303	to code;
	9304	* a fake PMOP is created that can be set to PL_curpm (normally PL_curpm
	9305	isn't set until the current pattern is successfully finished), so that
	9306	$1 etc of the match-so-far can be seen;
	9307	* save the old values of subbeg etc of the current regex, and set then
	9308	to the current string (again, this is normally only done at the end
	9309	of execution)
	9310	*/
	9311
	9312	static void
	9313	S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
	9314	{
	9315	MAGIC *mg;
	9316	regexp *const rex = ReANY(reginfo->prog);
	9317	regmatch_info_aux_eval *eval_state = reginfo->info_aux_eval;
	9318
	9319	eval_state->rex = rex;
	9320
	9321	if (reginfo->sv) {
	9322	/* Make $_ available to executed code. */
	9323	if (reginfo->sv != DEFSV) {
	9324	SAVE_DEFSV;
	9325	DEFSV_set(reginfo->sv);
	9326	}
	9327
	9328	if (!(mg = mg_find_mglob(reginfo->sv))) {
	9329	/* prepare for quick setting of pos */
	9330	mg = sv_magicext_mglob(reginfo->sv);
	9331	mg->mg_len = -1;
	9332	}
	9333	eval_state->pos_magic = mg;
	9334	eval_state->pos = mg->mg_len;
	9335	eval_state->pos_flags = mg->mg_flags;
	9336	}
	9337	else
	9338	eval_state->pos_magic = NULL;
	9339
	9340	if (!PL_reg_curpm) {
	9341	/* PL_reg_curpm is a fake PMOP that we can attach the current
	9342	* regex to and point PL_curpm at, so that $1 et al are visible
	9343	* within a /(?{})/. It's just allocated once per interpreter the
	9344	* first time its needed */
	9345	Newxz(PL_reg_curpm, 1, PMOP);
	9346	#ifdef USE_ITHREADS
	9347	{
	9348	SV* const repointer = &PL_sv_undef;
	9349	/* this regexp is also owned by the new PL_reg_curpm, which
	9350	will try to free it. */
	9351	av_push(PL_regex_padav, repointer);
	9352	PL_reg_curpm->op_pmoffset = av_tindex(PL_regex_padav);
	9353	PL_regex_pad = AvARRAY(PL_regex_padav);
	9354	}
	9355	#endif
	9356	}
	9357	SET_reg_curpm(reginfo->prog);
	9358	eval_state->curpm = PL_curpm;
	9359	PL_curpm = PL_reg_curpm;
	9360	if (RXp_MATCH_COPIED(rex)) {
	9361	/* Here is a serious problem: we cannot rewrite subbeg,
	9362	since it may be needed if this match fails. Thus
	9363	$` inside (?{}) could fail... */
	9364	eval_state->subbeg = rex->subbeg;
	9365	eval_state->sublen = rex->sublen;
	9366	eval_state->suboffset = rex->suboffset;
	9367	eval_state->subcoffset = rex->subcoffset;
	9368	#ifdef PERL_ANY_COW
	9369	eval_state->saved_copy = rex->saved_copy;
	9370	#endif
	9371	RXp_MATCH_COPIED_off(rex);
	9372	}
	9373	else
	9374	eval_state->subbeg = NULL;
	9375	rex->subbeg = (char *)reginfo->strbeg;
	9376	rex->suboffset = 0;
	9377	rex->subcoffset = 0;
	9378	rex->sublen = reginfo->strend - reginfo->strbeg;
	9379	}
	9380
	9381
	9382	/* destructor to clear up regmatch_info_aux and regmatch_info_aux_eval */
	9383
	9384	static void
	9385	S_cleanup_regmatch_info_aux(pTHX_ void *arg)
	9386	{
	9387	regmatch_info_aux aux = (regmatch_info_aux ) arg;
	9388	regmatch_info_aux_eval *eval_state = aux->info_aux_eval;
	9389	regmatch_slab *s;
	9390
	9391	Safefree(aux->poscache);
	9392
	9393	if (eval_state) {
	9394
	9395	/* undo the effects of S_setup_eval_state() */
	9396
	9397	if (eval_state->subbeg) {
	9398	regexp * const rex = eval_state->rex;
	9399	rex->subbeg = eval_state->subbeg;
	9400	rex->sublen = eval_state->sublen;
	9401	rex->suboffset = eval_state->suboffset;
	9402	rex->subcoffset = eval_state->subcoffset;
	9403	#ifdef PERL_ANY_COW
	9404	rex->saved_copy = eval_state->saved_copy;
	9405	#endif
	9406	RXp_MATCH_COPIED_on(rex);
	9407	}
	9408	if (eval_state->pos_magic)
	9409	{
	9410	eval_state->pos_magic->mg_len = eval_state->pos;
	9411	eval_state->pos_magic->mg_flags =
	9412	(eval_state->pos_magic->mg_flags & ~MGf_BYTES)
	9413	\| (eval_state->pos_flags & MGf_BYTES);
	9414	}
	9415
	9416	PL_curpm = eval_state->curpm;
	9417	}
	9418
	9419	PL_regmatch_state = aux->old_regmatch_state;
	9420	PL_regmatch_slab = aux->old_regmatch_slab;
	9421
	9422	/* free all slabs above current one - this must be the last action
	9423	* of this function, as aux and eval_state are allocated within
	9424	* slabs and may be freed here */
	9425
	9426	s = PL_regmatch_slab->next;
	9427	if (s) {
	9428	PL_regmatch_slab->next = NULL;
	9429	while (s) {
	9430	regmatch_slab * const osl = s;
	9431	s = s->next;
	9432	Safefree(osl);
	9433	}
	9434	}
	9435	}
	9436
	9437
	9438	STATIC void
	9439	S_to_utf8_substr(pTHX_ regexp *prog)
	9440	{
	9441	/* Converts substr fields in prog from bytes to UTF-8, calling fbm_compile
	9442	* on the converted value */
	9443
	9444	int i = 1;
	9445
	9446	PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
	9447
	9448	do {
	9449	if (prog->substrs->data[i].substr
	9450	&& !prog->substrs->data[i].utf8_substr) {
	9451	SV* const sv = newSVsv(prog->substrs->data[i].substr);
	9452	prog->substrs->data[i].utf8_substr = sv;
	9453	sv_utf8_upgrade(sv);
	9454	if (SvVALID(prog->substrs->data[i].substr)) {
	9455	if (SvTAIL(prog->substrs->data[i].substr)) {
	9456	/* Trim the trailing \n that fbm_compile added last
	9457	time. */
	9458	SvCUR_set(sv, SvCUR(sv) - 1);
	9459	/* Whilst this makes the SV technically "invalid" (as its
	9460	buffer is no longer followed by "\0") when fbm_compile()
	9461	adds the "\n" back, a "\0" is restored. */
	9462	fbm_compile(sv, FBMcf_TAIL);
	9463	} else
	9464	fbm_compile(sv, 0);
	9465	}
	9466	if (prog->substrs->data[i].substr == prog->check_substr)
	9467	prog->check_utf8 = sv;
	9468	}
	9469	} while (i--);
	9470	}
	9471
	9472	STATIC bool
	9473	S_to_byte_substr(pTHX_ regexp *prog)
	9474	{
	9475	/* Converts substr fields in prog from UTF-8 to bytes, calling fbm_compile
	9476	* on the converted value; returns FALSE if can't be converted. */
	9477
	9478	int i = 1;
	9479
	9480	PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
	9481
	9482	do {
	9483	if (prog->substrs->data[i].utf8_substr
	9484	&& !prog->substrs->data[i].substr) {
	9485	SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
	9486	if (! sv_utf8_downgrade(sv, TRUE)) {
	9487	return FALSE;
	9488	}
	9489	if (SvVALID(prog->substrs->data[i].utf8_substr)) {
	9490	if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
	9491	/* Trim the trailing \n that fbm_compile added last
	9492	time. */
	9493	SvCUR_set(sv, SvCUR(sv) - 1);
	9494	fbm_compile(sv, FBMcf_TAIL);
	9495	} else
	9496	fbm_compile(sv, 0);
	9497	}
	9498	prog->substrs->data[i].substr = sv;
	9499	if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
	9500	prog->check_substr = sv;
	9501	}
	9502	} while (i--);
	9503
	9504	return TRUE;
	9505	}
	9506
	9507	/*
	9508	* ex: set ts=8 sts=4 sw=4 et:
	9509	*/