perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regcomp.c
	2	*/
	3
	4	/*
	5	* "A fair jaw-cracker dwarf-language must be." --Samwise Gamgee
	6	*/
	7
	8	/* This file contains functions for compiling a regular expression. See
	9	* also regexec.c which funnily enough, contains functions for executing
	10	* a regular expression.
	11	*
	12	* This file is also copied at build time to ext/re/re_comp.c, where
	13	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	14	* This causes the main functions to be compiled under new names and with
	15	* debugging support added, which makes "use re 'debug'" work.
	16	*/
	17
	18	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	19	* confused with the original package (see point 3 below). Thanks, Henry!
	20	*/
	21
	22	/* Additional note: this code is very heavily munged from Henry's version
	23	* in places. In some spots I've traded clarity for efficiency, so don't
	24	* blame Henry for some of the lack of readability.
	25	*/
	26
	27	/* The names of the functions have been changed from regcomp and
	28	* regexec to pregcomp and pregexec in order to avoid conflicts
	29	* with the POSIX routines of the same names.
	30	*/
	31
	32	#ifdef PERL_EXT_RE_BUILD
	33	#include "re_top.h"
	34	#endif
	35
	36	/*
	37	* pregcomp and pregexec -- regsub and regerror are not used in perl
	38	*
	39	* Copyright (c) 1986 by University of Toronto.
	40	* Written by Henry Spencer. Not derived from licensed software.
	41	*
	42	* Permission is granted to anyone to use this software for any
	43	* purpose on any computer system, and to redistribute it freely,
	44	* subject to the following restrictions:
	45	*
	46	* 1. The author is not responsible for the consequences of use of
	47	* this software, no matter how awful, even if they arise
	48	* from defects in it.
	49	*
	50	* 2. The origin of this software must not be misrepresented, either
	51	* by explicit claim or by omission.
	52	*
	53	* 3. Altered versions must be plainly marked as such, and must not
	54	* be misrepresented as being the original software.
	55	*
	56	*
	57	**** Alterations to Henry's code are...
	58	****
	59	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	60	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
	61	****
	62	**** You may distribute under the terms of either the GNU General Public
	63	**** License or the Artistic License, as specified in the README file.
	64
	65	*
	66	* Beware that some of this code is subtly aware of the way operator
	67	* precedence is structured in regular expressions. Serious changes in
	68	* regular-expression syntax might require a total rethink.
	69	*/
	70	#include "EXTERN.h"
	71	#define PERL_IN_REGCOMP_C
	72	#include "perl.h"
	73
	74	#ifndef PERL_IN_XSUB_RE
	75	# include "INTERN.h"
	76	#endif
	77
	78	#define REG_COMP_C
	79	#ifdef PERL_IN_XSUB_RE
	80	# include "re_comp.h"
	81	#else
	82	# include "regcomp.h"
	83	#endif
	84
	85	#ifdef op
	86	#undef op
	87	#endif /* op */
	88
	89	#ifdef MSDOS
	90	# if defined(BUGGY_MSC6)
	91	/* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
	92	# pragma optimize("a",off)
	93	/* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
	94	# pragma optimize("w",on )
	95	# endif /* BUGGY_MSC6 */
	96	#endif /* MSDOS */
	97
	98	#ifndef STATIC
	99	#define STATIC static
	100	#endif
	101
	102	typedef struct RExC_state_t {
	103	U32 flags; /* are we folding, multilining? */
	104	char precomp; / uncompiled string. */
	105	regexp rx; / perl core regexp structure */
	106	regexp_internal rxi; / internal data for regexp object pprivate field */
	107	char start; / Start of input for compile */
	108	char end; / End of input for compile */
	109	char parse; / Input-scan pointer. */
	110	I32 whilem_seen; /* number of WHILEM in this expr */
	111	regnode emit_start; / Start of emitted-code area */
	112	regnode emit; / Code-emit pointer; &regdummy = don't = compiling */
	113	I32 naughty; /* How bad is this pattern? */
	114	I32 sawback; /* Did we see \1, ...? */
	115	U32 seen;
	116	I32 size; /* Code size. */
	117	I32 npar; /* Capture buffer count, (OPEN). */
	118	I32 cpar; /* Capture buffer count, (CLOSE). */
	119	I32 nestroot; /* root parens we are in - used by accept */
	120	I32 extralen;
	121	I32 seen_zerolen;
	122	I32 seen_evals;
	123	regnode *open_parens; / pointers to open parens */
	124	regnode *close_parens; / pointers to close parens */
	125	regnode opend; / END node in program */
	126	I32 utf8;
	127	HV charnames; / cache of named sequences */
	128	HV paren_names; / Paren names */
	129
	130	regnode *recurse; / Recurse regops */
	131	I32 recurse_count; /* Number of recurse regops */
	132	#if ADD_TO_REGEXEC
	133	char starttry; / -Dr: where regtry was called. */
	134	#define RExC_starttry (pRExC_state->starttry)
	135	#endif
	136	#ifdef DEBUGGING
	137	const char *lastparse;
	138	I32 lastnum;
	139	AV paren_name_list; / idx -> name */
	140	#define RExC_lastparse (pRExC_state->lastparse)
	141	#define RExC_lastnum (pRExC_state->lastnum)
	142	#define RExC_paren_name_list (pRExC_state->paren_name_list)
	143	#endif
	144	} RExC_state_t;
	145
	146	#define RExC_flags (pRExC_state->flags)
	147	#define RExC_precomp (pRExC_state->precomp)
	148	#define RExC_rx (pRExC_state->rx)
	149	#define RExC_rxi (pRExC_state->rxi)
	150	#define RExC_start (pRExC_state->start)
	151	#define RExC_end (pRExC_state->end)
	152	#define RExC_parse (pRExC_state->parse)
	153	#define RExC_whilem_seen (pRExC_state->whilem_seen)
	154	#define RExC_offsets (pRExC_state->rxi->offsets) /* I am not like the others */
	155	#define RExC_emit (pRExC_state->emit)
	156	#define RExC_emit_start (pRExC_state->emit_start)
	157	#define RExC_naughty (pRExC_state->naughty)
	158	#define RExC_sawback (pRExC_state->sawback)
	159	#define RExC_seen (pRExC_state->seen)
	160	#define RExC_size (pRExC_state->size)
	161	#define RExC_npar (pRExC_state->npar)
	162	#define RExC_nestroot (pRExC_state->nestroot)
	163	#define RExC_extralen (pRExC_state->extralen)
	164	#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
	165	#define RExC_seen_evals (pRExC_state->seen_evals)
	166	#define RExC_utf8 (pRExC_state->utf8)
	167	#define RExC_charnames (pRExC_state->charnames)
	168	#define RExC_open_parens (pRExC_state->open_parens)
	169	#define RExC_close_parens (pRExC_state->close_parens)
	170	#define RExC_opend (pRExC_state->opend)
	171	#define RExC_paren_names (pRExC_state->paren_names)
	172	#define RExC_recurse (pRExC_state->recurse)
	173	#define RExC_recurse_count (pRExC_state->recurse_count)
	174
	175
	176	#define ISMULT1(c) ((c) == '*' \|\| (c) == '+' \|\| (c) == '?')
	177	#define ISMULT2(s) ((s) == '' \|\| (s) == '+' \|\| (s) == '?' \|\| \
	178	((*s) == '{' && regcurly(s)))
	179
	180	#ifdef SPSTART
	181	#undef SPSTART /* dratted cpp namespace... */
	182	#endif
	183	/*
	184	* Flags to be passed up and down.
	185	*/
	186	#define WORST 0 /* Worst case. */
	187	#define HASWIDTH 0x1 /* Known to match non-null strings. */
	188	#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
	189	#define SPSTART 0x4 /* Starts with * or +. */
	190	#define TRYAGAIN 0x8 /* Weeded out a declaration. */
	191
	192	#define REG_NODE_NUM(x) ((x) ? (int)((x)-RExC_emit_start) : -1)
	193
	194	/* whether trie related optimizations are enabled */
	195	#if PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION
	196	#define TRIE_STUDY_OPT
	197	#define FULL_TRIE_STUDY
	198	#define TRIE_STCLASS
	199	#endif
	200
	201
	202
	203	#define PBYTE(u8str,paren) ((U8*)(u8str))[(paren) >> 3]
	204	#define PBITVAL(paren) (1 << ((paren) & 7))
	205	#define PAREN_TEST(u8str,paren) ( PBYTE(u8str,paren) & PBITVAL(paren))
	206	#define PAREN_SET(u8str,paren) PBYTE(u8str,paren) \|= PBITVAL(paren)
	207	#define PAREN_UNSET(u8str,paren) PBYTE(u8str,paren) &= (~PBITVAL(paren))
	208
	209
	210	/* About scan_data_t.
	211
	212	During optimisation we recurse through the regexp program performing
	213	various inplace (keyhole style) optimisations. In addition study_chunk
	214	and scan_commit populate this data structure with information about
	215	what strings MUST appear in the pattern. We look for the longest
	216	string that must appear for at a fixed location, and we look for the
	217	longest string that may appear at a floating location. So for instance
	218	in the pattern:
	219
	220	/FOO[xX]A.*B[xX]BAR/
	221
	222	Both 'FOO' and 'A' are fixed strings. Both 'B' and 'BAR' are floating
	223	strings (because they follow a .* construct). study_chunk will identify
	224	both FOO and BAR as being the longest fixed and floating strings respectively.
	225
	226	The strings can be composites, for instance
	227
	228	/(f)(o)(o)/
	229
	230	will result in a composite fixed substring 'foo'.
	231
	232	For each string some basic information is maintained:
	233
	234	- offset or min_offset
	235	This is the position the string must appear at, or not before.
	236	It also implicitly (when combined with minlenp) tells us how many
	237	character must match before the string we are searching.
	238	Likewise when combined with minlenp and the length of the string
	239	tells us how many characters must appear after the string we have
	240	found.
	241
	242	- max_offset
	243	Only used for floating strings. This is the rightmost point that
	244	the string can appear at. Ifset to I32 max it indicates that the
	245	string can occur infinitely far to the right.
	246
	247	- minlenp
	248	A pointer to the minimum length of the pattern that the string
	249	was found inside. This is important as in the case of positive
	250	lookahead or positive lookbehind we can have multiple patterns
	251	involved. Consider
	252
	253	/(?=FOO).*F/
	254
	255	The minimum length of the pattern overall is 3, the minimum length
	256	of the lookahead part is 3, but the minimum length of the part that
	257	will actually match is 1. So 'FOO's minimum length is 3, but the
	258	minimum length for the F is 1. This is important as the minimum length
	259	is used to determine offsets in front of and behind the string being
	260	looked for. Since strings can be composites this is the length of the
	261	pattern at the time it was commited with a scan_commit. Note that
	262	the length is calculated by study_chunk, so that the minimum lengths
	263	are not known until the full pattern has been compiled, thus the
	264	pointer to the value.
	265
	266	- lookbehind
	267
	268	In the case of lookbehind the string being searched for can be
	269	offset past the start point of the final matching string.
	270	If this value was just blithely removed from the min_offset it would
	271	invalidate some of the calculations for how many chars must match
	272	before or after (as they are derived from min_offset and minlen and
	273	the length of the string being searched for).
	274	When the final pattern is compiled and the data is moved from the
	275	scan_data_t structure into the regexp structure the information
	276	about lookbehind is factored in, with the information that would
	277	have been lost precalculated in the end_shift field for the
	278	associated string.
	279
	280	The fields pos_min and pos_delta are used to store the minimum offset
	281	and the delta to the maximum offset at the current point in the pattern.
	282
	283	*/
	284
	285	typedef struct scan_data_t {
	286	/I32 len_min; unused /
	287	/I32 len_delta; unused /
	288	I32 pos_min;
	289	I32 pos_delta;
	290	SV *last_found;
	291	I32 last_end; /* min value, <0 unless valid. */
	292	I32 last_start_min;
	293	I32 last_start_max;
	294	SV *longest; / Either &l_fixed, or &l_float. */
	295	SV longest_fixed; / longest fixed string found in pattern */
	296	I32 offset_fixed; /* offset where it starts */
	297	I32 minlen_fixed; / pointer to the minlen relevent to the string */
	298	I32 lookbehind_fixed; /* is the position of the string modfied by LB */
	299	SV longest_float; / longest floating string found in pattern */
	300	I32 offset_float_min; /* earliest point in string it can appear */
	301	I32 offset_float_max; /* latest point in string it can appear */
	302	I32 minlen_float; / pointer to the minlen relevent to the string */
	303	I32 lookbehind_float; /* is the position of the string modified by LB */
	304	I32 flags;
	305	I32 whilem_c;
	306	I32 *last_closep;
	307	struct regnode_charclass_class *start_class;
	308	} scan_data_t;
	309
	310	/*
	311	* Forward declarations for pregcomp()'s friends.
	312	*/
	313
	314	static const scan_data_t zero_scan_data =
	315	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0};
	316
	317	#define SF_BEFORE_EOL (SF_BEFORE_SEOL\|SF_BEFORE_MEOL)
	318	#define SF_BEFORE_SEOL 0x0001
	319	#define SF_BEFORE_MEOL 0x0002
	320	#define SF_FIX_BEFORE_EOL (SF_FIX_BEFORE_SEOL\|SF_FIX_BEFORE_MEOL)
	321	#define SF_FL_BEFORE_EOL (SF_FL_BEFORE_SEOL\|SF_FL_BEFORE_MEOL)
	322
	323	#ifdef NO_UNARY_PLUS
	324	# define SF_FIX_SHIFT_EOL (0+2)
	325	# define SF_FL_SHIFT_EOL (0+4)
	326	#else
	327	# define SF_FIX_SHIFT_EOL (+2)
	328	# define SF_FL_SHIFT_EOL (+4)
	329	#endif
	330
	331	#define SF_FIX_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
	332	#define SF_FIX_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
	333
	334	#define SF_FL_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FL_SHIFT_EOL)
	335	#define SF_FL_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FL_SHIFT_EOL) /* 0x20 */
	336	#define SF_IS_INF 0x0040
	337	#define SF_HAS_PAR 0x0080
	338	#define SF_IN_PAR 0x0100
	339	#define SF_HAS_EVAL 0x0200
	340	#define SCF_DO_SUBSTR 0x0400
	341	#define SCF_DO_STCLASS_AND 0x0800
	342	#define SCF_DO_STCLASS_OR 0x1000
	343	#define SCF_DO_STCLASS (SCF_DO_STCLASS_AND\|SCF_DO_STCLASS_OR)
	344	#define SCF_WHILEM_VISITED_POS 0x2000
	345
	346	#define SCF_TRIE_RESTUDY 0x4000 /* Do restudy? */
	347	#define SCF_SEEN_ACCEPT 0x8000
	348
	349	#define UTF (RExC_utf8 != 0)
	350	#define LOC ((RExC_flags & RXf_PMf_LOCALE) != 0)
	351	#define FOLD ((RExC_flags & RXf_PMf_FOLD) != 0)
	352
	353	#define OOB_UNICODE 12345678
	354	#define OOB_NAMEDCLASS -1
	355
	356	#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
	357	#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
	358
	359
	360	/* length of regex to show in messages that don't mark a position within */
	361	#define RegexLengthToShowInErrorMessages 127
	362
	363	/*
	364	* If MARKER[12] are adjusted, be sure to adjust the constants at the top
	365	* of t/op/regmesg.t, the tests in t/op/re_tests, and those in
	366	* op/pragma/warn/regcomp.
	367	*/
	368	#define MARKER1 "<-- HERE" /* marker as it appears in the description */
	369	#define MARKER2 " <-- HERE " /* marker as it appears within the regex */
	370
	371	#define REPORT_LOCATION " in regex; marked by " MARKER1 " in m/%.*s" MARKER2 "%s/"
	372
	373	/*
	374	* Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
	375	* arg. Show regex, up to a maximum length. If it's too long, chop and add
	376	* "...".
	377	*/
	378	#define _FAIL(code) STMT_START { \
	379	const char *ellipses = ""; \
	380	IV len = RExC_end - RExC_precomp; \
	381	\
	382	if (!SIZE_ONLY) \
	383	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	384	if (len > RegexLengthToShowInErrorMessages) { \
	385	/* chop 10 shorter than the max, to ensure meaning of "..." */ \
	386	len = RegexLengthToShowInErrorMessages - 10; \
	387	ellipses = "..."; \
	388	} \
	389	code; \
	390	} STMT_END
	391
	392	#define FAIL(msg) _FAIL( \
	393	Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \
	394	msg, (int)len, RExC_precomp, ellipses))
	395
	396	#define FAIL2(msg,arg) _FAIL( \
	397	Perl_croak(aTHX_ msg " in regex m/%.*s%s/", \
	398	arg, (int)len, RExC_precomp, ellipses))
	399
	400	/*
	401	* Simple_vFAIL -- like FAIL, but marks the current location in the scan
	402	*/
	403	#define Simple_vFAIL(m) STMT_START { \
	404	const IV offset = RExC_parse - RExC_precomp; \
	405	Perl_croak(aTHX_ "%s" REPORT_LOCATION, \
	406	m, (int)offset, RExC_precomp, RExC_precomp + offset); \
	407	} STMT_END
	408
	409	/*
	410	* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
	411	*/
	412	#define vFAIL(m) STMT_START { \
	413	if (!SIZE_ONLY) \
	414	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	415	Simple_vFAIL(m); \
	416	} STMT_END
	417
	418	/*
	419	* Like Simple_vFAIL(), but accepts two arguments.
	420	*/
	421	#define Simple_vFAIL2(m,a1) STMT_START { \
	422	const IV offset = RExC_parse - RExC_precomp; \
	423	S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, \
	424	(int)offset, RExC_precomp, RExC_precomp + offset); \
	425	} STMT_END
	426
	427	/*
	428	* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
	429	*/
	430	#define vFAIL2(m,a1) STMT_START { \
	431	if (!SIZE_ONLY) \
	432	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	433	Simple_vFAIL2(m, a1); \
	434	} STMT_END
	435
	436
	437	/*
	438	* Like Simple_vFAIL(), but accepts three arguments.
	439	*/
	440	#define Simple_vFAIL3(m, a1, a2) STMT_START { \
	441	const IV offset = RExC_parse - RExC_precomp; \
	442	S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, \
	443	(int)offset, RExC_precomp, RExC_precomp + offset); \
	444	} STMT_END
	445
	446	/*
	447	* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
	448	*/
	449	#define vFAIL3(m,a1,a2) STMT_START { \
	450	if (!SIZE_ONLY) \
	451	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	452	Simple_vFAIL3(m, a1, a2); \
	453	} STMT_END
	454
	455	/*
	456	* Like Simple_vFAIL(), but accepts four arguments.
	457	*/
	458	#define Simple_vFAIL4(m, a1, a2, a3) STMT_START { \
	459	const IV offset = RExC_parse - RExC_precomp; \
	460	S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3, \
	461	(int)offset, RExC_precomp, RExC_precomp + offset); \
	462	} STMT_END
	463
	464	#define vWARN(loc,m) STMT_START { \
	465	const IV offset = loc - RExC_precomp; \
	466	Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION, \
	467	m, (int)offset, RExC_precomp, RExC_precomp + offset); \
	468	} STMT_END
	469
	470	#define vWARNdep(loc,m) STMT_START { \
	471	const IV offset = loc - RExC_precomp; \
	472	Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP), \
	473	"%s" REPORT_LOCATION, \
	474	m, (int)offset, RExC_precomp, RExC_precomp + offset); \
	475	} STMT_END
	476
	477
	478	#define vWARN2(loc, m, a1) STMT_START { \
	479	const IV offset = loc - RExC_precomp; \
	480	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	481	a1, (int)offset, RExC_precomp, RExC_precomp + offset); \
	482	} STMT_END
	483
	484	#define vWARN3(loc, m, a1, a2) STMT_START { \
	485	const IV offset = loc - RExC_precomp; \
	486	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	487	a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset); \
	488	} STMT_END
	489
	490	#define vWARN4(loc, m, a1, a2, a3) STMT_START { \
	491	const IV offset = loc - RExC_precomp; \
	492	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	493	a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
	494	} STMT_END
	495
	496	#define vWARN5(loc, m, a1, a2, a3, a4) STMT_START { \
	497	const IV offset = loc - RExC_precomp; \
	498	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	499	a1, a2, a3, a4, (int)offset, RExC_precomp, RExC_precomp + offset); \
	500	} STMT_END
	501
	502
	503	/* Allow for side effects in s */
	504	#define REGC(c,s) STMT_START { \
	505	if (!SIZE_ONLY) *(s) = (c); else (void)(s); \
	506	} STMT_END
	507
	508	/* Macros for recording node offsets. 20001227 mjd@plover.com
	509	* Nodes are numbered 1, 2, 3, 4. Node #n's position is recorded in
	510	* element 2*n-1 of the array. Element #2n holds the byte length node #n.
	511	* Element 0 holds the number n.
	512	* Position is 1 indexed.
	513	*/
	514
	515	#define Set_Node_Offset_To_R(node,byte) STMT_START { \
	516	if (! SIZE_ONLY) { \
	517	MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n", \
	518	__LINE__, (int)(node), (int)(byte))); \
	519	if((node) < 0) { \
	520	Perl_croak(aTHX_ "value of node is %d in Offset macro", (int)(node)); \
	521	} else { \
	522	RExC_offsets[2*(node)-1] = (byte); \
	523	} \
	524	} \
	525	} STMT_END
	526
	527	#define Set_Node_Offset(node,byte) \
	528	Set_Node_Offset_To_R((node)-RExC_emit_start, (byte)-RExC_start)
	529	#define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
	530
	531	#define Set_Node_Length_To_R(node,len) STMT_START { \
	532	if (! SIZE_ONLY) { \
	533	MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n", \
	534	__LINE__, (int)(node), (int)(len))); \
	535	if((node) < 0) { \
	536	Perl_croak(aTHX_ "value of node is %d in Length macro", (int)(node)); \
	537	} else { \
	538	RExC_offsets[2*(node)] = (len); \
	539	} \
	540	} \
	541	} STMT_END
	542
	543	#define Set_Node_Length(node,len) \
	544	Set_Node_Length_To_R((node)-RExC_emit_start, len)
	545	#define Set_Cur_Node_Length(len) Set_Node_Length(RExC_emit, len)
	546	#define Set_Node_Cur_Length(node) \
	547	Set_Node_Length(node, RExC_parse - parse_start)
	548
	549	/* Get offsets and lengths */
	550	#define Node_Offset(n) (RExC_offsets[2*((n)-RExC_emit_start)-1])
	551	#define Node_Length(n) (RExC_offsets[2*((n)-RExC_emit_start)])
	552
	553	#define Set_Node_Offset_Length(node,offset,len) STMT_START { \
	554	Set_Node_Offset_To_R((node)-RExC_emit_start, (offset)); \
	555	Set_Node_Length_To_R((node)-RExC_emit_start, (len)); \
	556	} STMT_END
	557
	558
	559	#if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
	560	#define EXPERIMENTAL_INPLACESCAN
	561	#endif
	562
	563	#define DEBUG_STUDYDATA(str,data,depth) \
	564	DEBUG_OPTIMISE_MORE_r(if(data){ \
	565	PerlIO_printf(Perl_debug_log, \
	566	"%*s" str "Pos:%"IVdf"/%"IVdf \
	567	" Flags: 0x%"UVXf" Whilem_c: %"IVdf" Lcp: %"IVdf" %s", \
	568	(int)(depth)*2, "", \
	569	(IV)((data)->pos_min), \
	570	(IV)((data)->pos_delta), \
	571	(UV)((data)->flags), \
	572	(IV)((data)->whilem_c), \
	573	(IV)((data)->last_closep ? *((data)->last_closep) : -1), \
	574	is_inf ? "INF " : "" \
	575	); \
	576	if ((data)->last_found) \
	577	PerlIO_printf(Perl_debug_log, \
	578	"Last:'%s' %"IVdf":%"IVdf"/%"IVdf" %sFixed:'%s' @ %"IVdf \
	579	" %sFloat: '%s' @ %"IVdf"/%"IVdf"", \
	580	SvPVX_const((data)->last_found), \
	581	(IV)((data)->last_end), \
	582	(IV)((data)->last_start_min), \
	583	(IV)((data)->last_start_max), \
	584	((data)->longest && \
	585	(data)->longest==&((data)->longest_fixed)) ? "*" : "", \
	586	SvPVX_const((data)->longest_fixed), \
	587	(IV)((data)->offset_fixed), \
	588	((data)->longest && \
	589	(data)->longest==&((data)->longest_float)) ? "*" : "", \
	590	SvPVX_const((data)->longest_float), \
	591	(IV)((data)->offset_float_min), \
	592	(IV)((data)->offset_float_max) \
	593	); \
	594	PerlIO_printf(Perl_debug_log,"\n"); \
	595	});
	596
	597	static void clear_re(pTHX_ void *r);
	598
	599	/* Mark that we cannot extend a found fixed substring at this point.
	600	Update the longest found anchored substring and the longest found
	601	floating substrings if needed. */
	602
	603	STATIC void
	604	S_scan_commit(pTHX_ const RExC_state_t pRExC_state, scan_data_t data, I32 *minlenp, int is_inf)
	605	{
	606	const STRLEN l = CHR_SVLEN(data->last_found);
	607	const STRLEN old_l = CHR_SVLEN(*data->longest);
	608	GET_RE_DEBUG_FLAGS_DECL;
	609
	610	if ((l >= old_l) && ((l > old_l) \|\| (data->flags & SF_BEFORE_EOL))) {
	611	SvSetMagicSV(*data->longest, data->last_found);
	612	if (*data->longest == data->longest_fixed) {
	613	data->offset_fixed = l ? data->last_start_min : data->pos_min;
	614	if (data->flags & SF_BEFORE_EOL)
	615	data->flags
	616	\|= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
	617	else
	618	data->flags &= ~SF_FIX_BEFORE_EOL;
	619	data->minlen_fixed=minlenp;
	620	data->lookbehind_fixed=0;
	621	}
	622	else { /* data->longest == data->longest_float /
	623	data->offset_float_min = l ? data->last_start_min : data->pos_min;
	624	data->offset_float_max = (l
	625	? data->last_start_max
	626	: data->pos_min + data->pos_delta);
	627	if (is_inf \|\| (U32)data->offset_float_max > (U32)I32_MAX)
	628	data->offset_float_max = I32_MAX;
	629	if (data->flags & SF_BEFORE_EOL)
	630	data->flags
	631	\|= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
	632	else
	633	data->flags &= ~SF_FL_BEFORE_EOL;
	634	data->minlen_float=minlenp;
	635	data->lookbehind_float=0;
	636	}
	637	}
	638	SvCUR_set(data->last_found, 0);
	639	{
	640	SV * const sv = data->last_found;
	641	if (SvUTF8(sv) && SvMAGICAL(sv)) {
	642	MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
	643	if (mg)
	644	mg->mg_len = 0;
	645	}
	646	}
	647	data->last_end = -1;
	648	data->flags &= ~SF_BEFORE_EOL;
	649	DEBUG_STUDYDATA("cl_anything: ",data,0);
	650	}
	651
	652	/* Can match anything (initialization) */
	653	STATIC void
	654	S_cl_anything(const RExC_state_t pRExC_state, struct regnode_charclass_class cl)
	655	{
	656	ANYOF_CLASS_ZERO(cl);
	657	ANYOF_BITMAP_SETALL(cl);
	658	cl->flags = ANYOF_EOS\|ANYOF_UNICODE_ALL;
	659	if (LOC)
	660	cl->flags \|= ANYOF_LOCALE;
	661	}
	662
	663	/* Can match anything (initialization) */
	664	STATIC int
	665	S_cl_is_anything(const struct regnode_charclass_class *cl)
	666	{
	667	int value;
	668
	669	for (value = 0; value <= ANYOF_MAX; value += 2)
	670	if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
	671	return 1;
	672	if (!(cl->flags & ANYOF_UNICODE_ALL))
	673	return 0;
	674	if (!ANYOF_BITMAP_TESTALLSET((const void*)cl))
	675	return 0;
	676	return 1;
	677	}
	678
	679	/* Can match anything (initialization) */
	680	STATIC void
	681	S_cl_init(const RExC_state_t pRExC_state, struct regnode_charclass_class cl)
	682	{
	683	Zero(cl, 1, struct regnode_charclass_class);
	684	cl->type = ANYOF;
	685	cl_anything(pRExC_state, cl);
	686	}
	687
	688	STATIC void
	689	S_cl_init_zero(const RExC_state_t pRExC_state, struct regnode_charclass_class cl)
	690	{
	691	Zero(cl, 1, struct regnode_charclass_class);
	692	cl->type = ANYOF;
	693	cl_anything(pRExC_state, cl);
	694	if (LOC)
	695	cl->flags \|= ANYOF_LOCALE;
	696	}
	697
	698	/* 'And' a given class with another one. Can create false positives */
	699	/* We assume that cl is not inverted */
	700	STATIC void
	701	S_cl_and(struct regnode_charclass_class *cl,
	702	const struct regnode_charclass_class *and_with)
	703	{
	704
	705	assert(and_with->type == ANYOF);
	706	if (!(and_with->flags & ANYOF_CLASS)
	707	&& !(cl->flags & ANYOF_CLASS)
	708	&& (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
	709	&& !(and_with->flags & ANYOF_FOLD)
	710	&& !(cl->flags & ANYOF_FOLD)) {
	711	int i;
	712
	713	if (and_with->flags & ANYOF_INVERT)
	714	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	715	cl->bitmap[i] &= ~and_with->bitmap[i];
	716	else
	717	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	718	cl->bitmap[i] &= and_with->bitmap[i];
	719	} /* XXXX: logic is complicated otherwise, leave it along for a moment. */
	720	if (!(and_with->flags & ANYOF_EOS))
	721	cl->flags &= ~ANYOF_EOS;
	722
	723	if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE &&
	724	!(and_with->flags & ANYOF_INVERT)) {
	725	cl->flags &= ~ANYOF_UNICODE_ALL;
	726	cl->flags \|= ANYOF_UNICODE;
	727	ARG_SET(cl, ARG(and_with));
	728	}
	729	if (!(and_with->flags & ANYOF_UNICODE_ALL) &&
	730	!(and_with->flags & ANYOF_INVERT))
	731	cl->flags &= ~ANYOF_UNICODE_ALL;
	732	if (!(and_with->flags & (ANYOF_UNICODE\|ANYOF_UNICODE_ALL)) &&
	733	!(and_with->flags & ANYOF_INVERT))
	734	cl->flags &= ~ANYOF_UNICODE;
	735	}
	736
	737	/* 'OR' a given class with another one. Can create false positives */
	738	/* We assume that cl is not inverted */
	739	STATIC void
	740	S_cl_or(const RExC_state_t pRExC_state, struct regnode_charclass_class cl, const struct regnode_charclass_class *or_with)
	741	{
	742	if (or_with->flags & ANYOF_INVERT) {
	743	/* We do not use
	744	* (B1 \| CL1) \| (!B2 & !CL2) = (B1 \| !B2 & !CL2) \| (CL1 \| (!B2 & !CL2))
	745	* <= (B1 \| !B2) \| (CL1 \| !CL2)
	746	* which is wasteful if CL2 is small, but we ignore CL2:
	747	* (B1 \| CL1) \| (!B2 & !CL2) <= (B1 \| CL1) \| !B2 = (B1 \| !B2) \| CL1
	748	* XXXX Can we handle case-fold? Unclear:
	749	* (OK1(i) \| OK1(i')) \| !(OK1(i) \| OK1(i')) =
	750	* (OK1(i) \| OK1(i')) \| (!OK1(i) & !OK1(i'))
	751	*/
	752	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
	753	&& !(or_with->flags & ANYOF_FOLD)
	754	&& !(cl->flags & ANYOF_FOLD) ) {
	755	int i;
	756
	757	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	758	cl->bitmap[i] \|= ~or_with->bitmap[i];
	759	} /* XXXX: logic is complicated otherwise */
	760	else {
	761	cl_anything(pRExC_state, cl);
	762	}
	763	} else {
	764	/* (B1 \| CL1) \| (B2 \| CL2) = (B1 \| B2) \| (CL1 \| CL2)) */
	765	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
	766	&& (!(or_with->flags & ANYOF_FOLD)
	767	\|\| (cl->flags & ANYOF_FOLD)) ) {
	768	int i;
	769
	770	/* OR char bitmap and class bitmap separately */
	771	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	772	cl->bitmap[i] \|= or_with->bitmap[i];
	773	if (or_with->flags & ANYOF_CLASS) {
	774	for (i = 0; i < ANYOF_CLASSBITMAP_SIZE; i++)
	775	cl->classflags[i] \|= or_with->classflags[i];
	776	cl->flags \|= ANYOF_CLASS;
	777	}
	778	}
	779	else { /* XXXX: logic is complicated, leave it along for a moment. */
	780	cl_anything(pRExC_state, cl);
	781	}
	782	}
	783	if (or_with->flags & ANYOF_EOS)
	784	cl->flags \|= ANYOF_EOS;
	785
	786	if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE &&
	787	ARG(cl) != ARG(or_with)) {
	788	cl->flags \|= ANYOF_UNICODE_ALL;
	789	cl->flags &= ~ANYOF_UNICODE;
	790	}
	791	if (or_with->flags & ANYOF_UNICODE_ALL) {
	792	cl->flags \|= ANYOF_UNICODE_ALL;
	793	cl->flags &= ~ANYOF_UNICODE;
	794	}
	795	}
	796
	797	#define TRIE_LIST_ITEM(state,idx) (trie->states[state].trans.list)[ idx ]
	798	#define TRIE_LIST_CUR(state) ( TRIE_LIST_ITEM( state, 0 ).forid )
	799	#define TRIE_LIST_LEN(state) ( TRIE_LIST_ITEM( state, 0 ).newstate )
	800	#define TRIE_LIST_USED(idx) ( trie->states[state].trans.list ? (TRIE_LIST_CUR( idx ) - 1) : 0 )
	801
	802
	803	#ifdef DEBUGGING
	804	/*
	805	dump_trie(trie,widecharmap,revcharmap)
	806	dump_trie_interim_list(trie,widecharmap,revcharmap,next_alloc)
	807	dump_trie_interim_table(trie,widecharmap,revcharmap,next_alloc)
	808
	809	These routines dump out a trie in a somewhat readable format.
	810	The _interim_ variants are used for debugging the interim
	811	tables that are used to generate the final compressed
	812	representation which is what dump_trie expects.
	813
	814	Part of the reason for their existance is to provide a form
	815	of documentation as to how the different representations function.
	816
	817	*/
	818
	819	/*
	820	Dumps the final compressed table form of the trie to Perl_debug_log.
	821	Used for debugging make_trie().
	822	*/
	823
	824	STATIC void
	825	S_dump_trie(pTHX_ const struct _reg_trie_data trie, HV widecharmap,
	826	AV *revcharmap, U32 depth)
	827	{
	828	U32 state;
	829	SV *sv=sv_newmortal();
	830	int colwidth= widecharmap ? 6 : 4;
	831	GET_RE_DEBUG_FLAGS_DECL;
	832
	833
	834	PerlIO_printf( Perl_debug_log, "%*sChar : %-6s%-6s%-4s ",
	835	(int)depth * 2 + 2,"",
	836	"Match","Base","Ofs" );
	837
	838	for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
	839	SV ** const tmp = av_fetch( revcharmap, state, 0);
	840	if ( tmp ) {
	841	PerlIO_printf( Perl_debug_log, "%*s",
	842	colwidth,
	843	pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), colwidth,
	844	PL_colors[0], PL_colors[1],
	845	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) \|
	846	PERL_PV_ESCAPE_FIRSTCHAR
	847	)
	848	);
	849	}
	850	}
	851	PerlIO_printf( Perl_debug_log, "\n%*sState\|-----------------------",
	852	(int)depth * 2 + 2,"");
	853
	854	for( state = 0 ; state < trie->uniquecharcount ; state++ )
	855	PerlIO_printf( Perl_debug_log, "%.*s", colwidth, "--------");
	856	PerlIO_printf( Perl_debug_log, "\n");
	857
	858	for( state = 1 ; state < trie->statecount ; state++ ) {
	859	const U32 base = trie->states[ state ].trans.base;
	860
	861	PerlIO_printf( Perl_debug_log, "%s#%4"UVXf"\|", (int)depth 2 + 2,"", (UV)state);
	862
	863	if ( trie->states[ state ].wordnum ) {
	864	PerlIO_printf( Perl_debug_log, " W%4X", trie->states[ state ].wordnum );
	865	} else {
	866	PerlIO_printf( Perl_debug_log, "%6s", "" );
	867	}
	868
	869	PerlIO_printf( Perl_debug_log, " @%4"UVXf" ", (UV)base );
	870
	871	if ( base ) {
	872	U32 ofs = 0;
	873
	874	while( ( base + ofs < trie->uniquecharcount ) \|\|
	875	( base + ofs - trie->uniquecharcount < trie->lasttrans
	876	&& trie->trans[ base + ofs - trie->uniquecharcount ].check != state))
	877	ofs++;
	878
	879	PerlIO_printf( Perl_debug_log, "+%2"UVXf"[ ", (UV)ofs);
	880
	881	for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
	882	if ( ( base + ofs >= trie->uniquecharcount ) &&
	883	( base + ofs - trie->uniquecharcount < trie->lasttrans ) &&
	884	trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
	885	{
	886	PerlIO_printf( Perl_debug_log, "%*"UVXf,
	887	colwidth,
	888	(UV)trie->trans[ base + ofs - trie->uniquecharcount ].next );
	889	} else {
	890	PerlIO_printf( Perl_debug_log, "%*s",colwidth," ." );
	891	}
	892	}
	893
	894	PerlIO_printf( Perl_debug_log, "]");
	895
	896	}
	897	PerlIO_printf( Perl_debug_log, "\n" );
	898	}
	899	}
	900	/*
	901	Dumps a fully constructed but uncompressed trie in list form.
	902	List tries normally only are used for construction when the number of
	903	possible chars (trie->uniquecharcount) is very high.
	904	Used for debugging make_trie().
	905	*/
	906	STATIC void
	907	S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
	908	HV widecharmap, AV revcharmap, U32 next_alloc,
	909	U32 depth)
	910	{
	911	U32 state;
	912	SV *sv=sv_newmortal();
	913	int colwidth= widecharmap ? 6 : 4;
	914	GET_RE_DEBUG_FLAGS_DECL;
	915	/* print out the table precompression. */
	916	PerlIO_printf( Perl_debug_log, "%sState :Word \| Transition Data\n%s%s",
	917	(int)depth * 2 + 2,"", (int)depth * 2 + 2,"",
	918	"------:-----+-----------------\n" );
	919
	920	for( state=1 ; state < next_alloc ; state ++ ) {
	921	U16 charid;
	922
	923	PerlIO_printf( Perl_debug_log, "%*s %4"UVXf" :",
	924	(int)depth * 2 + 2,"", (UV)state );
	925	if ( ! trie->states[ state ].wordnum ) {
	926	PerlIO_printf( Perl_debug_log, "%5s\| ","");
	927	} else {
	928	PerlIO_printf( Perl_debug_log, "W%4x\| ",
	929	trie->states[ state ].wordnum
	930	);
	931	}
	932	for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
	933	SV ** const tmp = av_fetch( revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0);
	934	if ( tmp ) {
	935	PerlIO_printf( Perl_debug_log, "%*s:%3X=%4"UVXf" \| ",
	936	colwidth,
	937	pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), colwidth,
	938	PL_colors[0], PL_colors[1],
	939	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) \|
	940	PERL_PV_ESCAPE_FIRSTCHAR
	941	) ,
	942	TRIE_LIST_ITEM(state,charid).forid,
	943	(UV)TRIE_LIST_ITEM(state,charid).newstate
	944	);
	945	if (!(charid % 10))
	946	PerlIO_printf(Perl_debug_log, "\n%*s\| ",
	947	(int)((depth * 2) + 14), "");
	948	}
	949	}
	950	PerlIO_printf( Perl_debug_log, "\n");
	951	}
	952	}
	953
	954	/*
	955	Dumps a fully constructed but uncompressed trie in table form.
	956	This is the normal DFA style state transition table, with a few
	957	twists to facilitate compression later.
	958	Used for debugging make_trie().
	959	*/
	960	STATIC void
	961	S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
	962	HV widecharmap, AV revcharmap, U32 next_alloc,
	963	U32 depth)
	964	{
	965	U32 state;
	966	U16 charid;
	967	SV *sv=sv_newmortal();
	968	int colwidth= widecharmap ? 6 : 4;
	969	GET_RE_DEBUG_FLAGS_DECL;
	970
	971	/*
	972	print out the table precompression so that we can do a visual check
	973	that they are identical.
	974	*/
	975
	976	PerlIO_printf( Perl_debug_log, "%sChar : ",(int)depth 2 + 2,"" );
	977
	978	for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
	979	SV ** const tmp = av_fetch( revcharmap, charid, 0);
	980	if ( tmp ) {
	981	PerlIO_printf( Perl_debug_log, "%*s",
	982	colwidth,
	983	pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), colwidth,
	984	PL_colors[0], PL_colors[1],
	985	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) \|
	986	PERL_PV_ESCAPE_FIRSTCHAR
	987	)
	988	);
	989	}
	990	}
	991
	992	PerlIO_printf( Perl_debug_log, "\n%sState+-",(int)depth 2 + 2,"" );
	993
	994	for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) {
	995	PerlIO_printf( Perl_debug_log, "%.*s", colwidth,"--------");
	996	}
	997
	998	PerlIO_printf( Perl_debug_log, "\n" );
	999
	1000	for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) {
	1001
	1002	PerlIO_printf( Perl_debug_log, "%*s%4"UVXf" : ",
	1003	(int)depth * 2 + 2,"",
	1004	(UV)TRIE_NODENUM( state ) );
	1005
	1006	for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
	1007	UV v=(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next );
	1008	if (v)
	1009	PerlIO_printf( Perl_debug_log, "%*"UVXf, colwidth, v );
	1010	else
	1011	PerlIO_printf( Perl_debug_log, "%*s", colwidth, "." );
	1012	}
	1013	if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) {
	1014	PerlIO_printf( Perl_debug_log, " (%4"UVXf")\n", (UV)trie->trans[ state ].check );
	1015	} else {
	1016	PerlIO_printf( Perl_debug_log, " (%4"UVXf") W%4X\n", (UV)trie->trans[ state ].check,
	1017	trie->states[ TRIE_NODENUM( state ) ].wordnum );
	1018	}
	1019	}
	1020	}
	1021
	1022	#endif
	1023
	1024	/* make_trie(startbranch,first,last,tail,word_count,flags,depth)
	1025	startbranch: the first branch in the whole branch sequence
	1026	first : start branch of sequence of branch-exact nodes.
	1027	May be the same as startbranch
	1028	last : Thing following the last branch.
	1029	May be the same as tail.
	1030	tail : item following the branch sequence
	1031	count : words in the sequence
	1032	flags : currently the OP() type we will be building one of /EXACT(\|F\|Fl)/
	1033	depth : indent depth
	1034
	1035	Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
	1036
	1037	A trie is an N'ary tree where the branches are determined by digital
	1038	decomposition of the key. IE, at the root node you look up the 1st character and
	1039	follow that branch repeat until you find the end of the branches. Nodes can be
	1040	marked as "accepting" meaning they represent a complete word. Eg:
	1041
	1042	/he\|she\|his\|hers/
	1043
	1044	would convert into the following structure. Numbers represent states, letters
	1045	following numbers represent valid transitions on the letter from that state, if
	1046	the number is in square brackets it represents an accepting state, otherwise it
	1047	will be in parenthesis.
	1048
	1049	+-h->+-e->[3]-+-r->(8)-+-s->[9]
	1050	\| \|
	1051	\| (2)
	1052	\| \|
	1053	(1) +-i->(6)-+-s->[7]
	1054	\|
	1055	+-s->(3)-+-h->(4)-+-e->[5]
	1056
	1057	Accept Word Mapping: 3=>1 (he),5=>2 (she), 7=>3 (his), 9=>4 (hers)
	1058
	1059	This shows that when matching against the string 'hers' we will begin at state 1
	1060	read 'h' and move to state 2, read 'e' and move to state 3 which is accepting,
	1061	then read 'r' and go to state 8 followed by 's' which takes us to state 9 which
	1062	is also accepting. Thus we know that we can match both 'he' and 'hers' with a
	1063	single traverse. We store a mapping from accepting to state to which word was
	1064	matched, and then when we have multiple possibilities we try to complete the
	1065	rest of the regex in the order in which they occured in the alternation.
	1066
	1067	The only prior NFA like behaviour that would be changed by the TRIE support is
	1068	the silent ignoring of duplicate alternations which are of the form:
	1069
	1070	/ (DUPE\|DUPE) X? (?{ ... }) Y /x
	1071
	1072	Thus EVAL blocks follwing a trie may be called a different number of times with
	1073	and without the optimisation. With the optimisations dupes will be silently
	1074	ignored. This inconsistant behaviour of EVAL type nodes is well established as
	1075	the following demonstrates:
	1076
	1077	'words'=~/(word\|word\|word)(?{ print $1 })[xyz]/
	1078
	1079	which prints out 'word' three times, but
	1080
	1081	'words'=~/(word\|word\|word)(?{ print $1 })S/
	1082
	1083	which doesnt print it out at all. This is due to other optimisations kicking in.
	1084
	1085	Example of what happens on a structural level:
	1086
	1087	The regexp /(ac\|ad\|ab)+/ will produce the folowing debug output:
	1088
	1089	1: CURLYM[1] {1,32767}(18)
	1090	5: BRANCH(8)
	1091	6: EXACT <ac>(16)
	1092	8: BRANCH(11)
	1093	9: EXACT <ad>(16)
	1094	11: BRANCH(14)
	1095	12: EXACT <ab>(16)
	1096	16: SUCCEED(0)
	1097	17: NOTHING(18)
	1098	18: END(0)
	1099
	1100	This would be optimizable with startbranch=5, first=5, last=16, tail=16
	1101	and should turn into:
	1102
	1103	1: CURLYM[1] {1,32767}(18)
	1104	5: TRIE(16)
	1105	[Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1]
	1106	<ac>
	1107	<ad>
	1108	<ab>
	1109	16: SUCCEED(0)
	1110	17: NOTHING(18)
	1111	18: END(0)
	1112
	1113	Cases where tail != last would be like /(?foo\|bar)baz/:
	1114
	1115	1: BRANCH(4)
	1116	2: EXACT <foo>(8)
	1117	4: BRANCH(7)
	1118	5: EXACT <bar>(8)
	1119	7: TAIL(8)
	1120	8: EXACT <baz>(10)
	1121	10: END(0)
	1122
	1123	which would be optimizable with startbranch=1, first=1, last=7, tail=8
	1124	and would end up looking like:
	1125
	1126	1: TRIE(8)
	1127	[Words:2 Chars Stored:6 Unique Chars:5 States:7 NCP:1]
	1128	<foo>
	1129	<bar>
	1130	7: TAIL(8)
	1131	8: EXACT <baz>(10)
	1132	10: END(0)
	1133
	1134	d = uvuni_to_utf8_flags(d, uv, 0);
	1135
	1136	is the recommended Unicode-aware way of saying
	1137
	1138	*(d++) = uv;
	1139	*/
	1140
	1141	#define TRIE_STORE_REVCHAR \
	1142	STMT_START { \
	1143	SV *tmp = newSVpvs(""); \
	1144	if (UTF) SvUTF8_on(tmp); \
	1145	Perl_sv_catpvf( aTHX_ tmp, "%c", (int)uvc ); \
	1146	av_push( revcharmap, tmp ); \
	1147	} STMT_END
	1148
	1149	#define TRIE_READ_CHAR STMT_START { \
	1150	wordlen++; \
	1151	if ( UTF ) { \
	1152	if ( folder ) { \
	1153	if ( foldlen > 0 ) { \
	1154	uvc = utf8n_to_uvuni( scan, UTF8_MAXLEN, &len, uniflags ); \
	1155	foldlen -= len; \
	1156	scan += len; \
	1157	len = 0; \
	1158	} else { \
	1159	uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\
	1160	uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
	1161	foldlen -= UNISKIP( uvc ); \
	1162	scan = foldbuf + UNISKIP( uvc ); \
	1163	} \
	1164	} else { \
	1165	uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\
	1166	} \
	1167	} else { \
	1168	uvc = (U32)*uc; \
	1169	len = 1; \
	1170	} \
	1171	} STMT_END
	1172
	1173
	1174
	1175	#define TRIE_LIST_PUSH(state,fid,ns) STMT_START { \
	1176	if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) { \
	1177	U32 ging = TRIE_LIST_LEN( state ) *= 2; \
	1178	Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \
	1179	} \
	1180	TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid; \
	1181	TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns; \
	1182	TRIE_LIST_CUR( state )++; \
	1183	} STMT_END
	1184
	1185	#define TRIE_LIST_NEW(state) STMT_START { \
	1186	Newxz( trie->states[ state ].trans.list, \
	1187	4, reg_trie_trans_le ); \
	1188	TRIE_LIST_CUR( state ) = 1; \
	1189	TRIE_LIST_LEN( state ) = 4; \
	1190	} STMT_END
	1191
	1192	#define TRIE_HANDLE_WORD(state) STMT_START { \
	1193	U16 dupe= trie->states[ state ].wordnum; \
	1194	regnode * const noper_next = regnext( noper ); \
	1195	\
	1196	if (trie->wordlen) \
	1197	trie->wordlen[ curword ] = wordlen; \
	1198	DEBUG_r({ \
	1199	/* store the word for dumping */ \
	1200	SV* tmp; \
	1201	if (OP(noper) != NOTHING) \
	1202	tmp = newSVpvn(STRING(noper), STR_LEN(noper)); \
	1203	else \
	1204	tmp = newSVpvn( "", 0 ); \
	1205	if ( UTF ) SvUTF8_on( tmp ); \
	1206	av_push( trie_words, tmp ); \
	1207	}); \
	1208	\
	1209	curword++; \
	1210	\
	1211	if ( noper_next < tail ) { \
	1212	if (!trie->jump) \
	1213	trie->jump = (U16 *) PerlMemShared_calloc( word_count + 1, sizeof(U16) ); \
	1214	trie->jump[curword] = (U16)(noper_next - convert); \
	1215	if (!jumper) \
	1216	jumper = noper_next; \
	1217	if (!nextbranch) \
	1218	nextbranch= regnext(cur); \
	1219	} \
	1220	\
	1221	if ( dupe ) { \
	1222	/* So it's a dupe. This means we need to maintain a */\
	1223	/* linked-list from the first to the next. */\
	1224	/* we only allocate the nextword buffer when there */\
	1225	/* a dupe, so first time we have to do the allocation */\
	1226	if (!trie->nextword) \
	1227	trie->nextword = (U16 *) \
	1228	PerlMemShared_calloc( word_count + 1, sizeof(U16)); \
	1229	while ( trie->nextword[dupe] ) \
	1230	dupe= trie->nextword[dupe]; \
	1231	trie->nextword[dupe]= curword; \
	1232	} else { \
	1233	/* we haven't inserted this word yet. */ \
	1234	trie->states[ state ].wordnum = curword; \
	1235	} \
	1236	} STMT_END
	1237
	1238
	1239	#define TRIE_TRANS_STATE(state,base,ucharcount,charid,special) \
	1240	( ( base + charid >= ucharcount \
	1241	&& base + charid < ubound \
	1242	&& state == trie->trans[ base - ucharcount + charid ].check \
	1243	&& trie->trans[ base - ucharcount + charid ].next ) \
	1244	? trie->trans[ base - ucharcount + charid ].next \
	1245	: ( state==1 ? special : 0 ) \
	1246	)
	1247
	1248	#define MADE_TRIE 1
	1249	#define MADE_JUMP_TRIE 2
	1250	#define MADE_EXACT_TRIE 4
	1251
	1252	STATIC I32
	1253	S_make_trie(pTHX_ RExC_state_t pRExC_state, regnode startbranch, regnode first, regnode last, regnode *tail, U32 word_count, U32 flags, U32 depth)
	1254	{
	1255	dVAR;
	1256	/* first pass, loop through and scan words */
	1257	reg_trie_data *trie;
	1258	HV *widecharmap = NULL;
	1259	AV *revcharmap = newAV();
	1260	regnode *cur;
	1261	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1262	STRLEN len = 0;
	1263	UV uvc = 0;
	1264	U16 curword = 0;
	1265	U32 next_alloc = 0;
	1266	regnode *jumper = NULL;
	1267	regnode *nextbranch = NULL;
	1268	regnode *convert = NULL;
	1269	/* we just use folder as a flag in utf8 */
	1270	const U8 * const folder = ( flags == EXACTF
	1271	? PL_fold
	1272	: ( flags == EXACTFL
	1273	? PL_fold_locale
	1274	: NULL
	1275	)
	1276	);
	1277
	1278	#ifdef DEBUGGING
	1279	const U32 data_slot = add_data( pRExC_state, 4, "tuuu" );
	1280	AV *trie_words = NULL;
	1281	/* along with revcharmap, this only used during construction but both are
	1282	* useful during debugging so we store them in the struct when debugging.
	1283	*/
	1284	#else
	1285	const U32 data_slot = add_data( pRExC_state, 2, "tu" );
	1286	STRLEN trie_charcount=0;
	1287	#endif
	1288	SV *re_trie_maxbuff;
	1289	GET_RE_DEBUG_FLAGS_DECL;
	1290	#ifndef DEBUGGING
	1291	PERL_UNUSED_ARG(depth);
	1292	#endif
	1293
	1294	trie = (reg_trie_data *) PerlMemShared_calloc( 1, sizeof(reg_trie_data) );
	1295	trie->refcount = 1;
	1296	trie->startstate = 1;
	1297	trie->wordcount = word_count;
	1298	RExC_rxi->data->data[ data_slot ] = (void*)trie;
	1299	trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
	1300	if (!(UTF && folder))
	1301	trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
	1302	DEBUG_r({
	1303	trie_words = newAV();
	1304	});
	1305
	1306	re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
	1307	if (!SvIOK(re_trie_maxbuff)) {
	1308	sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
	1309	}
	1310	DEBUG_OPTIMISE_r({
	1311	PerlIO_printf( Perl_debug_log,
	1312	"%*smake_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n",
	1313	(int)depth * 2 + 2, "",
	1314	REG_NODE_NUM(startbranch),REG_NODE_NUM(first),
	1315	REG_NODE_NUM(last), REG_NODE_NUM(tail),
	1316	(int)depth);
	1317	});
	1318
	1319	/* Find the node we are going to overwrite */
	1320	if ( first == startbranch && OP( last ) != BRANCH ) {
	1321	/* whole branch chain */
	1322	convert = first;
	1323	} else {
	1324	/* branch sub-chain */
	1325	convert = NEXTOPER( first );
	1326	}
	1327
	1328	/* -- First loop and Setup --
	1329
	1330	We first traverse the branches and scan each word to determine if it
	1331	contains widechars, and how many unique chars there are, this is
	1332	important as we have to build a table with at least as many columns as we
	1333	have unique chars.
	1334
	1335	We use an array of integers to represent the character codes 0..255
	1336	(trie->charmap) and we use a an HV* to store unicode characters. We use the
	1337	native representation of the character value as the key and IV's for the
	1338	coded index.
	1339
	1340	TODO If we keep track of how many times each character is used we can
	1341	remap the columns so that the table compression later on is more
	1342	efficient in terms of memory by ensuring most common value is in the
	1343	middle and the least common are on the outside. IMO this would be better
	1344	than a most to least common mapping as theres a decent chance the most
	1345	common letter will share a node with the least common, meaning the node
	1346	will not be compressable. With a middle is most common approach the worst
	1347	case is when we have the least common nodes twice.
	1348
	1349	*/
	1350
	1351	for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
	1352	regnode * const noper = NEXTOPER( cur );
	1353	const U8 uc = (U8)STRING( noper );
	1354	const U8 * const e = uc + STR_LEN( noper );
	1355	STRLEN foldlen = 0;
	1356	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1357	const U8 scan = (U8)NULL;
	1358	U32 wordlen = 0; /* required init */
	1359	STRLEN chars=0;
	1360
	1361	if (OP(noper) == NOTHING) {
	1362	trie->minlen= 0;
	1363	continue;
	1364	}
	1365	if (trie->bitmap) {
	1366	TRIE_BITMAP_SET(trie,*uc);
	1367	if ( folder ) TRIE_BITMAP_SET(trie,folder[ *uc ]);
	1368	}
	1369	for ( ; uc < e ; uc += len ) {
	1370	TRIE_CHARCOUNT(trie)++;
	1371	TRIE_READ_CHAR;
	1372	chars++;
	1373	if ( uvc < 256 ) {
	1374	if ( !trie->charmap[ uvc ] ) {
	1375	trie->charmap[ uvc ]=( ++trie->uniquecharcount );
	1376	if ( folder )
	1377	trie->charmap[ folder[ uvc ] ] = trie->charmap[ uvc ];
	1378	TRIE_STORE_REVCHAR;
	1379	}
	1380	} else {
	1381	SV** svpp;
	1382	if ( !widecharmap )
	1383	widecharmap = newHV();
	1384
	1385	svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
	1386
	1387	if ( !svpp )
	1388	Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc );
	1389
	1390	if ( !SvTRUE( *svpp ) ) {
	1391	sv_setiv( *svpp, ++trie->uniquecharcount );
	1392	TRIE_STORE_REVCHAR;
	1393	}
	1394	}
	1395	}
	1396	if( cur == first ) {
	1397	trie->minlen=chars;
	1398	trie->maxlen=chars;
	1399	} else if (chars < trie->minlen) {
	1400	trie->minlen=chars;
	1401	} else if (chars > trie->maxlen) {
	1402	trie->maxlen=chars;
	1403	}
	1404
	1405	} /* end first pass */
	1406	DEBUG_TRIE_COMPILE_r(
	1407	PerlIO_printf( Perl_debug_log, "%*sTRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
	1408	(int)depth * 2 + 2,"",
	1409	( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
	1410	(int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
	1411	(int)trie->minlen, (int)trie->maxlen )
	1412	);
	1413	trie->wordlen = (U32 *) PerlMemShared_calloc( word_count, sizeof(U32) );
	1414
	1415	/*
	1416	We now know what we are dealing with in terms of unique chars and
	1417	string sizes so we can calculate how much memory a naive
	1418	representation using a flat table will take. If it's over a reasonable
	1419	limit (as specified by ${^RE_TRIE_MAXBUF}) we use a more memory
	1420	conservative but potentially much slower representation using an array
	1421	of lists.
	1422
	1423	At the end we convert both representations into the same compressed
	1424	form that will be used in regexec.c for matching with. The latter
	1425	is a form that cannot be used to construct with but has memory
	1426	properties similar to the list form and access properties similar
	1427	to the table form making it both suitable for fast searches and
	1428	small enough that its feasable to store for the duration of a program.
	1429
	1430	See the comment in the code where the compressed table is produced
	1431	inplace from the flat tabe representation for an explanation of how
	1432	the compression works.
	1433
	1434	*/
	1435
	1436
	1437	if ( (IV)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1) > SvIV(re_trie_maxbuff) ) {
	1438	/*
	1439	Second Pass -- Array Of Lists Representation
	1440
	1441	Each state will be represented by a list of charid:state records
	1442	(reg_trie_trans_le) the first such element holds the CUR and LEN
	1443	points of the allocated array. (See defines above).
	1444
	1445	We build the initial structure using the lists, and then convert
	1446	it into the compressed table form which allows faster lookups
	1447	(but cant be modified once converted).
	1448	*/
	1449
	1450	STRLEN transcount = 1;
	1451
	1452	DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log,
	1453	"%*sCompiling trie using list compiler\n",
	1454	(int)depth * 2 + 2, ""));
	1455
	1456	trie->states = (reg_trie_state *)
	1457	PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
	1458	sizeof(reg_trie_state) );
	1459	TRIE_LIST_NEW(1);
	1460	next_alloc = 2;
	1461
	1462	for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
	1463
	1464	regnode * const noper = NEXTOPER( cur );
	1465	U8 uc = (U8)STRING( noper );
	1466	const U8 * const e = uc + STR_LEN( noper );
	1467	U32 state = 1; /* required init */
	1468	U16 charid = 0; /* sanity init */
	1469	U8 scan = (U8)NULL; /* sanity init */
	1470	STRLEN foldlen = 0; /* required init */
	1471	U32 wordlen = 0; /* required init */
	1472	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1473
	1474	if (OP(noper) != NOTHING) {
	1475	for ( ; uc < e ; uc += len ) {
	1476
	1477	TRIE_READ_CHAR;
	1478
	1479	if ( uvc < 256 ) {
	1480	charid = trie->charmap[ uvc ];
	1481	} else {
	1482	SV** const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0);
	1483	if ( !svpp ) {
	1484	charid = 0;
	1485	} else {
	1486	charid=(U16)SvIV( *svpp );
	1487	}
	1488	}
	1489	/* charid is now 0 if we dont know the char read, or nonzero if we do */
	1490	if ( charid ) {
	1491
	1492	U16 check;
	1493	U32 newstate = 0;
	1494
	1495	charid--;
	1496	if ( !trie->states[ state ].trans.list ) {
	1497	TRIE_LIST_NEW( state );
	1498	}
	1499	for ( check = 1; check <= TRIE_LIST_USED( state ); check++ ) {
	1500	if ( TRIE_LIST_ITEM( state, check ).forid == charid ) {
	1501	newstate = TRIE_LIST_ITEM( state, check ).newstate;
	1502	break;
	1503	}
	1504	}
	1505	if ( ! newstate ) {
	1506	newstate = next_alloc++;
	1507	TRIE_LIST_PUSH( state, charid, newstate );
	1508	transcount++;
	1509	}
	1510	state = newstate;
	1511	} else {
	1512	Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
	1513	}
	1514	}
	1515	}
	1516	TRIE_HANDLE_WORD(state);
	1517
	1518	} /* end second pass */
	1519
	1520	/* next alloc is the NEXT state to be allocated */
	1521	trie->statecount = next_alloc;
	1522	trie->states = (reg_trie_state *)
	1523	PerlMemShared_realloc( trie->states,
	1524	next_alloc
	1525	* sizeof(reg_trie_state) );
	1526
	1527	/* and now dump it out before we compress it */
	1528	DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap,
	1529	revcharmap, next_alloc,
	1530	depth+1)
	1531	);
	1532
	1533	trie->trans = (reg_trie_trans *)
	1534	PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) );
	1535	{
	1536	U32 state;
	1537	U32 tp = 0;
	1538	U32 zp = 0;
	1539
	1540
	1541	for( state=1 ; state < next_alloc ; state ++ ) {
	1542	U32 base=0;
	1543
	1544	/*
	1545	DEBUG_TRIE_COMPILE_MORE_r(
	1546	PerlIO_printf( Perl_debug_log, "tp: %d zp: %d ",tp,zp)
	1547	);
	1548	*/
	1549
	1550	if (trie->states[state].trans.list) {
	1551	U16 minid=TRIE_LIST_ITEM( state, 1).forid;
	1552	U16 maxid=minid;
	1553	U16 idx;
	1554
	1555	for( idx = 2 ; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
	1556	const U16 forid = TRIE_LIST_ITEM( state, idx).forid;
	1557	if ( forid < minid ) {
	1558	minid=forid;
	1559	} else if ( forid > maxid ) {
	1560	maxid=forid;
	1561	}
	1562	}
	1563	if ( transcount < tp + maxid - minid + 1) {
	1564	transcount *= 2;
	1565	trie->trans = (reg_trie_trans *)
	1566	PerlMemShared_realloc( trie->trans,
	1567	transcount
	1568	* sizeof(reg_trie_trans) );
	1569	Zero( trie->trans + (transcount / 2), transcount / 2 , reg_trie_trans );
	1570	}
	1571	base = trie->uniquecharcount + tp - minid;
	1572	if ( maxid == minid ) {
	1573	U32 set = 0;
	1574	for ( ; zp < tp ; zp++ ) {
	1575	if ( ! trie->trans[ zp ].next ) {
	1576	base = trie->uniquecharcount + zp - minid;
	1577	trie->trans[ zp ].next = TRIE_LIST_ITEM( state, 1).newstate;
	1578	trie->trans[ zp ].check = state;
	1579	set = 1;
	1580	break;
	1581	}
	1582	}
	1583	if ( !set ) {
	1584	trie->trans[ tp ].next = TRIE_LIST_ITEM( state, 1).newstate;
	1585	trie->trans[ tp ].check = state;
	1586	tp++;
	1587	zp = tp;
	1588	}
	1589	} else {
	1590	for ( idx=1; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
	1591	const U32 tid = base - trie->uniquecharcount + TRIE_LIST_ITEM( state, idx ).forid;
	1592	trie->trans[ tid ].next = TRIE_LIST_ITEM( state, idx ).newstate;
	1593	trie->trans[ tid ].check = state;
	1594	}
	1595	tp += ( maxid - minid + 1 );
	1596	}
	1597	Safefree(trie->states[ state ].trans.list);
	1598	}
	1599	/*
	1600	DEBUG_TRIE_COMPILE_MORE_r(
	1601	PerlIO_printf( Perl_debug_log, " base: %d\n",base);
	1602	);
	1603	*/
	1604	trie->states[ state ].trans.base=base;
	1605	}
	1606	trie->lasttrans = tp + 1;
	1607	}
	1608	} else {
	1609	/*
	1610	Second Pass -- Flat Table Representation.
	1611
	1612	we dont use the 0 slot of either trans[] or states[] so we add 1 to each.
	1613	We know that we will need Charcount+1 trans at most to store the data
	1614	(one row per char at worst case) So we preallocate both structures
	1615	assuming worst case.
	1616
	1617	We then construct the trie using only the .next slots of the entry
	1618	structs.
	1619
	1620	We use the .check field of the first entry of the node temporarily to
	1621	make compression both faster and easier by keeping track of how many non
	1622	zero fields are in the node.
	1623
	1624	Since trans are numbered from 1 any 0 pointer in the table is a FAIL
	1625	transition.
	1626
	1627	There are two terms at use here: state as a TRIE_NODEIDX() which is a
	1628	number representing the first entry of the node, and state as a
	1629	TRIE_NODENUM() which is the trans number. state 1 is TRIE_NODEIDX(1) and
	1630	TRIE_NODENUM(1), state 2 is TRIE_NODEIDX(2) and TRIE_NODENUM(3) if there
	1631	are 2 entrys per node. eg:
	1632
	1633	A B A B
	1634	1. 2 4 1. 3 7
	1635	2. 0 3 3. 0 5
	1636	3. 0 0 5. 0 0
	1637	4. 0 0 7. 0 0
	1638
	1639	The table is internally in the right hand, idx form. However as we also
	1640	have to deal with the states array which is indexed by nodenum we have to
	1641	use TRIE_NODENUM() to convert.
	1642
	1643	*/
	1644	DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log,
	1645	"%*sCompiling trie using table compiler\n",
	1646	(int)depth * 2 + 2, ""));
	1647
	1648	trie->trans = (reg_trie_trans *)
	1649	PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
	1650	* trie->uniquecharcount + 1,
	1651	sizeof(reg_trie_trans) );
	1652	trie->states = (reg_trie_state *)
	1653	PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
	1654	sizeof(reg_trie_state) );
	1655	next_alloc = trie->uniquecharcount + 1;
	1656
	1657
	1658	for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
	1659
	1660	regnode * const noper = NEXTOPER( cur );
	1661	const U8 uc = (U8)STRING( noper );
	1662	const U8 * const e = uc + STR_LEN( noper );
	1663
	1664	U32 state = 1; /* required init */
	1665
	1666	U16 charid = 0; /* sanity init */
	1667	U32 accept_state = 0; /* sanity init */
	1668	U8 scan = (U8)NULL; /* sanity init */
	1669
	1670	STRLEN foldlen = 0; /* required init */
	1671	U32 wordlen = 0; /* required init */
	1672	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1673
	1674	if ( OP(noper) != NOTHING ) {
	1675	for ( ; uc < e ; uc += len ) {
	1676
	1677	TRIE_READ_CHAR;
	1678
	1679	if ( uvc < 256 ) {
	1680	charid = trie->charmap[ uvc ];
	1681	} else {
	1682	SV* const * const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0);
	1683	charid = svpp ? (U16)SvIV(*svpp) : 0;
	1684	}
	1685	if ( charid ) {
	1686	charid--;
	1687	if ( !trie->trans[ state + charid ].next ) {
	1688	trie->trans[ state + charid ].next = next_alloc;
	1689	trie->trans[ state ].check++;
	1690	next_alloc += trie->uniquecharcount;
	1691	}
	1692	state = trie->trans[ state + charid ].next;
	1693	} else {
	1694	Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
	1695	}
	1696	/* charid is now 0 if we dont know the char read, or nonzero if we do */
	1697	}
	1698	}
	1699	accept_state = TRIE_NODENUM( state );
	1700	TRIE_HANDLE_WORD(accept_state);
	1701
	1702	} /* end second pass */
	1703
	1704	/* and now dump it out before we compress it */
	1705	DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap,
	1706	revcharmap,
	1707	next_alloc, depth+1));
	1708
	1709	{
	1710	/*
	1711	* Inplace compress the table.*
	1712
	1713	For sparse data sets the table constructed by the trie algorithm will
	1714	be mostly 0/FAIL transitions or to put it another way mostly empty.
	1715	(Note that leaf nodes will not contain any transitions.)
	1716
	1717	This algorithm compresses the tables by eliminating most such
	1718	transitions, at the cost of a modest bit of extra work during lookup:
	1719
	1720	- Each states[] entry contains a .base field which indicates the
	1721	index in the state[] array wheres its transition data is stored.
	1722
	1723	- If .base is 0 there are no valid transitions from that node.
	1724
	1725	- If .base is nonzero then charid is added to it to find an entry in
	1726	the trans array.
	1727
	1728	-If trans[states[state].base+charid].check!=state then the
	1729	transition is taken to be a 0/Fail transition. Thus if there are fail
	1730	transitions at the front of the node then the .base offset will point
	1731	somewhere inside the previous nodes data (or maybe even into a node
	1732	even earlier), but the .check field determines if the transition is
	1733	valid.
	1734
	1735	XXX - wrong maybe?
	1736	The following process inplace converts the table to the compressed
	1737	table: We first do not compress the root node 1,and mark its all its
	1738	.check pointers as 1 and set its .base pointer as 1 as well. This
	1739	allows to do a DFA construction from the compressed table later, and
	1740	ensures that any .base pointers we calculate later are greater than
	1741	0.
	1742
	1743	- We set 'pos' to indicate the first entry of the second node.
	1744
	1745	- We then iterate over the columns of the node, finding the first and
	1746	last used entry at l and m. We then copy l..m into pos..(pos+m-l),
	1747	and set the .check pointers accordingly, and advance pos
	1748	appropriately and repreat for the next node. Note that when we copy
	1749	the next pointers we have to convert them from the original
	1750	NODEIDX form to NODENUM form as the former is not valid post
	1751	compression.
	1752
	1753	- If a node has no transitions used we mark its base as 0 and do not
	1754	advance the pos pointer.
	1755
	1756	- If a node only has one transition we use a second pointer into the
	1757	structure to fill in allocated fail transitions from other states.
	1758	This pointer is independent of the main pointer and scans forward
	1759	looking for null transitions that are allocated to a state. When it
	1760	finds one it writes the single transition into the "hole". If the
	1761	pointer doesnt find one the single transition is appended as normal.
	1762
	1763	- Once compressed we can Renew/realloc the structures to release the
	1764	excess space.
	1765
	1766	See "Table-Compression Methods" in sec 3.9 of the Red Dragon,
	1767	specifically Fig 3.47 and the associated pseudocode.
	1768
	1769	demq
	1770	*/
	1771	const U32 laststate = TRIE_NODENUM( next_alloc );
	1772	U32 state, charid;
	1773	U32 pos = 0, zp=0;
	1774	trie->statecount = laststate;
	1775
	1776	for ( state = 1 ; state < laststate ; state++ ) {
	1777	U8 flag = 0;
	1778	const U32 stateidx = TRIE_NODEIDX( state );
	1779	const U32 o_used = trie->trans[ stateidx ].check;
	1780	U32 used = trie->trans[ stateidx ].check;
	1781	trie->trans[ stateidx ].check = 0;
	1782
	1783	for ( charid = 0 ; used && charid < trie->uniquecharcount ; charid++ ) {
	1784	if ( flag \|\| trie->trans[ stateidx + charid ].next ) {
	1785	if ( trie->trans[ stateidx + charid ].next ) {
	1786	if (o_used == 1) {
	1787	for ( ; zp < pos ; zp++ ) {
	1788	if ( ! trie->trans[ zp ].next ) {
	1789	break;
	1790	}
	1791	}
	1792	trie->states[ state ].trans.base = zp + trie->uniquecharcount - charid ;
	1793	trie->trans[ zp ].next = SAFE_TRIE_NODENUM( trie->trans[ stateidx + charid ].next );
	1794	trie->trans[ zp ].check = state;
	1795	if ( ++zp > pos ) pos = zp;
	1796	break;
	1797	}
	1798	used--;
	1799	}
	1800	if ( !flag ) {
	1801	flag = 1;
	1802	trie->states[ state ].trans.base = pos + trie->uniquecharcount - charid ;
	1803	}
	1804	trie->trans[ pos ].next = SAFE_TRIE_NODENUM( trie->trans[ stateidx + charid ].next );
	1805	trie->trans[ pos ].check = state;
	1806	pos++;
	1807	}
	1808	}
	1809	}
	1810	trie->lasttrans = pos + 1;
	1811	trie->states = (reg_trie_state *)
	1812	PerlMemShared_realloc( trie->states, laststate
	1813	* sizeof(reg_trie_state) );
	1814	DEBUG_TRIE_COMPILE_MORE_r(
	1815	PerlIO_printf( Perl_debug_log,
	1816	"%*sAlloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
	1817	(int)depth * 2 + 2,"",
	1818	(int)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1 ),
	1819	(IV)next_alloc,
	1820	(IV)pos,
	1821	( ( next_alloc - pos ) * 100 ) / (double)next_alloc );
	1822	);
	1823
	1824	} /* end table compress */
	1825	}
	1826	DEBUG_TRIE_COMPILE_MORE_r(
	1827	PerlIO_printf(Perl_debug_log, "%*sStatecount:%"UVxf" Lasttrans:%"UVxf"\n",
	1828	(int)depth * 2 + 2, "",
	1829	(UV)trie->statecount,
	1830	(UV)trie->lasttrans)
	1831	);
	1832	/* resize the trans array to remove unused space */
	1833	trie->trans = (reg_trie_trans *)
	1834	PerlMemShared_realloc( trie->trans, trie->lasttrans
	1835	* sizeof(reg_trie_trans) );
	1836
	1837	/* and now dump out the compressed format */
	1838	DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
	1839
	1840	{ /* Modify the program and insert the new TRIE node*/
	1841	U8 nodetype =(U8)(flags & 0xFF);
	1842	char *str=NULL;
	1843
	1844	#ifdef DEBUGGING
	1845	regnode *optimize = NULL;
	1846	U32 mjd_offset = 0;
	1847	U32 mjd_nodelen = 0;
	1848	#endif
	1849	/*
	1850	This means we convert either the first branch or the first Exact,
	1851	depending on whether the thing following (in 'last') is a branch
	1852	or not and whther first is the startbranch (ie is it a sub part of
	1853	the alternation or is it the whole thing.)
	1854	Assuming its a sub part we conver the EXACT otherwise we convert
	1855	the whole branch sequence, including the first.
	1856	*/
	1857	/* Find the node we are going to overwrite */
	1858	if ( first != startbranch \|\| OP( last ) == BRANCH ) {
	1859	/* branch sub-chain */
	1860	NEXT_OFF( first ) = (U16)(last - first);
	1861	DEBUG_r({
	1862	mjd_offset= Node_Offset((convert));
	1863	mjd_nodelen= Node_Length((convert));
	1864	});
	1865	/* whole branch chain */
	1866	} else {
	1867	DEBUG_r({
	1868	const regnode *nop = NEXTOPER( convert );
	1869	mjd_offset= Node_Offset((nop));
	1870	mjd_nodelen= Node_Length((nop));
	1871	});
	1872	}
	1873
	1874	DEBUG_OPTIMISE_r(
	1875	PerlIO_printf(Perl_debug_log, "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n",
	1876	(int)depth * 2 + 2, "",
	1877	(UV)mjd_offset, (UV)mjd_nodelen)
	1878	);
	1879
	1880	/* But first we check to see if there is a common prefix we can
	1881	split out as an EXACT and put in front of the TRIE node. */
	1882	trie->startstate= 1;
	1883	if ( trie->bitmap && !widecharmap && !trie->jump ) {
	1884	U32 state;
	1885	for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
	1886	U32 ofs = 0;
	1887	I32 idx = -1;
	1888	U32 count = 0;
	1889	const U32 base = trie->states[ state ].trans.base;
	1890
	1891	if ( trie->states[state].wordnum )
	1892	count = 1;
	1893
	1894	for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
	1895	if ( ( base + ofs >= trie->uniquecharcount ) &&
	1896	( base + ofs - trie->uniquecharcount < trie->lasttrans ) &&
	1897	trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
	1898	{
	1899	if ( ++count > 1 ) {
	1900	SV **tmp = av_fetch( revcharmap, ofs, 0);
	1901	const U8 ch = (U8)SvPV_nolen_const( *tmp );
	1902	if ( state == 1 ) break;
	1903	if ( count == 2 ) {
	1904	Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char);
	1905	DEBUG_OPTIMISE_r(
	1906	PerlIO_printf(Perl_debug_log,
	1907	"%*sNew Start State=%"UVuf" Class: [",
	1908	(int)depth * 2 + 2, "",
	1909	(UV)state));
	1910	if (idx >= 0) {
	1911	SV ** const tmp = av_fetch( revcharmap, idx, 0);
	1912	const U8 * const ch = (U8)SvPV_nolen_const( tmp );
	1913
	1914	TRIE_BITMAP_SET(trie,*ch);
	1915	if ( folder )
	1916	TRIE_BITMAP_SET(trie, folder[ *ch ]);
	1917	DEBUG_OPTIMISE_r(
	1918	PerlIO_printf(Perl_debug_log, (char*)ch)
	1919	);
	1920	}
	1921	}
	1922	TRIE_BITMAP_SET(trie,*ch);
	1923	if ( folder )
	1924	TRIE_BITMAP_SET(trie,folder[ *ch ]);
	1925	DEBUG_OPTIMISE_r(PerlIO_printf( Perl_debug_log,"%s", ch));
	1926	}
	1927	idx = ofs;
	1928	}
	1929	}
	1930	if ( count == 1 ) {
	1931	SV **tmp = av_fetch( revcharmap, idx, 0);
	1932	char ch = SvPV_nolen( tmp );
	1933	DEBUG_OPTIMISE_r({
	1934	SV *sv=sv_newmortal();
	1935	PerlIO_printf( Perl_debug_log,
	1936	"%*sPrefix State: %"UVuf" Idx:%"UVuf" Char='%s'\n",
	1937	(int)depth * 2 + 2, "",
	1938	(UV)state, (UV)idx,
	1939	pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 6,
	1940	PL_colors[0], PL_colors[1],
	1941	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) \|
	1942	PERL_PV_ESCAPE_FIRSTCHAR
	1943	)
	1944	);
	1945	});
	1946	if ( state==1 ) {
	1947	OP( convert ) = nodetype;
	1948	str=STRING(convert);
	1949	STR_LEN(convert)=0;
	1950	}
	1951	while (*ch) {
	1952	str++ = ch++;
	1953	STR_LEN(convert)++;
	1954	}
	1955
	1956	} else {
	1957	#ifdef DEBUGGING
	1958	if (state>1)
	1959	DEBUG_OPTIMISE_r(PerlIO_printf( Perl_debug_log,"]\n"));
	1960	#endif
	1961	break;
	1962	}
	1963	}
	1964	if (str) {
	1965	regnode *n = convert+NODE_SZ_STR(convert);
	1966	NEXT_OFF(convert) = NODE_SZ_STR(convert);
	1967	trie->startstate = state;
	1968	trie->minlen -= (state - 1);
	1969	trie->maxlen -= (state - 1);
	1970	DEBUG_r({
	1971	regnode *fix = convert;
	1972	U32 word = trie->wordcount;
	1973	mjd_nodelen++;
	1974	Set_Node_Offset_Length(convert, mjd_offset, state - 1);
	1975	while( ++fix < n ) {
	1976	Set_Node_Offset_Length(fix, 0, 0);
	1977	}
	1978	while (word--) {
	1979	SV ** const tmp = av_fetch( trie_words, word, 0 );
	1980	if (tmp) {
	1981	if ( STR_LEN(convert) <= SvCUR(*tmp) )
	1982	sv_chop(tmp, SvPV_nolen(tmp) + STR_LEN(convert));
	1983	else
	1984	sv_chop(tmp, SvPV_nolen(tmp) + SvCUR(*tmp));
	1985	}
	1986	}
	1987	});
	1988	if (trie->maxlen) {
	1989	convert = n;
	1990	} else {
	1991	NEXT_OFF(convert) = (U16)(tail - convert);
	1992	DEBUG_r(optimize= n);
	1993	}
	1994	}
	1995	}
	1996	if (!jumper)
	1997	jumper = last;
	1998	if ( trie->maxlen ) {
	1999	NEXT_OFF( convert ) = (U16)(tail - convert);
	2000	ARG_SET( convert, data_slot );
	2001	/* Store the offset to the first unabsorbed branch in
	2002	jump[0], which is otherwise unused by the jump logic.
	2003	We use this when dumping a trie and during optimisation. */
	2004	if (trie->jump)
	2005	trie->jump[0] = (U16)(nextbranch - convert);
	2006
	2007	/* XXXX */
	2008	if ( !trie->states[trie->startstate].wordnum && trie->bitmap &&
	2009	( (char )jumper - (char )convert) >= (int)sizeof(struct regnode_charclass) )
	2010	{
	2011	OP( convert ) = TRIEC;
	2012	Copy(trie->bitmap, ((struct regnode_charclass *)convert)->bitmap, ANYOF_BITMAP_SIZE, char);
	2013	PerlMemShared_free(trie->bitmap);
	2014	trie->bitmap= NULL;
	2015	} else
	2016	OP( convert ) = TRIE;
	2017
	2018	/* store the type in the flags */
	2019	convert->flags = nodetype;
	2020	DEBUG_r({
	2021	optimize = convert
	2022	+ NODE_STEP_REGNODE
	2023	+ regarglen[ OP( convert ) ];
	2024	});
	2025	/* XXX We really should free up the resource in trie now,
	2026	as we won't use them - (which resources?) dmq */
	2027	}
	2028	/* needed for dumping*/
	2029	DEBUG_r(if (optimize) {
	2030	regnode *opt = convert;
	2031	while ( ++opt < optimize) {
	2032	Set_Node_Offset_Length(opt,0,0);
	2033	}
	2034	/*
	2035	Try to clean up some of the debris left after the
	2036	optimisation.
	2037	*/
	2038	while( optimize < jumper ) {
	2039	mjd_nodelen += Node_Length((optimize));
	2040	OP( optimize ) = OPTIMIZED;
	2041	Set_Node_Offset_Length(optimize,0,0);
	2042	optimize++;
	2043	}
	2044	Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen);
	2045	});
	2046	} /* end node insert */
	2047	RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
	2048	#ifdef DEBUGGING
	2049	RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words;
	2050	RExC_rxi->data->data[ data_slot + 3 ] = (void*)revcharmap;
	2051	#else
	2052	SvREFCNT_dec(revcharmap);
	2053	#endif
	2054	return trie->jump
	2055	? MADE_JUMP_TRIE
	2056	: trie->startstate>1
	2057	? MADE_EXACT_TRIE
	2058	: MADE_TRIE;
	2059	}
	2060
	2061	STATIC void
	2062	S_make_trie_failtable(pTHX_ RExC_state_t pRExC_state, regnode source, regnode *stclass, U32 depth)
	2063	{
	2064	/* The Trie is constructed and compressed now so we can build a fail array now if its needed
	2065
	2066	This is basically the Aho-Corasick algorithm. Its from exercise 3.31 and 3.32 in the
	2067	"Red Dragon" -- Compilers, principles, techniques, and tools. Aho, Sethi, Ullman 1985/88
	2068	ISBN 0-201-10088-6
	2069
	2070	We find the fail state for each state in the trie, this state is the longest proper
	2071	suffix of the current states 'word' that is also a proper prefix of another word in our
	2072	trie. State 1 represents the word '' and is the thus the default fail state. This allows
	2073	the DFA not to have to restart after its tried and failed a word at a given point, it
	2074	simply continues as though it had been matching the other word in the first place.
	2075	Consider
	2076	'abcdgu'=~/abcdefg\|cdgu/
	2077	When we get to 'd' we are still matching the first word, we would encounter 'g' which would
	2078	fail, which would bring use to the state representing 'd' in the second word where we would
	2079	try 'g' and succeed, prodceding to match 'cdgu'.
	2080	*/
	2081	/* add a fail transition */
	2082	const U32 trie_offset = ARG(source);
	2083	reg_trie_data trie=(reg_trie_data )RExC_rxi->data->data[trie_offset];
	2084	U32 *q;
	2085	const U32 ucharcount = trie->uniquecharcount;
	2086	const U32 numstates = trie->statecount;
	2087	const U32 ubound = trie->lasttrans + ucharcount;
	2088	U32 q_read = 0;
	2089	U32 q_write = 0;
	2090	U32 charid;
	2091	U32 base = trie->states[ 1 ].trans.base;
	2092	U32 *fail;
	2093	reg_ac_data *aho;
	2094	const U32 data_slot = add_data( pRExC_state, 1, "T" );
	2095	GET_RE_DEBUG_FLAGS_DECL;
	2096	#ifndef DEBUGGING
	2097	PERL_UNUSED_ARG(depth);
	2098	#endif
	2099
	2100
	2101	ARG_SET( stclass, data_slot );
	2102	aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
	2103	RExC_rxi->data->data[ data_slot ] = (void*)aho;
	2104	aho->trie=trie_offset;
	2105	aho->states=(reg_trie_state )PerlMemShared_malloc( numstates sizeof(reg_trie_state) );
	2106	Copy( trie->states, aho->states, numstates, reg_trie_state );
	2107	Newxz( q, numstates, U32);
	2108	aho->fail = (U32 *) PerlMemShared_calloc( numstates, sizeof(U32) );
	2109	aho->refcount = 1;
	2110	fail = aho->fail;
	2111	/* initialize fail[0..1] to be 1 so that we always have
	2112	a valid final fail state */
	2113	fail[ 0 ] = fail[ 1 ] = 1;
	2114
	2115	for ( charid = 0; charid < ucharcount ; charid++ ) {
	2116	const U32 newstate = TRIE_TRANS_STATE( 1, base, ucharcount, charid, 0 );
	2117	if ( newstate ) {
	2118	q[ q_write ] = newstate;
	2119	/* set to point at the root */
	2120	fail[ q[ q_write++ ] ]=1;
	2121	}
	2122	}
	2123	while ( q_read < q_write) {
	2124	const U32 cur = q[ q_read++ % numstates ];
	2125	base = trie->states[ cur ].trans.base;
	2126
	2127	for ( charid = 0 ; charid < ucharcount ; charid++ ) {
	2128	const U32 ch_state = TRIE_TRANS_STATE( cur, base, ucharcount, charid, 1 );
	2129	if (ch_state) {
	2130	U32 fail_state = cur;
	2131	U32 fail_base;
	2132	do {
	2133	fail_state = fail[ fail_state ];
	2134	fail_base = aho->states[ fail_state ].trans.base;
	2135	} while ( !TRIE_TRANS_STATE( fail_state, fail_base, ucharcount, charid, 1 ) );
	2136
	2137	fail_state = TRIE_TRANS_STATE( fail_state, fail_base, ucharcount, charid, 1 );
	2138	fail[ ch_state ] = fail_state;
	2139	if ( !aho->states[ ch_state ].wordnum && aho->states[ fail_state ].wordnum )
	2140	{
	2141	aho->states[ ch_state ].wordnum = aho->states[ fail_state ].wordnum;
	2142	}
	2143	q[ q_write++ % numstates] = ch_state;
	2144	}
	2145	}
	2146	}
	2147	/* restore fail[0..1] to 0 so that we "fall out" of the AC loop
	2148	when we fail in state 1, this allows us to use the
	2149	charclass scan to find a valid start char. This is based on the principle
	2150	that theres a good chance the string being searched contains lots of stuff
	2151	that cant be a start char.
	2152	*/
	2153	fail[ 0 ] = fail[ 1 ] = 0;
	2154	DEBUG_TRIE_COMPILE_r({
	2155	PerlIO_printf(Perl_debug_log,
	2156	"%*sStclass Failtable (%"UVuf" states): 0",
	2157	(int)(depth * 2), "", (UV)numstates
	2158	);
	2159	for( q_read=1; q_read<numstates; q_read++ ) {
	2160	PerlIO_printf(Perl_debug_log, ", %"UVuf, (UV)fail[q_read]);
	2161	}
	2162	PerlIO_printf(Perl_debug_log, "\n");
	2163	});
	2164	Safefree(q);
	2165	/RExC_seen \|= REG_SEEN_TRIEDFA;/
	2166	}
	2167
	2168
	2169	/*
	2170	* There are strange code-generation bugs caused on sparc64 by gcc-2.95.2.
	2171	* These need to be revisited when a newer toolchain becomes available.
	2172	*/
	2173	#if defined(__sparc64__) && defined(__GNUC__)
	2174	# if __GNUC__ < 2 \|\| (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
	2175	# undef SPARC64_GCC_WORKAROUND
	2176	# define SPARC64_GCC_WORKAROUND 1
	2177	# endif
	2178	#endif
	2179
	2180	#define DEBUG_PEEP(str,scan,depth) \
	2181	DEBUG_OPTIMISE_r({if (scan){ \
	2182	SV * const mysv=sv_newmortal(); \
	2183	regnode *Next = regnext(scan); \
	2184	regprop(RExC_rx, mysv, scan); \
	2185	PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \
	2186	(int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\
	2187	Next ? (REG_NODE_NUM(Next)) : 0 ); \
	2188	}});
	2189
	2190
	2191
	2192
	2193
	2194	#define JOIN_EXACT(scan,min,flags) \
	2195	if (PL_regkind[OP(scan)] == EXACT) \
	2196	join_exact(pRExC_state,(scan),(min),(flags),NULL,depth+1)
	2197
	2198	STATIC U32
	2199	S_join_exact(pTHX_ RExC_state_t pRExC_state, regnode scan, I32 min, U32 flags,regnode val, U32 depth) {
	2200	/* Merge several consecutive EXACTish nodes into one. */
	2201	regnode *n = regnext(scan);
	2202	U32 stringok = 1;
	2203	regnode *next = scan + NODE_SZ_STR(scan);
	2204	U32 merged = 0;
	2205	U32 stopnow = 0;
	2206	#ifdef DEBUGGING
	2207	regnode *stop = scan;
	2208	GET_RE_DEBUG_FLAGS_DECL;
	2209	#else
	2210	PERL_UNUSED_ARG(depth);
	2211	#endif
	2212	#ifndef EXPERIMENTAL_INPLACESCAN
	2213	PERL_UNUSED_ARG(flags);
	2214	PERL_UNUSED_ARG(val);
	2215	#endif
	2216	DEBUG_PEEP("join",scan,depth);
	2217
	2218	/* Skip NOTHING, merge EXACT. /
	2219	while (n &&
	2220	( PL_regkind[OP(n)] == NOTHING \|\|
	2221	(stringok && (OP(n) == OP(scan))))
	2222	&& NEXT_OFF(n)
	2223	&& NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
	2224
	2225	if (OP(n) == TAIL \|\| n > next)
	2226	stringok = 0;
	2227	if (PL_regkind[OP(n)] == NOTHING) {
	2228	DEBUG_PEEP("skip:",n,depth);
	2229	NEXT_OFF(scan) += NEXT_OFF(n);
	2230	next = n + NODE_STEP_REGNODE;
	2231	#ifdef DEBUGGING
	2232	if (stringok)
	2233	stop = n;
	2234	#endif
	2235	n = regnext(n);
	2236	}
	2237	else if (stringok) {
	2238	const unsigned int oldl = STR_LEN(scan);
	2239	regnode * const nnext = regnext(n);
	2240
	2241	DEBUG_PEEP("merg",n,depth);
	2242
	2243	merged++;
	2244	if (oldl + STR_LEN(n) > U8_MAX)
	2245	break;
	2246	NEXT_OFF(scan) += NEXT_OFF(n);
	2247	STR_LEN(scan) += STR_LEN(n);
	2248	next = n + NODE_SZ_STR(n);
	2249	/* Now we can overwrite n : /
	2250	Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
	2251	#ifdef DEBUGGING
	2252	stop = next - 1;
	2253	#endif
	2254	n = nnext;
	2255	if (stopnow) break;
	2256	}
	2257
	2258	#ifdef EXPERIMENTAL_INPLACESCAN
	2259	if (flags && !NEXT_OFF(n)) {
	2260	DEBUG_PEEP("atch", val, depth);
	2261	if (reg_off_by_arg[OP(n)]) {
	2262	ARG_SET(n, val - n);
	2263	}
	2264	else {
	2265	NEXT_OFF(n) = val - n;
	2266	}
	2267	stopnow = 1;
	2268	}
	2269	#endif
	2270	}
	2271
	2272	if (UTF && ( OP(scan) == EXACTF ) && ( STR_LEN(scan) >= 6 ) ) {
	2273	/*
	2274	Two problematic code points in Unicode casefolding of EXACT nodes:
	2275
	2276	U+0390 - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
	2277	U+03B0 - GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
	2278
	2279	which casefold to
	2280
	2281	Unicode UTF-8
	2282
	2283	U+03B9 U+0308 U+0301 0xCE 0xB9 0xCC 0x88 0xCC 0x81
	2284	U+03C5 U+0308 U+0301 0xCF 0x85 0xCC 0x88 0xCC 0x81
	2285
	2286	This means that in case-insensitive matching (or "loose matching",
	2287	as Unicode calls it), an EXACTF of length six (the UTF-8 encoded byte
	2288	length of the above casefolded versions) can match a target string
	2289	of length two (the byte length of UTF-8 encoded U+0390 or U+03B0).
	2290	This would rather mess up the minimum length computation.
	2291
	2292	What we'll do is to look for the tail four bytes, and then peek
	2293	at the preceding two bytes to see whether we need to decrease
	2294	the minimum length by four (six minus two).
	2295
	2296	Thanks to the design of UTF-8, there cannot be false matches:
	2297	A sequence of valid UTF-8 bytes cannot be a subsequence of
	2298	another valid sequence of UTF-8 bytes.
	2299
	2300	*/
	2301	char * const s0 = STRING(scan), s, t;
	2302	char * const s1 = s0 + STR_LEN(scan) - 1;
	2303	char * const s2 = s1 - 4;
	2304	#ifdef EBCDIC /* RD tunifold greek 0390 and 03B0 */
	2305	const char t0[] = "\xaf\x49\xaf\x42";
	2306	#else
	2307	const char t0[] = "\xcc\x88\xcc\x81";
	2308	#endif
	2309	const char * const t1 = t0 + 3;
	2310
	2311	for (s = s0 + 2;
	2312	s < s2 && (t = ninstr(s, s1, t0, t1));
	2313	s = t + 4) {
	2314	#ifdef EBCDIC
	2315	if (((U8)t[-1] == 0x68 && (U8)t[-2] == 0xB4) \|\|
	2316	((U8)t[-1] == 0x46 && (U8)t[-2] == 0xB5))
	2317	#else
	2318	if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) \|\|
	2319	((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
	2320	#endif
	2321	*min -= 4;
	2322	}
	2323	}
	2324
	2325	#ifdef DEBUGGING
	2326	/* Allow dumping */
	2327	n = scan + NODE_SZ_STR(scan);
	2328	while (n <= stop) {
	2329	if (PL_regkind[OP(n)] != NOTHING \|\| OP(n) == NOTHING) {
	2330	OP(n) = OPTIMIZED;
	2331	NEXT_OFF(n) = 0;
	2332	}
	2333	n++;
	2334	}
	2335	#endif
	2336	DEBUG_OPTIMISE_r(if (merged){DEBUG_PEEP("finl",scan,depth)});
	2337	return stopnow;
	2338	}
	2339
	2340	/* REx optimizer. Converts nodes into quickier variants "in place".
	2341	Finds fixed substrings. */
	2342
	2343	/* Stops at toplevel WHILEM as well as at "last". At end *scanp is set
	2344	to the position after last scanned or to NULL. */
	2345
	2346	#define INIT_AND_WITHP \
	2347	assert(!and_withp); \
	2348	Newx(and_withp,1,struct regnode_charclass_class); \
	2349	SAVEFREEPV(and_withp)
	2350
	2351	/* this is a chain of data about sub patterns we are processing that
	2352	need to be handled seperately/specially in study_chunk. Its so
	2353	we can simulate recursion without losing state. */
	2354	struct scan_frame;
	2355	typedef struct scan_frame {
	2356	regnode last; / last node to process in this frame */
	2357	regnode next; / next node to process when last is reached */
	2358	struct scan_frame prev; /previous frame*/
	2359	I32 stop; /* what stopparen do we use */
	2360	} scan_frame;
	2361
	2362
	2363	#define SCAN_COMMIT(s, data, m) scan_commit(s, data, m, is_inf)
	2364
	2365	STATIC I32
	2366	S_study_chunk(pTHX_ RExC_state_t pRExC_state, regnode *scanp,
	2367	I32 minlenp, I32 deltap,
	2368	regnode *last,
	2369	scan_data_t *data,
	2370	I32 stopparen,
	2371	U8* recursed,
	2372	struct regnode_charclass_class *and_withp,
	2373	U32 flags, U32 depth)
	2374	/* scanp: Start here (read-write). */
	2375	/* deltap: Write maxlen-minlen here. */
	2376	/* last: Stop before this one. */
	2377	/* data: string data about the pattern */
	2378	/* stopparen: treat close N as END */
	2379	/* recursed: which subroutines have we recursed into */
	2380	/* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
	2381	{
	2382	dVAR;
	2383	I32 min = 0, pars = 0, code;
	2384	regnode scan = scanp, *next;
	2385	I32 delta = 0;
	2386	int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
	2387	int is_inf_internal = 0; /* The studied chunk is infinite */
	2388	I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
	2389	scan_data_t data_fake;
	2390	SV *re_trie_maxbuff = NULL;
	2391	regnode *first_non_open = scan;
	2392	I32 stopmin = I32_MAX;
	2393	scan_frame *frame = NULL;
	2394
	2395	GET_RE_DEBUG_FLAGS_DECL;
	2396
	2397	#ifdef DEBUGGING
	2398	StructCopy(&zero_scan_data, &data_fake, scan_data_t);
	2399	#endif
	2400
	2401	if ( depth == 0 ) {
	2402	while (first_non_open && OP(first_non_open) == OPEN)
	2403	first_non_open=regnext(first_non_open);
	2404	}
	2405
	2406
	2407	fake_study_recurse:
	2408	while ( scan && OP(scan) != END && scan < last ){
	2409	/* Peephole optimizer: */
	2410	DEBUG_STUDYDATA("Peep:", data,depth);
	2411	DEBUG_PEEP("Peep",scan,depth);
	2412	JOIN_EXACT(scan,&min,0);
	2413
	2414	/* Follow the next-chain of the current node and optimize
	2415	away all the NOTHINGs from it. */
	2416	if (OP(scan) != CURLYX) {
	2417	const int max = (reg_off_by_arg[OP(scan)]
	2418	? I32_MAX
	2419	/* I32 may be smaller than U16 on CRAYs! */
	2420	: (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
	2421	int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
	2422	int noff;
	2423	regnode *n = scan;
	2424
	2425	/* Skip NOTHING and LONGJMP. */
	2426	while ((n = regnext(n))
	2427	&& ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
	2428	\|\| ((OP(n) == LONGJMP) && (noff = ARG(n))))
	2429	&& off + noff < max)
	2430	off += noff;
	2431	if (reg_off_by_arg[OP(scan)])
	2432	ARG(scan) = off;
	2433	else
	2434	NEXT_OFF(scan) = off;
	2435	}
	2436
	2437
	2438
	2439	/* The principal pseudo-switch. Cannot be a switch, since we
	2440	look into several different things. */
	2441	if (OP(scan) == BRANCH \|\| OP(scan) == BRANCHJ
	2442	\|\| OP(scan) == IFTHEN) {
	2443	next = regnext(scan);
	2444	code = OP(scan);
	2445	/* demq: the op(next)==code check is to see if we have "branch-branch" AFAICT */
	2446
	2447	if (OP(next) == code \|\| code == IFTHEN) {
	2448	/* NOTE - There is similar code to this block below for handling
	2449	TRIE nodes on a re-study. If you change stuff here check there
	2450	too. */
	2451	I32 max1 = 0, min1 = I32_MAX, num = 0;
	2452	struct regnode_charclass_class accum;
	2453	regnode * const startbranch=scan;
	2454
	2455	if (flags & SCF_DO_SUBSTR)
	2456	SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot merge strings after this. */
	2457	if (flags & SCF_DO_STCLASS)
	2458	cl_init_zero(pRExC_state, &accum);
	2459
	2460	while (OP(scan) == code) {
	2461	I32 deltanext, minnext, f = 0, fake;
	2462	struct regnode_charclass_class this_class;
	2463
	2464	num++;
	2465	data_fake.flags = 0;
	2466	if (data) {
	2467	data_fake.whilem_c = data->whilem_c;
	2468	data_fake.last_closep = data->last_closep;
	2469	}
	2470	else
	2471	data_fake.last_closep = &fake;
	2472
	2473	data_fake.pos_delta = delta;
	2474	next = regnext(scan);
	2475	scan = NEXTOPER(scan);
	2476	if (code != BRANCH)
	2477	scan = NEXTOPER(scan);
	2478	if (flags & SCF_DO_STCLASS) {
	2479	cl_init(pRExC_state, &this_class);
	2480	data_fake.start_class = &this_class;
	2481	f = SCF_DO_STCLASS_AND;
	2482	}
	2483	if (flags & SCF_WHILEM_VISITED_POS)
	2484	f \|= SCF_WHILEM_VISITED_POS;
	2485
	2486	/* we suppose the run is continuous, last=next...*/
	2487	minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
	2488	next, &data_fake,
	2489	stopparen, recursed, NULL, f,depth+1);
	2490	if (min1 > minnext)
	2491	min1 = minnext;
	2492	if (max1 < minnext + deltanext)
	2493	max1 = minnext + deltanext;
	2494	if (deltanext == I32_MAX)
	2495	is_inf = is_inf_internal = 1;
	2496	scan = next;
	2497	if (data_fake.flags & (SF_HAS_PAR\|SF_IN_PAR))
	2498	pars++;
	2499	if (data_fake.flags & SCF_SEEN_ACCEPT) {
	2500	if ( stopmin > minnext)
	2501	stopmin = min + min1;
	2502	flags &= ~SCF_DO_SUBSTR;
	2503	if (data)
	2504	data->flags \|= SCF_SEEN_ACCEPT;
	2505	}
	2506	if (data) {
	2507	if (data_fake.flags & SF_HAS_EVAL)
	2508	data->flags \|= SF_HAS_EVAL;
	2509	data->whilem_c = data_fake.whilem_c;
	2510	}
	2511	if (flags & SCF_DO_STCLASS)
	2512	cl_or(pRExC_state, &accum, &this_class);
	2513	}
	2514	if (code == IFTHEN && num < 2) /* Empty ELSE branch */
	2515	min1 = 0;
	2516	if (flags & SCF_DO_SUBSTR) {
	2517	data->pos_min += min1;
	2518	data->pos_delta += max1 - min1;
	2519	if (max1 != min1 \|\| is_inf)
	2520	data->longest = &(data->longest_float);
	2521	}
	2522	min += min1;
	2523	delta += max1 - min1;
	2524	if (flags & SCF_DO_STCLASS_OR) {
	2525	cl_or(pRExC_state, data->start_class, &accum);
	2526	if (min1) {
	2527	cl_and(data->start_class, and_withp);
	2528	flags &= ~SCF_DO_STCLASS;
	2529	}
	2530	}
	2531	else if (flags & SCF_DO_STCLASS_AND) {
	2532	if (min1) {
	2533	cl_and(data->start_class, &accum);
	2534	flags &= ~SCF_DO_STCLASS;
	2535	}
	2536	else {
	2537	/* Switch to OR mode: cache the old value of
	2538	* data->start_class */
	2539	INIT_AND_WITHP;
	2540	StructCopy(data->start_class, and_withp,
	2541	struct regnode_charclass_class);
	2542	flags &= ~SCF_DO_STCLASS_AND;
	2543	StructCopy(&accum, data->start_class,
	2544	struct regnode_charclass_class);
	2545	flags \|= SCF_DO_STCLASS_OR;
	2546	data->start_class->flags \|= ANYOF_EOS;
	2547	}
	2548	}
	2549
	2550	if (PERL_ENABLE_TRIE_OPTIMISATION && OP( startbranch ) == BRANCH ) {
	2551	/* demq.
	2552
	2553	Assuming this was/is a branch we are dealing with: 'scan' now
	2554	points at the item that follows the branch sequence, whatever
	2555	it is. We now start at the beginning of the sequence and look
	2556	for subsequences of
	2557
	2558	BRANCH->EXACT=>x1
	2559	BRANCH->EXACT=>x2
	2560	tail
	2561
	2562	which would be constructed from a pattern like /A\|LIST\|OF\|WORDS/
	2563
	2564	If we can find such a subseqence we need to turn the first
	2565	element into a trie and then add the subsequent branch exact
	2566	strings to the trie.
	2567
	2568	We have two cases
	2569
	2570	1. patterns where the whole set of branch can be converted.
	2571
	2572	2. patterns where only a subset can be converted.
	2573
	2574	In case 1 we can replace the whole set with a single regop
	2575	for the trie. In case 2 we need to keep the start and end
	2576	branchs so
	2577
	2578	'BRANCH EXACT; BRANCH EXACT; BRANCH X'
	2579	becomes BRANCH TRIE; BRANCH X;
	2580
	2581	There is an additional case, that being where there is a
	2582	common prefix, which gets split out into an EXACT like node
	2583	preceding the TRIE node.
	2584
	2585	If x(1..n)==tail then we can do a simple trie, if not we make
	2586	a "jump" trie, such that when we match the appropriate word
	2587	we "jump" to the appopriate tail node. Essentailly we turn
	2588	a nested if into a case structure of sorts.
	2589
	2590	*/
	2591
	2592	int made=0;
	2593	if (!re_trie_maxbuff) {
	2594	re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
	2595	if (!SvIOK(re_trie_maxbuff))
	2596	sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
	2597	}
	2598	if ( SvIV(re_trie_maxbuff)>=0 ) {
	2599	regnode *cur;
	2600	regnode first = (regnode )NULL;
	2601	regnode last = (regnode )NULL;
	2602	regnode *tail = scan;
	2603	U8 optype = 0;
	2604	U32 count=0;
	2605
	2606	#ifdef DEBUGGING
	2607	SV * const mysv = sv_newmortal(); /* for dumping */
	2608	#endif
	2609	/* var tail is used because there may be a TAIL
	2610	regop in the way. Ie, the exacts will point to the
	2611	thing following the TAIL, but the last branch will
	2612	point at the TAIL. So we advance tail. If we
	2613	have nested (?:) we may have to move through several
	2614	tails.
	2615	*/
	2616
	2617	while ( OP( tail ) == TAIL ) {
	2618	/* this is the TAIL generated by (?:) */
	2619	tail = regnext( tail );
	2620	}
	2621
	2622
	2623	DEBUG_OPTIMISE_r({
	2624	regprop(RExC_rx, mysv, tail );
	2625	PerlIO_printf( Perl_debug_log, "%*s%s%s\n",
	2626	(int)depth * 2 + 2, "",
	2627	"Looking for TRIE'able sequences. Tail node is: ",
	2628	SvPV_nolen_const( mysv )
	2629	);
	2630	});
	2631
	2632	/*
	2633
	2634	step through the branches, cur represents each
	2635	branch, noper is the first thing to be matched
	2636	as part of that branch and noper_next is the
	2637	regnext() of that node. if noper is an EXACT
	2638	and noper_next is the same as scan (our current
	2639	position in the regex) then the EXACT branch is
	2640	a possible optimization target. Once we have
	2641	two or more consequetive such branches we can
	2642	create a trie of the EXACT's contents and stich
	2643	it in place. If the sequence represents all of
	2644	the branches we eliminate the whole thing and
	2645	replace it with a single TRIE. If it is a
	2646	subsequence then we need to stitch it in. This
	2647	means the first branch has to remain, and needs
	2648	to be repointed at the item on the branch chain
	2649	following the last branch optimized. This could
	2650	be either a BRANCH, in which case the
	2651	subsequence is internal, or it could be the
	2652	item following the branch sequence in which
	2653	case the subsequence is at the end.
	2654
	2655	*/
	2656
	2657	/* dont use tail as the end marker for this traverse */
	2658	for ( cur = startbranch ; cur != scan ; cur = regnext( cur ) ) {
	2659	regnode * const noper = NEXTOPER( cur );
	2660	#if defined(DEBUGGING) \|\| defined(NOJUMPTRIE)
	2661	regnode * const noper_next = regnext( noper );
	2662	#endif
	2663
	2664	DEBUG_OPTIMISE_r({
	2665	regprop(RExC_rx, mysv, cur);
	2666	PerlIO_printf( Perl_debug_log, "%*s- %s (%d)",
	2667	(int)depth * 2 + 2,"", SvPV_nolen_const( mysv ), REG_NODE_NUM(cur) );
	2668
	2669	regprop(RExC_rx, mysv, noper);
	2670	PerlIO_printf( Perl_debug_log, " -> %s",
	2671	SvPV_nolen_const(mysv));
	2672
	2673	if ( noper_next ) {
	2674	regprop(RExC_rx, mysv, noper_next );
	2675	PerlIO_printf( Perl_debug_log,"\t=> %s\t",
	2676	SvPV_nolen_const(mysv));
	2677	}
	2678	PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d)\n",
	2679	REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur) );
	2680	});
	2681	if ( (((first && optype!=NOTHING) ? OP( noper ) == optype
	2682	: PL_regkind[ OP( noper ) ] == EXACT )
	2683	\|\| OP(noper) == NOTHING )
	2684	#ifdef NOJUMPTRIE
	2685	&& noper_next == tail
	2686	#endif
	2687	&& count < U16_MAX)
	2688	{
	2689	count++;
	2690	if ( !first \|\| optype == NOTHING ) {
	2691	if (!first) first = cur;
	2692	optype = OP( noper );
	2693	} else {
	2694	last = cur;
	2695	}
	2696	} else {
	2697	if ( last ) {
	2698	make_trie( pRExC_state,
	2699	startbranch, first, cur, tail, count,
	2700	optype, depth+1 );
	2701	}
	2702	if ( PL_regkind[ OP( noper ) ] == EXACT
	2703	#ifdef NOJUMPTRIE
	2704	&& noper_next == tail
	2705	#endif
	2706	){
	2707	count = 1;
	2708	first = cur;
	2709	optype = OP( noper );
	2710	} else {
	2711	count = 0;
	2712	first = NULL;
	2713	optype = 0;
	2714	}
	2715	last = NULL;
	2716	}
	2717	}
	2718	DEBUG_OPTIMISE_r({
	2719	regprop(RExC_rx, mysv, cur);
	2720	PerlIO_printf( Perl_debug_log,
	2721	"%s- %s (%d) <SCAN FINISHED>\n", (int)depth 2 + 2,
	2722	"", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
	2723
	2724	});
	2725	if ( last ) {
	2726	made= make_trie( pRExC_state, startbranch, first, scan, tail, count, optype, depth+1 );
	2727	#ifdef TRIE_STUDY_OPT
	2728	if ( ((made == MADE_EXACT_TRIE &&
	2729	startbranch == first)
	2730	\|\| ( first_non_open == first )) &&
	2731	depth==0 ) {
	2732	flags \|= SCF_TRIE_RESTUDY;
	2733	if ( startbranch == first
	2734	&& scan == tail )
	2735	{
	2736	RExC_seen &=~REG_TOP_LEVEL_BRANCHES;
	2737	}
	2738	}
	2739	#endif
	2740	}
	2741	}
	2742
	2743	} /* do trie */
	2744
	2745	}
	2746	else if ( code == BRANCHJ ) { /* single branch is optimized. */
	2747	scan = NEXTOPER(NEXTOPER(scan));
	2748	} else /* single branch is optimized. */
	2749	scan = NEXTOPER(scan);
	2750	continue;
	2751	} else if (OP(scan) == SUSPEND \|\| OP(scan) == GOSUB \|\| OP(scan) == GOSTART) {
	2752	scan_frame *newframe = NULL;
	2753	I32 paren;
	2754	regnode *start;
	2755	regnode *end;
	2756
	2757	if (OP(scan) != SUSPEND) {
	2758	/* set the pointer */
	2759	if (OP(scan) == GOSUB) {
	2760	paren = ARG(scan);
	2761	RExC_recurse[ARG2L(scan)] = scan;
	2762	start = RExC_open_parens[paren-1];
	2763	end = RExC_close_parens[paren-1];
	2764	} else {
	2765	paren = 0;
	2766	start = RExC_rxi->program + 1;
	2767	end = RExC_opend;
	2768	}
	2769	if (!recursed) {
	2770	Newxz(recursed, (((RExC_npar)>>3) +1), U8);
	2771	SAVEFREEPV(recursed);
	2772	}
	2773	if (!PAREN_TEST(recursed,paren+1)) {
	2774	PAREN_SET(recursed,paren+1);
	2775	Newx(newframe,1,scan_frame);
	2776	} else {
	2777	if (flags & SCF_DO_SUBSTR) {
	2778	SCAN_COMMIT(pRExC_state,data,minlenp);
	2779	data->longest = &(data->longest_float);
	2780	}
	2781	is_inf = is_inf_internal = 1;
	2782	if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
	2783	cl_anything(pRExC_state, data->start_class);
	2784	flags &= ~SCF_DO_STCLASS;
	2785	}
	2786	} else {
	2787	Newx(newframe,1,scan_frame);
	2788	paren = stopparen;
	2789	start = scan+2;
	2790	end = regnext(scan);
	2791	}
	2792	if (newframe) {
	2793	assert(start);
	2794	assert(end);
	2795	SAVEFREEPV(newframe);
	2796	newframe->next = regnext(scan);
	2797	newframe->last = last;
	2798	newframe->stop = stopparen;
	2799	newframe->prev = frame;
	2800
	2801	frame = newframe;
	2802	scan = start;
	2803	stopparen = paren;
	2804	last = end;
	2805
	2806	continue;
	2807	}
	2808	}
	2809	else if (OP(scan) == EXACT) {
	2810	I32 l = STR_LEN(scan);
	2811	UV uc;
	2812	if (UTF) {
	2813	const U8 * const s = (U8*)STRING(scan);
	2814	l = utf8_length(s, s + l);
	2815	uc = utf8_to_uvchr(s, NULL);
	2816	} else {
	2817	uc = ((U8)STRING(scan));
	2818	}
	2819	min += l;
	2820	if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
	2821	/* The code below prefers earlier match for fixed
	2822	offset, later match for variable offset. */
	2823	if (data->last_end == -1) { /* Update the start info. */
	2824	data->last_start_min = data->pos_min;
	2825	data->last_start_max = is_inf
	2826	? I32_MAX : data->pos_min + data->pos_delta;
	2827	}
	2828	sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
	2829	if (UTF)
	2830	SvUTF8_on(data->last_found);
	2831	{
	2832	SV * const sv = data->last_found;
	2833	MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
	2834	mg_find(sv, PERL_MAGIC_utf8) : NULL;
	2835	if (mg && mg->mg_len >= 0)
	2836	mg->mg_len += utf8_length((U8*)STRING(scan),
	2837	(U8*)STRING(scan)+STR_LEN(scan));
	2838	}
	2839	data->last_end = data->pos_min + l;
	2840	data->pos_min += l; /* As in the first entry. */
	2841	data->flags &= ~SF_BEFORE_EOL;
	2842	}
	2843	if (flags & SCF_DO_STCLASS_AND) {
	2844	/* Check whether it is compatible with what we know already! */
	2845	int compat = 1;
	2846
	2847	if (uc >= 0x100 \|\|
	2848	(!(data->start_class->flags & (ANYOF_CLASS \| ANYOF_LOCALE))
	2849	&& !ANYOF_BITMAP_TEST(data->start_class, uc)
	2850	&& (!(data->start_class->flags & ANYOF_FOLD)
	2851	\|\| !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
	2852	)
	2853	compat = 0;
	2854	ANYOF_CLASS_ZERO(data->start_class);
	2855	ANYOF_BITMAP_ZERO(data->start_class);
	2856	if (compat)
	2857	ANYOF_BITMAP_SET(data->start_class, uc);
	2858	data->start_class->flags &= ~ANYOF_EOS;
	2859	if (uc < 0x100)
	2860	data->start_class->flags &= ~ANYOF_UNICODE_ALL;
	2861	}
	2862	else if (flags & SCF_DO_STCLASS_OR) {
	2863	/* false positive possible if the class is case-folded */
	2864	if (uc < 0x100)
	2865	ANYOF_BITMAP_SET(data->start_class, uc);
	2866	else
	2867	data->start_class->flags \|= ANYOF_UNICODE_ALL;
	2868	data->start_class->flags &= ~ANYOF_EOS;
	2869	cl_and(data->start_class, and_withp);
	2870	}
	2871	flags &= ~SCF_DO_STCLASS;
	2872	}
	2873	else if (PL_regkind[OP(scan)] == EXACT) { /* But OP != EXACT! */
	2874	I32 l = STR_LEN(scan);
	2875	UV uc = ((U8)STRING(scan));
	2876
	2877	/* Search for fixed substrings supports EXACT only. */
	2878	if (flags & SCF_DO_SUBSTR) {
	2879	assert(data);
	2880	SCAN_COMMIT(pRExC_state, data, minlenp);
	2881	}
	2882	if (UTF) {
	2883	const U8 * const s = (U8 *)STRING(scan);
	2884	l = utf8_length(s, s + l);
	2885	uc = utf8_to_uvchr(s, NULL);
	2886	}
	2887	min += l;
	2888	if (flags & SCF_DO_SUBSTR)
	2889	data->pos_min += l;
	2890	if (flags & SCF_DO_STCLASS_AND) {
	2891	/* Check whether it is compatible with what we know already! */
	2892	int compat = 1;
	2893
	2894	if (uc >= 0x100 \|\|
	2895	(!(data->start_class->flags & (ANYOF_CLASS \| ANYOF_LOCALE))
	2896	&& !ANYOF_BITMAP_TEST(data->start_class, uc)
	2897	&& !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
	2898	compat = 0;
	2899	ANYOF_CLASS_ZERO(data->start_class);
	2900	ANYOF_BITMAP_ZERO(data->start_class);
	2901	if (compat) {
	2902	ANYOF_BITMAP_SET(data->start_class, uc);
	2903	data->start_class->flags &= ~ANYOF_EOS;
	2904	data->start_class->flags \|= ANYOF_FOLD;
	2905	if (OP(scan) == EXACTFL)
	2906	data->start_class->flags \|= ANYOF_LOCALE;
	2907	}
	2908	}
	2909	else if (flags & SCF_DO_STCLASS_OR) {
	2910	if (data->start_class->flags & ANYOF_FOLD) {
	2911	/* false positive possible if the class is case-folded.
	2912	Assume that the locale settings are the same... */
	2913	if (uc < 0x100)
	2914	ANYOF_BITMAP_SET(data->start_class, uc);
	2915	data->start_class->flags &= ~ANYOF_EOS;
	2916	}
	2917	cl_and(data->start_class, and_withp);
	2918	}
	2919	flags &= ~SCF_DO_STCLASS;
	2920	}
	2921	else if (strchr((const char*)PL_varies,OP(scan))) {
	2922	I32 mincount, maxcount, minnext, deltanext, fl = 0;
	2923	I32 f = flags, pos_before = 0;
	2924	regnode * const oscan = scan;
	2925	struct regnode_charclass_class this_class;
	2926	struct regnode_charclass_class *oclass = NULL;
	2927	I32 next_is_eval = 0;
	2928
	2929	switch (PL_regkind[OP(scan)]) {
	2930	case WHILEM: /* End of (?:...)* . */
	2931	scan = NEXTOPER(scan);
	2932	goto finish;
	2933	case PLUS:
	2934	if (flags & (SCF_DO_SUBSTR \| SCF_DO_STCLASS)) {
	2935	next = NEXTOPER(scan);
	2936	if (OP(next) == EXACT \|\| (flags & SCF_DO_STCLASS)) {
	2937	mincount = 1;
	2938	maxcount = REG_INFTY;
	2939	next = regnext(scan);
	2940	scan = NEXTOPER(scan);
	2941	goto do_curly;
	2942	}
	2943	}
	2944	if (flags & SCF_DO_SUBSTR)
	2945	data->pos_min++;
	2946	min++;
	2947	/* Fall through. */
	2948	case STAR:
	2949	if (flags & SCF_DO_STCLASS) {
	2950	mincount = 0;
	2951	maxcount = REG_INFTY;
	2952	next = regnext(scan);
	2953	scan = NEXTOPER(scan);
	2954	goto do_curly;
	2955	}
	2956	is_inf = is_inf_internal = 1;
	2957	scan = regnext(scan);
	2958	if (flags & SCF_DO_SUBSTR) {
	2959	SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot extend fixed substrings */
	2960	data->longest = &(data->longest_float);
	2961	}
	2962	goto optimize_curly_tail;
	2963	case CURLY:
	2964	if (stopparen>0 && (OP(scan)==CURLYN \|\| OP(scan)==CURLYM)
	2965	&& (scan->flags == stopparen))
	2966	{
	2967	mincount = 1;
	2968	maxcount = 1;
	2969	} else {
	2970	mincount = ARG1(scan);
	2971	maxcount = ARG2(scan);
	2972	}
	2973	next = regnext(scan);
	2974	if (OP(scan) == CURLYX) {
	2975	I32 lp = (data ? *(data->last_closep) : 0);
	2976	scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
	2977	}
	2978	scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
	2979	next_is_eval = (OP(scan) == EVAL);
	2980	do_curly:
	2981	if (flags & SCF_DO_SUBSTR) {
	2982	if (mincount == 0) SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot extend fixed substrings */
	2983	pos_before = data->pos_min;
	2984	}
	2985	if (data) {
	2986	fl = data->flags;
	2987	data->flags &= ~(SF_HAS_PAR\|SF_IN_PAR\|SF_HAS_EVAL);
	2988	if (is_inf)
	2989	data->flags \|= SF_IS_INF;
	2990	}
	2991	if (flags & SCF_DO_STCLASS) {
	2992	cl_init(pRExC_state, &this_class);
	2993	oclass = data->start_class;
	2994	data->start_class = &this_class;
	2995	f \|= SCF_DO_STCLASS_AND;
	2996	f &= ~SCF_DO_STCLASS_OR;
	2997	}
	2998	/* These are the cases when once a subexpression
	2999	fails at a particular position, it cannot succeed
	3000	even after backtracking at the enclosing scope.
	3001
	3002	XXXX what if minimal match and we are at the
	3003	initial run of {n,m}? */
	3004	if ((mincount != maxcount - 1) && (maxcount != REG_INFTY))
	3005	f &= ~SCF_WHILEM_VISITED_POS;
	3006
	3007	/* This will finish on WHILEM, setting scan, or on NULL: */
	3008	minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
	3009	last, data, stopparen, recursed, NULL,
	3010	(mincount == 0
	3011	? (f & ~SCF_DO_SUBSTR) : f),depth+1);
	3012
	3013	if (flags & SCF_DO_STCLASS)
	3014	data->start_class = oclass;
	3015	if (mincount == 0 \|\| minnext == 0) {
	3016	if (flags & SCF_DO_STCLASS_OR) {
	3017	cl_or(pRExC_state, data->start_class, &this_class);
	3018	}
	3019	else if (flags & SCF_DO_STCLASS_AND) {
	3020	/* Switch to OR mode: cache the old value of
	3021	* data->start_class */
	3022	INIT_AND_WITHP;
	3023	StructCopy(data->start_class, and_withp,
	3024	struct regnode_charclass_class);
	3025	flags &= ~SCF_DO_STCLASS_AND;
	3026	StructCopy(&this_class, data->start_class,
	3027	struct regnode_charclass_class);
	3028	flags \|= SCF_DO_STCLASS_OR;
	3029	data->start_class->flags \|= ANYOF_EOS;
	3030	}
	3031	} else { /* Non-zero len */
	3032	if (flags & SCF_DO_STCLASS_OR) {
	3033	cl_or(pRExC_state, data->start_class, &this_class);
	3034	cl_and(data->start_class, and_withp);
	3035	}
	3036	else if (flags & SCF_DO_STCLASS_AND)
	3037	cl_and(data->start_class, &this_class);
	3038	flags &= ~SCF_DO_STCLASS;
	3039	}
	3040	if (!scan) /* It was not CURLYX, but CURLY. */
	3041	scan = next;
	3042	if ( /* ? quantifier ok, except for (?{ ... }) */
	3043	(next_is_eval \|\| !(mincount == 0 && maxcount == 1))
	3044	&& (minnext == 0) && (deltanext == 0)
	3045	&& data && !(data->flags & (SF_HAS_PAR\|SF_IN_PAR))
	3046	&& maxcount <= REG_INFTY/3 /* Complement check for big count */
	3047	&& ckWARN(WARN_REGEXP))
	3048	{
	3049	vWARN(RExC_parse,
	3050	"Quantifier unexpected on zero-length expression");
	3051	}
	3052
	3053	min += minnext * mincount;
	3054	is_inf_internal \|= ((maxcount == REG_INFTY
	3055	&& (minnext + deltanext) > 0)
	3056	\|\| deltanext == I32_MAX);
	3057	is_inf \|= is_inf_internal;
	3058	delta += (minnext + deltanext) * maxcount - minnext * mincount;
	3059
	3060	/* Try powerful optimization CURLYX => CURLYN. */
	3061	if ( OP(oscan) == CURLYX && data
	3062	&& data->flags & SF_IN_PAR
	3063	&& !(data->flags & SF_HAS_EVAL)
	3064	&& !deltanext && minnext == 1 ) {
	3065	/* Try to optimize to CURLYN. */
	3066	regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
	3067	regnode * const nxt1 = nxt;
	3068	#ifdef DEBUGGING
	3069	regnode *nxt2;
	3070	#endif
	3071
	3072	/* Skip open. */
	3073	nxt = regnext(nxt);
	3074	if (!strchr((const char*)PL_simple,OP(nxt))
	3075	&& !(PL_regkind[OP(nxt)] == EXACT
	3076	&& STR_LEN(nxt) == 1))
	3077	goto nogo;
	3078	#ifdef DEBUGGING
	3079	nxt2 = nxt;
	3080	#endif
	3081	nxt = regnext(nxt);
	3082	if (OP(nxt) != CLOSE)
	3083	goto nogo;
	3084	if (RExC_open_parens) {
	3085	RExC_open_parens[ARG(nxt1)-1]=oscan; /open->CURLYM/
	3086	RExC_close_parens[ARG(nxt1)-1]=nxt+2; /close->while/
	3087	}
	3088	/* Now we know that nxt2 is the only contents: */
	3089	oscan->flags = (U8)ARG(nxt);
	3090	OP(oscan) = CURLYN;
	3091	OP(nxt1) = NOTHING; /* was OPEN. */
	3092
	3093	#ifdef DEBUGGING
	3094	OP(nxt1 + 1) = OPTIMIZED; /* was count. */
	3095	NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
	3096	NEXT_OFF(nxt2) = 0; /* just for consistancy with CURLY. */
	3097	OP(nxt) = OPTIMIZED; /* was CLOSE. */
	3098	OP(nxt + 1) = OPTIMIZED; /* was count. */
	3099	NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
	3100	#endif
	3101	}
	3102	nogo:
	3103
	3104	/* Try optimization CURLYX => CURLYM. */
	3105	if ( OP(oscan) == CURLYX && data
	3106	&& !(data->flags & SF_HAS_PAR)
	3107	&& !(data->flags & SF_HAS_EVAL)
	3108	&& !deltanext /* atom is fixed width */
	3109	&& minnext != 0 /* CURLYM can't handle zero width */
	3110	) {
	3111	/* XXXX How to optimize if data == 0? */
	3112	/* Optimize to a simpler form. */
	3113	regnode nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; / OPEN */
	3114	regnode *nxt2;
	3115
	3116	OP(oscan) = CURLYM;
	3117	while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
	3118	&& (OP(nxt2) != WHILEM))
	3119	nxt = nxt2;
	3120	OP(nxt2) = SUCCEED; /* Whas WHILEM */
	3121	/* Need to optimize away parenths. */
	3122	if (data->flags & SF_IN_PAR) {
	3123	/* Set the parenth number. */
	3124	regnode nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; / OPEN*/
	3125
	3126	if (OP(nxt) != CLOSE)
	3127	FAIL("Panic opt close");
	3128	oscan->flags = (U8)ARG(nxt);
	3129	if (RExC_open_parens) {
	3130	RExC_open_parens[ARG(nxt1)-1]=oscan; /open->CURLYM/
	3131	RExC_close_parens[ARG(nxt1)-1]=nxt2+1; /close->NOTHING/
	3132	}
	3133	OP(nxt1) = OPTIMIZED; /* was OPEN. */
	3134	OP(nxt) = OPTIMIZED; /* was CLOSE. */
	3135
	3136	#ifdef DEBUGGING
	3137	OP(nxt1 + 1) = OPTIMIZED; /* was count. */
	3138	OP(nxt + 1) = OPTIMIZED; /* was count. */
	3139	NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
	3140	NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
	3141	#endif
	3142	#if 0
	3143	while ( nxt1 && (OP(nxt1) != WHILEM)) {
	3144	regnode *nnxt = regnext(nxt1);
	3145
	3146	if (nnxt == nxt) {
	3147	if (reg_off_by_arg[OP(nxt1)])
	3148	ARG_SET(nxt1, nxt2 - nxt1);
	3149	else if (nxt2 - nxt1 < U16_MAX)
	3150	NEXT_OFF(nxt1) = nxt2 - nxt1;
	3151	else
	3152	OP(nxt) = NOTHING; /* Cannot beautify */
	3153	}
	3154	nxt1 = nnxt;
	3155	}
	3156	#endif
	3157	/* Optimize again: */
	3158	study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
	3159	NULL, stopparen, recursed, NULL, 0,depth+1);
	3160	}
	3161	else
	3162	oscan->flags = 0;
	3163	}
	3164	else if ((OP(oscan) == CURLYX)
	3165	&& (flags & SCF_WHILEM_VISITED_POS)
	3166	/* See the comment on a similar expression above.
	3167	However, this time it not a subexpression
	3168	we care about, but the expression itself. */
	3169	&& (maxcount == REG_INFTY)
	3170	&& data && ++data->whilem_c < 16) {
	3171	/* This stays as CURLYX, we can put the count/of pair. */
	3172	/* Find WHILEM (as in regexec.c) */
	3173	regnode *nxt = oscan + NEXT_OFF(oscan);
	3174
	3175	if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
	3176	nxt += ARG(nxt);
	3177	PREVOPER(nxt)->flags = (U8)(data->whilem_c
	3178	\| (RExC_whilem_seen << 4)); /* On WHILEM */
	3179	}
	3180	if (data && fl & (SF_HAS_PAR\|SF_IN_PAR))
	3181	pars++;
	3182	if (flags & SCF_DO_SUBSTR) {
	3183	SV *last_str = NULL;
	3184	int counted = mincount != 0;
	3185
	3186	if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
	3187	#if defined(SPARC64_GCC_WORKAROUND)
	3188	I32 b = 0;
	3189	STRLEN l = 0;
	3190	const char *s = NULL;
	3191	I32 old = 0;
	3192
	3193	if (pos_before >= data->last_start_min)
	3194	b = pos_before;
	3195	else
	3196	b = data->last_start_min;
	3197
	3198	l = 0;
	3199	s = SvPV_const(data->last_found, l);
	3200	old = b - data->last_start_min;
	3201
	3202	#else
	3203	I32 b = pos_before >= data->last_start_min
	3204	? pos_before : data->last_start_min;
	3205	STRLEN l;
	3206	const char * const s = SvPV_const(data->last_found, l);
	3207	I32 old = b - data->last_start_min;
	3208	#endif
	3209
	3210	if (UTF)
	3211	old = utf8_hop((U8)s, old) - (U8)s;
	3212
	3213	l -= old;
	3214	/* Get the added string: */
	3215	last_str = newSVpvn(s + old, l);
	3216	if (UTF)
	3217	SvUTF8_on(last_str);
	3218	if (deltanext == 0 && pos_before == b) {
	3219	/* What was added is a constant string */
	3220	if (mincount > 1) {
	3221	SvGROW(last_str, (mincount * l) + 1);
	3222	repeatcpy(SvPVX(last_str) + l,
	3223	SvPVX_const(last_str), l, mincount - 1);
	3224	SvCUR_set(last_str, SvCUR(last_str) * mincount);
	3225	/* Add additional parts. */
	3226	SvCUR_set(data->last_found,
	3227	SvCUR(data->last_found) - l);
	3228	sv_catsv(data->last_found, last_str);
	3229	{
	3230	SV * sv = data->last_found;
	3231	MAGIC *mg =
	3232	SvUTF8(sv) && SvMAGICAL(sv) ?
	3233	mg_find(sv, PERL_MAGIC_utf8) : NULL;
	3234	if (mg && mg->mg_len >= 0)
	3235	mg->mg_len += CHR_SVLEN(last_str);
	3236	}
	3237	data->last_end += l * (mincount - 1);
	3238	}
	3239	} else {
	3240	/* start offset must point into the last copy */
	3241	data->last_start_min += minnext * (mincount - 1);
	3242	data->last_start_max += is_inf ? I32_MAX
	3243	: (maxcount - 1) * (minnext + data->pos_delta);
	3244	}
	3245	}
	3246	/* It is counted once already... */
	3247	data->pos_min += minnext * (mincount - counted);
	3248	data->pos_delta += - counted * deltanext +
	3249	(minnext + deltanext) * maxcount - minnext * mincount;
	3250	if (mincount != maxcount) {
	3251	/* Cannot extend fixed substrings found inside
	3252	the group. */
	3253	SCAN_COMMIT(pRExC_state,data,minlenp);
	3254	if (mincount && last_str) {
	3255	SV * const sv = data->last_found;
	3256	MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
	3257	mg_find(sv, PERL_MAGIC_utf8) : NULL;
	3258
	3259	if (mg)
	3260	mg->mg_len = -1;
	3261	sv_setsv(sv, last_str);
	3262	data->last_end = data->pos_min;
	3263	data->last_start_min =
	3264	data->pos_min - CHR_SVLEN(last_str);
	3265	data->last_start_max = is_inf
	3266	? I32_MAX
	3267	: data->pos_min + data->pos_delta
	3268	- CHR_SVLEN(last_str);
	3269	}
	3270	data->longest = &(data->longest_float);
	3271	}
	3272	SvREFCNT_dec(last_str);
	3273	}
	3274	if (data && (fl & SF_HAS_EVAL))
	3275	data->flags \|= SF_HAS_EVAL;
	3276	optimize_curly_tail:
	3277	if (OP(oscan) != CURLYX) {
	3278	while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
	3279	&& NEXT_OFF(next))
	3280	NEXT_OFF(oscan) += NEXT_OFF(next);
	3281	}
	3282	continue;
	3283	default: /* REF and CLUMP only? */
	3284	if (flags & SCF_DO_SUBSTR) {
	3285	SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */
	3286	data->longest = &(data->longest_float);
	3287	}
	3288	is_inf = is_inf_internal = 1;
	3289	if (flags & SCF_DO_STCLASS_OR)
	3290	cl_anything(pRExC_state, data->start_class);
	3291	flags &= ~SCF_DO_STCLASS;
	3292	break;
	3293	}
	3294	}
	3295	else if (strchr((const char*)PL_simple,OP(scan))) {
	3296	int value = 0;
	3297
	3298	if (flags & SCF_DO_SUBSTR) {
	3299	SCAN_COMMIT(pRExC_state,data,minlenp);
	3300	data->pos_min++;
	3301	}
	3302	min++;
	3303	if (flags & SCF_DO_STCLASS) {
	3304	data->start_class->flags &= ~ANYOF_EOS; /* No match on empty */
	3305
	3306	/* Some of the logic below assumes that switching
	3307	locale on will only add false positives. */
	3308	switch (PL_regkind[OP(scan)]) {
	3309	case SANY:
	3310	default:
	3311	do_default:
	3312	/* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */
	3313	if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
	3314	cl_anything(pRExC_state, data->start_class);
	3315	break;
	3316	case REG_ANY:
	3317	if (OP(scan) == SANY)
	3318	goto do_default;
	3319	if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
	3320	value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
	3321	\|\| (data->start_class->flags & ANYOF_CLASS));
	3322	cl_anything(pRExC_state, data->start_class);
	3323	}
	3324	if (flags & SCF_DO_STCLASS_AND \|\| !value)
	3325	ANYOF_BITMAP_CLEAR(data->start_class,'\n');
	3326	break;
	3327	case ANYOF:
	3328	if (flags & SCF_DO_STCLASS_AND)
	3329	cl_and(data->start_class,
	3330	(struct regnode_charclass_class*)scan);
	3331	else
	3332	cl_or(pRExC_state, data->start_class,
	3333	(struct regnode_charclass_class*)scan);
	3334	break;
	3335	case ALNUM:
	3336	if (flags & SCF_DO_STCLASS_AND) {
	3337	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	3338	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
	3339	for (value = 0; value < 256; value++)
	3340	if (!isALNUM(value))
	3341	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3342	}
	3343	}
	3344	else {
	3345	if (data->start_class->flags & ANYOF_LOCALE)
	3346	ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
	3347	else {
	3348	for (value = 0; value < 256; value++)
	3349	if (isALNUM(value))
	3350	ANYOF_BITMAP_SET(data->start_class, value);
	3351	}
	3352	}
	3353	break;
	3354	case ALNUML:
	3355	if (flags & SCF_DO_STCLASS_AND) {
	3356	if (data->start_class->flags & ANYOF_LOCALE)
	3357	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
	3358	}
	3359	else {
	3360	ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
	3361	data->start_class->flags \|= ANYOF_LOCALE;
	3362	}
	3363	break;
	3364	case NALNUM:
	3365	if (flags & SCF_DO_STCLASS_AND) {
	3366	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	3367	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
	3368	for (value = 0; value < 256; value++)
	3369	if (isALNUM(value))
	3370	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3371	}
	3372	}
	3373	else {
	3374	if (data->start_class->flags & ANYOF_LOCALE)
	3375	ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
	3376	else {
	3377	for (value = 0; value < 256; value++)
	3378	if (!isALNUM(value))
	3379	ANYOF_BITMAP_SET(data->start_class, value);
	3380	}
	3381	}
	3382	break;
	3383	case NALNUML:
	3384	if (flags & SCF_DO_STCLASS_AND) {
	3385	if (data->start_class->flags & ANYOF_LOCALE)
	3386	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
	3387	}
	3388	else {
	3389	data->start_class->flags \|= ANYOF_LOCALE;
	3390	ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
	3391	}
	3392	break;
	3393	case SPACE:
	3394	if (flags & SCF_DO_STCLASS_AND) {
	3395	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	3396	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
	3397	for (value = 0; value < 256; value++)
	3398	if (!isSPACE(value))
	3399	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3400	}
	3401	}
	3402	else {
	3403	if (data->start_class->flags & ANYOF_LOCALE)
	3404	ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
	3405	else {
	3406	for (value = 0; value < 256; value++)
	3407	if (isSPACE(value))
	3408	ANYOF_BITMAP_SET(data->start_class, value);
	3409	}
	3410	}
	3411	break;
	3412	case SPACEL:
	3413	if (flags & SCF_DO_STCLASS_AND) {
	3414	if (data->start_class->flags & ANYOF_LOCALE)
	3415	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
	3416	}
	3417	else {
	3418	data->start_class->flags \|= ANYOF_LOCALE;
	3419	ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
	3420	}
	3421	break;
	3422	case NSPACE:
	3423	if (flags & SCF_DO_STCLASS_AND) {
	3424	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	3425	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
	3426	for (value = 0; value < 256; value++)
	3427	if (isSPACE(value))
	3428	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3429	}
	3430	}
	3431	else {
	3432	if (data->start_class->flags & ANYOF_LOCALE)
	3433	ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
	3434	else {
	3435	for (value = 0; value < 256; value++)
	3436	if (!isSPACE(value))
	3437	ANYOF_BITMAP_SET(data->start_class, value);
	3438	}
	3439	}
	3440	break;
	3441	case NSPACEL:
	3442	if (flags & SCF_DO_STCLASS_AND) {
	3443	if (data->start_class->flags & ANYOF_LOCALE) {
	3444	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
	3445	for (value = 0; value < 256; value++)
	3446	if (!isSPACE(value))
	3447	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3448	}
	3449	}
	3450	else {
	3451	data->start_class->flags \|= ANYOF_LOCALE;
	3452	ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
	3453	}
	3454	break;
	3455	case DIGIT:
	3456	if (flags & SCF_DO_STCLASS_AND) {
	3457	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NDIGIT);
	3458	for (value = 0; value < 256; value++)
	3459	if (!isDIGIT(value))
	3460	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3461	}
	3462	else {
	3463	if (data->start_class->flags & ANYOF_LOCALE)
	3464	ANYOF_CLASS_SET(data->start_class,ANYOF_DIGIT);
	3465	else {
	3466	for (value = 0; value < 256; value++)
	3467	if (isDIGIT(value))
	3468	ANYOF_BITMAP_SET(data->start_class, value);
	3469	}
	3470	}
	3471	break;
	3472	case NDIGIT:
	3473	if (flags & SCF_DO_STCLASS_AND) {
	3474	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_DIGIT);
	3475	for (value = 0; value < 256; value++)
	3476	if (isDIGIT(value))
	3477	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3478	}
	3479	else {
	3480	if (data->start_class->flags & ANYOF_LOCALE)
	3481	ANYOF_CLASS_SET(data->start_class,ANYOF_NDIGIT);
	3482	else {
	3483	for (value = 0; value < 256; value++)
	3484	if (!isDIGIT(value))
	3485	ANYOF_BITMAP_SET(data->start_class, value);
	3486	}
	3487	}
	3488	break;
	3489	}
	3490	if (flags & SCF_DO_STCLASS_OR)
	3491	cl_and(data->start_class, and_withp);
	3492	flags &= ~SCF_DO_STCLASS;
	3493	}
	3494	}
	3495	else if (PL_regkind[OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
	3496	data->flags \|= (OP(scan) == MEOL
	3497	? SF_BEFORE_MEOL
	3498	: SF_BEFORE_SEOL);
	3499	}
	3500	else if ( PL_regkind[OP(scan)] == BRANCHJ
	3501	/* Lookbehind, or need to calculate parens/evals/stclass: */
	3502	&& (scan->flags \|\| data \|\| (flags & SCF_DO_STCLASS))
	3503	&& (OP(scan) == IFMATCH \|\| OP(scan) == UNLESSM)) {
	3504	if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
	3505	\|\| OP(scan) == UNLESSM )
	3506	{
	3507	/* Negative Lookahead/lookbehind
	3508	In this case we can't do fixed string optimisation.
	3509	*/
	3510
	3511	I32 deltanext, minnext, fake = 0;
	3512	regnode *nscan;
	3513	struct regnode_charclass_class intrnl;
	3514	int f = 0;
	3515
	3516	data_fake.flags = 0;
	3517	if (data) {
	3518	data_fake.whilem_c = data->whilem_c;
	3519	data_fake.last_closep = data->last_closep;
	3520	}
	3521	else
	3522	data_fake.last_closep = &fake;
	3523	data_fake.pos_delta = delta;
	3524	if ( flags & SCF_DO_STCLASS && !scan->flags
	3525	&& OP(scan) == IFMATCH ) { /* Lookahead */
	3526	cl_init(pRExC_state, &intrnl);
	3527	data_fake.start_class = &intrnl;
	3528	f \|= SCF_DO_STCLASS_AND;
	3529	}
	3530	if (flags & SCF_WHILEM_VISITED_POS)
	3531	f \|= SCF_WHILEM_VISITED_POS;
	3532	next = regnext(scan);
	3533	nscan = NEXTOPER(NEXTOPER(scan));
	3534	minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
	3535	last, &data_fake, stopparen, recursed, NULL, f, depth+1);
	3536	if (scan->flags) {
	3537	if (deltanext) {
	3538	FAIL("Variable length lookbehind not implemented");
	3539	}
	3540	else if (minnext > (I32)U8_MAX) {
	3541	FAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
	3542	}
	3543	scan->flags = (U8)minnext;
	3544	}
	3545	if (data) {
	3546	if (data_fake.flags & (SF_HAS_PAR\|SF_IN_PAR))
	3547	pars++;
	3548	if (data_fake.flags & SF_HAS_EVAL)
	3549	data->flags \|= SF_HAS_EVAL;
	3550	data->whilem_c = data_fake.whilem_c;
	3551	}
	3552	if (f & SCF_DO_STCLASS_AND) {
	3553	const int was = (data->start_class->flags & ANYOF_EOS);
	3554
	3555	cl_and(data->start_class, &intrnl);
	3556	if (was)
	3557	data->start_class->flags \|= ANYOF_EOS;
	3558	}
	3559	}
	3560	#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
	3561	else {
	3562	/* Positive Lookahead/lookbehind
	3563	In this case we can do fixed string optimisation,
	3564	but we must be careful about it. Note in the case of
	3565	lookbehind the positions will be offset by the minimum
	3566	length of the pattern, something we won't know about
	3567	until after the recurse.
	3568	*/
	3569	I32 deltanext, fake = 0;
	3570	regnode *nscan;
	3571	struct regnode_charclass_class intrnl;
	3572	int f = 0;
	3573	/* We use SAVEFREEPV so that when the full compile
	3574	is finished perl will clean up the allocated
	3575	minlens when its all done. This was we don't
	3576	have to worry about freeing them when we know
	3577	they wont be used, which would be a pain.
	3578	*/
	3579	I32 *minnextp;
	3580	Newx( minnextp, 1, I32 );
	3581	SAVEFREEPV(minnextp);
	3582
	3583	if (data) {
	3584	StructCopy(data, &data_fake, scan_data_t);
	3585	if ((flags & SCF_DO_SUBSTR) && data->last_found) {
	3586	f \|= SCF_DO_SUBSTR;
	3587	if (scan->flags)
	3588	SCAN_COMMIT(pRExC_state, &data_fake,minlenp);
	3589	data_fake.last_found=newSVsv(data->last_found);
	3590	}
	3591	}
	3592	else
	3593	data_fake.last_closep = &fake;
	3594	data_fake.flags = 0;
	3595	data_fake.pos_delta = delta;
	3596	if (is_inf)
	3597	data_fake.flags \|= SF_IS_INF;
	3598	if ( flags & SCF_DO_STCLASS && !scan->flags
	3599	&& OP(scan) == IFMATCH ) { /* Lookahead */
	3600	cl_init(pRExC_state, &intrnl);
	3601	data_fake.start_class = &intrnl;
	3602	f \|= SCF_DO_STCLASS_AND;
	3603	}
	3604	if (flags & SCF_WHILEM_VISITED_POS)
	3605	f \|= SCF_WHILEM_VISITED_POS;
	3606	next = regnext(scan);
	3607	nscan = NEXTOPER(NEXTOPER(scan));
	3608
	3609	*minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext,
	3610	last, &data_fake, stopparen, recursed, NULL, f,depth+1);
	3611	if (scan->flags) {
	3612	if (deltanext) {
	3613	FAIL("Variable length lookbehind not implemented");
	3614	}
	3615	else if (*minnextp > (I32)U8_MAX) {
	3616	FAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
	3617	}
	3618	scan->flags = (U8)*minnextp;
	3619	}
	3620
	3621	*minnextp += min;
	3622
	3623	if (f & SCF_DO_STCLASS_AND) {
	3624	const int was = (data->start_class->flags & ANYOF_EOS);
	3625
	3626	cl_and(data->start_class, &intrnl);
	3627	if (was)
	3628	data->start_class->flags \|= ANYOF_EOS;
	3629	}
	3630	if (data) {
	3631	if (data_fake.flags & (SF_HAS_PAR\|SF_IN_PAR))
	3632	pars++;
	3633	if (data_fake.flags & SF_HAS_EVAL)
	3634	data->flags \|= SF_HAS_EVAL;
	3635	data->whilem_c = data_fake.whilem_c;
	3636	if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
	3637	if (RExC_rx->minlen<*minnextp)
	3638	RExC_rx->minlen=*minnextp;
	3639	SCAN_COMMIT(pRExC_state, &data_fake, minnextp);
	3640	SvREFCNT_dec(data_fake.last_found);
	3641
	3642	if ( data_fake.minlen_fixed != minlenp )
	3643	{
	3644	data->offset_fixed= data_fake.offset_fixed;
	3645	data->minlen_fixed= data_fake.minlen_fixed;
	3646	data->lookbehind_fixed+= scan->flags;
	3647	}
	3648	if ( data_fake.minlen_float != minlenp )
	3649	{
	3650	data->minlen_float= data_fake.minlen_float;
	3651	data->offset_float_min=data_fake.offset_float_min;
	3652	data->offset_float_max=data_fake.offset_float_max;
	3653	data->lookbehind_float+= scan->flags;
	3654	}
	3655	}
	3656	}
	3657
	3658
	3659	}
	3660	#endif
	3661	}
	3662	else if (OP(scan) == OPEN) {
	3663	if (stopparen != (I32)ARG(scan))
	3664	pars++;
	3665	}
	3666	else if (OP(scan) == CLOSE) {
	3667	if (stopparen == (I32)ARG(scan)) {
	3668	break;
	3669	}
	3670	if ((I32)ARG(scan) == is_par) {
	3671	next = regnext(scan);
	3672
	3673	if ( next && (OP(next) != WHILEM) && next < last)
	3674	is_par = 0; /* Disable optimization */
	3675	}
	3676	if (data)
	3677	*(data->last_closep) = ARG(scan);
	3678	}
	3679	else if (OP(scan) == EVAL) {
	3680	if (data)
	3681	data->flags \|= SF_HAS_EVAL;
	3682	}
	3683	else if ( PL_regkind[OP(scan)] == ENDLIKE ) {
	3684	if (flags & SCF_DO_SUBSTR) {
	3685	SCAN_COMMIT(pRExC_state,data,minlenp);
	3686	flags &= ~SCF_DO_SUBSTR;
	3687	}
	3688	if (data && OP(scan)==ACCEPT) {
	3689	data->flags \|= SCF_SEEN_ACCEPT;
	3690	if (stopmin > min)
	3691	stopmin = min;
	3692	}
	3693	}
	3694	else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
	3695	{
	3696	if (flags & SCF_DO_SUBSTR) {
	3697	SCAN_COMMIT(pRExC_state,data,minlenp);
	3698	data->longest = &(data->longest_float);
	3699	}
	3700	is_inf = is_inf_internal = 1;
	3701	if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
	3702	cl_anything(pRExC_state, data->start_class);
	3703	flags &= ~SCF_DO_STCLASS;
	3704	}
	3705	else if (OP(scan) == GPOS) {
	3706	if (!(RExC_rx->extflags & RXf_GPOS_FLOAT) &&
	3707	!(delta \|\| is_inf \|\| (data && data->pos_delta)))
	3708	{
	3709	if (!(RExC_rx->extflags & RXf_ANCH) && (flags & SCF_DO_SUBSTR))
	3710	RExC_rx->extflags \|= RXf_ANCH_GPOS;
	3711	if (RExC_rx->gofs < (U32)min)
	3712	RExC_rx->gofs = min;
	3713	} else {
	3714	RExC_rx->extflags \|= RXf_GPOS_FLOAT;
	3715	RExC_rx->gofs = 0;
	3716	}
	3717	}
	3718	#ifdef TRIE_STUDY_OPT
	3719	#ifdef FULL_TRIE_STUDY
	3720	else if (PL_regkind[OP(scan)] == TRIE) {
	3721	/* NOTE - There is similar code to this block above for handling
	3722	BRANCH nodes on the initial study. If you change stuff here
	3723	check there too. */
	3724	regnode *trie_node= scan;
	3725	regnode *tail= regnext(scan);
	3726	reg_trie_data trie = (reg_trie_data)RExC_rxi->data->data[ ARG(scan) ];
	3727	I32 max1 = 0, min1 = I32_MAX;
	3728	struct regnode_charclass_class accum;
	3729
	3730	if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
	3731	SCAN_COMMIT(pRExC_state, data,minlenp); /* Cannot merge strings after this. */
	3732	if (flags & SCF_DO_STCLASS)
	3733	cl_init_zero(pRExC_state, &accum);
	3734
	3735	if (!trie->jump) {
	3736	min1= trie->minlen;
	3737	max1= trie->maxlen;
	3738	} else {
	3739	const regnode *nextbranch= NULL;
	3740	U32 word;
	3741
	3742	for ( word=1 ; word <= trie->wordcount ; word++)
	3743	{
	3744	I32 deltanext=0, minnext=0, f = 0, fake;
	3745	struct regnode_charclass_class this_class;
	3746
	3747	data_fake.flags = 0;
	3748	if (data) {
	3749	data_fake.whilem_c = data->whilem_c;
	3750	data_fake.last_closep = data->last_closep;
	3751	}
	3752	else
	3753	data_fake.last_closep = &fake;
	3754	data_fake.pos_delta = delta;
	3755	if (flags & SCF_DO_STCLASS) {
	3756	cl_init(pRExC_state, &this_class);
	3757	data_fake.start_class = &this_class;
	3758	f = SCF_DO_STCLASS_AND;
	3759	}
	3760	if (flags & SCF_WHILEM_VISITED_POS)
	3761	f \|= SCF_WHILEM_VISITED_POS;
	3762
	3763	if (trie->jump[word]) {
	3764	if (!nextbranch)
	3765	nextbranch = trie_node + trie->jump[0];
	3766	scan= trie_node + trie->jump[word];
	3767	/* We go from the jump point to the branch that follows
	3768	it. Note this means we need the vestigal unused branches
	3769	even though they arent otherwise used.
	3770	*/
	3771	minnext = study_chunk(pRExC_state, &scan, minlenp,
	3772	&deltanext, (regnode *)nextbranch, &data_fake,
	3773	stopparen, recursed, NULL, f,depth+1);
	3774	}
	3775	if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
	3776	nextbranch= regnext((regnode*)nextbranch);
	3777
	3778	if (min1 > (I32)(minnext + trie->minlen))
	3779	min1 = minnext + trie->minlen;
	3780	if (max1 < (I32)(minnext + deltanext + trie->maxlen))
	3781	max1 = minnext + deltanext + trie->maxlen;
	3782	if (deltanext == I32_MAX)
	3783	is_inf = is_inf_internal = 1;
	3784
	3785	if (data_fake.flags & (SF_HAS_PAR\|SF_IN_PAR))
	3786	pars++;
	3787	if (data_fake.flags & SCF_SEEN_ACCEPT) {
	3788	if ( stopmin > min + min1)
	3789	stopmin = min + min1;
	3790	flags &= ~SCF_DO_SUBSTR;
	3791	if (data)
	3792	data->flags \|= SCF_SEEN_ACCEPT;
	3793	}
	3794	if (data) {
	3795	if (data_fake.flags & SF_HAS_EVAL)
	3796	data->flags \|= SF_HAS_EVAL;
	3797	data->whilem_c = data_fake.whilem_c;
	3798	}
	3799	if (flags & SCF_DO_STCLASS)
	3800	cl_or(pRExC_state, &accum, &this_class);
	3801	}
	3802	}
	3803	if (flags & SCF_DO_SUBSTR) {
	3804	data->pos_min += min1;
	3805	data->pos_delta += max1 - min1;
	3806	if (max1 != min1 \|\| is_inf)
	3807	data->longest = &(data->longest_float);
	3808	}
	3809	min += min1;
	3810	delta += max1 - min1;
	3811	if (flags & SCF_DO_STCLASS_OR) {
	3812	cl_or(pRExC_state, data->start_class, &accum);
	3813	if (min1) {
	3814	cl_and(data->start_class, and_withp);
	3815	flags &= ~SCF_DO_STCLASS;
	3816	}
	3817	}
	3818	else if (flags & SCF_DO_STCLASS_AND) {
	3819	if (min1) {
	3820	cl_and(data->start_class, &accum);
	3821	flags &= ~SCF_DO_STCLASS;
	3822	}
	3823	else {
	3824	/* Switch to OR mode: cache the old value of
	3825	* data->start_class */
	3826	INIT_AND_WITHP;
	3827	StructCopy(data->start_class, and_withp,
	3828	struct regnode_charclass_class);
	3829	flags &= ~SCF_DO_STCLASS_AND;
	3830	StructCopy(&accum, data->start_class,
	3831	struct regnode_charclass_class);
	3832	flags \|= SCF_DO_STCLASS_OR;
	3833	data->start_class->flags \|= ANYOF_EOS;
	3834	}
	3835	}
	3836	scan= tail;
	3837	continue;
	3838	}
	3839	#else
	3840	else if (PL_regkind[OP(scan)] == TRIE) {
	3841	reg_trie_data trie = (reg_trie_data)RExC_rxi->data->data[ ARG(scan) ];
	3842	U8*bang=NULL;
	3843
	3844	min += trie->minlen;
	3845	delta += (trie->maxlen - trie->minlen);
	3846	flags &= ~SCF_DO_STCLASS; /* xxx */
	3847	if (flags & SCF_DO_SUBSTR) {
	3848	SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */
	3849	data->pos_min += trie->minlen;
	3850	data->pos_delta += (trie->maxlen - trie->minlen);
	3851	if (trie->maxlen != trie->minlen)
	3852	data->longest = &(data->longest_float);
	3853	}
	3854	if (trie->jump) /* no more substrings -- for now /grr*/
	3855	flags &= ~SCF_DO_SUBSTR;
	3856	}
	3857	#endif /* old or new */
	3858	#endif /* TRIE_STUDY_OPT */
	3859	/* Else: zero-length, ignore. */
	3860	scan = regnext(scan);
	3861	}
	3862	if (frame) {
	3863	last = frame->last;
	3864	scan = frame->next;
	3865	stopparen = frame->stop;
	3866	frame = frame->prev;
	3867	goto fake_study_recurse;
	3868	}
	3869
	3870	finish:
	3871	assert(!frame);
	3872	DEBUG_STUDYDATA("pre-fin:",data,depth);
	3873
	3874	*scanp = scan;
	3875	*deltap = is_inf_internal ? I32_MAX : delta;
	3876	if (flags & SCF_DO_SUBSTR && is_inf)
	3877	data->pos_delta = I32_MAX - data->pos_min;
	3878	if (is_par > (I32)U8_MAX)
	3879	is_par = 0;
	3880	if (is_par && pars==1 && data) {
	3881	data->flags \|= SF_IN_PAR;
	3882	data->flags &= ~SF_HAS_PAR;
	3883	}
	3884	else if (pars && data) {
	3885	data->flags \|= SF_HAS_PAR;
	3886	data->flags &= ~SF_IN_PAR;
	3887	}
	3888	if (flags & SCF_DO_STCLASS_OR)
	3889	cl_and(data->start_class, and_withp);
	3890	if (flags & SCF_TRIE_RESTUDY)
	3891	data->flags \|= SCF_TRIE_RESTUDY;
	3892
	3893	DEBUG_STUDYDATA("post-fin:",data,depth);
	3894
	3895	return min < stopmin ? min : stopmin;
	3896	}
	3897
	3898	STATIC U32
	3899	S_add_data(RExC_state_t pRExC_state, U32 n, const char s)
	3900	{
	3901	U32 count = RExC_rxi->data ? RExC_rxi->data->count : 0;
	3902
	3903	Renewc(RExC_rxi->data,
	3904	sizeof(RExC_rxi->data) + sizeof(void) * (count + n - 1),
	3905	char, struct reg_data);
	3906	if(count)
	3907	Renew(RExC_rxi->data->what, count + n, U8);
	3908	else
	3909	Newx(RExC_rxi->data->what, n, U8);
	3910	RExC_rxi->data->count = count + n;
	3911	Copy(s, RExC_rxi->data->what + count, n, U8);
	3912	return count;
	3913	}
	3914
	3915	/XXX: todo make this not included in a non debugging perl /
	3916	#ifndef PERL_IN_XSUB_RE
	3917	void
	3918	Perl_reginitcolors(pTHX)
	3919	{
	3920	dVAR;
	3921	const char * const s = PerlEnv_getenv("PERL_RE_COLORS");
	3922	if (s) {
	3923	char *t = savepv(s);
	3924	int i = 0;
	3925	PL_colors[0] = t;
	3926	while (++i < 6) {
	3927	t = strchr(t, '\t');
	3928	if (t) {
	3929	*t = '\0';
	3930	PL_colors[i] = ++t;
	3931	}
	3932	else
	3933	PL_colors[i] = t = (char *)"";
	3934	}
	3935	} else {
	3936	int i = 0;
	3937	while (i < 6)
	3938	PL_colors[i++] = (char *)"";
	3939	}
	3940	PL_colorset = 1;
	3941	}
	3942	#endif
	3943
	3944
	3945	#ifdef TRIE_STUDY_OPT
	3946	#define CHECK_RESTUDY_GOTO \
	3947	if ( \
	3948	(data.flags & SCF_TRIE_RESTUDY) \
	3949	&& ! restudied++ \
	3950	) goto reStudy
	3951	#else
	3952	#define CHECK_RESTUDY_GOTO
	3953	#endif
	3954
	3955	/*
	3956	- pregcomp - compile a regular expression into internal code
	3957	*
	3958	* We can't allocate space until we know how big the compiled form will be,
	3959	* but we can't compile it (and thus know how big it is) until we've got a
	3960	* place to put the code. So we cheat: we compile it twice, once with code
	3961	* generation turned off and size counting turned on, and once "for real".
	3962	* This also means that we don't allocate space until we are sure that the
	3963	* thing really will compile successfully, and we never have to move the
	3964	* code and thus invalidate pointers into it. (Note that it has to be in
	3965	* one piece because free() must be able to free it all.) [NB: not true in perl]
	3966	*
	3967	* Beware that the optimization-preparation code in here knows about some
	3968	* of the structure of the compiled regexp. [I'll say.]
	3969	*/
	3970
	3971
	3972
	3973	#ifndef PERL_IN_XSUB_RE
	3974	#define RE_ENGINE_PTR &PL_core_reg_engine
	3975	#else
	3976	extern const struct regexp_engine my_reg_engine;
	3977	#define RE_ENGINE_PTR &my_reg_engine
	3978	#endif
	3979
	3980	#ifndef PERL_IN_XSUB_RE
	3981	regexp *
	3982	Perl_pregcomp(pTHX_ char exp, char xend, PMOP *pm)
	3983	{
	3984	dVAR;
	3985	HV * const table = GvHV(PL_hintgv);
	3986	/* Dispatch a request to compile a regexp to correct
	3987	regexp engine. */
	3988	if (table) {
	3989	SV **ptr= hv_fetchs(table, "regcomp", FALSE);
	3990	GET_RE_DEBUG_FLAGS_DECL;
	3991	if (ptr && SvIOK(ptr) && SvIV(ptr)) {
	3992	const regexp_engine eng=INT2PTR(regexp_engine,SvIV(*ptr));
	3993	DEBUG_COMPILE_r({
	3994	PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
	3995	SvIV(*ptr));
	3996	});
	3997	return CALLREGCOMP_ENG(eng, exp, xend, pm);
	3998	}
	3999	}
	4000	return Perl_re_compile(aTHX_ exp, xend, pm);
	4001	}
	4002	#endif
	4003
	4004	regexp *
	4005	Perl_re_compile(pTHX_ char exp, char xend, PMOP *pm)
	4006	{
	4007	dVAR;
	4008	register regexp *r;
	4009	register regexp_internal *ri;
	4010	regnode *scan;
	4011	regnode *first;
	4012	I32 flags;
	4013	I32 minlen = 0;
	4014	I32 sawplus = 0;
	4015	I32 sawopen = 0;
	4016	scan_data_t data;
	4017	RExC_state_t RExC_state;
	4018	RExC_state_t * const pRExC_state = &RExC_state;
	4019	#ifdef TRIE_STUDY_OPT
	4020	int restudied= 0;
	4021	RExC_state_t copyRExC_state;
	4022	#endif
	4023	GET_RE_DEBUG_FLAGS_DECL;
	4024	DEBUG_r(if (!PL_colorset) reginitcolors());
	4025
	4026	if (exp == NULL)
	4027	FAIL("NULL regexp argument");
	4028
	4029	RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
	4030
	4031	RExC_precomp = exp;
	4032	DEBUG_COMPILE_r({
	4033	SV *dsv= sv_newmortal();
	4034	RE_PV_QUOTED_DECL(s, RExC_utf8,
	4035	dsv, RExC_precomp, (xend - exp), 60);
	4036	PerlIO_printf(Perl_debug_log, "%sCompiling REx%s %s\n",
	4037	PL_colors[4],PL_colors[5],s);
	4038	});
	4039	RExC_flags = pm->op_pmflags;
	4040	RExC_sawback = 0;
	4041
	4042	RExC_seen = 0;
	4043	RExC_seen_zerolen = *exp == '^' ? -1 : 0;
	4044	RExC_seen_evals = 0;
	4045	RExC_extralen = 0;
	4046
	4047	/* First pass: determine size, legality. */
	4048	RExC_parse = exp;
	4049	RExC_start = exp;
	4050	RExC_end = xend;
	4051	RExC_naughty = 0;
	4052	RExC_npar = 1;
	4053	RExC_nestroot = 0;
	4054	RExC_size = 0L;
	4055	RExC_emit = &PL_regdummy;
	4056	RExC_whilem_seen = 0;
	4057	RExC_charnames = NULL;
	4058	RExC_open_parens = NULL;
	4059	RExC_close_parens = NULL;
	4060	RExC_opend = NULL;
	4061	RExC_paren_names = NULL;
	4062	#ifdef DEBUGGING
	4063	RExC_paren_name_list = NULL;
	4064	#endif
	4065	RExC_recurse = NULL;
	4066	RExC_recurse_count = 0;
	4067
	4068	#if 0 /* REGC() is (currently) a NOP at the first pass.
	4069	* Clever compilers notice this and complain. --jhi */
	4070	REGC((U8)REG_MAGIC, (char*)RExC_emit);
	4071	#endif
	4072	DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n"));
	4073	if (reg(pRExC_state, 0, &flags,1) == NULL) {
	4074	RExC_precomp = NULL;
	4075	return(NULL);
	4076	}
	4077	DEBUG_PARSE_r({
	4078	PerlIO_printf(Perl_debug_log,
	4079	"Required size %"IVdf" nodes\n"
	4080	"Starting second pass (creation)\n",
	4081	(IV)RExC_size);
	4082	RExC_lastnum=0;
	4083	RExC_lastparse=NULL;
	4084	});
	4085	/* Small enough for pointer-storage convention?
	4086	If extralen==0, this means that we will not need long jumps. */
	4087	if (RExC_size >= 0x10000L && RExC_extralen)
	4088	RExC_size += RExC_extralen;
	4089	else
	4090	RExC_extralen = 0;
	4091	if (RExC_whilem_seen > 15)
	4092	RExC_whilem_seen = 15;
	4093
	4094	#ifdef DEBUGGING
	4095	/* Make room for a sentinel value at the end of the program */
	4096	RExC_size++;
	4097	#endif
	4098
	4099	/* Allocate space and zero-initialize. Note, the two step process
	4100	of zeroing when in debug mode, thus anything assigned has to
	4101	happen after that */
	4102	Newxz(r, 1, regexp);
	4103	Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
	4104	char, regexp_internal);
	4105	if ( r == NULL \|\| ri == NULL )
	4106	FAIL("Regexp out of space");
	4107	#ifdef DEBUGGING
	4108	/* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
	4109	Zero(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode), char);
	4110	#else
	4111	/* bulk initialize base fields with 0. */
	4112	Zero(ri, sizeof(regexp_internal), char);
	4113	#endif
	4114
	4115	/* non-zero initialization begins here */
	4116	RXi_SET( r, ri );
	4117	r->engine= RE_ENGINE_PTR;
	4118	r->refcnt = 1;
	4119	r->prelen = xend - exp;
	4120	r->precomp = savepvn(RExC_precomp, r->prelen);
	4121	r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME;
	4122	r->intflags = 0;
	4123	r->nparens = RExC_npar - 1; /* set early to validate backrefs */
	4124
	4125	if (RExC_seen & REG_SEEN_RECURSE) {
	4126	Newxz(RExC_open_parens, RExC_npar,regnode *);
	4127	SAVEFREEPV(RExC_open_parens);
	4128	Newxz(RExC_close_parens,RExC_npar,regnode *);
	4129	SAVEFREEPV(RExC_close_parens);
	4130	}
	4131
	4132	/* Useful during FAIL. */
	4133	Newxz(ri->offsets, 2RExC_size+1, U32); / MJD 20001228 */
	4134	if (ri->offsets) {
	4135	ri->offsets[0] = RExC_size;
	4136	}
	4137	DEBUG_OFFSETS_r(PerlIO_printf(Perl_debug_log,
	4138	"%s %"UVuf" bytes for offset annotations.\n",
	4139	ri->offsets ? "Got" : "Couldn't get",
	4140	(UV)((2RExC_size+1) sizeof(U32))));
	4141
	4142	RExC_rx = r;
	4143	RExC_rxi = ri;
	4144
	4145	/* Second pass: emit code. */
	4146	RExC_flags = pm->op_pmflags; /* don't let top level (?i) bleed */
	4147	RExC_parse = exp;
	4148	RExC_end = xend;
	4149	RExC_naughty = 0;
	4150	RExC_npar = 1;
	4151	RExC_emit_start = ri->program;
	4152	RExC_emit = ri->program;
	4153	#ifdef DEBUGGING
	4154	/* put a sentinal on the end of the program so we can check for
	4155	overwrites */
	4156	ri->program[RExC_size].type = 255;
	4157	#endif
	4158	/* Store the count of eval-groups for security checks: */
	4159	RExC_rx->seen_evals = RExC_seen_evals;
	4160	REGC((U8)REG_MAGIC, (char*) RExC_emit++);
	4161	if (reg(pRExC_state, 0, &flags,1) == NULL)
	4162	return(NULL);
	4163
	4164	/* XXXX To minimize changes to RE engine we always allocate
	4165	3-units-long substrs field. */
	4166	Newx(r->substrs, 1, struct reg_substr_data);
	4167	if (RExC_recurse_count) {
	4168	Newxz(RExC_recurse,RExC_recurse_count,regnode *);
	4169	SAVEFREEPV(RExC_recurse);
	4170	}
	4171
	4172	reStudy:
	4173	r->minlen = minlen = sawplus = sawopen = 0;
	4174	Zero(r->substrs, 1, struct reg_substr_data);
	4175
	4176	#ifdef TRIE_STUDY_OPT
	4177	if ( restudied ) {
	4178	U32 seen=RExC_seen;
	4179	DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log,"Restudying\n"));
	4180
	4181	RExC_state = copyRExC_state;
	4182	if (seen & REG_TOP_LEVEL_BRANCHES)
	4183	RExC_seen \|= REG_TOP_LEVEL_BRANCHES;
	4184	else
	4185	RExC_seen &= ~REG_TOP_LEVEL_BRANCHES;
	4186	if (data.last_found) {
	4187	SvREFCNT_dec(data.longest_fixed);
	4188	SvREFCNT_dec(data.longest_float);
	4189	SvREFCNT_dec(data.last_found);
	4190	}
	4191	StructCopy(&zero_scan_data, &data, scan_data_t);
	4192	} else {
	4193	StructCopy(&zero_scan_data, &data, scan_data_t);
	4194	copyRExC_state = RExC_state;
	4195	}
	4196	#else
	4197	StructCopy(&zero_scan_data, &data, scan_data_t);
	4198	#endif
	4199
	4200	/* Dig out information for optimizations. */
	4201	r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; /* Again? */
	4202	pm->op_pmflags = RExC_flags;
	4203	if (UTF)
	4204	r->extflags \|= RXf_UTF8; /* Unicode in it? */
	4205	ri->regstclass = NULL;
	4206	if (RExC_naughty >= 10) /* Probably an expensive pattern. */
	4207	r->intflags \|= PREGf_NAUGHTY;
	4208	scan = ri->program + 1; /* First BRANCH. */
	4209
	4210	/* testing for BRANCH here tells us whether there is "must appear"
	4211	data in the pattern. If there is then we can use it for optimisations */
	4212	if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES)) { /* Only one top-level choice. */
	4213	I32 fake;
	4214	STRLEN longest_float_length, longest_fixed_length;
	4215	struct regnode_charclass_class ch_class; /* pointed to by data */
	4216	int stclass_flag;
	4217	I32 last_close = 0; /* pointed to by data */
	4218
	4219	first = scan;
	4220	/* Skip introductions and multiplicators >= 1. */
	4221	while ((OP(first) == OPEN && (sawopen = 1)) \|\|
	4222	/* An OR of one alternative - should not happen now. */
	4223	(OP(first) == BRANCH && OP(regnext(first)) != BRANCH) \|\|
	4224	/* for now we can't handle lookbehind IFMATCH*/
	4225	(OP(first) == IFMATCH && !first->flags) \|\|
	4226	(OP(first) == PLUS) \|\|
	4227	(OP(first) == MINMOD) \|\|
	4228	/* An {n,m} with n>0 */
	4229	(PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) )
	4230	{
	4231
	4232	if (OP(first) == PLUS)
	4233	sawplus = 1;
	4234	else
	4235	first += regarglen[OP(first)];
	4236	if (OP(first) == IFMATCH) {
	4237	first = NEXTOPER(first);
	4238	first += EXTRA_STEP_2ARGS;
	4239	} else /* XXX possible optimisation for /(?=)/ */
	4240	first = NEXTOPER(first);
	4241	}
	4242
	4243	/* Starting-point info. */
	4244	again:
	4245	DEBUG_PEEP("first:",first,0);
	4246	/* Ignore EXACT as we deal with it later. */
	4247	if (PL_regkind[OP(first)] == EXACT) {
	4248	if (OP(first) == EXACT)
	4249	NOOP; /* Empty, get anchored substr later. */
	4250	else if ((OP(first) == EXACTF \|\| OP(first) == EXACTFL))
	4251	ri->regstclass = first;
	4252	}
	4253	#ifdef TRIE_STCLASS
	4254	else if (PL_regkind[OP(first)] == TRIE &&
	4255	((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0)
	4256	{
	4257	regnode *trie_op;
	4258	/* this can happen only on restudy */
	4259	if ( OP(first) == TRIE ) {
	4260	struct regnode_1 trieop = (struct regnode_1 )
	4261	PerlMemShared_calloc(1, sizeof(struct regnode_1));
	4262	StructCopy(first,trieop,struct regnode_1);
	4263	trie_op=(regnode *)trieop;
	4264	} else {
	4265	struct regnode_charclass trieop = (struct regnode_charclass )
	4266	PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
	4267	StructCopy(first,trieop,struct regnode_charclass);
	4268	trie_op=(regnode *)trieop;
	4269	}
	4270	OP(trie_op)+=2;
	4271	make_trie_failtable(pRExC_state, (regnode *)first, trie_op, 0);
	4272	ri->regstclass = trie_op;
	4273	}
	4274	#endif
	4275	else if (strchr((const char*)PL_simple,OP(first)))
	4276	ri->regstclass = first;
	4277	else if (PL_regkind[OP(first)] == BOUND \|\|
	4278	PL_regkind[OP(first)] == NBOUND)
	4279	ri->regstclass = first;
	4280	else if (PL_regkind[OP(first)] == BOL) {
	4281	r->extflags \|= (OP(first) == MBOL
	4282	? RXf_ANCH_MBOL
	4283	: (OP(first) == SBOL
	4284	? RXf_ANCH_SBOL
	4285	: RXf_ANCH_BOL));
	4286	first = NEXTOPER(first);
	4287	goto again;
	4288	}
	4289	else if (OP(first) == GPOS) {
	4290	r->extflags \|= RXf_ANCH_GPOS;
	4291	first = NEXTOPER(first);
	4292	goto again;
	4293	}
	4294	else if ((!sawopen \|\| !RExC_sawback) &&
	4295	(OP(first) == STAR &&
	4296	PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
	4297	!(r->extflags & RXf_ANCH) && !(RExC_seen & REG_SEEN_EVAL))
	4298	{
	4299	/* turn .* into ^.* with an implied $=1 /
	4300	const int type =
	4301	(OP(NEXTOPER(first)) == REG_ANY)
	4302	? RXf_ANCH_MBOL
	4303	: RXf_ANCH_SBOL;
	4304	r->extflags \|= type;
	4305	r->intflags \|= PREGf_IMPLICIT;
	4306	first = NEXTOPER(first);
	4307	goto again;
	4308	}
	4309	if (sawplus && (!sawopen \|\| !RExC_sawback)
	4310	&& !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
	4311	/* x+ must match at the 1st pos of run of x's */
	4312	r->intflags \|= PREGf_SKIP;
	4313
	4314	/* Scan is after the zeroth branch, first is atomic matcher. */
	4315	#ifdef TRIE_STUDY_OPT
	4316	DEBUG_PARSE_r(
	4317	if (!restudied)
	4318	PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
	4319	(IV)(first - scan + 1))
	4320	);
	4321	#else
	4322	DEBUG_PARSE_r(
	4323	PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
	4324	(IV)(first - scan + 1))
	4325	);
	4326	#endif
	4327
	4328
	4329	/*
	4330	* If there's something expensive in the r.e., find the
	4331	* longest literal string that must appear and make it the
	4332	* regmust. Resolve ties in favor of later strings, since
	4333	* the regstart check works with the beginning of the r.e.
	4334	* and avoiding duplication strengthens checking. Not a
	4335	* strong reason, but sufficient in the absence of others.
	4336	* [Now we resolve ties in favor of the earlier string if
	4337	* it happens that c_offset_min has been invalidated, since the
	4338	* earlier string may buy us something the later one won't.]
	4339	*/
	4340
	4341	data.longest_fixed = newSVpvs("");
	4342	data.longest_float = newSVpvs("");
	4343	data.last_found = newSVpvs("");
	4344	data.longest = &(data.longest_fixed);
	4345	first = scan;
	4346	if (!ri->regstclass) {
	4347	cl_init(pRExC_state, &ch_class);
	4348	data.start_class = &ch_class;
	4349	stclass_flag = SCF_DO_STCLASS_AND;
	4350	} else /* XXXX Check for BOUND? */
	4351	stclass_flag = 0;
	4352	data.last_closep = &last_close;
	4353
	4354	minlen = study_chunk(pRExC_state, &first, &minlen, &fake, scan + RExC_size, /* Up to end */
	4355	&data, -1, NULL, NULL,
	4356	SCF_DO_SUBSTR \| SCF_WHILEM_VISITED_POS \| stclass_flag,0);
	4357
	4358
	4359	CHECK_RESTUDY_GOTO;
	4360
	4361
	4362	if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
	4363	&& data.last_start_min == 0 && data.last_end > 0
	4364	&& !RExC_seen_zerolen
	4365	&& !(RExC_seen & REG_SEEN_VERBARG)
	4366	&& (!(RExC_seen & REG_SEEN_GPOS) \|\| (r->extflags & RXf_ANCH_GPOS)))
	4367	r->extflags \|= RXf_CHECK_ALL;
	4368	scan_commit(pRExC_state, &data,&minlen,0);
	4369	SvREFCNT_dec(data.last_found);
	4370
	4371	/* Note that code very similar to this but for anchored string
	4372	follows immediately below, changes may need to be made to both.
	4373	Be careful.
	4374	*/
	4375	longest_float_length = CHR_SVLEN(data.longest_float);
	4376	if (longest_float_length
	4377	\|\| (data.flags & SF_FL_BEFORE_EOL
	4378	&& (!(data.flags & SF_FL_BEFORE_MEOL)
	4379	\|\| (RExC_flags & RXf_PMf_MULTILINE))))
	4380	{
	4381	I32 t,ml;
	4382
	4383	if (SvCUR(data.longest_fixed) /* ok to leave SvCUR */
	4384	&& data.offset_fixed == data.offset_float_min
	4385	&& SvCUR(data.longest_fixed) == SvCUR(data.longest_float))
	4386	goto remove_float; /* As in (a)+. */
	4387
	4388	/* copy the information about the longest float from the reg_scan_data
	4389	over to the program. */
	4390	if (SvUTF8(data.longest_float)) {
	4391	r->float_utf8 = data.longest_float;
	4392	r->float_substr = NULL;
	4393	} else {
	4394	r->float_substr = data.longest_float;
	4395	r->float_utf8 = NULL;
	4396	}
	4397	/* float_end_shift is how many chars that must be matched that
	4398	follow this item. We calculate it ahead of time as once the
	4399	lookbehind offset is added in we lose the ability to correctly
	4400	calculate it.*/
	4401	ml = data.minlen_float ? *(data.minlen_float)
	4402	: (I32)longest_float_length;
	4403	r->float_end_shift = ml - data.offset_float_min
	4404	- longest_float_length + (SvTAIL(data.longest_float) != 0)
	4405	+ data.lookbehind_float;
	4406	r->float_min_offset = data.offset_float_min - data.lookbehind_float;
	4407	r->float_max_offset = data.offset_float_max;
	4408	if (data.offset_float_max < I32_MAX) /* Don't offset infinity */
	4409	r->float_max_offset -= data.lookbehind_float;
	4410
	4411	t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
	4412	&& (!(data.flags & SF_FL_BEFORE_MEOL)
	4413	\|\| (RExC_flags & RXf_PMf_MULTILINE)));
	4414	fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0);
	4415	}
	4416	else {
	4417	remove_float:
	4418	r->float_substr = r->float_utf8 = NULL;
	4419	SvREFCNT_dec(data.longest_float);
	4420	longest_float_length = 0;
	4421	}
	4422
	4423	/* Note that code very similar to this but for floating string
	4424	is immediately above, changes may need to be made to both.
	4425	Be careful.
	4426	*/
	4427	longest_fixed_length = CHR_SVLEN(data.longest_fixed);
	4428	if (longest_fixed_length
	4429	\|\| (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
	4430	&& (!(data.flags & SF_FIX_BEFORE_MEOL)
	4431	\|\| (RExC_flags & RXf_PMf_MULTILINE))))
	4432	{
	4433	I32 t,ml;
	4434
	4435	/* copy the information about the longest fixed
	4436	from the reg_scan_data over to the program. */
	4437	if (SvUTF8(data.longest_fixed)) {
	4438	r->anchored_utf8 = data.longest_fixed;
	4439	r->anchored_substr = NULL;
	4440	} else {
	4441	r->anchored_substr = data.longest_fixed;
	4442	r->anchored_utf8 = NULL;
	4443	}
	4444	/* fixed_end_shift is how many chars that must be matched that
	4445	follow this item. We calculate it ahead of time as once the
	4446	lookbehind offset is added in we lose the ability to correctly
	4447	calculate it.*/
	4448	ml = data.minlen_fixed ? *(data.minlen_fixed)
	4449	: (I32)longest_fixed_length;
	4450	r->anchored_end_shift = ml - data.offset_fixed
	4451	- longest_fixed_length + (SvTAIL(data.longest_fixed) != 0)
	4452	+ data.lookbehind_fixed;
	4453	r->anchored_offset = data.offset_fixed - data.lookbehind_fixed;
	4454
	4455	t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
	4456	&& (!(data.flags & SF_FIX_BEFORE_MEOL)
	4457	\|\| (RExC_flags & RXf_PMf_MULTILINE)));
	4458	fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0);
	4459	}
	4460	else {
	4461	r->anchored_substr = r->anchored_utf8 = NULL;
	4462	SvREFCNT_dec(data.longest_fixed);
	4463	longest_fixed_length = 0;
	4464	}
	4465	if (ri->regstclass
	4466	&& (OP(ri->regstclass) == REG_ANY \|\| OP(ri->regstclass) == SANY))
	4467	ri->regstclass = NULL;
	4468	if ((!(r->anchored_substr \|\| r->anchored_utf8) \|\| r->anchored_offset)
	4469	&& stclass_flag
	4470	&& !(data.start_class->flags & ANYOF_EOS)
	4471	&& !cl_is_anything(data.start_class))
	4472	{
	4473	const U32 n = add_data(pRExC_state, 1, "f");
	4474
	4475	Newx(RExC_rxi->data->data[n], 1,
	4476	struct regnode_charclass_class);
	4477	StructCopy(data.start_class,
	4478	(struct regnode_charclass_class*)RExC_rxi->data->data[n],
	4479	struct regnode_charclass_class);
	4480	ri->regstclass = (regnode*)RExC_rxi->data->data[n];
	4481	r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
	4482	DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
	4483	regprop(r, sv, (regnode*)data.start_class);
	4484	PerlIO_printf(Perl_debug_log,
	4485	"synthetic stclass \"%s\".\n",
	4486	SvPVX_const(sv));});
	4487	}
	4488
	4489	/* A temporary algorithm prefers floated substr to fixed one to dig more info. */
	4490	if (longest_fixed_length > longest_float_length) {
	4491	r->check_end_shift = r->anchored_end_shift;
	4492	r->check_substr = r->anchored_substr;
	4493	r->check_utf8 = r->anchored_utf8;
	4494	r->check_offset_min = r->check_offset_max = r->anchored_offset;
	4495	if (r->extflags & RXf_ANCH_SINGLE)
	4496	r->extflags \|= RXf_NOSCAN;
	4497	}
	4498	else {
	4499	r->check_end_shift = r->float_end_shift;
	4500	r->check_substr = r->float_substr;
	4501	r->check_utf8 = r->float_utf8;
	4502	r->check_offset_min = r->float_min_offset;
	4503	r->check_offset_max = r->float_max_offset;
	4504	}
	4505	/* XXXX Currently intuiting is not compatible with ANCH_GPOS.
	4506	This should be changed ASAP! */
	4507	if ((r->check_substr \|\| r->check_utf8) && !(r->extflags & RXf_ANCH_GPOS)) {
	4508	r->extflags \|= RXf_USE_INTUIT;
	4509	if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
	4510	r->extflags \|= RXf_INTUIT_TAIL;
	4511	}
	4512	/* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
	4513	if ( (STRLEN)minlen < longest_float_length )
	4514	minlen= longest_float_length;
	4515	if ( (STRLEN)minlen < longest_fixed_length )
	4516	minlen= longest_fixed_length;
	4517	*/
	4518	}
	4519	else {
	4520	/* Several toplevels. Best we can is to set minlen. */
	4521	I32 fake;
	4522	struct regnode_charclass_class ch_class;
	4523	I32 last_close = 0;
	4524
	4525	DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "\nMulti Top Level\n"));
	4526
	4527	scan = ri->program + 1;
	4528	cl_init(pRExC_state, &ch_class);
	4529	data.start_class = &ch_class;
	4530	data.last_closep = &last_close;
	4531
	4532
	4533	minlen = study_chunk(pRExC_state, &scan, &minlen, &fake, scan + RExC_size,
	4534	&data, -1, NULL, NULL, SCF_DO_STCLASS_AND\|SCF_WHILEM_VISITED_POS,0);
	4535
	4536	CHECK_RESTUDY_GOTO;
	4537
	4538	r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
	4539	= r->float_substr = r->float_utf8 = NULL;
	4540	if (!(data.start_class->flags & ANYOF_EOS)
	4541	&& !cl_is_anything(data.start_class))
	4542	{
	4543	const U32 n = add_data(pRExC_state, 1, "f");
	4544
	4545	Newx(RExC_rxi->data->data[n], 1,
	4546	struct regnode_charclass_class);
	4547	StructCopy(data.start_class,
	4548	(struct regnode_charclass_class*)RExC_rxi->data->data[n],
	4549	struct regnode_charclass_class);
	4550	ri->regstclass = (regnode*)RExC_rxi->data->data[n];
	4551	r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
	4552	DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
	4553	regprop(r, sv, (regnode*)data.start_class);
	4554	PerlIO_printf(Perl_debug_log,
	4555	"synthetic stclass \"%s\".\n",
	4556	SvPVX_const(sv));});
	4557	}
	4558	}
	4559
	4560	/* Guard against an embedded (?=) or (?<=) with a longer minlen than
	4561	the "real" pattern. */
	4562	DEBUG_OPTIMISE_r({
	4563	PerlIO_printf(Perl_debug_log,"minlen: %"IVdf" r->minlen:%"IVdf"\n",
	4564	(IV)minlen, (IV)r->minlen);
	4565	});
	4566	r->minlenret = minlen;
	4567	if (r->minlen < minlen)
	4568	r->minlen = minlen;
	4569
	4570	if (RExC_seen & REG_SEEN_GPOS)
	4571	r->extflags \|= RXf_GPOS_SEEN;
	4572	if (RExC_seen & REG_SEEN_LOOKBEHIND)
	4573	r->extflags \|= RXf_LOOKBEHIND_SEEN;
	4574	if (RExC_seen & REG_SEEN_EVAL)
	4575	r->extflags \|= RXf_EVAL_SEEN;
	4576	if (RExC_seen & REG_SEEN_CANY)
	4577	r->extflags \|= RXf_CANY_SEEN;
	4578	if (RExC_seen & REG_SEEN_VERBARG)
	4579	r->intflags \|= PREGf_VERBARG_SEEN;
	4580	if (RExC_seen & REG_SEEN_CUTGROUP)
	4581	r->intflags \|= PREGf_CUTGROUP_SEEN;
	4582	if (RExC_paren_names)
	4583	r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
	4584	else
	4585	r->paren_names = NULL;
	4586	if (r->prelen == 3 && strEQ("\\s+", r->precomp))
	4587	r->extflags \|= RXf_WHITE;
	4588	else if (r->prelen == 1 && r->precomp[0] == '^')
	4589	r->extflags \|= RXf_START_ONLY;
	4590
	4591	#ifdef DEBUGGING
	4592	if (RExC_paren_names) {
	4593	ri->name_list_idx = add_data( pRExC_state, 1, "p" );
	4594	ri->data->data[ri->name_list_idx] = (void*)SvREFCNT_inc(RExC_paren_name_list);
	4595	} else
	4596	#endif
	4597	ri->name_list_idx = 0;
	4598
	4599	if (RExC_recurse_count) {
	4600	for ( ; RExC_recurse_count ; RExC_recurse_count-- ) {
	4601	const regnode *scan = RExC_recurse[RExC_recurse_count-1];
	4602	ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan );
	4603	}
	4604	}
	4605	Newxz(r->startp, RExC_npar, I32);
	4606	Newxz(r->endp, RExC_npar, I32);
	4607	/* assume we don't need to swap parens around before we match */
	4608
	4609	DEBUG_DUMP_r({
	4610	PerlIO_printf(Perl_debug_log,"Final program:\n");
	4611	regdump(r);
	4612	});
	4613	DEBUG_OFFSETS_r(if (ri->offsets) {
	4614	const U32 len = ri->offsets[0];
	4615	U32 i;
	4616	GET_RE_DEBUG_FLAGS_DECL;
	4617	PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)ri->offsets[0]);
	4618	for (i = 1; i <= len; i++) {
	4619	if (ri->offsets[i2-1] \|\| ri->offsets[i2])
	4620	PerlIO_printf(Perl_debug_log, "%"UVuf":%"UVuf"[%"UVuf"] ",
	4621	(UV)i, (UV)ri->offsets[i2-1], (UV)ri->offsets[i2]);
	4622	}
	4623	PerlIO_printf(Perl_debug_log, "\n");
	4624	});
	4625	return(r);
	4626	}
	4627
	4628	#undef CORE_ONLY_BLOCK
	4629	#undef RE_ENGINE_PTR
	4630
	4631	#ifndef PERL_IN_XSUB_RE
	4632	SV*
	4633	Perl_reg_named_buff_get(pTHX_ SV* namesv, const REGEXP * const from_re, U32 flags)
	4634	{
	4635	AV *retarray = NULL;
	4636	SV *ret;
	4637	if (flags & 1)
	4638	retarray=newAV();
	4639
	4640	if (from_re \|\| PL_curpm) {
	4641	const REGEXP * const rx = from_re ? from_re : PM_GETRE(PL_curpm);
	4642	if (rx && rx->paren_names) {
	4643	HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
	4644	if (he_str) {
	4645	IV i;
	4646	SV* sv_dat=HeVAL(he_str);
	4647	I32 nums=(I32)SvPVX(sv_dat);
	4648	for ( i=0; i<SvIVX(sv_dat); i++ ) {
	4649	if ((I32)(rx->lastparen) >= nums[i] &&
	4650	rx->endp[nums[i]] != -1)
	4651	{
	4652	ret = reg_numbered_buff_get(nums[i],rx,NULL,0);
	4653	if (!retarray)
	4654	return ret;
	4655	} else {
	4656	ret = newSVsv(&PL_sv_undef);
	4657	}
	4658	if (retarray) {
	4659	SvREFCNT_inc(ret);
	4660	av_push(retarray, ret);
	4661	}
	4662	}
	4663	if (retarray)
	4664	return (SV*)retarray;
	4665	}
	4666	}
	4667	}
	4668	return NULL;
	4669	}
	4670
	4671	SV*
	4672	Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv, U32 flags)
	4673	{
	4674	char *s = NULL;
	4675	I32 i = 0;
	4676	I32 s1, t1;
	4677	SV *sv = usesv ? usesv : newSVpvs("");
	4678	PERL_UNUSED_ARG(flags);
	4679
	4680	if (!rx->subbeg) {
	4681	sv_setsv(sv,&PL_sv_undef);
	4682	return sv;
	4683	}
	4684	else
	4685	if (paren == -2 && rx->startp[0] != -1) {
	4686	/* $` */
	4687	i = rx->startp[0];
	4688	s = rx->subbeg;
	4689	}
	4690	else
	4691	if (paren == -1 && rx->endp[0] != -1) {
	4692	/* $' */
	4693	s = rx->subbeg + rx->endp[0];
	4694	i = rx->sublen - rx->endp[0];
	4695	}
	4696	else
	4697	if ( 0 <= paren && paren <= (I32)rx->nparens &&
	4698	(s1 = rx->startp[paren]) != -1 &&
	4699	(t1 = rx->endp[paren]) != -1)
	4700	{
	4701	/* $& $1 ... */
	4702	i = t1 - s1;
	4703	s = rx->subbeg + s1;
	4704	} else {
	4705	sv_setsv(sv,&PL_sv_undef);
	4706	return sv;
	4707	}
	4708	assert(rx->sublen >= (s - rx->subbeg) + i );
	4709	if (i >= 0) {
	4710	const int oldtainted = PL_tainted;
	4711	TAINT_NOT;
	4712	sv_setpvn(sv, s, i);
	4713	PL_tainted = oldtainted;
	4714	if ( (rx->extflags & RXf_CANY_SEEN)
	4715	? (RX_MATCH_UTF8(rx)
	4716	&& (!i \|\| is_utf8_string((U8*)s, i)))
	4717	: (RX_MATCH_UTF8(rx)) )
	4718	{
	4719	SvUTF8_on(sv);
	4720	}
	4721	else
	4722	SvUTF8_off(sv);
	4723	if (PL_tainting) {
	4724	if (RX_MATCH_TAINTED(rx)) {
	4725	if (SvTYPE(sv) >= SVt_PVMG) {
	4726	MAGIC* const mg = SvMAGIC(sv);
	4727	MAGIC* mgt;
	4728	PL_tainted = 1;
	4729	SvMAGIC_set(sv, mg->mg_moremagic);
	4730	SvTAINT(sv);
	4731	if ((mgt = SvMAGIC(sv))) {
	4732	mg->mg_moremagic = mgt;
	4733	SvMAGIC_set(sv, mg);
	4734	}
	4735	} else {
	4736	PL_tainted = 1;
	4737	SvTAINT(sv);
	4738	}
	4739	} else
	4740	SvTAINTED_off(sv);
	4741	}
	4742	} else {
	4743	sv_setsv(sv,&PL_sv_undef);
	4744	}
	4745	return sv;
	4746	}
	4747	#endif
	4748
	4749	/* Scans the name of a named buffer from the pattern.
	4750	* If flags is REG_RSN_RETURN_NULL returns null.
	4751	* If flags is REG_RSN_RETURN_NAME returns an SV* containing the name
	4752	* If flags is REG_RSN_RETURN_DATA returns the data SV* corresponding
	4753	* to the parsed name as looked up in the RExC_paren_names hash.
	4754	* If there is an error throws a vFAIL().. type exception.
	4755	*/
	4756
	4757	#define REG_RSN_RETURN_NULL 0
	4758	#define REG_RSN_RETURN_NAME 1
	4759	#define REG_RSN_RETURN_DATA 2
	4760
	4761	STATIC SV*
	4762	S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
	4763	char *name_start = RExC_parse;
	4764
	4765	if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
	4766	/* skip IDFIRST by using do...while */
	4767	if (UTF)
	4768	do {
	4769	RExC_parse += UTF8SKIP(RExC_parse);
	4770	} while (isALNUM_utf8((U8*)RExC_parse));
	4771	else
	4772	do {
	4773	RExC_parse++;
	4774	} while (isALNUM(*RExC_parse));
	4775	}
	4776
	4777	if ( flags ) {
	4778	SV* sv_name = sv_2mortal(Perl_newSVpvn(aTHX_ name_start,
	4779	(int)(RExC_parse - name_start)));
	4780	if (UTF)
	4781	SvUTF8_on(sv_name);
	4782	if ( flags == REG_RSN_RETURN_NAME)
	4783	return sv_name;
	4784	else if (flags==REG_RSN_RETURN_DATA) {
	4785	HE *he_str = NULL;
	4786	SV *sv_dat = NULL;
	4787	if ( ! sv_name ) /* should not happen*/
	4788	Perl_croak(aTHX_ "panic: no svname in reg_scan_name");
	4789	if (RExC_paren_names)
	4790	he_str = hv_fetch_ent( RExC_paren_names, sv_name, 0, 0 );
	4791	if ( he_str )
	4792	sv_dat = HeVAL(he_str);
	4793	if ( ! sv_dat )
	4794	vFAIL("Reference to nonexistent named group");
	4795	return sv_dat;
	4796	}
	4797	else {
	4798	Perl_croak(aTHX_ "panic: bad flag in reg_scan_name");
	4799	}
	4800	/* NOT REACHED */
	4801	}
	4802	return NULL;
	4803	}
	4804
	4805	#define DEBUG_PARSE_MSG(funcname) DEBUG_PARSE_r({ \
	4806	int rem=(int)(RExC_end - RExC_parse); \
	4807	int cut; \
	4808	int num; \
	4809	int iscut=0; \
	4810	if (rem>10) { \
	4811	rem=10; \
	4812	iscut=1; \
	4813	} \
	4814	cut=10-rem; \
	4815	if (RExC_lastparse!=RExC_parse) \
	4816	PerlIO_printf(Perl_debug_log," >%.s%-s", \
	4817	rem, RExC_parse, \
	4818	cut + 4, \
	4819	iscut ? "..." : "<" \
	4820	); \
	4821	else \
	4822	PerlIO_printf(Perl_debug_log,"%16s",""); \
	4823	\
	4824	if (SIZE_ONLY) \
	4825	num=RExC_size; \
	4826	else \
	4827	num=REG_NODE_NUM(RExC_emit); \
	4828	if (RExC_lastnum!=num) \
	4829	PerlIO_printf(Perl_debug_log,"\|%4d",num); \
	4830	else \
	4831	PerlIO_printf(Perl_debug_log,"\|%4s",""); \
	4832	PerlIO_printf(Perl_debug_log,"\|%*s%-4s", \
	4833	(int)((depth*2)), "", \
	4834	(funcname) \
	4835	); \
	4836	RExC_lastnum=num; \
	4837	RExC_lastparse=RExC_parse; \
	4838	})
	4839
	4840
	4841
	4842	#define DEBUG_PARSE(funcname) DEBUG_PARSE_r({ \
	4843	DEBUG_PARSE_MSG((funcname)); \
	4844	PerlIO_printf(Perl_debug_log,"%4s","\n"); \
	4845	})
	4846	#define DEBUG_PARSE_FMT(funcname,fmt,args) DEBUG_PARSE_r({ \
	4847	DEBUG_PARSE_MSG((funcname)); \
	4848	PerlIO_printf(Perl_debug_log,fmt "\n",args); \
	4849	})
	4850	/*
	4851	- reg - regular expression, i.e. main body or parenthesized thing
	4852	*
	4853	* Caller must absorb opening parenthesis.
	4854	*
	4855	* Combining parenthesis handling with the base level of regular expression
	4856	* is a trifle forced, but the need to tie the tails of the branches to what
	4857	* follows makes it hard to avoid.
	4858	*/
	4859	#define REGTAIL(x,y,z) regtail((x),(y),(z),depth+1)
	4860	#ifdef DEBUGGING
	4861	#define REGTAIL_STUDY(x,y,z) regtail_study((x),(y),(z),depth+1)
	4862	#else
	4863	#define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
	4864	#endif
	4865
	4866	/* this idea is borrowed from STR_WITH_LEN in handy.h */
	4867	#define CHECK_WORD(s,v,l) \
	4868	(((sizeof(s)-1)==(l)) && (strnEQ(start_verb, (s ""), (sizeof(s)-1))))
	4869
	4870	STATIC regnode *
	4871	S_reg(pTHX_ RExC_state_t pRExC_state, I32 paren, I32 flagp,U32 depth)
	4872	/* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
	4873	{
	4874	dVAR;
	4875	register regnode ret; / Will be the head of the group. */
	4876	register regnode *br;
	4877	register regnode *lastbr;
	4878	register regnode *ender = NULL;
	4879	register I32 parno = 0;
	4880	I32 flags;
	4881	const I32 oregflags = RExC_flags;
	4882	bool have_branch = 0;
	4883	bool is_open = 0;
	4884
	4885	/* for (?g), (?gc), and (?o) warnings; warning
	4886	about (?c) will warn about (?g) -- japhy */
	4887
	4888	#define WASTED_O 0x01
	4889	#define WASTED_G 0x02
	4890	#define WASTED_C 0x04
	4891	#define WASTED_GC (0x02\|0x04)
	4892	I32 wastedflags = 0x00;
	4893
	4894	char * parse_start = RExC_parse; /* MJD */
	4895	char * const oregcomp_parse = RExC_parse;
	4896
	4897	GET_RE_DEBUG_FLAGS_DECL;
	4898	DEBUG_PARSE("reg ");
	4899
	4900
	4901	flagp = 0; / Tentatively. */
	4902
	4903
	4904	/* Make an OPEN node, if parenthesized. */
	4905	if (paren) {
	4906	if ( RExC_parse == '') { /* (VERB:ARG) /
	4907	char *start_verb = RExC_parse;
	4908	STRLEN verb_len = 0;
	4909	char *start_arg = NULL;
	4910	unsigned char op = 0;
	4911	int argok = 1;
	4912	int internal_argval = 0; /* internal_argval is only useful if !argok */
	4913	while ( RExC_parse && RExC_parse != ')' ) {
	4914	if ( *RExC_parse == ':' ) {
	4915	start_arg = RExC_parse + 1;
	4916	break;
	4917	}
	4918	RExC_parse++;
	4919	}
	4920	++start_verb;
	4921	verb_len = RExC_parse - start_verb;
	4922	if ( start_arg ) {
	4923	RExC_parse++;
	4924	while ( RExC_parse && RExC_parse != ')' )
	4925	RExC_parse++;
	4926	if ( *RExC_parse != ')' )
	4927	vFAIL("Unterminated verb pattern argument");
	4928	if ( RExC_parse == start_arg )
	4929	start_arg = NULL;
	4930	} else {
	4931	if ( *RExC_parse != ')' )
	4932	vFAIL("Unterminated verb pattern");
	4933	}
	4934
	4935	switch ( *start_verb ) {
	4936	case 'A': /* (ACCEPT) /
	4937	if ( CHECK_WORD("ACCEPT",start_verb,verb_len) ) {
	4938	op = ACCEPT;
	4939	internal_argval = RExC_nestroot;
	4940	}
	4941	break;
	4942	case 'C': /* (COMMIT) /
	4943	if ( CHECK_WORD("COMMIT",start_verb,verb_len) )
	4944	op = COMMIT;
	4945	break;
	4946	case 'F': /* (FAIL) /
	4947	if ( verb_len==1 \|\| CHECK_WORD("FAIL",start_verb,verb_len) ) {
	4948	op = OPFAIL;
	4949	argok = 0;
	4950	}
	4951	break;
	4952	case ':': /* (:NAME) /
	4953	case 'M': /* (MARK:NAME) /
	4954	if ( verb_len==0 \|\| CHECK_WORD("MARK",start_verb,verb_len) ) {
	4955	op = MARKPOINT;
	4956	argok = -1;
	4957	}
	4958	break;
	4959	case 'P': /* (PRUNE) /
	4960	if ( CHECK_WORD("PRUNE",start_verb,verb_len) )
	4961	op = PRUNE;
	4962	break;
	4963	case 'S': /* (SKIP) /
	4964	if ( CHECK_WORD("SKIP",start_verb,verb_len) )
	4965	op = SKIP;
	4966	break;
	4967	case 'T': /* (THEN) /
	4968	/* [19:06] <TimToady> :: is then */
	4969	if ( CHECK_WORD("THEN",start_verb,verb_len) ) {
	4970	op = CUTGROUP;
	4971	RExC_seen \|= REG_SEEN_CUTGROUP;
	4972	}
	4973	break;
	4974	}
	4975	if ( ! op ) {
	4976	RExC_parse++;
	4977	vFAIL3("Unknown verb pattern '%.*s'",
	4978	verb_len, start_verb);
	4979	}
	4980	if ( argok ) {
	4981	if ( start_arg && internal_argval ) {
	4982	vFAIL3("Verb pattern '%.*s' may not have an argument",
	4983	verb_len, start_verb);
	4984	} else if ( argok < 0 && !start_arg ) {
	4985	vFAIL3("Verb pattern '%.*s' has a mandatory argument",
	4986	verb_len, start_verb);
	4987	} else {
	4988	ret = reganode(pRExC_state, op, internal_argval);
	4989	if ( ! internal_argval && ! SIZE_ONLY ) {
	4990	if (start_arg) {
	4991	SV *sv = newSVpvn( start_arg, RExC_parse - start_arg);
	4992	ARG(ret) = add_data( pRExC_state, 1, "S" );
	4993	RExC_rxi->data->data[ARG(ret)]=(void*)sv;
	4994	ret->flags = 0;
	4995	} else {
	4996	ret->flags = 1;
	4997	}
	4998	}
	4999	}
	5000	if (!internal_argval)
	5001	RExC_seen \|= REG_SEEN_VERBARG;
	5002	} else if ( start_arg ) {
	5003	vFAIL3("Verb pattern '%.*s' may not have an argument",
	5004	verb_len, start_verb);
	5005	} else {
	5006	ret = reg_node(pRExC_state, op);
	5007	}
	5008	nextchar(pRExC_state);
	5009	return ret;
	5010	} else
	5011	if (RExC_parse == '?') { / (?...) */
	5012	bool is_logical = 0;
	5013	const char * const seqstart = RExC_parse;
	5014
	5015	RExC_parse++;
	5016	paren = *RExC_parse++;
	5017	ret = NULL; /* For look-ahead/behind. */
	5018	switch (paren) {
	5019
	5020	case 'P': /* (?P...) variants for those used to PCRE/Python */
	5021	paren = *RExC_parse++;
	5022	if ( paren == '<') /* (?P<...>) named capture */
	5023	goto named_capture;
	5024	else if (paren == '>') { /* (?P>name) named recursion */
	5025	goto named_recursion;
	5026	}
	5027	else if (paren == '=') { /* (?P=...) named backref */
	5028	/* this pretty much dupes the code for \k<NAME> in regatom(), if
	5029	you change this make sure you change that */
	5030	char* name_start = RExC_parse;
	5031	U32 num = 0;
	5032	SV *sv_dat = reg_scan_name(pRExC_state,
	5033	SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
	5034	if (RExC_parse == name_start \|\| *RExC_parse != ')')
	5035	vFAIL2("Sequence %.3s... not terminated",parse_start);
	5036
	5037	if (!SIZE_ONLY) {
	5038	num = add_data( pRExC_state, 1, "S" );
	5039	RExC_rxi->data->data[num]=(void*)sv_dat;
	5040	SvREFCNT_inc(sv_dat);
	5041	}
	5042	RExC_sawback = 1;
	5043	ret = reganode(pRExC_state,
	5044	(U8)(FOLD ? (LOC ? NREFFL : NREFF) : NREF),
	5045	num);
	5046	*flagp \|= HASWIDTH;
	5047
	5048	Set_Node_Offset(ret, parse_start+1);
	5049	Set_Node_Cur_Length(ret); /* MJD */
	5050
	5051	nextchar(pRExC_state);
	5052	return ret;
	5053	}
	5054	goto unknown;
	5055	case '<': /* (?<...) */
	5056	if (*RExC_parse == '!')
	5057	paren = ',';
	5058	else if (*RExC_parse != '=')
	5059	named_capture:
	5060	{ /* (?<...>) */
	5061	char *name_start;
	5062	SV *svname;
	5063	paren= '>';
	5064	case '\'': /* (?'...') */
	5065	name_start= RExC_parse;
	5066	svname = reg_scan_name(pRExC_state,
	5067	SIZE_ONLY ? /* reverse test from the others */
	5068	REG_RSN_RETURN_NAME :
	5069	REG_RSN_RETURN_NULL);
	5070	if (RExC_parse == name_start)
	5071	goto unknown;
	5072	if (*RExC_parse != paren)
	5073	vFAIL2("Sequence (?%c... not terminated",
	5074	paren=='>' ? '<' : paren);
	5075	if (SIZE_ONLY) {
	5076	HE *he_str;
	5077	SV *sv_dat = NULL;
	5078	if (!svname) /* shouldnt happen */
	5079	Perl_croak(aTHX_
	5080	"panic: reg_scan_name returned NULL");
	5081	if (!RExC_paren_names) {
	5082	RExC_paren_names= newHV();
	5083	sv_2mortal((SV*)RExC_paren_names);
	5084	#ifdef DEBUGGING
	5085	RExC_paren_name_list= newAV();
	5086	sv_2mortal((SV*)RExC_paren_name_list);
	5087	#endif
	5088	}
	5089	he_str = hv_fetch_ent( RExC_paren_names, svname, 1, 0 );
	5090	if ( he_str )
	5091	sv_dat = HeVAL(he_str);
	5092	if ( ! sv_dat ) {
	5093	/* croak baby croak */
	5094	Perl_croak(aTHX_
	5095	"panic: paren_name hash element allocation failed");
	5096	} else if ( SvPOK(sv_dat) ) {
	5097	IV count=SvIV(sv_dat);
	5098	I32 pv=(I32)SvGROW(sv_dat,SvCUR(sv_dat)+sizeof(I32)+1);
	5099	SvCUR_set(sv_dat,SvCUR(sv_dat)+sizeof(I32));
	5100	pv[count]=RExC_npar;
	5101	SvIVX(sv_dat)++;
	5102	} else {
	5103	(void)SvUPGRADE(sv_dat,SVt_PVNV);
	5104	sv_setpvn(sv_dat, (char *)&(RExC_npar), sizeof(I32));
	5105	SvIOK_on(sv_dat);
	5106	SvIVX(sv_dat)= 1;
	5107	}
	5108	#ifdef DEBUGGING
	5109	if (!av_store(RExC_paren_name_list, RExC_npar, SvREFCNT_inc(svname)))
	5110	SvREFCNT_dec(svname);
	5111	#endif
	5112
	5113	/sv_dump(sv_dat);/
	5114	}
	5115	nextchar(pRExC_state);
	5116	paren = 1;
	5117	goto capturing_parens;
	5118	}
	5119	RExC_seen \|= REG_SEEN_LOOKBEHIND;
	5120	RExC_parse++;
	5121	case '=': /* (?=...) */
	5122	case '!': /* (?!...) */
	5123	RExC_seen_zerolen++;
	5124	if (*RExC_parse == ')') {
	5125	ret=reg_node(pRExC_state, OPFAIL);
	5126	nextchar(pRExC_state);
	5127	return ret;
	5128	}
	5129	case ':': /* (?:...) */
	5130	case '>': /* (?>...) */
	5131	break;
	5132	case '$': /* (?$...) */
	5133	case '@': /* (?@...) */
	5134	vFAIL2("Sequence (?%c...) not implemented", (int)paren);
	5135	break;
	5136	case '#': /* (?#...) */
	5137	while (RExC_parse && RExC_parse != ')')
	5138	RExC_parse++;
	5139	if (*RExC_parse != ')')
	5140	FAIL("Sequence (?#... not terminated");
	5141	nextchar(pRExC_state);
	5142	*flagp = TRYAGAIN;
	5143	return NULL;
	5144	case '0' : /* (?0) */
	5145	case 'R' : /* (?R) */
	5146	if (*RExC_parse != ')')
	5147	FAIL("Sequence (?R) not terminated");
	5148	ret = reg_node(pRExC_state, GOSTART);
	5149	nextchar(pRExC_state);
	5150	return ret;
	5151	/notreached/
	5152	{ /* named and numeric backreferences */
	5153	I32 num;
	5154	case '&': /* (?&NAME) */
	5155	parse_start = RExC_parse - 1;
	5156	named_recursion:
	5157	{
	5158	SV *sv_dat = reg_scan_name(pRExC_state,
	5159	SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
	5160	num = sv_dat ? ((I32 )SvPVX(sv_dat)) : 0;
	5161	}
	5162	goto gen_recurse_regop;
	5163	/* NOT REACHED */
	5164	case '+':
	5165	if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
	5166	RExC_parse++;
	5167	vFAIL("Illegal pattern");
	5168	}
	5169	goto parse_recursion;
	5170	/* NOT REACHED*/
	5171	case '-': /* (?-1) */
	5172	if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
	5173	RExC_parse--; /* rewind to let it be handled later */
	5174	goto parse_flags;
	5175	}
	5176	/FALLTHROUGH /
	5177	case '1': case '2': case '3': case '4': /* (?1) */
	5178	case '5': case '6': case '7': case '8': case '9':
	5179	RExC_parse--;
	5180	parse_recursion:
	5181	num = atoi(RExC_parse);
	5182	parse_start = RExC_parse - 1; /* MJD */
	5183	if (*RExC_parse == '-')
	5184	RExC_parse++;
	5185	while (isDIGIT(*RExC_parse))
	5186	RExC_parse++;
	5187	if (*RExC_parse!=')')
	5188	vFAIL("Expecting close bracket");
	5189
	5190	gen_recurse_regop:
	5191	if ( paren == '-' ) {
	5192	/*
	5193	Diagram of capture buffer numbering.
	5194	Top line is the normal capture buffer numbers
	5195	Botton line is the negative indexing as from
	5196	the X (the (?-2))
	5197
	5198	+ 1 2 3 4 5 X 6 7
	5199	/(a(x)y)(a(b(c(?-2)d)e)f)(g(h))/
	5200	- 5 4 3 2 1 X x x
	5201
	5202	*/
	5203	num = RExC_npar + num;
	5204	if (num < 1) {
	5205	RExC_parse++;
	5206	vFAIL("Reference to nonexistent group");
	5207	}
	5208	} else if ( paren == '+' ) {
	5209	num = RExC_npar + num - 1;
	5210	}
	5211
	5212	ret = reganode(pRExC_state, GOSUB, num);
	5213	if (!SIZE_ONLY) {
	5214	if (num > (I32)RExC_rx->nparens) {
	5215	RExC_parse++;
	5216	vFAIL("Reference to nonexistent group");
	5217	}
	5218	ARG2L_SET( ret, RExC_recurse_count++);
	5219	RExC_emit++;
	5220	DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
	5221	"Recurse #%"UVuf" to %"IVdf"\n", (UV)ARG(ret), (IV)ARG2L(ret)));
	5222	} else {
	5223	RExC_size++;
	5224	}
	5225	RExC_seen \|= REG_SEEN_RECURSE;
	5226	Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */
	5227	Set_Node_Offset(ret, parse_start); /* MJD */
	5228
	5229	nextchar(pRExC_state);
	5230	return ret;
	5231	} /* named and numeric backreferences */
	5232	/* NOT REACHED */
	5233
	5234	case 'p': /* (?p...) */
	5235	if (SIZE_ONLY && ckWARN2(WARN_DEPRECATED, WARN_REGEXP))
	5236	vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})");
	5237	/* FALL THROUGH*/
	5238	case '?': /* (??...) */
	5239	is_logical = 1;
	5240	if (*RExC_parse != '{')
	5241	goto unknown;
	5242	paren = *RExC_parse++;
	5243	/* FALL THROUGH */
	5244	case '{': /* (?{...}) */
	5245	{
	5246	I32 count = 1;
	5247	U32 n = 0;
	5248	char c;
	5249	char *s = RExC_parse;
	5250
	5251	RExC_seen_zerolen++;
	5252	RExC_seen \|= REG_SEEN_EVAL;
	5253	while (count && (c = *RExC_parse)) {
	5254	if (c == '\\') {
	5255	if (RExC_parse[1])
	5256	RExC_parse++;
	5257	}
	5258	else if (c == '{')
	5259	count++;
	5260	else if (c == '}')
	5261	count--;
	5262	RExC_parse++;
	5263	}
	5264	if (*RExC_parse != ')') {
	5265	RExC_parse = s;
	5266	vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
	5267	}
	5268	if (!SIZE_ONLY) {
	5269	PAD *pad;
	5270	OP_4tree sop, rop;
	5271	SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
	5272
	5273	ENTER;
	5274	Perl_save_re_context(aTHX);
	5275	rop = sv_compile_2op(sv, &sop, "re", &pad);
	5276	sop->op_private \|= OPpREFCOUNTED;
	5277	/* re_dup will OpREFCNT_inc */
	5278	OpREFCNT_set(sop, 1);
	5279	LEAVE;
	5280
	5281	n = add_data(pRExC_state, 3, "nop");
	5282	RExC_rxi->data->data[n] = (void*)rop;
	5283	RExC_rxi->data->data[n+1] = (void*)sop;
	5284	RExC_rxi->data->data[n+2] = (void*)pad;
	5285	SvREFCNT_dec(sv);
	5286	}
	5287	else { /* First pass */
	5288	if (PL_reginterp_cnt < ++RExC_seen_evals
	5289	&& IN_PERL_RUNTIME)
	5290	/* No compiled RE interpolated, has runtime
	5291	components ===> unsafe. */
	5292	FAIL("Eval-group not allowed at runtime, use re 'eval'");
	5293	if (PL_tainting && PL_tainted)
	5294	FAIL("Eval-group in insecure regular expression");
	5295	#if PERL_VERSION > 8
	5296	if (IN_PERL_COMPILETIME)
	5297	PL_cv_has_eval = 1;
	5298	#endif
	5299	}
	5300
	5301	nextchar(pRExC_state);
	5302	if (is_logical) {
	5303	ret = reg_node(pRExC_state, LOGICAL);
	5304	if (!SIZE_ONLY)
	5305	ret->flags = 2;
	5306	REGTAIL(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
	5307	/* deal with the length of this later - MJD */
	5308	return ret;
	5309	}
	5310	ret = reganode(pRExC_state, EVAL, n);
	5311	Set_Node_Length(ret, RExC_parse - parse_start + 1);
	5312	Set_Node_Offset(ret, parse_start);
	5313	return ret;
	5314	}
	5315	case '(': /* (?(?{...})...) and (?(?=...)...) */
	5316	{
	5317	int is_define= 0;
	5318	if (RExC_parse[0] == '?') { /* (?(?...)) */
	5319	if (RExC_parse[1] == '=' \|\| RExC_parse[1] == '!'
	5320	\|\| RExC_parse[1] == '<'
	5321	\|\| RExC_parse[1] == '{') { /* Lookahead or eval. */
	5322	I32 flag;
	5323
	5324	ret = reg_node(pRExC_state, LOGICAL);
	5325	if (!SIZE_ONLY)
	5326	ret->flags = 1;
	5327	REGTAIL(pRExC_state, ret, reg(pRExC_state, 1, &flag,depth+1));
	5328	goto insert_if;
	5329	}
	5330	}
	5331	else if ( RExC_parse[0] == '<' /* (?(<NAME>)...) */
	5332	\|\| RExC_parse[0] == '\'' ) /* (?('NAME')...) */
	5333	{
	5334	char ch = RExC_parse[0] == '<' ? '>' : '\'';
	5335	char *name_start= RExC_parse++;
	5336	U32 num = 0;
	5337	SV *sv_dat=reg_scan_name(pRExC_state,
	5338	SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
	5339	if (RExC_parse == name_start \|\| *RExC_parse != ch)
	5340	vFAIL2("Sequence (?(%c... not terminated",
	5341	(ch == '>' ? '<' : ch));
	5342	RExC_parse++;
	5343	if (!SIZE_ONLY) {
	5344	num = add_data( pRExC_state, 1, "S" );
	5345	RExC_rxi->data->data[num]=(void*)sv_dat;
	5346	SvREFCNT_inc(sv_dat);
	5347	}
	5348	ret = reganode(pRExC_state,NGROUPP,num);
	5349	goto insert_if_check_paren;
	5350	}
	5351	else if (RExC_parse[0] == 'D' &&
	5352	RExC_parse[1] == 'E' &&
	5353	RExC_parse[2] == 'F' &&
	5354	RExC_parse[3] == 'I' &&
	5355	RExC_parse[4] == 'N' &&
	5356	RExC_parse[5] == 'E')
	5357	{
	5358	ret = reganode(pRExC_state,DEFINEP,0);
	5359	RExC_parse +=6 ;
	5360	is_define = 1;
	5361	goto insert_if_check_paren;
	5362	}
	5363	else if (RExC_parse[0] == 'R') {
	5364	RExC_parse++;
	5365	parno = 0;
	5366	if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
	5367	parno = atoi(RExC_parse++);
	5368	while (isDIGIT(*RExC_parse))
	5369	RExC_parse++;
	5370	} else if (RExC_parse[0] == '&') {
	5371	SV *sv_dat;
	5372	RExC_parse++;
	5373	sv_dat = reg_scan_name(pRExC_state,
	5374	SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
	5375	parno = sv_dat ? ((I32 )SvPVX(sv_dat)) : 0;
	5376	}
	5377	ret = reganode(pRExC_state,INSUBP,parno);
	5378	goto insert_if_check_paren;
	5379	}
	5380	else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
	5381	/* (?(1)...) */
	5382	char c;
	5383	parno = atoi(RExC_parse++);
	5384
	5385	while (isDIGIT(*RExC_parse))
	5386	RExC_parse++;
	5387	ret = reganode(pRExC_state, GROUPP, parno);
	5388
	5389	insert_if_check_paren:
	5390	if ((c = *nextchar(pRExC_state)) != ')')
	5391	vFAIL("Switch condition not recognized");
	5392	insert_if:
	5393	REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
	5394	br = regbranch(pRExC_state, &flags, 1,depth+1);
	5395	if (br == NULL)
	5396	br = reganode(pRExC_state, LONGJMP, 0);
	5397	else
	5398	REGTAIL(pRExC_state, br, reganode(pRExC_state, LONGJMP, 0));
	5399	c = *nextchar(pRExC_state);
	5400	if (flags&HASWIDTH)
	5401	*flagp \|= HASWIDTH;
	5402	if (c == '\|') {
	5403	if (is_define)
	5404	vFAIL("(?(DEFINE)....) does not allow branches");
	5405	lastbr = reganode(pRExC_state, IFTHEN, 0); /* Fake one for optimizer. */
	5406	regbranch(pRExC_state, &flags, 1,depth+1);
	5407	REGTAIL(pRExC_state, ret, lastbr);
	5408	if (flags&HASWIDTH)
	5409	*flagp \|= HASWIDTH;
	5410	c = *nextchar(pRExC_state);
	5411	}
	5412	else
	5413	lastbr = NULL;
	5414	if (c != ')')
	5415	vFAIL("Switch (?(condition)... contains too many branches");
	5416	ender = reg_node(pRExC_state, TAIL);
	5417	REGTAIL(pRExC_state, br, ender);
	5418	if (lastbr) {
	5419	REGTAIL(pRExC_state, lastbr, ender);
	5420	REGTAIL(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender);
	5421	}
	5422	else
	5423	REGTAIL(pRExC_state, ret, ender);
	5424	return ret;
	5425	}
	5426	else {
	5427	vFAIL2("Unknown switch condition (?(%.2s", RExC_parse);
	5428	}
	5429	}
	5430	case 0:
	5431	RExC_parse--; /* for vFAIL to print correctly */
	5432	vFAIL("Sequence (? incomplete");
	5433	break;
	5434	default:
	5435	--RExC_parse;
	5436	parse_flags: /* (?i) */
	5437	{
	5438	U32 posflags = 0, negflags = 0;
	5439	U32 *flagsp = &posflags;
	5440
	5441	while (*RExC_parse) {
	5442	/* && strchr("iogcmsx", RExC_parse) /
	5443	/* (?g), (?gc) and (?o) are useless here
	5444	and must be globally applied -- japhy */
	5445	switch (*RExC_parse) {
	5446	CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
	5447	case 'o':
	5448	case 'g':
	5449	if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
	5450	const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
	5451	if (! (wastedflags & wflagbit) ) {
	5452	wastedflags \|= wflagbit;
	5453	vWARN5(
	5454	RExC_parse + 1,
	5455	"Useless (%s%c) - %suse /%c modifier",
	5456	flagsp == &negflags ? "?-" : "?",
	5457	*RExC_parse,
	5458	flagsp == &negflags ? "don't " : "",
	5459	*RExC_parse
	5460	);
	5461	}
	5462	}
	5463	break;
	5464
	5465	case 'c':
	5466	if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
	5467	if (! (wastedflags & WASTED_C) ) {
	5468	wastedflags \|= WASTED_GC;
	5469	vWARN3(
	5470	RExC_parse + 1,
	5471	"Useless (%sc) - %suse /gc modifier",
	5472	flagsp == &negflags ? "?-" : "?",
	5473	flagsp == &negflags ? "don't " : ""
	5474	);
	5475	}
	5476	}
	5477	break;
	5478	case 'k':
	5479	if (flagsp == &negflags) {
	5480	if (SIZE_ONLY && ckWARN(WARN_REGEXP))
	5481	vWARN(RExC_parse + 1,"Useless use of (?-k)");
	5482	} else {
	5483	*flagsp \|= RXf_PMf_KEEPCOPY;
	5484	}
	5485	break;
	5486	case '-':
	5487	if (flagsp == &negflags)
	5488	goto unknown;
	5489	flagsp = &negflags;
	5490	wastedflags = 0; /* reset so (?g-c) warns twice */
	5491	break;
	5492	case ':':
	5493	paren = ':';
	5494	/FALLTHROUGH/
	5495	case ')':
	5496	RExC_flags \|= posflags;
	5497	RExC_flags &= ~negflags;
	5498	nextchar(pRExC_state);
	5499	if (paren != ':') {
	5500	*flagp = TRYAGAIN;
	5501	return NULL;
	5502	} else {
	5503	ret = NULL;
	5504	goto parse_rest;
	5505	}
	5506	/NOTREACHED/
	5507	default:
	5508	unknown:
	5509	RExC_parse++;
	5510	vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
	5511	/NOTREACHED/
	5512	}
	5513	++RExC_parse;
	5514	}
	5515	}} /* one for the default block, one for the switch */
	5516	}
	5517	else { /* (...) */
	5518	capturing_parens:
	5519	parno = RExC_npar;
	5520	RExC_npar++;
	5521
	5522	ret = reganode(pRExC_state, OPEN, parno);
	5523	if (!SIZE_ONLY ){
	5524	if (!RExC_nestroot)
	5525	RExC_nestroot = parno;
	5526	if (RExC_seen & REG_SEEN_RECURSE) {
	5527	DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
	5528	"Setting open paren #%"IVdf" to %d\n",
	5529	(IV)parno, REG_NODE_NUM(ret)));
	5530	RExC_open_parens[parno-1]= ret;
	5531	}
	5532	}
	5533	Set_Node_Length(ret, 1); /* MJD */
	5534	Set_Node_Offset(ret, RExC_parse); /* MJD */
	5535	is_open = 1;
	5536	}
	5537	}
	5538	else /* ! paren */
	5539	ret = NULL;
	5540
	5541	parse_rest:
	5542	/* Pick up the branches, linking them together. */
	5543	parse_start = RExC_parse; /* MJD */
	5544	br = regbranch(pRExC_state, &flags, 1,depth+1);
	5545	/* branch_len = (paren != 0); */
	5546
	5547	if (br == NULL)
	5548	return(NULL);
	5549	if (*RExC_parse == '\|') {
	5550	if (!SIZE_ONLY && RExC_extralen) {
	5551	reginsert(pRExC_state, BRANCHJ, br, depth+1);
	5552	}
	5553	else { /* MJD */
	5554	reginsert(pRExC_state, BRANCH, br, depth+1);
	5555	Set_Node_Length(br, paren != 0);
	5556	Set_Node_Offset_To_R(br-RExC_emit_start, parse_start-RExC_start);
	5557	}
	5558	have_branch = 1;
	5559	if (SIZE_ONLY)
	5560	RExC_extralen += 1; /* For BRANCHJ-BRANCH. */
	5561	}
	5562	else if (paren == ':') {
	5563	*flagp \|= flags&SIMPLE;
	5564	}
	5565	if (is_open) { /* Starts with OPEN. */
	5566	REGTAIL(pRExC_state, ret, br); /* OPEN -> first. */
	5567	}
	5568	else if (paren != '?') /* Not Conditional */
	5569	ret = br;
	5570	*flagp \|= flags & (SPSTART \| HASWIDTH);
	5571	lastbr = br;
	5572	while (*RExC_parse == '\|') {
	5573	if (!SIZE_ONLY && RExC_extralen) {
	5574	ender = reganode(pRExC_state, LONGJMP,0);
	5575	REGTAIL(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender); /* Append to the previous. */
	5576	}
	5577	if (SIZE_ONLY)
	5578	RExC_extralen += 2; /* Account for LONGJMP. */
	5579	nextchar(pRExC_state);
	5580	br = regbranch(pRExC_state, &flags, 0, depth+1);
	5581
	5582	if (br == NULL)
	5583	return(NULL);
	5584	REGTAIL(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */
	5585	lastbr = br;
	5586	if (flags&HASWIDTH)
	5587	*flagp \|= HASWIDTH;
	5588	*flagp \|= flags&SPSTART;
	5589	}
	5590
	5591	if (have_branch \|\| paren != ':') {
	5592	/* Make a closing node, and hook it on the end. */
	5593	switch (paren) {
	5594	case ':':
	5595	ender = reg_node(pRExC_state, TAIL);
	5596	break;
	5597	case 1:
	5598	ender = reganode(pRExC_state, CLOSE, parno);
	5599	if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) {
	5600	DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
	5601	"Setting close paren #%"IVdf" to %d\n",
	5602	(IV)parno, REG_NODE_NUM(ender)));
	5603	RExC_close_parens[parno-1]= ender;
	5604	if (RExC_nestroot == parno)
	5605	RExC_nestroot = 0;
	5606	}
	5607	Set_Node_Offset(ender,RExC_parse+1); /* MJD */
	5608	Set_Node_Length(ender,1); /* MJD */
	5609	break;
	5610	case '<':
	5611	case ',':
	5612	case '=':
	5613	case '!':
	5614	*flagp &= ~HASWIDTH;
	5615	/* FALL THROUGH */
	5616	case '>':
	5617	ender = reg_node(pRExC_state, SUCCEED);
	5618	break;
	5619	case 0:
	5620	ender = reg_node(pRExC_state, END);
	5621	if (!SIZE_ONLY) {
	5622	assert(!RExC_opend); /* there can only be one! */
	5623	RExC_opend = ender;
	5624	}
	5625	break;
	5626	}
	5627	REGTAIL(pRExC_state, lastbr, ender);
	5628
	5629	if (have_branch && !SIZE_ONLY) {
	5630	if (depth==1)
	5631	RExC_seen \|= REG_TOP_LEVEL_BRANCHES;
	5632
	5633	/* Hook the tails of the branches to the closing node. */
	5634	for (br = ret; br; br = regnext(br)) {
	5635	const U8 op = PL_regkind[OP(br)];
	5636	if (op == BRANCH) {
	5637	REGTAIL_STUDY(pRExC_state, NEXTOPER(br), ender);
	5638	}
	5639	else if (op == BRANCHJ) {
	5640	REGTAIL_STUDY(pRExC_state, NEXTOPER(NEXTOPER(br)), ender);
	5641	}
	5642	}
	5643	}
	5644	}
	5645
	5646	{
	5647	const char *p;
	5648	static const char parens[] = "=!<,>";
	5649
	5650	if (paren && (p = strchr(parens, paren))) {
	5651	U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
	5652	int flag = (p - parens) > 1;
	5653
	5654	if (paren == '>')
	5655	node = SUSPEND, flag = 0;
	5656	reginsert(pRExC_state, node,ret, depth+1);
	5657	Set_Node_Cur_Length(ret);
	5658	Set_Node_Offset(ret, parse_start + 1);
	5659	ret->flags = flag;
	5660	REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL));
	5661	}
	5662	}
	5663
	5664	/* Check for proper termination. */
	5665	if (paren) {
	5666	RExC_flags = oregflags;
	5667	if (RExC_parse >= RExC_end \|\| *nextchar(pRExC_state) != ')') {
	5668	RExC_parse = oregcomp_parse;
	5669	vFAIL("Unmatched (");
	5670	}
	5671	}
	5672	else if (!paren && RExC_parse < RExC_end) {
	5673	if (*RExC_parse == ')') {
	5674	RExC_parse++;
	5675	vFAIL("Unmatched )");
	5676	}
	5677	else
	5678	FAIL("Junk on end of regexp"); /* "Can't happen". */
	5679	/* NOTREACHED */
	5680	}
	5681
	5682	return(ret);
	5683	}
	5684
	5685	/*
	5686	- regbranch - one alternative of an \| operator
	5687	*
	5688	* Implements the concatenation operator.
	5689	*/
	5690	STATIC regnode *
	5691	S_regbranch(pTHX_ RExC_state_t pRExC_state, I32 flagp, I32 first, U32 depth)
	5692	{
	5693	dVAR;
	5694	register regnode *ret;
	5695	register regnode *chain = NULL;
	5696	register regnode *latest;
	5697	I32 flags = 0, c = 0;
	5698	GET_RE_DEBUG_FLAGS_DECL;
	5699	DEBUG_PARSE("brnc");
	5700	if (first)
	5701	ret = NULL;
	5702	else {
	5703	if (!SIZE_ONLY && RExC_extralen)
	5704	ret = reganode(pRExC_state, BRANCHJ,0);
	5705	else {
	5706	ret = reg_node(pRExC_state, BRANCH);
	5707	Set_Node_Length(ret, 1);
	5708	}
	5709	}
	5710
	5711	if (!first && SIZE_ONLY)
	5712	RExC_extralen += 1; /* BRANCHJ */
	5713
	5714	flagp = WORST; / Tentatively. */
	5715
	5716	RExC_parse--;
	5717	nextchar(pRExC_state);
	5718	while (RExC_parse < RExC_end && RExC_parse != '\|' && RExC_parse != ')') {
	5719	flags &= ~TRYAGAIN;
	5720	latest = regpiece(pRExC_state, &flags,depth+1);
	5721	if (latest == NULL) {
	5722	if (flags & TRYAGAIN)
	5723	continue;
	5724	return(NULL);
	5725	}
	5726	else if (ret == NULL)
	5727	ret = latest;
	5728	*flagp \|= flags&HASWIDTH;
	5729	if (chain == NULL) /* First piece. */
	5730	*flagp \|= flags&SPSTART;
	5731	else {
	5732	RExC_naughty++;
	5733	REGTAIL(pRExC_state, chain, latest);
	5734	}
	5735	chain = latest;
	5736	c++;
	5737	}
	5738	if (chain == NULL) { /* Loop ran zero times. */
	5739	chain = reg_node(pRExC_state, NOTHING);
	5740	if (ret == NULL)
	5741	ret = chain;
	5742	}
	5743	if (c == 1) {
	5744	*flagp \|= flags&SIMPLE;
	5745	}
	5746
	5747	return ret;
	5748	}
	5749
	5750	/*
	5751	- regpiece - something followed by possible [*+?]
	5752	*
	5753	* Note that the branching code sequences used for ? and the general cases
	5754	* of * and + are somewhat optimized: they use the same NOTHING node as
	5755	* both the endmarker for their branch list and the body of the last branch.
	5756	* It might seem that this node could be dispensed with entirely, but the
	5757	* endmarker role is not redundant.
	5758	*/
	5759	STATIC regnode *
	5760	S_regpiece(pTHX_ RExC_state_t pRExC_state, I32 flagp, U32 depth)
	5761	{
	5762	dVAR;
	5763	register regnode *ret;
	5764	register char op;
	5765	register char *next;
	5766	I32 flags;
	5767	const char * const origparse = RExC_parse;
	5768	I32 min;
	5769	I32 max = REG_INFTY;
	5770	char *parse_start;
	5771	const char *maxpos = NULL;
	5772	GET_RE_DEBUG_FLAGS_DECL;
	5773	DEBUG_PARSE("piec");
	5774
	5775	ret = regatom(pRExC_state, &flags,depth+1);
	5776	if (ret == NULL) {
	5777	if (flags & TRYAGAIN)
	5778	*flagp \|= TRYAGAIN;
	5779	return(NULL);
	5780	}
	5781
	5782	op = *RExC_parse;
	5783
	5784	if (op == '{' && regcurly(RExC_parse)) {
	5785	maxpos = NULL;
	5786	parse_start = RExC_parse; /* MJD */
	5787	next = RExC_parse + 1;
	5788	while (isDIGIT(next) \|\| next == ',') {
	5789	if (*next == ',') {
	5790	if (maxpos)
	5791	break;
	5792	else
	5793	maxpos = next;
	5794	}
	5795	next++;
	5796	}
	5797	if (next == '}') { / got one */
	5798	if (!maxpos)
	5799	maxpos = next;
	5800	RExC_parse++;
	5801	min = atoi(RExC_parse);
	5802	if (*maxpos == ',')
	5803	maxpos++;
	5804	else
	5805	maxpos = RExC_parse;
	5806	max = atoi(maxpos);
	5807	if (!max && *maxpos != '0')
	5808	max = REG_INFTY; /* meaning "infinity" */
	5809	else if (max >= REG_INFTY)
	5810	vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
	5811	RExC_parse = next;
	5812	nextchar(pRExC_state);
	5813
	5814	do_curly:
	5815	if ((flags&SIMPLE)) {
	5816	RExC_naughty += 2 + RExC_naughty / 2;
	5817	reginsert(pRExC_state, CURLY, ret, depth+1);
	5818	Set_Node_Offset(ret, parse_start+1); /* MJD */
	5819	Set_Node_Cur_Length(ret);
	5820	}
	5821	else {
	5822	regnode * const w = reg_node(pRExC_state, WHILEM);
	5823
	5824	w->flags = 0;
	5825	REGTAIL(pRExC_state, ret, w);
	5826	if (!SIZE_ONLY && RExC_extralen) {
	5827	reginsert(pRExC_state, LONGJMP,ret, depth+1);
	5828	reginsert(pRExC_state, NOTHING,ret, depth+1);
	5829	NEXT_OFF(ret) = 3; /* Go over LONGJMP. */
	5830	}
	5831	reginsert(pRExC_state, CURLYX,ret, depth+1);
	5832	/* MJD hk */
	5833	Set_Node_Offset(ret, parse_start+1);
	5834	Set_Node_Length(ret,
	5835	op == '{' ? (RExC_parse - parse_start) : 1);
	5836
	5837	if (!SIZE_ONLY && RExC_extralen)
	5838	NEXT_OFF(ret) = 3; /* Go over NOTHING to LONGJMP. */
	5839	REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
	5840	if (SIZE_ONLY)
	5841	RExC_whilem_seen++, RExC_extralen += 3;
	5842	RExC_naughty += 4 + RExC_naughty; /* compound interest */
	5843	}
	5844	ret->flags = 0;
	5845
	5846	if (min > 0)
	5847	*flagp = WORST;
	5848	if (max > 0)
	5849	*flagp \|= HASWIDTH;
	5850	if (max && max < min)
	5851	vFAIL("Can't do {n,m} with n > m");
	5852	if (!SIZE_ONLY) {
	5853	ARG1_SET(ret, (U16)min);
	5854	ARG2_SET(ret, (U16)max);
	5855	}
	5856
	5857	goto nest_check;
	5858	}
	5859	}
	5860
	5861	if (!ISMULT1(op)) {
	5862	*flagp = flags;
	5863	return(ret);
	5864	}
	5865
	5866	#if 0 /* Now runtime fix should be reliable. */
	5867
	5868	/* if this is reinstated, don't forget to put this back into perldiag:
	5869
	5870	=item Regexp *+ operand could be empty at {#} in regex m/%s/
	5871
	5872	(F) The part of the regexp subject to either the * or + quantifier
	5873	could match an empty string. The {#} shows in the regular
	5874	expression about where the problem was discovered.
	5875
	5876	*/
	5877
	5878	if (!(flags&HASWIDTH) && op != '?')
	5879	vFAIL("Regexp *+ operand could be empty");
	5880	#endif
	5881
	5882	parse_start = RExC_parse;
	5883	nextchar(pRExC_state);
	5884
	5885	*flagp = (op != '+') ? (WORST\|SPSTART\|HASWIDTH) : (WORST\|HASWIDTH);
	5886
	5887	if (op == '*' && (flags&SIMPLE)) {
	5888	reginsert(pRExC_state, STAR, ret, depth+1);
	5889	ret->flags = 0;
	5890	RExC_naughty += 4;
	5891	}
	5892	else if (op == '*') {
	5893	min = 0;
	5894	goto do_curly;
	5895	}
	5896	else if (op == '+' && (flags&SIMPLE)) {
	5897	reginsert(pRExC_state, PLUS, ret, depth+1);
	5898	ret->flags = 0;
	5899	RExC_naughty += 3;
	5900	}
	5901	else if (op == '+') {
	5902	min = 1;
	5903	goto do_curly;
	5904	}
	5905	else if (op == '?') {
	5906	min = 0; max = 1;
	5907	goto do_curly;
	5908	}
	5909	nest_check:
	5910	if (!SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) {
	5911	vWARN3(RExC_parse,
	5912	"%.*s matches null string many times",
	5913	(int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
	5914	origparse);
	5915	}
	5916
	5917	if (RExC_parse < RExC_end && *RExC_parse == '?') {
	5918	nextchar(pRExC_state);
	5919	reginsert(pRExC_state, MINMOD, ret, depth+1);
	5920	REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE);
	5921	}
	5922	#ifndef REG_ALLOW_MINMOD_SUSPEND
	5923	else
	5924	#endif
	5925	if (RExC_parse < RExC_end && *RExC_parse == '+') {
	5926	regnode *ender;
	5927	nextchar(pRExC_state);
	5928	ender = reg_node(pRExC_state, SUCCEED);
	5929	REGTAIL(pRExC_state, ret, ender);
	5930	reginsert(pRExC_state, SUSPEND, ret, depth+1);
	5931	ret->flags = 0;
	5932	ender = reg_node(pRExC_state, TAIL);
	5933	REGTAIL(pRExC_state, ret, ender);
	5934	/ret= ender;/
	5935	}
	5936
	5937	if (RExC_parse < RExC_end && ISMULT2(RExC_parse)) {
	5938	RExC_parse++;
	5939	vFAIL("Nested quantifiers");
	5940	}
	5941
	5942	return(ret);
	5943	}
	5944
	5945
	5946	/* reg_namedseq(pRExC_state,UVp)
	5947
	5948	This is expected to be called by a parser routine that has
	5949	recognized'\N' and needs to handle the rest. RExC_parse is
	5950	expected to point at the first char following the N at the time
	5951	of the call.
	5952
	5953	If valuep is non-null then it is assumed that we are parsing inside
	5954	of a charclass definition and the first codepoint in the resolved
	5955	string is returned via *valuep and the routine will return NULL.
	5956	In this mode if a multichar string is returned from the charnames
	5957	handler a warning will be issued, and only the first char in the
	5958	sequence will be examined. If the string returned is zero length
	5959	then the value of *valuep is undefined and NON-NULL will
	5960	be returned to indicate failure. (This will NOT be a valid pointer
	5961	to a regnode.)
	5962
	5963	If value is null then it is assumed that we are parsing normal text
	5964	and inserts a new EXACT node into the program containing the resolved
	5965	string and returns a pointer to the new node. If the string is
	5966	zerolength a NOTHING node is emitted.
	5967
	5968	On success RExC_parse is set to the char following the endbrace.
	5969	Parsing failures will generate a fatal errorvia vFAIL(...)
	5970
	5971	NOTE: We cache all results from the charnames handler locally in
	5972	the RExC_charnames hash (created on first use) to prevent a charnames
	5973	handler from playing silly-buggers and returning a short string and
	5974	then a long string for a given pattern. Since the regexp program
	5975	size is calculated during an initial parse this would result
	5976	in a buffer overrun so we cache to prevent the charname result from
	5977	changing during the course of the parse.
	5978
	5979	*/
	5980	STATIC regnode *
	5981	S_reg_namedseq(pTHX_ RExC_state_t pRExC_state, UV valuep)
	5982	{
	5983	char * name; /* start of the content of the name */
	5984	char * endbrace; /* endbrace following the name */
	5985	SV *sv_str = NULL;
	5986	SV *sv_name = NULL;
	5987	STRLEN len; /* this has various purposes throughout the code */
	5988	bool cached = 0; /* if this is true then we shouldn't refcount dev sv_str */
	5989	regnode *ret = NULL;
	5990
	5991	if (*RExC_parse != '{') {
	5992	vFAIL("Missing braces on \\N{}");
	5993	}
	5994	name = RExC_parse+1;
	5995	endbrace = strchr(RExC_parse, '}');
	5996	if ( ! endbrace ) {
	5997	RExC_parse++;
	5998	vFAIL("Missing right brace on \\N{}");
	5999	}
	6000	RExC_parse = endbrace + 1;
	6001
	6002
	6003	/* RExC_parse points at the beginning brace,
	6004	endbrace points at the last */
	6005	if ( name[0]=='U' && name[1]=='+' ) {
	6006	/* its a "unicode hex" notation {U+89AB} */
	6007	I32 fl = PERL_SCAN_ALLOW_UNDERSCORES
	6008	\| PERL_SCAN_DISALLOW_PREFIX
	6009	\| (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
	6010	UV cp;
	6011	len = (STRLEN)(endbrace - name - 2);
	6012	cp = grok_hex(name + 2, &len, &fl, NULL);
	6013	if ( len != (STRLEN)(endbrace - name - 2) ) {
	6014	cp = 0xFFFD;
	6015	}
	6016	if (cp > 0xff)
	6017	RExC_utf8 = 1;
	6018	if ( valuep ) {
	6019	*valuep = cp;
	6020	return NULL;
	6021	}
	6022	sv_str= Perl_newSVpvf_nocontext("%c",(int)cp);
	6023	} else {
	6024	/* fetch the charnames handler for this scope */
	6025	HV * const table = GvHV(PL_hintgv);
	6026	SV **cvp= table ?
	6027	hv_fetchs(table, "charnames", FALSE) :
	6028	NULL;
	6029	SV cv= cvp ? cvp : NULL;
	6030	HE *he_str;
	6031	int count;
	6032	/* create an SV with the name as argument */
	6033	sv_name = newSVpvn(name, endbrace - name);
	6034
	6035	if (!table \|\| !(PL_hints & HINT_LOCALIZE_HH)) {
	6036	vFAIL2("Constant(\\N{%s}) unknown: "
	6037	"(possibly a missing \"use charnames ...\")",
	6038	SvPVX(sv_name));
	6039	}
	6040	if (!cvp \|\| !SvOK(cvp)) { / when $^H{charnames} = undef; */
	6041	vFAIL2("Constant(\\N{%s}): "
	6042	"$^H{charnames} is not defined",SvPVX(sv_name));
	6043	}
	6044
	6045
	6046
	6047	if (!RExC_charnames) {
	6048	/* make sure our cache is allocated */
	6049	RExC_charnames = newHV();
	6050	sv_2mortal((SV*)RExC_charnames);
	6051	}
	6052	/* see if we have looked this one up before */
	6053	he_str = hv_fetch_ent( RExC_charnames, sv_name, 0, 0 );
	6054	if ( he_str ) {
	6055	sv_str = HeVAL(he_str);
	6056	cached = 1;
	6057	} else {
	6058	dSP ;
	6059
	6060	ENTER ;
	6061	SAVETMPS ;
	6062	PUSHMARK(SP) ;
	6063
	6064	XPUSHs(sv_name);
	6065
	6066	PUTBACK ;
	6067
	6068	count= call_sv(cv, G_SCALAR);
	6069
	6070	if (count == 1) { /* XXXX is this right? dmq */
	6071	sv_str = POPs;
	6072	SvREFCNT_inc_simple_void(sv_str);
	6073	}
	6074
	6075	SPAGAIN ;
	6076	PUTBACK ;
	6077	FREETMPS ;
	6078	LEAVE ;
	6079
	6080	if ( !sv_str \|\| !SvOK(sv_str) ) {
	6081	vFAIL2("Constant(\\N{%s}): Call to &{$^H{charnames}} "
	6082	"did not return a defined value",SvPVX(sv_name));
	6083	}
	6084	if (hv_store_ent( RExC_charnames, sv_name, sv_str, 0))
	6085	cached = 1;
	6086	}
	6087	}
	6088	if (valuep) {
	6089	char *p = SvPV(sv_str, len);
	6090	if (len) {
	6091	STRLEN numlen = 1;
	6092	if ( SvUTF8(sv_str) ) {
	6093	valuep = utf8_to_uvchr((U8)p, &numlen);
	6094	if (*valuep > 0x7F)
	6095	RExC_utf8 = 1;
	6096	/* XXXX
	6097	We have to turn on utf8 for high bit chars otherwise
	6098	we get failures with
	6099
	6100	"ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i
	6101	"SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i
	6102
	6103	This is different from what \x{} would do with the same
	6104	codepoint, where the condition is > 0xFF.
	6105	- dmq
	6106	*/
	6107
	6108
	6109	} else {
	6110	valuep = (UV)p;
	6111	/* warn if we havent used the whole string? */
	6112	}
	6113	if (numlen<len && SIZE_ONLY && ckWARN(WARN_REGEXP)) {
	6114	vWARN2(RExC_parse,
	6115	"Ignoring excess chars from \\N{%s} in character class",
	6116	SvPVX(sv_name)
	6117	);
	6118	}
	6119	} else if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
	6120	vWARN2(RExC_parse,
	6121	"Ignoring zero length \\N{%s} in character class",
	6122	SvPVX(sv_name)
	6123	);
	6124	}
	6125	if (sv_name)
	6126	SvREFCNT_dec(sv_name);
	6127	if (!cached)
	6128	SvREFCNT_dec(sv_str);
	6129	return len ? NULL : (regnode *)&len;
	6130	} else if(SvCUR(sv_str)) {
	6131
	6132	char *s;
	6133	char p, pend;
	6134	STRLEN charlen = 1;
	6135	char * parse_start = name-3; /* needed for the offsets */
	6136	GET_RE_DEBUG_FLAGS_DECL; /* needed for the offsets */
	6137
	6138	ret = reg_node(pRExC_state,
	6139	(U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
	6140	s= STRING(ret);
	6141
	6142	if ( RExC_utf8 && !SvUTF8(sv_str) ) {
	6143	sv_utf8_upgrade(sv_str);
	6144	} else if ( !RExC_utf8 && SvUTF8(sv_str) ) {
	6145	RExC_utf8= 1;
	6146	}
	6147
	6148	p = SvPV(sv_str, len);
	6149	pend = p + len;
	6150	/* len is the length written, charlen is the size the char read */
	6151	for ( len = 0; p < pend; p += charlen ) {
	6152	if (UTF) {
	6153	UV uvc = utf8_to_uvchr((U8*)p, &charlen);
	6154	if (FOLD) {
	6155	STRLEN foldlen,numlen;
	6156	U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
	6157	uvc = toFOLD_uni(uvc, tmpbuf, &foldlen);
	6158	/* Emit all the Unicode characters. */
	6159
	6160	for (foldbuf = tmpbuf;
	6161	foldlen;
	6162	foldlen -= numlen)
	6163	{
	6164	uvc = utf8_to_uvchr(foldbuf, &numlen);
	6165	if (numlen > 0) {
	6166	const STRLEN unilen = reguni(pRExC_state, uvc, s);
	6167	s += unilen;
	6168	len += unilen;
	6169	/* In EBCDIC the numlen
	6170	* and unilen can differ. */
	6171	foldbuf += numlen;
	6172	if (numlen >= foldlen)
	6173	break;
	6174	}
	6175	else
	6176	break; /* "Can't happen." */
	6177	}
	6178	} else {
	6179	const STRLEN unilen = reguni(pRExC_state, uvc, s);
	6180	if (unilen > 0) {
	6181	s += unilen;
	6182	len += unilen;
	6183	}
	6184	}
	6185	} else {
	6186	len++;
	6187	REGC(*p, s++);
	6188	}
	6189	}
	6190	if (SIZE_ONLY) {
	6191	RExC_size += STR_SZ(len);
	6192	} else {
	6193	STR_LEN(ret) = len;
	6194	RExC_emit += STR_SZ(len);
	6195	}
	6196	Set_Node_Cur_Length(ret); /* MJD */
	6197	RExC_parse--;
	6198	nextchar(pRExC_state);
	6199	} else {
	6200	ret = reg_node(pRExC_state,NOTHING);
	6201	}
	6202	if (!cached) {
	6203	SvREFCNT_dec(sv_str);
	6204	}
	6205	if (sv_name) {
	6206	SvREFCNT_dec(sv_name);
	6207	}
	6208	return ret;
	6209
	6210	}
	6211
	6212
	6213	/*
	6214	* reg_recode
	6215	*
	6216	* It returns the code point in utf8 for the value in *encp.
	6217	* value: a code value in the source encoding
	6218	* encp: a pointer to an Encode object
	6219	*
	6220	* If the result from Encode is not a single character,
	6221	* it returns U+FFFD (Replacement character) and sets *encp to NULL.
	6222	*/
	6223	STATIC UV
	6224	S_reg_recode(pTHX_ const char value, SV **encp)
	6225	{
	6226	STRLEN numlen = 1;
	6227	SV * const sv = sv_2mortal(newSVpvn(&value, numlen));
	6228	const char * const s = encp && encp ? sv_recode_to_utf8(sv, encp)
	6229	: SvPVX(sv);
	6230	const STRLEN newlen = SvCUR(sv);
	6231	UV uv = UNICODE_REPLACEMENT;
	6232
	6233	if (newlen)
	6234	uv = SvUTF8(sv)
	6235	? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
	6236	: (U8)s;
	6237
	6238	if (!newlen \|\| numlen != newlen) {
	6239	uv = UNICODE_REPLACEMENT;
	6240	if (encp)
	6241	*encp = NULL;
	6242	}
	6243	return uv;
	6244	}
	6245
	6246
	6247	/*
	6248	- regatom - the lowest level
	6249
	6250	Try to identify anything special at the start of the pattern. If there
	6251	is, then handle it as required. This may involve generating a single regop,
	6252	such as for an assertion; or it may involve recursing, such as to
	6253	handle a () structure.
	6254
	6255	If the string doesn't start with something special then we gobble up
	6256	as much literal text as we can.
	6257
	6258	Once we have been able to handle whatever type of thing started the
	6259	sequence, we return.
	6260
	6261	Note: we have to be careful with escapes, as they can be both literal
	6262	and special, and in the case of \10 and friends can either, depending
	6263	on context. Specifically there are two seperate switches for handling
	6264	escape sequences, with the one for handling literal escapes requiring
	6265	a dummy entry for all of the special escapes that are actually handled
	6266	by the other.
	6267	*/
	6268
	6269	STATIC regnode *
	6270	S_regatom(pTHX_ RExC_state_t pRExC_state, I32 flagp, U32 depth)
	6271	{
	6272	dVAR;
	6273	register regnode *ret = NULL;
	6274	I32 flags;
	6275	char *parse_start = RExC_parse;
	6276	GET_RE_DEBUG_FLAGS_DECL;
	6277	DEBUG_PARSE("atom");
	6278	flagp = WORST; / Tentatively. */
	6279
	6280
	6281	tryagain:
	6282	switch (*RExC_parse) {
	6283	case '^':
	6284	RExC_seen_zerolen++;
	6285	nextchar(pRExC_state);
	6286	if (RExC_flags & RXf_PMf_MULTILINE)
	6287	ret = reg_node(pRExC_state, MBOL);
	6288	else if (RExC_flags & RXf_PMf_SINGLELINE)
	6289	ret = reg_node(pRExC_state, SBOL);
	6290	else
	6291	ret = reg_node(pRExC_state, BOL);
	6292	Set_Node_Length(ret, 1); /* MJD */
	6293	break;
	6294	case '$':
	6295	nextchar(pRExC_state);
	6296	if (*RExC_parse)
	6297	RExC_seen_zerolen++;
	6298	if (RExC_flags & RXf_PMf_MULTILINE)
	6299	ret = reg_node(pRExC_state, MEOL);
	6300	else if (RExC_flags & RXf_PMf_SINGLELINE)
	6301	ret = reg_node(pRExC_state, SEOL);
	6302	else
	6303	ret = reg_node(pRExC_state, EOL);
	6304	Set_Node_Length(ret, 1); /* MJD */
	6305	break;
	6306	case '.':
	6307	nextchar(pRExC_state);
	6308	if (RExC_flags & RXf_PMf_SINGLELINE)
	6309	ret = reg_node(pRExC_state, SANY);
	6310	else
	6311	ret = reg_node(pRExC_state, REG_ANY);
	6312	*flagp \|= HASWIDTH\|SIMPLE;
	6313	RExC_naughty++;
	6314	Set_Node_Length(ret, 1); /* MJD */
	6315	break;
	6316	case '[':
	6317	{
	6318	char * const oregcomp_parse = ++RExC_parse;
	6319	ret = regclass(pRExC_state,depth+1);
	6320	if (*RExC_parse != ']') {
	6321	RExC_parse = oregcomp_parse;
	6322	vFAIL("Unmatched [");
	6323	}
	6324	nextchar(pRExC_state);
	6325	*flagp \|= HASWIDTH\|SIMPLE;
	6326	Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */
	6327	break;
	6328	}
	6329	case '(':
	6330	nextchar(pRExC_state);
	6331	ret = reg(pRExC_state, 1, &flags,depth+1);
	6332	if (ret == NULL) {
	6333	if (flags & TRYAGAIN) {
	6334	if (RExC_parse == RExC_end) {
	6335	/* Make parent create an empty node if needed. */
	6336	*flagp \|= TRYAGAIN;
	6337	return(NULL);
	6338	}
	6339	goto tryagain;
	6340	}
	6341	return(NULL);
	6342	}
	6343	*flagp \|= flags&(HASWIDTH\|SPSTART\|SIMPLE);
	6344	break;
	6345	case '\|':
	6346	case ')':
	6347	if (flags & TRYAGAIN) {
	6348	*flagp \|= TRYAGAIN;
	6349	return NULL;
	6350	}
	6351	vFAIL("Internal urp");
	6352	/* Supposed to be caught earlier. */
	6353	break;
	6354	case '{':
	6355	if (!regcurly(RExC_parse)) {
	6356	RExC_parse++;
	6357	goto defchar;
	6358	}
	6359	/* FALL THROUGH */
	6360	case '?':
	6361	case '+':
	6362	case '*':
	6363	RExC_parse++;
	6364	vFAIL("Quantifier follows nothing");
	6365	break;
	6366	case '\\':
	6367	/* Special Escapes
	6368
	6369	This switch handles escape sequences that resolve to some kind
	6370	of special regop and not to literal text. Escape sequnces that
	6371	resolve to literal text are handled below in the switch marked
	6372	"Literal Escapes".
	6373
	6374	Every entry in this switch must have a corresponding entry
	6375	in the literal escape switch. However, the opposite is not
	6376	required, as the default for this switch is to jump to the
	6377	literal text handling code.
	6378	*/
	6379	switch (*++RExC_parse) {
	6380	/* Special Escapes */
	6381	case 'A':
	6382	RExC_seen_zerolen++;
	6383	ret = reg_node(pRExC_state, SBOL);
	6384	*flagp \|= SIMPLE;
	6385	goto finish_meta_pat;
	6386	case 'G':
	6387	ret = reg_node(pRExC_state, GPOS);
	6388	RExC_seen \|= REG_SEEN_GPOS;
	6389	*flagp \|= SIMPLE;
	6390	goto finish_meta_pat;
	6391	case 'K':
	6392	RExC_seen_zerolen++;
	6393	ret = reg_node(pRExC_state, KEEPS);
	6394	*flagp \|= SIMPLE;
	6395	goto finish_meta_pat;
	6396	case 'Z':
	6397	ret = reg_node(pRExC_state, SEOL);
	6398	*flagp \|= SIMPLE;
	6399	RExC_seen_zerolen++; /* Do not optimize RE away */
	6400	goto finish_meta_pat;
	6401	case 'z':
	6402	ret = reg_node(pRExC_state, EOS);
	6403	*flagp \|= SIMPLE;
	6404	RExC_seen_zerolen++; /* Do not optimize RE away */
	6405	goto finish_meta_pat;
	6406	case 'C':
	6407	ret = reg_node(pRExC_state, CANY);
	6408	RExC_seen \|= REG_SEEN_CANY;
	6409	*flagp \|= HASWIDTH\|SIMPLE;
	6410	goto finish_meta_pat;
	6411	case 'X':
	6412	ret = reg_node(pRExC_state, CLUMP);
	6413	*flagp \|= HASWIDTH;
	6414	goto finish_meta_pat;
	6415	case 'w':
	6416	ret = reg_node(pRExC_state, (U8)(LOC ? ALNUML : ALNUM));
	6417	*flagp \|= HASWIDTH\|SIMPLE;
	6418	goto finish_meta_pat;
	6419	case 'W':
	6420	ret = reg_node(pRExC_state, (U8)(LOC ? NALNUML : NALNUM));
	6421	*flagp \|= HASWIDTH\|SIMPLE;
	6422	goto finish_meta_pat;
	6423	case 'b':
	6424	RExC_seen_zerolen++;
	6425	RExC_seen \|= REG_SEEN_LOOKBEHIND;
	6426	ret = reg_node(pRExC_state, (U8)(LOC ? BOUNDL : BOUND));
	6427	*flagp \|= SIMPLE;
	6428	goto finish_meta_pat;
	6429	case 'B':
	6430	RExC_seen_zerolen++;
	6431	RExC_seen \|= REG_SEEN_LOOKBEHIND;
	6432	ret = reg_node(pRExC_state, (U8)(LOC ? NBOUNDL : NBOUND));
	6433	*flagp \|= SIMPLE;
	6434	goto finish_meta_pat;
	6435	case 's':
	6436	ret = reg_node(pRExC_state, (U8)(LOC ? SPACEL : SPACE));
	6437	*flagp \|= HASWIDTH\|SIMPLE;
	6438	goto finish_meta_pat;
	6439	case 'S':
	6440	ret = reg_node(pRExC_state, (U8)(LOC ? NSPACEL : NSPACE));
	6441	*flagp \|= HASWIDTH\|SIMPLE;
	6442	goto finish_meta_pat;
	6443	case 'd':
	6444	ret = reg_node(pRExC_state, DIGIT);
	6445	*flagp \|= HASWIDTH\|SIMPLE;
	6446	goto finish_meta_pat;
	6447	case 'D':
	6448	ret = reg_node(pRExC_state, NDIGIT);
	6449	*flagp \|= HASWIDTH\|SIMPLE;
	6450	goto finish_meta_pat;
	6451	case 'v':
	6452	ret = reganode(pRExC_state, PRUNE, 0);
	6453	ret->flags = 1;
	6454	*flagp \|= SIMPLE;
	6455	goto finish_meta_pat;
	6456	case 'V':
	6457	ret = reganode(pRExC_state, SKIP, 0);
	6458	ret->flags = 1;
	6459	*flagp \|= SIMPLE;
	6460	finish_meta_pat:
	6461	nextchar(pRExC_state);
	6462	Set_Node_Length(ret, 2); /* MJD */
	6463	break;
	6464	case 'p':
	6465	case 'P':
	6466	{
	6467	char* const oldregxend = RExC_end;
	6468	char* parse_start = RExC_parse - 2;
	6469
	6470	if (RExC_parse[1] == '{') {
	6471	/* a lovely hack--pretend we saw [\pX] instead */
	6472	RExC_end = strchr(RExC_parse, '}');
	6473	if (!RExC_end) {
	6474	const U8 c = (U8)*RExC_parse;
	6475	RExC_parse += 2;
	6476	RExC_end = oldregxend;
	6477	vFAIL2("Missing right brace on \\%c{}", c);
	6478	}
	6479	RExC_end++;
	6480	}
	6481	else {
	6482	RExC_end = RExC_parse + 2;
	6483	if (RExC_end > oldregxend)
	6484	RExC_end = oldregxend;
	6485	}
	6486	RExC_parse--;
	6487
	6488	ret = regclass(pRExC_state,depth+1);
	6489
	6490	RExC_end = oldregxend;
	6491	RExC_parse--;
	6492
	6493	Set_Node_Offset(ret, parse_start + 2);
	6494	Set_Node_Cur_Length(ret);
	6495	nextchar(pRExC_state);
	6496	*flagp \|= HASWIDTH\|SIMPLE;
	6497	}
	6498	break;
	6499	case 'N':
	6500	/* Handle \N{NAME} here and not below because it can be
	6501	multicharacter. join_exact() will join them up later on.
	6502	Also this makes sure that things like /\N{BLAH}+/ and
	6503	\N{BLAH} being multi char Just Happen. dmq*/
	6504	++RExC_parse;
	6505	ret= reg_namedseq(pRExC_state, NULL);
	6506	break;
	6507	case 'k': /* Handle \k<NAME> and \k'NAME' */
	6508	parse_named_seq:
	6509	{
	6510	char ch= RExC_parse[1];
	6511	if (ch != '<' && ch != '\'' && ch != '{') {
	6512	RExC_parse++;
	6513	vFAIL2("Sequence %.2s... not terminated",parse_start);
	6514	} else {
	6515	/* this pretty much dupes the code for (?P=...) in reg(), if
	6516	you change this make sure you change that */
	6517	char* name_start = (RExC_parse += 2);
	6518	U32 num = 0;
	6519	SV *sv_dat = reg_scan_name(pRExC_state,
	6520	SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
	6521	ch= (ch == '<') ? '>' : (ch == '{') ? '}' : '\'';
	6522	if (RExC_parse == name_start \|\| *RExC_parse != ch)
	6523	vFAIL2("Sequence %.3s... not terminated",parse_start);
	6524
	6525	if (!SIZE_ONLY) {
	6526	num = add_data( pRExC_state, 1, "S" );
	6527	RExC_rxi->data->data[num]=(void*)sv_dat;
	6528	SvREFCNT_inc(sv_dat);
	6529	}
	6530
	6531	RExC_sawback = 1;
	6532	ret = reganode(pRExC_state,
	6533	(U8)(FOLD ? (LOC ? NREFFL : NREFF) : NREF),
	6534	num);
	6535	*flagp \|= HASWIDTH;
	6536
	6537	/* override incorrect value set in reganode MJD */
	6538	Set_Node_Offset(ret, parse_start+1);
	6539	Set_Node_Cur_Length(ret); /* MJD */
	6540	nextchar(pRExC_state);
	6541
	6542	}
	6543	break;
	6544	}
	6545	case 'g':
	6546	case '1': case '2': case '3': case '4':
	6547	case '5': case '6': case '7': case '8': case '9':
	6548	{
	6549	I32 num;
	6550	bool isg = *RExC_parse == 'g';
	6551	bool isrel = 0;
	6552	bool hasbrace = 0;
	6553	if (isg) {
	6554	RExC_parse++;
	6555	if (*RExC_parse == '{') {
	6556	RExC_parse++;
	6557	hasbrace = 1;
	6558	}
	6559	if (*RExC_parse == '-') {
	6560	RExC_parse++;
	6561	isrel = 1;
	6562	}
	6563	if (hasbrace && !isDIGIT(*RExC_parse)) {
	6564	if (isrel) RExC_parse--;
	6565	RExC_parse -= 2;
	6566	goto parse_named_seq;
	6567	} }
	6568	num = atoi(RExC_parse);
	6569	if (isrel) {
	6570	num = RExC_npar - num;
	6571	if (num < 1)
	6572	vFAIL("Reference to nonexistent or unclosed group");
	6573	}
	6574	if (!isg && num > 9 && num >= RExC_npar)
	6575	goto defchar;
	6576	else {
	6577	char * const parse_start = RExC_parse - 1; /* MJD */
	6578	while (isDIGIT(*RExC_parse))
	6579	RExC_parse++;
	6580	if (parse_start == RExC_parse - 1)
	6581	vFAIL("Unterminated \\g... pattern");
	6582	if (hasbrace) {
	6583	if (*RExC_parse != '}')
	6584	vFAIL("Unterminated \\g{...} pattern");
	6585	RExC_parse++;
	6586	}
	6587	if (!SIZE_ONLY) {
	6588	if (num > (I32)RExC_rx->nparens)
	6589	vFAIL("Reference to nonexistent group");
	6590	}
	6591	RExC_sawback = 1;
	6592	ret = reganode(pRExC_state,
	6593	(U8)(FOLD ? (LOC ? REFFL : REFF) : REF),
	6594	num);
	6595	*flagp \|= HASWIDTH;
	6596
	6597	/* override incorrect value set in reganode MJD */
	6598	Set_Node_Offset(ret, parse_start+1);
	6599	Set_Node_Cur_Length(ret); /* MJD */
	6600	RExC_parse--;
	6601	nextchar(pRExC_state);
	6602	}
	6603	}
	6604	break;
	6605	case '\0':
	6606	if (RExC_parse >= RExC_end)
	6607	FAIL("Trailing \\");
	6608	/* FALL THROUGH */
	6609	default:
	6610	/* Do not generate "unrecognized" warnings here, we fall
	6611	back into the quick-grab loop below */
	6612	parse_start--;
	6613	goto defchar;
	6614	}
	6615	break;
	6616
	6617	case '#':
	6618	if (RExC_flags & RXf_PMf_EXTENDED) {
	6619	while (RExC_parse < RExC_end && *RExC_parse != '\n')
	6620	RExC_parse++;
	6621	if (RExC_parse < RExC_end)
	6622	goto tryagain;
	6623	}
	6624	/* FALL THROUGH */
	6625
	6626	default: {
	6627	register STRLEN len;
	6628	register UV ender;
	6629	register char *p;
	6630	char *s;
	6631	STRLEN foldlen;
	6632	U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
	6633
	6634	parse_start = RExC_parse - 1;
	6635
	6636	RExC_parse++;
	6637
	6638	defchar:
	6639	ender = 0;
	6640	ret = reg_node(pRExC_state,
	6641	(U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
	6642	s = STRING(ret);
	6643	for (len = 0, p = RExC_parse - 1;
	6644	len < 127 && p < RExC_end;
	6645	len++)
	6646	{
	6647	char * const oldp = p;
	6648
	6649	if (RExC_flags & RXf_PMf_EXTENDED)
	6650	p = regwhite(p, RExC_end);
	6651	switch (*p) {
	6652	case '^':
	6653	case '$':
	6654	case '.':
	6655	case '[':
	6656	case '(':
	6657	case ')':
	6658	case '\|':
	6659	goto loopdone;
	6660	case '\\':
	6661	/* Literal Escapes Switch
	6662
	6663	This switch is meant to handle escape sequences that
	6664	resolve to a literal character.
	6665
	6666	Every escape sequence that represents something
	6667	else, like an assertion or a char class, is handled
	6668	in the switch marked 'Special Escapes' above in this
	6669	routine, but also has an entry here as anything that
	6670	isn't explicitly mentioned here will be treated as
	6671	an unescaped equivalent literal.
	6672	*/
	6673
	6674	switch (*++p) {
	6675	/* These are all the special escapes. */
	6676	case 'A': /* Start assertion */
	6677	case 'b': case 'B': /* Word-boundary assertion*/
	6678	case 'C': /* Single char !DANGEROUS! */
	6679	case 'd': case 'D': /* digit class */
	6680	case 'g': case 'G': /* generic-backref, pos assertion */
	6681	case 'k': case 'K': /* named backref, keep marker */
	6682	case 'N': /* named char sequence */
	6683	case 'p': case 'P': /* unicode property */
	6684	case 's': case 'S': /* space class */
	6685	case 'v': case 'V': /* (PRUNE) and (SKIP) */
	6686	case 'w': case 'W': /* word class */
	6687	case 'X': /* eXtended Unicode "combining character sequence" */
	6688	case 'z': case 'Z': /* End of line/string assertion */
	6689	--p;
	6690	goto loopdone;
	6691
	6692	/* Anything after here is an escape that resolves to a
	6693	literal. (Except digits, which may or may not)
	6694	*/
	6695	case 'n':
	6696	ender = '\n';
	6697	p++;
	6698	break;
	6699	case 'r':
	6700	ender = '\r';
	6701	p++;
	6702	break;
	6703	case 't':
	6704	ender = '\t';
	6705	p++;
	6706	break;
	6707	case 'f':
	6708	ender = '\f';
	6709	p++;
	6710	break;
	6711	case 'e':
	6712	ender = ASCII_TO_NATIVE('\033');
	6713	p++;
	6714	break;
	6715	case 'a':
	6716	ender = ASCII_TO_NATIVE('\007');
	6717	p++;
	6718	break;
	6719	case 'x':
	6720	if (*++p == '{') {
	6721	char* const e = strchr(p, '}');
	6722
	6723	if (!e) {
	6724	RExC_parse = p + 1;
	6725	vFAIL("Missing right brace on \\x{}");
	6726	}
	6727	else {
	6728	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	6729	\| PERL_SCAN_DISALLOW_PREFIX;
	6730	STRLEN numlen = e - p - 1;
	6731	ender = grok_hex(p + 1, &numlen, &flags, NULL);
	6732	if (ender > 0xff)
	6733	RExC_utf8 = 1;
	6734	p = e + 1;
	6735	}
	6736	}
	6737	else {
	6738	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
	6739	STRLEN numlen = 2;
	6740	ender = grok_hex(p, &numlen, &flags, NULL);
	6741	p += numlen;
	6742	}
	6743	if (PL_encoding && ender < 0x100)
	6744	goto recode_encoding;
	6745	break;
	6746	case 'c':
	6747	p++;
	6748	ender = UCHARAT(p++);
	6749	ender = toCTRL(ender);
	6750	break;
	6751	case '0': case '1': case '2': case '3':case '4':
	6752	case '5': case '6': case '7': case '8':case '9':
	6753	if (*p == '0' \|\|
	6754	(isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) {
	6755	I32 flags = 0;
	6756	STRLEN numlen = 3;
	6757	ender = grok_oct(p, &numlen, &flags, NULL);
	6758	p += numlen;
	6759	}
	6760	else {
	6761	--p;
	6762	goto loopdone;
	6763	}
	6764	if (PL_encoding && ender < 0x100)
	6765	goto recode_encoding;
	6766	break;
	6767	recode_encoding:
	6768	{
	6769	SV* enc = PL_encoding;
	6770	ender = reg_recode((const char)(U8)ender, &enc);
	6771	if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
	6772	vWARN(p, "Invalid escape in the specified encoding");
	6773	RExC_utf8 = 1;
	6774	}
	6775	break;
	6776	case '\0':
	6777	if (p >= RExC_end)
	6778	FAIL("Trailing \\");
	6779	/* FALL THROUGH */
	6780	default:
	6781	if (!SIZE_ONLY&& isALPHA(*p) && ckWARN(WARN_REGEXP))
	6782	vWARN2(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
	6783	goto normal_default;
	6784	}
	6785	break;
	6786	default:
	6787	normal_default:
	6788	if (UTF8_IS_START(*p) && UTF) {
	6789	STRLEN numlen;
	6790	ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
	6791	&numlen, UTF8_ALLOW_DEFAULT);
	6792	p += numlen;
	6793	}
	6794	else
	6795	ender = *p++;
	6796	break;
	6797	}
	6798	if (RExC_flags & RXf_PMf_EXTENDED)
	6799	p = regwhite(p, RExC_end);
	6800	if (UTF && FOLD) {
	6801	/* Prime the casefolded buffer. */
	6802	ender = toFOLD_uni(ender, tmpbuf, &foldlen);
	6803	}
	6804	if (ISMULT2(p)) { /* Back off on ?+. /
	6805	if (len)
	6806	p = oldp;
	6807	else if (UTF) {
	6808	if (FOLD) {
	6809	/* Emit all the Unicode characters. */
	6810	STRLEN numlen;
	6811	for (foldbuf = tmpbuf;
	6812	foldlen;
	6813	foldlen -= numlen) {
	6814	ender = utf8_to_uvchr(foldbuf, &numlen);
	6815	if (numlen > 0) {
	6816	const STRLEN unilen = reguni(pRExC_state, ender, s);
	6817	s += unilen;
	6818	len += unilen;
	6819	/* In EBCDIC the numlen
	6820	* and unilen can differ. */
	6821	foldbuf += numlen;
	6822	if (numlen >= foldlen)
	6823	break;
	6824	}
	6825	else
	6826	break; /* "Can't happen." */
	6827	}
	6828	}
	6829	else {
	6830	const STRLEN unilen = reguni(pRExC_state, ender, s);
	6831	if (unilen > 0) {
	6832	s += unilen;
	6833	len += unilen;
	6834	}
	6835	}
	6836	}
	6837	else {
	6838	len++;
	6839	REGC((char)ender, s++);
	6840	}
	6841	break;
	6842	}
	6843	if (UTF) {
	6844	if (FOLD) {
	6845	/* Emit all the Unicode characters. */
	6846	STRLEN numlen;
	6847	for (foldbuf = tmpbuf;
	6848	foldlen;
	6849	foldlen -= numlen) {
	6850	ender = utf8_to_uvchr(foldbuf, &numlen);
	6851	if (numlen > 0) {
	6852	const STRLEN unilen = reguni(pRExC_state, ender, s);
	6853	len += unilen;
	6854	s += unilen;
	6855	/* In EBCDIC the numlen
	6856	* and unilen can differ. */
	6857	foldbuf += numlen;
	6858	if (numlen >= foldlen)
	6859	break;
	6860	}
	6861	else
	6862	break;
	6863	}
	6864	}
	6865	else {
	6866	const STRLEN unilen = reguni(pRExC_state, ender, s);
	6867	if (unilen > 0) {
	6868	s += unilen;
	6869	len += unilen;
	6870	}
	6871	}
	6872	len--;
	6873	}
	6874	else
	6875	REGC((char)ender, s++);
	6876	}
	6877	loopdone:
	6878	RExC_parse = p - 1;
	6879	Set_Node_Cur_Length(ret); /* MJD */
	6880	nextchar(pRExC_state);
	6881	{
	6882	/* len is STRLEN which is unsigned, need to copy to signed */
	6883	IV iv = len;
	6884	if (iv < 0)
	6885	vFAIL("Internal disaster");
	6886	}
	6887	if (len > 0)
	6888	*flagp \|= HASWIDTH;
	6889	if (len == 1 && UNI_IS_INVARIANT(ender))
	6890	*flagp \|= SIMPLE;
	6891
	6892	if (SIZE_ONLY)
	6893	RExC_size += STR_SZ(len);
	6894	else {
	6895	STR_LEN(ret) = len;
	6896	RExC_emit += STR_SZ(len);
	6897	}
	6898	}
	6899	break;
	6900	}
	6901
	6902	return(ret);
	6903	}
	6904
	6905	STATIC char *
	6906	S_regwhite(char p, const char e)
	6907	{
	6908	while (p < e) {
	6909	if (isSPACE(*p))
	6910	++p;
	6911	else if (*p == '#') {
	6912	do {
	6913	p++;
	6914	} while (p < e && *p != '\n');
	6915	}
	6916	else
	6917	break;
	6918	}
	6919	return p;
	6920	}
	6921
	6922	/* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]].
	6923	Character classes ([:foo:]) can also be negated ([:^foo:]).
	6924	Returns a named class id (ANYOF_XXX) if successful, -1 otherwise.
	6925	Equivalence classes ([=foo=]) and composites ([.foo.]) are parsed,
	6926	but trigger failures because they are currently unimplemented. */
	6927
	6928	#define POSIXCC_DONE(c) ((c) == ':')
	6929	#define POSIXCC_NOTYET(c) ((c) == '=' \|\| (c) == '.')
	6930	#define POSIXCC(c) (POSIXCC_DONE(c) \|\| POSIXCC_NOTYET(c))
	6931
	6932	STATIC I32
	6933	S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
	6934	{
	6935	dVAR;
	6936	I32 namedclass = OOB_NAMEDCLASS;
	6937
	6938	if (value == '[' && RExC_parse + 1 < RExC_end &&
	6939	/* I smell either [: or [= or [. -- POSIX has been here, right? */
	6940	POSIXCC(UCHARAT(RExC_parse))) {
	6941	const char c = UCHARAT(RExC_parse);
	6942	char* const s = RExC_parse++;
	6943
	6944	while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
	6945	RExC_parse++;
	6946	if (RExC_parse == RExC_end)
	6947	/* Grandfather lone [:, [=, [. */
	6948	RExC_parse = s;
	6949	else {
	6950	const char* const t = RExC_parse++; /* skip over the c */
	6951	assert(*t == c);
	6952
	6953	if (UCHARAT(RExC_parse) == ']') {
	6954	const char *posixcc = s + 1;
	6955	RExC_parse++; /* skip over the ending ] */
	6956
	6957	if (*s == ':') {
	6958	const I32 complement = posixcc == '^' ? posixcc++ : 0;
	6959	const I32 skip = t - posixcc;
	6960
	6961	/* Initially switch on the length of the name. */
	6962	switch (skip) {
	6963	case 4:
	6964	if (memEQ(posixcc, "word", 4)) /* this is not POSIX, this is the Perl \w */
	6965	namedclass = complement ? ANYOF_NALNUM : ANYOF_ALNUM;
	6966	break;
	6967	case 5:
	6968	/* Names all of length 5. */
	6969	/* alnum alpha ascii blank cntrl digit graph lower
	6970	print punct space upper */
	6971	/* Offset 4 gives the best switch position. */
	6972	switch (posixcc[4]) {
	6973	case 'a':
	6974	if (memEQ(posixcc, "alph", 4)) /* alpha */
	6975	namedclass = complement ? ANYOF_NALPHA : ANYOF_ALPHA;
	6976	break;
	6977	case 'e':
	6978	if (memEQ(posixcc, "spac", 4)) /* space */
	6979	namedclass = complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
	6980	break;
	6981	case 'h':
	6982	if (memEQ(posixcc, "grap", 4)) /* graph */
	6983	namedclass = complement ? ANYOF_NGRAPH : ANYOF_GRAPH;
	6984	break;
	6985	case 'i':
	6986	if (memEQ(posixcc, "asci", 4)) /* ascii */
	6987	namedclass = complement ? ANYOF_NASCII : ANYOF_ASCII;
	6988	break;
	6989	case 'k':
	6990	if (memEQ(posixcc, "blan", 4)) /* blank */
	6991	namedclass = complement ? ANYOF_NBLANK : ANYOF_BLANK;
	6992	break;
	6993	case 'l':
	6994	if (memEQ(posixcc, "cntr", 4)) /* cntrl */
	6995	namedclass = complement ? ANYOF_NCNTRL : ANYOF_CNTRL;
	6996	break;
	6997	case 'm':
	6998	if (memEQ(posixcc, "alnu", 4)) /* alnum */
	6999	namedclass = complement ? ANYOF_NALNUMC : ANYOF_ALNUMC;
	7000	break;
	7001	case 'r':
	7002	if (memEQ(posixcc, "lowe", 4)) /* lower */
	7003	namedclass = complement ? ANYOF_NLOWER : ANYOF_LOWER;
	7004	else if (memEQ(posixcc, "uppe", 4)) /* upper */
	7005	namedclass = complement ? ANYOF_NUPPER : ANYOF_UPPER;
	7006	break;
	7007	case 't':
	7008	if (memEQ(posixcc, "digi", 4)) /* digit */
	7009	namedclass = complement ? ANYOF_NDIGIT : ANYOF_DIGIT;
	7010	else if (memEQ(posixcc, "prin", 4)) /* print */
	7011	namedclass = complement ? ANYOF_NPRINT : ANYOF_PRINT;
	7012	else if (memEQ(posixcc, "punc", 4)) /* punct */
	7013	namedclass = complement ? ANYOF_NPUNCT : ANYOF_PUNCT;
	7014	break;
	7015	}
	7016	break;
	7017	case 6:
	7018	if (memEQ(posixcc, "xdigit", 6))
	7019	namedclass = complement ? ANYOF_NXDIGIT : ANYOF_XDIGIT;
	7020	break;
	7021	}
	7022
	7023	if (namedclass == OOB_NAMEDCLASS)
	7024	Simple_vFAIL3("POSIX class [:%.*s:] unknown",
	7025	t - s - 1, s + 1);
	7026	assert (posixcc[skip] == ':');
	7027	assert (posixcc[skip+1] == ']');
	7028	} else if (!SIZE_ONLY) {
	7029	/* [[=foo=]] and [[.foo.]] are still future. */
	7030
	7031	/* adjust RExC_parse so the warning shows after
	7032	the class closes */
	7033	while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse) != ']')
	7034	RExC_parse++;
	7035	Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
	7036	}
	7037	} else {
	7038	/* Maternal grandfather:
	7039	* "[:" ending in ":" but not in ":]" */
	7040	RExC_parse = s;
	7041	}
	7042	}
	7043	}
	7044
	7045	return namedclass;
	7046	}
	7047
	7048	STATIC void
	7049	S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
	7050	{
	7051	dVAR;
	7052	if (POSIXCC(UCHARAT(RExC_parse))) {
	7053	const char *s = RExC_parse;
	7054	const char c = *s++;
	7055
	7056	while (isALNUM(*s))
	7057	s++;
	7058	if (s && c == s && s[1] == ']') {
	7059	if (ckWARN(WARN_REGEXP))
	7060	vWARN3(s+2,
	7061	"POSIX syntax [%c %c] belongs inside character classes",
	7062	c, c);
	7063
	7064	/* [[=foo=]] and [[.foo.]] are still future. */
	7065	if (POSIXCC_NOTYET(c)) {
	7066	/* adjust RExC_parse so the error shows after
	7067	the class closes */
	7068	while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse++) != ']')
	7069	NOOP;
	7070	Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
	7071	}
	7072	}
	7073	}
	7074	}
	7075
	7076
	7077	#define _C_C_T_(NAME,TEST,WORD) \
	7078	ANYOF_##NAME: \
	7079	if (LOC) \
	7080	ANYOF_CLASS_SET(ret, ANYOF_##NAME); \
	7081	else { \
	7082	for (value = 0; value < 256; value++) \
	7083	if (TEST) \
	7084	ANYOF_BITMAP_SET(ret, value); \
	7085	} \
	7086	yesno = '+'; \
	7087	what = WORD; \
	7088	break; \
	7089	case ANYOF_N##NAME: \
	7090	if (LOC) \
	7091	ANYOF_CLASS_SET(ret, ANYOF_N##NAME); \
	7092	else { \
	7093	for (value = 0; value < 256; value++) \
	7094	if (!TEST) \
	7095	ANYOF_BITMAP_SET(ret, value); \
	7096	} \
	7097	yesno = '!'; \
	7098	what = WORD; \
	7099	break
	7100
	7101
	7102	/*
	7103	parse a class specification and produce either an ANYOF node that
	7104	matches the pattern or if the pattern matches a single char only and
	7105	that char is < 256 and we are case insensitive then we produce an
	7106	EXACT node instead.
	7107	*/
	7108
	7109	STATIC regnode *
	7110	S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
	7111	{
	7112	dVAR;
	7113	register UV value = 0;
	7114	register UV nextvalue;
	7115	register IV prevvalue = OOB_UNICODE;
	7116	register IV range = 0;
	7117	register regnode *ret;
	7118	STRLEN numlen;
	7119	IV namedclass;
	7120	char *rangebegin = NULL;
	7121	bool need_class = 0;
	7122	SV *listsv = NULL;
	7123	UV n;
	7124	bool optimize_invert = TRUE;
	7125	AV* unicode_alternate = NULL;
	7126	#ifdef EBCDIC
	7127	UV literal_endpoint = 0;
	7128	#endif
	7129	UV stored = 0; /* number of chars stored in the class */
	7130
	7131	regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
	7132	case we need to change the emitted regop to an EXACT. */
	7133	const char * orig_parse = RExC_parse;
	7134	GET_RE_DEBUG_FLAGS_DECL;
	7135	#ifndef DEBUGGING
	7136	PERL_UNUSED_ARG(depth);
	7137	#endif
	7138
	7139	DEBUG_PARSE("clas");
	7140
	7141	/* Assume we are going to generate an ANYOF node. */
	7142	ret = reganode(pRExC_state, ANYOF, 0);
	7143
	7144	if (!SIZE_ONLY)
	7145	ANYOF_FLAGS(ret) = 0;
	7146
	7147	if (UCHARAT(RExC_parse) == '^') { /* Complement of range. */
	7148	RExC_naughty++;
	7149	RExC_parse++;
	7150	if (!SIZE_ONLY)
	7151	ANYOF_FLAGS(ret) \|= ANYOF_INVERT;
	7152	}
	7153
	7154	if (SIZE_ONLY) {
	7155	RExC_size += ANYOF_SKIP;
	7156	listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
	7157	}
	7158	else {
	7159	RExC_emit += ANYOF_SKIP;
	7160	if (FOLD)
	7161	ANYOF_FLAGS(ret) \|= ANYOF_FOLD;
	7162	if (LOC)
	7163	ANYOF_FLAGS(ret) \|= ANYOF_LOCALE;
	7164	ANYOF_BITMAP_ZERO(ret);
	7165	listsv = newSVpvs("# comment\n");
	7166	}
	7167
	7168	nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
	7169
	7170	if (!SIZE_ONLY && POSIXCC(nextvalue))
	7171	checkposixcc(pRExC_state);
	7172
	7173	/* allow 1st char to be ] (allowing it to be - is dealt with later) */
	7174	if (UCHARAT(RExC_parse) == ']')
	7175	goto charclassloop;
	7176
	7177	parseit:
	7178	while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
	7179
	7180	charclassloop:
	7181
	7182	namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
	7183
	7184	if (!range)
	7185	rangebegin = RExC_parse;
	7186	if (UTF) {
	7187	value = utf8n_to_uvchr((U8*)RExC_parse,
	7188	RExC_end - RExC_parse,
	7189	&numlen, UTF8_ALLOW_DEFAULT);
	7190	RExC_parse += numlen;
	7191	}
	7192	else
	7193	value = UCHARAT(RExC_parse++);
	7194
	7195	nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
	7196	if (value == '[' && POSIXCC(nextvalue))
	7197	namedclass = regpposixcc(pRExC_state, value);
	7198	else if (value == '\\') {
	7199	if (UTF) {
	7200	value = utf8n_to_uvchr((U8*)RExC_parse,
	7201	RExC_end - RExC_parse,
	7202	&numlen, UTF8_ALLOW_DEFAULT);
	7203	RExC_parse += numlen;
	7204	}
	7205	else
	7206	value = UCHARAT(RExC_parse++);
	7207	/* Some compilers cannot handle switching on 64-bit integer
	7208	* values, therefore value cannot be an UV. Yes, this will
	7209	* be a problem later if we want switch on Unicode.
	7210	* A similar issue a little bit later when switching on
	7211	* namedclass. --jhi */
	7212	switch ((I32)value) {
	7213	case 'w': namedclass = ANYOF_ALNUM; break;
	7214	case 'W': namedclass = ANYOF_NALNUM; break;
	7215	case 's': namedclass = ANYOF_SPACE; break;
	7216	case 'S': namedclass = ANYOF_NSPACE; break;
	7217	case 'd': namedclass = ANYOF_DIGIT; break;
	7218	case 'D': namedclass = ANYOF_NDIGIT; break;
	7219	case 'N': /* Handle \N{NAME} in class */
	7220	{
	7221	/* We only pay attention to the first char of
	7222	multichar strings being returned. I kinda wonder
	7223	if this makes sense as it does change the behaviour
	7224	from earlier versions, OTOH that behaviour was broken
	7225	as well. */
	7226	UV v; /* value is register so we cant & it /grrr */
	7227	if (reg_namedseq(pRExC_state, &v)) {
	7228	goto parseit;
	7229	}
	7230	value= v;
	7231	}
	7232	break;
	7233	case 'p':
	7234	case 'P':
	7235	{
	7236	char *e;
	7237	if (RExC_parse >= RExC_end)
	7238	vFAIL2("Empty \\%c{}", (U8)value);
	7239	if (*RExC_parse == '{') {
	7240	const U8 c = (U8)value;
	7241	e = strchr(RExC_parse++, '}');
	7242	if (!e)
	7243	vFAIL2("Missing right brace on \\%c{}", c);
	7244	while (isSPACE(UCHARAT(RExC_parse)))
	7245	RExC_parse++;
	7246	if (e == RExC_parse)
	7247	vFAIL2("Empty \\%c{}", c);
	7248	n = e - RExC_parse;
	7249	while (isSPACE(UCHARAT(RExC_parse + n - 1)))
	7250	n--;
	7251	}
	7252	else {
	7253	e = RExC_parse;
	7254	n = 1;
	7255	}
	7256	if (!SIZE_ONLY) {
	7257	if (UCHARAT(RExC_parse) == '^') {
	7258	RExC_parse++;
	7259	n--;
	7260	value = value == 'p' ? 'P' : 'p'; /* toggle */
	7261	while (isSPACE(UCHARAT(RExC_parse))) {
	7262	RExC_parse++;
	7263	n--;
	7264	}
	7265	}
	7266	Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n",
	7267	(value=='p' ? '+' : '!'), (int)n, RExC_parse);
	7268	}
	7269	RExC_parse = e + 1;
	7270	ANYOF_FLAGS(ret) \|= ANYOF_UNICODE;
	7271	namedclass = ANYOF_MAX; /* no official name, but it's named */
	7272	}
	7273	break;
	7274	case 'n': value = '\n'; break;
	7275	case 'r': value = '\r'; break;
	7276	case 't': value = '\t'; break;
	7277	case 'f': value = '\f'; break;
	7278	case 'b': value = '\b'; break;
	7279	case 'e': value = ASCII_TO_NATIVE('\033');break;
	7280	case 'a': value = ASCII_TO_NATIVE('\007');break;
	7281	case 'x':
	7282	if (*RExC_parse == '{') {
	7283	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	7284	\| PERL_SCAN_DISALLOW_PREFIX;
	7285	char * const e = strchr(RExC_parse++, '}');
	7286	if (!e)
	7287	vFAIL("Missing right brace on \\x{}");
	7288
	7289	numlen = e - RExC_parse;
	7290	value = grok_hex(RExC_parse, &numlen, &flags, NULL);
	7291	RExC_parse = e + 1;
	7292	}
	7293	else {
	7294	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
	7295	numlen = 2;
	7296	value = grok_hex(RExC_parse, &numlen, &flags, NULL);
	7297	RExC_parse += numlen;
	7298	}
	7299	if (PL_encoding && value < 0x100)
	7300	goto recode_encoding;
	7301	break;
	7302	case 'c':
	7303	value = UCHARAT(RExC_parse++);
	7304	value = toCTRL(value);
	7305	break;
	7306	case '0': case '1': case '2': case '3': case '4':
	7307	case '5': case '6': case '7': case '8': case '9':
	7308	{
	7309	I32 flags = 0;
	7310	numlen = 3;
	7311	value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
	7312	RExC_parse += numlen;
	7313	if (PL_encoding && value < 0x100)
	7314	goto recode_encoding;
	7315	break;
	7316	}
	7317	recode_encoding:
	7318	{
	7319	SV* enc = PL_encoding;
	7320	value = reg_recode((const char)(U8)value, &enc);
	7321	if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
	7322	vWARN(RExC_parse,
	7323	"Invalid escape in the specified encoding");
	7324	break;
	7325	}
	7326	default:
	7327	if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
	7328	vWARN2(RExC_parse,
	7329	"Unrecognized escape \\%c in character class passed through",
	7330	(int)value);
	7331	break;
	7332	}
	7333	} /* end of \blah */
	7334	#ifdef EBCDIC
	7335	else
	7336	literal_endpoint++;
	7337	#endif
	7338
	7339	if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
	7340
	7341	if (!SIZE_ONLY && !need_class)
	7342	ANYOF_CLASS_ZERO(ret);
	7343
	7344	need_class = 1;
	7345
	7346	/* a bad range like a-\d, a-[:digit:] ? */
	7347	if (range) {
	7348	if (!SIZE_ONLY) {
	7349	if (ckWARN(WARN_REGEXP)) {
	7350	const int w =
	7351	RExC_parse >= rangebegin ?
	7352	RExC_parse - rangebegin : 0;
	7353	vWARN4(RExC_parse,
	7354	"False [] range \"%.s\"",
	7355	w, w, rangebegin);
	7356	}
	7357	if (prevvalue < 256) {
	7358	ANYOF_BITMAP_SET(ret, prevvalue);
	7359	ANYOF_BITMAP_SET(ret, '-');
	7360	}
	7361	else {
	7362	ANYOF_FLAGS(ret) \|= ANYOF_UNICODE;
	7363	Perl_sv_catpvf(aTHX_ listsv,
	7364	"%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
	7365	}
	7366	}
	7367
	7368	range = 0; /* this was not a true range */
	7369	}
	7370
	7371
	7372
	7373	if (!SIZE_ONLY) {
	7374	const char *what = NULL;
	7375	char yesno = 0;
	7376
	7377	if (namedclass > OOB_NAMEDCLASS)
	7378	optimize_invert = FALSE;
	7379	/* Possible truncation here but in some 64-bit environments
	7380	* the compiler gets heartburn about switch on 64-bit values.
	7381	* A similar issue a little earlier when switching on value.
	7382	* --jhi */
	7383	switch ((I32)namedclass) {
	7384	case _C_C_T_(ALNUM, isALNUM(value), "Word");
	7385	case _C_C_T_(ALNUMC, isALNUMC(value), "Alnum");
	7386	case _C_C_T_(ALPHA, isALPHA(value), "Alpha");
	7387	case _C_C_T_(BLANK, isBLANK(value), "Blank");
	7388	case _C_C_T_(CNTRL, isCNTRL(value), "Cntrl");
	7389	case _C_C_T_(GRAPH, isGRAPH(value), "Graph");
	7390	case _C_C_T_(LOWER, isLOWER(value), "Lower");
	7391	case _C_C_T_(PRINT, isPRINT(value), "Print");
	7392	case _C_C_T_(PSXSPC, isPSXSPC(value), "Space");
	7393	case _C_C_T_(PUNCT, isPUNCT(value), "Punct");
	7394	case _C_C_T_(SPACE, isSPACE(value), "SpacePerl");
	7395	case _C_C_T_(UPPER, isUPPER(value), "Upper");
	7396	case _C_C_T_(XDIGIT, isXDIGIT(value), "XDigit");
	7397	case ANYOF_ASCII:
	7398	if (LOC)
	7399	ANYOF_CLASS_SET(ret, ANYOF_ASCII);
	7400	else {
	7401	#ifndef EBCDIC
	7402	for (value = 0; value < 128; value++)
	7403	ANYOF_BITMAP_SET(ret, value);
	7404	#else /* EBCDIC */
	7405	for (value = 0; value < 256; value++) {
	7406	if (isASCII(value))
	7407	ANYOF_BITMAP_SET(ret, value);
	7408	}
	7409	#endif /* EBCDIC */
	7410	}
	7411	yesno = '+';
	7412	what = "ASCII";
	7413	break;
	7414	case ANYOF_NASCII:
	7415	if (LOC)
	7416	ANYOF_CLASS_SET(ret, ANYOF_NASCII);
	7417	else {
	7418	#ifndef EBCDIC
	7419	for (value = 128; value < 256; value++)
	7420	ANYOF_BITMAP_SET(ret, value);
	7421	#else /* EBCDIC */
	7422	for (value = 0; value < 256; value++) {
	7423	if (!isASCII(value))
	7424	ANYOF_BITMAP_SET(ret, value);
	7425	}
	7426	#endif /* EBCDIC */
	7427	}
	7428	yesno = '!';
	7429	what = "ASCII";
	7430	break;
	7431	case ANYOF_DIGIT:
	7432	if (LOC)
	7433	ANYOF_CLASS_SET(ret, ANYOF_DIGIT);
	7434	else {
	7435	/* consecutive digits assumed */
	7436	for (value = '0'; value <= '9'; value++)
	7437	ANYOF_BITMAP_SET(ret, value);
	7438	}
	7439	yesno = '+';
	7440	what = "Digit";
	7441	break;
	7442	case ANYOF_NDIGIT:
	7443	if (LOC)
	7444	ANYOF_CLASS_SET(ret, ANYOF_NDIGIT);
	7445	else {
	7446	/* consecutive digits assumed */
	7447	for (value = 0; value < '0'; value++)
	7448	ANYOF_BITMAP_SET(ret, value);
	7449	for (value = '9' + 1; value < 256; value++)
	7450	ANYOF_BITMAP_SET(ret, value);
	7451	}
	7452	yesno = '!';
	7453	what = "Digit";
	7454	break;
	7455	case ANYOF_MAX:
	7456	/* this is to handle \p and \P */
	7457	break;
	7458	default:
	7459	vFAIL("Invalid [::] class");
	7460	break;
	7461	}
	7462	if (what) {
	7463	/* Strings such as "+utf8::isWord\n" */
	7464	Perl_sv_catpvf(aTHX_ listsv, "%cutf8::Is%s\n", yesno, what);
	7465	}
	7466	if (LOC)
	7467	ANYOF_FLAGS(ret) \|= ANYOF_CLASS;
	7468	continue;
	7469	}
	7470	} /* end of namedclass \blah */
	7471
	7472	if (range) {
	7473	if (prevvalue > (IV)value) /* b-a */ {
	7474	const int w = RExC_parse - rangebegin;
	7475	Simple_vFAIL4("Invalid [] range \"%.s\"", w, w, rangebegin);
	7476	range = 0; /* not a valid range */
	7477	}
	7478	}
	7479	else {
	7480	prevvalue = value; /* save the beginning of the range */
	7481	if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
	7482	RExC_parse[1] != ']') {
	7483	RExC_parse++;
	7484
	7485	/* a bad range like \w-, [:word:]- ? */
	7486	if (namedclass > OOB_NAMEDCLASS) {
	7487	if (ckWARN(WARN_REGEXP)) {
	7488	const int w =
	7489	RExC_parse >= rangebegin ?
	7490	RExC_parse - rangebegin : 0;
	7491	vWARN4(RExC_parse,
	7492	"False [] range \"%.s\"",
	7493	w, w, rangebegin);
	7494	}
	7495	if (!SIZE_ONLY)
	7496	ANYOF_BITMAP_SET(ret, '-');
	7497	} else
	7498	range = 1; /* yeah, it's a range! */
	7499	continue; /* but do it the next time */
	7500	}
	7501	}
	7502
	7503	/* now is the next time */
	7504	/stored += (value - prevvalue + 1);/
	7505	if (!SIZE_ONLY) {
	7506	if (prevvalue < 256) {
	7507	const IV ceilvalue = value < 256 ? value : 255;
	7508	IV i;
	7509	#ifdef EBCDIC
	7510	/* In EBCDIC [\x89-\x91] should include
	7511	* the \x8e but [i-j] should not. */
	7512	if (literal_endpoint == 2 &&
	7513	((isLOWER(prevvalue) && isLOWER(ceilvalue)) \|\|
	7514	(isUPPER(prevvalue) && isUPPER(ceilvalue))))
	7515	{
	7516	if (isLOWER(prevvalue)) {
	7517	for (i = prevvalue; i <= ceilvalue; i++)
	7518	if (isLOWER(i))
	7519	ANYOF_BITMAP_SET(ret, i);
	7520	} else {
	7521	for (i = prevvalue; i <= ceilvalue; i++)
	7522	if (isUPPER(i))
	7523	ANYOF_BITMAP_SET(ret, i);
	7524	}
	7525	}
	7526	else
	7527	#endif
	7528	for (i = prevvalue; i <= ceilvalue; i++) {
	7529	if (!ANYOF_BITMAP_TEST(ret,i)) {
	7530	stored++;
	7531	ANYOF_BITMAP_SET(ret, i);
	7532	}
	7533	}
	7534	}
	7535	if (value > 255 \|\| UTF) {
	7536	const UV prevnatvalue = NATIVE_TO_UNI(prevvalue);
	7537	const UV natvalue = NATIVE_TO_UNI(value);
	7538	stored+=2; /* can't optimize this class */
	7539	ANYOF_FLAGS(ret) \|= ANYOF_UNICODE;
	7540	if (prevnatvalue < natvalue) { /* what about > ? */
	7541	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
	7542	prevnatvalue, natvalue);
	7543	}
	7544	else if (prevnatvalue == natvalue) {
	7545	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", natvalue);
	7546	if (FOLD) {
	7547	U8 foldbuf[UTF8_MAXBYTES_CASE+1];
	7548	STRLEN foldlen;
	7549	const UV f = to_uni_fold(natvalue, foldbuf, &foldlen);
	7550
	7551	#ifdef EBCDIC /* RD t/uni/fold ff and 6b */
	7552	if (RExC_precomp[0] == ':' &&
	7553	RExC_precomp[1] == '[' &&
	7554	(f == 0xDF \|\| f == 0x92)) {
	7555	f = NATIVE_TO_UNI(f);
	7556	}
	7557	#endif
	7558	/* If folding and foldable and a single
	7559	* character, insert also the folded version
	7560	* to the charclass. */
	7561	if (f != value) {
	7562	#ifdef EBCDIC /* RD tunifold ligatures s,t fb05, fb06 */
	7563	if ((RExC_precomp[0] == ':' &&
	7564	RExC_precomp[1] == '[' &&
	7565	(f == 0xA2 &&
	7566	(value == 0xFB05 \|\| value == 0xFB06))) ?
	7567	foldlen == ((STRLEN)UNISKIP(f) - 1) :
	7568	foldlen == (STRLEN)UNISKIP(f) )
	7569	#else
	7570	if (foldlen == (STRLEN)UNISKIP(f))
	7571	#endif
	7572	Perl_sv_catpvf(aTHX_ listsv,
	7573	"%04"UVxf"\n", f);
	7574	else {
	7575	/* Any multicharacter foldings
	7576	* require the following transform:
	7577	* [ABCDEF] -> (?:[ABCabcDEFd]\|pq\|rst)
	7578	* where E folds into "pq" and F folds
	7579	* into "rst", all other characters
	7580	* fold to single characters. We save
	7581	* away these multicharacter foldings,
	7582	* to be later saved as part of the
	7583	* additional "s" data. */
	7584	SV *sv;
	7585
	7586	if (!unicode_alternate)
	7587	unicode_alternate = newAV();
	7588	sv = newSVpvn((char*)foldbuf, foldlen);
	7589	SvUTF8_on(sv);
	7590	av_push(unicode_alternate, sv);
	7591	}
	7592	}
	7593
	7594	/* If folding and the value is one of the Greek
	7595	* sigmas insert a few more sigmas to make the
	7596	* folding rules of the sigmas to work right.
	7597	* Note that not all the possible combinations
	7598	* are handled here: some of them are handled
	7599	* by the standard folding rules, and some of
	7600	* them (literal or EXACTF cases) are handled
	7601	* during runtime in regexec.c:S_find_byclass(). */
	7602	if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
	7603	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
	7604	(UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
	7605	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
	7606	(UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
	7607	}
	7608	else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA)
	7609	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
	7610	(UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
	7611	}
	7612	}
	7613	}
	7614	#ifdef EBCDIC
	7615	literal_endpoint = 0;
	7616	#endif
	7617	}
	7618
	7619	range = 0; /* this range (if it was one) is done now */
	7620	}
	7621
	7622	if (need_class) {
	7623	ANYOF_FLAGS(ret) \|= ANYOF_LARGE;
	7624	if (SIZE_ONLY)
	7625	RExC_size += ANYOF_CLASS_ADD_SKIP;
	7626	else
	7627	RExC_emit += ANYOF_CLASS_ADD_SKIP;
	7628	}
	7629
	7630
	7631	if (SIZE_ONLY)
	7632	return ret;
	7633	/**** !SIZE_ONLY AFTER HERE *******/
	7634
	7635	if( stored == 1 && value < 256
	7636	&& !( ANYOF_FLAGS(ret) & ( ANYOF_FLAGS_ALL ^ ANYOF_FOLD ) )
	7637	) {
	7638	/* optimize single char class to an EXACT node
	7639	but only when its not a UTF/high char */
	7640	const char * cur_parse= RExC_parse;
	7641	RExC_emit = (regnode *)orig_emit;
	7642	RExC_parse = (char *)orig_parse;
	7643	ret = reg_node(pRExC_state,
	7644	(U8)((ANYOF_FLAGS(ret) & ANYOF_FOLD) ? EXACTF : EXACT));
	7645	RExC_parse = (char *)cur_parse;
	7646	*STRING(ret)= (char)value;
	7647	STR_LEN(ret)= 1;
	7648	RExC_emit += STR_SZ(1);
	7649	return ret;
	7650	}
	7651	/* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
	7652	if ( /* If the only flag is folding (plus possibly inversion). */
	7653	((ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD)
	7654	) {
	7655	for (value = 0; value < 256; ++value) {
	7656	if (ANYOF_BITMAP_TEST(ret, value)) {
	7657	UV fold = PL_fold[value];
	7658
	7659	if (fold != value)
	7660	ANYOF_BITMAP_SET(ret, fold);
	7661	}
	7662	}
	7663	ANYOF_FLAGS(ret) &= ~ANYOF_FOLD;
	7664	}
	7665
	7666	/* optimize inverted simple patterns (e.g. [^a-z]) */
	7667	if (optimize_invert &&
	7668	/* If the only flag is inversion. */
	7669	(ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
	7670	for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
	7671	ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL;
	7672	ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
	7673	}
	7674	{
	7675	AV * const av = newAV();
	7676	SV *rv;
	7677	/* The 0th element stores the character class description
	7678	* in its textual form: used later (regexec.c:Perl_regclass_swash())
	7679	* to initialize the appropriate swash (which gets stored in
	7680	* the 1st element), and also useful for dumping the regnode.
	7681	* The 2nd element stores the multicharacter foldings,
	7682	* used later (regexec.c:S_reginclass()). */
	7683	av_store(av, 0, listsv);
	7684	av_store(av, 1, NULL);
	7685	av_store(av, 2, (SV*)unicode_alternate);
	7686	rv = newRV_noinc((SV*)av);
	7687	n = add_data(pRExC_state, 1, "s");
	7688	RExC_rxi->data->data[n] = (void*)rv;
	7689	ARG_SET(ret, n);
	7690	}
	7691	return ret;
	7692	}
	7693	#undef _C_C_T_
	7694
	7695
	7696	STATIC char*
	7697	S_nextchar(pTHX_ RExC_state_t *pRExC_state)
	7698	{
	7699	char* const retval = RExC_parse++;
	7700
	7701	for (;;) {
	7702	if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
	7703	RExC_parse[2] == '#') {
	7704	while (*RExC_parse != ')') {
	7705	if (RExC_parse == RExC_end)
	7706	FAIL("Sequence (?#... not terminated");
	7707	RExC_parse++;
	7708	}
	7709	RExC_parse++;
	7710	continue;
	7711	}
	7712	if (RExC_flags & RXf_PMf_EXTENDED) {
	7713	if (isSPACE(*RExC_parse)) {
	7714	RExC_parse++;
	7715	continue;
	7716	}
	7717	else if (*RExC_parse == '#') {
	7718	while (RExC_parse < RExC_end)
	7719	if (*RExC_parse++ == '\n') break;
	7720	continue;
	7721	}
	7722	}
	7723	return retval;
	7724	}
	7725	}
	7726
	7727	/*
	7728	- reg_node - emit a node
	7729	*/
	7730	STATIC regnode * /* Location. */
	7731	S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
	7732	{
	7733	dVAR;
	7734	register regnode *ptr;
	7735	regnode * const ret = RExC_emit;
	7736	GET_RE_DEBUG_FLAGS_DECL;
	7737
	7738	if (SIZE_ONLY) {
	7739	SIZE_ALIGN(RExC_size);
	7740	RExC_size += 1;
	7741	return(ret);
	7742	}
	7743	#ifdef DEBUGGING
	7744	if (OP(RExC_emit) == 255)
	7745	Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %s: %d ",
	7746	reg_name[op], OP(RExC_emit));
	7747	#endif
	7748	NODE_ALIGN_FILL(ret);
	7749	ptr = ret;
	7750	FILL_ADVANCE_NODE(ptr, op);
	7751	if (RExC_offsets) { /* MJD */
	7752	MJD_OFFSET_DEBUG(("%s:%d: (op %s) %s %"UVuf" (len %"UVuf") (max %"UVuf").\n",
	7753	"reg_node", __LINE__,
	7754	reg_name[op],
	7755	(UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0]
	7756	? "Overwriting end of array!\n" : "OK",
	7757	(UV)(RExC_emit - RExC_emit_start),
	7758	(UV)(RExC_parse - RExC_start),
	7759	(UV)RExC_offsets[0]));
	7760	Set_Node_Offset(RExC_emit, RExC_parse + (op == END));
	7761	}
	7762
	7763	RExC_emit = ptr;
	7764	return(ret);
	7765	}
	7766
	7767	/*
	7768	- reganode - emit a node with an argument
	7769	*/
	7770	STATIC regnode * /* Location. */
	7771	S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
	7772	{
	7773	dVAR;
	7774	register regnode *ptr;
	7775	regnode * const ret = RExC_emit;
	7776	GET_RE_DEBUG_FLAGS_DECL;
	7777
	7778	if (SIZE_ONLY) {
	7779	SIZE_ALIGN(RExC_size);
	7780	RExC_size += 2;
	7781	/*
	7782	We can't do this:
	7783
	7784	assert(2==regarglen[op]+1);
	7785
	7786	Anything larger than this has to allocate the extra amount.
	7787	If we changed this to be:
	7788
	7789	RExC_size += (1 + regarglen[op]);
	7790
	7791	then it wouldn't matter. Its not clear what side effect
	7792	might come from that so its not done so far.
	7793	-- dmq
	7794	*/
	7795	return(ret);
	7796	}
	7797	#ifdef DEBUGGING
	7798	if (OP(RExC_emit) == 255)
	7799	Perl_croak(aTHX_ "panic: reganode overwriting end of allocated program space");
	7800	#endif
	7801	NODE_ALIGN_FILL(ret);
	7802	ptr = ret;
	7803	FILL_ADVANCE_NODE_ARG(ptr, op, arg);
	7804	if (RExC_offsets) { /* MJD */
	7805	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n",
	7806	"reganode",
	7807	__LINE__,
	7808	reg_name[op],
	7809	(UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0] ?
	7810	"Overwriting end of array!\n" : "OK",
	7811	(UV)(RExC_emit - RExC_emit_start),
	7812	(UV)(RExC_parse - RExC_start),
	7813	(UV)RExC_offsets[0]));
	7814	Set_Cur_Node_Offset;
	7815	}
	7816
	7817	RExC_emit = ptr;
	7818	return(ret);
	7819	}
	7820
	7821	/*
	7822	- reguni - emit (if appropriate) a Unicode character
	7823	*/
	7824	STATIC STRLEN
	7825	S_reguni(pTHX_ const RExC_state_t pRExC_state, UV uv, char s)
	7826	{
	7827	dVAR;
	7828	return SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8)s, uv) - (U8)s);
	7829	}
	7830
	7831	/*
	7832	- reginsert - insert an operator in front of already-emitted operand
	7833	*
	7834	* Means relocating the operand.
	7835	*/
	7836	STATIC void
	7837	S_reginsert(pTHX_ RExC_state_t pRExC_state, U8 op, regnode opnd, U32 depth)
	7838	{
	7839	dVAR;
	7840	register regnode *src;
	7841	register regnode *dst;
	7842	register regnode *place;
	7843	const int offset = regarglen[(U8)op];
	7844	const int size = NODE_STEP_REGNODE + offset;
	7845	GET_RE_DEBUG_FLAGS_DECL;
	7846	/* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
	7847	DEBUG_PARSE_FMT("inst"," - %s",reg_name[op]);
	7848	if (SIZE_ONLY) {
	7849	RExC_size += size;
	7850	return;
	7851	}
	7852
	7853	src = RExC_emit;
	7854	RExC_emit += size;
	7855	dst = RExC_emit;
	7856	if (RExC_open_parens) {
	7857	int paren;
	7858	DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);
	7859	for ( paren=0 ; paren < RExC_npar ; paren++ ) {
	7860	if ( RExC_open_parens[paren] >= opnd ) {
	7861	DEBUG_PARSE_FMT("open"," - %d",size);
	7862	RExC_open_parens[paren] += size;
	7863	} else {
	7864	DEBUG_PARSE_FMT("open"," - %s","ok");
	7865	}
	7866	if ( RExC_close_parens[paren] >= opnd ) {
	7867	DEBUG_PARSE_FMT("close"," - %d",size);
	7868	RExC_close_parens[paren] += size;
	7869	} else {
	7870	DEBUG_PARSE_FMT("close"," - %s","ok");
	7871	}
	7872	}
	7873	}
	7874
	7875	while (src > opnd) {
	7876	StructCopy(--src, --dst, regnode);
	7877	if (RExC_offsets) { /* MJD 20010112 */
	7878	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s copy %"UVuf" -> %"UVuf" (max %"UVuf").\n",
	7879	"reg_insert",
	7880	__LINE__,
	7881	reg_name[op],
	7882	(UV)(dst - RExC_emit_start) > RExC_offsets[0]
	7883	? "Overwriting end of array!\n" : "OK",
	7884	(UV)(src - RExC_emit_start),
	7885	(UV)(dst - RExC_emit_start),
	7886	(UV)RExC_offsets[0]));
	7887	Set_Node_Offset_To_R(dst-RExC_emit_start, Node_Offset(src));
	7888	Set_Node_Length_To_R(dst-RExC_emit_start, Node_Length(src));
	7889	}
	7890	}
	7891
	7892
	7893	place = opnd; /* Op node, where operand used to be. */
	7894	if (RExC_offsets) { /* MJD */
	7895	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n",
	7896	"reginsert",
	7897	__LINE__,
	7898	reg_name[op],
	7899	(UV)(place - RExC_emit_start) > RExC_offsets[0]
	7900	? "Overwriting end of array!\n" : "OK",
	7901	(UV)(place - RExC_emit_start),
	7902	(UV)(RExC_parse - RExC_start),
	7903	(UV)RExC_offsets[0]));
	7904	Set_Node_Offset(place, RExC_parse);
	7905	Set_Node_Length(place, 1);
	7906	}
	7907	src = NEXTOPER(place);
	7908	FILL_ADVANCE_NODE(place, op);
	7909	Zero(src, offset, regnode);
	7910	}
	7911
	7912	/*
	7913	- regtail - set the next-pointer at the end of a node chain of p to val.
	7914	- SEE ALSO: regtail_study
	7915	*/
	7916	/* TODO: All three parms should be const */
	7917	STATIC void
	7918	S_regtail(pTHX_ RExC_state_t pRExC_state, regnode p, const regnode *val,U32 depth)
	7919	{
	7920	dVAR;
	7921	register regnode *scan;
	7922	GET_RE_DEBUG_FLAGS_DECL;
	7923	#ifndef DEBUGGING
	7924	PERL_UNUSED_ARG(depth);
	7925	#endif
	7926
	7927	if (SIZE_ONLY)
	7928	return;
	7929
	7930	/* Find last node. */
	7931	scan = p;
	7932	for (;;) {
	7933	regnode * const temp = regnext(scan);
	7934	DEBUG_PARSE_r({
	7935	SV * const mysv=sv_newmortal();
	7936	DEBUG_PARSE_MSG((scan==p ? "tail" : ""));
	7937	regprop(RExC_rx, mysv, scan);
	7938	PerlIO_printf(Perl_debug_log, "~ %s (%d) %s %s\n",
	7939	SvPV_nolen_const(mysv), REG_NODE_NUM(scan),
	7940	(temp == NULL ? "->" : ""),
	7941	(temp == NULL ? reg_name[OP(val)] : "")
	7942	);
	7943	});
	7944	if (temp == NULL)
	7945	break;
	7946	scan = temp;
	7947	}
	7948
	7949	if (reg_off_by_arg[OP(scan)]) {
	7950	ARG_SET(scan, val - scan);
	7951	}
	7952	else {
	7953	NEXT_OFF(scan) = val - scan;
	7954	}
	7955	}
	7956
	7957	#ifdef DEBUGGING
	7958	/*
	7959	- regtail_study - set the next-pointer at the end of a node chain of p to val.
	7960	- Look for optimizable sequences at the same time.
	7961	- currently only looks for EXACT chains.
	7962
	7963	This is expermental code. The idea is to use this routine to perform
	7964	in place optimizations on branches and groups as they are constructed,
	7965	with the long term intention of removing optimization from study_chunk so
	7966	that it is purely analytical.
	7967
	7968	Currently only used when in DEBUG mode. The macro REGTAIL_STUDY() is used
	7969	to control which is which.
	7970
	7971	*/
	7972	/* TODO: All four parms should be const */
	7973
	7974	STATIC U8
	7975	S_regtail_study(pTHX_ RExC_state_t pRExC_state, regnode p, const regnode *val,U32 depth)
	7976	{
	7977	dVAR;
	7978	register regnode *scan;
	7979	U8 exact = PSEUDO;
	7980	#ifdef EXPERIMENTAL_INPLACESCAN
	7981	I32 min = 0;
	7982	#endif
	7983
	7984	GET_RE_DEBUG_FLAGS_DECL;
	7985
	7986
	7987	if (SIZE_ONLY)
	7988	return exact;
	7989
	7990	/* Find last node. */
	7991
	7992	scan = p;
	7993	for (;;) {
	7994	regnode * const temp = regnext(scan);
	7995	#ifdef EXPERIMENTAL_INPLACESCAN
	7996	if (PL_regkind[OP(scan)] == EXACT)
	7997	if (join_exact(pRExC_state,scan,&min,1,val,depth+1))
	7998	return EXACT;
	7999	#endif
	8000	if ( exact ) {
	8001	switch (OP(scan)) {
	8002	case EXACT:
	8003	case EXACTF:
	8004	case EXACTFL:
	8005	if( exact == PSEUDO )
	8006	exact= OP(scan);
	8007	else if ( exact != OP(scan) )
	8008	exact= 0;
	8009	case NOTHING:
	8010	break;
	8011	default:
	8012	exact= 0;
	8013	}
	8014	}
	8015	DEBUG_PARSE_r({
	8016	SV * const mysv=sv_newmortal();
	8017	DEBUG_PARSE_MSG((scan==p ? "tsdy" : ""));
	8018	regprop(RExC_rx, mysv, scan);
	8019	PerlIO_printf(Perl_debug_log, "~ %s (%d) -> %s\n",
	8020	SvPV_nolen_const(mysv),
	8021	REG_NODE_NUM(scan),
	8022	reg_name[exact]);
	8023	});
	8024	if (temp == NULL)
	8025	break;
	8026	scan = temp;
	8027	}
	8028	DEBUG_PARSE_r({
	8029	SV * const mysv_val=sv_newmortal();
	8030	DEBUG_PARSE_MSG("");
	8031	regprop(RExC_rx, mysv_val, val);
	8032	PerlIO_printf(Perl_debug_log, "~ attach to %s (%"IVdf") offset to %"IVdf"\n",
	8033	SvPV_nolen_const(mysv_val),
	8034	(IV)REG_NODE_NUM(val),
	8035	(IV)(val - scan)
	8036	);
	8037	});
	8038	if (reg_off_by_arg[OP(scan)]) {
	8039	ARG_SET(scan, val - scan);
	8040	}
	8041	else {
	8042	NEXT_OFF(scan) = val - scan;
	8043	}
	8044
	8045	return exact;
	8046	}
	8047	#endif
	8048
	8049	/*
	8050	- regcurly - a little FSA that accepts {\d+,?\d*}
	8051	*/
	8052	STATIC I32
	8053	S_regcurly(register const char *s)
	8054	{
	8055	if (*s++ != '{')
	8056	return FALSE;
	8057	if (!isDIGIT(*s))
	8058	return FALSE;
	8059	while (isDIGIT(*s))
	8060	s++;
	8061	if (*s == ',')
	8062	s++;
	8063	while (isDIGIT(*s))
	8064	s++;
	8065	if (*s != '}')
	8066	return FALSE;
	8067	return TRUE;
	8068	}
	8069
	8070
	8071	/*
	8072	- regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
	8073	*/
	8074	void
	8075	Perl_regdump(pTHX_ const regexp *r)
	8076	{
	8077	#ifdef DEBUGGING
	8078	dVAR;
	8079	SV * const sv = sv_newmortal();
	8080	SV *dsv= sv_newmortal();
	8081	RXi_GET_DECL(r,ri);
	8082
	8083	(void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
	8084
	8085	/* Header fields of interest. */
	8086	if (r->anchored_substr) {
	8087	RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->anchored_substr),
	8088	RE_SV_DUMPLEN(r->anchored_substr), 30);
	8089	PerlIO_printf(Perl_debug_log,
	8090	"anchored %s%s at %"IVdf" ",
	8091	s, RE_SV_TAIL(r->anchored_substr),
	8092	(IV)r->anchored_offset);
	8093	} else if (r->anchored_utf8) {
	8094	RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->anchored_utf8),
	8095	RE_SV_DUMPLEN(r->anchored_utf8), 30);
	8096	PerlIO_printf(Perl_debug_log,
	8097	"anchored utf8 %s%s at %"IVdf" ",
	8098	s, RE_SV_TAIL(r->anchored_utf8),
	8099	(IV)r->anchored_offset);
	8100	}
	8101	if (r->float_substr) {
	8102	RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->float_substr),
	8103	RE_SV_DUMPLEN(r->float_substr), 30);
	8104	PerlIO_printf(Perl_debug_log,
	8105	"floating %s%s at %"IVdf"..%"UVuf" ",
	8106	s, RE_SV_TAIL(r->float_substr),
	8107	(IV)r->float_min_offset, (UV)r->float_max_offset);
	8108	} else if (r->float_utf8) {
	8109	RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->float_utf8),
	8110	RE_SV_DUMPLEN(r->float_utf8), 30);
	8111	PerlIO_printf(Perl_debug_log,
	8112	"floating utf8 %s%s at %"IVdf"..%"UVuf" ",
	8113	s, RE_SV_TAIL(r->float_utf8),
	8114	(IV)r->float_min_offset, (UV)r->float_max_offset);
	8115	}
	8116	if (r->check_substr \|\| r->check_utf8)
	8117	PerlIO_printf(Perl_debug_log,
	8118	(const char *)
	8119	(r->check_substr == r->float_substr
	8120	&& r->check_utf8 == r->float_utf8
	8121	? "(checking floating" : "(checking anchored"));
	8122	if (r->extflags & RXf_NOSCAN)
	8123	PerlIO_printf(Perl_debug_log, " noscan");
	8124	if (r->extflags & RXf_CHECK_ALL)
	8125	PerlIO_printf(Perl_debug_log, " isall");
	8126	if (r->check_substr \|\| r->check_utf8)
	8127	PerlIO_printf(Perl_debug_log, ") ");
	8128
	8129	if (ri->regstclass) {
	8130	regprop(r, sv, ri->regstclass);
	8131	PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv));
	8132	}
	8133	if (r->extflags & RXf_ANCH) {
	8134	PerlIO_printf(Perl_debug_log, "anchored");
	8135	if (r->extflags & RXf_ANCH_BOL)
	8136	PerlIO_printf(Perl_debug_log, "(BOL)");
	8137	if (r->extflags & RXf_ANCH_MBOL)
	8138	PerlIO_printf(Perl_debug_log, "(MBOL)");
	8139	if (r->extflags & RXf_ANCH_SBOL)
	8140	PerlIO_printf(Perl_debug_log, "(SBOL)");
	8141	if (r->extflags & RXf_ANCH_GPOS)
	8142	PerlIO_printf(Perl_debug_log, "(GPOS)");
	8143	PerlIO_putc(Perl_debug_log, ' ');
	8144	}
	8145	if (r->extflags & RXf_GPOS_SEEN)
	8146	PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", (UV)r->gofs);
	8147	if (r->intflags & PREGf_SKIP)
	8148	PerlIO_printf(Perl_debug_log, "plus ");
	8149	if (r->intflags & PREGf_IMPLICIT)
	8150	PerlIO_printf(Perl_debug_log, "implicit ");
	8151	PerlIO_printf(Perl_debug_log, "minlen %"IVdf" ", (IV)r->minlen);
	8152	if (r->extflags & RXf_EVAL_SEEN)
	8153	PerlIO_printf(Perl_debug_log, "with eval ");
	8154	PerlIO_printf(Perl_debug_log, "\n");
	8155	#else
	8156	PERL_UNUSED_CONTEXT;
	8157	PERL_UNUSED_ARG(r);
	8158	#endif /* DEBUGGING */
	8159	}
	8160
	8161	/*
	8162	- regprop - printable representation of opcode
	8163	*/
	8164	void
	8165	Perl_regprop(pTHX_ const regexp prog, SV sv, const regnode *o)
	8166	{
	8167	#ifdef DEBUGGING
	8168	dVAR;
	8169	register int k;
	8170	RXi_GET_DECL(prog,progi);
	8171	GET_RE_DEBUG_FLAGS_DECL;
	8172
	8173
	8174	sv_setpvn(sv, "", 0);
	8175
	8176	if (OP(o) > REGNODE_MAX) /* regnode.type is unsigned */
	8177	/* It would be nice to FAIL() here, but this may be called from
	8178	regexec.c, and it would be hard to supply pRExC_state. */
	8179	Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", (int)OP(o), (int)REGNODE_MAX);
	8180	sv_catpv(sv, reg_name[OP(o)]); /* Take off const! */
	8181
	8182	k = PL_regkind[OP(o)];
	8183
	8184	if (k == EXACT) {
	8185	SV * const dsv = sv_2mortal(newSVpvs(""));
	8186	/* Using is_utf8_string() (via PERL_PV_UNI_DETECT)
	8187	* is a crude hack but it may be the best for now since
	8188	* we have no flag "this EXACTish node was UTF-8"
	8189	* --jhi */
	8190	const char * const s =
	8191	pv_pretty(dsv, STRING(o), STR_LEN(o), 60,
	8192	PL_colors[0], PL_colors[1],
	8193	PERL_PV_ESCAPE_UNI_DETECT \|
	8194	PERL_PV_PRETTY_ELIPSES \|
	8195	PERL_PV_PRETTY_LTGT
	8196	);
	8197	Perl_sv_catpvf(aTHX_ sv, " %s", s );
	8198	} else if (k == TRIE) {
	8199	/* print the details of the trie in dumpuntil instead, as
	8200	* progi->data isn't available here */
	8201	const char op = OP(o);
	8202	const U32 n = ARG(o);
	8203	const reg_ac_data * const ac = IS_TRIE_AC(op) ?
	8204	(reg_ac_data *)progi->data->data[n] :
	8205	NULL;
	8206	const reg_trie_data * const trie
	8207	= (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
	8208
	8209	Perl_sv_catpvf(aTHX_ sv, "-%s",reg_name[o->flags]);
	8210	DEBUG_TRIE_COMPILE_r(
	8211	Perl_sv_catpvf(aTHX_ sv,
	8212	"<S:%"UVuf"/%"IVdf" W:%"UVuf" L:%"UVuf"/%"UVuf" C:%"UVuf"/%"UVuf">",
	8213	(UV)trie->startstate,
	8214	(IV)trie->statecount-1, /* -1 because of the unused 0 element */
	8215	(UV)trie->wordcount,
	8216	(UV)trie->minlen,
	8217	(UV)trie->maxlen,
	8218	(UV)TRIE_CHARCOUNT(trie),
	8219	(UV)trie->uniquecharcount
	8220	)
	8221	);
	8222	if ( IS_ANYOF_TRIE(op) \|\| trie->bitmap ) {
	8223	int i;
	8224	int rangestart = -1;
	8225	U8* bitmap = IS_ANYOF_TRIE(op) ? (U8)ANYOF_BITMAP(o) : (U8)TRIE_BITMAP(trie);
	8226	Perl_sv_catpvf(aTHX_ sv, "[");
	8227	for (i = 0; i <= 256; i++) {
	8228	if (i < 256 && BITMAP_TEST(bitmap,i)) {
	8229	if (rangestart == -1)
	8230	rangestart = i;
	8231	} else if (rangestart != -1) {
	8232	if (i <= rangestart + 3)
	8233	for (; rangestart < i; rangestart++)
	8234	put_byte(sv, rangestart);
	8235	else {
	8236	put_byte(sv, rangestart);
	8237	sv_catpvs(sv, "-");
	8238	put_byte(sv, i - 1);
	8239	}
	8240	rangestart = -1;
	8241	}
	8242	}
	8243	Perl_sv_catpvf(aTHX_ sv, "]");
	8244	}
	8245
	8246	} else if (k == CURLY) {
	8247	if (OP(o) == CURLYM \|\| OP(o) == CURLYN \|\| OP(o) == CURLYX)
	8248	Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
	8249	Perl_sv_catpvf(aTHX_ sv, " {%d,%d}", ARG1(o), ARG2(o));
	8250	}
	8251	else if (k == WHILEM && o->flags) /* Ordinal/of */
	8252	Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
	8253	else if (k == REF \|\| k == OPEN \|\| k == CLOSE \|\| k == GROUPP \|\| OP(o)==ACCEPT) {
	8254	Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o)); /* Parenth number */
	8255	if ( prog->paren_names ) {
	8256	if ( k != REF \|\| OP(o) < NREF) {
	8257	AV list= (AV )progi->data->data[progi->name_list_idx];
	8258	SV **name= av_fetch(list, ARG(o), 0 );
	8259	if (name)
	8260	Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
	8261	}
	8262	else {
	8263	AV list= (AV )progi->data->data[ progi->name_list_idx ];
	8264	SV sv_dat=(SV)progi->data->data[ ARG( o ) ];
	8265	I32 nums=(I32)SvPVX(sv_dat);
	8266	SV **name= av_fetch(list, nums[0], 0 );
	8267	I32 n;
	8268	if (name) {
	8269	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	8270	Perl_sv_catpvf(aTHX_ sv, "%s%"IVdf,
	8271	(n ? "," : ""), (IV)nums[n]);
	8272	}
	8273	Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
	8274	}
	8275	}
	8276	}
	8277	} else if (k == GOSUB)
	8278	Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o)); /* Paren and offset */
	8279	else if (k == VERB) {
	8280	if (!o->flags)
	8281	Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
	8282	SVfARG((SV*)progi->data->data[ ARG( o ) ]));
	8283	} else if (k == LOGICAL)
	8284	Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */
	8285	else if (k == ANYOF) {
	8286	int i, rangestart = -1;
	8287	const U8 flags = ANYOF_FLAGS(o);
	8288
	8289	/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
	8290	static const char * const anyofs[] = {
	8291	"\\w",
	8292	"\\W",
	8293	"\\s",
	8294	"\\S",
	8295	"\\d",
	8296	"\\D",
	8297	"[:alnum:]",
	8298	"[:^alnum:]",
	8299	"[:alpha:]",
	8300	"[:^alpha:]",
	8301	"[:ascii:]",
	8302	"[:^ascii:]",
	8303	"[:ctrl:]",
	8304	"[:^ctrl:]",
	8305	"[:graph:]",
	8306	"[:^graph:]",
	8307	"[:lower:]",
	8308	"[:^lower:]",
	8309	"[:print:]",
	8310	"[:^print:]",
	8311	"[:punct:]",
	8312	"[:^punct:]",
	8313	"[:upper:]",
	8314	"[:^upper:]",
	8315	"[:xdigit:]",
	8316	"[:^xdigit:]",
	8317	"[:space:]",
	8318	"[:^space:]",
	8319	"[:blank:]",
	8320	"[:^blank:]"
	8321	};
	8322
	8323	if (flags & ANYOF_LOCALE)
	8324	sv_catpvs(sv, "{loc}");
	8325	if (flags & ANYOF_FOLD)
	8326	sv_catpvs(sv, "{i}");
	8327	Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
	8328	if (flags & ANYOF_INVERT)
	8329	sv_catpvs(sv, "^");
	8330	for (i = 0; i <= 256; i++) {
	8331	if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
	8332	if (rangestart == -1)
	8333	rangestart = i;
	8334	} else if (rangestart != -1) {
	8335	if (i <= rangestart + 3)
	8336	for (; rangestart < i; rangestart++)
	8337	put_byte(sv, rangestart);
	8338	else {
	8339	put_byte(sv, rangestart);
	8340	sv_catpvs(sv, "-");
	8341	put_byte(sv, i - 1);
	8342	}
	8343	rangestart = -1;
	8344	}
	8345	}
	8346
	8347	if (o->flags & ANYOF_CLASS)
	8348	for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
	8349	if (ANYOF_CLASS_TEST(o,i))
	8350	sv_catpv(sv, anyofs[i]);
	8351
	8352	if (flags & ANYOF_UNICODE)
	8353	sv_catpvs(sv, "{unicode}");
	8354	else if (flags & ANYOF_UNICODE_ALL)
	8355	sv_catpvs(sv, "{unicode_all}");
	8356
	8357	{
	8358	SV *lv;
	8359	SV * const sw = regclass_swash(prog, o, FALSE, &lv, 0);
	8360
	8361	if (lv) {
	8362	if (sw) {
	8363	U8 s[UTF8_MAXBYTES_CASE+1];
	8364
	8365	for (i = 0; i <= 256; i++) { /* just the first 256 */
	8366	uvchr_to_utf8(s, i);
	8367
	8368	if (i < 256 && swash_fetch(sw, s, TRUE)) {
	8369	if (rangestart == -1)
	8370	rangestart = i;
	8371	} else if (rangestart != -1) {
	8372	if (i <= rangestart + 3)
	8373	for (; rangestart < i; rangestart++) {
	8374	const U8 * const e = uvchr_to_utf8(s,rangestart);
	8375	U8 *p;
	8376	for(p = s; p < e; p++)
	8377	put_byte(sv, *p);
	8378	}
	8379	else {
	8380	const U8 *e = uvchr_to_utf8(s,rangestart);
	8381	U8 *p;
	8382	for (p = s; p < e; p++)
	8383	put_byte(sv, *p);
	8384	sv_catpvs(sv, "-");
	8385	e = uvchr_to_utf8(s, i-1);
	8386	for (p = s; p < e; p++)
	8387	put_byte(sv, *p);
	8388	}
	8389	rangestart = -1;
	8390	}
	8391	}
	8392
	8393	sv_catpvs(sv, "..."); /* et cetera */
	8394	}
	8395
	8396	{
	8397	char *s = savesvpv(lv);
	8398	char * const origs = s;
	8399
	8400	while (s && s != '\n')
	8401	s++;
	8402
	8403	if (*s == '\n') {
	8404	const char * const t = ++s;
	8405
	8406	while (*s) {
	8407	if (*s == '\n')
	8408	*s = ' ';
	8409	s++;
	8410	}
	8411	if (s[-1] == ' ')
	8412	s[-1] = 0;
	8413
	8414	sv_catpv(sv, t);
	8415	}
	8416
	8417	Safefree(origs);
	8418	}
	8419	}
	8420	}
	8421
	8422	Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
	8423	}
	8424	else if (k == BRANCHJ && (OP(o) == UNLESSM \|\| OP(o) == IFMATCH))
	8425	Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
	8426	#else
	8427	PERL_UNUSED_CONTEXT;
	8428	PERL_UNUSED_ARG(sv);
	8429	PERL_UNUSED_ARG(o);
	8430	PERL_UNUSED_ARG(prog);
	8431	#endif /* DEBUGGING */
	8432	}
	8433
	8434	SV *
	8435	Perl_re_intuit_string(pTHX_ regexp *prog)
	8436	{ /* Assume that RE_INTUIT is set */
	8437	dVAR;
	8438	GET_RE_DEBUG_FLAGS_DECL;
	8439	PERL_UNUSED_CONTEXT;
	8440
	8441	DEBUG_COMPILE_r(
	8442	{
	8443	const char * const s = SvPV_nolen_const(prog->check_substr
	8444	? prog->check_substr : prog->check_utf8);
	8445
	8446	if (!PL_colorset) reginitcolors();
	8447	PerlIO_printf(Perl_debug_log,
	8448	"%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
	8449	PL_colors[4],
	8450	prog->check_substr ? "" : "utf8 ",
	8451	PL_colors[5],PL_colors[0],
	8452	s,
	8453	PL_colors[1],
	8454	(strlen(s) > 60 ? "..." : ""));
	8455	} );
	8456
	8457	return prog->check_substr ? prog->check_substr : prog->check_utf8;
	8458	}
	8459
	8460	/*
	8461	pregfree()
	8462
	8463	handles refcounting and freeing the perl core regexp structure. When
	8464	it is necessary to actually free the structure the first thing it
	8465	does is call the 'free' method of the regexp_engine associated to to
	8466	the regexp, allowing the handling of the void *pprivate; member
	8467	first. (This routine is not overridable by extensions, which is why
	8468	the extensions free is called first.)
	8469
	8470	See regdupe and regdupe_internal if you change anything here.
	8471	*/
	8472	#ifndef PERL_IN_XSUB_RE
	8473	void
	8474	Perl_pregfree(pTHX_ struct regexp *r)
	8475	{
	8476	dVAR;
	8477	GET_RE_DEBUG_FLAGS_DECL;
	8478
	8479	if (!r \|\| (--r->refcnt > 0))
	8480	return;
	8481
	8482	CALLREGFREE_PVT(r); /* free the private data */
	8483
	8484	/* gcov results gave these as non-null 100% of the time, so there's no
	8485	optimisation in checking them before calling Safefree */
	8486	Safefree(r->precomp);
	8487	RX_MATCH_COPY_FREE(r);
	8488	#ifdef PERL_OLD_COPY_ON_WRITE
	8489	if (r->saved_copy)
	8490	SvREFCNT_dec(r->saved_copy);
	8491	#endif
	8492	if (r->substrs) {
	8493	if (r->anchored_substr)
	8494	SvREFCNT_dec(r->anchored_substr);
	8495	if (r->anchored_utf8)
	8496	SvREFCNT_dec(r->anchored_utf8);
	8497	if (r->float_substr)
	8498	SvREFCNT_dec(r->float_substr);
	8499	if (r->float_utf8)
	8500	SvREFCNT_dec(r->float_utf8);
	8501	Safefree(r->substrs);
	8502	}
	8503	if (r->paren_names)
	8504	SvREFCNT_dec(r->paren_names);
	8505
	8506	Safefree(r->startp);
	8507	Safefree(r->endp);
	8508	Safefree(r);
	8509	}
	8510	#endif
	8511
	8512	/* regfree_internal()
	8513
	8514	Free the private data in a regexp. This is overloadable by
	8515	extensions. Perl takes care of the regexp structure in pregfree(),
	8516	this covers the *pprivate pointer which technically perldoesnt
	8517	know about, however of course we have to handle the
	8518	regexp_internal structure when no extension is in use.
	8519
	8520	Note this is called before freeing anything in the regexp
	8521	structure.
	8522	*/
	8523
	8524	void
	8525	Perl_regfree_internal(pTHX_ struct regexp *r)
	8526	{
	8527	dVAR;
	8528	RXi_GET_DECL(r,ri);
	8529	GET_RE_DEBUG_FLAGS_DECL;
	8530
	8531	DEBUG_COMPILE_r({
	8532	if (!PL_colorset)
	8533	reginitcolors();
	8534	{
	8535	SV *dsv= sv_newmortal();
	8536	RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8),
	8537	dsv, r->precomp, r->prelen, 60);
	8538	PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n",
	8539	PL_colors[4],PL_colors[5],s);
	8540	}
	8541	});
	8542
	8543	Safefree(ri->offsets); /* 20010421 MJD */
	8544	if (ri->data) {
	8545	int n = ri->data->count;
	8546	PAD* new_comppad = NULL;
	8547	PAD* old_comppad;
	8548	PADOFFSET refcnt;
	8549
	8550	while (--n >= 0) {
	8551	/* If you add a ->what type here, update the comment in regcomp.h */
	8552	switch (ri->data->what[n]) {
	8553	case 's':
	8554	case 'S':
	8555	case 'u':
	8556	SvREFCNT_dec((SV*)ri->data->data[n]);
	8557	break;
	8558	case 'f':
	8559	Safefree(ri->data->data[n]);
	8560	break;
	8561	case 'p':
	8562	new_comppad = (AV*)ri->data->data[n];
	8563	break;
	8564	case 'o':
	8565	if (new_comppad == NULL)
	8566	Perl_croak(aTHX_ "panic: pregfree comppad");
	8567	PAD_SAVE_LOCAL(old_comppad,
	8568	/* Watch out for global destruction's random ordering. */
	8569	(SvTYPE(new_comppad) == SVt_PVAV) ? new_comppad : NULL
	8570	);
	8571	OP_REFCNT_LOCK;
	8572	refcnt = OpREFCNT_dec((OP_4tree*)ri->data->data[n]);
	8573	OP_REFCNT_UNLOCK;
	8574	if (!refcnt)
	8575	op_free((OP_4tree*)ri->data->data[n]);
	8576
	8577	PAD_RESTORE_LOCAL(old_comppad);
	8578	SvREFCNT_dec((SV*)new_comppad);
	8579	new_comppad = NULL;
	8580	break;
	8581	case 'n':
	8582	break;
	8583	case 'T':
	8584	{ /* Aho Corasick add-on structure for a trie node.
	8585	Used in stclass optimization only */
	8586	U32 refcount;
	8587	reg_ac_data aho=(reg_ac_data)ri->data->data[n];
	8588	OP_REFCNT_LOCK;
	8589	refcount = --aho->refcount;
	8590	OP_REFCNT_UNLOCK;
	8591	if ( !refcount ) {
	8592	PerlMemShared_free(aho->states);
	8593	PerlMemShared_free(aho->fail);
	8594	/* do this last!!!! */
	8595	PerlMemShared_free(ri->data->data[n]);
	8596	PerlMemShared_free(ri->regstclass);
	8597	}
	8598	}
	8599	break;
	8600	case 't':
	8601	{
	8602	/* trie structure. */
	8603	U32 refcount;
	8604	reg_trie_data trie=(reg_trie_data)ri->data->data[n];
	8605	OP_REFCNT_LOCK;
	8606	refcount = --trie->refcount;
	8607	OP_REFCNT_UNLOCK;
	8608	if ( !refcount ) {
	8609	PerlMemShared_free(trie->charmap);
	8610	PerlMemShared_free(trie->states);
	8611	PerlMemShared_free(trie->trans);
	8612	if (trie->bitmap)
	8613	PerlMemShared_free(trie->bitmap);
	8614	if (trie->wordlen)
	8615	PerlMemShared_free(trie->wordlen);
	8616	if (trie->jump)
	8617	PerlMemShared_free(trie->jump);
	8618	if (trie->nextword)
	8619	PerlMemShared_free(trie->nextword);
	8620	/* do this last!!!! */
	8621	PerlMemShared_free(ri->data->data[n]);
	8622	}
	8623	}
	8624	break;
	8625	default:
	8626	Perl_croak(aTHX_ "panic: regfree data code '%c'", ri->data->what[n]);
	8627	}
	8628	}
	8629	Safefree(ri->data->what);
	8630	Safefree(ri->data);
	8631	}
	8632	if (ri->swap) {
	8633	Safefree(ri->swap->startp);
	8634	Safefree(ri->swap->endp);
	8635	Safefree(ri->swap);
	8636	}
	8637	Safefree(ri);
	8638	}
	8639
	8640	#define sv_dup_inc(s,t) SvREFCNT_inc(sv_dup(s,t))
	8641	#define av_dup_inc(s,t) (AV)SvREFCNT_inc(sv_dup((SV)s,t))
	8642	#define hv_dup_inc(s,t) (HV)SvREFCNT_inc(sv_dup((SV)s,t))
	8643	#define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL)
	8644
	8645	/*
	8646	regdupe - duplicate a regexp.
	8647
	8648	This routine is called by sv.c's re_dup and is expected to clone a
	8649	given regexp structure. It is a no-op when not under USE_ITHREADS.
	8650	(Originally this was re_dup() for change history see sv.c)
	8651
	8652	After all of the core data stored in struct regexp is duplicated
	8653	the regexp_engine.dupe method is used to copy any private data
	8654	stored in the *pprivate pointer. This allows extensions to handle
	8655	any duplication it needs to do.
	8656
	8657	See pregfree() and regfree_internal() if you change anything here.
	8658	*/
	8659	#if defined(USE_ITHREADS)
	8660	#ifndef PERL_IN_XSUB_RE
	8661	regexp *
	8662	Perl_re_dup(pTHX_ const regexp r, CLONE_PARAMS param)
	8663	{
	8664	dVAR;
	8665	regexp *ret;
	8666	int i, npar;
	8667	struct reg_substr_datum *s;
	8668
	8669	if (!r)
	8670	return (REGEXP *)NULL;
	8671
	8672	if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r)))
	8673	return ret;
	8674
	8675
	8676	npar = r->nparens+1;
	8677	Newxz(ret, 1, regexp);
	8678	Newx(ret->startp, npar, I32);
	8679	Copy(r->startp, ret->startp, npar, I32);
	8680	Newx(ret->endp, npar, I32);
	8681	Copy(r->endp, ret->endp, npar, I32);
	8682
	8683	if (r->substrs) {
	8684	Newx(ret->substrs, 1, struct reg_substr_data);
	8685	for (s = ret->substrs->data, i = 0; i < 3; i++, s++) {
	8686	s->min_offset = r->substrs->data[i].min_offset;
	8687	s->max_offset = r->substrs->data[i].max_offset;
	8688	s->end_shift = r->substrs->data[i].end_shift;
	8689	s->substr = sv_dup_inc(r->substrs->data[i].substr, param);
	8690	s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param);
	8691	}
	8692	} else
	8693	ret->substrs = NULL;
	8694
	8695	ret->precomp = SAVEPVN(r->precomp, r->prelen);
	8696	ret->refcnt = r->refcnt;
	8697	ret->minlen = r->minlen;
	8698	ret->minlenret = r->minlenret;
	8699	ret->prelen = r->prelen;
	8700	ret->nparens = r->nparens;
	8701	ret->lastparen = r->lastparen;
	8702	ret->lastcloseparen = r->lastcloseparen;
	8703	ret->intflags = r->intflags;
	8704	ret->extflags = r->extflags;
	8705
	8706	ret->sublen = r->sublen;
	8707
	8708	ret->engine = r->engine;
	8709
	8710	ret->paren_names = hv_dup_inc(r->paren_names, param);
	8711
	8712	if (RX_MATCH_COPIED(ret))
	8713	ret->subbeg = SAVEPVN(r->subbeg, r->sublen);
	8714	else
	8715	ret->subbeg = NULL;
	8716	#ifdef PERL_OLD_COPY_ON_WRITE
	8717	ret->saved_copy = NULL;
	8718	#endif
	8719
	8720	ret->pprivate = r->pprivate;
	8721	if (ret->pprivate)
	8722	RXi_SET(ret,CALLREGDUPE_PVT(ret,param));
	8723
	8724	ptr_table_store(PL_ptr_table, r, ret);
	8725	return ret;
	8726	}
	8727	#endif /* PERL_IN_XSUB_RE */
	8728
	8729	/*
	8730	regdupe_internal()
	8731
	8732	This is the internal complement to regdupe() which is used to copy
	8733	the structure pointed to by the *pprivate pointer in the regexp.
	8734	This is the core version of the extension overridable cloning hook.
	8735	The regexp structure being duplicated will be copied by perl prior
	8736	to this and will be provided as the regexp *r argument, however
	8737	with the /old/ structures pprivate pointer value. Thus this routine
	8738	may override any copying normally done by perl.
	8739
	8740	It returns a pointer to the new regexp_internal structure.
	8741	*/
	8742
	8743	void *
	8744	Perl_regdupe_internal(pTHX_ const regexp r, CLONE_PARAMS param)
	8745	{
	8746	dVAR;
	8747	regexp_internal *reti;
	8748	int len, npar;
	8749	RXi_GET_DECL(r,ri);
	8750
	8751	npar = r->nparens+1;
	8752	len = ri->offsets[0];
	8753
	8754	Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal);
	8755	Copy(ri->program, reti->program, len+1, regnode);
	8756
	8757	if(ri->swap) {
	8758	Newx(reti->swap, 1, regexp_paren_ofs);
	8759	/* no need to copy these */
	8760	Newx(reti->swap->startp, npar, I32);
	8761	Newx(reti->swap->endp, npar, I32);
	8762	} else {
	8763	reti->swap = NULL;
	8764	}
	8765
	8766
	8767	reti->regstclass = NULL;
	8768	if (ri->data) {
	8769	struct reg_data *d;
	8770	const int count = ri->data->count;
	8771	int i;
	8772
	8773	Newxc(d, sizeof(struct reg_data) + countsizeof(void ),
	8774	char, struct reg_data);
	8775	Newx(d->what, count, U8);
	8776
	8777	d->count = count;
	8778	for (i = 0; i < count; i++) {
	8779	d->what[i] = ri->data->what[i];
	8780	switch (d->what[i]) {
	8781	/* legal options are one of: sSfpontTu
	8782	see also regcomp.h and pregfree() */
	8783	case 's':
	8784	case 'S':
	8785	case 'p': /* actually an AV, but the dup function is identical. */
	8786	case 'u': /* actually an HV, but the dup function is identical. */
	8787	d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param);
	8788	break;
	8789	case 'f':
	8790	/* This is cheating. */
	8791	Newx(d->data[i], 1, struct regnode_charclass_class);
	8792	StructCopy(ri->data->data[i], d->data[i],
	8793	struct regnode_charclass_class);
	8794	reti->regstclass = (regnode*)d->data[i];
	8795	break;
	8796	case 'o':
	8797	/* Compiled op trees are readonly and in shared memory,
	8798	and can thus be shared without duplication. */
	8799	OP_REFCNT_LOCK;
	8800	d->data[i] = (void)OpREFCNT_inc((OP)ri->data->data[i]);
	8801	OP_REFCNT_UNLOCK;
	8802	break;
	8803	case 'T':
	8804	/* Trie stclasses are readonly and can thus be shared
	8805	* without duplication. We free the stclass in pregfree
	8806	* when the corresponding reg_ac_data struct is freed.
	8807	*/
	8808	reti->regstclass= ri->regstclass;
	8809	/* Fall through */
	8810	case 't':
	8811	OP_REFCNT_LOCK;
	8812	((reg_trie_data*)ri->data->data[i])->refcount++;
	8813	OP_REFCNT_UNLOCK;
	8814	/* Fall through */
	8815	case 'n':
	8816	d->data[i] = ri->data->data[i];
	8817	break;
	8818	default:
	8819	Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", ri->data->what[i]);
	8820	}
	8821	}
	8822
	8823	reti->data = d;
	8824	}
	8825	else
	8826	reti->data = NULL;
	8827
	8828	reti->name_list_idx = ri->name_list_idx;
	8829
	8830	Newx(reti->offsets, 2*len+1, U32);
	8831	Copy(ri->offsets, reti->offsets, 2*len+1, U32);
	8832
	8833	return (void*)reti;
	8834	}
	8835
	8836	#endif /* USE_ITHREADS */
	8837
	8838	/*
	8839	reg_stringify()
	8840
	8841	converts a regexp embedded in a MAGIC struct to its stringified form,
	8842	caching the converted form in the struct and returns the cached
	8843	string.
	8844
	8845	If lp is nonnull then it is used to return the length of the
	8846	resulting string
	8847
	8848	If flags is nonnull and the returned string contains UTF8 then
	8849	(*flags & 1) will be true.
	8850
	8851	If haseval is nonnull then it is used to return whether the pattern
	8852	contains evals.
	8853
	8854	Normally called via macro:
	8855
	8856	CALLREG_STRINGIFY(mg,&len,&utf8);
	8857
	8858	And internally with
	8859
	8860	CALLREG_AS_STR(mg,&lp,&flags,&haseval)
	8861
	8862	See sv_2pv_flags() in sv.c for an example of internal usage.
	8863
	8864	*/
	8865	#ifndef PERL_IN_XSUB_RE
	8866	char *
	8867	Perl_reg_stringify(pTHX_ MAGIC mg, STRLEN lp, U32 flags, I32 haseval ) {
	8868	dVAR;
	8869	const regexp * const re = (regexp *)mg->mg_obj;
	8870
	8871	if (!mg->mg_ptr) {
	8872	const char fptr = STD_PAT_MODS; /"msix"*/
	8873	char reflags[7];
	8874	char ch;
	8875	bool hask = ((re->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
	8876	bool hasm = ((re->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
	8877	U16 reganch = (U16)((re->extflags & RXf_PMf_STD_PMMOD) >> 12);
	8878	bool need_newline = 0;
	8879	int left = 0;
	8880	int right = 4 + hask;
	8881	if (hask)
	8882	reflags[left++] = KEEPCOPY_PAT_MOD; /'k'/
	8883	while((ch = *fptr++)) {
	8884	if(reganch & 1) {
	8885	reflags[left++] = ch;
	8886	}
	8887	else {
	8888	reflags[right--] = ch;
	8889	}
	8890	reganch >>= 1;
	8891	}
	8892	if(hasm) {
	8893	reflags[left] = '-';
	8894	left = 5 + hask;
	8895	}
	8896	/* printf("[%.7s]\n",left,reflags); /
	8897	mg->mg_len = re->prelen + 4 + left;
	8898	/*
	8899	* If /x was used, we have to worry about a regex ending with a
	8900	* comment later being embedded within another regex. If so, we don't
	8901	* want this regex's "commentization" to leak out to the right part of
	8902	* the enclosing regex, we must cap it with a newline.
	8903	*
	8904	* So, if /x was used, we scan backwards from the end of the regex. If
	8905	* we find a '#' before we find a newline, we need to add a newline
	8906	* ourself. If we find a '\n' first (or if we don't find '#' or '\n'),
	8907	* we don't need to add anything. -jfriedl
	8908	*/
	8909	if (PMf_EXTENDED & re->extflags) {
	8910	const char *endptr = re->precomp + re->prelen;
	8911	while (endptr >= re->precomp) {
	8912	const char c = *(endptr--);
	8913	if (c == '\n')
	8914	break; /* don't need another */
	8915	if (c == '#') {
	8916	/* we end while in a comment, so we need a newline */
	8917	mg->mg_len++; /* save space for it */
	8918	need_newline = 1; /* note to add it */
	8919	break;
	8920	}
	8921	}
	8922	}
	8923
	8924	Newx(mg->mg_ptr, mg->mg_len + 1 + left, char);
	8925	mg->mg_ptr[0] = '(';
	8926	mg->mg_ptr[1] = '?';
	8927	Copy(reflags, mg->mg_ptr+2, left, char);
	8928	*(mg->mg_ptr+left+2) = ':';
	8929	Copy(re->precomp, mg->mg_ptr+3+left, re->prelen, char);
	8930	if (need_newline)
	8931	mg->mg_ptr[mg->mg_len - 2] = '\n';
	8932	mg->mg_ptr[mg->mg_len - 1] = ')';
	8933	mg->mg_ptr[mg->mg_len] = 0;
	8934	}
	8935	if (haseval)
	8936	*haseval = re->seen_evals;
	8937	if (flags)
	8938	*flags = ((re->extflags & RXf_UTF8) ? 1 : 0);
	8939
	8940	if (lp)
	8941	*lp = mg->mg_len;
	8942	return mg->mg_ptr;
	8943	}
	8944
	8945	/*
	8946	- regnext - dig the "next" pointer out of a node
	8947	*/
	8948	regnode *
	8949	Perl_regnext(pTHX_ register regnode *p)
	8950	{
	8951	dVAR;
	8952	register I32 offset;
	8953
	8954	if (!p)
	8955	return(NULL);
	8956
	8957	offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
	8958	if (offset == 0)
	8959	return(NULL);
	8960
	8961	return(p+offset);
	8962	}
	8963	#endif
	8964
	8965	STATIC void
	8966	S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
	8967	{
	8968	va_list args;
	8969	STRLEN l1 = strlen(pat1);
	8970	STRLEN l2 = strlen(pat2);
	8971	char buf[512];
	8972	SV *msv;
	8973	const char *message;
	8974
	8975	if (l1 > 510)
	8976	l1 = 510;
	8977	if (l1 + l2 > 510)
	8978	l2 = 510 - l1;
	8979	Copy(pat1, buf, l1 , char);
	8980	Copy(pat2, buf + l1, l2 , char);
	8981	buf[l1 + l2] = '\n';
	8982	buf[l1 + l2 + 1] = '\0';
	8983	#ifdef I_STDARG
	8984	/* ANSI variant takes additional second argument */
	8985	va_start(args, pat2);
	8986	#else
	8987	va_start(args);
	8988	#endif
	8989	msv = vmess(buf, &args);
	8990	va_end(args);
	8991	message = SvPV_const(msv,l1);
	8992	if (l1 > 512)
	8993	l1 = 512;
	8994	Copy(message, buf, l1 , char);
	8995	buf[l1-1] = '\0'; /* Overwrite \n */
	8996	Perl_croak(aTHX_ "%s", buf);
	8997	}
	8998
	8999	/* XXX Here's a total kludge. But we need to re-enter for swash routines. */
	9000
	9001	#ifndef PERL_IN_XSUB_RE
	9002	void
	9003	Perl_save_re_context(pTHX)
	9004	{
	9005	dVAR;
	9006
	9007	struct re_save_state *state;
	9008
	9009	SAVEVPTR(PL_curcop);
	9010	SSGROW(SAVESTACK_ALLOC_FOR_RE_SAVE_STATE + 1);
	9011
	9012	state = (struct re_save_state *)(PL_savestack + PL_savestack_ix);
	9013	PL_savestack_ix += SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
	9014	SSPUSHINT(SAVEt_RE_STATE);
	9015
	9016	Copy(&PL_reg_state, state, 1, struct re_save_state);
	9017
	9018	PL_reg_start_tmp = 0;
	9019	PL_reg_start_tmpl = 0;
	9020	PL_reg_oldsaved = NULL;
	9021	PL_reg_oldsavedlen = 0;
	9022	PL_reg_maxiter = 0;
	9023	PL_reg_leftiter = 0;
	9024	PL_reg_poscache = NULL;
	9025	PL_reg_poscache_size = 0;
	9026	#ifdef PERL_OLD_COPY_ON_WRITE
	9027	PL_nrs = NULL;
	9028	#endif
	9029
	9030	/* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
	9031	if (PL_curpm) {
	9032	const REGEXP * const rx = PM_GETRE(PL_curpm);
	9033	if (rx) {
	9034	U32 i;
	9035	for (i = 1; i <= rx->nparens; i++) {
	9036	char digits[TYPE_CHARS(long)];
	9037	const STRLEN len = my_snprintf(digits, sizeof(digits), "%lu", (long)i);
	9038	GV const const gvp
	9039	= (GV**)hv_fetch(PL_defstash, digits, len, 0);
	9040
	9041	if (gvp) {
	9042	GV * const gv = *gvp;
	9043	if (SvTYPE(gv) == SVt_PVGV && GvSV(gv))
	9044	save_scalar(gv);
	9045	}
	9046	}
	9047	}
	9048	}
	9049	}
	9050	#endif
	9051
	9052	static void
	9053	clear_re(pTHX_ void *r)
	9054	{
	9055	dVAR;
	9056	ReREFCNT_dec((regexp *)r);
	9057	}
	9058
	9059	#ifdef DEBUGGING
	9060
	9061	STATIC void
	9062	S_put_byte(pTHX_ SV *sv, int c)
	9063	{
	9064	if (isCNTRL(c) \|\| c == 255 \|\| !isPRINT(c))
	9065	Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
	9066	else if (c == '-' \|\| c == ']' \|\| c == '\\' \|\| c == '^')
	9067	Perl_sv_catpvf(aTHX_ sv, "\\%c", c);
	9068	else
	9069	Perl_sv_catpvf(aTHX_ sv, "%c", c);
	9070	}
	9071
	9072
	9073	#define CLEAR_OPTSTART \
	9074	if (optstart) STMT_START { \
	9075	DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log, " (%"IVdf" nodes)\n", (IV)(node - optstart))); \
	9076	optstart=NULL; \
	9077	} STMT_END
	9078
	9079	#define DUMPUNTIL(b,e) CLEAR_OPTSTART; node=dumpuntil(r,start,(b),(e),last,sv,indent+1,depth+1);
	9080
	9081	STATIC const regnode *
	9082	S_dumpuntil(pTHX_ const regexp r, const regnode start, const regnode *node,
	9083	const regnode last, const regnode plast,
	9084	SV* sv, I32 indent, U32 depth)
	9085	{
	9086	dVAR;
	9087	register U8 op = PSEUDO; /* Arbitrary non-END op. */
	9088	register const regnode *next;
	9089	const regnode *optstart= NULL;
	9090
	9091	RXi_GET_DECL(r,ri);
	9092	GET_RE_DEBUG_FLAGS_DECL;
	9093
	9094	#ifdef DEBUG_DUMPUNTIL
	9095	PerlIO_printf(Perl_debug_log, "--- %d : %d - %d - %d\n",indent,node-start,
	9096	last ? last-start : 0,plast ? plast-start : 0);
	9097	#endif
	9098
	9099	if (plast && plast < last)
	9100	last= plast;
	9101
	9102	while (PL_regkind[op] != END && (!last \|\| node < last)) {
	9103	/* While that wasn't END last time... */
	9104	NODE_ALIGN(node);
	9105	op = OP(node);
	9106	if (op == CLOSE \|\| op == WHILEM)
	9107	indent--;
	9108	next = regnext((regnode *)node);
	9109
	9110	/* Where, what. */
	9111	if (OP(node) == OPTIMIZED) {
	9112	if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
	9113	optstart = node;
	9114	else
	9115	goto after_print;
	9116	} else
	9117	CLEAR_OPTSTART;
	9118
	9119	regprop(r, sv, node);
	9120	PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
	9121	(int)(2*indent + 1), "", SvPVX_const(sv));
	9122
	9123	if (OP(node) != OPTIMIZED) {
	9124	if (next == NULL) /* Next ptr. */
	9125	PerlIO_printf(Perl_debug_log, " (0)");
	9126	else if (PL_regkind[(U8)op] == BRANCH && PL_regkind[OP(next)] != BRANCH )
	9127	PerlIO_printf(Perl_debug_log, " (FAIL)");
	9128	else
	9129	PerlIO_printf(Perl_debug_log, " (%"IVdf")", (IV)(next - start));
	9130	(void)PerlIO_putc(Perl_debug_log, '\n');
	9131	}
	9132
	9133	after_print:
	9134	if (PL_regkind[(U8)op] == BRANCHJ) {
	9135	assert(next);
	9136	{
	9137	register const regnode *nnode = (OP(next) == LONGJMP
	9138	? regnext((regnode *)next)
	9139	: next);
	9140	if (last && nnode > last)
	9141	nnode = last;
	9142	DUMPUNTIL(NEXTOPER(NEXTOPER(node)), nnode);
	9143	}
	9144	}
	9145	else if (PL_regkind[(U8)op] == BRANCH) {
	9146	assert(next);
	9147	DUMPUNTIL(NEXTOPER(node), next);
	9148	}
	9149	else if ( PL_regkind[(U8)op] == TRIE ) {
	9150	const regnode *this_trie = node;
	9151	const char op = OP(node);
	9152	const U32 n = ARG(node);
	9153	const reg_ac_data * const ac = op>=AHOCORASICK ?
	9154	(reg_ac_data *)ri->data->data[n] :
	9155	NULL;
	9156	const reg_trie_data * const trie =
	9157	(reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
	9158	#ifdef DEBUGGING
	9159	AV const trie_words = (AV ) ri->data->data[n + TRIE_WORDS_OFFSET];
	9160	#endif
	9161	const regnode *nextbranch= NULL;
	9162	I32 word_idx;
	9163	sv_setpvn(sv, "", 0);
	9164	for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
	9165	SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);
	9166
	9167	PerlIO_printf(Perl_debug_log, "%*s%s ",
	9168	(int)(2*(indent+3)), "",
	9169	elem_ptr ? pv_pretty(sv, SvPV_nolen_const(elem_ptr), SvCUR(elem_ptr), 60,
	9170	PL_colors[0], PL_colors[1],
	9171	(SvUTF8(*elem_ptr) ? PERL_PV_ESCAPE_UNI : 0) \|
	9172	PERL_PV_PRETTY_ELIPSES \|
	9173	PERL_PV_PRETTY_LTGT
	9174	)
	9175	: "???"
	9176	);
	9177	if (trie->jump) {
	9178	U16 dist= trie->jump[word_idx+1];
	9179	PerlIO_printf(Perl_debug_log, "(%"UVuf")\n",
	9180	(UV)((dist ? this_trie + dist : next) - start));
	9181	if (dist) {
	9182	if (!nextbranch)
	9183	nextbranch= this_trie + trie->jump[0];
	9184	DUMPUNTIL(this_trie + dist, nextbranch);
	9185	}
	9186	if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
	9187	nextbranch= regnext((regnode *)nextbranch);
	9188	} else {
	9189	PerlIO_printf(Perl_debug_log, "\n");
	9190	}
	9191	}
	9192	if (last && next > last)
	9193	node= last;
	9194	else
	9195	node= next;
	9196	}
	9197	else if ( op == CURLY ) { /* "next" might be very big: optimizer */
	9198	DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS,
	9199	NEXTOPER(node) + EXTRA_STEP_2ARGS + 1);
	9200	}
	9201	else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
	9202	assert(next);
	9203	DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS, next);
	9204	}
	9205	else if ( op == PLUS \|\| op == STAR) {
	9206	DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1);
	9207	}
	9208	else if (op == ANYOF) {
	9209	/* arglen 1 + class block */
	9210	node += 1 + ((ANYOF_FLAGS(node) & ANYOF_LARGE)
	9211	? ANYOF_CLASS_SKIP : ANYOF_SKIP);
	9212	node = NEXTOPER(node);
	9213	}
	9214	else if (PL_regkind[(U8)op] == EXACT) {
	9215	/* Literal string, where present. */
	9216	node += NODE_SZ_STR(node) - 1;
	9217	node = NEXTOPER(node);
	9218	}
	9219	else {
	9220	node = NEXTOPER(node);
	9221	node += regarglen[(U8)op];
	9222	}
	9223	if (op == CURLYX \|\| op == OPEN)
	9224	indent++;
	9225	}
	9226	CLEAR_OPTSTART;
	9227	#ifdef DEBUG_DUMPUNTIL
	9228	PerlIO_printf(Perl_debug_log, "--- %d\n", (int)indent);
	9229	#endif
	9230	return node;
	9231	}
	9232
	9233	#endif /* DEBUGGING */
	9234
	9235	/*
	9236	* Local variables:
	9237	* c-indentation-style: bsd
	9238	* c-basic-offset: 4
	9239	* indent-tabs-mode: t
	9240	* End:
	9241	*
	9242	* ex: set ts=8 sts=4 sw=4 noet:
	9243	*/