perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regcomp.c
	2	*/
	3
	4	/*
	5	* "A fair jaw-cracker dwarf-language must be." --Samwise Gamgee
	6	*/
	7
	8	/* This file contains functions for compiling a regular expression. See
	9	* also regexec.c which funnily enough, contains functions for executing
	10	* a regular expression.
	11	*
	12	* This file is also copied at build time to ext/re/re_comp.c, where
	13	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	14	* This causes the main functions to be compiled under new names and with
	15	* debugging support added, which makes "use re 'debug'" work.
	16	*/
	17
	18	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	19	* be confused with the original package (see point 3 below). Thanks, Henry!
	20	*/
	21
	22	/* Additional note: this code is very heavily munged from Henry's version
	23	* in places. In some spots I've traded clarity for efficiency, so don't
	24	* blame Henry for some of the lack of readability.
	25	*/
	26
	27	/* The names of the functions have been changed from regcomp and
	28	* regexec to pregcomp and pregexec in order to avoid conflicts
	29	* with the POSIX routines of the same names.
	30	*/
	31
	32	#ifdef PERL_EXT_RE_BUILD
	33	#include "re_top.h"
	34	#endif
	35
	36	/*
	37	* pregcomp and pregexec -- regsub and regerror are not used in perl
	38	*
	39	* Copyright (c) 1986 by University of Toronto.
	40	* Written by Henry Spencer. Not derived from licensed software.
	41	*
	42	* Permission is granted to anyone to use this software for any
	43	* purpose on any computer system, and to redistribute it freely,
	44	* subject to the following restrictions:
	45	*
	46	* 1. The author is not responsible for the consequences of use of
	47	* this software, no matter how awful, even if they arise
	48	* from defects in it.
	49	*
	50	* 2. The origin of this software must not be misrepresented, either
	51	* by explicit claim or by omission.
	52	*
	53	* 3. Altered versions must be plainly marked as such, and must not
	54	* be misrepresented as being the original software.
	55	*
	56	*
	57	**** Alterations to Henry's code are...
	58	****
	59	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	60	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
	61	****
	62	**** You may distribute under the terms of either the GNU General Public
	63	**** License or the Artistic License, as specified in the README file.
	64
	65	*
	66	* Beware that some of this code is subtly aware of the way operator
	67	* precedence is structured in regular expressions. Serious changes in
	68	* regular-expression syntax might require a total rethink.
	69	*/
	70	#include "EXTERN.h"
	71	#define PERL_IN_REGCOMP_C
	72	#include "perl.h"
	73
	74	#ifndef PERL_IN_XSUB_RE
	75	# include "INTERN.h"
	76	#endif
	77
	78	#define REG_COMP_C
	79	#ifdef PERL_IN_XSUB_RE
	80	# include "re_comp.h"
	81	#else
	82	# include "regcomp.h"
	83	#endif
	84
	85	#ifdef op
	86	#undef op
	87	#endif /* op */
	88
	89	#ifdef MSDOS
	90	# if defined(BUGGY_MSC6)
	91	/* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
	92	# pragma optimize("a",off)
	93	/* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
	94	# pragma optimize("w",on )
	95	# endif /* BUGGY_MSC6 */
	96	#endif /* MSDOS */
	97
	98	#ifndef STATIC
	99	#define STATIC static
	100	#endif
	101
	102	typedef struct RExC_state_t {
	103	U32 flags; /* are we folding, multilining? */
	104	char precomp; / uncompiled string. */
	105	regexp rx; / perl core regexp structure */
	106	regexp_internal rxi; / internal data for regexp object pprivate field */
	107	char start; / Start of input for compile */
	108	char end; / End of input for compile */
	109	char parse; / Input-scan pointer. */
	110	I32 whilem_seen; /* number of WHILEM in this expr */
	111	regnode emit_start; / Start of emitted-code area */
	112	regnode emit_bound; / First regnode outside of the allocated space */
	113	regnode emit; / Code-emit pointer; &regdummy = don't = compiling */
	114	I32 naughty; /* How bad is this pattern? */
	115	I32 sawback; /* Did we see \1, ...? */
	116	U32 seen;
	117	I32 size; /* Code size. */
	118	I32 npar; /* Capture buffer count, (OPEN). */
	119	I32 cpar; /* Capture buffer count, (CLOSE). */
	120	I32 nestroot; /* root parens we are in - used by accept */
	121	I32 extralen;
	122	I32 seen_zerolen;
	123	I32 seen_evals;
	124	regnode *open_parens; / pointers to open parens */
	125	regnode *close_parens; / pointers to close parens */
	126	regnode opend; / END node in program */
	127	I32 utf8; /* whether the pattern is utf8 or not */
	128	I32 orig_utf8; /* whether the pattern was originally in utf8 */
	129	/* XXX use this for future optimisation of case
	130	* where pattern must be upgraded to utf8. */
	131	HV charnames; / cache of named sequences */
	132	HV paren_names; / Paren names */
	133
	134	regnode *recurse; / Recurse regops */
	135	I32 recurse_count; /* Number of recurse regops */
	136	#if ADD_TO_REGEXEC
	137	char starttry; / -Dr: where regtry was called. */
	138	#define RExC_starttry (pRExC_state->starttry)
	139	#endif
	140	#ifdef DEBUGGING
	141	const char *lastparse;
	142	I32 lastnum;
	143	AV paren_name_list; / idx -> name */
	144	#define RExC_lastparse (pRExC_state->lastparse)
	145	#define RExC_lastnum (pRExC_state->lastnum)
	146	#define RExC_paren_name_list (pRExC_state->paren_name_list)
	147	#endif
	148	} RExC_state_t;
	149
	150	#define RExC_flags (pRExC_state->flags)
	151	#define RExC_precomp (pRExC_state->precomp)
	152	#define RExC_rx (pRExC_state->rx)
	153	#define RExC_rxi (pRExC_state->rxi)
	154	#define RExC_start (pRExC_state->start)
	155	#define RExC_end (pRExC_state->end)
	156	#define RExC_parse (pRExC_state->parse)
	157	#define RExC_whilem_seen (pRExC_state->whilem_seen)
	158	#ifdef RE_TRACK_PATTERN_OFFSETS
	159	#define RExC_offsets (pRExC_state->rxi->u.offsets) /* I am not like the others */
	160	#endif
	161	#define RExC_emit (pRExC_state->emit)
	162	#define RExC_emit_start (pRExC_state->emit_start)
	163	#define RExC_emit_bound (pRExC_state->emit_bound)
	164	#define RExC_naughty (pRExC_state->naughty)
	165	#define RExC_sawback (pRExC_state->sawback)
	166	#define RExC_seen (pRExC_state->seen)
	167	#define RExC_size (pRExC_state->size)
	168	#define RExC_npar (pRExC_state->npar)
	169	#define RExC_nestroot (pRExC_state->nestroot)
	170	#define RExC_extralen (pRExC_state->extralen)
	171	#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
	172	#define RExC_seen_evals (pRExC_state->seen_evals)
	173	#define RExC_utf8 (pRExC_state->utf8)
	174	#define RExC_orig_utf8 (pRExC_state->orig_utf8)
	175	#define RExC_charnames (pRExC_state->charnames)
	176	#define RExC_open_parens (pRExC_state->open_parens)
	177	#define RExC_close_parens (pRExC_state->close_parens)
	178	#define RExC_opend (pRExC_state->opend)
	179	#define RExC_paren_names (pRExC_state->paren_names)
	180	#define RExC_recurse (pRExC_state->recurse)
	181	#define RExC_recurse_count (pRExC_state->recurse_count)
	182
	183
	184	#define ISMULT1(c) ((c) == '*' \|\| (c) == '+' \|\| (c) == '?')
	185	#define ISMULT2(s) ((s) == '' \|\| (s) == '+' \|\| (s) == '?' \|\| \
	186	((*s) == '{' && regcurly(s)))
	187
	188	#ifdef SPSTART
	189	#undef SPSTART /* dratted cpp namespace... */
	190	#endif
	191	/*
	192	* Flags to be passed up and down.
	193	*/
	194	#define WORST 0 /* Worst case. */
	195	#define HASWIDTH 0x01 /* Known to match non-null strings. */
	196	#define SIMPLE 0x02 /* Simple enough to be STAR/PLUS operand. */
	197	#define SPSTART 0x04 /* Starts with * or +. */
	198	#define TRYAGAIN 0x08 /* Weeded out a declaration. */
	199	#define POSTPONED 0x10 /* (?1),(?&name), (??{...}) or similar */
	200
	201	#define REG_NODE_NUM(x) ((x) ? (int)((x)-RExC_emit_start) : -1)
	202
	203	/* whether trie related optimizations are enabled */
	204	#if PERL_ENABLE_EXTENDED_TRIE_OPTIMISATION
	205	#define TRIE_STUDY_OPT
	206	#define FULL_TRIE_STUDY
	207	#define TRIE_STCLASS
	208	#endif
	209
	210
	211
	212	#define PBYTE(u8str,paren) ((U8*)(u8str))[(paren) >> 3]
	213	#define PBITVAL(paren) (1 << ((paren) & 7))
	214	#define PAREN_TEST(u8str,paren) ( PBYTE(u8str,paren) & PBITVAL(paren))
	215	#define PAREN_SET(u8str,paren) PBYTE(u8str,paren) \|= PBITVAL(paren)
	216	#define PAREN_UNSET(u8str,paren) PBYTE(u8str,paren) &= (~PBITVAL(paren))
	217
	218
	219	/* About scan_data_t.
	220
	221	During optimisation we recurse through the regexp program performing
	222	various inplace (keyhole style) optimisations. In addition study_chunk
	223	and scan_commit populate this data structure with information about
	224	what strings MUST appear in the pattern. We look for the longest
	225	string that must appear at a fixed location, and we look for the
	226	longest string that may appear at a floating location. So for instance
	227	in the pattern:
	228
	229	/FOO[xX]A.*B[xX]BAR/
	230
	231	Both 'FOO' and 'A' are fixed strings. Both 'B' and 'BAR' are floating
	232	strings (because they follow a .* construct). study_chunk will identify
	233	both FOO and BAR as being the longest fixed and floating strings respectively.
	234
	235	The strings can be composites, for instance
	236
	237	/(f)(o)(o)/
	238
	239	will result in a composite fixed substring 'foo'.
	240
	241	For each string some basic information is maintained:
	242
	243	- offset or min_offset
	244	This is the position the string must appear at, or not before.
	245	It also implicitly (when combined with minlenp) tells us how many
	246	characters must match before the string we are searching for.
	247	Likewise when combined with minlenp and the length of the string
	248	tells us how many characters must appear after the string we have
	249	found.
	250
	251	- max_offset
	252	Only used for floating strings. This is the rightmost point that
	253	the string can appear at. If set to I32 max it indicates that the
	254	string can occur infinitely far to the right.
	255
	256	- minlenp
	257	A pointer to the minimum length of the pattern that the string
	258	was found inside. This is important as in the case of positive
	259	lookahead or positive lookbehind we can have multiple patterns
	260	involved. Consider
	261
	262	/(?=FOO).*F/
	263
	264	The minimum length of the pattern overall is 3, the minimum length
	265	of the lookahead part is 3, but the minimum length of the part that
	266	will actually match is 1. So 'FOO's minimum length is 3, but the
	267	minimum length for the F is 1. This is important as the minimum length
	268	is used to determine offsets in front of and behind the string being
	269	looked for. Since strings can be composites this is the length of the
	270	pattern at the time it was committed with a scan_commit. Note that
	271	the length is calculated by study_chunk, so that the minimum lengths
	272	are not known until the full pattern has been compiled, thus the
	273	pointer to the value.
	274
	275	- lookbehind
	276
	277	In the case of lookbehind the string being searched for can be
	278	offset past the start point of the final matching string.
	279	If this value was just blithely removed from the min_offset it would
	280	invalidate some of the calculations for how many chars must match
	281	before or after (as they are derived from min_offset and minlen and
	282	the length of the string being searched for).
	283	When the final pattern is compiled and the data is moved from the
	284	scan_data_t structure into the regexp structure the information
	285	about lookbehind is factored in, with the information that would
	286	have been lost precalculated in the end_shift field for the
	287	associated string.
	288
	289	The fields pos_min and pos_delta are used to store the minimum offset
	290	and the delta to the maximum offset at the current point in the pattern.
	291
	292	*/
	293
	294	typedef struct scan_data_t {
	295	/I32 len_min; unused /
	296	/I32 len_delta; unused /
	297	I32 pos_min;
	298	I32 pos_delta;
	299	SV *last_found;
	300	I32 last_end; /* min value, <0 unless valid. */
	301	I32 last_start_min;
	302	I32 last_start_max;
	303	SV *longest; / Either &l_fixed, or &l_float. */
	304	SV longest_fixed; / longest fixed string found in pattern */
	305	I32 offset_fixed; /* offset where it starts */
	306	I32 minlen_fixed; / pointer to the minlen relevent to the string */
	307	I32 lookbehind_fixed; /* is the position of the string modfied by LB */
	308	SV longest_float; / longest floating string found in pattern */
	309	I32 offset_float_min; /* earliest point in string it can appear */
	310	I32 offset_float_max; /* latest point in string it can appear */
	311	I32 minlen_float; / pointer to the minlen relevent to the string */
	312	I32 lookbehind_float; /* is the position of the string modified by LB */
	313	I32 flags;
	314	I32 whilem_c;
	315	I32 *last_closep;
	316	struct regnode_charclass_class *start_class;
	317	} scan_data_t;
	318
	319	/*
	320	* Forward declarations for pregcomp()'s friends.
	321	*/
	322
	323	static const scan_data_t zero_scan_data =
	324	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0};
	325
	326	#define SF_BEFORE_EOL (SF_BEFORE_SEOL\|SF_BEFORE_MEOL)
	327	#define SF_BEFORE_SEOL 0x0001
	328	#define SF_BEFORE_MEOL 0x0002
	329	#define SF_FIX_BEFORE_EOL (SF_FIX_BEFORE_SEOL\|SF_FIX_BEFORE_MEOL)
	330	#define SF_FL_BEFORE_EOL (SF_FL_BEFORE_SEOL\|SF_FL_BEFORE_MEOL)
	331
	332	#ifdef NO_UNARY_PLUS
	333	# define SF_FIX_SHIFT_EOL (0+2)
	334	# define SF_FL_SHIFT_EOL (0+4)
	335	#else
	336	# define SF_FIX_SHIFT_EOL (+2)
	337	# define SF_FL_SHIFT_EOL (+4)
	338	#endif
	339
	340	#define SF_FIX_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
	341	#define SF_FIX_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
	342
	343	#define SF_FL_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FL_SHIFT_EOL)
	344	#define SF_FL_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FL_SHIFT_EOL) /* 0x20 */
	345	#define SF_IS_INF 0x0040
	346	#define SF_HAS_PAR 0x0080
	347	#define SF_IN_PAR 0x0100
	348	#define SF_HAS_EVAL 0x0200
	349	#define SCF_DO_SUBSTR 0x0400
	350	#define SCF_DO_STCLASS_AND 0x0800
	351	#define SCF_DO_STCLASS_OR 0x1000
	352	#define SCF_DO_STCLASS (SCF_DO_STCLASS_AND\|SCF_DO_STCLASS_OR)
	353	#define SCF_WHILEM_VISITED_POS 0x2000
	354
	355	#define SCF_TRIE_RESTUDY 0x4000 /* Do restudy? */
	356	#define SCF_SEEN_ACCEPT 0x8000
	357
	358	#define UTF (RExC_utf8 != 0)
	359	#define LOC ((RExC_flags & RXf_PMf_LOCALE) != 0)
	360	#define FOLD ((RExC_flags & RXf_PMf_FOLD) != 0)
	361
	362	#define OOB_UNICODE 12345678
	363	#define OOB_NAMEDCLASS -1
	364
	365	#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
	366	#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
	367
	368
	369	/* length of regex to show in messages that don't mark a position within */
	370	#define RegexLengthToShowInErrorMessages 127
	371
	372	/*
	373	* If MARKER[12] are adjusted, be sure to adjust the constants at the top
	374	* of t/op/regmesg.t, the tests in t/op/re_tests, and those in
	375	* op/pragma/warn/regcomp.
	376	*/
	377	#define MARKER1 "<-- HERE" /* marker as it appears in the description */
	378	#define MARKER2 " <-- HERE " /* marker as it appears within the regex */
	379
	380	#define REPORT_LOCATION " in regex; marked by " MARKER1 " in m/%.*s" MARKER2 "%s/"
	381
	382	/*
	383	* Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
	384	* arg. Show regex, up to a maximum length. If it's too long, chop and add
	385	* "...".
	386	*/
	387	#define _FAIL(code) STMT_START { \
	388	const char *ellipses = ""; \
	389	IV len = RExC_end - RExC_precomp; \
	390	\
	391	if (!SIZE_ONLY) \
	392	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	393	if (len > RegexLengthToShowInErrorMessages) { \
	394	/* chop 10 shorter than the max, to ensure meaning of "..." */ \
	395	len = RegexLengthToShowInErrorMessages - 10; \
	396	ellipses = "..."; \
	397	} \
	398	code; \
	399	} STMT_END
	400
	401	#define FAIL(msg) _FAIL( \
	402	Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \
	403	msg, (int)len, RExC_precomp, ellipses))
	404
	405	#define FAIL2(msg,arg) _FAIL( \
	406	Perl_croak(aTHX_ msg " in regex m/%.*s%s/", \
	407	arg, (int)len, RExC_precomp, ellipses))
	408
	409	/*
	410	* Simple_vFAIL -- like FAIL, but marks the current location in the scan
	411	*/
	412	#define Simple_vFAIL(m) STMT_START { \
	413	const IV offset = RExC_parse - RExC_precomp; \
	414	Perl_croak(aTHX_ "%s" REPORT_LOCATION, \
	415	m, (int)offset, RExC_precomp, RExC_precomp + offset); \
	416	} STMT_END
	417
	418	/*
	419	* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
	420	*/
	421	#define vFAIL(m) STMT_START { \
	422	if (!SIZE_ONLY) \
	423	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	424	Simple_vFAIL(m); \
	425	} STMT_END
	426
	427	/*
	428	* Like Simple_vFAIL(), but accepts two arguments.
	429	*/
	430	#define Simple_vFAIL2(m,a1) STMT_START { \
	431	const IV offset = RExC_parse - RExC_precomp; \
	432	S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, \
	433	(int)offset, RExC_precomp, RExC_precomp + offset); \
	434	} STMT_END
	435
	436	/*
	437	* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
	438	*/
	439	#define vFAIL2(m,a1) STMT_START { \
	440	if (!SIZE_ONLY) \
	441	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	442	Simple_vFAIL2(m, a1); \
	443	} STMT_END
	444
	445
	446	/*
	447	* Like Simple_vFAIL(), but accepts three arguments.
	448	*/
	449	#define Simple_vFAIL3(m, a1, a2) STMT_START { \
	450	const IV offset = RExC_parse - RExC_precomp; \
	451	S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, \
	452	(int)offset, RExC_precomp, RExC_precomp + offset); \
	453	} STMT_END
	454
	455	/*
	456	* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
	457	*/
	458	#define vFAIL3(m,a1,a2) STMT_START { \
	459	if (!SIZE_ONLY) \
	460	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	461	Simple_vFAIL3(m, a1, a2); \
	462	} STMT_END
	463
	464	/*
	465	* Like Simple_vFAIL(), but accepts four arguments.
	466	*/
	467	#define Simple_vFAIL4(m, a1, a2, a3) STMT_START { \
	468	const IV offset = RExC_parse - RExC_precomp; \
	469	S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3, \
	470	(int)offset, RExC_precomp, RExC_precomp + offset); \
	471	} STMT_END
	472
	473	#define vWARN(loc,m) STMT_START { \
	474	const IV offset = loc - RExC_precomp; \
	475	Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION, \
	476	m, (int)offset, RExC_precomp, RExC_precomp + offset); \
	477	} STMT_END
	478
	479	#define vWARNdep(loc,m) STMT_START { \
	480	const IV offset = loc - RExC_precomp; \
	481	Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP), \
	482	"%s" REPORT_LOCATION, \
	483	m, (int)offset, RExC_precomp, RExC_precomp + offset); \
	484	} STMT_END
	485
	486
	487	#define vWARN2(loc, m, a1) STMT_START { \
	488	const IV offset = loc - RExC_precomp; \
	489	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	490	a1, (int)offset, RExC_precomp, RExC_precomp + offset); \
	491	} STMT_END
	492
	493	#define vWARN3(loc, m, a1, a2) STMT_START { \
	494	const IV offset = loc - RExC_precomp; \
	495	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	496	a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset); \
	497	} STMT_END
	498
	499	#define vWARN4(loc, m, a1, a2, a3) STMT_START { \
	500	const IV offset = loc - RExC_precomp; \
	501	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	502	a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
	503	} STMT_END
	504
	505	#define vWARN5(loc, m, a1, a2, a3, a4) STMT_START { \
	506	const IV offset = loc - RExC_precomp; \
	507	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	508	a1, a2, a3, a4, (int)offset, RExC_precomp, RExC_precomp + offset); \
	509	} STMT_END
	510
	511
	512	/* Allow for side effects in s */
	513	#define REGC(c,s) STMT_START { \
	514	if (!SIZE_ONLY) *(s) = (c); else (void)(s); \
	515	} STMT_END
	516
	517	/* Macros for recording node offsets. 20001227 mjd@plover.com
	518	* Nodes are numbered 1, 2, 3, 4. Node #n's position is recorded in
	519	* element 2*n-1 of the array. Element #2n holds the byte length of node #n.
	520	* Element 0 holds the number n.
	521	* Position is 1 indexed.
	522	*/
	523	#ifndef RE_TRACK_PATTERN_OFFSETS
	524	#define Set_Node_Offset_To_R(node,byte)
	525	#define Set_Node_Offset(node,byte)
	526	#define Set_Cur_Node_Offset
	527	#define Set_Node_Length_To_R(node,len)
	528	#define Set_Node_Length(node,len)
	529	#define Set_Node_Cur_Length(node)
	530	#define Node_Offset(n)
	531	#define Node_Length(n)
	532	#define Set_Node_Offset_Length(node,offset,len)
	533	#define ProgLen(ri) ri->u.proglen
	534	#define SetProgLen(ri,x) ri->u.proglen = x
	535	#else
	536	#define ProgLen(ri) ri->u.offsets[0]
	537	#define SetProgLen(ri,x) ri->u.offsets[0] = x
	538	#define Set_Node_Offset_To_R(node,byte) STMT_START { \
	539	if (! SIZE_ONLY) { \
	540	MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n", \
	541	__LINE__, (int)(node), (int)(byte))); \
	542	if((node) < 0) { \
	543	Perl_croak(aTHX_ "value of node is %d in Offset macro", (int)(node)); \
	544	} else { \
	545	RExC_offsets[2*(node)-1] = (byte); \
	546	} \
	547	} \
	548	} STMT_END
	549
	550	#define Set_Node_Offset(node,byte) \
	551	Set_Node_Offset_To_R((node)-RExC_emit_start, (byte)-RExC_start)
	552	#define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
	553
	554	#define Set_Node_Length_To_R(node,len) STMT_START { \
	555	if (! SIZE_ONLY) { \
	556	MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n", \
	557	__LINE__, (int)(node), (int)(len))); \
	558	if((node) < 0) { \
	559	Perl_croak(aTHX_ "value of node is %d in Length macro", (int)(node)); \
	560	} else { \
	561	RExC_offsets[2*(node)] = (len); \
	562	} \
	563	} \
	564	} STMT_END
	565
	566	#define Set_Node_Length(node,len) \
	567	Set_Node_Length_To_R((node)-RExC_emit_start, len)
	568	#define Set_Cur_Node_Length(len) Set_Node_Length(RExC_emit, len)
	569	#define Set_Node_Cur_Length(node) \
	570	Set_Node_Length(node, RExC_parse - parse_start)
	571
	572	/* Get offsets and lengths */
	573	#define Node_Offset(n) (RExC_offsets[2*((n)-RExC_emit_start)-1])
	574	#define Node_Length(n) (RExC_offsets[2*((n)-RExC_emit_start)])
	575
	576	#define Set_Node_Offset_Length(node,offset,len) STMT_START { \
	577	Set_Node_Offset_To_R((node)-RExC_emit_start, (offset)); \
	578	Set_Node_Length_To_R((node)-RExC_emit_start, (len)); \
	579	} STMT_END
	580	#endif
	581
	582	#if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
	583	#define EXPERIMENTAL_INPLACESCAN
	584	#endif /RE_TRACK_PATTERN_OFFSETS/
	585
	586	#define DEBUG_STUDYDATA(str,data,depth) \
	587	DEBUG_OPTIMISE_MORE_r(if(data){ \
	588	PerlIO_printf(Perl_debug_log, \
	589	"%*s" str "Pos:%"IVdf"/%"IVdf \
	590	" Flags: 0x%"UVXf" Whilem_c: %"IVdf" Lcp: %"IVdf" %s", \
	591	(int)(depth)*2, "", \
	592	(IV)((data)->pos_min), \
	593	(IV)((data)->pos_delta), \
	594	(UV)((data)->flags), \
	595	(IV)((data)->whilem_c), \
	596	(IV)((data)->last_closep ? *((data)->last_closep) : -1), \
	597	is_inf ? "INF " : "" \
	598	); \
	599	if ((data)->last_found) \
	600	PerlIO_printf(Perl_debug_log, \
	601	"Last:'%s' %"IVdf":%"IVdf"/%"IVdf" %sFixed:'%s' @ %"IVdf \
	602	" %sFloat: '%s' @ %"IVdf"/%"IVdf"", \
	603	SvPVX_const((data)->last_found), \
	604	(IV)((data)->last_end), \
	605	(IV)((data)->last_start_min), \
	606	(IV)((data)->last_start_max), \
	607	((data)->longest && \
	608	(data)->longest==&((data)->longest_fixed)) ? "*" : "", \
	609	SvPVX_const((data)->longest_fixed), \
	610	(IV)((data)->offset_fixed), \
	611	((data)->longest && \
	612	(data)->longest==&((data)->longest_float)) ? "*" : "", \
	613	SvPVX_const((data)->longest_float), \
	614	(IV)((data)->offset_float_min), \
	615	(IV)((data)->offset_float_max) \
	616	); \
	617	PerlIO_printf(Perl_debug_log,"\n"); \
	618	});
	619
	620	static void clear_re(pTHX_ void *r);
	621
	622	/* Mark that we cannot extend a found fixed substring at this point.
	623	Update the longest found anchored substring and the longest found
	624	floating substrings if needed. */
	625
	626	STATIC void
	627	S_scan_commit(pTHX_ const RExC_state_t pRExC_state, scan_data_t data, I32 *minlenp, int is_inf)
	628	{
	629	const STRLEN l = CHR_SVLEN(data->last_found);
	630	const STRLEN old_l = CHR_SVLEN(*data->longest);
	631	GET_RE_DEBUG_FLAGS_DECL;
	632
	633	if ((l >= old_l) && ((l > old_l) \|\| (data->flags & SF_BEFORE_EOL))) {
	634	SvSetMagicSV(*data->longest, data->last_found);
	635	if (*data->longest == data->longest_fixed) {
	636	data->offset_fixed = l ? data->last_start_min : data->pos_min;
	637	if (data->flags & SF_BEFORE_EOL)
	638	data->flags
	639	\|= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
	640	else
	641	data->flags &= ~SF_FIX_BEFORE_EOL;
	642	data->minlen_fixed=minlenp;
	643	data->lookbehind_fixed=0;
	644	}
	645	else { /* data->longest == data->longest_float /
	646	data->offset_float_min = l ? data->last_start_min : data->pos_min;
	647	data->offset_float_max = (l
	648	? data->last_start_max
	649	: data->pos_min + data->pos_delta);
	650	if (is_inf \|\| (U32)data->offset_float_max > (U32)I32_MAX)
	651	data->offset_float_max = I32_MAX;
	652	if (data->flags & SF_BEFORE_EOL)
	653	data->flags
	654	\|= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
	655	else
	656	data->flags &= ~SF_FL_BEFORE_EOL;
	657	data->minlen_float=minlenp;
	658	data->lookbehind_float=0;
	659	}
	660	}
	661	SvCUR_set(data->last_found, 0);
	662	{
	663	SV * const sv = data->last_found;
	664	if (SvUTF8(sv) && SvMAGICAL(sv)) {
	665	MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
	666	if (mg)
	667	mg->mg_len = 0;
	668	}
	669	}
	670	data->last_end = -1;
	671	data->flags &= ~SF_BEFORE_EOL;
	672	DEBUG_STUDYDATA("commit: ",data,0);
	673	}
	674
	675	/* Can match anything (initialization) */
	676	STATIC void
	677	S_cl_anything(const RExC_state_t pRExC_state, struct regnode_charclass_class cl)
	678	{
	679	ANYOF_CLASS_ZERO(cl);
	680	ANYOF_BITMAP_SETALL(cl);
	681	cl->flags = ANYOF_EOS\|ANYOF_UNICODE_ALL;
	682	if (LOC)
	683	cl->flags \|= ANYOF_LOCALE;
	684	}
	685
	686	/* Can match anything (initialization) */
	687	STATIC int
	688	S_cl_is_anything(const struct regnode_charclass_class *cl)
	689	{
	690	int value;
	691
	692	for (value = 0; value <= ANYOF_MAX; value += 2)
	693	if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
	694	return 1;
	695	if (!(cl->flags & ANYOF_UNICODE_ALL))
	696	return 0;
	697	if (!ANYOF_BITMAP_TESTALLSET((const void*)cl))
	698	return 0;
	699	return 1;
	700	}
	701
	702	/* Can match anything (initialization) */
	703	STATIC void
	704	S_cl_init(const RExC_state_t pRExC_state, struct regnode_charclass_class cl)
	705	{
	706	Zero(cl, 1, struct regnode_charclass_class);
	707	cl->type = ANYOF;
	708	cl_anything(pRExC_state, cl);
	709	}
	710
	711	STATIC void
	712	S_cl_init_zero(const RExC_state_t pRExC_state, struct regnode_charclass_class cl)
	713	{
	714	Zero(cl, 1, struct regnode_charclass_class);
	715	cl->type = ANYOF;
	716	cl_anything(pRExC_state, cl);
	717	if (LOC)
	718	cl->flags \|= ANYOF_LOCALE;
	719	}
	720
	721	/* 'And' a given class with another one. Can create false positives */
	722	/* We assume that cl is not inverted */
	723	STATIC void
	724	S_cl_and(struct regnode_charclass_class *cl,
	725	const struct regnode_charclass_class *and_with)
	726	{
	727
	728	assert(and_with->type == ANYOF);
	729	if (!(and_with->flags & ANYOF_CLASS)
	730	&& !(cl->flags & ANYOF_CLASS)
	731	&& (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
	732	&& !(and_with->flags & ANYOF_FOLD)
	733	&& !(cl->flags & ANYOF_FOLD)) {
	734	int i;
	735
	736	if (and_with->flags & ANYOF_INVERT)
	737	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	738	cl->bitmap[i] &= ~and_with->bitmap[i];
	739	else
	740	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	741	cl->bitmap[i] &= and_with->bitmap[i];
	742	} /* XXXX: logic is complicated otherwise, leave it along for a moment. */
	743	if (!(and_with->flags & ANYOF_EOS))
	744	cl->flags &= ~ANYOF_EOS;
	745
	746	if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE &&
	747	!(and_with->flags & ANYOF_INVERT)) {
	748	cl->flags &= ~ANYOF_UNICODE_ALL;
	749	cl->flags \|= ANYOF_UNICODE;
	750	ARG_SET(cl, ARG(and_with));
	751	}
	752	if (!(and_with->flags & ANYOF_UNICODE_ALL) &&
	753	!(and_with->flags & ANYOF_INVERT))
	754	cl->flags &= ~ANYOF_UNICODE_ALL;
	755	if (!(and_with->flags & (ANYOF_UNICODE\|ANYOF_UNICODE_ALL)) &&
	756	!(and_with->flags & ANYOF_INVERT))
	757	cl->flags &= ~ANYOF_UNICODE;
	758	}
	759
	760	/* 'OR' a given class with another one. Can create false positives */
	761	/* We assume that cl is not inverted */
	762	STATIC void
	763	S_cl_or(const RExC_state_t pRExC_state, struct regnode_charclass_class cl, const struct regnode_charclass_class *or_with)
	764	{
	765	if (or_with->flags & ANYOF_INVERT) {
	766	/* We do not use
	767	* (B1 \| CL1) \| (!B2 & !CL2) = (B1 \| !B2 & !CL2) \| (CL1 \| (!B2 & !CL2))
	768	* <= (B1 \| !B2) \| (CL1 \| !CL2)
	769	* which is wasteful if CL2 is small, but we ignore CL2:
	770	* (B1 \| CL1) \| (!B2 & !CL2) <= (B1 \| CL1) \| !B2 = (B1 \| !B2) \| CL1
	771	* XXXX Can we handle case-fold? Unclear:
	772	* (OK1(i) \| OK1(i')) \| !(OK1(i) \| OK1(i')) =
	773	* (OK1(i) \| OK1(i')) \| (!OK1(i) & !OK1(i'))
	774	*/
	775	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
	776	&& !(or_with->flags & ANYOF_FOLD)
	777	&& !(cl->flags & ANYOF_FOLD) ) {
	778	int i;
	779
	780	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	781	cl->bitmap[i] \|= ~or_with->bitmap[i];
	782	} /* XXXX: logic is complicated otherwise */
	783	else {
	784	cl_anything(pRExC_state, cl);
	785	}
	786	} else {
	787	/* (B1 \| CL1) \| (B2 \| CL2) = (B1 \| B2) \| (CL1 \| CL2)) */
	788	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
	789	&& (!(or_with->flags & ANYOF_FOLD)
	790	\|\| (cl->flags & ANYOF_FOLD)) ) {
	791	int i;
	792
	793	/* OR char bitmap and class bitmap separately */
	794	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	795	cl->bitmap[i] \|= or_with->bitmap[i];
	796	if (or_with->flags & ANYOF_CLASS) {
	797	for (i = 0; i < ANYOF_CLASSBITMAP_SIZE; i++)
	798	cl->classflags[i] \|= or_with->classflags[i];
	799	cl->flags \|= ANYOF_CLASS;
	800	}
	801	}
	802	else { /* XXXX: logic is complicated, leave it along for a moment. */
	803	cl_anything(pRExC_state, cl);
	804	}
	805	}
	806	if (or_with->flags & ANYOF_EOS)
	807	cl->flags \|= ANYOF_EOS;
	808
	809	if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE &&
	810	ARG(cl) != ARG(or_with)) {
	811	cl->flags \|= ANYOF_UNICODE_ALL;
	812	cl->flags &= ~ANYOF_UNICODE;
	813	}
	814	if (or_with->flags & ANYOF_UNICODE_ALL) {
	815	cl->flags \|= ANYOF_UNICODE_ALL;
	816	cl->flags &= ~ANYOF_UNICODE;
	817	}
	818	}
	819
	820	#define TRIE_LIST_ITEM(state,idx) (trie->states[state].trans.list)[ idx ]
	821	#define TRIE_LIST_CUR(state) ( TRIE_LIST_ITEM( state, 0 ).forid )
	822	#define TRIE_LIST_LEN(state) ( TRIE_LIST_ITEM( state, 0 ).newstate )
	823	#define TRIE_LIST_USED(idx) ( trie->states[state].trans.list ? (TRIE_LIST_CUR( idx ) - 1) : 0 )
	824
	825
	826	#ifdef DEBUGGING
	827	/*
	828	dump_trie(trie,widecharmap,revcharmap)
	829	dump_trie_interim_list(trie,widecharmap,revcharmap,next_alloc)
	830	dump_trie_interim_table(trie,widecharmap,revcharmap,next_alloc)
	831
	832	These routines dump out a trie in a somewhat readable format.
	833	The _interim_ variants are used for debugging the interim
	834	tables that are used to generate the final compressed
	835	representation which is what dump_trie expects.
	836
	837	Part of the reason for their existence is to provide a form
	838	of documentation as to how the different representations function.
	839
	840	*/
	841
	842	/*
	843	Dumps the final compressed table form of the trie to Perl_debug_log.
	844	Used for debugging make_trie().
	845	*/
	846
	847	STATIC void
	848	S_dump_trie(pTHX_ const struct _reg_trie_data trie, HV widecharmap,
	849	AV *revcharmap, U32 depth)
	850	{
	851	U32 state;
	852	SV *sv=sv_newmortal();
	853	int colwidth= widecharmap ? 6 : 4;
	854	GET_RE_DEBUG_FLAGS_DECL;
	855
	856
	857	PerlIO_printf( Perl_debug_log, "%*sChar : %-6s%-6s%-4s ",
	858	(int)depth * 2 + 2,"",
	859	"Match","Base","Ofs" );
	860
	861	for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
	862	SV ** const tmp = av_fetch( revcharmap, state, 0);
	863	if ( tmp ) {
	864	PerlIO_printf( Perl_debug_log, "%*s",
	865	colwidth,
	866	pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), colwidth,
	867	PL_colors[0], PL_colors[1],
	868	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) \|
	869	PERL_PV_ESCAPE_FIRSTCHAR
	870	)
	871	);
	872	}
	873	}
	874	PerlIO_printf( Perl_debug_log, "\n%*sState\|-----------------------",
	875	(int)depth * 2 + 2,"");
	876
	877	for( state = 0 ; state < trie->uniquecharcount ; state++ )
	878	PerlIO_printf( Perl_debug_log, "%.*s", colwidth, "--------");
	879	PerlIO_printf( Perl_debug_log, "\n");
	880
	881	for( state = 1 ; state < trie->statecount ; state++ ) {
	882	const U32 base = trie->states[ state ].trans.base;
	883
	884	PerlIO_printf( Perl_debug_log, "%s#%4"UVXf"\|", (int)depth 2 + 2,"", (UV)state);
	885
	886	if ( trie->states[ state ].wordnum ) {
	887	PerlIO_printf( Perl_debug_log, " W%4X", trie->states[ state ].wordnum );
	888	} else {
	889	PerlIO_printf( Perl_debug_log, "%6s", "" );
	890	}
	891
	892	PerlIO_printf( Perl_debug_log, " @%4"UVXf" ", (UV)base );
	893
	894	if ( base ) {
	895	U32 ofs = 0;
	896
	897	while( ( base + ofs < trie->uniquecharcount ) \|\|
	898	( base + ofs - trie->uniquecharcount < trie->lasttrans
	899	&& trie->trans[ base + ofs - trie->uniquecharcount ].check != state))
	900	ofs++;
	901
	902	PerlIO_printf( Perl_debug_log, "+%2"UVXf"[ ", (UV)ofs);
	903
	904	for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
	905	if ( ( base + ofs >= trie->uniquecharcount ) &&
	906	( base + ofs - trie->uniquecharcount < trie->lasttrans ) &&
	907	trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
	908	{
	909	PerlIO_printf( Perl_debug_log, "%*"UVXf,
	910	colwidth,
	911	(UV)trie->trans[ base + ofs - trie->uniquecharcount ].next );
	912	} else {
	913	PerlIO_printf( Perl_debug_log, "%*s",colwidth," ." );
	914	}
	915	}
	916
	917	PerlIO_printf( Perl_debug_log, "]");
	918
	919	}
	920	PerlIO_printf( Perl_debug_log, "\n" );
	921	}
	922	}
	923	/*
	924	Dumps a fully constructed but uncompressed trie in list form.
	925	List tries normally only are used for construction when the number of
	926	possible chars (trie->uniquecharcount) is very high.
	927	Used for debugging make_trie().
	928	*/
	929	STATIC void
	930	S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
	931	HV widecharmap, AV revcharmap, U32 next_alloc,
	932	U32 depth)
	933	{
	934	U32 state;
	935	SV *sv=sv_newmortal();
	936	int colwidth= widecharmap ? 6 : 4;
	937	GET_RE_DEBUG_FLAGS_DECL;
	938	/* print out the table precompression. */
	939	PerlIO_printf( Perl_debug_log, "%sState :Word \| Transition Data\n%s%s",
	940	(int)depth * 2 + 2,"", (int)depth * 2 + 2,"",
	941	"------:-----+-----------------\n" );
	942
	943	for( state=1 ; state < next_alloc ; state ++ ) {
	944	U16 charid;
	945
	946	PerlIO_printf( Perl_debug_log, "%*s %4"UVXf" :",
	947	(int)depth * 2 + 2,"", (UV)state );
	948	if ( ! trie->states[ state ].wordnum ) {
	949	PerlIO_printf( Perl_debug_log, "%5s\| ","");
	950	} else {
	951	PerlIO_printf( Perl_debug_log, "W%4x\| ",
	952	trie->states[ state ].wordnum
	953	);
	954	}
	955	for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
	956	SV ** const tmp = av_fetch( revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0);
	957	if ( tmp ) {
	958	PerlIO_printf( Perl_debug_log, "%*s:%3X=%4"UVXf" \| ",
	959	colwidth,
	960	pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), colwidth,
	961	PL_colors[0], PL_colors[1],
	962	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) \|
	963	PERL_PV_ESCAPE_FIRSTCHAR
	964	) ,
	965	TRIE_LIST_ITEM(state,charid).forid,
	966	(UV)TRIE_LIST_ITEM(state,charid).newstate
	967	);
	968	if (!(charid % 10))
	969	PerlIO_printf(Perl_debug_log, "\n%*s\| ",
	970	(int)((depth * 2) + 14), "");
	971	}
	972	}
	973	PerlIO_printf( Perl_debug_log, "\n");
	974	}
	975	}
	976
	977	/*
	978	Dumps a fully constructed but uncompressed trie in table form.
	979	This is the normal DFA style state transition table, with a few
	980	twists to facilitate compression later.
	981	Used for debugging make_trie().
	982	*/
	983	STATIC void
	984	S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
	985	HV widecharmap, AV revcharmap, U32 next_alloc,
	986	U32 depth)
	987	{
	988	U32 state;
	989	U16 charid;
	990	SV *sv=sv_newmortal();
	991	int colwidth= widecharmap ? 6 : 4;
	992	GET_RE_DEBUG_FLAGS_DECL;
	993
	994	/*
	995	print out the table precompression so that we can do a visual check
	996	that they are identical.
	997	*/
	998
	999	PerlIO_printf( Perl_debug_log, "%sChar : ",(int)depth 2 + 2,"" );
	1000
	1001	for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
	1002	SV ** const tmp = av_fetch( revcharmap, charid, 0);
	1003	if ( tmp ) {
	1004	PerlIO_printf( Perl_debug_log, "%*s",
	1005	colwidth,
	1006	pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), colwidth,
	1007	PL_colors[0], PL_colors[1],
	1008	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) \|
	1009	PERL_PV_ESCAPE_FIRSTCHAR
	1010	)
	1011	);
	1012	}
	1013	}
	1014
	1015	PerlIO_printf( Perl_debug_log, "\n%sState+-",(int)depth 2 + 2,"" );
	1016
	1017	for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) {
	1018	PerlIO_printf( Perl_debug_log, "%.*s", colwidth,"--------");
	1019	}
	1020
	1021	PerlIO_printf( Perl_debug_log, "\n" );
	1022
	1023	for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) {
	1024
	1025	PerlIO_printf( Perl_debug_log, "%*s%4"UVXf" : ",
	1026	(int)depth * 2 + 2,"",
	1027	(UV)TRIE_NODENUM( state ) );
	1028
	1029	for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
	1030	UV v=(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next );
	1031	if (v)
	1032	PerlIO_printf( Perl_debug_log, "%*"UVXf, colwidth, v );
	1033	else
	1034	PerlIO_printf( Perl_debug_log, "%*s", colwidth, "." );
	1035	}
	1036	if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) {
	1037	PerlIO_printf( Perl_debug_log, " (%4"UVXf")\n", (UV)trie->trans[ state ].check );
	1038	} else {
	1039	PerlIO_printf( Perl_debug_log, " (%4"UVXf") W%4X\n", (UV)trie->trans[ state ].check,
	1040	trie->states[ TRIE_NODENUM( state ) ].wordnum );
	1041	}
	1042	}
	1043	}
	1044
	1045	#endif
	1046
	1047	/* make_trie(startbranch,first,last,tail,word_count,flags,depth)
	1048	startbranch: the first branch in the whole branch sequence
	1049	first : start branch of sequence of branch-exact nodes.
	1050	May be the same as startbranch
	1051	last : Thing following the last branch.
	1052	May be the same as tail.
	1053	tail : item following the branch sequence
	1054	count : words in the sequence
	1055	flags : currently the OP() type we will be building one of /EXACT(\|F\|Fl)/
	1056	depth : indent depth
	1057
	1058	Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
	1059
	1060	A trie is an N'ary tree where the branches are determined by digital
	1061	decomposition of the key. IE, at the root node you look up the 1st character and
	1062	follow that branch repeat until you find the end of the branches. Nodes can be
	1063	marked as "accepting" meaning they represent a complete word. Eg:
	1064
	1065	/he\|she\|his\|hers/
	1066
	1067	would convert into the following structure. Numbers represent states, letters
	1068	following numbers represent valid transitions on the letter from that state, if
	1069	the number is in square brackets it represents an accepting state, otherwise it
	1070	will be in parenthesis.
	1071
	1072	      +-h->+-e->[3]-+-r->(8)-+-s->[9]
	1073	      \|    \|
	1074	      \|   (2)
	1075	      \|    \|
	1076	     (1)   +-i->(6)-+-s->[7]
	1077	      \|
	1078	      +-s->(3)-+-h->(4)-+-e->[5]
	1079
	1080	Accept Word Mapping: 3=>1 (he),5=>2 (she), 7=>3 (his), 9=>4 (hers)
	1081
	1082	This shows that when matching against the string 'hers' we will begin at state 1
	1083	read 'h' and move to state 2, read 'e' and move to state 3 which is accepting,
	1084	then read 'r' and go to state 8 followed by 's' which takes us to state 9 which
	1085	is also accepting. Thus we know that we can match both 'he' and 'hers' with a
	1086	single traverse. We store a mapping from accepting to state to which word was
	1087	matched, and then when we have multiple possibilities we try to complete the
	1088	rest of the regex in the order in which they occurred in the alternation.
	1089
	1090	The only prior NFA like behaviour that would be changed by the TRIE support is
	1091	the silent ignoring of duplicate alternations which are of the form:
	1092
	1093	/ (DUPE\|DUPE) X? (?{ ... }) Y /x
	1094
	1095	Thus EVAL blocks following a trie may be called a different number of times with
	1096	and without the optimisation. With the optimisations dupes will be silently
	1097	ignored. This inconsistent behaviour of EVAL type nodes is well established as
	1098	the following demonstrates:
	1099
	1100	'words'=~/(word\|word\|word)(?{ print $1 })[xyz]/
	1101
	1102	which prints out 'word' three times, but
	1103
	1104	'words'=~/(word\|word\|word)(?{ print $1 })S/
	1105
	1106	which doesn't print it out at all. This is due to other optimisations kicking in.
	1107
	1108	Example of what happens on a structural level:
	1109
	1110	The regexp /(ac\|ad\|ab)+/ will produce the following debug output:
	1111
	1112	1: CURLYM[1] {1,32767}(18)
	1113	5: BRANCH(8)
	1114	6: EXACT <ac>(16)
	1115	8: BRANCH(11)
	1116	9: EXACT <ad>(16)
	1117	11: BRANCH(14)
	1118	12: EXACT <ab>(16)
	1119	16: SUCCEED(0)
	1120	17: NOTHING(18)
	1121	18: END(0)
	1122
	1123	This would be optimizable with startbranch=5, first=5, last=16, tail=16
	1124	and should turn into:
	1125
	1126	1: CURLYM[1] {1,32767}(18)
	1127	5: TRIE(16)
	1128	[Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1]
	1129	<ac>
	1130	<ad>
	1131	<ab>
	1132	16: SUCCEED(0)
	1133	17: NOTHING(18)
	1134	18: END(0)
	1135
	1136	Cases where tail != last would be like /(?:foo\|bar)baz/:
	1137
	1138	1: BRANCH(4)
	1139	2: EXACT <foo>(8)
	1140	4: BRANCH(7)
	1141	5: EXACT <bar>(8)
	1142	7: TAIL(8)
	1143	8: EXACT <baz>(10)
	1144	10: END(0)
	1145
	1146	which would be optimizable with startbranch=1, first=1, last=7, tail=8
	1147	and would end up looking like:
	1148
	1149	1: TRIE(8)
	1150	[Words:2 Chars Stored:6 Unique Chars:5 States:7 NCP:1]
	1151	<foo>
	1152	<bar>
	1153	7: TAIL(8)
	1154	8: EXACT <baz>(10)
	1155	10: END(0)
	1156
	1157	d = uvuni_to_utf8_flags(d, uv, 0);
	1158
	1159	is the recommended Unicode-aware way of saying
	1160
	1161	*(d++) = uv;
	1162	*/
	1163
	/*
	 * Build a fresh SV holding the single character 'uvc' (formatted with
	 * "%c"), flag it as UTF-8 when UTF is true, and push it onto the
	 * 'revcharmap' array.  'uvc' and 'revcharmap' come from the caller's
	 * scope.
	 */
	#define TRIE_STORE_REVCHAR                                        \
	    STMT_START {                                                  \
	        SV *revchar_sv = newSVpvs("");                            \
	        if (UTF) {                                                \
	            SvUTF8_on(revchar_sv);                                \
	        }                                                         \
	        Perl_sv_catpvf( aTHX_ revchar_sv, "%c", (int)uvc );       \
	        av_push( revcharmap, revchar_sv );                        \
	    } STMT_END
	1171
	/*
	 * Read the next character of the word being scanned into 'uvc' and
	 * bump 'wordlen'.  Works entirely on the caller's locals: uc, len, uvc,
	 * wordlen, folder, foldlen, foldbuf, scan and uniflags.
	 *
	 *  - not UTF:              uvc is the byte at uc, len is 1
	 *  - UTF, no folder:       uvc is decoded from uc, the decoder sets len
	 *  - UTF, folder, foldlen > 0:
	 *                          uvc is decoded from 'scan'; foldlen shrinks
	 *                          and scan advances by the decoded length, and
	 *                          len is reset to 0
	 *  - UTF, folder, otherwise:
	 *                          decode from uc, fold the result into foldbuf
	 *                          with to_uni_fold(), then step foldlen and
	 *                          scan past the UNISKIP(uvc) bytes just used
	 */
	#define TRIE_READ_CHAR STMT_START {                                       \
	    wordlen++;                                                            \
	    if ( !(UTF) ) {                                                       \
	        uvc = (U32)*uc;                                                   \
	        len = 1;                                                          \
	    } else if ( !folder ) {                                               \
	        uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\
	    } else if ( foldlen > 0 ) {                                           \
	        uvc = utf8n_to_uvuni( scan, UTF8_MAXLEN, &len, uniflags );        \
	        foldlen -= len;                                                   \
	        scan += len;                                                      \
	        len = 0;                                                          \
	    } else {                                                              \
	        uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\
	        uvc = to_uni_fold( uvc, foldbuf, &foldlen );                      \
	        foldlen -= UNISKIP( uvc );                                        \
	        scan = foldbuf + UNISKIP( uvc );                                  \
	    }                                                                     \
	} STMT_END
	1195
	1196
	1197
	/*
	 * Append the transition (fid -> ns) to the list owned by 'state'.
	 * When the cursor has reached the recorded length, the length is
	 * doubled and the list is reallocated to that size first.  The new
	 * entry is written at the cursor position and the cursor is then
	 * advanced by one.  Expects 'trie' in the caller's scope.
	 */
	#define TRIE_LIST_PUSH(state,fid,ns) STMT_START {                          \
	    if ( TRIE_LIST_LEN( state ) <= TRIE_LIST_CUR( state ) ) {              \
	        U32 trie_list_grown;                                               \
	        TRIE_LIST_LEN( state ) *= 2;                                       \
	        trie_list_grown = TRIE_LIST_LEN( state );                          \
	        Renew( trie->states[ state ].trans.list, trie_list_grown,          \
	               reg_trie_trans_le );                                        \
	    }                                                                      \
	    TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid;           \
	    TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns;         \
	    TRIE_LIST_CUR( state )++;                                              \
	} STMT_END
	1207
	/*
	 * Give 'state' a new zero-filled transition list of four elements and
	 * initialise its bookkeeping: recorded length 4, cursor 1.
	 * Expects 'trie' in the caller's scope.
	 */
	#define TRIE_LIST_NEW(state) STMT_START {                                  \
	    Newxz( trie->states[ state ].trans.list, 4, reg_trie_trans_le );       \
	    TRIE_LIST_LEN( state ) = 4;                                            \
	    TRIE_LIST_CUR( state ) = 1;                                            \
	} STMT_END
	1214
	/*
	 * Record that the word just scanned ends in 'state'.  Uses the caller's
	 * locals trie, noper, cur, tail, convert, curword, wordlen, word_count,
	 * jumper, nextbranch and (under DEBUG_r) trie_words.
	 *
	 * In order:
	 *  - store wordlen in trie->wordlen[curword] when that array exists;
	 *  - under DEBUG_r, push a copy of the word's text onto trie_words;
	 *  - increment curword;
	 *  - when the node after noper lies before tail, store its offset from
	 *    'convert' in trie->jump[curword] (allocating jump on first use) and
	 *    set jumper / nextbranch if they are still unset;
	 *  - when the state already carries a word number, append curword to
	 *    the end of that word's trie->nextword chain (allocating nextword
	 *    on first use); otherwise store curword as the state's word number.
	 */
	#define TRIE_HANDLE_WORD(state) STMT_START {                               \
	    U16 prior_word = trie->states[ state ].wordnum;                        \
	    regnode * const after_noper = regnext( noper );                        \
	                                                                           \
	    if (trie->wordlen) {                                                   \
	        trie->wordlen[ curword ] = wordlen;                                \
	    }                                                                      \
	    DEBUG_r({                                                              \
	        /* keep the word's text around so it can be dumped */              \
	        SV * const word_sv = (OP(noper) != NOTHING)                        \
	            ? newSVpvn(STRING(noper), STR_LEN(noper))                      \
	            : newSVpvn( "", 0 );                                           \
	        if ( UTF ) SvUTF8_on( word_sv );                                   \
	        av_push( trie_words, word_sv );                                    \
	    });                                                                    \
	                                                                           \
	    curword++;                                                             \
	                                                                           \
	    if ( after_noper < tail ) {                                            \
	        if (!trie->jump) {                                                 \
	            trie->jump = (U16 *) PerlMemShared_calloc( word_count + 1, sizeof(U16) ); \
	        }                                                                  \
	        trie->jump[curword] = (U16)(after_noper - convert);                \
	        if (!jumper) {                                                     \
	            jumper = after_noper;                                          \
	        }                                                                  \
	        if (!nextbranch) {                                                 \
	            nextbranch= regnext(cur);                                      \
	        }                                                                  \
	    }                                                                      \
	                                                                           \
	    if ( !prior_word ) {                                                   \
	        /* first word to end in this state */                              \
	        trie->states[ state ].wordnum = curword;                           \
	    } else {                                                               \
	        /* duplicate word: chain it after the earlier ones; the   */       \
	        /* nextword buffer is only allocated once a dupe is seen  */       \
	        if (!trie->nextword) {                                             \
	            trie->nextword = (U16 *)                                       \
	                PerlMemShared_calloc( word_count + 1, sizeof(U16));        \
	        }                                                                  \
	        while ( trie->nextword[prior_word] ) {                             \
	            prior_word = trie->nextword[prior_word];                       \
	        }                                                                  \
	        trie->nextword[prior_word] = curword;                              \
	    }                                                                      \
	} STMT_END
	1260
	1261
	/*
	 * Look up the transition out of 'state' on 'charid' in the compressed
	 * trans[] table.  'base' is the state's base offset and 'ucharcount'
	 * the number of unique characters; the slot examined is
	 * trans[base - ucharcount + charid].
	 *
	 * Yields the slot's .next when base + charid is at least ucharcount
	 * and below 'ubound', the slot's .check equals 'state', and .next is
	 * non-zero.  Otherwise yields 'special' for state 1 and 0 for any other
	 * state.
	 *
	 * 'trie' and 'ubound' are taken from the caller's scope.  Every macro
	 * argument is parenthesised so that expressions using low-precedence
	 * operators can be passed safely; arguments are still evaluated more
	 * than once, so they must be free of side effects.
	 */
	#define TRIE_TRANS_STATE(state,base,ucharcount,charid,special)              \
	    ( ( (base) + (charid) >= (ucharcount)                                   \
	        && (base) + (charid) < ubound                                       \
	        && (state) == trie->trans[ (base) - (ucharcount) + (charid) ].check \
	        && trie->trans[ (base) - (ucharcount) + (charid) ].next )           \
	      ? trie->trans[ (base) - (ucharcount) + (charid) ].next                \
	      : ( (state) == 1 ? (special) : 0 )                                    \
	    )
	1270
	/* Three distinct single-bit flag values (1, 2 and 4).
	 * NOTE(review): presumably the result codes of S_make_trie(), which
	 * follows them; confirm against its return statements. */
	#define MADE_TRIE       (1 << 0)
	#define MADE_JUMP_TRIE  (1 << 1)
	#define MADE_EXACT_TRIE (1 << 2)
	1274
	1275	STATIC I32
	1276	S_make_trie(pTHX_ RExC_state_t pRExC_state, regnode startbranch, regnode first, regnode last, regnode *tail, U32 word_count, U32 flags, U32 depth)
	1277	{
	1278	dVAR;
	1279	/* first pass, loop through and scan words */
	1280	reg_trie_data *trie;
	1281	HV *widecharmap = NULL;
	1282	AV *revcharmap = newAV();
	1283	regnode *cur;
	1284	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	1285	STRLEN len = 0;
	1286	UV uvc = 0;
	1287	U16 curword = 0;
	1288	U32 next_alloc = 0;
	1289	regnode *jumper = NULL;
	1290	regnode *nextbranch = NULL;
	1291	regnode *convert = NULL;
	1292	/* we just use folder as a flag in utf8 */
	1293	const U8 * const folder = ( flags == EXACTF
	1294	? PL_fold
	1295	: ( flags == EXACTFL
	1296	? PL_fold_locale
	1297	: NULL
	1298	)
	1299	);
	1300
	1301	#ifdef DEBUGGING
	1302	const U32 data_slot = add_data( pRExC_state, 4, "tuuu" );
	1303	AV *trie_words = NULL;
	1304	/* along with revcharmap, this only used during construction but both are
	1305	* useful during debugging so we store them in the struct when debugging.
	1306	*/
	1307	#else
	1308	const U32 data_slot = add_data( pRExC_state, 2, "tu" );
	1309	STRLEN trie_charcount=0;
	1310	#endif
	1311	SV *re_trie_maxbuff;
	1312	GET_RE_DEBUG_FLAGS_DECL;
	1313	#ifndef DEBUGGING
	1314	PERL_UNUSED_ARG(depth);
	1315	#endif
	1316
	1317	trie = (reg_trie_data *) PerlMemShared_calloc( 1, sizeof(reg_trie_data) );
	1318	trie->refcount = 1;
	1319	trie->startstate = 1;
	1320	trie->wordcount = word_count;
	1321	RExC_rxi->data->data[ data_slot ] = (void*)trie;
	1322	trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
	1323	if (!(UTF && folder))
	1324	trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
	1325	DEBUG_r({
	1326	trie_words = newAV();
	1327	});
	1328
	1329	re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
	1330	if (!SvIOK(re_trie_maxbuff)) {
	1331	sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
	1332	}
	1333	DEBUG_OPTIMISE_r({
	1334	PerlIO_printf( Perl_debug_log,
	1335	"%*smake_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n",
	1336	(int)depth * 2 + 2, "",
	1337	REG_NODE_NUM(startbranch),REG_NODE_NUM(first),
	1338	REG_NODE_NUM(last), REG_NODE_NUM(tail),
	1339	(int)depth);
	1340	});
	1341
	1342	/* Find the node we are going to overwrite */
	1343	if ( first == startbranch && OP( last ) != BRANCH ) {
	1344	/* whole branch chain */
	1345	convert = first;
	1346	} else {
	1347	/* branch sub-chain */
	1348	convert = NEXTOPER( first );
	1349	}
	1350
	1351	/* -- First loop and Setup --
	1352
	1353	We first traverse the branches and scan each word to determine if it
	1354	contains widechars, and how many unique chars there are, this is
	1355	important as we have to build a table with at least as many columns as we
	1356	have unique chars.
	1357
	1358	We use an array of integers to represent the character codes 0..255
	1359	(trie->charmap) and we use a an HV* to store unicode characters. We use the
	1360	native representation of the character value as the key and IV's for the
	1361	coded index.
	1362
	1363	TODO If we keep track of how many times each character is used we can
	1364	remap the columns so that the table compression later on is more
	1365	efficient in terms of memory by ensuring most common value is in the
	1366	middle and the least common are on the outside. IMO this would be better
	1367	than a most to least common mapping as theres a decent chance the most
	1368	common letter will share a node with the least common, meaning the node
	1369	will not be compressable. With a middle is most common approach the worst
	1370	case is when we have the least common nodes twice.
	1371
	1372	*/
	1373
	1374	for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
	1375	regnode * const noper = NEXTOPER( cur );
	1376	const U8 uc = (U8)STRING( noper );
	1377	const U8 * const e = uc + STR_LEN( noper );
	1378	STRLEN foldlen = 0;
	1379	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1380	const U8 scan = (U8)NULL;
	1381	U32 wordlen = 0; /* required init */
	1382	STRLEN chars = 0;
	1383	bool set_bit = trie->bitmap ? 1 : 0; /store the first char in the bitmap?/
	1384
	1385	if (OP(noper) == NOTHING) {
	1386	trie->minlen= 0;
	1387	continue;
	1388	}
	1389	if ( set_bit ) /* bitmap only alloced when !(UTF&&Folding) */
	1390	TRIE_BITMAP_SET(trie,uc); / store the raw first byte
	1391	regardless of encoding */
	1392
	1393	for ( ; uc < e ; uc += len ) {
	1394	TRIE_CHARCOUNT(trie)++;
	1395	TRIE_READ_CHAR;
	1396	chars++;
	1397	if ( uvc < 256 ) {
	1398	if ( !trie->charmap[ uvc ] ) {
	1399	trie->charmap[ uvc ]=( ++trie->uniquecharcount );
	1400	if ( folder )
	1401	trie->charmap[ folder[ uvc ] ] = trie->charmap[ uvc ];
	1402	TRIE_STORE_REVCHAR;
	1403	}
	1404	if ( set_bit ) {
	1405	/* store the codepoint in the bitmap, and if its ascii
	1406	also store its folded equivelent. */
	1407	TRIE_BITMAP_SET(trie,uvc);
	1408	if ( folder ) TRIE_BITMAP_SET(trie,folder[ uvc ]);
	1409	set_bit = 0; /* We've done our bit :-) */
	1410	}
	1411	} else {
	1412	SV** svpp;
	1413	if ( !widecharmap )
	1414	widecharmap = newHV();
	1415
	1416	svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
	1417
	1418	if ( !svpp )
	1419	Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc );
	1420
	1421	if ( !SvTRUE( *svpp ) ) {
	1422	sv_setiv( *svpp, ++trie->uniquecharcount );
	1423	TRIE_STORE_REVCHAR;
	1424	}
	1425	}
	1426	}
	1427	if( cur == first ) {
	1428	trie->minlen=chars;
	1429	trie->maxlen=chars;
	1430	} else if (chars < trie->minlen) {
	1431	trie->minlen=chars;
	1432	} else if (chars > trie->maxlen) {
	1433	trie->maxlen=chars;
	1434	}
	1435
	1436	} /* end first pass */
	1437	DEBUG_TRIE_COMPILE_r(
	1438	PerlIO_printf( Perl_debug_log, "%*sTRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
	1439	(int)depth * 2 + 2,"",
	1440	( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
	1441	(int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
	1442	(int)trie->minlen, (int)trie->maxlen )
	1443	);
	1444	trie->wordlen = (U32 *) PerlMemShared_calloc( word_count, sizeof(U32) );
	1445
	1446	/*
	1447	We now know what we are dealing with in terms of unique chars and
	1448	string sizes so we can calculate how much memory a naive
	1449	representation using a flat table will take. If it's over a reasonable
	1450	limit (as specified by ${^RE_TRIE_MAXBUF}) we use a more memory
	1451	conservative but potentially much slower representation using an array
	1452	of lists.
	1453
	1454	At the end we convert both representations into the same compressed
	1455	form that will be used in regexec.c for matching with. The latter
	1456	is a form that cannot be used to construct with but has memory
	1457	properties similar to the list form and access properties similar
	1458	to the table form making it both suitable for fast searches and
	1459	small enough that its feasable to store for the duration of a program.
	1460
	1461	See the comment in the code where the compressed table is produced
	1462	inplace from the flat tabe representation for an explanation of how
	1463	the compression works.
	1464
	1465	*/
	1466
	1467
	1468	if ( (IV)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1) > SvIV(re_trie_maxbuff) ) {
	1469	/*
	1470	Second Pass -- Array Of Lists Representation
	1471
	1472	Each state will be represented by a list of charid:state records
	1473	(reg_trie_trans_le) the first such element holds the CUR and LEN
	1474	points of the allocated array. (See defines above).
	1475
	1476	We build the initial structure using the lists, and then convert
	1477	it into the compressed table form which allows faster lookups
	1478	(but cant be modified once converted).
	1479	*/
	1480
	1481	STRLEN transcount = 1;
	1482
	1483	DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log,
	1484	"%*sCompiling trie using list compiler\n",
	1485	(int)depth * 2 + 2, ""));
	1486
	1487	trie->states = (reg_trie_state *)
	1488	PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
	1489	sizeof(reg_trie_state) );
	1490	TRIE_LIST_NEW(1);
	1491	next_alloc = 2;
	1492
	1493	for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
	1494
	1495	regnode * const noper = NEXTOPER( cur );
	1496	U8 uc = (U8)STRING( noper );
	1497	const U8 * const e = uc + STR_LEN( noper );
	1498	U32 state = 1; /* required init */
	1499	U16 charid = 0; /* sanity init */
	1500	U8 scan = (U8)NULL; /* sanity init */
	1501	STRLEN foldlen = 0; /* required init */
	1502	U32 wordlen = 0; /* required init */
	1503	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1504
	1505	if (OP(noper) != NOTHING) {
	1506	for ( ; uc < e ; uc += len ) {
	1507
	1508	TRIE_READ_CHAR;
	1509
	1510	if ( uvc < 256 ) {
	1511	charid = trie->charmap[ uvc ];
	1512	} else {
	1513	SV** const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0);
	1514	if ( !svpp ) {
	1515	charid = 0;
	1516	} else {
	1517	charid=(U16)SvIV( *svpp );
	1518	}
	1519	}
	1520	/* charid is now 0 if we dont know the char read, or nonzero if we do */
	1521	if ( charid ) {
	1522
	1523	U16 check;
	1524	U32 newstate = 0;
	1525
	1526	charid--;
	1527	if ( !trie->states[ state ].trans.list ) {
	1528	TRIE_LIST_NEW( state );
	1529	}
	1530	for ( check = 1; check <= TRIE_LIST_USED( state ); check++ ) {
	1531	if ( TRIE_LIST_ITEM( state, check ).forid == charid ) {
	1532	newstate = TRIE_LIST_ITEM( state, check ).newstate;
	1533	break;
	1534	}
	1535	}
	1536	if ( ! newstate ) {
	1537	newstate = next_alloc++;
	1538	TRIE_LIST_PUSH( state, charid, newstate );
	1539	transcount++;
	1540	}
	1541	state = newstate;
	1542	} else {
	1543	Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
	1544	}
	1545	}
	1546	}
	1547	TRIE_HANDLE_WORD(state);
	1548
	1549	} /* end second pass */
	1550
	1551	/* next alloc is the NEXT state to be allocated */
	1552	trie->statecount = next_alloc;
	1553	trie->states = (reg_trie_state *)
	1554	PerlMemShared_realloc( trie->states,
	1555	next_alloc
	1556	* sizeof(reg_trie_state) );
	1557
	1558	/* and now dump it out before we compress it */
	1559	DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap,
	1560	revcharmap, next_alloc,
	1561	depth+1)
	1562	);
	1563
	1564	trie->trans = (reg_trie_trans *)
	1565	PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) );
	1566	{
	1567	U32 state;
	1568	U32 tp = 0;
	1569	U32 zp = 0;
	1570
	1571
	1572	for( state=1 ; state < next_alloc ; state ++ ) {
	1573	U32 base=0;
	1574
	1575	/*
	1576	DEBUG_TRIE_COMPILE_MORE_r(
	1577	PerlIO_printf( Perl_debug_log, "tp: %d zp: %d ",tp,zp)
	1578	);
	1579	*/
	1580
	1581	if (trie->states[state].trans.list) {
	1582	U16 minid=TRIE_LIST_ITEM( state, 1).forid;
	1583	U16 maxid=minid;
	1584	U16 idx;
	1585
	1586	for( idx = 2 ; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
	1587	const U16 forid = TRIE_LIST_ITEM( state, idx).forid;
	1588	if ( forid < minid ) {
	1589	minid=forid;
	1590	} else if ( forid > maxid ) {
	1591	maxid=forid;
	1592	}
	1593	}
	1594	if ( transcount < tp + maxid - minid + 1) {
	1595	transcount *= 2;
	1596	trie->trans = (reg_trie_trans *)
	1597	PerlMemShared_realloc( trie->trans,
	1598	transcount
	1599	* sizeof(reg_trie_trans) );
	1600	Zero( trie->trans + (transcount / 2), transcount / 2 , reg_trie_trans );
	1601	}
	1602	base = trie->uniquecharcount + tp - minid;
	1603	if ( maxid == minid ) {
	1604	U32 set = 0;
	1605	for ( ; zp < tp ; zp++ ) {
	1606	if ( ! trie->trans[ zp ].next ) {
	1607	base = trie->uniquecharcount + zp - minid;
	1608	trie->trans[ zp ].next = TRIE_LIST_ITEM( state, 1).newstate;
	1609	trie->trans[ zp ].check = state;
	1610	set = 1;
	1611	break;
	1612	}
	1613	}
	1614	if ( !set ) {
	1615	trie->trans[ tp ].next = TRIE_LIST_ITEM( state, 1).newstate;
	1616	trie->trans[ tp ].check = state;
	1617	tp++;
	1618	zp = tp;
	1619	}
	1620	} else {
	1621	for ( idx=1; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
	1622	const U32 tid = base - trie->uniquecharcount + TRIE_LIST_ITEM( state, idx ).forid;
	1623	trie->trans[ tid ].next = TRIE_LIST_ITEM( state, idx ).newstate;
	1624	trie->trans[ tid ].check = state;
	1625	}
	1626	tp += ( maxid - minid + 1 );
	1627	}
	1628	Safefree(trie->states[ state ].trans.list);
	1629	}
	1630	/*
	1631	DEBUG_TRIE_COMPILE_MORE_r(
	1632	PerlIO_printf( Perl_debug_log, " base: %d\n",base);
	1633	);
	1634	*/
	1635	trie->states[ state ].trans.base=base;
	1636	}
	1637	trie->lasttrans = tp + 1;
	1638	}
	1639	} else {
	1640	/*
	1641	Second Pass -- Flat Table Representation.
	1642
	1643	we dont use the 0 slot of either trans[] or states[] so we add 1 to each.
	1644	We know that we will need Charcount+1 trans at most to store the data
	1645	(one row per char at worst case) So we preallocate both structures
	1646	assuming worst case.
	1647
	1648	We then construct the trie using only the .next slots of the entry
	1649	structs.
	1650
	1651	We use the .check field of the first entry of the node temporarily to
	1652	make compression both faster and easier by keeping track of how many non
	1653	zero fields are in the node.
	1654
	1655	Since trans are numbered from 1 any 0 pointer in the table is a FAIL
	1656	transition.
	1657
	1658	There are two terms at use here: state as a TRIE_NODEIDX() which is a
	1659	number representing the first entry of the node, and state as a
	1660	TRIE_NODENUM() which is the trans number. state 1 is TRIE_NODEIDX(1) and
	1661	TRIE_NODENUM(1), state 2 is TRIE_NODEIDX(2) and TRIE_NODENUM(3) if there
	1662	are 2 entrys per node. eg:
	1663
	1664	A B A B
	1665	1. 2 4 1. 3 7
	1666	2. 0 3 3. 0 5
	1667	3. 0 0 5. 0 0
	1668	4. 0 0 7. 0 0
	1669
	1670	The table is internally in the right hand, idx form. However as we also
	1671	have to deal with the states array which is indexed by nodenum we have to
	1672	use TRIE_NODENUM() to convert.
	1673
	1674	*/
	1675	DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log,
	1676	"%*sCompiling trie using table compiler\n",
	1677	(int)depth * 2 + 2, ""));
	1678
	1679	trie->trans = (reg_trie_trans *)
	1680	PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
	1681	* trie->uniquecharcount + 1,
	1682	sizeof(reg_trie_trans) );
	1683	trie->states = (reg_trie_state *)
	1684	PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
	1685	sizeof(reg_trie_state) );
	1686	next_alloc = trie->uniquecharcount + 1;
	1687
	1688
	1689	for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
	1690
	1691	regnode * const noper = NEXTOPER( cur );
	1692	const U8 uc = (U8)STRING( noper );
	1693	const U8 * const e = uc + STR_LEN( noper );
	1694
	1695	U32 state = 1; /* required init */
	1696
	1697	U16 charid = 0; /* sanity init */
	1698	U32 accept_state = 0; /* sanity init */
	1699	U8 scan = (U8)NULL; /* sanity init */
	1700
	1701	STRLEN foldlen = 0; /* required init */
	1702	U32 wordlen = 0; /* required init */
	1703	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1704
	1705	if ( OP(noper) != NOTHING ) {
	1706	for ( ; uc < e ; uc += len ) {
	1707
	1708	TRIE_READ_CHAR;
	1709
	1710	if ( uvc < 256 ) {
	1711	charid = trie->charmap[ uvc ];
	1712	} else {
	1713	SV* const * const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0);
	1714	charid = svpp ? (U16)SvIV(*svpp) : 0;
	1715	}
	1716	if ( charid ) {
	1717	charid--;
	1718	if ( !trie->trans[ state + charid ].next ) {
	1719	trie->trans[ state + charid ].next = next_alloc;
	1720	trie->trans[ state ].check++;
	1721	next_alloc += trie->uniquecharcount;
	1722	}
	1723	state = trie->trans[ state + charid ].next;
	1724	} else {
	1725	Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
	1726	}
	1727	/* charid is now 0 if we dont know the char read, or nonzero if we do */
	1728	}
	1729	}
	1730	accept_state = TRIE_NODENUM( state );
	1731	TRIE_HANDLE_WORD(accept_state);
	1732
	1733	} /* end second pass */
	1734
	1735	/* and now dump it out before we compress it */
	1736	DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap,
	1737	revcharmap,
	1738	next_alloc, depth+1));
	1739
	1740	{
	1741	/*
	1742	* Inplace compress the table.*
	1743
	1744	For sparse data sets the table constructed by the trie algorithm will
	1745	be mostly 0/FAIL transitions or to put it another way mostly empty.
	1746	(Note that leaf nodes will not contain any transitions.)
	1747
	1748	This algorithm compresses the tables by eliminating most such
	1749	transitions, at the cost of a modest bit of extra work during lookup:
	1750
	1751	- Each states[] entry contains a .base field which indicates the
	1752	index in the state[] array wheres its transition data is stored.
	1753
	1754	- If .base is 0 there are no valid transitions from that node.
	1755
	1756	- If .base is nonzero then charid is added to it to find an entry in
	1757	the trans array.
	1758
	1759	-If trans[states[state].base+charid].check!=state then the
	1760	transition is taken to be a 0/Fail transition. Thus if there are fail
	1761	transitions at the front of the node then the .base offset will point
	1762	somewhere inside the previous nodes data (or maybe even into a node
	1763	even earlier), but the .check field determines if the transition is
	1764	valid.
	1765
	1766	XXX - wrong maybe?
	1767	The following process inplace converts the table to the compressed
	1768	table: We first do not compress the root node 1,and mark its all its
	1769	.check pointers as 1 and set its .base pointer as 1 as well. This
	1770	allows to do a DFA construction from the compressed table later, and
	1771	ensures that any .base pointers we calculate later are greater than
	1772	0.
	1773
	1774	- We set 'pos' to indicate the first entry of the second node.
	1775
	1776	- We then iterate over the columns of the node, finding the first and
	1777	last used entry at l and m. We then copy l..m into pos..(pos+m-l),
	1778	and set the .check pointers accordingly, and advance pos
	1779	appropriately and repreat for the next node. Note that when we copy
	1780	the next pointers we have to convert them from the original
	1781	NODEIDX form to NODENUM form as the former is not valid post
	1782	compression.
	1783
	1784	- If a node has no transitions used we mark its base as 0 and do not
	1785	advance the pos pointer.
	1786
	1787	- If a node only has one transition we use a second pointer into the
	1788	structure to fill in allocated fail transitions from other states.
	1789	This pointer is independent of the main pointer and scans forward
	1790	looking for null transitions that are allocated to a state. When it
	1791	finds one it writes the single transition into the "hole". If the
	1792	pointer doesn't find one the single transition is appended as normal.
	1793
	1794	- Once compressed we can Renew/realloc the structures to release the
	1795	excess space.
	1796
	1797	See "Table-Compression Methods" in sec 3.9 of the Red Dragon,
	1798	specifically Fig 3.47 and the associated pseudocode.
	1799
	1800	demq
	1801	*/
	1802	const U32 laststate = TRIE_NODENUM( next_alloc );
	1803	U32 state, charid;
	1804	U32 pos = 0, zp=0;
	1805	trie->statecount = laststate;
	1806
	1807	for ( state = 1 ; state < laststate ; state++ ) {
	1808	U8 flag = 0;
	1809	const U32 stateidx = TRIE_NODEIDX( state );
	1810	const U32 o_used = trie->trans[ stateidx ].check;
	1811	U32 used = trie->trans[ stateidx ].check;
	1812	trie->trans[ stateidx ].check = 0;
	1813
	1814	for ( charid = 0 ; used && charid < trie->uniquecharcount ; charid++ ) {
	1815	if ( flag \|\| trie->trans[ stateidx + charid ].next ) {
	1816	if ( trie->trans[ stateidx + charid ].next ) {
	1817	if (o_used == 1) {
	1818	for ( ; zp < pos ; zp++ ) {
	1819	if ( ! trie->trans[ zp ].next ) {
	1820	break;
	1821	}
	1822	}
	1823	trie->states[ state ].trans.base = zp + trie->uniquecharcount - charid ;
	1824	trie->trans[ zp ].next = SAFE_TRIE_NODENUM( trie->trans[ stateidx + charid ].next );
	1825	trie->trans[ zp ].check = state;
	1826	if ( ++zp > pos ) pos = zp;
	1827	break;
	1828	}
	1829	used--;
	1830	}
	1831	if ( !flag ) {
	1832	flag = 1;
	1833	trie->states[ state ].trans.base = pos + trie->uniquecharcount - charid ;
	1834	}
	1835	trie->trans[ pos ].next = SAFE_TRIE_NODENUM( trie->trans[ stateidx + charid ].next );
	1836	trie->trans[ pos ].check = state;
	1837	pos++;
	1838	}
	1839	}
	1840	}
	1841	trie->lasttrans = pos + 1;
	1842	trie->states = (reg_trie_state *)
	1843	PerlMemShared_realloc( trie->states, laststate
	1844	* sizeof(reg_trie_state) );
	1845	DEBUG_TRIE_COMPILE_MORE_r(
	1846	PerlIO_printf( Perl_debug_log,
	1847	"%*sAlloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
	1848	(int)depth * 2 + 2,"",
	1849	(int)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1 ),
	1850	(IV)next_alloc,
	1851	(IV)pos,
	1852	( ( next_alloc - pos ) * 100 ) / (double)next_alloc );
	1853	);
	1854
	1855	} /* end table compress */
	1856	}
	1857	DEBUG_TRIE_COMPILE_MORE_r(
	1858	PerlIO_printf(Perl_debug_log, "%*sStatecount:%"UVxf" Lasttrans:%"UVxf"\n",
	1859	(int)depth * 2 + 2, "",
	1860	(UV)trie->statecount,
	1861	(UV)trie->lasttrans)
	1862	);
	1863	/* resize the trans array to remove unused space */
	1864	trie->trans = (reg_trie_trans *)
	1865	PerlMemShared_realloc( trie->trans, trie->lasttrans
	1866	* sizeof(reg_trie_trans) );
	1867
	1868	/* and now dump out the compressed format */
	1869	DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
	1870
	1871	{ /* Modify the program and insert the new TRIE node*/
	1872	U8 nodetype =(U8)(flags & 0xFF);
	1873	char *str=NULL;
	1874
	1875	#ifdef DEBUGGING
	1876	regnode *optimize = NULL;
	1877	#ifdef RE_TRACK_PATTERN_OFFSETS
	1878
	1879	U32 mjd_offset = 0;
	1880	U32 mjd_nodelen = 0;
	1881	#endif /* RE_TRACK_PATTERN_OFFSETS */
	1882	#endif /* DEBUGGING */
	1883	/*
	1884	This means we convert either the first branch or the first Exact,
	1885	depending on whether the thing following (in 'last') is a branch
	1886	or not and whether first is the startbranch (ie is it a sub part of
	1887	the alternation or is it the whole thing.)
	1888	Assuming it's a sub part we convert the EXACT, otherwise we convert
	1889	the whole branch sequence, including the first.
	1890	*/
	1891	/* Find the node we are going to overwrite */
	1892	if ( first != startbranch \|\| OP( last ) == BRANCH ) {
	1893	/* branch sub-chain */
	1894	NEXT_OFF( first ) = (U16)(last - first);
	1895	#ifdef RE_TRACK_PATTERN_OFFSETS
	1896	DEBUG_r({
	1897	mjd_offset= Node_Offset((convert));
	1898	mjd_nodelen= Node_Length((convert));
	1899	});
	1900	#endif
	1901	/* whole branch chain */
	1902	}
	1903	#ifdef RE_TRACK_PATTERN_OFFSETS
	1904	else {
	1905	DEBUG_r({
	1906	const regnode *nop = NEXTOPER( convert );
	1907	mjd_offset= Node_Offset((nop));
	1908	mjd_nodelen= Node_Length((nop));
	1909	});
	1910	}
	1911	DEBUG_OPTIMISE_r(
	1912	PerlIO_printf(Perl_debug_log, "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n",
	1913	(int)depth * 2 + 2, "",
	1914	(UV)mjd_offset, (UV)mjd_nodelen)
	1915	);
	1916	#endif
	1917	/* But first we check to see if there is a common prefix we can
	1918	split out as an EXACT and put in front of the TRIE node. */
	1919	trie->startstate= 1;
	1920	if ( trie->bitmap && !widecharmap && !trie->jump ) {
	1921	U32 state;
	1922	for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
	1923	U32 ofs = 0;
	1924	I32 idx = -1;
	1925	U32 count = 0;
	1926	const U32 base = trie->states[ state ].trans.base;
	1927
	1928	if ( trie->states[state].wordnum )
	1929	count = 1;
	1930
	1931	for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
	1932	if ( ( base + ofs >= trie->uniquecharcount ) &&
	1933	( base + ofs - trie->uniquecharcount < trie->lasttrans ) &&
	1934	trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
	1935	{
	1936	if ( ++count > 1 ) {
	1937	SV **tmp = av_fetch( revcharmap, ofs, 0);
	1938	const U8 ch = (U8)SvPV_nolen_const( *tmp );
	1939	if ( state == 1 ) break;
	1940	if ( count == 2 ) {
	1941	Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char);
	1942	DEBUG_OPTIMISE_r(
	1943	PerlIO_printf(Perl_debug_log,
	1944	"%*sNew Start State=%"UVuf" Class: [",
	1945	(int)depth * 2 + 2, "",
	1946	(UV)state));
	1947	if (idx >= 0) {
	1948	SV ** const tmp = av_fetch( revcharmap, idx, 0);
	1949	const U8 * const ch = (U8)SvPV_nolen_const( tmp );
	1950
	1951	TRIE_BITMAP_SET(trie,*ch);
	1952	if ( folder )
	1953	TRIE_BITMAP_SET(trie, folder[ *ch ]);
	1954	DEBUG_OPTIMISE_r(
	1955	PerlIO_printf(Perl_debug_log, (char*)ch)
	1956	);
	1957	}
	1958	}
	1959	TRIE_BITMAP_SET(trie,*ch);
	1960	if ( folder )
	1961	TRIE_BITMAP_SET(trie,folder[ *ch ]);
	1962	DEBUG_OPTIMISE_r(PerlIO_printf( Perl_debug_log,"%s", ch));
	1963	}
	1964	idx = ofs;
	1965	}
	1966	}
	1967	if ( count == 1 ) {
	1968	SV **tmp = av_fetch( revcharmap, idx, 0);
	1969	char ch = SvPV_nolen( tmp );
	1970	DEBUG_OPTIMISE_r({
	1971	SV *sv=sv_newmortal();
	1972	PerlIO_printf( Perl_debug_log,
	1973	"%*sPrefix State: %"UVuf" Idx:%"UVuf" Char='%s'\n",
	1974	(int)depth * 2 + 2, "",
	1975	(UV)state, (UV)idx,
	1976	pv_pretty(sv, SvPV_nolen_const(tmp), SvCUR(tmp), 6,
	1977	PL_colors[0], PL_colors[1],
	1978	(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) \|
	1979	PERL_PV_ESCAPE_FIRSTCHAR
	1980	)
	1981	);
	1982	});
	1983	if ( state==1 ) {
	1984	OP( convert ) = nodetype;
	1985	str=STRING(convert);
	1986	STR_LEN(convert)=0;
	1987	}
	1988	while (*ch) {
	1989	str++ = ch++;
	1990	STR_LEN(convert)++;
	1991	}
	1992
	1993	} else {
	1994	#ifdef DEBUGGING
	1995	if (state>1)
	1996	DEBUG_OPTIMISE_r(PerlIO_printf( Perl_debug_log,"]\n"));
	1997	#endif
	1998	break;
	1999	}
	2000	}
	2001	if (str) {
	2002	regnode *n = convert+NODE_SZ_STR(convert);
	2003	NEXT_OFF(convert) = NODE_SZ_STR(convert);
	2004	trie->startstate = state;
	2005	trie->minlen -= (state - 1);
	2006	trie->maxlen -= (state - 1);
	2007	DEBUG_r({
	2008	regnode *fix = convert;
	2009	U32 word = trie->wordcount;
	2010	mjd_nodelen++;
	2011	Set_Node_Offset_Length(convert, mjd_offset, state - 1);
	2012	while( ++fix < n ) {
	2013	Set_Node_Offset_Length(fix, 0, 0);
	2014	}
	2015	while (word--) {
	2016	SV ** const tmp = av_fetch( trie_words, word, 0 );
	2017	if (tmp) {
	2018	if ( STR_LEN(convert) <= SvCUR(*tmp) )
	2019	sv_chop(tmp, SvPV_nolen(tmp) + STR_LEN(convert));
	2020	else
	2021	sv_chop(tmp, SvPV_nolen(tmp) + SvCUR(*tmp));
	2022	}
	2023	}
	2024	});
	2025	if (trie->maxlen) {
	2026	convert = n;
	2027	} else {
	2028	NEXT_OFF(convert) = (U16)(tail - convert);
	2029	DEBUG_r(optimize= n);
	2030	}
	2031	}
	2032	}
	2033	if (!jumper)
	2034	jumper = last;
	2035	if ( trie->maxlen ) {
	2036	NEXT_OFF( convert ) = (U16)(tail - convert);
	2037	ARG_SET( convert, data_slot );
	2038	/* Store the offset to the first unabsorbed branch in
	2039	jump[0], which is otherwise unused by the jump logic.
	2040	We use this when dumping a trie and during optimisation. */
	2041	if (trie->jump)
	2042	trie->jump[0] = (U16)(nextbranch - convert);
	2043
	2044	/* XXXX */
	2045	if ( !trie->states[trie->startstate].wordnum && trie->bitmap &&
	2046	( (char )jumper - (char )convert) >= (int)sizeof(struct regnode_charclass) )
	2047	{
	2048	OP( convert ) = TRIEC;
	2049	Copy(trie->bitmap, ((struct regnode_charclass *)convert)->bitmap, ANYOF_BITMAP_SIZE, char);
	2050	PerlMemShared_free(trie->bitmap);
	2051	trie->bitmap= NULL;
	2052	} else
	2053	OP( convert ) = TRIE;
	2054
	2055	/* store the type in the flags */
	2056	convert->flags = nodetype;
	2057	DEBUG_r({
	2058	optimize = convert
	2059	+ NODE_STEP_REGNODE
	2060	+ regarglen[ OP( convert ) ];
	2061	});
	2062	/* XXX We really should free up the resource in trie now,
	2063	as we won't use them - (which resources?) dmq */
	2064	}
	2065	/* needed for dumping*/
	2066	DEBUG_r(if (optimize) {
	2067	regnode *opt = convert;
	2068
	2069	while ( ++opt < optimize) {
	2070	Set_Node_Offset_Length(opt,0,0);
	2071	}
	2072	/*
	2073	Try to clean up some of the debris left after the
	2074	optimisation.
	2075	*/
	2076	while( optimize < jumper ) {
	2077	mjd_nodelen += Node_Length((optimize));
	2078	OP( optimize ) = OPTIMIZED;
	2079	Set_Node_Offset_Length(optimize,0,0);
	2080	optimize++;
	2081	}
	2082	Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen);
	2083	});
	2084	} /* end node insert */
	2085	RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
	2086	#ifdef DEBUGGING
	2087	RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words;
	2088	RExC_rxi->data->data[ data_slot + 3 ] = (void*)revcharmap;
	2089	#else
	2090	SvREFCNT_dec(revcharmap);
	2091	#endif
	2092	return trie->jump
	2093	? MADE_JUMP_TRIE
	2094	: trie->startstate>1
	2095	? MADE_EXACT_TRIE
	2096	: MADE_TRIE;
	2097	}
	2098
	2099	STATIC void
	2100	S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode *stclass, U32 depth)
	2101	{
	2102	    /* The trie is constructed and compressed by now, so we can build a fail array if it's needed.
	2103	
	2104	       This is basically the Aho-Corasick algorithm. It's from exercises 3.31 and 3.32 in the
	2105	       "Red Dragon" -- Compilers, principles, techniques, and tools. Aho, Sethi, Ullman 1985/88
	2106	       ISBN 0-201-10088-6
	2107	
	2108	       We find the fail state for each state in the trie; this state is the longest proper
	2109	       suffix of the current state's 'word' that is also a proper prefix of another word in our
	2110	       trie. State 1 represents the word '' and is thus the default fail state. This allows
	2111	       the DFA not to have to restart after it has tried and failed a word at a given point; it
	2112	       simply continues as though it had been matching the other word in the first place.
	2113	       Consider
	2114	          'abcdgu'=~/abcdefg|cdgu/
	2115	       When we get to 'd' we are still matching the first word, we would encounter 'g' which would
	2116	       fail, which would bring us to the state representing 'd' in the second word where we would
	2117	       try 'g' and succeed, proceeding to match 'cdgu'.
	2118	    */
	2119	    /* add a fail transition: source is the trie regnode (its ARG is the trie's data slot), stclass receives the new data slot, depth only indents debug output */
	2120	    const U32 trie_offset = ARG(source);
	2121	    reg_trie_data *trie=(reg_trie_data *)RExC_rxi->data->data[trie_offset];
	2122	    U32 *q;
	2123	    const U32 ucharcount = trie->uniquecharcount;
	2124	    const U32 numstates = trie->statecount;
	2125	    const U32 ubound = trie->lasttrans + ucharcount; /* NOTE(review): not referenced directly below; presumably consumed by TRIE_TRANS_STATE -- confirm */
	2126	    U32 q_read = 0;
	2127	    U32 q_write = 0;
	2128	    U32 charid;
	2129	    U32 base = trie->states[ 1 ].trans.base;
	2130	    U32 *fail;
	2131	    reg_ac_data *aho;
	2132	    const U32 data_slot = add_data( pRExC_state, 1, "T" );
	2133	    GET_RE_DEBUG_FLAGS_DECL;
	2134	#ifndef DEBUGGING
	2135	    PERL_UNUSED_ARG(depth);
	2136	#endif
	2137	    /* Point stclass at a new data slot holding the reg_ac_data: a private copy of the
	2138	       trie's states plus a zeroed fail[] table with one entry per state. */
	2139	    ARG_SET( stclass, data_slot );
	2140	    aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
	2141	    RExC_rxi->data->data[ data_slot ] = (void*)aho;
	2142	    aho->trie=trie_offset;
	2143	    aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) );
	2144	    Copy( trie->states, aho->states, numstates, reg_trie_state );
	2145	    Newxz( q, numstates, U32);
	2146	    aho->fail = (U32 *) PerlMemShared_calloc( numstates, sizeof(U32) );
	2147	    aho->refcount = 1;
	2148	    fail = aho->fail;
	2149	    /* initialize fail[0..1] to be 1 so that we always have
	2150	       a valid final fail state */
	2151	    fail[ 0 ] = fail[ 1 ] = 1;
	2152	    /* Seed the work queue with every state reachable from state 1; each of them fails back to state 1. */
	2153	    for ( charid = 0; charid < ucharcount ; charid++ ) {
	2154	        const U32 newstate = TRIE_TRANS_STATE( 1, base, ucharcount, charid, 0 );
	2155	        if ( newstate ) {
	2156	            q[ q_write ] = newstate;
	2157	            /* set to point at the root */
	2158	            fail[ q[ q_write++ ] ]=1;
	2159	        }
	2160	    }
	2161	    while ( q_read < q_write) {
	2162	        const U32 cur = q[ q_read++ % numstates ];
	2163	        base = trie->states[ cur ].trans.base;
	2164	        /* For each child of cur, walk cur's fail chain until a state with a transition on the same charid is found. */
	2165	        for ( charid = 0 ; charid < ucharcount ; charid++ ) {
	2166	            const U32 ch_state = TRIE_TRANS_STATE( cur, base, ucharcount, charid, 1 );
	2167	            if (ch_state) {
	2168	                U32 fail_state = cur;
	2169	                U32 fail_base;
	2170	                do {
	2171	                    fail_state = fail[ fail_state ];
	2172	                    fail_base = aho->states[ fail_state ].trans.base;
	2173	                } while ( !TRIE_TRANS_STATE( fail_state, fail_base, ucharcount, charid, 1 ) );
	2174	
	2175	                fail_state = TRIE_TRANS_STATE( fail_state, fail_base, ucharcount, charid, 1 );
	2176	                fail[ ch_state ] = fail_state;
	2177	                if ( !aho->states[ ch_state ].wordnum && aho->states[ fail_state ].wordnum )
	2178	                { /* a state with no word of its own inherits the word number of its fail state */
	2179	                    aho->states[ ch_state ].wordnum = aho->states[ fail_state ].wordnum;
	2180	                }
	2181	                q[ q_write++ % numstates] = ch_state;
	2182	            }
	2183	        }
	2184	    }
	2185	    /* restore fail[0..1] to 0 so that we "fall out" of the AC loop
	2186	       when we fail in state 1, this allows us to use the
	2187	       charclass scan to find a valid start char. This is based on the principle
	2188	       that there's a good chance the string being searched contains lots of stuff
	2189	       that can't be a start char.
	2190	    */
	2191	    fail[ 0 ] = fail[ 1 ] = 0;
	2192	    DEBUG_TRIE_COMPILE_r({
	2193	        PerlIO_printf(Perl_debug_log,
	2194	            "%*sStclass Failtable (%"UVuf" states): 0",
	2195	            (int)(depth * 2), "", (UV)numstates
	2196	        );
	2197	        for( q_read=1; q_read<numstates; q_read++ ) {
	2198	            PerlIO_printf(Perl_debug_log, ", %"UVuf, (UV)fail[q_read]);
	2199	        }
	2200	        PerlIO_printf(Perl_debug_log, "\n");
	2201	    });
	2202	    Safefree(q);
	2203	    /*RExC_seen |= REG_SEEN_TRIEDFA;*/
	2204	}
	2205
	2206
	2207	/*
	2208	 * There are strange code-generation bugs caused on sparc64 by gcc-2.95.2.
	2209	 * These need to be revisited when a newer toolchain becomes available.
	2210	 * The workaround is enabled only for gcc older than 2.96 on sparc64.
	2211	 */
	2212	#if defined(__sparc64__) && defined(__GNUC__)
	2213	#   if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
	2214	#       undef  SPARC64_GCC_WORKAROUND
	2215	#       define SPARC64_GCC_WORKAROUND 1
	2216	#   endif
	2217	/* Debug aid: when scan is non-NULL, print (indented by depth*2) the label str, scan's node number, its regprop() description and the node number of regnext(scan), or 0 if there is none. */
	2218	#define DEBUG_PEEP(str,scan,depth) \
	2219	DEBUG_OPTIMISE_r({if (scan){ \
	2220	SV * const mysv=sv_newmortal(); \
	2221	regnode *Next = regnext(scan); \
	2222	regprop(RExC_rx, mysv, scan); \
	2223	PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \
	2224	(int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\
	2225	Next ? (REG_NODE_NUM(Next)) : 0 ); \
	2226	}});
	2227
	2228
	2229
	2230
	2231	/* Call join_exact() on scan when its regkind is EXACT. Expands to a bare `if` statement and relies on pRExC_state and depth being in scope at the point of use. */
	2232	#define JOIN_EXACT(scan,min,flags) \
	2233	if (PL_regkind[OP(scan)] == EXACT) \
	2234	join_exact(pRExC_state,(scan),(min),(flags),NULL,depth+1)
	2235
	2236	STATIC U32
	2237	S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, I32 *min, U32 flags,regnode *val, U32 depth) {
	2238	    /* Merge several consecutive EXACTish nodes into one, skipping NOTHING-kind nodes in between. scan is the first node and is extended in place; *min is lowered for the multi-char casefold case handled below; flags and val are only used under EXPERIMENTAL_INPLACESCAN; depth indents debug output. Returns stopnow, which is only ever set under EXPERIMENTAL_INPLACESCAN. */
	2239	    regnode *n = regnext(scan);
	2240	    U32 stringok = 1;
	2241	    regnode *next = scan + NODE_SZ_STR(scan);
	2242	    U32 merged = 0;
	2243	    U32 stopnow = 0;
	2244	#ifdef DEBUGGING
	2245	    regnode *stop = scan;
	2246	    GET_RE_DEBUG_FLAGS_DECL;
	2247	#else
	2248	    PERL_UNUSED_ARG(depth);
	2249	#endif
	2250	#ifndef EXPERIMENTAL_INPLACESCAN
	2251	    PERL_UNUSED_ARG(flags);
	2252	    PERL_UNUSED_ARG(val);
	2253	#endif
	2254	    DEBUG_PEEP("join",scan,depth);
	2255	
	2256	    /* Skip NOTHING, merge EXACT*. */
	2257	    while (n &&
	2258	           ( PL_regkind[OP(n)] == NOTHING ||
	2259	             (stringok && (OP(n) == OP(scan))))
	2260	           && NEXT_OFF(n)
	2261	           && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
	2262	        /* A TAIL, or a node lying beyond the end of the last consumed one, ends string merging; NOTHINGs may still be skipped. */
	2263	        if (OP(n) == TAIL || n > next)
	2264	            stringok = 0;
	2265	        if (PL_regkind[OP(n)] == NOTHING) {
	2266	            DEBUG_PEEP("skip:",n,depth);
	2267	            NEXT_OFF(scan) += NEXT_OFF(n);
	2268	            next = n + NODE_STEP_REGNODE;
	2269	#ifdef DEBUGGING
	2270	            if (stringok)
	2271	                stop = n;
	2272	#endif
	2273	            n = regnext(n);
	2274	        }
	2275	        else if (stringok) {
	2276	            const unsigned int oldl = STR_LEN(scan);
	2277	            regnode * const nnext = regnext(n);
	2278	
	2279	            DEBUG_PEEP("merg",n,depth);
	2280	            /* Give up if the combined string length would exceed U8_MAX. */
	2281	            merged++;
	2282	            if (oldl + STR_LEN(n) > U8_MAX)
	2283	                break;
	2284	            NEXT_OFF(scan) += NEXT_OFF(n);
	2285	            STR_LEN(scan) += STR_LEN(n);
	2286	            next = n + NODE_SZ_STR(n);
	2287	            /* Now we can overwrite n : */
	2288	            Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
	2289	#ifdef DEBUGGING
	2290	            stop = next - 1;
	2291	#endif
	2292	            n = nnext;
	2293	            if (stopnow) break;
	2294	        }
	2295	
	2296	#ifdef EXPERIMENTAL_INPLACESCAN
	2297	        if (flags && !NEXT_OFF(n)) {
	2298	            DEBUG_PEEP("atch", val, depth);
	2299	            if (reg_off_by_arg[OP(n)]) {
	2300	                ARG_SET(n, val - n);
	2301	            }
	2302	            else {
	2303	                NEXT_OFF(n) = val - n;
	2304	            }
	2305	            stopnow = 1;
	2306	        }
	2307	#endif
	2308	    }
	2309	
	2310	    if (UTF && ( OP(scan) == EXACTF ) && ( STR_LEN(scan) >= 6 ) ) {
	2311	        /*
	2312	          Two problematic code points in Unicode casefolding of EXACT nodes:
	2313	
	2314	          U+0390 - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
	2315	          U+03B0 - GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
	2316	
	2317	          which casefold to
	2318	
	2319	          Unicode                  UTF-8
	2320	
	2321	          U+03B9 U+0308 U+0301     0xCE 0xB9 0xCC 0x88 0xCC 0x81
	2322	          U+03C5 U+0308 U+0301     0xCF 0x85 0xCC 0x88 0xCC 0x81
	2323	
	2324	          This means that in case-insensitive matching (or "loose matching",
	2325	          as Unicode calls it), an EXACTF of length six (the UTF-8 encoded byte
	2326	          length of the above casefolded versions) can match a target string
	2327	          of length two (the byte length of UTF-8 encoded U+0390 or U+03B0).
	2328	          This would rather mess up the minimum length computation.
	2329	
	2330	          What we'll do is to look for the tail four bytes, and then peek
	2331	          at the preceding two bytes to see whether we need to decrease
	2332	          the minimum length by four (six minus two).
	2333	
	2334	          Thanks to the design of UTF-8, there cannot be false matches:
	2335	          A sequence of valid UTF-8 bytes cannot be a subsequence of
	2336	          another valid sequence of UTF-8 bytes.
	2337	
	2338	        */
	2339	        char * const s0 = STRING(scan), *s, *t;
	2340	        char * const s1 = s0 + STR_LEN(scan) - 1;
	2341	        char * const s2 = s1 - 4;
	2342	#ifdef EBCDIC /* RD tunifold greek 0390 and 03B0 */
	2343	        const char t0[] = "\xaf\x49\xaf\x42";
	2344	#else
	2345	        const char t0[] = "\xcc\x88\xcc\x81";
	2346	#endif
	2347	        const char * const t1 = t0 + 3;
	2348	
	2349	        for (s = s0 + 2;
	2350	             s < s2 && (t = ninstr(s, s1, t0, t1));
	2351	             s = t + 4) {
	2352	#ifdef EBCDIC
	2353	            if (((U8)t[-1] == 0x68 && (U8)t[-2] == 0xB4) ||
	2354	                ((U8)t[-1] == 0x46 && (U8)t[-2] == 0xB5))
	2355	#else
	2356	            if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) ||
	2357	                ((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
	2358	#endif
	2359	                *min -= 4;
	2360	        }
	2361	    }
	2362	
	2363	#ifdef DEBUGGING
	2364	    /* Allow dumping */
	2365	    n = scan + NODE_SZ_STR(scan);
	2366	    while (n <= stop) {
	2367	        if (PL_regkind[OP(n)] != NOTHING || OP(n) == NOTHING) {
	2368	            OP(n) = OPTIMIZED;
	2369	            NEXT_OFF(n) = 0;
	2370	        }
	2371	        n++;
	2372	    }
	2373	#endif
	2374	    DEBUG_OPTIMISE_r(if (merged){DEBUG_PEEP("finl",scan,depth)});
	2375	    return stopnow;
	2376	}
	2377
	2378	/* REx optimizer. Converts nodes into quicker variants "in place".
	2379	Finds fixed substrings. */
	2380
	2381	/* Stops at toplevel WHILEM as well as at "last". At end *scanp is set
	2382	to the position after last scanned or to NULL. */
	2383	/* Asserts that and_withp is still NULL, allocates one struct regnode_charclass_class for it and registers the allocation with SAVEFREEPV. Relies on a variable named and_withp being in scope. */
	2384	#define INIT_AND_WITHP \
	2385	assert(!and_withp); \
	2386	Newx(and_withp,1,struct regnode_charclass_class); \
	2387	SAVEFREEPV(and_withp)
	2388
	2389	/* this is a chain of data about sub patterns we are processing that
	2390	   need to be handled separately/specially in study_chunk. It's so
	2391	   we can simulate recursion without losing state. */
	2392	struct scan_frame;
	2393	typedef struct scan_frame {
	2394	    regnode *last;  /* last node to process in this frame */
	2395	    regnode *next;  /* next node to process when last is reached */
	2396	    struct scan_frame *prev; /* previous frame */
	2397	    I32 stop; /* what stopparen do we use */
	2398	} scan_frame;
	2399
	2400	/* Shorthand for scan_commit() that also passes the is_inf variable in scope at the point of use. */
	2401	#define SCAN_COMMIT(s, data, m) scan_commit(s, data, m, is_inf)
	2402
	2403	STATIC I32
	2404	S_study_chunk(pTHX_ RExC_state_t pRExC_state, regnode *scanp,
	2405	I32 minlenp, I32 deltap,
	2406	regnode *last,
	2407	scan_data_t *data,
	2408	I32 stopparen,
	2409	U8* recursed,
	2410	struct regnode_charclass_class *and_withp,
	2411	U32 flags, U32 depth)
	2412	/* scanp: Start here (read-write). */
	2413	/* deltap: Write maxlen-minlen here. */
	2414	/* last: Stop before this one. */
	2415	/* data: string data about the pattern */
	2416	/* stopparen: treat close N as END */
	2417	/* recursed: which subroutines have we recursed into */
	2418	/* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
	2419	{
	2420	dVAR;
	2421	I32 min = 0, pars = 0, code;
	2422	regnode scan = scanp, *next;
	2423	I32 delta = 0;
	2424	int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
	2425	int is_inf_internal = 0; /* The studied chunk is infinite */
	2426	I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
	2427	scan_data_t data_fake;
	2428	SV *re_trie_maxbuff = NULL;
	2429	regnode *first_non_open = scan;
	2430	I32 stopmin = I32_MAX;
	2431	scan_frame *frame = NULL;
	2432
	2433	GET_RE_DEBUG_FLAGS_DECL;
	2434
	2435	#ifdef DEBUGGING
	2436	StructCopy(&zero_scan_data, &data_fake, scan_data_t);
	2437	#endif
	2438
	2439	if ( depth == 0 ) {
	2440	while (first_non_open && OP(first_non_open) == OPEN)
	2441	first_non_open=regnext(first_non_open);
	2442	}
	2443
	2444
	2445	fake_study_recurse:
	2446	while ( scan && OP(scan) != END && scan < last ){
	2447	/* Peephole optimizer: */
	2448	DEBUG_STUDYDATA("Peep:", data,depth);
	2449	DEBUG_PEEP("Peep",scan,depth);
	2450	JOIN_EXACT(scan,&min,0);
	2451
	2452	/* Follow the next-chain of the current node and optimize
	2453	away all the NOTHINGs from it. */
	2454	if (OP(scan) != CURLYX) {
	2455	const int max = (reg_off_by_arg[OP(scan)]
	2456	? I32_MAX
	2457	/* I32 may be smaller than U16 on CRAYs! */
	2458	: (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
	2459	int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
	2460	int noff;
	2461	regnode *n = scan;
	2462
	2463	/* Skip NOTHING and LONGJMP. */
	2464	while ((n = regnext(n))
	2465	&& ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
	2466	\|\| ((OP(n) == LONGJMP) && (noff = ARG(n))))
	2467	&& off + noff < max)
	2468	off += noff;
	2469	if (reg_off_by_arg[OP(scan)])
	2470	ARG(scan) = off;
	2471	else
	2472	NEXT_OFF(scan) = off;
	2473	}
	2474
	2475
	2476
	2477	/* The principal pseudo-switch. Cannot be a switch, since we
	2478	look into several different things. */
	2479	if (OP(scan) == BRANCH \|\| OP(scan) == BRANCHJ
	2480	\|\| OP(scan) == IFTHEN) {
	2481	next = regnext(scan);
	2482	code = OP(scan);
	2483	/* demq: the op(next)==code check is to see if we have "branch-branch" AFAICT */
	2484
	2485	if (OP(next) == code \|\| code == IFTHEN) {
	2486	/* NOTE - There is similar code to this block below for handling
	2487	TRIE nodes on a re-study. If you change stuff here check there
	2488	too. */
	2489	I32 max1 = 0, min1 = I32_MAX, num = 0;
	2490	struct regnode_charclass_class accum;
	2491	regnode * const startbranch=scan;
	2492
	2493	if (flags & SCF_DO_SUBSTR)
	2494	SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot merge strings after this. */
	2495	if (flags & SCF_DO_STCLASS)
	2496	cl_init_zero(pRExC_state, &accum);
	2497
	2498	while (OP(scan) == code) {
	2499	I32 deltanext, minnext, f = 0, fake;
	2500	struct regnode_charclass_class this_class;
	2501
	2502	num++;
	2503	data_fake.flags = 0;
	2504	if (data) {
	2505	data_fake.whilem_c = data->whilem_c;
	2506	data_fake.last_closep = data->last_closep;
	2507	}
	2508	else
	2509	data_fake.last_closep = &fake;
	2510
	2511	data_fake.pos_delta = delta;
	2512	next = regnext(scan);
	2513	scan = NEXTOPER(scan);
	2514	if (code != BRANCH)
	2515	scan = NEXTOPER(scan);
	2516	if (flags & SCF_DO_STCLASS) {
	2517	cl_init(pRExC_state, &this_class);
	2518	data_fake.start_class = &this_class;
	2519	f = SCF_DO_STCLASS_AND;
	2520	}
	2521	if (flags & SCF_WHILEM_VISITED_POS)
	2522	f \|= SCF_WHILEM_VISITED_POS;
	2523
	2524	/* we suppose the run is continuous, last=next...*/
	2525	minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
	2526	next, &data_fake,
	2527	stopparen, recursed, NULL, f,depth+1);
	2528	if (min1 > minnext)
	2529	min1 = minnext;
	2530	if (max1 < minnext + deltanext)
	2531	max1 = minnext + deltanext;
	2532	if (deltanext == I32_MAX)
	2533	is_inf = is_inf_internal = 1;
	2534	scan = next;
	2535	if (data_fake.flags & (SF_HAS_PAR\|SF_IN_PAR))
	2536	pars++;
	2537	if (data_fake.flags & SCF_SEEN_ACCEPT) {
	2538	if ( stopmin > minnext)
	2539	stopmin = min + min1;
	2540	flags &= ~SCF_DO_SUBSTR;
	2541	if (data)
	2542	data->flags \|= SCF_SEEN_ACCEPT;
	2543	}
	2544	if (data) {
	2545	if (data_fake.flags & SF_HAS_EVAL)
	2546	data->flags \|= SF_HAS_EVAL;
	2547	data->whilem_c = data_fake.whilem_c;
	2548	}
	2549	if (flags & SCF_DO_STCLASS)
	2550	cl_or(pRExC_state, &accum, &this_class);
	2551	}
	2552	if (code == IFTHEN && num < 2) /* Empty ELSE branch */
	2553	min1 = 0;
	2554	if (flags & SCF_DO_SUBSTR) {
	2555	data->pos_min += min1;
	2556	data->pos_delta += max1 - min1;
	2557	if (max1 != min1 \|\| is_inf)
	2558	data->longest = &(data->longest_float);
	2559	}
	2560	min += min1;
	2561	delta += max1 - min1;
	2562	if (flags & SCF_DO_STCLASS_OR) {
	2563	cl_or(pRExC_state, data->start_class, &accum);
	2564	if (min1) {
	2565	cl_and(data->start_class, and_withp);
	2566	flags &= ~SCF_DO_STCLASS;
	2567	}
	2568	}
	2569	else if (flags & SCF_DO_STCLASS_AND) {
	2570	if (min1) {
	2571	cl_and(data->start_class, &accum);
	2572	flags &= ~SCF_DO_STCLASS;
	2573	}
	2574	else {
	2575	/* Switch to OR mode: cache the old value of
	2576	* data->start_class */
	2577	INIT_AND_WITHP;
	2578	StructCopy(data->start_class, and_withp,
	2579	struct regnode_charclass_class);
	2580	flags &= ~SCF_DO_STCLASS_AND;
	2581	StructCopy(&accum, data->start_class,
	2582	struct regnode_charclass_class);
	2583	flags \|= SCF_DO_STCLASS_OR;
	2584	data->start_class->flags \|= ANYOF_EOS;
	2585	}
	2586	}
	2587
	2588	if (PERL_ENABLE_TRIE_OPTIMISATION && OP( startbranch ) == BRANCH ) {
	2589	/* demq.
	2590
	2591	Assuming this was/is a branch we are dealing with: 'scan' now
	2592	points at the item that follows the branch sequence, whatever
	2593	it is. We now start at the beginning of the sequence and look
	2594	for subsequences of
	2595
	2596	BRANCH->EXACT=>x1
	2597	BRANCH->EXACT=>x2
	2598	tail
	2599
	2600	which would be constructed from a pattern like /A\|LIST\|OF\|WORDS/
	2601
	2602	If we can find such a subsequence we need to turn the first
	2603	element into a trie and then add the subsequent branch exact
	2604	strings to the trie.
	2605
	2606	We have two cases
	2607
	2608	1. patterns where the whole set of branch can be converted.
	2609
	2610	2. patterns where only a subset can be converted.
	2611
	2612	In case 1 we can replace the whole set with a single regop
	2613	for the trie. In case 2 we need to keep the start and end
	2614	branches so
	2615
	2616	'BRANCH EXACT; BRANCH EXACT; BRANCH X'
	2617	becomes BRANCH TRIE; BRANCH X;
	2618
	2619	There is an additional case, that being where there is a
	2620	common prefix, which gets split out into an EXACT like node
	2621	preceding the TRIE node.
	2622
	2623	If x(1..n)==tail then we can do a simple trie, if not we make
	2624	a "jump" trie, such that when we match the appropriate word
	2625	we "jump" to the appropriate tail node. Essentially we turn
	2626	a nested if into a case structure of sorts.
	2627
	2628	*/
	2629
	2630	int made=0;
	2631	if (!re_trie_maxbuff) {
	2632	re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
	2633	if (!SvIOK(re_trie_maxbuff))
	2634	sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
	2635	}
	2636	if ( SvIV(re_trie_maxbuff)>=0 ) {
	2637	regnode *cur;
	2638	regnode first = (regnode )NULL;
	2639	regnode last = (regnode )NULL;
	2640	regnode *tail = scan;
	2641	U8 optype = 0;
	2642	U32 count=0;
	2643
	2644	#ifdef DEBUGGING
	2645	SV * const mysv = sv_newmortal(); /* for dumping */
	2646	#endif
	2647	/* var tail is used because there may be a TAIL
	2648	regop in the way. Ie, the exacts will point to the
	2649	thing following the TAIL, but the last branch will
	2650	point at the TAIL. So we advance tail. If we
	2651	have nested (?:) we may have to move through several
	2652	tails.
	2653	*/
	2654
	2655	while ( OP( tail ) == TAIL ) {
	2656	/* this is the TAIL generated by (?:) */
	2657	tail = regnext( tail );
	2658	}
	2659
	2660
	2661	DEBUG_OPTIMISE_r({
	2662	regprop(RExC_rx, mysv, tail );
	2663	PerlIO_printf( Perl_debug_log, "%*s%s%s\n",
	2664	(int)depth * 2 + 2, "",
	2665	"Looking for TRIE'able sequences. Tail node is: ",
	2666	SvPV_nolen_const( mysv )
	2667	);
	2668	});
	2669
	2670	/*
	2671
	2672	step through the branches, cur represents each
	2673	branch, noper is the first thing to be matched
	2674	as part of that branch and noper_next is the
	2675	regnext() of that node. if noper is an EXACT
	2676	and noper_next is the same as scan (our current
	2677	position in the regex) then the EXACT branch is
	2678	a possible optimization target. Once we have
	2679	two or more consecutive such branches we can
	2680	create a trie of the EXACT's contents and stitch
	2681	it in place. If the sequence represents all of
	2682	the branches we eliminate the whole thing and
	2683	replace it with a single TRIE. If it is a
	2684	subsequence then we need to stitch it in. This
	2685	means the first branch has to remain, and needs
	2686	to be repointed at the item on the branch chain
	2687	following the last branch optimized. This could
	2688	be either a BRANCH, in which case the
	2689	subsequence is internal, or it could be the
	2690	item following the branch sequence in which
	2691	case the subsequence is at the end.
	2692
	2693	*/
	2694
	2695	/* dont use tail as the end marker for this traverse */
	2696	for ( cur = startbranch ; cur != scan ; cur = regnext( cur ) ) {
	2697	regnode * const noper = NEXTOPER( cur );
	2698	#if defined(DEBUGGING) \|\| defined(NOJUMPTRIE)
	2699	regnode * const noper_next = regnext( noper );
	2700	#endif
	2701
	2702	DEBUG_OPTIMISE_r({
	2703	regprop(RExC_rx, mysv, cur);
	2704	PerlIO_printf( Perl_debug_log, "%*s- %s (%d)",
	2705	(int)depth * 2 + 2,"", SvPV_nolen_const( mysv ), REG_NODE_NUM(cur) );
	2706
	2707	regprop(RExC_rx, mysv, noper);
	2708	PerlIO_printf( Perl_debug_log, " -> %s",
	2709	SvPV_nolen_const(mysv));
	2710
	2711	if ( noper_next ) {
	2712	regprop(RExC_rx, mysv, noper_next );
	2713	PerlIO_printf( Perl_debug_log,"\t=> %s\t",
	2714	SvPV_nolen_const(mysv));
	2715	}
	2716	PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d)\n",
	2717	REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur) );
	2718	});
	2719	if ( (((first && optype!=NOTHING) ? OP( noper ) == optype
	2720	: PL_regkind[ OP( noper ) ] == EXACT )
	2721	\|\| OP(noper) == NOTHING )
	2722	#ifdef NOJUMPTRIE
	2723	&& noper_next == tail
	2724	#endif
	2725	&& count < U16_MAX)
	2726	{
	2727	count++;
	2728	if ( !first \|\| optype == NOTHING ) {
	2729	if (!first) first = cur;
	2730	optype = OP( noper );
	2731	} else {
	2732	last = cur;
	2733	}
	2734	} else {
	2735	if ( last ) {
	2736	make_trie( pRExC_state,
	2737	startbranch, first, cur, tail, count,
	2738	optype, depth+1 );
	2739	}
	2740	if ( PL_regkind[ OP( noper ) ] == EXACT
	2741	#ifdef NOJUMPTRIE
	2742	&& noper_next == tail
	2743	#endif
	2744	){
	2745	count = 1;
	2746	first = cur;
	2747	optype = OP( noper );
	2748	} else {
	2749	count = 0;
	2750	first = NULL;
	2751	optype = 0;
	2752	}
	2753	last = NULL;
	2754	}
	2755	}
	2756	DEBUG_OPTIMISE_r({
	2757	regprop(RExC_rx, mysv, cur);
	2758	PerlIO_printf( Perl_debug_log,
	2759	"%s- %s (%d) <SCAN FINISHED>\n", (int)depth 2 + 2,
	2760	"", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
	2761
	2762	});
	2763	if ( last ) {
	2764	made= make_trie( pRExC_state, startbranch, first, scan, tail, count, optype, depth+1 );
	2765	#ifdef TRIE_STUDY_OPT
	2766	if ( ((made == MADE_EXACT_TRIE &&
	2767	startbranch == first)
	2768	\|\| ( first_non_open == first )) &&
	2769	depth==0 ) {
	2770	flags \|= SCF_TRIE_RESTUDY;
	2771	if ( startbranch == first
	2772	&& scan == tail )
	2773	{
	2774	RExC_seen &=~REG_TOP_LEVEL_BRANCHES;
	2775	}
	2776	}
	2777	#endif
	2778	}
	2779	}
	2780
	2781	} /* do trie */
	2782
	2783	}
	2784	else if ( code == BRANCHJ ) { /* single branch is optimized. */
	2785	scan = NEXTOPER(NEXTOPER(scan));
	2786	} else /* single branch is optimized. */
	2787	scan = NEXTOPER(scan);
	2788	continue;
	2789	} else if (OP(scan) == SUSPEND \|\| OP(scan) == GOSUB \|\| OP(scan) == GOSTART) {
	2790	scan_frame *newframe = NULL;
	2791	I32 paren;
	2792	regnode *start;
	2793	regnode *end;
	2794
	2795	if (OP(scan) != SUSPEND) {
	2796	/* set the pointer */
	2797	if (OP(scan) == GOSUB) {
	2798	paren = ARG(scan);
	2799	RExC_recurse[ARG2L(scan)] = scan;
	2800	start = RExC_open_parens[paren-1];
	2801	end = RExC_close_parens[paren-1];
	2802	} else {
	2803	paren = 0;
	2804	start = RExC_rxi->program + 1;
	2805	end = RExC_opend;
	2806	}
	2807	if (!recursed) {
	2808	Newxz(recursed, (((RExC_npar)>>3) +1), U8);
	2809	SAVEFREEPV(recursed);
	2810	}
	2811	if (!PAREN_TEST(recursed,paren+1)) {
	2812	PAREN_SET(recursed,paren+1);
	2813	Newx(newframe,1,scan_frame);
	2814	} else {
	2815	if (flags & SCF_DO_SUBSTR) {
	2816	SCAN_COMMIT(pRExC_state,data,minlenp);
	2817	data->longest = &(data->longest_float);
	2818	}
	2819	is_inf = is_inf_internal = 1;
	2820	if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
	2821	cl_anything(pRExC_state, data->start_class);
	2822	flags &= ~SCF_DO_STCLASS;
	2823	}
	2824	} else {
	2825	Newx(newframe,1,scan_frame);
	2826	paren = stopparen;
	2827	start = scan+2;
	2828	end = regnext(scan);
	2829	}
	2830	if (newframe) {
	2831	assert(start);
	2832	assert(end);
	2833	SAVEFREEPV(newframe);
	2834	newframe->next = regnext(scan);
	2835	newframe->last = last;
	2836	newframe->stop = stopparen;
	2837	newframe->prev = frame;
	2838
	2839	frame = newframe;
	2840	scan = start;
	2841	stopparen = paren;
	2842	last = end;
	2843
	2844	continue;
	2845	}
	2846	}
	2847	else if (OP(scan) == EXACT) {
	2848	I32 l = STR_LEN(scan);
	2849	UV uc;
	2850	if (UTF) {
	2851	const U8 * const s = (U8*)STRING(scan);
	2852	l = utf8_length(s, s + l);
	2853	uc = utf8_to_uvchr(s, NULL);
	2854	} else {
	2855	uc = ((U8)STRING(scan));
	2856	}
	2857	min += l;
	2858	if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
	2859	/* The code below prefers earlier match for fixed
	2860	offset, later match for variable offset. */
	2861	if (data->last_end == -1) { /* Update the start info. */
	2862	data->last_start_min = data->pos_min;
	2863	data->last_start_max = is_inf
	2864	? I32_MAX : data->pos_min + data->pos_delta;
	2865	}
	2866	sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
	2867	if (UTF)
	2868	SvUTF8_on(data->last_found);
	2869	{
	2870	SV * const sv = data->last_found;
	2871	MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
	2872	mg_find(sv, PERL_MAGIC_utf8) : NULL;
	2873	if (mg && mg->mg_len >= 0)
	2874	mg->mg_len += utf8_length((U8*)STRING(scan),
	2875	(U8*)STRING(scan)+STR_LEN(scan));
	2876	}
	2877	data->last_end = data->pos_min + l;
	2878	data->pos_min += l; /* As in the first entry. */
	2879	data->flags &= ~SF_BEFORE_EOL;
	2880	}
	2881	if (flags & SCF_DO_STCLASS_AND) {
	2882	/* Check whether it is compatible with what we know already! */
	2883	int compat = 1;
	2884
	2885	if (uc >= 0x100 \|\|
	2886	(!(data->start_class->flags & (ANYOF_CLASS \| ANYOF_LOCALE))
	2887	&& !ANYOF_BITMAP_TEST(data->start_class, uc)
	2888	&& (!(data->start_class->flags & ANYOF_FOLD)
	2889	\|\| !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
	2890	)
	2891	compat = 0;
	2892	ANYOF_CLASS_ZERO(data->start_class);
	2893	ANYOF_BITMAP_ZERO(data->start_class);
	2894	if (compat)
	2895	ANYOF_BITMAP_SET(data->start_class, uc);
	2896	data->start_class->flags &= ~ANYOF_EOS;
	2897	if (uc < 0x100)
	2898	data->start_class->flags &= ~ANYOF_UNICODE_ALL;
	2899	}
	2900	else if (flags & SCF_DO_STCLASS_OR) {
	2901	/* false positive possible if the class is case-folded */
	2902	if (uc < 0x100)
	2903	ANYOF_BITMAP_SET(data->start_class, uc);
	2904	else
	2905	data->start_class->flags \|= ANYOF_UNICODE_ALL;
	2906	data->start_class->flags &= ~ANYOF_EOS;
	2907	cl_and(data->start_class, and_withp);
	2908	}
	2909	flags &= ~SCF_DO_STCLASS;
	2910	}
	2911	else if (PL_regkind[OP(scan)] == EXACT) { /* But OP != EXACT! */
	2912	I32 l = STR_LEN(scan);
	2913	UV uc = ((U8)STRING(scan));
	2914
	2915	/* Search for fixed substrings supports EXACT only. */
	2916	if (flags & SCF_DO_SUBSTR) {
	2917	assert(data);
	2918	SCAN_COMMIT(pRExC_state, data, minlenp);
	2919	}
	2920	if (UTF) {
	2921	const U8 * const s = (U8 *)STRING(scan);
	2922	l = utf8_length(s, s + l);
	2923	uc = utf8_to_uvchr(s, NULL);
	2924	}
	2925	min += l;
	2926	if (flags & SCF_DO_SUBSTR)
	2927	data->pos_min += l;
	2928	if (flags & SCF_DO_STCLASS_AND) {
	2929	/* Check whether it is compatible with what we know already! */
	2930	int compat = 1;
	2931
	2932	if (uc >= 0x100 \|\|
	2933	(!(data->start_class->flags & (ANYOF_CLASS \| ANYOF_LOCALE))
	2934	&& !ANYOF_BITMAP_TEST(data->start_class, uc)
	2935	&& !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
	2936	compat = 0;
	2937	ANYOF_CLASS_ZERO(data->start_class);
	2938	ANYOF_BITMAP_ZERO(data->start_class);
	2939	if (compat) {
	2940	ANYOF_BITMAP_SET(data->start_class, uc);
	2941	data->start_class->flags &= ~ANYOF_EOS;
	2942	data->start_class->flags \|= ANYOF_FOLD;
	2943	if (OP(scan) == EXACTFL)
	2944	data->start_class->flags \|= ANYOF_LOCALE;
	2945	}
	2946	}
	2947	else if (flags & SCF_DO_STCLASS_OR) {
	2948	if (data->start_class->flags & ANYOF_FOLD) {
	2949	/* false positive possible if the class is case-folded.
	2950	Assume that the locale settings are the same... */
	2951	if (uc < 0x100)
	2952	ANYOF_BITMAP_SET(data->start_class, uc);
	2953	data->start_class->flags &= ~ANYOF_EOS;
	2954	}
	2955	cl_and(data->start_class, and_withp);
	2956	}
	2957	flags &= ~SCF_DO_STCLASS;
	2958	}
	2959	else if (strchr((const char*)PL_varies,OP(scan))) {
	2960	I32 mincount, maxcount, minnext, deltanext, fl = 0;
	2961	I32 f = flags, pos_before = 0;
	2962	regnode * const oscan = scan;
	2963	struct regnode_charclass_class this_class;
	2964	struct regnode_charclass_class *oclass = NULL;
	2965	I32 next_is_eval = 0;
	2966
	2967	switch (PL_regkind[OP(scan)]) {
	2968	case WHILEM: /* End of (?:...)* . */
	2969	scan = NEXTOPER(scan);
	2970	goto finish;
	2971	case PLUS:
	2972	if (flags & (SCF_DO_SUBSTR \| SCF_DO_STCLASS)) {
	2973	next = NEXTOPER(scan);
	2974	if (OP(next) == EXACT \|\| (flags & SCF_DO_STCLASS)) {
	2975	mincount = 1;
	2976	maxcount = REG_INFTY;
	2977	next = regnext(scan);
	2978	scan = NEXTOPER(scan);
	2979	goto do_curly;
	2980	}
	2981	}
	2982	if (flags & SCF_DO_SUBSTR)
	2983	data->pos_min++;
	2984	min++;
	2985	/* Fall through. */
	2986	case STAR:
	2987	if (flags & SCF_DO_STCLASS) {
	2988	mincount = 0;
	2989	maxcount = REG_INFTY;
	2990	next = regnext(scan);
	2991	scan = NEXTOPER(scan);
	2992	goto do_curly;
	2993	}
	2994	is_inf = is_inf_internal = 1;
	2995	scan = regnext(scan);
	2996	if (flags & SCF_DO_SUBSTR) {
	2997	SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot extend fixed substrings */
	2998	data->longest = &(data->longest_float);
	2999	}
	3000	goto optimize_curly_tail;
	3001	case CURLY:
	3002	if (stopparen>0 && (OP(scan)==CURLYN \|\| OP(scan)==CURLYM)
	3003	&& (scan->flags == stopparen))
	3004	{
	3005	mincount = 1;
	3006	maxcount = 1;
	3007	} else {
	3008	mincount = ARG1(scan);
	3009	maxcount = ARG2(scan);
	3010	}
	3011	next = regnext(scan);
	3012	if (OP(scan) == CURLYX) {
	3013	I32 lp = (data ? *(data->last_closep) : 0);
	3014	scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
	3015	}
	3016	scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
	3017	next_is_eval = (OP(scan) == EVAL);
	3018	do_curly:
	3019	if (flags & SCF_DO_SUBSTR) {
	3020	if (mincount == 0) SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot extend fixed substrings */
	3021	pos_before = data->pos_min;
	3022	}
	3023	if (data) {
	3024	fl = data->flags;
	3025	data->flags &= ~(SF_HAS_PAR\|SF_IN_PAR\|SF_HAS_EVAL);
	3026	if (is_inf)
	3027	data->flags \|= SF_IS_INF;
	3028	}
	3029	if (flags & SCF_DO_STCLASS) {
	3030	cl_init(pRExC_state, &this_class);
	3031	oclass = data->start_class;
	3032	data->start_class = &this_class;
	3033	f \|= SCF_DO_STCLASS_AND;
	3034	f &= ~SCF_DO_STCLASS_OR;
	3035	}
	3036	/* These are the cases when once a subexpression
	3037	fails at a particular position, it cannot succeed
	3038	even after backtracking at the enclosing scope.
	3039
	3040	XXXX what if minimal match and we are at the
	3041	initial run of {n,m}? */
	3042	if ((mincount != maxcount - 1) && (maxcount != REG_INFTY))
	3043	f &= ~SCF_WHILEM_VISITED_POS;
	3044
	3045	/* This will finish on WHILEM, setting scan, or on NULL: */
	3046	minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
	3047	last, data, stopparen, recursed, NULL,
	3048	(mincount == 0
	3049	? (f & ~SCF_DO_SUBSTR) : f),depth+1);
	3050
	3051	if (flags & SCF_DO_STCLASS)
	3052	data->start_class = oclass;
	3053	if (mincount == 0 \|\| minnext == 0) {
	3054	if (flags & SCF_DO_STCLASS_OR) {
	3055	cl_or(pRExC_state, data->start_class, &this_class);
	3056	}
	3057	else if (flags & SCF_DO_STCLASS_AND) {
	3058	/* Switch to OR mode: cache the old value of
	3059	* data->start_class */
	3060	INIT_AND_WITHP;
	3061	StructCopy(data->start_class, and_withp,
	3062	struct regnode_charclass_class);
	3063	flags &= ~SCF_DO_STCLASS_AND;
	3064	StructCopy(&this_class, data->start_class,
	3065	struct regnode_charclass_class);
	3066	flags \|= SCF_DO_STCLASS_OR;
	3067	data->start_class->flags \|= ANYOF_EOS;
	3068	}
	3069	} else { /* Non-zero len */
	3070	if (flags & SCF_DO_STCLASS_OR) {
	3071	cl_or(pRExC_state, data->start_class, &this_class);
	3072	cl_and(data->start_class, and_withp);
	3073	}
	3074	else if (flags & SCF_DO_STCLASS_AND)
	3075	cl_and(data->start_class, &this_class);
	3076	flags &= ~SCF_DO_STCLASS;
	3077	}
	3078	if (!scan) /* It was not CURLYX, but CURLY. */
	3079	scan = next;
	3080	if ( /* ? quantifier ok, except for (?{ ... }) */
	3081	(next_is_eval \|\| !(mincount == 0 && maxcount == 1))
	3082	&& (minnext == 0) && (deltanext == 0)
	3083	&& data && !(data->flags & (SF_HAS_PAR\|SF_IN_PAR))
	3084	&& maxcount <= REG_INFTY/3 /* Complement check for big count */
	3085	&& ckWARN(WARN_REGEXP))
	3086	{
	3087	vWARN(RExC_parse,
	3088	"Quantifier unexpected on zero-length expression");
	3089	}
	3090
	3091	min += minnext * mincount;
	3092	is_inf_internal \|= ((maxcount == REG_INFTY
	3093	&& (minnext + deltanext) > 0)
	3094	\|\| deltanext == I32_MAX);
	3095	is_inf \|= is_inf_internal;
	3096	delta += (minnext + deltanext) * maxcount - minnext * mincount;
	3097
	3098	/* Try powerful optimization CURLYX => CURLYN. */
	3099	if ( OP(oscan) == CURLYX && data
	3100	&& data->flags & SF_IN_PAR
	3101	&& !(data->flags & SF_HAS_EVAL)
	3102	&& !deltanext && minnext == 1 ) {
	3103	/* Try to optimize to CURLYN. */
	3104	regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
	3105	regnode * const nxt1 = nxt;
	3106	#ifdef DEBUGGING
	3107	regnode *nxt2;
	3108	#endif
	3109
	3110	/* Skip open. */
	3111	nxt = regnext(nxt);
	3112	if (!strchr((const char*)PL_simple,OP(nxt))
	3113	&& !(PL_regkind[OP(nxt)] == EXACT
	3114	&& STR_LEN(nxt) == 1))
	3115	goto nogo;
	3116	#ifdef DEBUGGING
	3117	nxt2 = nxt;
	3118	#endif
	3119	nxt = regnext(nxt);
	3120	if (OP(nxt) != CLOSE)
	3121	goto nogo;
	3122	if (RExC_open_parens) {
	3123	RExC_open_parens[ARG(nxt1)-1]=oscan; /open->CURLYM/
	3124	RExC_close_parens[ARG(nxt1)-1]=nxt+2; /close->while/
	3125	}
	3126	/* Now we know that nxt2 is the only contents: */
	3127	oscan->flags = (U8)ARG(nxt);
	3128	OP(oscan) = CURLYN;
	3129	OP(nxt1) = NOTHING; /* was OPEN. */
	3130
	3131	#ifdef DEBUGGING
	3132	OP(nxt1 + 1) = OPTIMIZED; /* was count. */
	3133	NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
	3134	NEXT_OFF(nxt2) = 0; /* just for consistancy with CURLY. */
	3135	OP(nxt) = OPTIMIZED; /* was CLOSE. */
	3136	OP(nxt + 1) = OPTIMIZED; /* was count. */
	3137	NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
	3138	#endif
	3139	}
	3140	nogo:
	3141
	3142	/* Try optimization CURLYX => CURLYM. */
	3143	if ( OP(oscan) == CURLYX && data
	3144	&& !(data->flags & SF_HAS_PAR)
	3145	&& !(data->flags & SF_HAS_EVAL)
	3146	&& !deltanext /* atom is fixed width */
	3147	&& minnext != 0 /* CURLYM can't handle zero width */
	3148	) {
	3149	/* XXXX How to optimize if data == 0? */
	3150	/* Optimize to a simpler form. */
	3151	regnode nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; / OPEN */
	3152	regnode *nxt2;
	3153
	3154	OP(oscan) = CURLYM;
	3155	while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
	3156	&& (OP(nxt2) != WHILEM))
	3157	nxt = nxt2;
	3158	OP(nxt2) = SUCCEED; /* Whas WHILEM */
	3159	/* Need to optimize away parenths. */
	3160	if (data->flags & SF_IN_PAR) {
	3161	/* Set the parenth number. */
	3162	regnode nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; / OPEN*/
	3163
	3164	if (OP(nxt) != CLOSE)
	3165	FAIL("Panic opt close");
	3166	oscan->flags = (U8)ARG(nxt);
	3167	if (RExC_open_parens) {
	3168	RExC_open_parens[ARG(nxt1)-1]=oscan; /open->CURLYM/
	3169	RExC_close_parens[ARG(nxt1)-1]=nxt2+1; /close->NOTHING/
	3170	}
	3171	OP(nxt1) = OPTIMIZED; /* was OPEN. */
	3172	OP(nxt) = OPTIMIZED; /* was CLOSE. */
	3173
	3174	#ifdef DEBUGGING
	3175	OP(nxt1 + 1) = OPTIMIZED; /* was count. */
	3176	OP(nxt + 1) = OPTIMIZED; /* was count. */
	3177	NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
	3178	NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
	3179	#endif
	3180	#if 0
	3181	while ( nxt1 && (OP(nxt1) != WHILEM)) {
	3182	regnode *nnxt = regnext(nxt1);
	3183
	3184	if (nnxt == nxt) {
	3185	if (reg_off_by_arg[OP(nxt1)])
	3186	ARG_SET(nxt1, nxt2 - nxt1);
	3187	else if (nxt2 - nxt1 < U16_MAX)
	3188	NEXT_OFF(nxt1) = nxt2 - nxt1;
	3189	else
	3190	OP(nxt) = NOTHING; /* Cannot beautify */
	3191	}
	3192	nxt1 = nnxt;
	3193	}
	3194	#endif
	3195	/* Optimize again: */
	3196	study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
	3197	NULL, stopparen, recursed, NULL, 0,depth+1);
	3198	}
	3199	else
	3200	oscan->flags = 0;
	3201	}
	3202	else if ((OP(oscan) == CURLYX)
	3203	&& (flags & SCF_WHILEM_VISITED_POS)
	3204	/* See the comment on a similar expression above.
	3205	However, this time it is not a subexpression
	3206	we care about, but the expression itself. */
	3207	&& (maxcount == REG_INFTY)
	3208	&& data && ++data->whilem_c < 16) {
	3209	/* This stays as CURLYX, we can put the count/of pair. */
	3210	/* Find WHILEM (as in regexec.c) */
	3211	regnode *nxt = oscan + NEXT_OFF(oscan);
	3212
	3213	if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
	3214	nxt += ARG(nxt);
	3215	PREVOPER(nxt)->flags = (U8)(data->whilem_c
	3216	\| (RExC_whilem_seen << 4)); /* On WHILEM */
	3217	}
	3218	if (data && fl & (SF_HAS_PAR\|SF_IN_PAR))
	3219	pars++;
	3220	if (flags & SCF_DO_SUBSTR) {
	3221	SV *last_str = NULL;
	3222	int counted = mincount != 0;
	3223
	3224	if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
	3225	#if defined(SPARC64_GCC_WORKAROUND)
	3226	I32 b = 0;
	3227	STRLEN l = 0;
	3228	const char *s = NULL;
	3229	I32 old = 0;
	3230
	3231	if (pos_before >= data->last_start_min)
	3232	b = pos_before;
	3233	else
	3234	b = data->last_start_min;
	3235
	3236	l = 0;
	3237	s = SvPV_const(data->last_found, l);
	3238	old = b - data->last_start_min;
	3239
	3240	#else
	3241	I32 b = pos_before >= data->last_start_min
	3242	? pos_before : data->last_start_min;
	3243	STRLEN l;
	3244	const char * const s = SvPV_const(data->last_found, l);
	3245	I32 old = b - data->last_start_min;
	3246	#endif
	3247
	3248	if (UTF)
	3249	old = utf8_hop((U8)s, old) - (U8)s;
	3250
	3251	l -= old;
	3252	/* Get the added string: */
	3253	last_str = newSVpvn(s + old, l);
	3254	if (UTF)
	3255	SvUTF8_on(last_str);
	3256	if (deltanext == 0 && pos_before == b) {
	3257	/* What was added is a constant string */
	3258	if (mincount > 1) {
	3259	SvGROW(last_str, (mincount * l) + 1);
	3260	repeatcpy(SvPVX(last_str) + l,
	3261	SvPVX_const(last_str), l, mincount - 1);
	3262	SvCUR_set(last_str, SvCUR(last_str) * mincount);
	3263	/* Add additional parts. */
	3264	SvCUR_set(data->last_found,
	3265	SvCUR(data->last_found) - l);
	3266	sv_catsv(data->last_found, last_str);
	3267	{
	3268	SV * sv = data->last_found;
	3269	MAGIC *mg =
	3270	SvUTF8(sv) && SvMAGICAL(sv) ?
	3271	mg_find(sv, PERL_MAGIC_utf8) : NULL;
	3272	if (mg && mg->mg_len >= 0)
	3273	mg->mg_len += CHR_SVLEN(last_str);
	3274	}
	3275	data->last_end += l * (mincount - 1);
	3276	}
	3277	} else {
	3278	/* start offset must point into the last copy */
	3279	data->last_start_min += minnext * (mincount - 1);
	3280	data->last_start_max += is_inf ? I32_MAX
	3281	: (maxcount - 1) * (minnext + data->pos_delta);
	3282	}
	3283	}
	3284	/* It is counted once already... */
	3285	data->pos_min += minnext * (mincount - counted);
	3286	data->pos_delta += - counted * deltanext +
	3287	(minnext + deltanext) * maxcount - minnext * mincount;
	3288	if (mincount != maxcount) {
	3289	/* Cannot extend fixed substrings found inside
	3290	the group. */
	3291	SCAN_COMMIT(pRExC_state,data,minlenp);
	3292	if (mincount && last_str) {
	3293	SV * const sv = data->last_found;
	3294	MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
	3295	mg_find(sv, PERL_MAGIC_utf8) : NULL;
	3296
	3297	if (mg)
	3298	mg->mg_len = -1;
	3299	sv_setsv(sv, last_str);
	3300	data->last_end = data->pos_min;
	3301	data->last_start_min =
	3302	data->pos_min - CHR_SVLEN(last_str);
	3303	data->last_start_max = is_inf
	3304	? I32_MAX
	3305	: data->pos_min + data->pos_delta
	3306	- CHR_SVLEN(last_str);
	3307	}
	3308	data->longest = &(data->longest_float);
	3309	}
	3310	SvREFCNT_dec(last_str);
	3311	}
	3312	if (data && (fl & SF_HAS_EVAL))
	3313	data->flags \|= SF_HAS_EVAL;
	3314	optimize_curly_tail:
	3315	if (OP(oscan) != CURLYX) {
	3316	while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
	3317	&& NEXT_OFF(next))
	3318	NEXT_OFF(oscan) += NEXT_OFF(next);
	3319	}
	3320	continue;
	3321	default: /* REF and CLUMP only? */
	3322	if (flags & SCF_DO_SUBSTR) {
	3323	SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */
	3324	data->longest = &(data->longest_float);
	3325	}
	3326	is_inf = is_inf_internal = 1;
	3327	if (flags & SCF_DO_STCLASS_OR)
	3328	cl_anything(pRExC_state, data->start_class);
	3329	flags &= ~SCF_DO_STCLASS;
	3330	break;
	3331	}
	3332	}
	3333	else if (strchr((const char*)PL_simple,OP(scan))) {
	3334	int value = 0;
	3335
	3336	if (flags & SCF_DO_SUBSTR) {
	3337	SCAN_COMMIT(pRExC_state,data,minlenp);
	3338	data->pos_min++;
	3339	}
	3340	min++;
	3341	if (flags & SCF_DO_STCLASS) {
	3342	data->start_class->flags &= ~ANYOF_EOS; /* No match on empty */
	3343
	3344	/* Some of the logic below assumes that switching
	3345	locale on will only add false positives. */
	3346	switch (PL_regkind[OP(scan)]) {
	3347	case SANY:
	3348	default:
	3349	do_default:
	3350	/* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */
	3351	if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
	3352	cl_anything(pRExC_state, data->start_class);
	3353	break;
	3354	case REG_ANY:
	3355	if (OP(scan) == SANY)
	3356	goto do_default;
	3357	if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
	3358	value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
	3359	\|\| (data->start_class->flags & ANYOF_CLASS));
	3360	cl_anything(pRExC_state, data->start_class);
	3361	}
	3362	if (flags & SCF_DO_STCLASS_AND \|\| !value)
	3363	ANYOF_BITMAP_CLEAR(data->start_class,'\n');
	3364	break;
	3365	case ANYOF:
	3366	if (flags & SCF_DO_STCLASS_AND)
	3367	cl_and(data->start_class,
	3368	(struct regnode_charclass_class*)scan);
	3369	else
	3370	cl_or(pRExC_state, data->start_class,
	3371	(struct regnode_charclass_class*)scan);
	3372	break;
	3373	case ALNUM:
	3374	if (flags & SCF_DO_STCLASS_AND) {
	3375	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	3376	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
	3377	for (value = 0; value < 256; value++)
	3378	if (!isALNUM(value))
	3379	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3380	}
	3381	}
	3382	else {
	3383	if (data->start_class->flags & ANYOF_LOCALE)
	3384	ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
	3385	else {
	3386	for (value = 0; value < 256; value++)
	3387	if (isALNUM(value))
	3388	ANYOF_BITMAP_SET(data->start_class, value);
	3389	}
	3390	}
	3391	break;
	3392	case ALNUML:
	3393	if (flags & SCF_DO_STCLASS_AND) {
	3394	if (data->start_class->flags & ANYOF_LOCALE)
	3395	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
	3396	}
	3397	else {
	3398	ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
	3399	data->start_class->flags \|= ANYOF_LOCALE;
	3400	}
	3401	break;
	3402	case NALNUM:
	3403	if (flags & SCF_DO_STCLASS_AND) {
	3404	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	3405	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
	3406	for (value = 0; value < 256; value++)
	3407	if (isALNUM(value))
	3408	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3409	}
	3410	}
	3411	else {
	3412	if (data->start_class->flags & ANYOF_LOCALE)
	3413	ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
	3414	else {
	3415	for (value = 0; value < 256; value++)
	3416	if (!isALNUM(value))
	3417	ANYOF_BITMAP_SET(data->start_class, value);
	3418	}
	3419	}
	3420	break;
	3421	case NALNUML:
	3422	if (flags & SCF_DO_STCLASS_AND) {
	3423	if (data->start_class->flags & ANYOF_LOCALE)
	3424	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
	3425	}
	3426	else {
	3427	data->start_class->flags \|= ANYOF_LOCALE;
	3428	ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
	3429	}
	3430	break;
	3431	case SPACE:
	3432	if (flags & SCF_DO_STCLASS_AND) {
	3433	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	3434	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
	3435	for (value = 0; value < 256; value++)
	3436	if (!isSPACE(value))
	3437	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3438	}
	3439	}
	3440	else {
	3441	if (data->start_class->flags & ANYOF_LOCALE)
	3442	ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
	3443	else {
	3444	for (value = 0; value < 256; value++)
	3445	if (isSPACE(value))
	3446	ANYOF_BITMAP_SET(data->start_class, value);
	3447	}
	3448	}
	3449	break;
	3450	case SPACEL:
	3451	if (flags & SCF_DO_STCLASS_AND) {
	3452	if (data->start_class->flags & ANYOF_LOCALE)
	3453	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
	3454	}
	3455	else {
	3456	data->start_class->flags \|= ANYOF_LOCALE;
	3457	ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
	3458	}
	3459	break;
	3460	case NSPACE:
	3461	if (flags & SCF_DO_STCLASS_AND) {
	3462	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	3463	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
	3464	for (value = 0; value < 256; value++)
	3465	if (isSPACE(value))
	3466	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3467	}
	3468	}
	3469	else {
	3470	if (data->start_class->flags & ANYOF_LOCALE)
	3471	ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
	3472	else {
	3473	for (value = 0; value < 256; value++)
	3474	if (!isSPACE(value))
	3475	ANYOF_BITMAP_SET(data->start_class, value);
	3476	}
	3477	}
	3478	break;
	3479	case NSPACEL:
	3480	if (flags & SCF_DO_STCLASS_AND) {
	3481	if (data->start_class->flags & ANYOF_LOCALE) {
	3482	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
	3483	for (value = 0; value < 256; value++)
	3484	if (!isSPACE(value))
	3485	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3486	}
	3487	}
	3488	else {
	3489	data->start_class->flags \|= ANYOF_LOCALE;
	3490	ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
	3491	}
	3492	break;
	3493	case DIGIT:
	3494	if (flags & SCF_DO_STCLASS_AND) {
	3495	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NDIGIT);
	3496	for (value = 0; value < 256; value++)
	3497	if (!isDIGIT(value))
	3498	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3499	}
	3500	else {
	3501	if (data->start_class->flags & ANYOF_LOCALE)
	3502	ANYOF_CLASS_SET(data->start_class,ANYOF_DIGIT);
	3503	else {
	3504	for (value = 0; value < 256; value++)
	3505	if (isDIGIT(value))
	3506	ANYOF_BITMAP_SET(data->start_class, value);
	3507	}
	3508	}
	3509	break;
	3510	case NDIGIT:
	3511	if (flags & SCF_DO_STCLASS_AND) {
	3512	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_DIGIT);
	3513	for (value = 0; value < 256; value++)
	3514	if (isDIGIT(value))
	3515	ANYOF_BITMAP_CLEAR(data->start_class, value);
	3516	}
	3517	else {
	3518	if (data->start_class->flags & ANYOF_LOCALE)
	3519	ANYOF_CLASS_SET(data->start_class,ANYOF_NDIGIT);
	3520	else {
	3521	for (value = 0; value < 256; value++)
	3522	if (!isDIGIT(value))
	3523	ANYOF_BITMAP_SET(data->start_class, value);
	3524	}
	3525	}
	3526	break;
	3527	}
	3528	if (flags & SCF_DO_STCLASS_OR)
	3529	cl_and(data->start_class, and_withp);
	3530	flags &= ~SCF_DO_STCLASS;
	3531	}
	3532	}
	3533	else if (PL_regkind[OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
	3534	data->flags \|= (OP(scan) == MEOL
	3535	? SF_BEFORE_MEOL
	3536	: SF_BEFORE_SEOL);
	3537	}
	3538	else if ( PL_regkind[OP(scan)] == BRANCHJ
	3539	/* Lookbehind, or need to calculate parens/evals/stclass: */
	3540	&& (scan->flags \|\| data \|\| (flags & SCF_DO_STCLASS))
	3541	&& (OP(scan) == IFMATCH \|\| OP(scan) == UNLESSM)) {
	3542	if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
	3543	\|\| OP(scan) == UNLESSM )
	3544	{
	3545	/* Negative Lookahead/lookbehind
	3546	In this case we can't do fixed string optimisation.
	3547	*/
	3548
	3549	I32 deltanext, minnext, fake = 0;
	3550	regnode *nscan;
	3551	struct regnode_charclass_class intrnl;
	3552	int f = 0;
	3553
	3554	data_fake.flags = 0;
	3555	if (data) {
	3556	data_fake.whilem_c = data->whilem_c;
	3557	data_fake.last_closep = data->last_closep;
	3558	}
	3559	else
	3560	data_fake.last_closep = &fake;
	3561	data_fake.pos_delta = delta;
	3562	if ( flags & SCF_DO_STCLASS && !scan->flags
	3563	&& OP(scan) == IFMATCH ) { /* Lookahead */
	3564	cl_init(pRExC_state, &intrnl);
	3565	data_fake.start_class = &intrnl;
	3566	f \|= SCF_DO_STCLASS_AND;
	3567	}
	3568	if (flags & SCF_WHILEM_VISITED_POS)
	3569	f \|= SCF_WHILEM_VISITED_POS;
	3570	next = regnext(scan);
	3571	nscan = NEXTOPER(NEXTOPER(scan));
	3572	minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
	3573	last, &data_fake, stopparen, recursed, NULL, f, depth+1);
	3574	if (scan->flags) {
	3575	if (deltanext) {
	3576	FAIL("Variable length lookbehind not implemented");
	3577	}
	3578	else if (minnext > (I32)U8_MAX) {
	3579	FAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
	3580	}
	3581	scan->flags = (U8)minnext;
	3582	}
	3583	if (data) {
	3584	if (data_fake.flags & (SF_HAS_PAR\|SF_IN_PAR))
	3585	pars++;
	3586	if (data_fake.flags & SF_HAS_EVAL)
	3587	data->flags \|= SF_HAS_EVAL;
	3588	data->whilem_c = data_fake.whilem_c;
	3589	}
	3590	if (f & SCF_DO_STCLASS_AND) {
	3591	const int was = (data->start_class->flags & ANYOF_EOS);
	3592
	3593	cl_and(data->start_class, &intrnl);
	3594	if (was)
	3595	data->start_class->flags \|= ANYOF_EOS;
	3596	}
	3597	}
	3598	#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
	3599	else {
	3600	/* Positive Lookahead/lookbehind
	3601	In this case we can do fixed string optimisation,
	3602	but we must be careful about it. Note in the case of
	3603	lookbehind the positions will be offset by the minimum
	3604	length of the pattern, something we won't know about
	3605	until after the recurse.
	3606	*/
	3607	I32 deltanext, fake = 0;
	3608	regnode *nscan;
	3609	struct regnode_charclass_class intrnl;
	3610	int f = 0;
	3611	/* We use SAVEFREEPV so that when the full compile
	3612	is finished perl will clean up the allocated
	3613	minlens when it's all done. This way we don't
	3614	have to worry about freeing them when we know
	3615	they won't be used, which would be a pain.
	3616	*/
	3617	I32 *minnextp;
	3618	Newx( minnextp, 1, I32 );
	3619	SAVEFREEPV(minnextp);
	3620
	3621	if (data) {
	3622	StructCopy(data, &data_fake, scan_data_t);
	3623	if ((flags & SCF_DO_SUBSTR) && data->last_found) {
	3624	f \|= SCF_DO_SUBSTR;
	3625	if (scan->flags)
	3626	SCAN_COMMIT(pRExC_state, &data_fake,minlenp);
	3627	data_fake.last_found=newSVsv(data->last_found);
	3628	}
	3629	}
	3630	else
	3631	data_fake.last_closep = &fake;
	3632	data_fake.flags = 0;
	3633	data_fake.pos_delta = delta;
	3634	if (is_inf)
	3635	data_fake.flags \|= SF_IS_INF;
	3636	if ( flags & SCF_DO_STCLASS && !scan->flags
	3637	&& OP(scan) == IFMATCH ) { /* Lookahead */
	3638	cl_init(pRExC_state, &intrnl);
	3639	data_fake.start_class = &intrnl;
	3640	f \|= SCF_DO_STCLASS_AND;
	3641	}
	3642	if (flags & SCF_WHILEM_VISITED_POS)
	3643	f \|= SCF_WHILEM_VISITED_POS;
	3644	next = regnext(scan);
	3645	nscan = NEXTOPER(NEXTOPER(scan));
	3646
	3647	*minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext,
	3648	last, &data_fake, stopparen, recursed, NULL, f,depth+1);
	3649	if (scan->flags) {
	3650	if (deltanext) {
	3651	FAIL("Variable length lookbehind not implemented");
	3652	}
	3653	else if (*minnextp > (I32)U8_MAX) {
	3654	FAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
	3655	}
	3656	scan->flags = (U8)*minnextp;
	3657	}
	3658
	3659	*minnextp += min;
	3660
	3661	if (f & SCF_DO_STCLASS_AND) {
	3662	const int was = (data->start_class->flags & ANYOF_EOS);
	3663
	3664	cl_and(data->start_class, &intrnl);
	3665	if (was)
	3666	data->start_class->flags \|= ANYOF_EOS;
	3667	}
	3668	if (data) {
	3669	if (data_fake.flags & (SF_HAS_PAR\|SF_IN_PAR))
	3670	pars++;
	3671	if (data_fake.flags & SF_HAS_EVAL)
	3672	data->flags \|= SF_HAS_EVAL;
	3673	data->whilem_c = data_fake.whilem_c;
	3674	if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
	3675	if (RExC_rx->minlen<*minnextp)
	3676	RExC_rx->minlen=*minnextp;
	3677	SCAN_COMMIT(pRExC_state, &data_fake, minnextp);
	3678	SvREFCNT_dec(data_fake.last_found);
	3679
	3680	if ( data_fake.minlen_fixed != minlenp )
	3681	{
	3682	data->offset_fixed= data_fake.offset_fixed;
	3683	data->minlen_fixed= data_fake.minlen_fixed;
	3684	data->lookbehind_fixed+= scan->flags;
	3685	}
	3686	if ( data_fake.minlen_float != minlenp )
	3687	{
	3688	data->minlen_float= data_fake.minlen_float;
	3689	data->offset_float_min=data_fake.offset_float_min;
	3690	data->offset_float_max=data_fake.offset_float_max;
	3691	data->lookbehind_float+= scan->flags;
	3692	}
	3693	}
	3694	}
	3695
	3696
	3697	}
	3698	#endif
	3699	}
	3700	else if (OP(scan) == OPEN) {
	3701	if (stopparen != (I32)ARG(scan))
	3702	pars++;
	3703	}
	3704	else if (OP(scan) == CLOSE) {
	3705	if (stopparen == (I32)ARG(scan)) {
	3706	break;
	3707	}
	3708	if ((I32)ARG(scan) == is_par) {
	3709	next = regnext(scan);
	3710
	3711	if ( next && (OP(next) != WHILEM) && next < last)
	3712	is_par = 0; /* Disable optimization */
	3713	}
	3714	if (data)
	3715	*(data->last_closep) = ARG(scan);
	3716	}
	3717	else if (OP(scan) == EVAL) {
	3718	if (data)
	3719	data->flags \|= SF_HAS_EVAL;
	3720	}
	3721	else if ( PL_regkind[OP(scan)] == ENDLIKE ) {
	3722	if (flags & SCF_DO_SUBSTR) {
	3723	SCAN_COMMIT(pRExC_state,data,minlenp);
	3724	flags &= ~SCF_DO_SUBSTR;
	3725	}
	3726	if (data && OP(scan)==ACCEPT) {
	3727	data->flags \|= SCF_SEEN_ACCEPT;
	3728	if (stopmin > min)
	3729	stopmin = min;
	3730	}
	3731	}
	3732	else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
	3733	{
	3734	if (flags & SCF_DO_SUBSTR) {
	3735	SCAN_COMMIT(pRExC_state,data,minlenp);
	3736	data->longest = &(data->longest_float);
	3737	}
	3738	is_inf = is_inf_internal = 1;
	3739	if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
	3740	cl_anything(pRExC_state, data->start_class);
	3741	flags &= ~SCF_DO_STCLASS;
	3742	}
	3743	else if (OP(scan) == GPOS) {
	3744	if (!(RExC_rx->extflags & RXf_GPOS_FLOAT) &&
	3745	!(delta \|\| is_inf \|\| (data && data->pos_delta)))
	3746	{
	3747	if (!(RExC_rx->extflags & RXf_ANCH) && (flags & SCF_DO_SUBSTR))
	3748	RExC_rx->extflags \|= RXf_ANCH_GPOS;
	3749	if (RExC_rx->gofs < (U32)min)
	3750	RExC_rx->gofs = min;
	3751	} else {
	3752	RExC_rx->extflags \|= RXf_GPOS_FLOAT;
	3753	RExC_rx->gofs = 0;
	3754	}
	3755	}
	3756	#ifdef TRIE_STUDY_OPT
	3757	#ifdef FULL_TRIE_STUDY
	3758	else if (PL_regkind[OP(scan)] == TRIE) {
	3759	/* NOTE - There is similar code to this block above for handling
	3760	BRANCH nodes on the initial study. If you change stuff here
	3761	check there too. */
	3762	regnode *trie_node= scan;
	3763	regnode *tail= regnext(scan);
	3764	reg_trie_data trie = (reg_trie_data)RExC_rxi->data->data[ ARG(scan) ];
	3765	I32 max1 = 0, min1 = I32_MAX;
	3766	struct regnode_charclass_class accum;
	3767
	3768	if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
	3769	SCAN_COMMIT(pRExC_state, data,minlenp); /* Cannot merge strings after this. */
	3770	if (flags & SCF_DO_STCLASS)
	3771	cl_init_zero(pRExC_state, &accum);
	3772
	3773	if (!trie->jump) {
	3774	min1= trie->minlen;
	3775	max1= trie->maxlen;
	3776	} else {
	3777	const regnode *nextbranch= NULL;
	3778	U32 word;
	3779
	3780	for ( word=1 ; word <= trie->wordcount ; word++)
	3781	{
	3782	I32 deltanext=0, minnext=0, f = 0, fake;
	3783	struct regnode_charclass_class this_class;
	3784
	3785	data_fake.flags = 0;
	3786	if (data) {
	3787	data_fake.whilem_c = data->whilem_c;
	3788	data_fake.last_closep = data->last_closep;
	3789	}
	3790	else
	3791	data_fake.last_closep = &fake;
	3792	data_fake.pos_delta = delta;
	3793	if (flags & SCF_DO_STCLASS) {
	3794	cl_init(pRExC_state, &this_class);
	3795	data_fake.start_class = &this_class;
	3796	f = SCF_DO_STCLASS_AND;
	3797	}
	3798	if (flags & SCF_WHILEM_VISITED_POS)
	3799	f \|= SCF_WHILEM_VISITED_POS;
	3800
	3801	if (trie->jump[word]) {
	3802	if (!nextbranch)
	3803	nextbranch = trie_node + trie->jump[0];
	3804	scan= trie_node + trie->jump[word];
	3805	/* We go from the jump point to the branch that follows
	3806	it. Note this means we need the vestigal unused branches
	3807	even though they arent otherwise used.
	3808	*/
	3809	minnext = study_chunk(pRExC_state, &scan, minlenp,
	3810	&deltanext, (regnode *)nextbranch, &data_fake,
	3811	stopparen, recursed, NULL, f,depth+1);
	3812	}
	3813	if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
	3814	nextbranch= regnext((regnode*)nextbranch);
	3815
	3816	if (min1 > (I32)(minnext + trie->minlen))
	3817	min1 = minnext + trie->minlen;
	3818	if (max1 < (I32)(minnext + deltanext + trie->maxlen))
	3819	max1 = minnext + deltanext + trie->maxlen;
	3820	if (deltanext == I32_MAX)
	3821	is_inf = is_inf_internal = 1;
	3822
	3823	if (data_fake.flags & (SF_HAS_PAR\|SF_IN_PAR))
	3824	pars++;
	3825	if (data_fake.flags & SCF_SEEN_ACCEPT) {
	3826	if ( stopmin > min + min1)
	3827	stopmin = min + min1;
	3828	flags &= ~SCF_DO_SUBSTR;
	3829	if (data)
	3830	data->flags \|= SCF_SEEN_ACCEPT;
	3831	}
	3832	if (data) {
	3833	if (data_fake.flags & SF_HAS_EVAL)
	3834	data->flags \|= SF_HAS_EVAL;
	3835	data->whilem_c = data_fake.whilem_c;
	3836	}
	3837	if (flags & SCF_DO_STCLASS)
	3838	cl_or(pRExC_state, &accum, &this_class);
	3839	}
	3840	}
	3841	if (flags & SCF_DO_SUBSTR) {
	3842	data->pos_min += min1;
	3843	data->pos_delta += max1 - min1;
	3844	if (max1 != min1 \|\| is_inf)
	3845	data->longest = &(data->longest_float);
	3846	}
	3847	min += min1;
	3848	delta += max1 - min1;
	3849	if (flags & SCF_DO_STCLASS_OR) {
	3850	cl_or(pRExC_state, data->start_class, &accum);
	3851	if (min1) {
	3852	cl_and(data->start_class, and_withp);
	3853	flags &= ~SCF_DO_STCLASS;
	3854	}
	3855	}
	3856	else if (flags & SCF_DO_STCLASS_AND) {
	3857	if (min1) {
	3858	cl_and(data->start_class, &accum);
	3859	flags &= ~SCF_DO_STCLASS;
	3860	}
	3861	else {
	3862	/* Switch to OR mode: cache the old value of
	3863	* data->start_class */
	3864	INIT_AND_WITHP;
	3865	StructCopy(data->start_class, and_withp,
	3866	struct regnode_charclass_class);
	3867	flags &= ~SCF_DO_STCLASS_AND;
	3868	StructCopy(&accum, data->start_class,
	3869	struct regnode_charclass_class);
	3870	flags \|= SCF_DO_STCLASS_OR;
	3871	data->start_class->flags \|= ANYOF_EOS;
	3872	}
	3873	}
	3874	scan= tail;
	3875	continue;
	3876	}
	3877	#else
	3878	else if (PL_regkind[OP(scan)] == TRIE) {
	3879	reg_trie_data trie = (reg_trie_data)RExC_rxi->data->data[ ARG(scan) ];
	3880	U8*bang=NULL;
	3881
	3882	min += trie->minlen;
	3883	delta += (trie->maxlen - trie->minlen);
	3884	flags &= ~SCF_DO_STCLASS; /* xxx */
	3885	if (flags & SCF_DO_SUBSTR) {
	3886	SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */
	3887	data->pos_min += trie->minlen;
	3888	data->pos_delta += (trie->maxlen - trie->minlen);
	3889	if (trie->maxlen != trie->minlen)
	3890	data->longest = &(data->longest_float);
	3891	}
	3892	if (trie->jump) /* no more substrings -- for now /grr*/
	3893	flags &= ~SCF_DO_SUBSTR;
	3894	}
	3895	#endif /* old or new */
	3896	#endif /* TRIE_STUDY_OPT */
	3897	/* Else: zero-length, ignore. */
	3898	scan = regnext(scan);
	3899	}
	3900	if (frame) {
	3901	last = frame->last;
	3902	scan = frame->next;
	3903	stopparen = frame->stop;
	3904	frame = frame->prev;
	3905	goto fake_study_recurse;
	3906	}
	3907
	3908	finish:
	3909	assert(!frame);
	3910	DEBUG_STUDYDATA("pre-fin:",data,depth);
	3911
	3912	*scanp = scan;
	3913	*deltap = is_inf_internal ? I32_MAX : delta;
	3914	if (flags & SCF_DO_SUBSTR && is_inf)
	3915	data->pos_delta = I32_MAX - data->pos_min;
	3916	if (is_par > (I32)U8_MAX)
	3917	is_par = 0;
	3918	if (is_par && pars==1 && data) {
	3919	data->flags \|= SF_IN_PAR;
	3920	data->flags &= ~SF_HAS_PAR;
	3921	}
	3922	else if (pars && data) {
	3923	data->flags \|= SF_HAS_PAR;
	3924	data->flags &= ~SF_IN_PAR;
	3925	}
	3926	if (flags & SCF_DO_STCLASS_OR)
	3927	cl_and(data->start_class, and_withp);
	3928	if (flags & SCF_TRIE_RESTUDY)
	3929	data->flags \|= SCF_TRIE_RESTUDY;
	3930
	3931	DEBUG_STUDYDATA("post-fin:",data,depth);
	3932
	3933	return min < stopmin ? min : stopmin;
	3934	}
	3935
	3936	STATIC U32
	3937	S_add_data(RExC_state_t pRExC_state, U32 n, const char s)
	3938	{
	3939	U32 count = RExC_rxi->data ? RExC_rxi->data->count : 0;
	3940
	3941	Renewc(RExC_rxi->data,
	3942	sizeof(RExC_rxi->data) + sizeof(void) * (count + n - 1),
	3943	char, struct reg_data);
	3944	if(count)
	3945	Renew(RExC_rxi->data->what, count + n, U8);
	3946	else
	3947	Newx(RExC_rxi->data->what, n, U8);
	3948	RExC_rxi->data->count = count + n;
	3949	Copy(s, RExC_rxi->data->what + count, n, U8);
	3950	return count;
	3951	}
	3952
	3953	/*XXX: todo make this not included in a non debugging perl */
	3954	#ifndef PERL_IN_XSUB_RE
	3955	void
	3956	Perl_reginitcolors(pTHX)
	3957	{
	3958	dVAR;
	3959	const char * const s = PerlEnv_getenv("PERL_RE_COLORS");
	3960	if (s) {
	3961	char *t = savepv(s);
	3962	int i = 0;
	3963	PL_colors[0] = t;
	3964	while (++i < 6) {
	3965	t = strchr(t, '\t');
	3966	if (t) {
	3967	*t = '\0';
	3968	PL_colors[i] = ++t;
	3969	}
	3970	else
	3971	PL_colors[i] = t = (char *)"";
	3972	}
	3973	} else {
	3974	int i = 0;
	3975	while (i < 6)
	3976	PL_colors[i++] = (char *)"";
	3977	}
	3978	PL_colorset = 1;
	3979	}
	3980	#endif
	3981
	3982
	#ifdef TRIE_STUDY_OPT
	/*
	 * CHECK_RESTUDY_GOTO - jump back to the reStudy label once if the study
	 * pass asked for it.
	 *
	 * Must be used as a statement inside a function that has a scan_data_t
	 * named `data`, an integer counter named `restudied` and a label named
	 * `reStudy`.  The post-increment is only evaluated when SCF_TRIE_RESTUDY
	 * is set, and the jump is only taken the first time.  Wrapped in
	 * do { } while (0) so that it behaves as a single statement and cannot
	 * capture a following `else`.
	 */
	#define CHECK_RESTUDY_GOTO                                      \
	    do {                                                        \
	        if ((data.flags & SCF_TRIE_RESTUDY) && !restudied++)    \
	            goto reStudy;                                       \
	    } while (0)
	#else
	/* Without TRIE_STUDY_OPT there is never anything to restudy. */
	#define CHECK_RESTUDY_GOTO NOOP
	#endif
	3992
	3993	/*
	3994	- pregcomp - compile a regular expression into internal code
	3995	*
	3996	* We can't allocate space until we know how big the compiled form will be,
	3997	* but we can't compile it (and thus know how big it is) until we've got a
	3998	* place to put the code. So we cheat: we compile it twice, once with code
	3999	* generation turned off and size counting turned on, and once "for real".
	4000	* This also means that we don't allocate space until we are sure that the
	4001	* thing really will compile successfully, and we never have to move the
	4002	* code and thus invalidate pointers into it. (Note that it has to be in
	4003	* one piece because free() must be able to free it all.) [NB: not true in perl]
	4004	*
	4005	* Beware that the optimization-preparation code in here knows about some
	4006	* of the structure of the compiled regexp. [I'll say.]
	4007	*/
	4008
	4009
	4010
	/*
	 * RE_ENGINE_PTR - address of the regexp_engine structure that regexps
	 * compiled by this translation unit are tagged with (see r->engine in
	 * Perl_re_compile).  The core build uses PL_core_reg_engine; when built
	 * with PERL_IN_XSUB_RE defined it uses my_reg_engine instead.  The
	 * expansion is parenthesised so it is safe inside larger expressions.
	 */
	#ifndef PERL_IN_XSUB_RE
	#  define RE_ENGINE_PTR (&PL_core_reg_engine)
	#else
	extern const struct regexp_engine my_reg_engine;
	#  define RE_ENGINE_PTR (&my_reg_engine)
	#endif
	4017
	4018	#ifndef PERL_IN_XSUB_RE
	4019	regexp *
	4020	Perl_pregcomp(pTHX_ char exp, char xend, PMOP *pm)
	4021	{
	4022	dVAR;
	4023	HV * const table = GvHV(PL_hintgv);
	4024	/* Dispatch a request to compile a regexp to correct
	4025	regexp engine. */
	4026	if (table) {
	4027	SV **ptr= hv_fetchs(table, "regcomp", FALSE);
	4028	GET_RE_DEBUG_FLAGS_DECL;
	4029	if (ptr && SvIOK(ptr) && SvIV(ptr)) {
	4030	const regexp_engine eng=INT2PTR(regexp_engine,SvIV(*ptr));
	4031	DEBUG_COMPILE_r({
	4032	PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
	4033	SvIV(*ptr));
	4034	});
	4035	return CALLREGCOMP_ENG(eng, exp, xend, pm);
	4036	}
	4037	}
	4038	return Perl_re_compile(aTHX_ exp, xend, pm);
	4039	}
	4040	#endif
	4041
	4042	regexp *
	4043	Perl_re_compile(pTHX_ char exp, char xend, PMOP *pm)
	4044	{
	4045	dVAR;
	4046	register regexp *r;
	4047	register regexp_internal *ri;
	4048	regnode *scan;
	4049	regnode *first;
	4050	I32 flags;
	4051	I32 minlen = 0;
	4052	I32 sawplus = 0;
	4053	I32 sawopen = 0;
	4054	scan_data_t data;
	4055	RExC_state_t RExC_state;
	4056	RExC_state_t * const pRExC_state = &RExC_state;
	4057	#ifdef TRIE_STUDY_OPT
	4058	int restudied= 0;
	4059	RExC_state_t copyRExC_state;
	4060	#endif
	4061	GET_RE_DEBUG_FLAGS_DECL;
	4062	DEBUG_r(if (!PL_colorset) reginitcolors());
	4063
	4064	if (exp == NULL)
	4065	FAIL("NULL regexp argument");
	4066
	4067	RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
	4068
	4069	DEBUG_COMPILE_r({
	4070	SV *dsv= sv_newmortal();
	4071	RE_PV_QUOTED_DECL(s, RExC_utf8,
	4072	dsv, exp, (xend - exp), 60);
	4073	PerlIO_printf(Perl_debug_log, "%sCompiling REx%s %s\n",
	4074	PL_colors[4],PL_colors[5],s);
	4075	});
	4076
	4077	redo_first_pass:
	4078	RExC_precomp = exp;
	4079	RExC_flags = pm->op_pmflags;
	4080	RExC_sawback = 0;
	4081
	4082	RExC_seen = 0;
	4083	RExC_seen_zerolen = *exp == '^' ? -1 : 0;
	4084	RExC_seen_evals = 0;
	4085	RExC_extralen = 0;
	4086
	4087	/* First pass: determine size, legality. */
	4088	RExC_parse = exp;
	4089	RExC_start = exp;
	4090	RExC_end = xend;
	4091	RExC_naughty = 0;
	4092	RExC_npar = 1;
	4093	RExC_nestroot = 0;
	4094	RExC_size = 0L;
	4095	RExC_emit = &PL_regdummy;
	4096	RExC_whilem_seen = 0;
	4097	RExC_charnames = NULL;
	4098	RExC_open_parens = NULL;
	4099	RExC_close_parens = NULL;
	4100	RExC_opend = NULL;
	4101	RExC_paren_names = NULL;
	4102	#ifdef DEBUGGING
	4103	RExC_paren_name_list = NULL;
	4104	#endif
	4105	RExC_recurse = NULL;
	4106	RExC_recurse_count = 0;
	4107
	4108	#if 0 /* REGC() is (currently) a NOP at the first pass.
	4109	* Clever compilers notice this and complain. --jhi */
	4110	REGC((U8)REG_MAGIC, (char*)RExC_emit);
	4111	#endif
	4112	DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n"));
	4113	if (reg(pRExC_state, 0, &flags,1) == NULL) {
	4114	RExC_precomp = NULL;
	4115	return(NULL);
	4116	}
	4117	if (RExC_utf8 && !RExC_orig_utf8) {
	4118	/* It's possible to write a regexp in ascii that represents unicode
	4119	codepoints outside of the byte range, such as via \x{100}. If we
	4120	detect such a sequence we have to convert the entire pattern to utf8
	4121	and then recompile, as our sizing calculation will have been based
	4122	on 1 byte == 1 character, but we will need to use utf8 to encode
	4123	at least some part of the pattern, and therefore must convert the whole
	4124	thing.
	4125	XXX: somehow figure out how to make this less expensive...
	4126	-- dmq */
	4127	STRLEN len = xend-exp;
	4128	DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
	4129	"UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
	4130	exp = (char)Perl_bytes_to_utf8(aTHX_ (U8)exp, &len);
	4131	xend = exp + len;
	4132	RExC_orig_utf8 = RExC_utf8;
	4133	SAVEFREEPV(exp);
	4134	goto redo_first_pass;
	4135	}
	4136	DEBUG_PARSE_r({
	4137	PerlIO_printf(Perl_debug_log,
	4138	"Required size %"IVdf" nodes\n"
	4139	"Starting second pass (creation)\n",
	4140	(IV)RExC_size);
	4141	RExC_lastnum=0;
	4142	RExC_lastparse=NULL;
	4143	});
	4144	/* Small enough for pointer-storage convention?
	4145	If extralen==0, this means that we will not need long jumps. */
	4146	if (RExC_size >= 0x10000L && RExC_extralen)
	4147	RExC_size += RExC_extralen;
	4148	else
	4149	RExC_extralen = 0;
	4150	if (RExC_whilem_seen > 15)
	4151	RExC_whilem_seen = 15;
	4152
	4153	/* Allocate space and zero-initialize. Note, the two step process
	4154	of zeroing when in debug mode, thus anything assigned has to
	4155	happen after that */
	4156	Newxz(r, 1, regexp);
	4157	Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
	4158	char, regexp_internal);
	4159	if ( r == NULL \|\| ri == NULL )
	4160	FAIL("Regexp out of space");
	4161	#ifdef DEBUGGING
	4162	/* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
	4163	Zero(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode), char);
	4164	#else
	4165	/* bulk initialize base fields with 0. */
	4166	Zero(ri, sizeof(regexp_internal), char);
	4167	#endif
	4168
	4169	/* non-zero initialization begins here */
	4170	RXi_SET( r, ri );
	4171	r->engine= RE_ENGINE_PTR;
	4172	r->refcnt = 1;
	4173	r->prelen = xend - exp;
	4174	r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME;
	4175	{
	4176	bool has_k = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
	4177	bool has_minus = ((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
	4178	bool has_runon = ((RExC_seen & REG_SEEN_RUN_ON_COMMENT)==REG_SEEN_RUN_ON_COMMENT);
	4179	U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD) >> 12);
	4180	const char fptr = STD_PAT_MODS; /"msix"*/
	4181	char *p;
	4182	r->wraplen = r->prelen + has_minus + has_k + has_runon
	4183	+ (sizeof(STD_PAT_MODS) - 1)
	4184	+ (sizeof("(?:)") - 1);
	4185
	4186	Newx(r->wrapped, r->wraplen + 1, char );
	4187	p = r->wrapped;
	4188	p++='('; p++='?';
	4189	if (has_k)
	4190	p++ = KEEPCOPY_PAT_MOD; /'k'*/
	4191	{
	4192	char *r = p + (sizeof(STD_PAT_MODS) - 1) + has_minus - 1;
	4193	char *colon = r + 1;
	4194	char ch;
	4195
	4196	while((ch = *fptr++)) {
	4197	if(reganch & 1)
	4198	*p++ = ch;
	4199	else
	4200	*r-- = ch;
	4201	reganch >>= 1;
	4202	}
	4203	if(has_minus) {
	4204	*r = '-';
	4205	p = colon;
	4206	}
	4207	}
	4208
	4209	*p++ = ':';
	4210	Copy(RExC_precomp, p, r->prelen, char);
	4211	r->precomp = p;
	4212	p += r->prelen;
	4213	if (has_runon)
	4214	*p++ = '\n';
	4215	*p++ = ')';
	4216	*p = 0;
	4217	}
	4218
	4219	r->intflags = 0;
	4220	r->nparens = RExC_npar - 1; /* set early to validate backrefs */
	4221
	4222	if (RExC_seen & REG_SEEN_RECURSE) {
	4223	Newxz(RExC_open_parens, RExC_npar,regnode *);
	4224	SAVEFREEPV(RExC_open_parens);
	4225	Newxz(RExC_close_parens,RExC_npar,regnode *);
	4226	SAVEFREEPV(RExC_close_parens);
	4227	}
	4228
	4229	/* Useful during FAIL. */
	4230	#ifdef RE_TRACK_PATTERN_OFFSETS
	4231	Newxz(ri->u.offsets, 2RExC_size+1, U32); / MJD 20001228 */
	4232	DEBUG_OFFSETS_r(PerlIO_printf(Perl_debug_log,
	4233	"%s %"UVuf" bytes for offset annotations.\n",
	4234	ri->u.offsets ? "Got" : "Couldn't get",
	4235	(UV)((2RExC_size+1) sizeof(U32))));
	4236	#endif
	4237	SetProgLen(ri,RExC_size);
	4238	RExC_rx = r;
	4239	RExC_rxi = ri;
	4240
	4241	/* Second pass: emit code. */
	4242	RExC_flags = pm->op_pmflags; /* don't let top level (?i) bleed */
	4243	RExC_parse = exp;
	4244	RExC_end = xend;
	4245	RExC_naughty = 0;
	4246	RExC_npar = 1;
	4247	RExC_emit_start = ri->program;
	4248	RExC_emit = ri->program;
	4249	RExC_emit_bound = ri->program + RExC_size + 1;
	4250
	4251	/* Store the count of eval-groups for security checks: */
	4252	RExC_rx->seen_evals = RExC_seen_evals;
	4253	REGC((U8)REG_MAGIC, (char*) RExC_emit++);
	4254	if (reg(pRExC_state, 0, &flags,1) == NULL)
	4255	return(NULL);
	4256
	4257	/* XXXX To minimize changes to RE engine we always allocate
	4258	3-units-long substrs field. */
	4259	Newx(r->substrs, 1, struct reg_substr_data);
	4260	if (RExC_recurse_count) {
	4261	Newxz(RExC_recurse,RExC_recurse_count,regnode *);
	4262	SAVEFREEPV(RExC_recurse);
	4263	}
	4264
	4265	reStudy:
	4266	r->minlen = minlen = sawplus = sawopen = 0;
	4267	Zero(r->substrs, 1, struct reg_substr_data);
	4268
	4269	#ifdef TRIE_STUDY_OPT
	4270	if ( restudied ) {
	4271	U32 seen=RExC_seen;
	4272	DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log,"Restudying\n"));
	4273
	4274	RExC_state = copyRExC_state;
	4275	if (seen & REG_TOP_LEVEL_BRANCHES)
	4276	RExC_seen \|= REG_TOP_LEVEL_BRANCHES;
	4277	else
	4278	RExC_seen &= ~REG_TOP_LEVEL_BRANCHES;
	4279	if (data.last_found) {
	4280	SvREFCNT_dec(data.longest_fixed);
	4281	SvREFCNT_dec(data.longest_float);
	4282	SvREFCNT_dec(data.last_found);
	4283	}
	4284	StructCopy(&zero_scan_data, &data, scan_data_t);
	4285	} else {
	4286	StructCopy(&zero_scan_data, &data, scan_data_t);
	4287	copyRExC_state = RExC_state;
	4288	}
	4289	#else
	4290	StructCopy(&zero_scan_data, &data, scan_data_t);
	4291	#endif
	4292
	4293	/* Dig out information for optimizations. */
	4294	r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; /* Again? */
	4295	pm->op_pmflags = RExC_flags;
	4296	if (UTF)
	4297	r->extflags \|= RXf_UTF8; /* Unicode in it? */
	4298	ri->regstclass = NULL;
	4299	if (RExC_naughty >= 10) /* Probably an expensive pattern. */
	4300	r->intflags \|= PREGf_NAUGHTY;
	4301	scan = ri->program + 1; /* First BRANCH. */
	4302
	4303	/* testing for BRANCH here tells us whether there is "must appear"
	4304	data in the pattern. If there is then we can use it for optimisations */
	4305	if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES)) { /* Only one top-level choice. */
	4306	I32 fake;
	4307	STRLEN longest_float_length, longest_fixed_length;
	4308	struct regnode_charclass_class ch_class; /* pointed to by data */
	4309	int stclass_flag;
	4310	I32 last_close = 0; /* pointed to by data */
	4311
	4312	first = scan;
	4313	/* Skip introductions and multiplicators >= 1. */
	4314	while ((OP(first) == OPEN && (sawopen = 1)) \|\|
	4315	/* An OR of one alternative - should not happen now. */
	4316	(OP(first) == BRANCH && OP(regnext(first)) != BRANCH) \|\|
	4317	/* for now we can't handle lookbehind IFMATCH*/
	4318	(OP(first) == IFMATCH && !first->flags) \|\|
	4319	(OP(first) == PLUS) \|\|
	4320	(OP(first) == MINMOD) \|\|
	4321	/* An {n,m} with n>0 */
	4322	(PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) )
	4323	{
	4324
	4325	if (OP(first) == PLUS)
	4326	sawplus = 1;
	4327	else
	4328	first += regarglen[OP(first)];
	4329	if (OP(first) == IFMATCH) {
	4330	first = NEXTOPER(first);
	4331	first += EXTRA_STEP_2ARGS;
	4332	} else /* XXX possible optimisation for /(?=)/ */
	4333	first = NEXTOPER(first);
	4334	}
	4335
	4336	/* Starting-point info. */
	4337	again:
	4338	DEBUG_PEEP("first:",first,0);
	4339	/* Ignore EXACT as we deal with it later. */
	4340	if (PL_regkind[OP(first)] == EXACT) {
	4341	if (OP(first) == EXACT)
	4342	NOOP; /* Empty, get anchored substr later. */
	4343	else if ((OP(first) == EXACTF \|\| OP(first) == EXACTFL))
	4344	ri->regstclass = first;
	4345	}
	4346	#ifdef TRIE_STCLASS
	4347	else if (PL_regkind[OP(first)] == TRIE &&
	4348	((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0)
	4349	{
	4350	regnode *trie_op;
	4351	/* this can happen only on restudy */
	4352	if ( OP(first) == TRIE ) {
	4353	struct regnode_1 trieop = (struct regnode_1 )
	4354	PerlMemShared_calloc(1, sizeof(struct regnode_1));
	4355	StructCopy(first,trieop,struct regnode_1);
	4356	trie_op=(regnode *)trieop;
	4357	} else {
	4358	struct regnode_charclass trieop = (struct regnode_charclass )
	4359	PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
	4360	StructCopy(first,trieop,struct regnode_charclass);
	4361	trie_op=(regnode *)trieop;
	4362	}
	4363	OP(trie_op)+=2;
	4364	make_trie_failtable(pRExC_state, (regnode *)first, trie_op, 0);
	4365	ri->regstclass = trie_op;
	4366	}
	4367	#endif
	4368	else if (strchr((const char*)PL_simple,OP(first)))
	4369	ri->regstclass = first;
	4370	else if (PL_regkind[OP(first)] == BOUND \|\|
	4371	PL_regkind[OP(first)] == NBOUND)
	4372	ri->regstclass = first;
	4373	else if (PL_regkind[OP(first)] == BOL) {
	4374	r->extflags \|= (OP(first) == MBOL
	4375	? RXf_ANCH_MBOL
	4376	: (OP(first) == SBOL
	4377	? RXf_ANCH_SBOL
	4378	: RXf_ANCH_BOL));
	4379	first = NEXTOPER(first);
	4380	goto again;
	4381	}
	4382	else if (OP(first) == GPOS) {
	4383	r->extflags \|= RXf_ANCH_GPOS;
	4384	first = NEXTOPER(first);
	4385	goto again;
	4386	}
	4387	else if ((!sawopen \|\| !RExC_sawback) &&
	4388	(OP(first) == STAR &&
	4389	PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
	4390	!(r->extflags & RXf_ANCH) && !(RExC_seen & REG_SEEN_EVAL))
	4391	{
	4392	/* turn .* into ^.* with an implied $=1 /
	4393	const int type =
	4394	(OP(NEXTOPER(first)) == REG_ANY)
	4395	? RXf_ANCH_MBOL
	4396	: RXf_ANCH_SBOL;
	4397	r->extflags \|= type;
	4398	r->intflags \|= PREGf_IMPLICIT;
	4399	first = NEXTOPER(first);
	4400	goto again;
	4401	}
	4402	if (sawplus && (!sawopen \|\| !RExC_sawback)
	4403	&& !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
	4404	/* x+ must match at the 1st pos of run of x's */
	4405	r->intflags \|= PREGf_SKIP;
	4406
	4407	/* Scan is after the zeroth branch, first is atomic matcher. */
	4408	#ifdef TRIE_STUDY_OPT
	4409	DEBUG_PARSE_r(
	4410	if (!restudied)
	4411	PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
	4412	(IV)(first - scan + 1))
	4413	);
	4414	#else
	4415	DEBUG_PARSE_r(
	4416	PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
	4417	(IV)(first - scan + 1))
	4418	);
	4419	#endif
	4420
	4421
	4422	/*
	4423	* If there's something expensive in the r.e., find the
	4424	* longest literal string that must appear and make it the
	4425	* regmust. Resolve ties in favor of later strings, since
	4426	* the regstart check works with the beginning of the r.e.
	4427	* and avoiding duplication strengthens checking. Not a
	4428	* strong reason, but sufficient in the absence of others.
	4429	* [Now we resolve ties in favor of the earlier string if
	4430	* it happens that c_offset_min has been invalidated, since the
	4431	* earlier string may buy us something the later one won't.]
	4432	*/
	4433
	4434	data.longest_fixed = newSVpvs("");
	4435	data.longest_float = newSVpvs("");
	4436	data.last_found = newSVpvs("");
	4437	data.longest = &(data.longest_fixed);
	4438	first = scan;
	4439	if (!ri->regstclass) {
	4440	cl_init(pRExC_state, &ch_class);
	4441	data.start_class = &ch_class;
	4442	stclass_flag = SCF_DO_STCLASS_AND;
	4443	} else /* XXXX Check for BOUND? */
	4444	stclass_flag = 0;
	4445	data.last_closep = &last_close;
	4446
	4447	minlen = study_chunk(pRExC_state, &first, &minlen, &fake, scan + RExC_size, /* Up to end */
	4448	&data, -1, NULL, NULL,
	4449	SCF_DO_SUBSTR \| SCF_WHILEM_VISITED_POS \| stclass_flag,0);
	4450
	4451
	4452	CHECK_RESTUDY_GOTO;
	4453
	4454
	4455	if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
	4456	&& data.last_start_min == 0 && data.last_end > 0
	4457	&& !RExC_seen_zerolen
	4458	&& !(RExC_seen & REG_SEEN_VERBARG)
	4459	&& (!(RExC_seen & REG_SEEN_GPOS) \|\| (r->extflags & RXf_ANCH_GPOS)))
	4460	r->extflags \|= RXf_CHECK_ALL;
	4461	scan_commit(pRExC_state, &data,&minlen,0);
	4462	SvREFCNT_dec(data.last_found);
	4463
	4464	/* Note that code very similar to this but for anchored string
	4465	follows immediately below, changes may need to be made to both.
	4466	Be careful.
	4467	*/
	4468	longest_float_length = CHR_SVLEN(data.longest_float);
	4469	if (longest_float_length
	4470	\|\| (data.flags & SF_FL_BEFORE_EOL
	4471	&& (!(data.flags & SF_FL_BEFORE_MEOL)
	4472	\|\| (RExC_flags & RXf_PMf_MULTILINE))))
	4473	{
	4474	I32 t,ml;
	4475
	4476	if (SvCUR(data.longest_fixed) /* ok to leave SvCUR */
	4477	&& data.offset_fixed == data.offset_float_min
	4478	&& SvCUR(data.longest_fixed) == SvCUR(data.longest_float))
	4479	goto remove_float; /* As in (a)+. */
	4480
	4481	/* copy the information about the longest float from the reg_scan_data
	4482	over to the program. */
	4483	if (SvUTF8(data.longest_float)) {
	4484	r->float_utf8 = data.longest_float;
	4485	r->float_substr = NULL;
	4486	} else {
	4487	r->float_substr = data.longest_float;
	4488	r->float_utf8 = NULL;
	4489	}
	4490	/* float_end_shift is how many chars that must be matched that
	4491	follow this item. We calculate it ahead of time as once the
	4492	lookbehind offset is added in we lose the ability to correctly
	4493	calculate it.*/
	4494	ml = data.minlen_float ? *(data.minlen_float)
	4495	: (I32)longest_float_length;
	4496	r->float_end_shift = ml - data.offset_float_min
	4497	- longest_float_length + (SvTAIL(data.longest_float) != 0)
	4498	+ data.lookbehind_float;
	4499	r->float_min_offset = data.offset_float_min - data.lookbehind_float;
	4500	r->float_max_offset = data.offset_float_max;
	4501	if (data.offset_float_max < I32_MAX) /* Don't offset infinity */
	4502	r->float_max_offset -= data.lookbehind_float;
	4503
	4504	t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
	4505	&& (!(data.flags & SF_FL_BEFORE_MEOL)
	4506	\|\| (RExC_flags & RXf_PMf_MULTILINE)));
	4507	fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0);
	4508	}
	4509	else {
	4510	remove_float:
	4511	r->float_substr = r->float_utf8 = NULL;
	4512	SvREFCNT_dec(data.longest_float);
	4513	longest_float_length = 0;
	4514	}
	4515
	4516	/* Note that code very similar to this but for floating string
	4517	is immediately above, changes may need to be made to both.
	4518	Be careful.
	4519	*/
	4520	longest_fixed_length = CHR_SVLEN(data.longest_fixed);
	4521	if (longest_fixed_length
	4522	\|\| (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
	4523	&& (!(data.flags & SF_FIX_BEFORE_MEOL)
	4524	\|\| (RExC_flags & RXf_PMf_MULTILINE))))
	4525	{
	4526	I32 t,ml;
	4527
	4528	/* copy the information about the longest fixed
	4529	from the reg_scan_data over to the program. */
	4530	if (SvUTF8(data.longest_fixed)) {
	4531	r->anchored_utf8 = data.longest_fixed;
	4532	r->anchored_substr = NULL;
	4533	} else {
	4534	r->anchored_substr = data.longest_fixed;
	4535	r->anchored_utf8 = NULL;
	4536	}
	4537	/* fixed_end_shift is how many chars that must be matched that
	4538	follow this item. We calculate it ahead of time as once the
	4539	lookbehind offset is added in we lose the ability to correctly
	4540	calculate it.*/
	4541	ml = data.minlen_fixed ? *(data.minlen_fixed)
	4542	: (I32)longest_fixed_length;
	4543	r->anchored_end_shift = ml - data.offset_fixed
	4544	- longest_fixed_length + (SvTAIL(data.longest_fixed) != 0)
	4545	+ data.lookbehind_fixed;
	4546	r->anchored_offset = data.offset_fixed - data.lookbehind_fixed;
	4547
	4548	t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
	4549	&& (!(data.flags & SF_FIX_BEFORE_MEOL)
	4550	\|\| (RExC_flags & RXf_PMf_MULTILINE)));
	4551	fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0);
	4552	}
	4553	else {
	4554	r->anchored_substr = r->anchored_utf8 = NULL;
	4555	SvREFCNT_dec(data.longest_fixed);
	4556	longest_fixed_length = 0;
	4557	}
	4558	if (ri->regstclass
	4559	&& (OP(ri->regstclass) == REG_ANY \|\| OP(ri->regstclass) == SANY))
	4560	ri->regstclass = NULL;
	4561	if ((!(r->anchored_substr \|\| r->anchored_utf8) \|\| r->anchored_offset)
	4562	&& stclass_flag
	4563	&& !(data.start_class->flags & ANYOF_EOS)
	4564	&& !cl_is_anything(data.start_class))
	4565	{
	4566	const U32 n = add_data(pRExC_state, 1, "f");
	4567
	4568	Newx(RExC_rxi->data->data[n], 1,
	4569	struct regnode_charclass_class);
	4570	StructCopy(data.start_class,
	4571	(struct regnode_charclass_class*)RExC_rxi->data->data[n],
	4572	struct regnode_charclass_class);
	4573	ri->regstclass = (regnode*)RExC_rxi->data->data[n];
	4574	r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
	4575	DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
	4576	regprop(r, sv, (regnode*)data.start_class);
	4577	PerlIO_printf(Perl_debug_log,
	4578	"synthetic stclass \"%s\".\n",
	4579	SvPVX_const(sv));});
	4580	}
	4581
	4582	/* A temporary algorithm prefers floated substr to fixed one to dig more info. */
	4583	if (longest_fixed_length > longest_float_length) {
	4584	r->check_end_shift = r->anchored_end_shift;
	4585	r->check_substr = r->anchored_substr;
	4586	r->check_utf8 = r->anchored_utf8;
	4587	r->check_offset_min = r->check_offset_max = r->anchored_offset;
	4588	if (r->extflags & RXf_ANCH_SINGLE)
	4589	r->extflags \|= RXf_NOSCAN;
	4590	}
	4591	else {
	4592	r->check_end_shift = r->float_end_shift;
	4593	r->check_substr = r->float_substr;
	4594	r->check_utf8 = r->float_utf8;
	4595	r->check_offset_min = r->float_min_offset;
	4596	r->check_offset_max = r->float_max_offset;
	4597	}
	4598	/* XXXX Currently intuiting is not compatible with ANCH_GPOS.
	4599	This should be changed ASAP! */
	4600	if ((r->check_substr \|\| r->check_utf8) && !(r->extflags & RXf_ANCH_GPOS)) {
	4601	r->extflags \|= RXf_USE_INTUIT;
	4602	if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
	4603	r->extflags \|= RXf_INTUIT_TAIL;
	4604	}
	4605	/* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
	4606	if ( (STRLEN)minlen < longest_float_length )
	4607	minlen= longest_float_length;
	4608	if ( (STRLEN)minlen < longest_fixed_length )
	4609	minlen= longest_fixed_length;
	4610	*/
	4611	}
	4612	else {
	4613	/* Several toplevels. Best we can is to set minlen. */
	4614	I32 fake;
	4615	struct regnode_charclass_class ch_class;
	4616	I32 last_close = 0;
	4617
	4618	DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "\nMulti Top Level\n"));
	4619
	4620	scan = ri->program + 1;
	4621	cl_init(pRExC_state, &ch_class);
	4622	data.start_class = &ch_class;
	4623	data.last_closep = &last_close;
	4624
	4625
	4626	minlen = study_chunk(pRExC_state, &scan, &minlen, &fake, scan + RExC_size,
	4627	&data, -1, NULL, NULL, SCF_DO_STCLASS_AND\|SCF_WHILEM_VISITED_POS,0);
	4628
	4629	CHECK_RESTUDY_GOTO;
	4630
	4631	r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
	4632	= r->float_substr = r->float_utf8 = NULL;
	4633	if (!(data.start_class->flags & ANYOF_EOS)
	4634	&& !cl_is_anything(data.start_class))
	4635	{
	4636	const U32 n = add_data(pRExC_state, 1, "f");
	4637
	4638	Newx(RExC_rxi->data->data[n], 1,
	4639	struct regnode_charclass_class);
	4640	StructCopy(data.start_class,
	4641	(struct regnode_charclass_class*)RExC_rxi->data->data[n],
	4642	struct regnode_charclass_class);
	4643	ri->regstclass = (regnode*)RExC_rxi->data->data[n];
	4644	r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
	4645	DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
	4646	regprop(r, sv, (regnode*)data.start_class);
	4647	PerlIO_printf(Perl_debug_log,
	4648	"synthetic stclass \"%s\".\n",
	4649	SvPVX_const(sv));});
	4650	}
	4651	}
	4652
	4653	/* Guard against an embedded (?=) or (?<=) with a longer minlen than
	4654	the "real" pattern. */
	4655	DEBUG_OPTIMISE_r({
	4656	PerlIO_printf(Perl_debug_log,"minlen: %"IVdf" r->minlen:%"IVdf"\n",
	4657	(IV)minlen, (IV)r->minlen);
	4658	});
	4659	r->minlenret = minlen;
	4660	if (r->minlen < minlen)
	4661	r->minlen = minlen;
	4662
	4663	if (RExC_seen & REG_SEEN_GPOS)
	4664	r->extflags \|= RXf_GPOS_SEEN;
	4665	if (RExC_seen & REG_SEEN_LOOKBEHIND)
	4666	r->extflags \|= RXf_LOOKBEHIND_SEEN;
	4667	if (RExC_seen & REG_SEEN_EVAL)
	4668	r->extflags \|= RXf_EVAL_SEEN;
	4669	if (RExC_seen & REG_SEEN_CANY)
	4670	r->extflags \|= RXf_CANY_SEEN;
	4671	if (RExC_seen & REG_SEEN_VERBARG)
	4672	r->intflags \|= PREGf_VERBARG_SEEN;
	4673	if (RExC_seen & REG_SEEN_CUTGROUP)
	4674	r->intflags \|= PREGf_CUTGROUP_SEEN;
	4675	if (RExC_paren_names)
	4676	r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
	4677	else
	4678	r->paren_names = NULL;
	4679	if (r->prelen == 3 && strEQ("\\s+", r->precomp))
	4680	r->extflags \|= RXf_WHITE;
	4681	else if (r->prelen == 1 && r->precomp[0] == '^')
	4682	r->extflags \|= RXf_START_ONLY;
	4683
	4684	#ifdef DEBUGGING
	4685	if (RExC_paren_names) {
	4686	ri->name_list_idx = add_data( pRExC_state, 1, "p" );
	4687	ri->data->data[ri->name_list_idx] = (void*)SvREFCNT_inc(RExC_paren_name_list);
	4688	} else
	4689	#endif
	4690	ri->name_list_idx = 0;
	4691
	4692	if (RExC_recurse_count) {
	4693	for ( ; RExC_recurse_count ; RExC_recurse_count-- ) {
	4694	const regnode *scan = RExC_recurse[RExC_recurse_count-1];
	4695	ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan );
	4696	}
	4697	}
	4698	Newxz(r->offs, RExC_npar, regexp_paren_pair);
	4699	/* assume we don't need to swap parens around before we match */
	4700
	4701	DEBUG_DUMP_r({
	4702	PerlIO_printf(Perl_debug_log,"Final program:\n");
	4703	regdump(r);
	4704	});
	4705	#ifdef RE_TRACK_PATTERN_OFFSETS
	4706	DEBUG_OFFSETS_r(if (ri->u.offsets) {
	4707	const U32 len = ri->u.offsets[0];
	4708	U32 i;
	4709	GET_RE_DEBUG_FLAGS_DECL;
	4710	PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)ri->u.offsets[0]);
	4711	for (i = 1; i <= len; i++) {
	4712	if (ri->u.offsets[i2-1] \|\| ri->u.offsets[i2])
	4713	PerlIO_printf(Perl_debug_log, "%"UVuf":%"UVuf"[%"UVuf"] ",
	4714	(UV)i, (UV)ri->u.offsets[i2-1], (UV)ri->u.offsets[i2]);
	4715	}
	4716	PerlIO_printf(Perl_debug_log, "\n");
	4717	});
	4718	#endif
	4719	return(r);
	4720	}
	4721
	4722	#undef RE_ENGINE_PTR
	4723
	4724
	4725	SV*
	4726	Perl_reg_named_buff_get(pTHX_ const REGEXP * const rx, SV* namesv, U32 flags)
	4727	{
	4728	AV *retarray = NULL;
	4729	SV *ret;
	4730	if (flags & 1)
	4731	retarray=newAV();
	4732
	4733	if (rx && rx->paren_names) {
	4734	HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
	4735	if (he_str) {
	4736	IV i;
	4737	SV* sv_dat=HeVAL(he_str);
	4738	I32 nums=(I32)SvPVX(sv_dat);
	4739	for ( i=0; i<SvIVX(sv_dat); i++ ) {
	4740	if ((I32)(rx->nparens) >= nums[i]
	4741	&& rx->offs[nums[i]].start != -1
	4742	&& rx->offs[nums[i]].end != -1)
	4743	{
	4744	ret = CALLREG_NUMBUF(rx,nums[i],NULL);
	4745	if (!retarray)
	4746	return ret;
	4747	} else {
	4748	ret = newSVsv(&PL_sv_undef);
	4749	}
	4750	if (retarray) {
	4751	SvREFCNT_inc(ret);
	4752	av_push(retarray, ret);
	4753	}
	4754	}
	4755	if (retarray)
	4756	return (SV*)retarray;
	4757	}
	4758	}
	4759	return NULL;
	4760	}
	4761
	4762	SV*
	4763	Perl_reg_numbered_buff_get(pTHX_ const REGEXP * const rx, I32 paren, SV* usesv)
	4764	{
	4765	char *s = NULL;
	4766	I32 i = 0;
	4767	I32 s1, t1;
	4768	SV *sv = usesv ? usesv : newSVpvs("");
	4769
	4770	if (!rx->subbeg) {
	4771	sv_setsv(sv,&PL_sv_undef);
	4772	return sv;
	4773	}
	4774	else
	4775	if (paren == -2 && rx->offs[0].start != -1) {
	4776	/* $` */
	4777	i = rx->offs[0].start;
	4778	s = rx->subbeg;
	4779	}
	4780	else
	4781	if (paren == -1 && rx->offs[0].end != -1) {
	4782	/* $' */
	4783	s = rx->subbeg + rx->offs[0].end;
	4784	i = rx->sublen - rx->offs[0].end;
	4785	}
	4786	else
	4787	if ( 0 <= paren && paren <= (I32)rx->nparens &&
	4788	(s1 = rx->offs[paren].start) != -1 &&
	4789	(t1 = rx->offs[paren].end) != -1)
	4790	{
	4791	/* $& $1 ... */
	4792	i = t1 - s1;
	4793	s = rx->subbeg + s1;
	4794	} else {
	4795	sv_setsv(sv,&PL_sv_undef);
	4796	return sv;
	4797	}
	4798	assert(rx->sublen >= (s - rx->subbeg) + i );
	4799	if (i >= 0) {
	4800	const int oldtainted = PL_tainted;
	4801	TAINT_NOT;
	4802	sv_setpvn(sv, s, i);
	4803	PL_tainted = oldtainted;
	4804	if ( (rx->extflags & RXf_CANY_SEEN)
	4805	? (RX_MATCH_UTF8(rx)
	4806	&& (!i \|\| is_utf8_string((U8*)s, i)))
	4807	: (RX_MATCH_UTF8(rx)) )
	4808	{
	4809	SvUTF8_on(sv);
	4810	}
	4811	else
	4812	SvUTF8_off(sv);
	4813	if (PL_tainting) {
	4814	if (RX_MATCH_TAINTED(rx)) {
	4815	if (SvTYPE(sv) >= SVt_PVMG) {
	4816	MAGIC* const mg = SvMAGIC(sv);
	4817	MAGIC* mgt;
	4818	PL_tainted = 1;
	4819	SvMAGIC_set(sv, mg->mg_moremagic);
	4820	SvTAINT(sv);
	4821	if ((mgt = SvMAGIC(sv))) {
	4822	mg->mg_moremagic = mgt;
	4823	SvMAGIC_set(sv, mg);
	4824	}
	4825	} else {
	4826	PL_tainted = 1;
	4827	SvTAINT(sv);
	4828	}
	4829	} else
	4830	SvTAINTED_off(sv);
	4831	}
	4832	} else {
	4833	sv_setsv(sv,&PL_sv_undef);
	4834	}
	4835	return sv;
	4836	}
	4837
	4838
	4839	/* Scans the name of a named buffer from the pattern.
	4840	* If flags is REG_RSN_RETURN_NULL returns null.
	4841	* If flags is REG_RSN_RETURN_NAME returns an SV* containing the name
	4842	* If flags is REG_RSN_RETURN_DATA returns the data SV* corresponding
	4843	* to the parsed name as looked up in the RExC_paren_names hash.
	4844	* If there is an error throws a vFAIL().. type exception.
	4845	*/
	4846
	4847	#define REG_RSN_RETURN_NULL 0
	4848	#define REG_RSN_RETURN_NAME 1
	4849	#define REG_RSN_RETURN_DATA 2
	4850
	4851	STATIC SV*
	4852	S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
	4853	char *name_start = RExC_parse;
	4854
	4855	if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
	4856	/* skip IDFIRST by using do...while */
	4857	if (UTF)
	4858	do {
	4859	RExC_parse += UTF8SKIP(RExC_parse);
	4860	} while (isALNUM_utf8((U8*)RExC_parse));
	4861	else
	4862	do {
	4863	RExC_parse++;
	4864	} while (isALNUM(*RExC_parse));
	4865	}
	4866
	4867	if ( flags ) {
	4868	SV* sv_name = sv_2mortal(Perl_newSVpvn(aTHX_ name_start,
	4869	(int)(RExC_parse - name_start)));
	4870	if (UTF)
	4871	SvUTF8_on(sv_name);
	4872	if ( flags == REG_RSN_RETURN_NAME)
	4873	return sv_name;
	4874	else if (flags==REG_RSN_RETURN_DATA) {
	4875	HE *he_str = NULL;
	4876	SV *sv_dat = NULL;
	4877	if ( ! sv_name ) /* should not happen*/
	4878	Perl_croak(aTHX_ "panic: no svname in reg_scan_name");
	4879	if (RExC_paren_names)
	4880	he_str = hv_fetch_ent( RExC_paren_names, sv_name, 0, 0 );
	4881	if ( he_str )
	4882	sv_dat = HeVAL(he_str);
	4883	if ( ! sv_dat )
	4884	vFAIL("Reference to nonexistent named group");
	4885	return sv_dat;
	4886	}
	4887	else {
	4888	Perl_croak(aTHX_ "panic: bad flag in reg_scan_name");
	4889	}
	4890	/* NOT REACHED */
	4891	}
	4892	return NULL;
	4893	}
	4894
	4895	#define DEBUG_PARSE_MSG(funcname) DEBUG_PARSE_r({ \
	4896	int rem=(int)(RExC_end - RExC_parse); \
	4897	int cut; \
	4898	int num; \
	4899	int iscut=0; \
	4900	if (rem>10) { \
	4901	rem=10; \
	4902	iscut=1; \
	4903	} \
	4904	cut=10-rem; \
	4905	if (RExC_lastparse!=RExC_parse) \
	4906	PerlIO_printf(Perl_debug_log," >%.s%-s", \
	4907	rem, RExC_parse, \
	4908	cut + 4, \
	4909	iscut ? "..." : "<" \
	4910	); \
	4911	else \
	4912	PerlIO_printf(Perl_debug_log,"%16s",""); \
	4913	\
	4914	if (SIZE_ONLY) \
	4915	num = RExC_size + 1; \
	4916	else \
	4917	num=REG_NODE_NUM(RExC_emit); \
	4918	if (RExC_lastnum!=num) \
	4919	PerlIO_printf(Perl_debug_log,"\|%4d",num); \
	4920	else \
	4921	PerlIO_printf(Perl_debug_log,"\|%4s",""); \
	4922	PerlIO_printf(Perl_debug_log,"\|%*s%-4s", \
	4923	(int)((depth*2)), "", \
	4924	(funcname) \
	4925	); \
	4926	RExC_lastnum=num; \
	4927	RExC_lastparse=RExC_parse; \
	4928	})
	4929
	4930
	4931
	4932	#define DEBUG_PARSE(funcname) DEBUG_PARSE_r({ \
	4933	DEBUG_PARSE_MSG((funcname)); \
	4934	PerlIO_printf(Perl_debug_log,"%4s","\n"); \
	4935	})
	4936	#define DEBUG_PARSE_FMT(funcname,fmt,args) DEBUG_PARSE_r({ \
	4937	DEBUG_PARSE_MSG((funcname)); \
	4938	PerlIO_printf(Perl_debug_log,fmt "\n",args); \
	4939	})
	4940	/*
	4941	- reg - regular expression, i.e. main body or parenthesized thing
	4942	*
	4943	* Caller must absorb opening parenthesis.
	4944	*
	4945	* Combining parenthesis handling with the base level of regular expression
	4946	* is a trifle forced, but the need to tie the tails of the branches to what
	4947	* follows makes it hard to avoid.
	4948	*/
	4949	#define REGTAIL(x,y,z) regtail((x),(y),(z),depth+1)
	4950	#ifdef DEBUGGING
	4951	#define REGTAIL_STUDY(x,y,z) regtail_study((x),(y),(z),depth+1)
	4952	#else
	4953	#define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
	4954	#endif
	4955
	4956	STATIC regnode *
	4957	S_reg(pTHX_ RExC_state_t pRExC_state, I32 paren, I32 flagp,U32 depth)
	4958	/* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
	4959	{
	4960	dVAR;
	4961	register regnode ret; / Will be the head of the group. */
	4962	register regnode *br;
	4963	register regnode *lastbr;
	4964	register regnode *ender = NULL;
	4965	register I32 parno = 0;
	4966	I32 flags;
	4967	const I32 oregflags = RExC_flags;
	4968	bool have_branch = 0;
	4969	bool is_open = 0;
	4970	I32 freeze_paren = 0;
	4971	I32 after_freeze = 0;
	4972
	4973	/* for (?g), (?gc), and (?o) warnings; warning
	4974	about (?c) will warn about (?g) -- japhy */
	4975
	4976	#define WASTED_O 0x01
	4977	#define WASTED_G 0x02
	4978	#define WASTED_C 0x04
	4979	#define WASTED_GC (0x02\|0x04)
	4980	I32 wastedflags = 0x00;
	4981
	4982	char * parse_start = RExC_parse; /* MJD */
	4983	char * const oregcomp_parse = RExC_parse;
	4984
	4985	GET_RE_DEBUG_FLAGS_DECL;
	4986	DEBUG_PARSE("reg ");
	4987
	4988	flagp = 0; / Tentatively. */
	4989
	4990
	4991	/* Make an OPEN node, if parenthesized. */
	4992	if (paren) {
	4993	if ( RExC_parse == '') { /* (VERB:ARG) /
	4994	char *start_verb = RExC_parse;
	4995	STRLEN verb_len = 0;
	4996	char *start_arg = NULL;
	4997	unsigned char op = 0;
	4998	int argok = 1;
	4999	int internal_argval = 0; /* internal_argval is only useful if !argok */
	5000	while ( RExC_parse && RExC_parse != ')' ) {
	5001	if ( *RExC_parse == ':' ) {
	5002	start_arg = RExC_parse + 1;
	5003	break;
	5004	}
	5005	RExC_parse++;
	5006	}
	5007	++start_verb;
	5008	verb_len = RExC_parse - start_verb;
	5009	if ( start_arg ) {
	5010	RExC_parse++;
	5011	while ( RExC_parse && RExC_parse != ')' )
	5012	RExC_parse++;
	5013	if ( *RExC_parse != ')' )
	5014	vFAIL("Unterminated verb pattern argument");
	5015	if ( RExC_parse == start_arg )
	5016	start_arg = NULL;
	5017	} else {
	5018	if ( *RExC_parse != ')' )
	5019	vFAIL("Unterminated verb pattern");
	5020	}
	5021
	5022	switch ( *start_verb ) {
	5023	case 'A': /* (ACCEPT) /
	5024	if ( memEQs(start_verb,verb_len,"ACCEPT") ) {
	5025	op = ACCEPT;
	5026	internal_argval = RExC_nestroot;
	5027	}
	5028	break;
	5029	case 'C': /* (COMMIT) /
	5030	if ( memEQs(start_verb,verb_len,"COMMIT") )
	5031	op = COMMIT;
	5032	break;
	5033	case 'F': /* (FAIL) /
	5034	if ( verb_len==1 \|\| memEQs(start_verb,verb_len,"FAIL") ) {
	5035	op = OPFAIL;
	5036	argok = 0;
	5037	}
	5038	break;
	5039	case ':': /* (:NAME) /
	5040	case 'M': /* (MARK:NAME) /
	5041	if ( verb_len==0 \|\| memEQs(start_verb,verb_len,"MARK") ) {
	5042	op = MARKPOINT;
	5043	argok = -1;
	5044	}
	5045	break;
	5046	case 'P': /* (PRUNE) /
	5047	if ( memEQs(start_verb,verb_len,"PRUNE") )
	5048	op = PRUNE;
	5049	break;
	5050	case 'S': /* (SKIP) /
	5051	if ( memEQs(start_verb,verb_len,"SKIP") )
	5052	op = SKIP;
	5053	break;
	5054	case 'T': /* (THEN) /
	5055	/* [19:06] <TimToady> :: is then */
	5056	if ( memEQs(start_verb,verb_len,"THEN") ) {
	5057	op = CUTGROUP;
	5058	RExC_seen \|= REG_SEEN_CUTGROUP;
	5059	}
	5060	break;
	5061	}
	5062	if ( ! op ) {
	5063	RExC_parse++;
	5064	vFAIL3("Unknown verb pattern '%.*s'",
	5065	verb_len, start_verb);
	5066	}
	5067	if ( argok ) {
	5068	if ( start_arg && internal_argval ) {
	5069	vFAIL3("Verb pattern '%.*s' may not have an argument",
	5070	verb_len, start_verb);
	5071	} else if ( argok < 0 && !start_arg ) {
	5072	vFAIL3("Verb pattern '%.*s' has a mandatory argument",
	5073	verb_len, start_verb);
	5074	} else {
	5075	ret = reganode(pRExC_state, op, internal_argval);
	5076	if ( ! internal_argval && ! SIZE_ONLY ) {
	5077	if (start_arg) {
	5078	SV *sv = newSVpvn( start_arg, RExC_parse - start_arg);
	5079	ARG(ret) = add_data( pRExC_state, 1, "S" );
	5080	RExC_rxi->data->data[ARG(ret)]=(void*)sv;
	5081	ret->flags = 0;
	5082	} else {
	5083	ret->flags = 1;
	5084	}
	5085	}
	5086	}
	5087	if (!internal_argval)
	5088	RExC_seen \|= REG_SEEN_VERBARG;
	5089	} else if ( start_arg ) {
	5090	vFAIL3("Verb pattern '%.*s' may not have an argument",
	5091	verb_len, start_verb);
	5092	} else {
	5093	ret = reg_node(pRExC_state, op);
	5094	}
	5095	nextchar(pRExC_state);
	5096	return ret;
	5097	} else
	5098	if (RExC_parse == '?') { / (?...) */
	5099	bool is_logical = 0;
	5100	const char * const seqstart = RExC_parse;
	5101
	5102	RExC_parse++;
	5103	paren = *RExC_parse++;
	5104	ret = NULL; /* For look-ahead/behind. */
	5105	switch (paren) {
	5106
	5107	case 'P': /* (?P...) variants for those used to PCRE/Python */
	5108	paren = *RExC_parse++;
	5109	if ( paren == '<') /* (?P<...>) named capture */
	5110	goto named_capture;
	5111	else if (paren == '>') { /* (?P>name) named recursion */
	5112	goto named_recursion;
	5113	}
	5114	else if (paren == '=') { /* (?P=...) named backref */
	5115	/* this pretty much dupes the code for \k<NAME> in regatom(), if
	5116	you change this make sure you change that */
	5117	char* name_start = RExC_parse;
	5118	U32 num = 0;
	5119	SV *sv_dat = reg_scan_name(pRExC_state,
	5120	SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
	5121	if (RExC_parse == name_start \|\| *RExC_parse != ')')
	5122	vFAIL2("Sequence %.3s... not terminated",parse_start);
	5123
	5124	if (!SIZE_ONLY) {
	5125	num = add_data( pRExC_state, 1, "S" );
	5126	RExC_rxi->data->data[num]=(void*)sv_dat;
	5127	SvREFCNT_inc(sv_dat);
	5128	}
	5129	RExC_sawback = 1;
	5130	ret = reganode(pRExC_state,
	5131	(U8)(FOLD ? (LOC ? NREFFL : NREFF) : NREF),
	5132	num);
	5133	*flagp \|= HASWIDTH;
	5134
	5135	Set_Node_Offset(ret, parse_start+1);
	5136	Set_Node_Cur_Length(ret); /* MJD */
	5137
	5138	nextchar(pRExC_state);
	5139	return ret;
	5140	}
	5141	RExC_parse++;
	5142	vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
	5143	/NOTREACHED/
	5144	case '<': /* (?<...) */
	5145	if (*RExC_parse == '!')
	5146	paren = ',';
	5147	else if (*RExC_parse != '=')
	5148	named_capture:
	5149	{ /* (?<...>) */
	5150	char *name_start;
	5151	SV *svname;
	5152	paren= '>';
	5153	case '\'': /* (?'...') */
	5154	name_start= RExC_parse;
	5155	svname = reg_scan_name(pRExC_state,
	5156	SIZE_ONLY ? /* reverse test from the others */
	5157	REG_RSN_RETURN_NAME :
	5158	REG_RSN_RETURN_NULL);
	5159	if (RExC_parse == name_start) {
	5160	RExC_parse++;
	5161	vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
	5162	/NOTREACHED/
	5163	}
	5164	if (*RExC_parse != paren)
	5165	vFAIL2("Sequence (?%c... not terminated",
	5166	paren=='>' ? '<' : paren);
	5167	if (SIZE_ONLY) {
	5168	HE *he_str;
	5169	SV *sv_dat = NULL;
	5170	if (!svname) /* shouldnt happen */
	5171	Perl_croak(aTHX_
	5172	"panic: reg_scan_name returned NULL");
	5173	if (!RExC_paren_names) {
	5174	RExC_paren_names= newHV();
	5175	sv_2mortal((SV*)RExC_paren_names);
	5176	#ifdef DEBUGGING
	5177	RExC_paren_name_list= newAV();
	5178	sv_2mortal((SV*)RExC_paren_name_list);
	5179	#endif
	5180	}
	5181	he_str = hv_fetch_ent( RExC_paren_names, svname, 1, 0 );
	5182	if ( he_str )
	5183	sv_dat = HeVAL(he_str);
	5184	if ( ! sv_dat ) {
	5185	/* croak baby croak */
	5186	Perl_croak(aTHX_
	5187	"panic: paren_name hash element allocation failed");
	5188	} else if ( SvPOK(sv_dat) ) {
	5189	/* (?\|...) can mean we have dupes so scan to check
	5190	its already been stored. Maybe a flag indicating
	5191	we are inside such a construct would be useful,
	5192	but the arrays are likely to be quite small, so
	5193	for now we punt -- dmq */
	5194	IV count = SvIV(sv_dat);
	5195	I32 pv = (I32)SvPVX(sv_dat);
	5196	IV i;
	5197	for ( i = 0 ; i < count ; i++ ) {
	5198	if ( pv[i] == RExC_npar ) {
	5199	count = 0;
	5200	break;
	5201	}
	5202	}
	5203	if ( count ) {
	5204	pv = (I32*)SvGROW(sv_dat, SvCUR(sv_dat) + sizeof(I32)+1);
	5205	SvCUR_set(sv_dat, SvCUR(sv_dat) + sizeof(I32));
	5206	pv[count] = RExC_npar;
	5207	SvIVX(sv_dat)++;
	5208	}
	5209	} else {
	5210	(void)SvUPGRADE(sv_dat,SVt_PVNV);
	5211	sv_setpvn(sv_dat, (char *)&(RExC_npar), sizeof(I32));
	5212	SvIOK_on(sv_dat);
	5213	SvIVX(sv_dat)= 1;
	5214	}
	5215	#ifdef DEBUGGING
	5216	if (!av_store(RExC_paren_name_list, RExC_npar, SvREFCNT_inc(svname)))
	5217	SvREFCNT_dec(svname);
	5218	#endif
	5219
	5220	/sv_dump(sv_dat);/
	5221	}
	5222	nextchar(pRExC_state);
	5223	paren = 1;
	5224	goto capturing_parens;
	5225	}
	5226	RExC_seen \|= REG_SEEN_LOOKBEHIND;
	5227	RExC_parse++;
	5228	case '=': /* (?=...) */
	5229	case '!': /* (?!...) */
	5230	RExC_seen_zerolen++;
	5231	if (*RExC_parse == ')') {
	5232	ret=reg_node(pRExC_state, OPFAIL);
	5233	nextchar(pRExC_state);
	5234	return ret;
	5235	}
	5236	break;
	5237	case '\|': /* (?\|...) */
	5238	/* branch reset, behave like a (?:...) except that
	5239	buffers in alternations share the same numbers */
	5240	paren = ':';
	5241	after_freeze = freeze_paren = RExC_npar;
	5242	break;
	5243	case ':': /* (?:...) */
	5244	case '>': /* (?>...) */
	5245	break;
	5246	case '$': /* (?$...) */
	5247	case '@': /* (?@...) */
	5248	vFAIL2("Sequence (?%c...) not implemented", (int)paren);
	5249	break;
	5250	case '#': /* (?#...) */
	5251	while (RExC_parse && RExC_parse != ')')
	5252	RExC_parse++;
	5253	if (*RExC_parse != ')')
	5254	FAIL("Sequence (?#... not terminated");
	5255	nextchar(pRExC_state);
	5256	*flagp = TRYAGAIN;
	5257	return NULL;
	5258	case '0' : /* (?0) */
	5259	case 'R' : /* (?R) */
	5260	if (*RExC_parse != ')')
	5261	FAIL("Sequence (?R) not terminated");
	5262	ret = reg_node(pRExC_state, GOSTART);
	5263	*flagp \|= POSTPONED;
	5264	nextchar(pRExC_state);
	5265	return ret;
	5266	/notreached/
	5267	{ /* named and numeric backreferences */
	5268	I32 num;
	5269	case '&': /* (?&NAME) */
	5270	parse_start = RExC_parse - 1;
	5271	named_recursion:
	5272	{
	5273	SV *sv_dat = reg_scan_name(pRExC_state,
	5274	SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
	5275	num = sv_dat ? ((I32 )SvPVX(sv_dat)) : 0;
	5276	}
	5277	goto gen_recurse_regop;
	5278	/* NOT REACHED */
	5279	case '+':
	5280	if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
	5281	RExC_parse++;
	5282	vFAIL("Illegal pattern");
	5283	}
	5284	goto parse_recursion;
	5285	/* NOT REACHED*/
	5286	case '-': /* (?-1) */
	5287	if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
	5288	RExC_parse--; /* rewind to let it be handled later */
	5289	goto parse_flags;
	5290	}
	5291	/FALLTHROUGH /
	5292	case '1': case '2': case '3': case '4': /* (?1) */
	5293	case '5': case '6': case '7': case '8': case '9':
	5294	RExC_parse--;
	5295	parse_recursion:
	5296	num = atoi(RExC_parse);
	5297	parse_start = RExC_parse - 1; /* MJD */
	5298	if (*RExC_parse == '-')
	5299	RExC_parse++;
	5300	while (isDIGIT(*RExC_parse))
	5301	RExC_parse++;
	5302	if (*RExC_parse!=')')
	5303	vFAIL("Expecting close bracket");
	5304
	5305	gen_recurse_regop:
	5306	if ( paren == '-' ) {
	5307	/*
	5308	Diagram of capture buffer numbering.
	5309	Top line is the normal capture buffer numbers
	5310	Botton line is the negative indexing as from
	5311	the X (the (?-2))
	5312
	5313	+ 1 2 3 4 5 X 6 7
	5314	/(a(x)y)(a(b(c(?-2)d)e)f)(g(h))/
	5315	- 5 4 3 2 1 X x x
	5316
	5317	*/
	5318	num = RExC_npar + num;
	5319	if (num < 1) {
	5320	RExC_parse++;
	5321	vFAIL("Reference to nonexistent group");
	5322	}
	5323	} else if ( paren == '+' ) {
	5324	num = RExC_npar + num - 1;
	5325	}
	5326
	5327	ret = reganode(pRExC_state, GOSUB, num);
	5328	if (!SIZE_ONLY) {
	5329	if (num > (I32)RExC_rx->nparens) {
	5330	RExC_parse++;
	5331	vFAIL("Reference to nonexistent group");
	5332	}
	5333	ARG2L_SET( ret, RExC_recurse_count++);
	5334	RExC_emit++;
	5335	DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
	5336	"Recurse #%"UVuf" to %"IVdf"\n", (UV)ARG(ret), (IV)ARG2L(ret)));
	5337	} else {
	5338	RExC_size++;
	5339	}
	5340	RExC_seen \|= REG_SEEN_RECURSE;
	5341	Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */
	5342	Set_Node_Offset(ret, parse_start); /* MJD */
	5343
	5344	*flagp \|= POSTPONED;
	5345	nextchar(pRExC_state);
	5346	return ret;
	5347	} /* named and numeric backreferences */
	5348	/* NOT REACHED */
	5349
	5350	case '?': /* (??...) */
	5351	is_logical = 1;
	5352	if (*RExC_parse != '{') {
	5353	RExC_parse++;
	5354	vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
	5355	/NOTREACHED/
	5356	}
	5357	*flagp \|= POSTPONED;
	5358	paren = *RExC_parse++;
	5359	/* FALL THROUGH */
	5360	case '{': /* (?{...}) */
	5361	{
	5362	I32 count = 1;
	5363	U32 n = 0;
	5364	char c;
	5365	char *s = RExC_parse;
	5366
	5367	RExC_seen_zerolen++;
	5368	RExC_seen \|= REG_SEEN_EVAL;
	5369	while (count && (c = *RExC_parse)) {
	5370	if (c == '\\') {
	5371	if (RExC_parse[1])
	5372	RExC_parse++;
	5373	}
	5374	else if (c == '{')
	5375	count++;
	5376	else if (c == '}')
	5377	count--;
	5378	RExC_parse++;
	5379	}
	5380	if (*RExC_parse != ')') {
	5381	RExC_parse = s;
	5382	vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
	5383	}
	5384	if (!SIZE_ONLY) {
	5385	PAD *pad;
	5386	OP_4tree sop, rop;
	5387	SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
	5388
	5389	ENTER;
	5390	Perl_save_re_context(aTHX);
	5391	rop = sv_compile_2op(sv, &sop, "re", &pad);
	5392	sop->op_private \|= OPpREFCOUNTED;
	5393	/* re_dup will OpREFCNT_inc */
	5394	OpREFCNT_set(sop, 1);
	5395	LEAVE;
	5396
	5397	n = add_data(pRExC_state, 3, "nop");
	5398	RExC_rxi->data->data[n] = (void*)rop;
	5399	RExC_rxi->data->data[n+1] = (void*)sop;
	5400	RExC_rxi->data->data[n+2] = (void*)pad;
	5401	SvREFCNT_dec(sv);
	5402	}
	5403	else { /* First pass */
	5404	if (PL_reginterp_cnt < ++RExC_seen_evals
	5405	&& IN_PERL_RUNTIME)
	5406	/* No compiled RE interpolated, has runtime
	5407	components ===> unsafe. */
	5408	FAIL("Eval-group not allowed at runtime, use re 'eval'");
	5409	if (PL_tainting && PL_tainted)
	5410	FAIL("Eval-group in insecure regular expression");
	5411	#if PERL_VERSION > 8
	5412	if (IN_PERL_COMPILETIME)
	5413	PL_cv_has_eval = 1;
	5414	#endif
	5415	}
	5416
	5417	nextchar(pRExC_state);
	5418	if (is_logical) {
	5419	ret = reg_node(pRExC_state, LOGICAL);
	5420	if (!SIZE_ONLY)
	5421	ret->flags = 2;
	5422	REGTAIL(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
	5423	/* deal with the length of this later - MJD */
	5424	return ret;
	5425	}
	5426	ret = reganode(pRExC_state, EVAL, n);
	5427	Set_Node_Length(ret, RExC_parse - parse_start + 1);
	5428	Set_Node_Offset(ret, parse_start);
	5429	return ret;
	5430	}
	5431	case '(': /* (?(?{...})...) and (?(?=...)...) */
	5432	{
	5433	int is_define= 0;
	5434	if (RExC_parse[0] == '?') { /* (?(?...)) */
	5435	if (RExC_parse[1] == '=' \|\| RExC_parse[1] == '!'
	5436	\|\| RExC_parse[1] == '<'
	5437	\|\| RExC_parse[1] == '{') { /* Lookahead or eval. */
	5438	I32 flag;
	5439
	5440	ret = reg_node(pRExC_state, LOGICAL);
	5441	if (!SIZE_ONLY)
	5442	ret->flags = 1;
	5443	REGTAIL(pRExC_state, ret, reg(pRExC_state, 1, &flag,depth+1));
	5444	goto insert_if;
	5445	}
	5446	}
	5447	else if ( RExC_parse[0] == '<' /* (?(<NAME>)...) */
	5448	\|\| RExC_parse[0] == '\'' ) /* (?('NAME')...) */
	5449	{
	5450	char ch = RExC_parse[0] == '<' ? '>' : '\'';
	5451	char *name_start= RExC_parse++;
	5452	U32 num = 0;
	5453	SV *sv_dat=reg_scan_name(pRExC_state,
	5454	SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
	5455	if (RExC_parse == name_start \|\| *RExC_parse != ch)
	5456	vFAIL2("Sequence (?(%c... not terminated",
	5457	(ch == '>' ? '<' : ch));
	5458	RExC_parse++;
	5459	if (!SIZE_ONLY) {
	5460	num = add_data( pRExC_state, 1, "S" );
	5461	RExC_rxi->data->data[num]=(void*)sv_dat;
	5462	SvREFCNT_inc(sv_dat);
	5463	}
	5464	ret = reganode(pRExC_state,NGROUPP,num);
	5465	goto insert_if_check_paren;
	5466	}
	5467	else if (RExC_parse[0] == 'D' &&
	5468	RExC_parse[1] == 'E' &&
	5469	RExC_parse[2] == 'F' &&
	5470	RExC_parse[3] == 'I' &&
	5471	RExC_parse[4] == 'N' &&
	5472	RExC_parse[5] == 'E')
	5473	{
	5474	ret = reganode(pRExC_state,DEFINEP,0);
	5475	RExC_parse +=6 ;
	5476	is_define = 1;
	5477	goto insert_if_check_paren;
	5478	}
	5479	else if (RExC_parse[0] == 'R') {
	5480	RExC_parse++;
	5481	parno = 0;
	5482	if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
	5483	parno = atoi(RExC_parse++);
	5484	while (isDIGIT(*RExC_parse))
	5485	RExC_parse++;
	5486	} else if (RExC_parse[0] == '&') {
	5487	SV *sv_dat;
	5488	RExC_parse++;
	5489	sv_dat = reg_scan_name(pRExC_state,
	5490	SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
	5491	parno = sv_dat ? ((I32 )SvPVX(sv_dat)) : 0;
	5492	}
	5493	ret = reganode(pRExC_state,INSUBP,parno);
	5494	goto insert_if_check_paren;
	5495	}
	5496	else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
	5497	/* (?(1)...) */
	5498	char c;
	5499	parno = atoi(RExC_parse++);
	5500
	5501	while (isDIGIT(*RExC_parse))
	5502	RExC_parse++;
	5503	ret = reganode(pRExC_state, GROUPP, parno);
	5504
	5505	insert_if_check_paren:
	5506	if ((c = *nextchar(pRExC_state)) != ')')
	5507	vFAIL("Switch condition not recognized");
	5508	insert_if:
	5509	REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
	5510	br = regbranch(pRExC_state, &flags, 1,depth+1);
	5511	if (br == NULL)
	5512	br = reganode(pRExC_state, LONGJMP, 0);
	5513	else
	5514	REGTAIL(pRExC_state, br, reganode(pRExC_state, LONGJMP, 0));
	5515	c = *nextchar(pRExC_state);
	5516	if (flags&HASWIDTH)
	5517	*flagp \|= HASWIDTH;
	5518	if (c == '\|') {
	5519	if (is_define)
	5520	vFAIL("(?(DEFINE)....) does not allow branches");
	5521	lastbr = reganode(pRExC_state, IFTHEN, 0); /* Fake one for optimizer. */
	5522	regbranch(pRExC_state, &flags, 1,depth+1);
	5523	REGTAIL(pRExC_state, ret, lastbr);
	5524	if (flags&HASWIDTH)
	5525	*flagp \|= HASWIDTH;
	5526	c = *nextchar(pRExC_state);
	5527	}
	5528	else
	5529	lastbr = NULL;
	5530	if (c != ')')
	5531	vFAIL("Switch (?(condition)... contains too many branches");
	5532	ender = reg_node(pRExC_state, TAIL);
	5533	REGTAIL(pRExC_state, br, ender);
	5534	if (lastbr) {
	5535	REGTAIL(pRExC_state, lastbr, ender);
	5536	REGTAIL(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender);
	5537	}
	5538	else
	5539	REGTAIL(pRExC_state, ret, ender);
	5540	RExC_size++; /* XXX WHY do we need this?!!
	5541	For large programs it seems to be required
	5542	but I can't figure out why. -- dmq*/
	5543	return ret;
	5544	}
	5545	else {
	5546	vFAIL2("Unknown switch condition (?(%.2s", RExC_parse);
	5547	}
	5548	}
	5549	case 0:
	5550	RExC_parse--; /* for vFAIL to print correctly */
	5551	vFAIL("Sequence (? incomplete");
	5552	break;
	5553	default:
	5554	--RExC_parse;
	5555	parse_flags: /* (?i) */
	5556	{
	5557	U32 posflags = 0, negflags = 0;
	5558	U32 *flagsp = &posflags;
	5559
	5560	while (*RExC_parse) {
	5561	/* && strchr("iogcmsx", RExC_parse) /
	5562	/* (?g), (?gc) and (?o) are useless here
	5563	and must be globally applied -- japhy */
	5564	switch (*RExC_parse) {
	5565	CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
	5566	case 'o':
	5567	case 'g':
	5568	if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
	5569	const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
	5570	if (! (wastedflags & wflagbit) ) {
	5571	wastedflags \|= wflagbit;
	5572	vWARN5(
	5573	RExC_parse + 1,
	5574	"Useless (%s%c) - %suse /%c modifier",
	5575	flagsp == &negflags ? "?-" : "?",
	5576	*RExC_parse,
	5577	flagsp == &negflags ? "don't " : "",
	5578	*RExC_parse
	5579	);
	5580	}
	5581	}
	5582	break;
	5583
	5584	case 'c':
	5585	if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
	5586	if (! (wastedflags & WASTED_C) ) {
	5587	wastedflags \|= WASTED_GC;
	5588	vWARN3(
	5589	RExC_parse + 1,
	5590	"Useless (%sc) - %suse /gc modifier",
	5591	flagsp == &negflags ? "?-" : "?",
	5592	flagsp == &negflags ? "don't " : ""
	5593	);
	5594	}
	5595	}
	5596	break;
	5597	case 'k':
	5598	if (flagsp == &negflags) {
	5599	if (SIZE_ONLY && ckWARN(WARN_REGEXP))
	5600	vWARN(RExC_parse + 1,"Useless use of (?-k)");
	5601	} else {
	5602	*flagsp \|= RXf_PMf_KEEPCOPY;
	5603	}
	5604	break;
	5605	case '-':
	5606	if (flagsp == &negflags) {
	5607	RExC_parse++;
	5608	vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
	5609	/NOTREACHED/
	5610	}
	5611	flagsp = &negflags;
	5612	wastedflags = 0; /* reset so (?g-c) warns twice */
	5613	break;
	5614	case ':':
	5615	paren = ':';
	5616	/FALLTHROUGH/
	5617	case ')':
	5618	RExC_flags \|= posflags;
	5619	RExC_flags &= ~negflags;
	5620	nextchar(pRExC_state);
	5621	if (paren != ':') {
	5622	*flagp = TRYAGAIN;
	5623	return NULL;
	5624	} else {
	5625	ret = NULL;
	5626	goto parse_rest;
	5627	}
	5628	/NOTREACHED/
	5629	default:
	5630	RExC_parse++;
	5631	vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
	5632	/NOTREACHED/
	5633	}
	5634	++RExC_parse;
	5635	}
	5636	}} /* one for the default block, one for the switch */
	5637	}
	5638	else { /* (...) */
	5639	capturing_parens:
	5640	parno = RExC_npar;
	5641	RExC_npar++;
	5642
	5643	ret = reganode(pRExC_state, OPEN, parno);
	5644	if (!SIZE_ONLY ){
	5645	if (!RExC_nestroot)
	5646	RExC_nestroot = parno;
	5647	if (RExC_seen & REG_SEEN_RECURSE
	5648	&& !RExC_open_parens[parno-1])
	5649	{
	5650	DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
	5651	"Setting open paren #%"IVdf" to %d\n",
	5652	(IV)parno, REG_NODE_NUM(ret)));
	5653	RExC_open_parens[parno-1]= ret;
	5654	}
	5655	}
	5656	Set_Node_Length(ret, 1); /* MJD */
	5657	Set_Node_Offset(ret, RExC_parse); /* MJD */
	5658	is_open = 1;
	5659	}
	5660	}
	5661	else /* ! paren */
	5662	ret = NULL;
	5663
	5664	parse_rest:
	5665	/* Pick up the branches, linking them together. */
	5666	parse_start = RExC_parse; /* MJD */
	5667	br = regbranch(pRExC_state, &flags, 1,depth+1);
	5668	/* branch_len = (paren != 0); */
	5669
	5670	if (br == NULL)
	5671	return(NULL);
	5672	if (*RExC_parse == '\|') {
	5673	if (!SIZE_ONLY && RExC_extralen) {
	5674	reginsert(pRExC_state, BRANCHJ, br, depth+1);
	5675	}
	5676	else { /* MJD */
	5677	reginsert(pRExC_state, BRANCH, br, depth+1);
	5678	Set_Node_Length(br, paren != 0);
	5679	Set_Node_Offset_To_R(br-RExC_emit_start, parse_start-RExC_start);
	5680	}
	5681	have_branch = 1;
	5682	if (SIZE_ONLY)
	5683	RExC_extralen += 1; /* For BRANCHJ-BRANCH. */
	5684	}
	5685	else if (paren == ':') {
	5686	*flagp \|= flags&SIMPLE;
	5687	}
	5688	if (is_open) { /* Starts with OPEN. */
	5689	REGTAIL(pRExC_state, ret, br); /* OPEN -> first. */
	5690	}
	5691	else if (paren != '?') /* Not Conditional */
	5692	ret = br;
	5693	*flagp \|= flags & (SPSTART \| HASWIDTH \| POSTPONED);
	5694	lastbr = br;
	5695	while (*RExC_parse == '\|') {
	5696	if (!SIZE_ONLY && RExC_extralen) {
	5697	ender = reganode(pRExC_state, LONGJMP,0);
	5698	REGTAIL(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender); /* Append to the previous. */
	5699	}
	5700	if (SIZE_ONLY)
	5701	RExC_extralen += 2; /* Account for LONGJMP. */
	5702	nextchar(pRExC_state);
	5703	if (freeze_paren) {
	5704	if (RExC_npar > after_freeze)
	5705	after_freeze = RExC_npar;
	5706	RExC_npar = freeze_paren;
	5707	}
	5708	br = regbranch(pRExC_state, &flags, 0, depth+1);
	5709
	5710	if (br == NULL)
	5711	return(NULL);
	5712	REGTAIL(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */
	5713	lastbr = br;
	5714	*flagp \|= flags & (SPSTART \| HASWIDTH \| POSTPONED);
	5715	}
	5716
	5717	if (have_branch \|\| paren != ':') {
	5718	/* Make a closing node, and hook it on the end. */
	5719	switch (paren) {
	5720	case ':':
	5721	ender = reg_node(pRExC_state, TAIL);
	5722	break;
	5723	case 1:
	5724	ender = reganode(pRExC_state, CLOSE, parno);
	5725	if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) {
	5726	DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
	5727	"Setting close paren #%"IVdf" to %d\n",
	5728	(IV)parno, REG_NODE_NUM(ender)));
	5729	RExC_close_parens[parno-1]= ender;
	5730	if (RExC_nestroot == parno)
	5731	RExC_nestroot = 0;
	5732	}
	5733	Set_Node_Offset(ender,RExC_parse+1); /* MJD */
	5734	Set_Node_Length(ender,1); /* MJD */
	5735	break;
	5736	case '<':
	5737	case ',':
	5738	case '=':
	5739	case '!':
	5740	*flagp &= ~HASWIDTH;
	5741	/* FALL THROUGH */
	5742	case '>':
	5743	ender = reg_node(pRExC_state, SUCCEED);
	5744	break;
	5745	case 0:
	5746	ender = reg_node(pRExC_state, END);
	5747	if (!SIZE_ONLY) {
	5748	assert(!RExC_opend); /* there can only be one! */
	5749	RExC_opend = ender;
	5750	}
	5751	break;
	5752	}
	5753	REGTAIL(pRExC_state, lastbr, ender);
	5754
	5755	if (have_branch && !SIZE_ONLY) {
	5756	if (depth==1)
	5757	RExC_seen \|= REG_TOP_LEVEL_BRANCHES;
	5758
	5759	/* Hook the tails of the branches to the closing node. */
	5760	for (br = ret; br; br = regnext(br)) {
	5761	const U8 op = PL_regkind[OP(br)];
	5762	if (op == BRANCH) {
	5763	REGTAIL_STUDY(pRExC_state, NEXTOPER(br), ender);
	5764	}
	5765	else if (op == BRANCHJ) {
	5766	REGTAIL_STUDY(pRExC_state, NEXTOPER(NEXTOPER(br)), ender);
	5767	}
	5768	}
	5769	}
	5770	}
	5771
	5772	{
	5773	const char *p;
	5774	static const char parens[] = "=!<,>";
	5775
	5776	if (paren && (p = strchr(parens, paren))) {
	5777	U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
	5778	int flag = (p - parens) > 1;
	5779
	5780	if (paren == '>')
	5781	node = SUSPEND, flag = 0;
	5782	reginsert(pRExC_state, node,ret, depth+1);
	5783	Set_Node_Cur_Length(ret);
	5784	Set_Node_Offset(ret, parse_start + 1);
	5785	ret->flags = flag;
	5786	REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL));
	5787	}
	5788	}
	5789
	5790	/* Check for proper termination. */
	5791	if (paren) {
	5792	RExC_flags = oregflags;
	5793	if (RExC_parse >= RExC_end \|\| *nextchar(pRExC_state) != ')') {
	5794	RExC_parse = oregcomp_parse;
	5795	vFAIL("Unmatched (");
	5796	}
	5797	}
	5798	else if (!paren && RExC_parse < RExC_end) {
	5799	if (*RExC_parse == ')') {
	5800	RExC_parse++;
	5801	vFAIL("Unmatched )");
	5802	}
	5803	else
	5804	FAIL("Junk on end of regexp"); /* "Can't happen". */
	5805	/* NOTREACHED */
	5806	}
	5807	if (after_freeze)
	5808	RExC_npar = after_freeze;
	5809	return(ret);
	5810	}
	5811
	5812	/*
	5813	- regbranch - one alternative of an \| operator
	5814	*
	5815	* Implements the concatenation operator.
	5816	*/
	5817	STATIC regnode *
	5818	S_regbranch(pTHX_ RExC_state_t pRExC_state, I32 flagp, I32 first, U32 depth)
	5819	{
	5820	dVAR;
	5821	register regnode *ret;
	5822	register regnode *chain = NULL;
	5823	register regnode *latest;
	5824	I32 flags = 0, c = 0;
	5825	GET_RE_DEBUG_FLAGS_DECL;
	5826	DEBUG_PARSE("brnc");
	5827
	5828	if (first)
	5829	ret = NULL;
	5830	else {
	5831	if (!SIZE_ONLY && RExC_extralen)
	5832	ret = reganode(pRExC_state, BRANCHJ,0);
	5833	else {
	5834	ret = reg_node(pRExC_state, BRANCH);
	5835	Set_Node_Length(ret, 1);
	5836	}
	5837	}
	5838
	5839	if (!first && SIZE_ONLY)
	5840	RExC_extralen += 1; /* BRANCHJ */
	5841
	5842	flagp = WORST; / Tentatively. */
	5843
	5844	RExC_parse--;
	5845	nextchar(pRExC_state);
	5846	while (RExC_parse < RExC_end && RExC_parse != '\|' && RExC_parse != ')') {
	5847	flags &= ~TRYAGAIN;
	5848	latest = regpiece(pRExC_state, &flags,depth+1);
	5849	if (latest == NULL) {
	5850	if (flags & TRYAGAIN)
	5851	continue;
	5852	return(NULL);
	5853	}
	5854	else if (ret == NULL)
	5855	ret = latest;
	5856	*flagp \|= flags&(HASWIDTH\|POSTPONED);
	5857	if (chain == NULL) /* First piece. */
	5858	*flagp \|= flags&SPSTART;
	5859	else {
	5860	RExC_naughty++;
	5861	REGTAIL(pRExC_state, chain, latest);
	5862	}
	5863	chain = latest;
	5864	c++;
	5865	}
	5866	if (chain == NULL) { /* Loop ran zero times. */
	5867	chain = reg_node(pRExC_state, NOTHING);
	5868	if (ret == NULL)
	5869	ret = chain;
	5870	}
	5871	if (c == 1) {
	5872	*flagp \|= flags&SIMPLE;
	5873	}
	5874
	5875	return ret;
	5876	}
	5877
	5878	/*
	5879	- regpiece - something followed by possible [*+?]
	5880	*
	5881	* Note that the branching code sequences used for ? and the general cases
	5882	* of * and + are somewhat optimized: they use the same NOTHING node as
	5883	* both the endmarker for their branch list and the body of the last branch.
	5884	* It might seem that this node could be dispensed with entirely, but the
	5885	* endmarker role is not redundant.
	5886	*/
	5887	STATIC regnode *
	5888	S_regpiece(pTHX_ RExC_state_t pRExC_state, I32 flagp, U32 depth)
	5889	{
	5890	dVAR;
	5891	register regnode *ret;
	5892	register char op;
	5893	register char *next;
	5894	I32 flags;
	5895	const char * const origparse = RExC_parse;
	5896	I32 min;
	5897	I32 max = REG_INFTY;
	5898	char *parse_start;
	5899	const char *maxpos = NULL;
	5900	GET_RE_DEBUG_FLAGS_DECL;
	5901	DEBUG_PARSE("piec");
	5902
	5903	ret = regatom(pRExC_state, &flags,depth+1);
	5904	if (ret == NULL) {
	5905	if (flags & TRYAGAIN)
	5906	*flagp \|= TRYAGAIN;
	5907	return(NULL);
	5908	}
	5909
	5910	op = *RExC_parse;
	5911
	5912	if (op == '{' && regcurly(RExC_parse)) {
	5913	maxpos = NULL;
	5914	parse_start = RExC_parse; /* MJD */
	5915	next = RExC_parse + 1;
	5916	while (isDIGIT(next) \|\| next == ',') {
	5917	if (*next == ',') {
	5918	if (maxpos)
	5919	break;
	5920	else
	5921	maxpos = next;
	5922	}
	5923	next++;
	5924	}
	5925	if (next == '}') { / got one */
	5926	if (!maxpos)
	5927	maxpos = next;
	5928	RExC_parse++;
	5929	min = atoi(RExC_parse);
	5930	if (*maxpos == ',')
	5931	maxpos++;
	5932	else
	5933	maxpos = RExC_parse;
	5934	max = atoi(maxpos);
	5935	if (!max && *maxpos != '0')
	5936	max = REG_INFTY; /* meaning "infinity" */
	5937	else if (max >= REG_INFTY)
	5938	vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
	5939	RExC_parse = next;
	5940	nextchar(pRExC_state);
	5941
	5942	do_curly:
	5943	if ((flags&SIMPLE)) {
	5944	RExC_naughty += 2 + RExC_naughty / 2;
	5945	reginsert(pRExC_state, CURLY, ret, depth+1);
	5946	Set_Node_Offset(ret, parse_start+1); /* MJD */
	5947	Set_Node_Cur_Length(ret);
	5948	}
	5949	else {
	5950	regnode * const w = reg_node(pRExC_state, WHILEM);
	5951
	5952	w->flags = 0;
	5953	REGTAIL(pRExC_state, ret, w);
	5954	if (!SIZE_ONLY && RExC_extralen) {
	5955	reginsert(pRExC_state, LONGJMP,ret, depth+1);
	5956	reginsert(pRExC_state, NOTHING,ret, depth+1);
	5957	NEXT_OFF(ret) = 3; /* Go over LONGJMP. */
	5958	}
	5959	reginsert(pRExC_state, CURLYX,ret, depth+1);
	5960	/* MJD hk */
	5961	Set_Node_Offset(ret, parse_start+1);
	5962	Set_Node_Length(ret,
	5963	op == '{' ? (RExC_parse - parse_start) : 1);
	5964
	5965	if (!SIZE_ONLY && RExC_extralen)
	5966	NEXT_OFF(ret) = 3; /* Go over NOTHING to LONGJMP. */
	5967	REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
	5968	if (SIZE_ONLY)
	5969	RExC_whilem_seen++, RExC_extralen += 3;
	5970	RExC_naughty += 4 + RExC_naughty; /* compound interest */
	5971	}
	5972	ret->flags = 0;
	5973
	5974	if (min > 0)
	5975	*flagp = WORST;
	5976	if (max > 0)
	5977	*flagp \|= HASWIDTH;
	5978	if (max && max < min)
	5979	vFAIL("Can't do {n,m} with n > m");
	5980	if (!SIZE_ONLY) {
	5981	ARG1_SET(ret, (U16)min);
	5982	ARG2_SET(ret, (U16)max);
	5983	}
	5984
	5985	goto nest_check;
	5986	}
	5987	}
	5988
	5989	if (!ISMULT1(op)) {
	5990	*flagp = flags;
	5991	return(ret);
	5992	}
	5993
	5994	#if 0 /* Now runtime fix should be reliable. */
	5995
	5996	/* if this is reinstated, don't forget to put this back into perldiag:
	5997
	5998	=item Regexp *+ operand could be empty at {#} in regex m/%s/
	5999
	6000	(F) The part of the regexp subject to either the * or + quantifier
	6001	could match an empty string. The {#} shows in the regular
	6002	expression about where the problem was discovered.
	6003
	6004	*/
	6005
	6006	if (!(flags&HASWIDTH) && op != '?')
	6007	vFAIL("Regexp *+ operand could be empty");
	6008	#endif
	6009
	6010	parse_start = RExC_parse;
	6011	nextchar(pRExC_state);
	6012
	6013	*flagp = (op != '+') ? (WORST\|SPSTART\|HASWIDTH) : (WORST\|HASWIDTH);
	6014
	6015	if (op == '*' && (flags&SIMPLE)) {
	6016	reginsert(pRExC_state, STAR, ret, depth+1);
	6017	ret->flags = 0;
	6018	RExC_naughty += 4;
	6019	}
	6020	else if (op == '*') {
	6021	min = 0;
	6022	goto do_curly;
	6023	}
	6024	else if (op == '+' && (flags&SIMPLE)) {
	6025	reginsert(pRExC_state, PLUS, ret, depth+1);
	6026	ret->flags = 0;
	6027	RExC_naughty += 3;
	6028	}
	6029	else if (op == '+') {
	6030	min = 1;
	6031	goto do_curly;
	6032	}
	6033	else if (op == '?') {
	6034	min = 0; max = 1;
	6035	goto do_curly;
	6036	}
	6037	nest_check:
	6038	if (!SIZE_ONLY && !(flags&(HASWIDTH\|POSTPONED)) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) {
	6039	vWARN3(RExC_parse,
	6040	"%.*s matches null string many times",
	6041	(int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
	6042	origparse);
	6043	}
	6044
	6045	if (RExC_parse < RExC_end && *RExC_parse == '?') {
	6046	nextchar(pRExC_state);
	6047	reginsert(pRExC_state, MINMOD, ret, depth+1);
	6048	REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE);
	6049	}
	6050	#ifndef REG_ALLOW_MINMOD_SUSPEND
	6051	else
	6052	#endif
	6053	if (RExC_parse < RExC_end && *RExC_parse == '+') {
	6054	regnode *ender;
	6055	nextchar(pRExC_state);
	6056	ender = reg_node(pRExC_state, SUCCEED);
	6057	REGTAIL(pRExC_state, ret, ender);
	6058	reginsert(pRExC_state, SUSPEND, ret, depth+1);
	6059	ret->flags = 0;
	6060	ender = reg_node(pRExC_state, TAIL);
	6061	REGTAIL(pRExC_state, ret, ender);
	6062	/ret= ender;/
	6063	}
	6064
	6065	if (RExC_parse < RExC_end && ISMULT2(RExC_parse)) {
	6066	RExC_parse++;
	6067	vFAIL("Nested quantifiers");
	6068	}
	6069
	6070	return(ret);
	6071	}
	6072
	6073
	6074	/* reg_namedseq(pRExC_state,UVp)
	6075
	6076	This is expected to be called by a parser routine that has
	6077	recognized'\N' and needs to handle the rest. RExC_parse is
	6078	expected to point at the first char following the N at the time
	6079	of the call.
	6080
	6081	If valuep is non-null then it is assumed that we are parsing inside
	6082	of a charclass definition and the first codepoint in the resolved
	6083	string is returned via *valuep and the routine will return NULL.
	6084	In this mode if a multichar string is returned from the charnames
	6085	handler a warning will be issued, and only the first char in the
	6086	sequence will be examined. If the string returned is zero length
	6087	then the value of *valuep is undefined and NON-NULL will
	6088	be returned to indicate failure. (This will NOT be a valid pointer
	6089	to a regnode.)
	6090
	6091	If value is null then it is assumed that we are parsing normal text
	6092	and inserts a new EXACT node into the program containing the resolved
	6093	string and returns a pointer to the new node. If the string is
	6094	zerolength a NOTHING node is emitted.
	6095
	6096	On success RExC_parse is set to the char following the endbrace.
	6097	Parsing failures will generate a fatal errorvia vFAIL(...)
	6098
	6099	NOTE: We cache all results from the charnames handler locally in
	6100	the RExC_charnames hash (created on first use) to prevent a charnames
	6101	handler from playing silly-buggers and returning a short string and
	6102	then a long string for a given pattern. Since the regexp program
	6103	size is calculated during an initial parse this would result
	6104	in a buffer overrun so we cache to prevent the charname result from
	6105	changing during the course of the parse.
	6106
	6107	*/
	6108	STATIC regnode *
	6109	S_reg_namedseq(pTHX_ RExC_state_t pRExC_state, UV valuep)
	6110	{
	6111	char * name; /* start of the content of the name */
	6112	char * endbrace; /* endbrace following the name */
	6113	SV *sv_str = NULL;
	6114	SV *sv_name = NULL;
	6115	STRLEN len; /* this has various purposes throughout the code */
	6116	bool cached = 0; /* if this is true then we shouldn't refcount dev sv_str */
	6117	regnode *ret = NULL;
	6118
	6119	if (*RExC_parse != '{') {
	6120	vFAIL("Missing braces on \\N{}");
	6121	}
	6122	name = RExC_parse+1;
	6123	endbrace = strchr(RExC_parse, '}');
	6124	if ( ! endbrace ) {
	6125	RExC_parse++;
	6126	vFAIL("Missing right brace on \\N{}");
	6127	}
	6128	RExC_parse = endbrace + 1;
	6129
	6130
	6131	/* RExC_parse points at the beginning brace,
	6132	endbrace points at the last */
	6133	if ( name[0]=='U' && name[1]=='+' ) {
	6134	/* its a "unicode hex" notation {U+89AB} */
	6135	I32 fl = PERL_SCAN_ALLOW_UNDERSCORES
	6136	\| PERL_SCAN_DISALLOW_PREFIX
	6137	\| (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
	6138	UV cp;
	6139	len = (STRLEN)(endbrace - name - 2);
	6140	cp = grok_hex(name + 2, &len, &fl, NULL);
	6141	if ( len != (STRLEN)(endbrace - name - 2) ) {
	6142	cp = 0xFFFD;
	6143	}
	6144	if (cp > 0xff)
	6145	RExC_utf8 = 1;
	6146	if ( valuep ) {
	6147	*valuep = cp;
	6148	return NULL;
	6149	}
	6150	sv_str= Perl_newSVpvf_nocontext("%c",(int)cp);
	6151	} else {
	6152	/* fetch the charnames handler for this scope */
	6153	HV * const table = GvHV(PL_hintgv);
	6154	SV **cvp= table ?
	6155	hv_fetchs(table, "charnames", FALSE) :
	6156	NULL;
	6157	SV cv= cvp ? cvp : NULL;
	6158	HE *he_str;
	6159	int count;
	6160	/* create an SV with the name as argument */
	6161	sv_name = newSVpvn(name, endbrace - name);
	6162
	6163	if (!table \|\| !(PL_hints & HINT_LOCALIZE_HH)) {
	6164	vFAIL2("Constant(\\N{%s}) unknown: "
	6165	"(possibly a missing \"use charnames ...\")",
	6166	SvPVX(sv_name));
	6167	}
	6168	if (!cvp \|\| !SvOK(cvp)) { / when $^H{charnames} = undef; */
	6169	vFAIL2("Constant(\\N{%s}): "
	6170	"$^H{charnames} is not defined",SvPVX(sv_name));
	6171	}
	6172
	6173
	6174
	6175	if (!RExC_charnames) {
	6176	/* make sure our cache is allocated */
	6177	RExC_charnames = newHV();
	6178	sv_2mortal((SV*)RExC_charnames);
	6179	}
	6180	/* see if we have looked this one up before */
	6181	he_str = hv_fetch_ent( RExC_charnames, sv_name, 0, 0 );
	6182	if ( he_str ) {
	6183	sv_str = HeVAL(he_str);
	6184	cached = 1;
	6185	} else {
	6186	dSP ;
	6187
	6188	ENTER ;
	6189	SAVETMPS ;
	6190	PUSHMARK(SP) ;
	6191
	6192	XPUSHs(sv_name);
	6193
	6194	PUTBACK ;
	6195
	6196	count= call_sv(cv, G_SCALAR);
	6197
	6198	if (count == 1) { /* XXXX is this right? dmq */
	6199	sv_str = POPs;
	6200	SvREFCNT_inc_simple_void(sv_str);
	6201	}
	6202
	6203	SPAGAIN ;
	6204	PUTBACK ;
	6205	FREETMPS ;
	6206	LEAVE ;
	6207
	6208	if ( !sv_str \|\| !SvOK(sv_str) ) {
	6209	vFAIL2("Constant(\\N{%s}): Call to &{$^H{charnames}} "
	6210	"did not return a defined value",SvPVX(sv_name));
	6211	}
	6212	if (hv_store_ent( RExC_charnames, sv_name, sv_str, 0))
	6213	cached = 1;
	6214	}
	6215	}
	6216	if (valuep) {
	6217	char *p = SvPV(sv_str, len);
	6218	if (len) {
	6219	STRLEN numlen = 1;
	6220	if ( SvUTF8(sv_str) ) {
	6221	valuep = utf8_to_uvchr((U8)p, &numlen);
	6222	if (*valuep > 0x7F)
	6223	RExC_utf8 = 1;
	6224	/* XXXX
	6225	We have to turn on utf8 for high bit chars otherwise
	6226	we get failures with
	6227
	6228	"ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i
	6229	"SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i
	6230
	6231	This is different from what \x{} would do with the same
	6232	codepoint, where the condition is > 0xFF.
	6233	- dmq
	6234	*/
	6235
	6236
	6237	} else {
	6238	valuep = (UV)p;
	6239	/* warn if we havent used the whole string? */
	6240	}
	6241	if (numlen<len && SIZE_ONLY && ckWARN(WARN_REGEXP)) {
	6242	vWARN2(RExC_parse,
	6243	"Ignoring excess chars from \\N{%s} in character class",
	6244	SvPVX(sv_name)
	6245	);
	6246	}
	6247	} else if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
	6248	vWARN2(RExC_parse,
	6249	"Ignoring zero length \\N{%s} in character class",
	6250	SvPVX(sv_name)
	6251	);
	6252	}
	6253	if (sv_name)
	6254	SvREFCNT_dec(sv_name);
	6255	if (!cached)
	6256	SvREFCNT_dec(sv_str);
	6257	return len ? NULL : (regnode *)&len;
	6258	} else if(SvCUR(sv_str)) {
	6259
	6260	char *s;
	6261	char p, pend;
	6262	STRLEN charlen = 1;
	6263	#ifdef DEBUGGING
	6264	char * parse_start = name-3; /* needed for the offsets */
	6265	#endif
	6266	GET_RE_DEBUG_FLAGS_DECL; /* needed for the offsets */
	6267
	6268	ret = reg_node(pRExC_state,
	6269	(U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
	6270	s= STRING(ret);
	6271
	6272	if ( RExC_utf8 && !SvUTF8(sv_str) ) {
	6273	sv_utf8_upgrade(sv_str);
	6274	} else if ( !RExC_utf8 && SvUTF8(sv_str) ) {
	6275	RExC_utf8= 1;
	6276	}
	6277
	6278	p = SvPV(sv_str, len);
	6279	pend = p + len;
	6280	/* len is the length written, charlen is the size the char read */
	6281	for ( len = 0; p < pend; p += charlen ) {
	6282	if (UTF) {
	6283	UV uvc = utf8_to_uvchr((U8*)p, &charlen);
	6284	if (FOLD) {
	6285	STRLEN foldlen,numlen;
	6286	U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
	6287	uvc = toFOLD_uni(uvc, tmpbuf, &foldlen);
	6288	/* Emit all the Unicode characters. */
	6289
	6290	for (foldbuf = tmpbuf;
	6291	foldlen;
	6292	foldlen -= numlen)
	6293	{
	6294	uvc = utf8_to_uvchr(foldbuf, &numlen);
	6295	if (numlen > 0) {
	6296	const STRLEN unilen = reguni(pRExC_state, uvc, s);
	6297	s += unilen;
	6298	len += unilen;
	6299	/* In EBCDIC the numlen
	6300	* and unilen can differ. */
	6301	foldbuf += numlen;
	6302	if (numlen >= foldlen)
	6303	break;
	6304	}
	6305	else
	6306	break; /* "Can't happen." */
	6307	}
	6308	} else {
	6309	const STRLEN unilen = reguni(pRExC_state, uvc, s);
	6310	if (unilen > 0) {
	6311	s += unilen;
	6312	len += unilen;
	6313	}
	6314	}
	6315	} else {
	6316	len++;
	6317	REGC(*p, s++);
	6318	}
	6319	}
	6320	if (SIZE_ONLY) {
	6321	RExC_size += STR_SZ(len);
	6322	} else {
	6323	STR_LEN(ret) = len;
	6324	RExC_emit += STR_SZ(len);
	6325	}
	6326	Set_Node_Cur_Length(ret); /* MJD */
	6327	RExC_parse--;
	6328	nextchar(pRExC_state);
	6329	} else {
	6330	ret = reg_node(pRExC_state,NOTHING);
	6331	}
	6332	if (!cached) {
	6333	SvREFCNT_dec(sv_str);
	6334	}
	6335	if (sv_name) {
	6336	SvREFCNT_dec(sv_name);
	6337	}
	6338	return ret;
	6339
	6340	}
	6341
	6342
	6343	/*
	6344	* reg_recode
	6345	*
	6346	* It returns the code point in utf8 for the value in *encp.
	6347	* value: a code value in the source encoding
	6348	* encp: a pointer to an Encode object
	6349	*
	6350	* If the result from Encode is not a single character,
	6351	* it returns U+FFFD (Replacement character) and sets *encp to NULL.
	6352	*/
	6353	STATIC UV
	6354	S_reg_recode(pTHX_ const char value, SV **encp)
	6355	{
	6356	STRLEN numlen = 1;
	6357	SV * const sv = sv_2mortal(newSVpvn(&value, numlen));
	6358	const char * const s = encp && encp ? sv_recode_to_utf8(sv, encp)
	6359	: SvPVX(sv);
	6360	const STRLEN newlen = SvCUR(sv);
	6361	UV uv = UNICODE_REPLACEMENT;
	6362
	6363	if (newlen)
	6364	uv = SvUTF8(sv)
	6365	? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
	6366	: (U8)s;
	6367
	6368	if (!newlen \|\| numlen != newlen) {
	6369	uv = UNICODE_REPLACEMENT;
	6370	if (encp)
	6371	*encp = NULL;
	6372	}
	6373	return uv;
	6374	}
	6375
	6376
	6377	/*
	6378	- regatom - the lowest level
	6379
	6380	Try to identify anything special at the start of the pattern. If there
	6381	is, then handle it as required. This may involve generating a single regop,
	6382	such as for an assertion; or it may involve recursing, such as to
	6383	handle a () structure.
	6384
	6385	If the string doesn't start with something special then we gobble up
	6386	as much literal text as we can.
	6387
	6388	Once we have been able to handle whatever type of thing started the
	6389	sequence, we return.
	6390
	6391	Note: we have to be careful with escapes, as they can be both literal
	6392	and special, and in the case of \10 and friends can either, depending
	6393	on context. Specifically there are two separate switches for handling
	6394	escape sequences, with the one for handling literal escapes requiring
	6395	a dummy entry for all of the special escapes that are actually handled
	6396	by the other.
	6397	*/
	6398
	6399	STATIC regnode *
	6400	S_regatom(pTHX_ RExC_state_t pRExC_state, I32 flagp, U32 depth)
	6401	{
	6402	dVAR;
	6403	register regnode *ret = NULL;
	6404	I32 flags;
	6405	char *parse_start = RExC_parse;
	6406	GET_RE_DEBUG_FLAGS_DECL;
	6407	DEBUG_PARSE("atom");
	6408	flagp = WORST; / Tentatively. */
	6409
	6410
	6411	tryagain:
	6412	switch (*RExC_parse) {
	6413	case '^':
	6414	RExC_seen_zerolen++;
	6415	nextchar(pRExC_state);
	6416	if (RExC_flags & RXf_PMf_MULTILINE)
	6417	ret = reg_node(pRExC_state, MBOL);
	6418	else if (RExC_flags & RXf_PMf_SINGLELINE)
	6419	ret = reg_node(pRExC_state, SBOL);
	6420	else
	6421	ret = reg_node(pRExC_state, BOL);
	6422	Set_Node_Length(ret, 1); /* MJD */
	6423	break;
	6424	case '$':
	6425	nextchar(pRExC_state);
	6426	if (*RExC_parse)
	6427	RExC_seen_zerolen++;
	6428	if (RExC_flags & RXf_PMf_MULTILINE)
	6429	ret = reg_node(pRExC_state, MEOL);
	6430	else if (RExC_flags & RXf_PMf_SINGLELINE)
	6431	ret = reg_node(pRExC_state, SEOL);
	6432	else
	6433	ret = reg_node(pRExC_state, EOL);
	6434	Set_Node_Length(ret, 1); /* MJD */
	6435	break;
	6436	case '.':
	6437	nextchar(pRExC_state);
	6438	if (RExC_flags & RXf_PMf_SINGLELINE)
	6439	ret = reg_node(pRExC_state, SANY);
	6440	else
	6441	ret = reg_node(pRExC_state, REG_ANY);
	6442	*flagp \|= HASWIDTH\|SIMPLE;
	6443	RExC_naughty++;
	6444	Set_Node_Length(ret, 1); /* MJD */
	6445	break;
	6446	case '[':
	6447	{
	6448	char * const oregcomp_parse = ++RExC_parse;
	6449	ret = regclass(pRExC_state,depth+1);
	6450	if (*RExC_parse != ']') {
	6451	RExC_parse = oregcomp_parse;
	6452	vFAIL("Unmatched [");
	6453	}
	6454	nextchar(pRExC_state);
	6455	*flagp \|= HASWIDTH\|SIMPLE;
	6456	Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */
	6457	break;
	6458	}
	6459	case '(':
	6460	nextchar(pRExC_state);
	6461	ret = reg(pRExC_state, 1, &flags,depth+1);
	6462	if (ret == NULL) {
	6463	if (flags & TRYAGAIN) {
	6464	if (RExC_parse == RExC_end) {
	6465	/* Make parent create an empty node if needed. */
	6466	*flagp \|= TRYAGAIN;
	6467	return(NULL);
	6468	}
	6469	goto tryagain;
	6470	}
	6471	return(NULL);
	6472	}
	6473	*flagp \|= flags&(HASWIDTH\|SPSTART\|SIMPLE\|POSTPONED);
	6474	break;
	6475	case '\|':
	6476	case ')':
	6477	if (flags & TRYAGAIN) {
	6478	*flagp \|= TRYAGAIN;
	6479	return NULL;
	6480	}
	6481	vFAIL("Internal urp");
	6482	/* Supposed to be caught earlier. */
	6483	break;
	6484	case '{':
	6485	if (!regcurly(RExC_parse)) {
	6486	RExC_parse++;
	6487	goto defchar;
	6488	}
	6489	/* FALL THROUGH */
	6490	case '?':
	6491	case '+':
	6492	case '*':
	6493	RExC_parse++;
	6494	vFAIL("Quantifier follows nothing");
	6495	break;
	6496	case '\\':
	6497	/* Special Escapes
	6498
	6499	This switch handles escape sequences that resolve to some kind
	6500	of special regop and not to literal text. Escape sequnces that
	6501	resolve to literal text are handled below in the switch marked
	6502	"Literal Escapes".
	6503
	6504	Every entry in this switch must have a corresponding entry
	6505	in the literal escape switch. However, the opposite is not
	6506	required, as the default for this switch is to jump to the
	6507	literal text handling code.
	6508	*/
	6509	switch (*++RExC_parse) {
	6510	/* Special Escapes */
	6511	case 'A':
	6512	RExC_seen_zerolen++;
	6513	ret = reg_node(pRExC_state, SBOL);
	6514	*flagp \|= SIMPLE;
	6515	goto finish_meta_pat;
	6516	case 'G':
	6517	ret = reg_node(pRExC_state, GPOS);
	6518	RExC_seen \|= REG_SEEN_GPOS;
	6519	*flagp \|= SIMPLE;
	6520	goto finish_meta_pat;
	6521	case 'K':
	6522	RExC_seen_zerolen++;
	6523	ret = reg_node(pRExC_state, KEEPS);
	6524	*flagp \|= SIMPLE;
	6525	goto finish_meta_pat;
	6526	case 'Z':
	6527	ret = reg_node(pRExC_state, SEOL);
	6528	*flagp \|= SIMPLE;
	6529	RExC_seen_zerolen++; /* Do not optimize RE away */
	6530	goto finish_meta_pat;
	6531	case 'z':
	6532	ret = reg_node(pRExC_state, EOS);
	6533	*flagp \|= SIMPLE;
	6534	RExC_seen_zerolen++; /* Do not optimize RE away */
	6535	goto finish_meta_pat;
	6536	case 'C':
	6537	ret = reg_node(pRExC_state, CANY);
	6538	RExC_seen \|= REG_SEEN_CANY;
	6539	*flagp \|= HASWIDTH\|SIMPLE;
	6540	goto finish_meta_pat;
	6541	case 'X':
	6542	ret = reg_node(pRExC_state, CLUMP);
	6543	*flagp \|= HASWIDTH;
	6544	goto finish_meta_pat;
	6545	case 'w':
	6546	ret = reg_node(pRExC_state, (U8)(LOC ? ALNUML : ALNUM));
	6547	*flagp \|= HASWIDTH\|SIMPLE;
	6548	goto finish_meta_pat;
	6549	case 'W':
	6550	ret = reg_node(pRExC_state, (U8)(LOC ? NALNUML : NALNUM));
	6551	*flagp \|= HASWIDTH\|SIMPLE;
	6552	goto finish_meta_pat;
	6553	case 'b':
	6554	RExC_seen_zerolen++;
	6555	RExC_seen \|= REG_SEEN_LOOKBEHIND;
	6556	ret = reg_node(pRExC_state, (U8)(LOC ? BOUNDL : BOUND));
	6557	*flagp \|= SIMPLE;
	6558	goto finish_meta_pat;
	6559	case 'B':
	6560	RExC_seen_zerolen++;
	6561	RExC_seen \|= REG_SEEN_LOOKBEHIND;
	6562	ret = reg_node(pRExC_state, (U8)(LOC ? NBOUNDL : NBOUND));
	6563	*flagp \|= SIMPLE;
	6564	goto finish_meta_pat;
	6565	case 's':
	6566	ret = reg_node(pRExC_state, (U8)(LOC ? SPACEL : SPACE));
	6567	*flagp \|= HASWIDTH\|SIMPLE;
	6568	goto finish_meta_pat;
	6569	case 'S':
	6570	ret = reg_node(pRExC_state, (U8)(LOC ? NSPACEL : NSPACE));
	6571	*flagp \|= HASWIDTH\|SIMPLE;
	6572	goto finish_meta_pat;
	6573	case 'd':
	6574	ret = reg_node(pRExC_state, DIGIT);
	6575	*flagp \|= HASWIDTH\|SIMPLE;
	6576	goto finish_meta_pat;
	6577	case 'D':
	6578	ret = reg_node(pRExC_state, NDIGIT);
	6579	*flagp \|= HASWIDTH\|SIMPLE;
	6580	goto finish_meta_pat;
	6581	case 'v':
	6582	ret = reganode(pRExC_state, PRUNE, 0);
	6583	ret->flags = 1;
	6584	*flagp \|= SIMPLE;
	6585	goto finish_meta_pat;
	6586	case 'V':
	6587	ret = reganode(pRExC_state, SKIP, 0);
	6588	ret->flags = 1;
	6589	*flagp \|= SIMPLE;
	6590	finish_meta_pat:
	6591	nextchar(pRExC_state);
	6592	Set_Node_Length(ret, 2); /* MJD */
	6593	break;
	6594	case 'p':
	6595	case 'P':
	6596	{
	6597	char* const oldregxend = RExC_end;
	6598	#ifdef DEBUGGING
	6599	char* parse_start = RExC_parse - 2;
	6600	#endif
	6601
	6602	if (RExC_parse[1] == '{') {
	6603	/* a lovely hack--pretend we saw [\pX] instead */
	6604	RExC_end = strchr(RExC_parse, '}');
	6605	if (!RExC_end) {
	6606	const U8 c = (U8)*RExC_parse;
	6607	RExC_parse += 2;
	6608	RExC_end = oldregxend;
	6609	vFAIL2("Missing right brace on \\%c{}", c);
	6610	}
	6611	RExC_end++;
	6612	}
	6613	else {
	6614	RExC_end = RExC_parse + 2;
	6615	if (RExC_end > oldregxend)
	6616	RExC_end = oldregxend;
	6617	}
	6618	RExC_parse--;
	6619
	6620	ret = regclass(pRExC_state,depth+1);
	6621
	6622	RExC_end = oldregxend;
	6623	RExC_parse--;
	6624
	6625	Set_Node_Offset(ret, parse_start + 2);
	6626	Set_Node_Cur_Length(ret);
	6627	nextchar(pRExC_state);
	6628	*flagp \|= HASWIDTH\|SIMPLE;
	6629	}
	6630	break;
	6631	case 'N':
	6632	/* Handle \N{NAME} here and not below because it can be
	6633	multicharacter. join_exact() will join them up later on.
	6634	Also this makes sure that things like /\N{BLAH}+/ and
	6635	\N{BLAH} being multi char Just Happen. dmq*/
	6636	++RExC_parse;
	6637	ret= reg_namedseq(pRExC_state, NULL);
	6638	break;
	6639	case 'k': /* Handle \k<NAME> and \k'NAME' */
	6640	parse_named_seq:
	6641	{
	6642	char ch= RExC_parse[1];
	6643	if (ch != '<' && ch != '\'' && ch != '{') {
	6644	RExC_parse++;
	6645	vFAIL2("Sequence %.2s... not terminated",parse_start);
	6646	} else {
	6647	/* this pretty much dupes the code for (?P=...) in reg(), if
	6648	you change this make sure you change that */
	6649	char* name_start = (RExC_parse += 2);
	6650	U32 num = 0;
	6651	SV *sv_dat = reg_scan_name(pRExC_state,
	6652	SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
	6653	ch= (ch == '<') ? '>' : (ch == '{') ? '}' : '\'';
	6654	if (RExC_parse == name_start \|\| *RExC_parse != ch)
	6655	vFAIL2("Sequence %.3s... not terminated",parse_start);
	6656
	6657	if (!SIZE_ONLY) {
	6658	num = add_data( pRExC_state, 1, "S" );
	6659	RExC_rxi->data->data[num]=(void*)sv_dat;
	6660	SvREFCNT_inc(sv_dat);
	6661	}
	6662
	6663	RExC_sawback = 1;
	6664	ret = reganode(pRExC_state,
	6665	(U8)(FOLD ? (LOC ? NREFFL : NREFF) : NREF),
	6666	num);
	6667	*flagp \|= HASWIDTH;
	6668
	6669	/* override incorrect value set in reganode MJD */
	6670	Set_Node_Offset(ret, parse_start+1);
	6671	Set_Node_Cur_Length(ret); /* MJD */
	6672	nextchar(pRExC_state);
	6673
	6674	}
	6675	break;
	6676	}
	6677	case 'g':
	6678	case '1': case '2': case '3': case '4':
	6679	case '5': case '6': case '7': case '8': case '9':
	6680	{
	6681	I32 num;
	6682	bool isg = *RExC_parse == 'g';
	6683	bool isrel = 0;
	6684	bool hasbrace = 0;
	6685	if (isg) {
	6686	RExC_parse++;
	6687	if (*RExC_parse == '{') {
	6688	RExC_parse++;
	6689	hasbrace = 1;
	6690	}
	6691	if (*RExC_parse == '-') {
	6692	RExC_parse++;
	6693	isrel = 1;
	6694	}
	6695	if (hasbrace && !isDIGIT(*RExC_parse)) {
	6696	if (isrel) RExC_parse--;
	6697	RExC_parse -= 2;
	6698	goto parse_named_seq;
	6699	} }
	6700	num = atoi(RExC_parse);
	6701	if (isrel) {
	6702	num = RExC_npar - num;
	6703	if (num < 1)
	6704	vFAIL("Reference to nonexistent or unclosed group");
	6705	}
	6706	if (!isg && num > 9 && num >= RExC_npar)
	6707	goto defchar;
	6708	else {
	6709	char * const parse_start = RExC_parse - 1; /* MJD */
	6710	while (isDIGIT(*RExC_parse))
	6711	RExC_parse++;
	6712	if (parse_start == RExC_parse - 1)
	6713	vFAIL("Unterminated \\g... pattern");
	6714	if (hasbrace) {
	6715	if (*RExC_parse != '}')
	6716	vFAIL("Unterminated \\g{...} pattern");
	6717	RExC_parse++;
	6718	}
	6719	if (!SIZE_ONLY) {
	6720	if (num > (I32)RExC_rx->nparens)
	6721	vFAIL("Reference to nonexistent group");
	6722	}
	6723	RExC_sawback = 1;
	6724	ret = reganode(pRExC_state,
	6725	(U8)(FOLD ? (LOC ? REFFL : REFF) : REF),
	6726	num);
	6727	*flagp \|= HASWIDTH;
	6728
	6729	/* override incorrect value set in reganode MJD */
	6730	Set_Node_Offset(ret, parse_start+1);
	6731	Set_Node_Cur_Length(ret); /* MJD */
	6732	RExC_parse--;
	6733	nextchar(pRExC_state);
	6734	}
	6735	}
	6736	break;
	6737	case '\0':
	6738	if (RExC_parse >= RExC_end)
	6739	FAIL("Trailing \\");
	6740	/* FALL THROUGH */
	6741	default:
	6742	/* Do not generate "unrecognized" warnings here, we fall
	6743	back into the quick-grab loop below */
	6744	parse_start--;
	6745	goto defchar;
	6746	}
	6747	break;
	6748
	6749	case '#':
	6750	if (RExC_flags & RXf_PMf_EXTENDED) {
	6751	if ( reg_skipcomment( pRExC_state ) )
	6752	goto tryagain;
	6753	}
	6754	/* FALL THROUGH */
	6755
	6756	default: {
	6757	register STRLEN len;
	6758	register UV ender;
	6759	register char *p;
	6760	char *s;
	6761	STRLEN foldlen;
	6762	U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
	6763
	6764	parse_start = RExC_parse - 1;
	6765
	6766	RExC_parse++;
	6767
	6768	defchar:
	6769	ender = 0;
	6770	ret = reg_node(pRExC_state,
	6771	(U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
	6772	s = STRING(ret);
	6773	for (len = 0, p = RExC_parse - 1;
	6774	len < 127 && p < RExC_end;
	6775	len++)
	6776	{
	6777	char * const oldp = p;
	6778
	6779	if (RExC_flags & RXf_PMf_EXTENDED)
	6780	p = regwhite( pRExC_state, p );
	6781	switch (*p) {
	6782	case '^':
	6783	case '$':
	6784	case '.':
	6785	case '[':
	6786	case '(':
	6787	case ')':
	6788	case '\|':
	6789	goto loopdone;
	6790	case '\\':
	6791	/* Literal Escapes Switch
	6792
	6793	This switch is meant to handle escape sequences that
	6794	resolve to a literal character.
	6795
	6796	Every escape sequence that represents something
	6797	else, like an assertion or a char class, is handled
	6798	in the switch marked 'Special Escapes' above in this
	6799	routine, but also has an entry here as anything that
	6800	isn't explicitly mentioned here will be treated as
	6801	an unescaped equivalent literal.
	6802	*/
	6803
	6804	switch (*++p) {
	6805	/* These are all the special escapes. */
	6806	case 'A': /* Start assertion */
	6807	case 'b': case 'B': /* Word-boundary assertion*/
	6808	case 'C': /* Single char !DANGEROUS! */
	6809	case 'd': case 'D': /* digit class */
	6810	case 'g': case 'G': /* generic-backref, pos assertion */
	6811	case 'k': case 'K': /* named backref, keep marker */
	6812	case 'N': /* named char sequence */
	6813	case 'p': case 'P': /* unicode property */
	6814	case 's': case 'S': /* space class */
	6815	case 'v': case 'V': /* (PRUNE) and (SKIP) */
	6816	case 'w': case 'W': /* word class */
	6817	case 'X': /* eXtended Unicode "combining character sequence" */
	6818	case 'z': case 'Z': /* End of line/string assertion */
	6819	--p;
	6820	goto loopdone;
	6821
	6822	/* Anything after here is an escape that resolves to a
	6823	literal. (Except digits, which may or may not)
	6824	*/
	6825	case 'n':
	6826	ender = '\n';
	6827	p++;
	6828	break;
	6829	case 'r':
	6830	ender = '\r';
	6831	p++;
	6832	break;
	6833	case 't':
	6834	ender = '\t';
	6835	p++;
	6836	break;
	6837	case 'f':
	6838	ender = '\f';
	6839	p++;
	6840	break;
	6841	case 'e':
	6842	ender = ASCII_TO_NATIVE('\033');
	6843	p++;
	6844	break;
	6845	case 'a':
	6846	ender = ASCII_TO_NATIVE('\007');
	6847	p++;
	6848	break;
	6849	case 'x':
	6850	if (*++p == '{') {
	6851	char* const e = strchr(p, '}');
	6852
	6853	if (!e) {
	6854	RExC_parse = p + 1;
	6855	vFAIL("Missing right brace on \\x{}");
	6856	}
	6857	else {
	6858	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	6859	\| PERL_SCAN_DISALLOW_PREFIX;
	6860	STRLEN numlen = e - p - 1;
	6861	ender = grok_hex(p + 1, &numlen, &flags, NULL);
	6862	if (ender > 0xff)
	6863	RExC_utf8 = 1;
	6864	p = e + 1;
	6865	}
	6866	}
	6867	else {
	6868	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
	6869	STRLEN numlen = 2;
	6870	ender = grok_hex(p, &numlen, &flags, NULL);
	6871	p += numlen;
	6872	}
	6873	if (PL_encoding && ender < 0x100)
	6874	goto recode_encoding;
	6875	break;
	6876	case 'c':
	6877	p++;
	6878	ender = UCHARAT(p++);
	6879	ender = toCTRL(ender);
	6880	break;
	6881	case '0': case '1': case '2': case '3':case '4':
	6882	case '5': case '6': case '7': case '8':case '9':
	6883	if (*p == '0' \|\|
	6884	(isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) {
	6885	I32 flags = 0;
	6886	STRLEN numlen = 3;
	6887	ender = grok_oct(p, &numlen, &flags, NULL);
	6888	p += numlen;
	6889	}
	6890	else {
	6891	--p;
	6892	goto loopdone;
	6893	}
	6894	if (PL_encoding && ender < 0x100)
	6895	goto recode_encoding;
	6896	break;
	6897	recode_encoding:
	6898	{
	6899	SV* enc = PL_encoding;
	6900	ender = reg_recode((const char)(U8)ender, &enc);
	6901	if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
	6902	vWARN(p, "Invalid escape in the specified encoding");
	6903	RExC_utf8 = 1;
	6904	}
	6905	break;
	6906	case '\0':
	6907	if (p >= RExC_end)
	6908	FAIL("Trailing \\");
	6909	/* FALL THROUGH */
	6910	default:
	6911	if (!SIZE_ONLY&& isALPHA(*p) && ckWARN(WARN_REGEXP))
	6912	vWARN2(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
	6913	goto normal_default;
	6914	}
	6915	break;
	6916	default:
	6917	normal_default:
	6918	if (UTF8_IS_START(*p) && UTF) {
	6919	STRLEN numlen;
	6920	ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
	6921	&numlen, UTF8_ALLOW_DEFAULT);
	6922	p += numlen;
	6923	}
	6924	else
	6925	ender = *p++;
	6926	break;
	6927	}
	6928	if ( RExC_flags & RXf_PMf_EXTENDED)
	6929	p = regwhite( pRExC_state, p );
	6930	if (UTF && FOLD) {
	6931	/* Prime the casefolded buffer. */
	6932	ender = toFOLD_uni(ender, tmpbuf, &foldlen);
	6933	}
	6934	if (p < RExC_end && ISMULT2(p)) { /* Back off on ?+. /
	6935	if (len)
	6936	p = oldp;
	6937	else if (UTF) {
	6938	if (FOLD) {
	6939	/* Emit all the Unicode characters. */
	6940	STRLEN numlen;
	6941	for (foldbuf = tmpbuf;
	6942	foldlen;
	6943	foldlen -= numlen) {
	6944	ender = utf8_to_uvchr(foldbuf, &numlen);
	6945	if (numlen > 0) {
	6946	const STRLEN unilen = reguni(pRExC_state, ender, s);
	6947	s += unilen;
	6948	len += unilen;
	6949	/* In EBCDIC the numlen
	6950	* and unilen can differ. */
	6951	foldbuf += numlen;
	6952	if (numlen >= foldlen)
	6953	break;
	6954	}
	6955	else
	6956	break; /* "Can't happen." */
	6957	}
	6958	}
	6959	else {
	6960	const STRLEN unilen = reguni(pRExC_state, ender, s);
	6961	if (unilen > 0) {
	6962	s += unilen;
	6963	len += unilen;
	6964	}
	6965	}
	6966	}
	6967	else {
	6968	len++;
	6969	REGC((char)ender, s++);
	6970	}
	6971	break;
	6972	}
	6973	if (UTF) {
	6974	if (FOLD) {
	6975	/* Emit all the Unicode characters. */
	6976	STRLEN numlen;
	6977	for (foldbuf = tmpbuf;
	6978	foldlen;
	6979	foldlen -= numlen) {
	6980	ender = utf8_to_uvchr(foldbuf, &numlen);
	6981	if (numlen > 0) {
	6982	const STRLEN unilen = reguni(pRExC_state, ender, s);
	6983	len += unilen;
	6984	s += unilen;
	6985	/* In EBCDIC the numlen
	6986	* and unilen can differ. */
	6987	foldbuf += numlen;
	6988	if (numlen >= foldlen)
	6989	break;
	6990	}
	6991	else
	6992	break;
	6993	}
	6994	}
	6995	else {
	6996	const STRLEN unilen = reguni(pRExC_state, ender, s);
	6997	if (unilen > 0) {
	6998	s += unilen;
	6999	len += unilen;
	7000	}
	7001	}
	7002	len--;
	7003	}
	7004	else
	7005	REGC((char)ender, s++);
	7006	}
	7007	loopdone:
	7008	RExC_parse = p - 1;
	7009	Set_Node_Cur_Length(ret); /* MJD */
	7010	nextchar(pRExC_state);
	7011	{
	7012	/* len is STRLEN which is unsigned, need to copy to signed */
	7013	IV iv = len;
	7014	if (iv < 0)
	7015	vFAIL("Internal disaster");
	7016	}
	7017	if (len > 0)
	7018	*flagp \|= HASWIDTH;
	7019	if (len == 1 && UNI_IS_INVARIANT(ender))
	7020	*flagp \|= SIMPLE;
	7021
	7022	if (SIZE_ONLY)
	7023	RExC_size += STR_SZ(len);
	7024	else {
	7025	STR_LEN(ret) = len;
	7026	RExC_emit += STR_SZ(len);
	7027	}
	7028	}
	7029	break;
	7030	}
	7031
	7032	return(ret);
	7033	}
	7034
	7035	STATIC char *
	7036	S_regwhite( RExC_state_t pRExC_state, char p )
	7037	{
	7038	const char *e = RExC_end;
	7039	while (p < e) {
	7040	if (isSPACE(*p))
	7041	++p;
	7042	else if (*p == '#') {
	7043	bool ended = 0;
	7044	do {
	7045	if (*p++ == '\n') {
	7046	ended = 1;
	7047	break;
	7048	}
	7049	} while (p < e);
	7050	if (!ended)
	7051	RExC_seen \|= REG_SEEN_RUN_ON_COMMENT;
	7052	}
	7053	else
	7054	break;
	7055	}
	7056	return p;
	7057	}
	7058
	7059	/* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]].
	7060	Character classes ([:foo:]) can also be negated ([:^foo:]).
	7061	Returns a named class id (ANYOF_XXX) if successful, -1 otherwise.
	7062	Equivalence classes ([=foo=]) and composites ([.foo.]) are parsed,
	7063	but trigger failures because they are currently unimplemented. */
	7064
	/* Classify the character that follows '[' inside a bracketed class:
	 * ':' opens a POSIX character class, '=' and '.' open the equivalence
	 * class / collating element forms that are parsed but not implemented. */
	#define POSIXCC_DONE(c)   ((c) == ':')
	#define POSIXCC_NOTYET(c) ((c) == '=' || (c) == '.')
	#define POSIXCC(c) (POSIXCC_DONE(c) || POSIXCC_NOTYET(c))
	7068
	7069	STATIC I32
	7070	S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
	7071	{
	7072	dVAR;
	7073	I32 namedclass = OOB_NAMEDCLASS;
	7074
	7075	if (value == '[' && RExC_parse + 1 < RExC_end &&
	7076	/* I smell either [: or [= or [. -- POSIX has been here, right? */
	7077	POSIXCC(UCHARAT(RExC_parse))) {
	7078	const char c = UCHARAT(RExC_parse);
	7079	char* const s = RExC_parse++;
	7080
	7081	while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
	7082	RExC_parse++;
	7083	if (RExC_parse == RExC_end)
	7084	/* Grandfather lone [:, [=, [. */
	7085	RExC_parse = s;
	7086	else {
	7087	const char* const t = RExC_parse++; /* skip over the c */
	7088	assert(*t == c);
	7089
	7090	if (UCHARAT(RExC_parse) == ']') {
	7091	const char *posixcc = s + 1;
	7092	RExC_parse++; /* skip over the ending ] */
	7093
	7094	if (*s == ':') {
	7095	const I32 complement = posixcc == '^' ? posixcc++ : 0;
	7096	const I32 skip = t - posixcc;
	7097
	7098	/* Initially switch on the length of the name. */
	7099	switch (skip) {
	7100	case 4:
	7101	if (memEQ(posixcc, "word", 4)) /* this is not POSIX, this is the Perl \w */
	7102	namedclass = complement ? ANYOF_NALNUM : ANYOF_ALNUM;
	7103	break;
	7104	case 5:
	7105	/* Names all of length 5. */
	7106	/* alnum alpha ascii blank cntrl digit graph lower
	7107	print punct space upper */
	7108	/* Offset 4 gives the best switch position. */
	7109	switch (posixcc[4]) {
	7110	case 'a':
	7111	if (memEQ(posixcc, "alph", 4)) /* alpha */
	7112	namedclass = complement ? ANYOF_NALPHA : ANYOF_ALPHA;
	7113	break;
	7114	case 'e':
	7115	if (memEQ(posixcc, "spac", 4)) /* space */
	7116	namedclass = complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
	7117	break;
	7118	case 'h':
	7119	if (memEQ(posixcc, "grap", 4)) /* graph */
	7120	namedclass = complement ? ANYOF_NGRAPH : ANYOF_GRAPH;
	7121	break;
	7122	case 'i':
	7123	if (memEQ(posixcc, "asci", 4)) /* ascii */
	7124	namedclass = complement ? ANYOF_NASCII : ANYOF_ASCII;
	7125	break;
	7126	case 'k':
	7127	if (memEQ(posixcc, "blan", 4)) /* blank */
	7128	namedclass = complement ? ANYOF_NBLANK : ANYOF_BLANK;
	7129	break;
	7130	case 'l':
	7131	if (memEQ(posixcc, "cntr", 4)) /* cntrl */
	7132	namedclass = complement ? ANYOF_NCNTRL : ANYOF_CNTRL;
	7133	break;
	7134	case 'm':
	7135	if (memEQ(posixcc, "alnu", 4)) /* alnum */
	7136	namedclass = complement ? ANYOF_NALNUMC : ANYOF_ALNUMC;
	7137	break;
	7138	case 'r':
	7139	if (memEQ(posixcc, "lowe", 4)) /* lower */
	7140	namedclass = complement ? ANYOF_NLOWER : ANYOF_LOWER;
	7141	else if (memEQ(posixcc, "uppe", 4)) /* upper */
	7142	namedclass = complement ? ANYOF_NUPPER : ANYOF_UPPER;
	7143	break;
	7144	case 't':
	7145	if (memEQ(posixcc, "digi", 4)) /* digit */
	7146	namedclass = complement ? ANYOF_NDIGIT : ANYOF_DIGIT;
	7147	else if (memEQ(posixcc, "prin", 4)) /* print */
	7148	namedclass = complement ? ANYOF_NPRINT : ANYOF_PRINT;
	7149	else if (memEQ(posixcc, "punc", 4)) /* punct */
	7150	namedclass = complement ? ANYOF_NPUNCT : ANYOF_PUNCT;
	7151	break;
	7152	}
	7153	break;
	7154	case 6:
	7155	if (memEQ(posixcc, "xdigit", 6))
	7156	namedclass = complement ? ANYOF_NXDIGIT : ANYOF_XDIGIT;
	7157	break;
	7158	}
	7159
	7160	if (namedclass == OOB_NAMEDCLASS)
	7161	Simple_vFAIL3("POSIX class [:%.*s:] unknown",
	7162	t - s - 1, s + 1);
	7163	assert (posixcc[skip] == ':');
	7164	assert (posixcc[skip+1] == ']');
	7165	} else if (!SIZE_ONLY) {
	7166	/* [[=foo=]] and [[.foo.]] are still future. */
	7167
	7168	/* adjust RExC_parse so the warning shows after
	7169	the class closes */
	7170	while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse) != ']')
	7171	RExC_parse++;
	7172	Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
	7173	}
	7174	} else {
	7175	/* Maternal grandfather:
	7176	* "[:" ending in ":" but not in ":]" */
	7177	RExC_parse = s;
	7178	}
	7179	}
	7180	}
	7181
	7182	return namedclass;
	7183	}
	7184
	7185	STATIC void
	7186	S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
	7187	{
	7188	dVAR;
	7189	if (POSIXCC(UCHARAT(RExC_parse))) {
	7190	const char *s = RExC_parse;
	7191	const char c = *s++;
	7192
	7193	while (isALNUM(*s))
	7194	s++;
	7195	if (s && c == s && s[1] == ']') {
	7196	if (ckWARN(WARN_REGEXP))
	7197	vWARN3(s+2,
	7198	"POSIX syntax [%c %c] belongs inside character classes",
	7199	c, c);
	7200
	7201	/* [[=foo=]] and [[.foo.]] are still future. */
	7202	if (POSIXCC_NOTYET(c)) {
	7203	/* adjust RExC_parse so the error shows after
	7204	the class closes */
	7205	while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse++) != ']')
	7206	NOOP;
	7207	Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
	7208	}
	7209	}
	7210	}
	7211	}
	7212
	7213
	/* Expands to the paired switch arms for ANYOF_<NAME> and ANYOF_N<NAME>.
	 * The user writes the leading "case" keyword and the trailing ';':
	 *     case _C_C_T_(ALPHA, isALPHA(value), "Alpha");
	 * Under LOC only the class bit is recorded; otherwise every value in
	 * 0..255 that satisfies TEST (or fails it, for the N form) is set in the
	 * bitmap.  yesno/what are set to '+' or '!' and WORD for the caller.
	 * Relies on ret, value, yesno and what being in scope at the use site. */
	#define _C_C_T_(NAME,TEST,WORD)                         \
	ANYOF_##NAME:                                           \
	    if (LOC)                                            \
	        ANYOF_CLASS_SET(ret, ANYOF_##NAME);             \
	    else {                                              \
	        for (value = 0; value < 256; value++)           \
	            if (TEST)                                   \
	                ANYOF_BITMAP_SET(ret, value);           \
	    }                                                   \
	    yesno = '+';                                        \
	    what = WORD;                                        \
	    break;                                              \
	case ANYOF_N##NAME:                                     \
	    if (LOC)                                            \
	        ANYOF_CLASS_SET(ret, ANYOF_N##NAME);            \
	    else {                                              \
	        for (value = 0; value < 256; value++)           \
	            if (!TEST)                                  \
	                ANYOF_BITMAP_SET(ret, value);           \
	    }                                                   \
	    yesno = '!';                                        \
	    what = WORD;                                        \
	    break
	7237
	7238
	7239	/*
	7240	parse a class specification and produce either an ANYOF node that
	7241	matches the pattern or if the pattern matches a single char only and
	7242	that char is < 256 and we are case insensitive then we produce an
	7243	EXACT node instead.
	7244	*/
	7245
	7246	STATIC regnode *
	7247	S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
	7248	{
	7249	dVAR;
	7250	register UV value = 0;
	7251	register UV nextvalue;
	7252	register IV prevvalue = OOB_UNICODE;
	7253	register IV range = 0;
	7254	register regnode *ret;
	7255	STRLEN numlen;
	7256	IV namedclass;
	7257	char *rangebegin = NULL;
	7258	bool need_class = 0;
	7259	SV *listsv = NULL;
	7260	UV n;
	7261	bool optimize_invert = TRUE;
	7262	AV* unicode_alternate = NULL;
	7263	#ifdef EBCDIC
	7264	UV literal_endpoint = 0;
	7265	#endif
	7266	UV stored = 0; /* number of chars stored in the class */
	7267
	7268	regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
	7269	case we need to change the emitted regop to an EXACT. */
	7270	const char * orig_parse = RExC_parse;
	7271	GET_RE_DEBUG_FLAGS_DECL;
	7272	#ifndef DEBUGGING
	7273	PERL_UNUSED_ARG(depth);
	7274	#endif
	7275
	7276	DEBUG_PARSE("clas");
	7277
	7278	/* Assume we are going to generate an ANYOF node. */
	7279	ret = reganode(pRExC_state, ANYOF, 0);
	7280
	7281	if (!SIZE_ONLY)
	7282	ANYOF_FLAGS(ret) = 0;
	7283
	7284	if (UCHARAT(RExC_parse) == '^') { /* Complement of range. */
	7285	RExC_naughty++;
	7286	RExC_parse++;
	7287	if (!SIZE_ONLY)
	7288	ANYOF_FLAGS(ret) \|= ANYOF_INVERT;
	7289	}
	7290
	7291	if (SIZE_ONLY) {
	7292	RExC_size += ANYOF_SKIP;
	7293	listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
	7294	}
	7295	else {
	7296	RExC_emit += ANYOF_SKIP;
	7297	if (FOLD)
	7298	ANYOF_FLAGS(ret) \|= ANYOF_FOLD;
	7299	if (LOC)
	7300	ANYOF_FLAGS(ret) \|= ANYOF_LOCALE;
	7301	ANYOF_BITMAP_ZERO(ret);
	7302	listsv = newSVpvs("# comment\n");
	7303	}
	7304
	7305	nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
	7306
	7307	if (!SIZE_ONLY && POSIXCC(nextvalue))
	7308	checkposixcc(pRExC_state);
	7309
	7310	/* allow 1st char to be ] (allowing it to be - is dealt with later) */
	7311	if (UCHARAT(RExC_parse) == ']')
	7312	goto charclassloop;
	7313
	7314	parseit:
	7315	while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
	7316
	7317	charclassloop:
	7318
	7319	namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
	7320
	7321	if (!range)
	7322	rangebegin = RExC_parse;
	7323	if (UTF) {
	7324	value = utf8n_to_uvchr((U8*)RExC_parse,
	7325	RExC_end - RExC_parse,
	7326	&numlen, UTF8_ALLOW_DEFAULT);
	7327	RExC_parse += numlen;
	7328	}
	7329	else
	7330	value = UCHARAT(RExC_parse++);
	7331
	7332	nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
	7333	if (value == '[' && POSIXCC(nextvalue))
	7334	namedclass = regpposixcc(pRExC_state, value);
	7335	else if (value == '\\') {
	7336	if (UTF) {
	7337	value = utf8n_to_uvchr((U8*)RExC_parse,
	7338	RExC_end - RExC_parse,
	7339	&numlen, UTF8_ALLOW_DEFAULT);
	7340	RExC_parse += numlen;
	7341	}
	7342	else
	7343	value = UCHARAT(RExC_parse++);
	7344	/* Some compilers cannot handle switching on 64-bit integer
	7345	* values, therefore value cannot be an UV. Yes, this will
	7346	* be a problem later if we want switch on Unicode.
	7347	* A similar issue a little bit later when switching on
	7348	* namedclass. --jhi */
	7349	switch ((I32)value) {
	7350	case 'w': namedclass = ANYOF_ALNUM; break;
	7351	case 'W': namedclass = ANYOF_NALNUM; break;
	7352	case 's': namedclass = ANYOF_SPACE; break;
	7353	case 'S': namedclass = ANYOF_NSPACE; break;
	7354	case 'd': namedclass = ANYOF_DIGIT; break;
	7355	case 'D': namedclass = ANYOF_NDIGIT; break;
	7356	case 'N': /* Handle \N{NAME} in class */
	7357	{
	7358	/* We only pay attention to the first char of
	7359	multichar strings being returned. I kinda wonder
	7360	if this makes sense as it does change the behaviour
	7361	from earlier versions, OTOH that behaviour was broken
	7362	as well. */
	7363	UV v; /* value is register so we cant & it /grrr */
	7364	if (reg_namedseq(pRExC_state, &v)) {
	7365	goto parseit;
	7366	}
	7367	value= v;
	7368	}
	7369	break;
	7370	case 'p':
	7371	case 'P':
	7372	{
	7373	char *e;
	7374	if (RExC_parse >= RExC_end)
	7375	vFAIL2("Empty \\%c{}", (U8)value);
	7376	if (*RExC_parse == '{') {
	7377	const U8 c = (U8)value;
	7378	e = strchr(RExC_parse++, '}');
	7379	if (!e)
	7380	vFAIL2("Missing right brace on \\%c{}", c);
	7381	while (isSPACE(UCHARAT(RExC_parse)))
	7382	RExC_parse++;
	7383	if (e == RExC_parse)
	7384	vFAIL2("Empty \\%c{}", c);
	7385	n = e - RExC_parse;
	7386	while (isSPACE(UCHARAT(RExC_parse + n - 1)))
	7387	n--;
	7388	}
	7389	else {
	7390	e = RExC_parse;
	7391	n = 1;
	7392	}
	7393	if (!SIZE_ONLY) {
	7394	if (UCHARAT(RExC_parse) == '^') {
	7395	RExC_parse++;
	7396	n--;
	7397	value = value == 'p' ? 'P' : 'p'; /* toggle */
	7398	while (isSPACE(UCHARAT(RExC_parse))) {
	7399	RExC_parse++;
	7400	n--;
	7401	}
	7402	}
	7403	Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n",
	7404	(value=='p' ? '+' : '!'), (int)n, RExC_parse);
	7405	}
	7406	RExC_parse = e + 1;
	7407	ANYOF_FLAGS(ret) \|= ANYOF_UNICODE;
	7408	namedclass = ANYOF_MAX; /* no official name, but it's named */
	7409	}
	7410	break;
	7411	case 'n': value = '\n'; break;
	7412	case 'r': value = '\r'; break;
	7413	case 't': value = '\t'; break;
	7414	case 'f': value = '\f'; break;
	7415	case 'b': value = '\b'; break;
	7416	case 'e': value = ASCII_TO_NATIVE('\033');break;
	7417	case 'a': value = ASCII_TO_NATIVE('\007');break;
	7418	case 'x':
	7419	if (*RExC_parse == '{') {
	7420	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	7421	\| PERL_SCAN_DISALLOW_PREFIX;
	7422	char * const e = strchr(RExC_parse++, '}');
	7423	if (!e)
	7424	vFAIL("Missing right brace on \\x{}");
	7425
	7426	numlen = e - RExC_parse;
	7427	value = grok_hex(RExC_parse, &numlen, &flags, NULL);
	7428	RExC_parse = e + 1;
	7429	}
	7430	else {
	7431	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
	7432	numlen = 2;
	7433	value = grok_hex(RExC_parse, &numlen, &flags, NULL);
	7434	RExC_parse += numlen;
	7435	}
	7436	if (PL_encoding && value < 0x100)
	7437	goto recode_encoding;
	7438	break;
	7439	case 'c':
	7440	value = UCHARAT(RExC_parse++);
	7441	value = toCTRL(value);
	7442	break;
	7443	case '0': case '1': case '2': case '3': case '4':
	7444	case '5': case '6': case '7': case '8': case '9':
	7445	{
	7446	I32 flags = 0;
	7447	numlen = 3;
	7448	value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
	7449	RExC_parse += numlen;
	7450	if (PL_encoding && value < 0x100)
	7451	goto recode_encoding;
	7452	break;
	7453	}
	7454	recode_encoding:
	7455	{
	7456	SV* enc = PL_encoding;
	7457	value = reg_recode((const char)(U8)value, &enc);
	7458	if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
	7459	vWARN(RExC_parse,
	7460	"Invalid escape in the specified encoding");
	7461	break;
	7462	}
	7463	default:
	7464	if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
	7465	vWARN2(RExC_parse,
	7466	"Unrecognized escape \\%c in character class passed through",
	7467	(int)value);
	7468	break;
	7469	}
	7470	} /* end of \blah */
	7471	#ifdef EBCDIC
	7472	else
	7473	literal_endpoint++;
	7474	#endif
	7475
	7476	if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
	7477
	7478	if (!SIZE_ONLY && !need_class)
	7479	ANYOF_CLASS_ZERO(ret);
	7480
	7481	need_class = 1;
	7482
	7483	/* a bad range like a-\d, a-[:digit:] ? */
	7484	if (range) {
	7485	if (!SIZE_ONLY) {
	7486	if (ckWARN(WARN_REGEXP)) {
	7487	const int w =
	7488	RExC_parse >= rangebegin ?
	7489	RExC_parse - rangebegin : 0;
	7490	vWARN4(RExC_parse,
	7491	"False [] range \"%.s\"",
	7492	w, w, rangebegin);
	7493	}
	7494	if (prevvalue < 256) {
	7495	ANYOF_BITMAP_SET(ret, prevvalue);
	7496	ANYOF_BITMAP_SET(ret, '-');
	7497	}
	7498	else {
	7499	ANYOF_FLAGS(ret) \|= ANYOF_UNICODE;
	7500	Perl_sv_catpvf(aTHX_ listsv,
	7501	"%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
	7502	}
	7503	}
	7504
	7505	range = 0; /* this was not a true range */
	7506	}
	7507
	7508
	7509
	7510	if (!SIZE_ONLY) {
	7511	const char *what = NULL;
	7512	char yesno = 0;
	7513
	7514	if (namedclass > OOB_NAMEDCLASS)
	7515	optimize_invert = FALSE;
	7516	/* Possible truncation here but in some 64-bit environments
	7517	* the compiler gets heartburn about switch on 64-bit values.
	7518	* A similar issue a little earlier when switching on value.
	7519	* --jhi */
	7520	switch ((I32)namedclass) {
	7521	case _C_C_T_(ALNUM, isALNUM(value), "Word");
	7522	case _C_C_T_(ALNUMC, isALNUMC(value), "Alnum");
	7523	case _C_C_T_(ALPHA, isALPHA(value), "Alpha");
	7524	case _C_C_T_(BLANK, isBLANK(value), "Blank");
	7525	case _C_C_T_(CNTRL, isCNTRL(value), "Cntrl");
	7526	case _C_C_T_(GRAPH, isGRAPH(value), "Graph");
	7527	case _C_C_T_(LOWER, isLOWER(value), "Lower");
	7528	case _C_C_T_(PRINT, isPRINT(value), "Print");
	7529	case _C_C_T_(PSXSPC, isPSXSPC(value), "Space");
	7530	case _C_C_T_(PUNCT, isPUNCT(value), "Punct");
	7531	case _C_C_T_(SPACE, isSPACE(value), "SpacePerl");
	7532	case _C_C_T_(UPPER, isUPPER(value), "Upper");
	7533	case _C_C_T_(XDIGIT, isXDIGIT(value), "XDigit");
	7534	case ANYOF_ASCII:
	7535	if (LOC)
	7536	ANYOF_CLASS_SET(ret, ANYOF_ASCII);
	7537	else {
	7538	#ifndef EBCDIC
	7539	for (value = 0; value < 128; value++)
	7540	ANYOF_BITMAP_SET(ret, value);
	7541	#else /* EBCDIC */
	7542	for (value = 0; value < 256; value++) {
	7543	if (isASCII(value))
	7544	ANYOF_BITMAP_SET(ret, value);
	7545	}
	7546	#endif /* EBCDIC */
	7547	}
	7548	yesno = '+';
	7549	what = "ASCII";
	7550	break;
	7551	case ANYOF_NASCII:
	7552	if (LOC)
	7553	ANYOF_CLASS_SET(ret, ANYOF_NASCII);
	7554	else {
	7555	#ifndef EBCDIC
	7556	for (value = 128; value < 256; value++)
	7557	ANYOF_BITMAP_SET(ret, value);
	7558	#else /* EBCDIC */
	7559	for (value = 0; value < 256; value++) {
	7560	if (!isASCII(value))
	7561	ANYOF_BITMAP_SET(ret, value);
	7562	}
	7563	#endif /* EBCDIC */
	7564	}
	7565	yesno = '!';
	7566	what = "ASCII";
	7567	break;
	7568	case ANYOF_DIGIT:
	7569	if (LOC)
	7570	ANYOF_CLASS_SET(ret, ANYOF_DIGIT);
	7571	else {
	7572	/* consecutive digits assumed */
	7573	for (value = '0'; value <= '9'; value++)
	7574	ANYOF_BITMAP_SET(ret, value);
	7575	}
	7576	yesno = '+';
	7577	what = "Digit";
	7578	break;
	7579	case ANYOF_NDIGIT:
	7580	if (LOC)
	7581	ANYOF_CLASS_SET(ret, ANYOF_NDIGIT);
	7582	else {
	7583	/* consecutive digits assumed */
	7584	for (value = 0; value < '0'; value++)
	7585	ANYOF_BITMAP_SET(ret, value);
	7586	for (value = '9' + 1; value < 256; value++)
	7587	ANYOF_BITMAP_SET(ret, value);
	7588	}
	7589	yesno = '!';
	7590	what = "Digit";
	7591	break;
	7592	case ANYOF_MAX:
	7593	/* this is to handle \p and \P */
	7594	break;
	7595	default:
	7596	vFAIL("Invalid [::] class");
	7597	break;
	7598	}
	7599	if (what) {
	7600	/* Strings such as "+utf8::isWord\n" */
	7601	Perl_sv_catpvf(aTHX_ listsv, "%cutf8::Is%s\n", yesno, what);
	7602	}
	7603	if (LOC)
	7604	ANYOF_FLAGS(ret) \|= ANYOF_CLASS;
	7605	continue;
	7606	}
	7607	} /* end of namedclass \blah */
	7608
	7609	if (range) {
	7610	if (prevvalue > (IV)value) /* b-a */ {
	7611	const int w = RExC_parse - rangebegin;
	7612	Simple_vFAIL4("Invalid [] range \"%.s\"", w, w, rangebegin);
	7613	range = 0; /* not a valid range */
	7614	}
	7615	}
	7616	else {
	7617	prevvalue = value; /* save the beginning of the range */
	7618	if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
	7619	RExC_parse[1] != ']') {
	7620	RExC_parse++;
	7621
	7622	/* a bad range like \w-, [:word:]- ? */
	7623	if (namedclass > OOB_NAMEDCLASS) {
	7624	if (ckWARN(WARN_REGEXP)) {
	7625	const int w =
	7626	RExC_parse >= rangebegin ?
	7627	RExC_parse - rangebegin : 0;
	7628	vWARN4(RExC_parse,
	7629	"False [] range \"%.s\"",
	7630	w, w, rangebegin);
	7631	}
	7632	if (!SIZE_ONLY)
	7633	ANYOF_BITMAP_SET(ret, '-');
	7634	} else
	7635	range = 1; /* yeah, it's a range! */
	7636	continue; /* but do it the next time */
	7637	}
	7638	}
	7639
	7640	/* now is the next time */
	7641	/stored += (value - prevvalue + 1);/
	7642	if (!SIZE_ONLY) {
	7643	if (prevvalue < 256) {
	7644	const IV ceilvalue = value < 256 ? value : 255;
	7645	IV i;
	7646	#ifdef EBCDIC
	7647	/* In EBCDIC [\x89-\x91] should include
	7648	* the \x8e but [i-j] should not. */
	7649	if (literal_endpoint == 2 &&
	7650	((isLOWER(prevvalue) && isLOWER(ceilvalue)) \|\|
	7651	(isUPPER(prevvalue) && isUPPER(ceilvalue))))
	7652	{
	7653	if (isLOWER(prevvalue)) {
	7654	for (i = prevvalue; i <= ceilvalue; i++)
	7655	if (isLOWER(i))
	7656	ANYOF_BITMAP_SET(ret, i);
	7657	} else {
	7658	for (i = prevvalue; i <= ceilvalue; i++)
	7659	if (isUPPER(i))
	7660	ANYOF_BITMAP_SET(ret, i);
	7661	}
	7662	}
	7663	else
	7664	#endif
	7665	for (i = prevvalue; i <= ceilvalue; i++) {
	7666	if (!ANYOF_BITMAP_TEST(ret,i)) {
	7667	stored++;
	7668	ANYOF_BITMAP_SET(ret, i);
	7669	}
	7670	}
	7671	}
	7672	if (value > 255 \|\| UTF) {
	7673	const UV prevnatvalue = NATIVE_TO_UNI(prevvalue);
	7674	const UV natvalue = NATIVE_TO_UNI(value);
	7675	stored+=2; /* can't optimize this class */
	7676	ANYOF_FLAGS(ret) \|= ANYOF_UNICODE;
	7677	if (prevnatvalue < natvalue) { /* what about > ? */
	7678	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
	7679	prevnatvalue, natvalue);
	7680	}
	7681	else if (prevnatvalue == natvalue) {
	7682	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", natvalue);
	7683	if (FOLD) {
	7684	U8 foldbuf[UTF8_MAXBYTES_CASE+1];
	7685	STRLEN foldlen;
	7686	const UV f = to_uni_fold(natvalue, foldbuf, &foldlen);
	7687
	7688	#ifdef EBCDIC /* RD t/uni/fold ff and 6b */
	7689	if (RExC_precomp[0] == ':' &&
	7690	RExC_precomp[1] == '[' &&
	7691	(f == 0xDF \|\| f == 0x92)) {
	7692	f = NATIVE_TO_UNI(f);
	7693	}
	7694	#endif
	7695	/* If folding and foldable and a single
	7696	* character, insert also the folded version
	7697	* to the charclass. */
	7698	if (f != value) {
	7699	#ifdef EBCDIC /* RD tunifold ligatures s,t fb05, fb06 */
	7700	if ((RExC_precomp[0] == ':' &&
	7701	RExC_precomp[1] == '[' &&
	7702	(f == 0xA2 &&
	7703	(value == 0xFB05 \|\| value == 0xFB06))) ?
	7704	foldlen == ((STRLEN)UNISKIP(f) - 1) :
	7705	foldlen == (STRLEN)UNISKIP(f) )
	7706	#else
	7707	if (foldlen == (STRLEN)UNISKIP(f))
	7708	#endif
	7709	Perl_sv_catpvf(aTHX_ listsv,
	7710	"%04"UVxf"\n", f);
	7711	else {
	7712	/* Any multicharacter foldings
	7713	* require the following transform:
	7714	* [ABCDEF] -> (?:[ABCabcDEFd]\|pq\|rst)
	7715	* where E folds into "pq" and F folds
	7716	* into "rst", all other characters
	7717	* fold to single characters. We save
	7718	* away these multicharacter foldings,
	7719	* to be later saved as part of the
	7720	* additional "s" data. */
	7721	SV *sv;
	7722
	7723	if (!unicode_alternate)
	7724	unicode_alternate = newAV();
	7725	sv = newSVpvn((char*)foldbuf, foldlen);
	7726	SvUTF8_on(sv);
	7727	av_push(unicode_alternate, sv);
	7728	}
	7729	}
	7730
	7731	/* If folding and the value is one of the Greek
	7732	* sigmas insert a few more sigmas to make the
	7733	* folding rules of the sigmas to work right.
	7734	* Note that not all the possible combinations
	7735	* are handled here: some of them are handled
	7736	* by the standard folding rules, and some of
	7737	* them (literal or EXACTF cases) are handled
	7738	* during runtime in regexec.c:S_find_byclass(). */
	7739	if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
	7740	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
	7741	(UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
	7742	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
	7743	(UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
	7744	}
	7745	else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA)
	7746	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
	7747	(UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
	7748	}
	7749	}
	7750	}
	7751	#ifdef EBCDIC
	7752	literal_endpoint = 0;
	7753	#endif
	7754	}
	7755
	7756	range = 0; /* this range (if it was one) is done now */
	7757	}
	7758
	7759	if (need_class) {
	7760	ANYOF_FLAGS(ret) \|= ANYOF_LARGE;
	7761	if (SIZE_ONLY)
	7762	RExC_size += ANYOF_CLASS_ADD_SKIP;
	7763	else
	7764	RExC_emit += ANYOF_CLASS_ADD_SKIP;
	7765	}
	7766
	7767
	7768	if (SIZE_ONLY)
	7769	return ret;
	7770	/**** !SIZE_ONLY AFTER HERE *******/
	7771
	7772	if( stored == 1 && (value < 128 \|\| (value < 256 && !UTF))
	7773	&& !( ANYOF_FLAGS(ret) & ( ANYOF_FLAGS_ALL ^ ANYOF_FOLD ) )
	7774	) {
	7775	/* optimize single char class to an EXACT node
	7776	but only when its not a UTF/high char */
	7777	const char * cur_parse= RExC_parse;
	7778	RExC_emit = (regnode *)orig_emit;
	7779	RExC_parse = (char *)orig_parse;
	7780	ret = reg_node(pRExC_state,
	7781	(U8)((ANYOF_FLAGS(ret) & ANYOF_FOLD) ? EXACTF : EXACT));
	7782	RExC_parse = (char *)cur_parse;
	7783	*STRING(ret)= (char)value;
	7784	STR_LEN(ret)= 1;
	7785	RExC_emit += STR_SZ(1);
	7786	return ret;
	7787	}
	7788	/* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
	7789	if ( /* If the only flag is folding (plus possibly inversion). */
	7790	((ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD)
	7791	) {
	7792	for (value = 0; value < 256; ++value) {
	7793	if (ANYOF_BITMAP_TEST(ret, value)) {
	7794	UV fold = PL_fold[value];
	7795
	7796	if (fold != value)
	7797	ANYOF_BITMAP_SET(ret, fold);
	7798	}
	7799	}
	7800	ANYOF_FLAGS(ret) &= ~ANYOF_FOLD;
	7801	}
	7802
	7803	/* optimize inverted simple patterns (e.g. [^a-z]) */
	7804	if (optimize_invert &&
	7805	/* If the only flag is inversion. */
	7806	(ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
	7807	for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
	7808	ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL;
	7809	ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
	7810	}
	7811	{
	7812	AV * const av = newAV();
	7813	SV *rv;
	7814	/* The 0th element stores the character class description
	7815	* in its textual form: used later (regexec.c:Perl_regclass_swash())
	7816	* to initialize the appropriate swash (which gets stored in
	7817	* the 1st element), and also useful for dumping the regnode.
	7818	* The 2nd element stores the multicharacter foldings,
	7819	* used later (regexec.c:S_reginclass()). */
	7820	av_store(av, 0, listsv);
	7821	av_store(av, 1, NULL);
	7822	av_store(av, 2, (SV*)unicode_alternate);
	7823	rv = newRV_noinc((SV*)av);
	7824	n = add_data(pRExC_state, 1, "s");
	7825	RExC_rxi->data->data[n] = (void*)rv;
	7826	ARG_SET(ret, n);
	7827	}
	7828	return ret;
	7829	}
	7830	#undef _C_C_T_
	7831
	7832
	7833	/* reg_skipcomment()
	7834
	7835	Absorbs an /x style # comments from the input stream.
	7836	Returns true if there is more text remaining in the stream.
	7837	Will set the REG_SEEN_RUN_ON_COMMENT flag if the comment
	7838	terminates the pattern without including a newline.
	7839
	7840	Note its the callers responsibility to ensure that we are
	7841	actually in /x mode
	7842
	7843	*/
	7844
	7845	STATIC bool
	7846	S_reg_skipcomment(pTHX_ RExC_state_t *pRExC_state)
	7847	{
	7848	bool ended = 0;
	7849	while (RExC_parse < RExC_end)
	7850	if (*RExC_parse++ == '\n') {
	7851	ended = 1;
	7852	break;
	7853	}
	7854	if (!ended) {
	7855	/* we ran off the end of the pattern without ending
	7856	the comment, so we have to add an \n when wrapping */
	7857	RExC_seen \|= REG_SEEN_RUN_ON_COMMENT;
	7858	return 0;
	7859	} else
	7860	return 1;
	7861	}
	7862
	7863	/* nextchar()
	7864
	7865	Advance that parse position, and optionall absorbs
	7866	"whitespace" from the inputstream.
	7867
	7868	Without /x "whitespace" means (?#...) style comments only,
	7869	with /x this means (?#...) and # comments and whitespace proper.
	7870
	7871	Returns the RExC_parse point from BEFORE the scan occurs.
	7872
	7873	This is the /x friendly way of saying RExC_parse++.
	7874	*/
	7875
	7876	STATIC char*
	7877	S_nextchar(pTHX_ RExC_state_t *pRExC_state)
	7878	{
	7879	char* const retval = RExC_parse++;
	7880
	7881	for (;;) {
	7882	if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
	7883	RExC_parse[2] == '#') {
	7884	while (*RExC_parse != ')') {
	7885	if (RExC_parse == RExC_end)
	7886	FAIL("Sequence (?#... not terminated");
	7887	RExC_parse++;
	7888	}
	7889	RExC_parse++;
	7890	continue;
	7891	}
	7892	if (RExC_flags & RXf_PMf_EXTENDED) {
	7893	if (isSPACE(*RExC_parse)) {
	7894	RExC_parse++;
	7895	continue;
	7896	}
	7897	else if (*RExC_parse == '#') {
	7898	if ( reg_skipcomment( pRExC_state ) )
	7899	continue;
	7900	}
	7901	}
	7902	return retval;
	7903	}
	7904	}
	7905
	7906	/*
	7907	- reg_node - emit a node
	7908	*/
	7909	STATIC regnode * /* Location. */
	7910	S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
	7911	{
	7912	dVAR;
	7913	register regnode *ptr;
	7914	regnode * const ret = RExC_emit;
	7915	GET_RE_DEBUG_FLAGS_DECL;
	7916
	7917	if (SIZE_ONLY) {
	7918	SIZE_ALIGN(RExC_size);
	7919	RExC_size += 1;
	7920	return(ret);
	7921	}
	7922	if (RExC_emit >= RExC_emit_bound)
	7923	Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d", op);
	7924
	7925	NODE_ALIGN_FILL(ret);
	7926	ptr = ret;
	7927	FILL_ADVANCE_NODE(ptr, op);
	7928	#ifdef RE_TRACK_PATTERN_OFFSETS
	7929	if (RExC_offsets) { /* MJD */
	7930	MJD_OFFSET_DEBUG(("%s:%d: (op %s) %s %"UVuf" (len %"UVuf") (max %"UVuf").\n",
	7931	"reg_node", __LINE__,
	7932	PL_reg_name[op],
	7933	(UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0]
	7934	? "Overwriting end of array!\n" : "OK",
	7935	(UV)(RExC_emit - RExC_emit_start),
	7936	(UV)(RExC_parse - RExC_start),
	7937	(UV)RExC_offsets[0]));
	7938	Set_Node_Offset(RExC_emit, RExC_parse + (op == END));
	7939	}
	7940	#endif
	7941	RExC_emit = ptr;
	7942	return(ret);
	7943	}
	7944
	7945	/*
	7946	- reganode - emit a node with an argument
	7947	*/
	7948	STATIC regnode * /* Location. */
	7949	S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
	7950	{
	7951	dVAR;
	7952	register regnode *ptr;
	7953	regnode * const ret = RExC_emit;
	7954	GET_RE_DEBUG_FLAGS_DECL;
	7955
	7956	if (SIZE_ONLY) {
	7957	SIZE_ALIGN(RExC_size);
	7958	RExC_size += 2;
	7959	/*
	7960	We can't do this:
	7961
	7962	assert(2==regarglen[op]+1);
	7963
	7964	Anything larger than this has to allocate the extra amount.
	7965	If we changed this to be:
	7966
	7967	RExC_size += (1 + regarglen[op]);
	7968
	7969	then it wouldn't matter. Its not clear what side effect
	7970	might come from that so its not done so far.
	7971	-- dmq
	7972	*/
	7973	return(ret);
	7974	}
	7975	if (RExC_emit >= RExC_emit_bound)
	7976	Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d", op);
	7977
	7978	NODE_ALIGN_FILL(ret);
	7979	ptr = ret;
	7980	FILL_ADVANCE_NODE_ARG(ptr, op, arg);
	7981	#ifdef RE_TRACK_PATTERN_OFFSETS
	7982	if (RExC_offsets) { /* MJD */
	7983	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n",
	7984	"reganode",
	7985	__LINE__,
	7986	PL_reg_name[op],
	7987	(UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0] ?
	7988	"Overwriting end of array!\n" : "OK",
	7989	(UV)(RExC_emit - RExC_emit_start),
	7990	(UV)(RExC_parse - RExC_start),
	7991	(UV)RExC_offsets[0]));
	7992	Set_Cur_Node_Offset;
	7993	}
	7994	#endif
	7995	RExC_emit = ptr;
	7996	return(ret);
	7997	}
	7998
	7999	/*
	8000	- reguni - emit (if appropriate) a Unicode character
	8001	*/
	8002	STATIC STRLEN
	8003	S_reguni(pTHX_ const RExC_state_t pRExC_state, UV uv, char s)
	8004	{
	8005	dVAR;
	8006	return SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8)s, uv) - (U8)s);
	8007	}
	8008
	8009	/*
	8010	- reginsert - insert an operator in front of already-emitted operand
	8011	*
	8012	* Means relocating the operand.
	8013	*/
	8014	STATIC void
	8015	S_reginsert(pTHX_ RExC_state_t pRExC_state, U8 op, regnode opnd, U32 depth)
	8016	{
	8017	dVAR;
	8018	register regnode *src;
	8019	register regnode *dst;
	8020	register regnode *place;
	8021	const int offset = regarglen[(U8)op];
	8022	const int size = NODE_STEP_REGNODE + offset;
	8023	GET_RE_DEBUG_FLAGS_DECL;
	8024	PERL_UNUSED_ARG(depth);
	8025	/* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
	8026	DEBUG_PARSE_FMT("inst"," - %s",PL_reg_name[op]);
	8027	if (SIZE_ONLY) {
	8028	RExC_size += size;
	8029	return;
	8030	}
	8031
	8032	src = RExC_emit;
	8033	RExC_emit += size;
	8034	dst = RExC_emit;
	8035	if (RExC_open_parens) {
	8036	int paren;
	8037	/DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);/
	8038	for ( paren=0 ; paren < RExC_npar ; paren++ ) {
	8039	if ( RExC_open_parens[paren] >= opnd ) {
	8040	/DEBUG_PARSE_FMT("open"," - %d",size);/
	8041	RExC_open_parens[paren] += size;
	8042	} else {
	8043	/DEBUG_PARSE_FMT("open"," - %s","ok");/
	8044	}
	8045	if ( RExC_close_parens[paren] >= opnd ) {
	8046	/DEBUG_PARSE_FMT("close"," - %d",size);/
	8047	RExC_close_parens[paren] += size;
	8048	} else {
	8049	/DEBUG_PARSE_FMT("close"," - %s","ok");/
	8050	}
	8051	}
	8052	}
	8053
	8054	while (src > opnd) {
	8055	StructCopy(--src, --dst, regnode);
	8056	#ifdef RE_TRACK_PATTERN_OFFSETS
	8057	if (RExC_offsets) { /* MJD 20010112 */
	8058	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s copy %"UVuf" -> %"UVuf" (max %"UVuf").\n",
	8059	"reg_insert",
	8060	__LINE__,
	8061	PL_reg_name[op],
	8062	(UV)(dst - RExC_emit_start) > RExC_offsets[0]
	8063	? "Overwriting end of array!\n" : "OK",
	8064	(UV)(src - RExC_emit_start),
	8065	(UV)(dst - RExC_emit_start),
	8066	(UV)RExC_offsets[0]));
	8067	Set_Node_Offset_To_R(dst-RExC_emit_start, Node_Offset(src));
	8068	Set_Node_Length_To_R(dst-RExC_emit_start, Node_Length(src));
	8069	}
	8070	#endif
	8071	}
	8072
	8073
	8074	place = opnd; /* Op node, where operand used to be. */
	8075	#ifdef RE_TRACK_PATTERN_OFFSETS
	8076	if (RExC_offsets) { /* MJD */
	8077	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n",
	8078	"reginsert",
	8079	__LINE__,
	8080	PL_reg_name[op],
	8081	(UV)(place - RExC_emit_start) > RExC_offsets[0]
	8082	? "Overwriting end of array!\n" : "OK",
	8083	(UV)(place - RExC_emit_start),
	8084	(UV)(RExC_parse - RExC_start),
	8085	(UV)RExC_offsets[0]));
	8086	Set_Node_Offset(place, RExC_parse);
	8087	Set_Node_Length(place, 1);
	8088	}
	8089	#endif
	8090	src = NEXTOPER(place);
	8091	FILL_ADVANCE_NODE(place, op);
	8092	Zero(src, offset, regnode);
	8093	}
	8094
	8095	/*
	8096	- regtail - set the next-pointer at the end of a node chain of p to val.
	8097	- SEE ALSO: regtail_study
	8098	*/
	8099	/* TODO: All three parms should be const */
	8100	STATIC void
	8101	S_regtail(pTHX_ RExC_state_t pRExC_state, regnode p, const regnode *val,U32 depth)
	8102	{
	8103	dVAR;
	8104	register regnode *scan;
	8105	GET_RE_DEBUG_FLAGS_DECL;
	8106	#ifndef DEBUGGING
	8107	PERL_UNUSED_ARG(depth);
	8108	#endif
	8109
	8110	if (SIZE_ONLY)
	8111	return;
	8112
	8113	/* Find last node. */
	8114	scan = p;
	8115	for (;;) {
	8116	regnode * const temp = regnext(scan);
	8117	DEBUG_PARSE_r({
	8118	SV * const mysv=sv_newmortal();
	8119	DEBUG_PARSE_MSG((scan==p ? "tail" : ""));
	8120	regprop(RExC_rx, mysv, scan);
	8121	PerlIO_printf(Perl_debug_log, "~ %s (%d) %s %s\n",
	8122	SvPV_nolen_const(mysv), REG_NODE_NUM(scan),
	8123	(temp == NULL ? "->" : ""),
	8124	(temp == NULL ? PL_reg_name[OP(val)] : "")
	8125	);
	8126	});
	8127	if (temp == NULL)
	8128	break;
	8129	scan = temp;
	8130	}
	8131
	8132	if (reg_off_by_arg[OP(scan)]) {
	8133	ARG_SET(scan, val - scan);
	8134	}
	8135	else {
	8136	NEXT_OFF(scan) = val - scan;
	8137	}
	8138	}
	8139
	8140	#ifdef DEBUGGING
	8141	/*
	8142	- regtail_study - set the next-pointer at the end of a node chain of p to val.
	8143	- Look for optimizable sequences at the same time.
	8144	- currently only looks for EXACT chains.
	8145
	8146	This is expermental code. The idea is to use this routine to perform
	8147	in place optimizations on branches and groups as they are constructed,
	8148	with the long term intention of removing optimization from study_chunk so
	8149	that it is purely analytical.
	8150
	8151	Currently only used when in DEBUG mode. The macro REGTAIL_STUDY() is used
	8152	to control which is which.
	8153
	8154	*/
	8155	/* TODO: All four parms should be const */
	8156
	8157	STATIC U8
	8158	S_regtail_study(pTHX_ RExC_state_t pRExC_state, regnode p, const regnode *val,U32 depth)
	8159	{
	8160	dVAR;
	8161	register regnode *scan;
	8162	U8 exact = PSEUDO;
	8163	#ifdef EXPERIMENTAL_INPLACESCAN
	8164	I32 min = 0;
	8165	#endif
	8166
	8167	GET_RE_DEBUG_FLAGS_DECL;
	8168
	8169
	8170	if (SIZE_ONLY)
	8171	return exact;
	8172
	8173	/* Find last node. */
	8174
	8175	scan = p;
	8176	for (;;) {
	8177	regnode * const temp = regnext(scan);
	8178	#ifdef EXPERIMENTAL_INPLACESCAN
	8179	if (PL_regkind[OP(scan)] == EXACT)
	8180	if (join_exact(pRExC_state,scan,&min,1,val,depth+1))
	8181	return EXACT;
	8182	#endif
	8183	if ( exact ) {
	8184	switch (OP(scan)) {
	8185	case EXACT:
	8186	case EXACTF:
	8187	case EXACTFL:
	8188	if( exact == PSEUDO )
	8189	exact= OP(scan);
	8190	else if ( exact != OP(scan) )
	8191	exact= 0;
	8192	case NOTHING:
	8193	break;
	8194	default:
	8195	exact= 0;
	8196	}
	8197	}
	8198	DEBUG_PARSE_r({
	8199	SV * const mysv=sv_newmortal();
	8200	DEBUG_PARSE_MSG((scan==p ? "tsdy" : ""));
	8201	regprop(RExC_rx, mysv, scan);
	8202	PerlIO_printf(Perl_debug_log, "~ %s (%d) -> %s\n",
	8203	SvPV_nolen_const(mysv),
	8204	REG_NODE_NUM(scan),
	8205	PL_reg_name[exact]);
	8206	});
	8207	if (temp == NULL)
	8208	break;
	8209	scan = temp;
	8210	}
	8211	DEBUG_PARSE_r({
	8212	SV * const mysv_val=sv_newmortal();
	8213	DEBUG_PARSE_MSG("");
	8214	regprop(RExC_rx, mysv_val, val);
	8215	PerlIO_printf(Perl_debug_log, "~ attach to %s (%"IVdf") offset to %"IVdf"\n",
	8216	SvPV_nolen_const(mysv_val),
	8217	(IV)REG_NODE_NUM(val),
	8218	(IV)(val - scan)
	8219	);
	8220	});
	8221	if (reg_off_by_arg[OP(scan)]) {
	8222	ARG_SET(scan, val - scan);
	8223	}
	8224	else {
	8225	NEXT_OFF(scan) = val - scan;
	8226	}
	8227
	8228	return exact;
	8229	}
	8230	#endif
	8231
	8232	/*
	8233	- regcurly - a little FSA that accepts {\d+,?\d*}
	8234	*/
	8235	STATIC I32
	8236	S_regcurly(register const char *s)
	8237	{
	8238	if (*s++ != '{')
	8239	return FALSE;
	8240	if (!isDIGIT(*s))
	8241	return FALSE;
	8242	while (isDIGIT(*s))
	8243	s++;
	8244	if (*s == ',')
	8245	s++;
	8246	while (isDIGIT(*s))
	8247	s++;
	8248	if (*s != '}')
	8249	return FALSE;
	8250	return TRUE;
	8251	}
	8252
	8253
	8254	/*
	8255	- regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
	8256	*/
	8257	void
	8258	Perl_regdump(pTHX_ const regexp *r)
	8259	{
	8260	#ifdef DEBUGGING
	8261	dVAR;
	8262	SV * const sv = sv_newmortal();
	8263	SV *dsv= sv_newmortal();
	8264	RXi_GET_DECL(r,ri);
	8265
	8266	(void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
	8267
	8268	/* Header fields of interest. */
	8269	if (r->anchored_substr) {
	8270	RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->anchored_substr),
	8271	RE_SV_DUMPLEN(r->anchored_substr), 30);
	8272	PerlIO_printf(Perl_debug_log,
	8273	"anchored %s%s at %"IVdf" ",
	8274	s, RE_SV_TAIL(r->anchored_substr),
	8275	(IV)r->anchored_offset);
	8276	} else if (r->anchored_utf8) {
	8277	RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->anchored_utf8),
	8278	RE_SV_DUMPLEN(r->anchored_utf8), 30);
	8279	PerlIO_printf(Perl_debug_log,
	8280	"anchored utf8 %s%s at %"IVdf" ",
	8281	s, RE_SV_TAIL(r->anchored_utf8),
	8282	(IV)r->anchored_offset);
	8283	}
	8284	if (r->float_substr) {
	8285	RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->float_substr),
	8286	RE_SV_DUMPLEN(r->float_substr), 30);
	8287	PerlIO_printf(Perl_debug_log,
	8288	"floating %s%s at %"IVdf"..%"UVuf" ",
	8289	s, RE_SV_TAIL(r->float_substr),
	8290	(IV)r->float_min_offset, (UV)r->float_max_offset);
	8291	} else if (r->float_utf8) {
	8292	RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->float_utf8),
	8293	RE_SV_DUMPLEN(r->float_utf8), 30);
	8294	PerlIO_printf(Perl_debug_log,
	8295	"floating utf8 %s%s at %"IVdf"..%"UVuf" ",
	8296	s, RE_SV_TAIL(r->float_utf8),
	8297	(IV)r->float_min_offset, (UV)r->float_max_offset);
	8298	}
	8299	if (r->check_substr \|\| r->check_utf8)
	8300	PerlIO_printf(Perl_debug_log,
	8301	(const char *)
	8302	(r->check_substr == r->float_substr
	8303	&& r->check_utf8 == r->float_utf8
	8304	? "(checking floating" : "(checking anchored"));
	8305	if (r->extflags & RXf_NOSCAN)
	8306	PerlIO_printf(Perl_debug_log, " noscan");
	8307	if (r->extflags & RXf_CHECK_ALL)
	8308	PerlIO_printf(Perl_debug_log, " isall");
	8309	if (r->check_substr \|\| r->check_utf8)
	8310	PerlIO_printf(Perl_debug_log, ") ");
	8311
	8312	if (ri->regstclass) {
	8313	regprop(r, sv, ri->regstclass);
	8314	PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv));
	8315	}
	8316	if (r->extflags & RXf_ANCH) {
	8317	PerlIO_printf(Perl_debug_log, "anchored");
	8318	if (r->extflags & RXf_ANCH_BOL)
	8319	PerlIO_printf(Perl_debug_log, "(BOL)");
	8320	if (r->extflags & RXf_ANCH_MBOL)
	8321	PerlIO_printf(Perl_debug_log, "(MBOL)");
	8322	if (r->extflags & RXf_ANCH_SBOL)
	8323	PerlIO_printf(Perl_debug_log, "(SBOL)");
	8324	if (r->extflags & RXf_ANCH_GPOS)
	8325	PerlIO_printf(Perl_debug_log, "(GPOS)");
	8326	PerlIO_putc(Perl_debug_log, ' ');
	8327	}
	8328	if (r->extflags & RXf_GPOS_SEEN)
	8329	PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", (UV)r->gofs);
	8330	if (r->intflags & PREGf_SKIP)
	8331	PerlIO_printf(Perl_debug_log, "plus ");
	8332	if (r->intflags & PREGf_IMPLICIT)
	8333	PerlIO_printf(Perl_debug_log, "implicit ");
	8334	PerlIO_printf(Perl_debug_log, "minlen %"IVdf" ", (IV)r->minlen);
	8335	if (r->extflags & RXf_EVAL_SEEN)
	8336	PerlIO_printf(Perl_debug_log, "with eval ");
	8337	PerlIO_printf(Perl_debug_log, "\n");
	8338	#else
	8339	PERL_UNUSED_CONTEXT;
	8340	PERL_UNUSED_ARG(r);
	8341	#endif /* DEBUGGING */
	8342	}
	8343
	8344	/*
	8345	- regprop - printable representation of opcode
	8346	*/
	8347	void
	8348	Perl_regprop(pTHX_ const regexp prog, SV sv, const regnode *o)
	8349	{
	8350	#ifdef DEBUGGING
	8351	dVAR;
	8352	register int k;
	8353	RXi_GET_DECL(prog,progi);
	8354	GET_RE_DEBUG_FLAGS_DECL;
	8355
	8356
	8357	sv_setpvn(sv, "", 0);
	8358
	8359	if (OP(o) > REGNODE_MAX) /* regnode.type is unsigned */
	8360	/* It would be nice to FAIL() here, but this may be called from
	8361	regexec.c, and it would be hard to supply pRExC_state. */
	8362	Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", (int)OP(o), (int)REGNODE_MAX);
	8363	sv_catpv(sv, PL_reg_name[OP(o)]); /* Take off const! */
	8364
	8365	k = PL_regkind[OP(o)];
	8366
	8367	if (k == EXACT) {
	8368	SV * const dsv = sv_2mortal(newSVpvs(""));
	8369	/* Using is_utf8_string() (via PERL_PV_UNI_DETECT)
	8370	* is a crude hack but it may be the best for now since
	8371	* we have no flag "this EXACTish node was UTF-8"
	8372	* --jhi */
	8373	const char * const s =
	8374	pv_pretty(dsv, STRING(o), STR_LEN(o), 60,
	8375	PL_colors[0], PL_colors[1],
	8376	PERL_PV_ESCAPE_UNI_DETECT \|
	8377	PERL_PV_PRETTY_ELIPSES \|
	8378	PERL_PV_PRETTY_LTGT
	8379	);
	8380	Perl_sv_catpvf(aTHX_ sv, " %s", s );
	8381	} else if (k == TRIE) {
	8382	/* print the details of the trie in dumpuntil instead, as
	8383	* progi->data isn't available here */
	8384	const char op = OP(o);
	8385	const U32 n = ARG(o);
	8386	const reg_ac_data * const ac = IS_TRIE_AC(op) ?
	8387	(reg_ac_data *)progi->data->data[n] :
	8388	NULL;
	8389	const reg_trie_data * const trie
	8390	= (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
	8391
	8392	Perl_sv_catpvf(aTHX_ sv, "-%s",PL_reg_name[o->flags]);
	8393	DEBUG_TRIE_COMPILE_r(
	8394	Perl_sv_catpvf(aTHX_ sv,
	8395	"<S:%"UVuf"/%"IVdf" W:%"UVuf" L:%"UVuf"/%"UVuf" C:%"UVuf"/%"UVuf">",
	8396	(UV)trie->startstate,
	8397	(IV)trie->statecount-1, /* -1 because of the unused 0 element */
	8398	(UV)trie->wordcount,
	8399	(UV)trie->minlen,
	8400	(UV)trie->maxlen,
	8401	(UV)TRIE_CHARCOUNT(trie),
	8402	(UV)trie->uniquecharcount
	8403	)
	8404	);
	8405	if ( IS_ANYOF_TRIE(op) \|\| trie->bitmap ) {
	8406	int i;
	8407	int rangestart = -1;
	8408	U8* bitmap = IS_ANYOF_TRIE(op) ? (U8)ANYOF_BITMAP(o) : (U8)TRIE_BITMAP(trie);
	8409	Perl_sv_catpvf(aTHX_ sv, "[");
	8410	for (i = 0; i <= 256; i++) {
	8411	if (i < 256 && BITMAP_TEST(bitmap,i)) {
	8412	if (rangestart == -1)
	8413	rangestart = i;
	8414	} else if (rangestart != -1) {
	8415	if (i <= rangestart + 3)
	8416	for (; rangestart < i; rangestart++)
	8417	put_byte(sv, rangestart);
	8418	else {
	8419	put_byte(sv, rangestart);
	8420	sv_catpvs(sv, "-");
	8421	put_byte(sv, i - 1);
	8422	}
	8423	rangestart = -1;
	8424	}
	8425	}
	8426	Perl_sv_catpvf(aTHX_ sv, "]");
	8427	}
	8428
	8429	} else if (k == CURLY) {
	8430	if (OP(o) == CURLYM \|\| OP(o) == CURLYN \|\| OP(o) == CURLYX)
	8431	Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
	8432	Perl_sv_catpvf(aTHX_ sv, " {%d,%d}", ARG1(o), ARG2(o));
	8433	}
	8434	else if (k == WHILEM && o->flags) /* Ordinal/of */
	8435	Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
	8436	else if (k == REF \|\| k == OPEN \|\| k == CLOSE \|\| k == GROUPP \|\| OP(o)==ACCEPT) {
	8437	Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o)); /* Parenth number */
	8438	if ( prog->paren_names ) {
	8439	if ( k != REF \|\| OP(o) < NREF) {
	8440	AV list= (AV )progi->data->data[progi->name_list_idx];
	8441	SV **name= av_fetch(list, ARG(o), 0 );
	8442	if (name)
	8443	Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
	8444	}
	8445	else {
	8446	AV list= (AV )progi->data->data[ progi->name_list_idx ];
	8447	SV sv_dat=(SV)progi->data->data[ ARG( o ) ];
	8448	I32 nums=(I32)SvPVX(sv_dat);
	8449	SV **name= av_fetch(list, nums[0], 0 );
	8450	I32 n;
	8451	if (name) {
	8452	for ( n=0; n<SvIVX(sv_dat); n++ ) {
	8453	Perl_sv_catpvf(aTHX_ sv, "%s%"IVdf,
	8454	(n ? "," : ""), (IV)nums[n]);
	8455	}
	8456	Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
	8457	}
	8458	}
	8459	}
	8460	} else if (k == GOSUB)
	8461	Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o)); /* Paren and offset */
	8462	else if (k == VERB) {
	8463	if (!o->flags)
	8464	Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
	8465	SVfARG((SV*)progi->data->data[ ARG( o ) ]));
	8466	} else if (k == LOGICAL)
	8467	Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */
	8468	else if (k == ANYOF) {
	8469	int i, rangestart = -1;
	8470	const U8 flags = ANYOF_FLAGS(o);
	8471
	8472	/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
	8473	static const char * const anyofs[] = {
	8474	"\\w",
	8475	"\\W",
	8476	"\\s",
	8477	"\\S",
	8478	"\\d",
	8479	"\\D",
	8480	"[:alnum:]",
	8481	"[:^alnum:]",
	8482	"[:alpha:]",
	8483	"[:^alpha:]",
	8484	"[:ascii:]",
	8485	"[:^ascii:]",
	8486	"[:ctrl:]",
	8487	"[:^ctrl:]",
	8488	"[:graph:]",
	8489	"[:^graph:]",
	8490	"[:lower:]",
	8491	"[:^lower:]",
	8492	"[:print:]",
	8493	"[:^print:]",
	8494	"[:punct:]",
	8495	"[:^punct:]",
	8496	"[:upper:]",
	8497	"[:^upper:]",
	8498	"[:xdigit:]",
	8499	"[:^xdigit:]",
	8500	"[:space:]",
	8501	"[:^space:]",
	8502	"[:blank:]",
	8503	"[:^blank:]"
	8504	};
	8505
	8506	if (flags & ANYOF_LOCALE)
	8507	sv_catpvs(sv, "{loc}");
	8508	if (flags & ANYOF_FOLD)
	8509	sv_catpvs(sv, "{i}");
	8510	Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
	8511	if (flags & ANYOF_INVERT)
	8512	sv_catpvs(sv, "^");
	8513	for (i = 0; i <= 256; i++) {
	8514	if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
	8515	if (rangestart == -1)
	8516	rangestart = i;
	8517	} else if (rangestart != -1) {
	8518	if (i <= rangestart + 3)
	8519	for (; rangestart < i; rangestart++)
	8520	put_byte(sv, rangestart);
	8521	else {
	8522	put_byte(sv, rangestart);
	8523	sv_catpvs(sv, "-");
	8524	put_byte(sv, i - 1);
	8525	}
	8526	rangestart = -1;
	8527	}
	8528	}
	8529
	8530	if (o->flags & ANYOF_CLASS)
	8531	for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
	8532	if (ANYOF_CLASS_TEST(o,i))
	8533	sv_catpv(sv, anyofs[i]);
	8534
	8535	if (flags & ANYOF_UNICODE)
	8536	sv_catpvs(sv, "{unicode}");
	8537	else if (flags & ANYOF_UNICODE_ALL)
	8538	sv_catpvs(sv, "{unicode_all}");
	8539
	8540	{
	8541	SV *lv;
	8542	SV * const sw = regclass_swash(prog, o, FALSE, &lv, 0);
	8543
	8544	if (lv) {
	8545	if (sw) {
	8546	U8 s[UTF8_MAXBYTES_CASE+1];
	8547
	8548	for (i = 0; i <= 256; i++) { /* just the first 256 */
	8549	uvchr_to_utf8(s, i);
	8550
	8551	if (i < 256 && swash_fetch(sw, s, TRUE)) {
	8552	if (rangestart == -1)
	8553	rangestart = i;
	8554	} else if (rangestart != -1) {
	8555	if (i <= rangestart + 3)
	8556	for (; rangestart < i; rangestart++) {
	8557	const U8 * const e = uvchr_to_utf8(s,rangestart);
	8558	U8 *p;
	8559	for(p = s; p < e; p++)
	8560	put_byte(sv, *p);
	8561	}
	8562	else {
	8563	const U8 *e = uvchr_to_utf8(s,rangestart);
	8564	U8 *p;
	8565	for (p = s; p < e; p++)
	8566	put_byte(sv, *p);
	8567	sv_catpvs(sv, "-");
	8568	e = uvchr_to_utf8(s, i-1);
	8569	for (p = s; p < e; p++)
	8570	put_byte(sv, *p);
	8571	}
	8572	rangestart = -1;
	8573	}
	8574	}
	8575
	8576	sv_catpvs(sv, "..."); /* et cetera */
	8577	}
	8578
	8579	{
	8580	char *s = savesvpv(lv);
	8581	char * const origs = s;
	8582
	8583	while (s && s != '\n')
	8584	s++;
	8585
	8586	if (*s == '\n') {
	8587	const char * const t = ++s;
	8588
	8589	while (*s) {
	8590	if (*s == '\n')
	8591	*s = ' ';
	8592	s++;
	8593	}
	8594	if (s[-1] == ' ')
	8595	s[-1] = 0;
	8596
	8597	sv_catpv(sv, t);
	8598	}
	8599
	8600	Safefree(origs);
	8601	}
	8602	}
	8603	}
	8604
	8605	Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
	8606	}
	8607	else if (k == BRANCHJ && (OP(o) == UNLESSM \|\| OP(o) == IFMATCH))
	8608	Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
	8609	#else
	8610	PERL_UNUSED_CONTEXT;
	8611	PERL_UNUSED_ARG(sv);
	8612	PERL_UNUSED_ARG(o);
	8613	PERL_UNUSED_ARG(prog);
	8614	#endif /* DEBUGGING */
	8615	}
	8616
	8617	SV *
	8618	Perl_re_intuit_string(pTHX_ regexp *prog)
	8619	{ /* Assume that RE_INTUIT is set */
	8620	dVAR;
	8621	GET_RE_DEBUG_FLAGS_DECL;
	8622	PERL_UNUSED_CONTEXT;
	8623
	8624	DEBUG_COMPILE_r(
	8625	{
	8626	const char * const s = SvPV_nolen_const(prog->check_substr
	8627	? prog->check_substr : prog->check_utf8);
	8628
	8629	if (!PL_colorset) reginitcolors();
	8630	PerlIO_printf(Perl_debug_log,
	8631	"%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
	8632	PL_colors[4],
	8633	prog->check_substr ? "" : "utf8 ",
	8634	PL_colors[5],PL_colors[0],
	8635	s,
	8636	PL_colors[1],
	8637	(strlen(s) > 60 ? "..." : ""));
	8638	} );
	8639
	8640	return prog->check_substr ? prog->check_substr : prog->check_utf8;
	8641	}
	8642
	8643	/*
	8644	pregfree()
	8645
	8646	handles refcounting and freeing the perl core regexp structure. When
	8647	it is necessary to actually free the structure the first thing it
	8648	does is call the 'free' method of the regexp_engine associated to
	8649	the regexp, allowing the handling of the void *pprivate; member
	8650	first. (This routine is not overridable by extensions, which is why
	8651	the extensions free is called first.)
	8652
	8653	See regdupe and regdupe_internal if you change anything here.
	8654	*/
	8655	#ifndef PERL_IN_XSUB_RE
	8656	void
	8657	Perl_pregfree(pTHX_ struct regexp *r)
	8658	{
	8659	dVAR;
	8660	GET_RE_DEBUG_FLAGS_DECL;
	8661
	8662	if (!r \|\| (--r->refcnt > 0))
	8663	return;
	8664	if (r->mother_re) {
	8665	ReREFCNT_dec(r->mother_re);
	8666	} else {
	8667	CALLREGFREE_PVT(r); /* free the private data */
	8668	if (r->paren_names)
	8669	SvREFCNT_dec(r->paren_names);
	8670	Safefree(r->wrapped);
	8671	}
	8672	if (r->substrs) {
	8673	if (r->anchored_substr)
	8674	SvREFCNT_dec(r->anchored_substr);
	8675	if (r->anchored_utf8)
	8676	SvREFCNT_dec(r->anchored_utf8);
	8677	if (r->float_substr)
	8678	SvREFCNT_dec(r->float_substr);
	8679	if (r->float_utf8)
	8680	SvREFCNT_dec(r->float_utf8);
	8681	Safefree(r->substrs);
	8682	}
	8683	RX_MATCH_COPY_FREE(r);
	8684	#ifdef PERL_OLD_COPY_ON_WRITE
	8685	if (r->saved_copy)
	8686	SvREFCNT_dec(r->saved_copy);
	8687	#endif
	8688	Safefree(r->swap);
	8689	Safefree(r->offs);
	8690	Safefree(r);
	8691	}
	8692
	8693	/* reg_temp_copy()
	8694
	8695	This is a hacky workaround to the structural issue of match results
	8696	being stored in the regexp structure which is in turn stored in
	8697	PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern
	8698	could be PL_curpm in multiple contexts, and could require multiple
	8699	result sets being associated with the pattern simultaneously, such
	8700	as when doing a recursive match with (??{$qr})
	8701
	8702	The solution is to make a lightweight copy of the regexp structure
	8703	when a qr// is returned from the code executed by (??{$qr}). This
	8704	lightweight copy doesn't actually own any of its data except for
	8705	the start/end offsets and the actual regexp structure itself.
	8706
	8707	*/
	8708
	8709
	8710	regexp *
	8711	Perl_reg_temp_copy (pTHX_ struct regexp *r) {
	8712	regexp *ret;
	8713	register const I32 npar = r->nparens+1;
	8714	(void)ReREFCNT_inc(r);
	8715	Newx(ret, 1, regexp);
	8716	StructCopy(r, ret, regexp);
	8717	Newx(ret->offs, npar, regexp_paren_pair);
	8718	Copy(r->offs, ret->offs, npar, regexp_paren_pair);
	8719	ret->refcnt = 1;
	8720	if (r->substrs) {
	8721	Newx(ret->substrs, 1, struct reg_substr_data);
	8722	StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
	8723
	8724	SvREFCNT_inc_void(ret->anchored_substr);
	8725	SvREFCNT_inc_void(ret->anchored_utf8);
	8726	SvREFCNT_inc_void(ret->float_substr);
	8727	SvREFCNT_inc_void(ret->float_utf8);
	8728
	8729	/* check_substr and check_utf8, if non-NULL, point to either their
	8730	anchored or float namesakes, and don't hold a second reference. */
	8731	}
	8732	RX_MATCH_COPIED_off(ret);
	8733	#ifdef PERL_OLD_COPY_ON_WRITE
	8734	/* this is broken. */
	8735	assert(0);
	8736	if (ret->saved_copy)
	8737	ret->saved_copy=NULL;
	8738	#endif
	8739	ret->mother_re = r;
	8740	ret->swap = NULL;
	8741
	8742	return ret;
	8743	}
	8744	#endif
	8745
	8746	/* regfree_internal()
	8747
	8748	Free the private data in a regexp. This is overloadable by
	8749	extensions. Perl takes care of the regexp structure in pregfree(),
	8750	this covers the *pprivate pointer which technically perldoesnt
	8751	know about, however of course we have to handle the
	8752	regexp_internal structure when no extension is in use.
	8753
	8754	Note this is called before freeing anything in the regexp
	8755	structure.
	8756	*/
	8757
	8758	void
	8759	Perl_regfree_internal(pTHX_ struct regexp *r)
	8760	{
	8761	dVAR;
	8762	RXi_GET_DECL(r,ri);
	8763	GET_RE_DEBUG_FLAGS_DECL;
	8764
	8765	DEBUG_COMPILE_r({
	8766	if (!PL_colorset)
	8767	reginitcolors();
	8768	{
	8769	SV *dsv= sv_newmortal();
	8770	RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8),
	8771	dsv, r->precomp, r->prelen, 60);
	8772	PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n",
	8773	PL_colors[4],PL_colors[5],s);
	8774	}
	8775	});
	8776	#ifdef RE_TRACK_PATTERN_OFFSETS
	8777	if (ri->u.offsets)
	8778	Safefree(ri->u.offsets); /* 20010421 MJD */
	8779	#endif
	8780	if (ri->data) {
	8781	int n = ri->data->count;
	8782	PAD* new_comppad = NULL;
	8783	PAD* old_comppad;
	8784	PADOFFSET refcnt;
	8785
	8786	while (--n >= 0) {
	8787	/* If you add a ->what type here, update the comment in regcomp.h */
	8788	switch (ri->data->what[n]) {
	8789	case 's':
	8790	case 'S':
	8791	case 'u':
	8792	SvREFCNT_dec((SV*)ri->data->data[n]);
	8793	break;
	8794	case 'f':
	8795	Safefree(ri->data->data[n]);
	8796	break;
	8797	case 'p':
	8798	new_comppad = (AV*)ri->data->data[n];
	8799	break;
	8800	case 'o':
	8801	if (new_comppad == NULL)
	8802	Perl_croak(aTHX_ "panic: pregfree comppad");
	8803	PAD_SAVE_LOCAL(old_comppad,
	8804	/* Watch out for global destruction's random ordering. */
	8805	(SvTYPE(new_comppad) == SVt_PVAV) ? new_comppad : NULL
	8806	);
	8807	OP_REFCNT_LOCK;
	8808	refcnt = OpREFCNT_dec((OP_4tree*)ri->data->data[n]);
	8809	OP_REFCNT_UNLOCK;
	8810	if (!refcnt)
	8811	op_free((OP_4tree*)ri->data->data[n]);
	8812
	8813	PAD_RESTORE_LOCAL(old_comppad);
	8814	SvREFCNT_dec((SV*)new_comppad);
	8815	new_comppad = NULL;
	8816	break;
	8817	case 'n':
	8818	break;
	8819	case 'T':
	8820	{ /* Aho Corasick add-on structure for a trie node.
	8821	Used in stclass optimization only */
	8822	U32 refcount;
	8823	reg_ac_data aho=(reg_ac_data)ri->data->data[n];
	8824	OP_REFCNT_LOCK;
	8825	refcount = --aho->refcount;
	8826	OP_REFCNT_UNLOCK;
	8827	if ( !refcount ) {
	8828	PerlMemShared_free(aho->states);
	8829	PerlMemShared_free(aho->fail);
	8830	/* do this last!!!! */
	8831	PerlMemShared_free(ri->data->data[n]);
	8832	PerlMemShared_free(ri->regstclass);
	8833	}
	8834	}
	8835	break;
	8836	case 't':
	8837	{
	8838	/* trie structure. */
	8839	U32 refcount;
	8840	reg_trie_data trie=(reg_trie_data)ri->data->data[n];
	8841	OP_REFCNT_LOCK;
	8842	refcount = --trie->refcount;
	8843	OP_REFCNT_UNLOCK;
	8844	if ( !refcount ) {
	8845	PerlMemShared_free(trie->charmap);
	8846	PerlMemShared_free(trie->states);
	8847	PerlMemShared_free(trie->trans);
	8848	if (trie->bitmap)
	8849	PerlMemShared_free(trie->bitmap);
	8850	if (trie->wordlen)
	8851	PerlMemShared_free(trie->wordlen);
	8852	if (trie->jump)
	8853	PerlMemShared_free(trie->jump);
	8854	if (trie->nextword)
	8855	PerlMemShared_free(trie->nextword);
	8856	/* do this last!!!! */
	8857	PerlMemShared_free(ri->data->data[n]);
	8858	}
	8859	}
	8860	break;
	8861	default:
	8862	Perl_croak(aTHX_ "panic: regfree data code '%c'", ri->data->what[n]);
	8863	}
	8864	}
	8865	Safefree(ri->data->what);
	8866	Safefree(ri->data);
	8867	}
	8868
	8869	Safefree(ri);
	8870	}
	8871
	8872	#define sv_dup_inc(s,t) SvREFCNT_inc(sv_dup(s,t))
	8873	#define av_dup_inc(s,t) (AV)SvREFCNT_inc(sv_dup((SV)s,t))
	8874	#define hv_dup_inc(s,t) (HV)SvREFCNT_inc(sv_dup((SV)s,t))
	8875	#define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL)
	8876
	8877	/*
	8878	re_dup - duplicate a regexp.
	8879
	8880	This routine is expected to clone a given regexp structure. It is not
	8881	only compiled under USE_ITHREADS.
	8882
	8883	After all of the core data stored in struct regexp is duplicated
	8884	the regexp_engine.dupe method is used to copy any private data
	8885	stored in the *pprivate pointer. This allows extensions to handle
	8886	any duplication it needs to do.
	8887
	8888	See pregfree() and regfree_internal() if you change anything here.
	8889	*/
	8890	#if defined(USE_ITHREADS)
	8891	#ifndef PERL_IN_XSUB_RE
	8892	regexp *
	8893	Perl_re_dup(pTHX_ const regexp r, CLONE_PARAMS param)
	8894	{
	8895	dVAR;
	8896	regexp *ret;
	8897	I32 npar;
	8898
	8899	if (!r)
	8900	return (REGEXP *)NULL;
	8901
	8902	if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r)))
	8903	return ret;
	8904
	8905
	8906	npar = r->nparens+1;
	8907	Newx(ret, 1, regexp);
	8908	StructCopy(r, ret, regexp);
	8909	Newx(ret->offs, npar, regexp_paren_pair);
	8910	Copy(r->offs, ret->offs, npar, regexp_paren_pair);
	8911	if(ret->swap) {
	8912	/* no need to copy these */
	8913	Newx(ret->swap, npar, regexp_paren_pair);
	8914	}
	8915
	8916	if (ret->substrs) {
	8917	/* Do it this way to avoid reading from *r after the StructCopy().
	8918	That way, if any of the sv_dup_inc()s dislodge *r from the L1
	8919	cache, it doesn't matter. */
	8920	const bool anchored = r->check_substr == r->anchored_substr;
	8921	Newx(ret->substrs, 1, struct reg_substr_data);
	8922	StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
	8923
	8924	ret->anchored_substr = sv_dup_inc(ret->anchored_substr, param);
	8925	ret->anchored_utf8 = sv_dup_inc(ret->anchored_utf8, param);
	8926	ret->float_substr = sv_dup_inc(ret->float_substr, param);
	8927	ret->float_utf8 = sv_dup_inc(ret->float_utf8, param);
	8928
	8929	/* check_substr and check_utf8, if non-NULL, point to either their
	8930	anchored or float namesakes, and don't hold a second reference. */
	8931
	8932	if (ret->check_substr) {
	8933	if (anchored) {
	8934	assert(r->check_utf8 == r->anchored_utf8);
	8935	ret->check_substr = ret->anchored_substr;
	8936	ret->check_utf8 = ret->anchored_utf8;
	8937	} else {
	8938	assert(r->check_substr == r->float_substr);
	8939	assert(r->check_utf8 == r->float_utf8);
	8940	ret->check_substr = ret->float_substr;
	8941	ret->check_utf8 = ret->float_utf8;
	8942	}
	8943	}
	8944	}
	8945
	8946	ret->wrapped = SAVEPVN(ret->wrapped, ret->wraplen+1);
	8947	ret->precomp = ret->wrapped + (ret->precomp - ret->wrapped);
	8948	ret->paren_names = hv_dup_inc(ret->paren_names, param);
	8949
	8950	if (ret->pprivate)
	8951	RXi_SET(ret,CALLREGDUPE_PVT(ret,param));
	8952
	8953	if (RX_MATCH_COPIED(ret))
	8954	ret->subbeg = SAVEPVN(ret->subbeg, ret->sublen);
	8955	else
	8956	ret->subbeg = NULL;
	8957	#ifdef PERL_OLD_COPY_ON_WRITE
	8958	ret->saved_copy = NULL;
	8959	#endif
	8960
	8961	ret->mother_re = NULL;
	8962	ret->gofs = 0;
	8963	ret->seen_evals = 0;
	8964
	8965	ptr_table_store(PL_ptr_table, r, ret);
	8966	return ret;
	8967	}
	8968	#endif /* PERL_IN_XSUB_RE */
	8969
	8970	/*
	8971	regdupe_internal()
	8972
	8973	This is the internal complement to regdupe() which is used to copy
	8974	the structure pointed to by the *pprivate pointer in the regexp.
	8975	This is the core version of the extension overridable cloning hook.
	8976	The regexp structure being duplicated will be copied by perl prior
	8977	to this and will be provided as the regexp *r argument, however
	8978	with the /old/ structures pprivate pointer value. Thus this routine
	8979	may override any copying normally done by perl.
	8980
	8981	It returns a pointer to the new regexp_internal structure.
	8982	*/
	8983
	8984	void *
	8985	Perl_regdupe_internal(pTHX_ const regexp r, CLONE_PARAMS param)
	8986	{
	8987	dVAR;
	8988	regexp_internal *reti;
	8989	int len, npar;
	8990	RXi_GET_DECL(r,ri);
	8991
	8992	npar = r->nparens+1;
	8993	len = ProgLen(ri);
	8994
	8995	Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal);
	8996	Copy(ri->program, reti->program, len+1, regnode);
	8997
	8998
	8999	reti->regstclass = NULL;
	9000
	9001	if (ri->data) {
	9002	struct reg_data *d;
	9003	const int count = ri->data->count;
	9004	int i;
	9005
	9006	Newxc(d, sizeof(struct reg_data) + countsizeof(void ),
	9007	char, struct reg_data);
	9008	Newx(d->what, count, U8);
	9009
	9010	d->count = count;
	9011	for (i = 0; i < count; i++) {
	9012	d->what[i] = ri->data->what[i];
	9013	switch (d->what[i]) {
	9014	/* legal options are one of: sSfpontTu
	9015	see also regcomp.h and pregfree() */
	9016	case 's':
	9017	case 'S':
	9018	case 'p': /* actually an AV, but the dup function is identical. */
	9019	case 'u': /* actually an HV, but the dup function is identical. */
	9020	d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param);
	9021	break;
	9022	case 'f':
	9023	/* This is cheating. */
	9024	Newx(d->data[i], 1, struct regnode_charclass_class);
	9025	StructCopy(ri->data->data[i], d->data[i],
	9026	struct regnode_charclass_class);
	9027	reti->regstclass = (regnode*)d->data[i];
	9028	break;
	9029	case 'o':
	9030	/* Compiled op trees are readonly and in shared memory,
	9031	and can thus be shared without duplication. */
	9032	OP_REFCNT_LOCK;
	9033	d->data[i] = (void)OpREFCNT_inc((OP)ri->data->data[i]);
	9034	OP_REFCNT_UNLOCK;
	9035	break;
	9036	case 'T':
	9037	/* Trie stclasses are readonly and can thus be shared
	9038	* without duplication. We free the stclass in pregfree
	9039	* when the corresponding reg_ac_data struct is freed.
	9040	*/
	9041	reti->regstclass= ri->regstclass;
	9042	/* Fall through */
	9043	case 't':
	9044	OP_REFCNT_LOCK;
	9045	((reg_trie_data*)ri->data->data[i])->refcount++;
	9046	OP_REFCNT_UNLOCK;
	9047	/* Fall through */
	9048	case 'n':
	9049	d->data[i] = ri->data->data[i];
	9050	break;
	9051	default:
	9052	Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", ri->data->what[i]);
	9053	}
	9054	}
	9055
	9056	reti->data = d;
	9057	}
	9058	else
	9059	reti->data = NULL;
	9060
	9061	reti->name_list_idx = ri->name_list_idx;
	9062
	9063	#ifdef RE_TRACK_PATTERN_OFFSETS
	9064	if (ri->u.offsets) {
	9065	Newx(reti->u.offsets, 2*len+1, U32);
	9066	Copy(ri->u.offsets, reti->u.offsets, 2*len+1, U32);
	9067	}
	9068	#else
	9069	SetProgLen(reti,len);
	9070	#endif
	9071
	9072	return (void*)reti;
	9073	}
	9074
	9075	#endif /* USE_ITHREADS */
	9076
	9077	/*
	9078	reg_stringify()
	9079
	9080	converts a regexp embedded in a MAGIC struct to its stringified form,
	9081	caching the converted form in the struct and returns the cached
	9082	string.
	9083
	9084	If lp is nonnull then it is used to return the length of the
	9085	resulting string
	9086
	9087	If flags is nonnull and the returned string contains UTF8 then
	9088	(*flags & 1) will be true.
	9089
	9090	If haseval is nonnull then it is used to return whether the pattern
	9091	contains evals.
	9092
	9093	Normally called via macro:
	9094
	9095	CALLREG_STRINGIFY(mg,&len,&utf8);
	9096
	9097	And internally with
	9098
	9099	CALLREG_AS_STR(mg,&lp,&flags,&haseval)
	9100
	9101	See sv_2pv_flags() in sv.c for an example of internal usage.
	9102
	9103	*/
	9104	#ifndef PERL_IN_XSUB_RE
	9105
	9106	char *
	9107	Perl_reg_stringify(pTHX_ MAGIC mg, STRLEN lp, U32 flags, I32 haseval ) {
	9108	dVAR;
	9109	const regexp * const re = (regexp *)mg->mg_obj;
	9110	if (haseval)
	9111	*haseval = re->seen_evals;
	9112	if (flags)
	9113	*flags = ((re->extflags & RXf_UTF8) ? 1 : 0);
	9114	if (lp)
	9115	*lp = re->wraplen;
	9116	return re->wrapped;
	9117	}
	9118
	9119	/*
	9120	- regnext - dig the "next" pointer out of a node
	9121	*/
	9122	regnode *
	9123	Perl_regnext(pTHX_ register regnode *p)
	9124	{
	9125	dVAR;
	9126	register I32 offset;
	9127
	9128	if (!p)
	9129	return(NULL);
	9130
	9131	offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
	9132	if (offset == 0)
	9133	return(NULL);
	9134
	9135	return(p+offset);
	9136	}
	9137	#endif
	9138
	9139	STATIC void
	9140	S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
	9141	{
	9142	va_list args;
	9143	STRLEN l1 = strlen(pat1);
	9144	STRLEN l2 = strlen(pat2);
	9145	char buf[512];
	9146	SV *msv;
	9147	const char *message;
	9148
	9149	if (l1 > 510)
	9150	l1 = 510;
	9151	if (l1 + l2 > 510)
	9152	l2 = 510 - l1;
	9153	Copy(pat1, buf, l1 , char);
	9154	Copy(pat2, buf + l1, l2 , char);
	9155	buf[l1 + l2] = '\n';
	9156	buf[l1 + l2 + 1] = '\0';
	9157	#ifdef I_STDARG
	9158	/* ANSI variant takes additional second argument */
	9159	va_start(args, pat2);
	9160	#else
	9161	va_start(args);
	9162	#endif
	9163	msv = vmess(buf, &args);
	9164	va_end(args);
	9165	message = SvPV_const(msv,l1);
	9166	if (l1 > 512)
	9167	l1 = 512;
	9168	Copy(message, buf, l1 , char);
	9169	buf[l1-1] = '\0'; /* Overwrite \n */
	9170	Perl_croak(aTHX_ "%s", buf);
	9171	}
	9172
	9173	/* XXX Here's a total kludge. But we need to re-enter for swash routines. */
	9174
	9175	#ifndef PERL_IN_XSUB_RE
	9176	void
	9177	Perl_save_re_context(pTHX)
	9178	{
	9179	dVAR;
	9180
	9181	struct re_save_state *state;
	9182
	9183	SAVEVPTR(PL_curcop);
	9184	SSGROW(SAVESTACK_ALLOC_FOR_RE_SAVE_STATE + 1);
	9185
	9186	state = (struct re_save_state *)(PL_savestack + PL_savestack_ix);
	9187	PL_savestack_ix += SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
	9188	SSPUSHINT(SAVEt_RE_STATE);
	9189
	9190	Copy(&PL_reg_state, state, 1, struct re_save_state);
	9191
	9192	PL_reg_start_tmp = 0;
	9193	PL_reg_start_tmpl = 0;
	9194	PL_reg_oldsaved = NULL;
	9195	PL_reg_oldsavedlen = 0;
	9196	PL_reg_maxiter = 0;
	9197	PL_reg_leftiter = 0;
	9198	PL_reg_poscache = NULL;
	9199	PL_reg_poscache_size = 0;
	9200	#ifdef PERL_OLD_COPY_ON_WRITE
	9201	PL_nrs = NULL;
	9202	#endif
	9203
	9204	/* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
	9205	if (PL_curpm) {
	9206	const REGEXP * const rx = PM_GETRE(PL_curpm);
	9207	if (rx) {
	9208	U32 i;
	9209	for (i = 1; i <= rx->nparens; i++) {
	9210	char digits[TYPE_CHARS(long)];
	9211	const STRLEN len = my_snprintf(digits, sizeof(digits), "%lu", (long)i);
	9212	GV const const gvp
	9213	= (GV**)hv_fetch(PL_defstash, digits, len, 0);
	9214
	9215	if (gvp) {
	9216	GV * const gv = *gvp;
	9217	if (SvTYPE(gv) == SVt_PVGV && GvSV(gv))
	9218	save_scalar(gv);
	9219	}
	9220	}
	9221	}
	9222	}
	9223	}
	9224	#endif
	9225
	9226	static void
	9227	clear_re(pTHX_ void *r)
	9228	{
	9229	dVAR;
	9230	ReREFCNT_dec((regexp *)r);
	9231	}
	9232
	9233	#ifdef DEBUGGING
	9234
	9235	STATIC void
	9236	S_put_byte(pTHX_ SV *sv, int c)
	9237	{
	9238	if (isCNTRL(c) \|\| c == 255 \|\| !isPRINT(c))
	9239	Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
	9240	else if (c == '-' \|\| c == ']' \|\| c == '\\' \|\| c == '^')
	9241	Perl_sv_catpvf(aTHX_ sv, "\\%c", c);
	9242	else
	9243	Perl_sv_catpvf(aTHX_ sv, "%c", c);
	9244	}
	9245
	9246
	9247	#define CLEAR_OPTSTART \
	9248	if (optstart) STMT_START { \
	9249	DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log, " (%"IVdf" nodes)\n", (IV)(node - optstart))); \
	9250	optstart=NULL; \
	9251	} STMT_END
	9252
	9253	#define DUMPUNTIL(b,e) CLEAR_OPTSTART; node=dumpuntil(r,start,(b),(e),last,sv,indent+1,depth+1);
	9254
	9255	STATIC const regnode *
	9256	S_dumpuntil(pTHX_ const regexp r, const regnode start, const regnode *node,
	9257	const regnode last, const regnode plast,
	9258	SV* sv, I32 indent, U32 depth)
	9259	{
	9260	dVAR;
	9261	register U8 op = PSEUDO; /* Arbitrary non-END op. */
	9262	register const regnode *next;
	9263	const regnode *optstart= NULL;
	9264
	9265	RXi_GET_DECL(r,ri);
	9266	GET_RE_DEBUG_FLAGS_DECL;
	9267
	9268	#ifdef DEBUG_DUMPUNTIL
	9269	PerlIO_printf(Perl_debug_log, "--- %d : %d - %d - %d\n",indent,node-start,
	9270	last ? last-start : 0,plast ? plast-start : 0);
	9271	#endif
	9272
	9273	if (plast && plast < last)
	9274	last= plast;
	9275
	9276	while (PL_regkind[op] != END && (!last \|\| node < last)) {
	9277	/* While that wasn't END last time... */
	9278	NODE_ALIGN(node);
	9279	op = OP(node);
	9280	if (op == CLOSE \|\| op == WHILEM)
	9281	indent--;
	9282	next = regnext((regnode *)node);
	9283
	9284	/* Where, what. */
	9285	if (OP(node) == OPTIMIZED) {
	9286	if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
	9287	optstart = node;
	9288	else
	9289	goto after_print;
	9290	} else
	9291	CLEAR_OPTSTART;
	9292
	9293	regprop(r, sv, node);
	9294	PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
	9295	(int)(2*indent + 1), "", SvPVX_const(sv));
	9296
	9297	if (OP(node) != OPTIMIZED) {
	9298	if (next == NULL) /* Next ptr. */
	9299	PerlIO_printf(Perl_debug_log, " (0)");
	9300	else if (PL_regkind[(U8)op] == BRANCH && PL_regkind[OP(next)] != BRANCH )
	9301	PerlIO_printf(Perl_debug_log, " (FAIL)");
	9302	else
	9303	PerlIO_printf(Perl_debug_log, " (%"IVdf")", (IV)(next - start));
	9304	(void)PerlIO_putc(Perl_debug_log, '\n');
	9305	}
	9306
	9307	after_print:
	9308	if (PL_regkind[(U8)op] == BRANCHJ) {
	9309	assert(next);
	9310	{
	9311	register const regnode *nnode = (OP(next) == LONGJMP
	9312	? regnext((regnode *)next)
	9313	: next);
	9314	if (last && nnode > last)
	9315	nnode = last;
	9316	DUMPUNTIL(NEXTOPER(NEXTOPER(node)), nnode);
	9317	}
	9318	}
	9319	else if (PL_regkind[(U8)op] == BRANCH) {
	9320	assert(next);
	9321	DUMPUNTIL(NEXTOPER(node), next);
	9322	}
	9323	else if ( PL_regkind[(U8)op] == TRIE ) {
	9324	const regnode *this_trie = node;
	9325	const char op = OP(node);
	9326	const U32 n = ARG(node);
	9327	const reg_ac_data * const ac = op>=AHOCORASICK ?
	9328	(reg_ac_data *)ri->data->data[n] :
	9329	NULL;
	9330	const reg_trie_data * const trie =
	9331	(reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
	9332	#ifdef DEBUGGING
	9333	AV const trie_words = (AV ) ri->data->data[n + TRIE_WORDS_OFFSET];
	9334	#endif
	9335	const regnode *nextbranch= NULL;
	9336	I32 word_idx;
	9337	sv_setpvn(sv, "", 0);
	9338	for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
	9339	SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);
	9340
	9341	PerlIO_printf(Perl_debug_log, "%*s%s ",
	9342	(int)(2*(indent+3)), "",
	9343	elem_ptr ? pv_pretty(sv, SvPV_nolen_const(elem_ptr), SvCUR(elem_ptr), 60,
	9344	PL_colors[0], PL_colors[1],
	9345	(SvUTF8(*elem_ptr) ? PERL_PV_ESCAPE_UNI : 0) \|
	9346	PERL_PV_PRETTY_ELIPSES \|
	9347	PERL_PV_PRETTY_LTGT
	9348	)
	9349	: "???"
	9350	);
	9351	if (trie->jump) {
	9352	U16 dist= trie->jump[word_idx+1];
	9353	PerlIO_printf(Perl_debug_log, "(%"UVuf")\n",
	9354	(UV)((dist ? this_trie + dist : next) - start));
	9355	if (dist) {
	9356	if (!nextbranch)
	9357	nextbranch= this_trie + trie->jump[0];
	9358	DUMPUNTIL(this_trie + dist, nextbranch);
	9359	}
	9360	if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
	9361	nextbranch= regnext((regnode *)nextbranch);
	9362	} else {
	9363	PerlIO_printf(Perl_debug_log, "\n");
	9364	}
	9365	}
	9366	if (last && next > last)
	9367	node= last;
	9368	else
	9369	node= next;
	9370	}
	9371	else if ( op == CURLY ) { /* "next" might be very big: optimizer */
	9372	DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS,
	9373	NEXTOPER(node) + EXTRA_STEP_2ARGS + 1);
	9374	}
	9375	else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
	9376	assert(next);
	9377	DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS, next);
	9378	}
	9379	else if ( op == PLUS \|\| op == STAR) {
	9380	DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1);
	9381	}
	9382	else if (op == ANYOF) {
	9383	/* arglen 1 + class block */
	9384	node += 1 + ((ANYOF_FLAGS(node) & ANYOF_LARGE)
	9385	? ANYOF_CLASS_SKIP : ANYOF_SKIP);
	9386	node = NEXTOPER(node);
	9387	}
	9388	else if (PL_regkind[(U8)op] == EXACT) {
	9389	/* Literal string, where present. */
	9390	node += NODE_SZ_STR(node) - 1;
	9391	node = NEXTOPER(node);
	9392	}
	9393	else {
	9394	node = NEXTOPER(node);
	9395	node += regarglen[(U8)op];
	9396	}
	9397	if (op == CURLYX \|\| op == OPEN)
	9398	indent++;
	9399	}
	9400	CLEAR_OPTSTART;
	9401	#ifdef DEBUG_DUMPUNTIL
	9402	PerlIO_printf(Perl_debug_log, "--- %d\n", (int)indent);
	9403	#endif
	9404	return node;
	9405	}
	9406
	9407	#endif /* DEBUGGING */
	9408
	9409	/*
	9410	* Local variables:
	9411	* c-indentation-style: bsd
	9412	* c-basic-offset: 4
	9413	* indent-tabs-mode: t
	9414	* End:
	9415	*
	9416	* ex: set ts=8 sts=4 sw=4 noet:
	9417	*/