perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* regcomp.c
	2	*/
	3
	4	/*
	5	* "A fair jaw-cracker dwarf-language must be." --Samwise Gamgee
	6	*/
	7
	8	/* This file contains functions for compiling a regular expression. See
	9	* also regexec.c which funnily enough, contains functions for executing
	10	* a regular expression.
	11	*
	12	* This file is also copied at build time to ext/re/re_comp.c, where
	13	* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
	14	* This causes the main functions to be compiled under new names and with
	15	* debugging support added, which makes "use re 'debug'" work.
	16	*/
	17
	18	/* NOTE: this is derived from Henry Spencer's regexp code, and should not
	19	* confused with the original package (see point 3 below). Thanks, Henry!
	20	*/
	21
	22	/* Additional note: this code is very heavily munged from Henry's version
	23	* in places. In some spots I've traded clarity for efficiency, so don't
	24	* blame Henry for some of the lack of readability.
	25	*/
	26
	27	/* The names of the functions have been changed from regcomp and
	28	* regexec to pregcomp and pregexec in order to avoid conflicts
	29	* with the POSIX routines of the same names.
	30	*/
	31
	32	#ifdef PERL_EXT_RE_BUILD
	33	#include "re_top.h"
	34	#endif
	35
	36	/*
	37	* pregcomp and pregexec -- regsub and regerror are not used in perl
	38	*
	39	* Copyright (c) 1986 by University of Toronto.
	40	* Written by Henry Spencer. Not derived from licensed software.
	41	*
	42	* Permission is granted to anyone to use this software for any
	43	* purpose on any computer system, and to redistribute it freely,
	44	* subject to the following restrictions:
	45	*
	46	* 1. The author is not responsible for the consequences of use of
	47	* this software, no matter how awful, even if they arise
	48	* from defects in it.
	49	*
	50	* 2. The origin of this software must not be misrepresented, either
	51	* by explicit claim or by omission.
	52	*
	53	* 3. Altered versions must be plainly marked as such, and must not
	54	* be misrepresented as being the original software.
	55	*
	56	*
	57	**** Alterations to Henry's code are...
	58	****
	59	**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	60	**** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
	61	****
	62	**** You may distribute under the terms of either the GNU General Public
	63	**** License or the Artistic License, as specified in the README file.
	64
	65	*
	66	* Beware that some of this code is subtly aware of the way operator
	67	* precedence is structured in regular expressions. Serious changes in
	68	* regular-expression syntax might require a total rethink.
	69	*/
	70	#include "EXTERN.h"
	71	#define PERL_IN_REGCOMP_C
	72	#include "perl.h"
	73
	74	#ifndef PERL_IN_XSUB_RE
	75	# include "INTERN.h"
	76	#endif
	77
	78	#define REG_COMP_C
	79	#ifdef PERL_IN_XSUB_RE
	80	# include "re_comp.h"
	81	#else
	82	# include "regcomp.h"
	83	#endif
	84
	85	#ifdef op
	86	#undef op
	87	#endif /* op */
	88
	89	#ifdef MSDOS
	90	# if defined(BUGGY_MSC6)
	91	/* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
	92	# pragma optimize("a",off)
	93	/* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
	94	# pragma optimize("w",on )
	95	# endif /* BUGGY_MSC6 */
	96	#endif /* MSDOS */
	97
	98	#ifndef STATIC
	99	#define STATIC static
	100	#endif
	101
	102	typedef struct RExC_state_t {
	103	U32 flags; /* are we folding, multilining? */
	104	char precomp; / uncompiled string. */
	105	regexp *rx;
	106	char start; / Start of input for compile */
	107	char end; / End of input for compile */
	108	char parse; / Input-scan pointer. */
	109	I32 whilem_seen; /* number of WHILEM in this expr */
	110	regnode emit_start; / Start of emitted-code area */
	111	regnode emit; / Code-emit pointer; &regdummy = don't = compiling */
	112	I32 naughty; /* How bad is this pattern? */
	113	I32 sawback; /* Did we see \1, ...? */
	114	U32 seen;
	115	I32 size; /* Code size. */
	116	I32 npar; /* () count. */
	117	I32 extralen;
	118	I32 seen_zerolen;
	119	I32 seen_evals;
	120	I32 utf8;
	121	#if ADD_TO_REGEXEC
	122	char starttry; / -Dr: where regtry was called. */
	123	#define RExC_starttry (pRExC_state->starttry)
	124	#endif
	125	} RExC_state_t;
	126
	127	#define RExC_flags (pRExC_state->flags)
	128	#define RExC_precomp (pRExC_state->precomp)
	129	#define RExC_rx (pRExC_state->rx)
	130	#define RExC_start (pRExC_state->start)
	131	#define RExC_end (pRExC_state->end)
	132	#define RExC_parse (pRExC_state->parse)
	133	#define RExC_whilem_seen (pRExC_state->whilem_seen)
	134	#define RExC_offsets (pRExC_state->rx->offsets) /* I am not like the others */
	135	#define RExC_emit (pRExC_state->emit)
	136	#define RExC_emit_start (pRExC_state->emit_start)
	137	#define RExC_naughty (pRExC_state->naughty)
	138	#define RExC_sawback (pRExC_state->sawback)
	139	#define RExC_seen (pRExC_state->seen)
	140	#define RExC_size (pRExC_state->size)
	141	#define RExC_npar (pRExC_state->npar)
	142	#define RExC_extralen (pRExC_state->extralen)
	143	#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
	144	#define RExC_seen_evals (pRExC_state->seen_evals)
	145	#define RExC_utf8 (pRExC_state->utf8)
	146
	147	#define ISMULT1(c) ((c) == '*' \|\| (c) == '+' \|\| (c) == '?')
	148	#define ISMULT2(s) ((s) == '' \|\| (s) == '+' \|\| (s) == '?' \|\| \
	149	((*s) == '{' && regcurly(s)))
	150
	151	#ifdef SPSTART
	152	#undef SPSTART /* dratted cpp namespace... */
	153	#endif
	154	/*
	155	* Flags to be passed up and down.
	156	*/
	157	#define WORST 0 /* Worst case. */
	158	#define HASWIDTH 0x1 /* Known to match non-null strings. */
	159	#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
	160	#define SPSTART 0x4 /* Starts with * or +. */
	161	#define TRYAGAIN 0x8 /* Weeded out a declaration. */
	162
	163	/* Length of a variant. */
	164
	165	typedef struct scan_data_t {
	166	I32 len_min;
	167	I32 len_delta;
	168	I32 pos_min;
	169	I32 pos_delta;
	170	SV *last_found;
	171	I32 last_end; /* min value, <0 unless valid. */
	172	I32 last_start_min;
	173	I32 last_start_max;
	174	SV *longest; / Either &l_fixed, or &l_float. */
	175	SV *longest_fixed;
	176	I32 offset_fixed;
	177	SV *longest_float;
	178	I32 offset_float_min;
	179	I32 offset_float_max;
	180	I32 flags;
	181	I32 whilem_c;
	182	I32 *last_closep;
	183	struct regnode_charclass_class *start_class;
	184	} scan_data_t;
	185
	186	/*
	187	* Forward declarations for pregcomp()'s friends.
	188	*/
	189
	190	static const scan_data_t zero_scan_data =
	191	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	192
	193	#define SF_BEFORE_EOL (SF_BEFORE_SEOL\|SF_BEFORE_MEOL)
	194	#define SF_BEFORE_SEOL 0x1
	195	#define SF_BEFORE_MEOL 0x2
	196	#define SF_FIX_BEFORE_EOL (SF_FIX_BEFORE_SEOL\|SF_FIX_BEFORE_MEOL)
	197	#define SF_FL_BEFORE_EOL (SF_FL_BEFORE_SEOL\|SF_FL_BEFORE_MEOL)
	198
	199	#ifdef NO_UNARY_PLUS
	200	# define SF_FIX_SHIFT_EOL (0+2)
	201	# define SF_FL_SHIFT_EOL (0+4)
	202	#else
	203	# define SF_FIX_SHIFT_EOL (+2)
	204	# define SF_FL_SHIFT_EOL (+4)
	205	#endif
	206
	207	#define SF_FIX_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
	208	#define SF_FIX_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
	209
	210	#define SF_FL_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FL_SHIFT_EOL)
	211	#define SF_FL_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FL_SHIFT_EOL) /* 0x20 */
	212	#define SF_IS_INF 0x40
	213	#define SF_HAS_PAR 0x80
	214	#define SF_IN_PAR 0x100
	215	#define SF_HAS_EVAL 0x200
	216	#define SCF_DO_SUBSTR 0x400
	217	#define SCF_DO_STCLASS_AND 0x0800
	218	#define SCF_DO_STCLASS_OR 0x1000
	219	#define SCF_DO_STCLASS (SCF_DO_STCLASS_AND\|SCF_DO_STCLASS_OR)
	220	#define SCF_WHILEM_VISITED_POS 0x2000
	221
	222	#define UTF (RExC_utf8 != 0)
	223	#define LOC ((RExC_flags & PMf_LOCALE) != 0)
	224	#define FOLD ((RExC_flags & PMf_FOLD) != 0)
	225
	226	#define OOB_UNICODE 12345678
	227	#define OOB_NAMEDCLASS -1
	228
	229	#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
	230	#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
	231
	232
	233	/* length of regex to show in messages that don't mark a position within */
	234	#define RegexLengthToShowInErrorMessages 127
	235
	236	/*
	237	* If MARKER[12] are adjusted, be sure to adjust the constants at the top
	238	* of t/op/regmesg.t, the tests in t/op/re_tests, and those in
	239	* op/pragma/warn/regcomp.
	240	*/
	241	#define MARKER1 "<-- HERE" /* marker as it appears in the description */
	242	#define MARKER2 " <-- HERE " /* marker as it appears within the regex */
	243
	244	#define REPORT_LOCATION " in regex; marked by " MARKER1 " in m/%.*s" MARKER2 "%s/"
	245
	246	/*
	247	* Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
	248	* arg. Show regex, up to a maximum length. If it's too long, chop and add
	249	* "...".
	250	*/
	251	#define FAIL(msg) STMT_START { \
	252	const char *ellipses = ""; \
	253	IV len = RExC_end - RExC_precomp; \
	254	\
	255	if (!SIZE_ONLY) \
	256	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	257	if (len > RegexLengthToShowInErrorMessages) { \
	258	/* chop 10 shorter than the max, to ensure meaning of "..." */ \
	259	len = RegexLengthToShowInErrorMessages - 10; \
	260	ellipses = "..."; \
	261	} \
	262	Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \
	263	msg, (int)len, RExC_precomp, ellipses); \
	264	} STMT_END
	265
	266	/*
	267	* Simple_vFAIL -- like FAIL, but marks the current location in the scan
	268	*/
	269	#define Simple_vFAIL(m) STMT_START { \
	270	const IV offset = RExC_parse - RExC_precomp; \
	271	Perl_croak(aTHX_ "%s" REPORT_LOCATION, \
	272	m, (int)offset, RExC_precomp, RExC_precomp + offset); \
	273	} STMT_END
	274
	275	/*
	276	* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
	277	*/
	278	#define vFAIL(m) STMT_START { \
	279	if (!SIZE_ONLY) \
	280	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	281	Simple_vFAIL(m); \
	282	} STMT_END
	283
	284	/*
	285	* Like Simple_vFAIL(), but accepts two arguments.
	286	*/
	287	#define Simple_vFAIL2(m,a1) STMT_START { \
	288	const IV offset = RExC_parse - RExC_precomp; \
	289	S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, \
	290	(int)offset, RExC_precomp, RExC_precomp + offset); \
	291	} STMT_END
	292
	293	/*
	294	* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
	295	*/
	296	#define vFAIL2(m,a1) STMT_START { \
	297	if (!SIZE_ONLY) \
	298	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	299	Simple_vFAIL2(m, a1); \
	300	} STMT_END
	301
	302
	303	/*
	304	* Like Simple_vFAIL(), but accepts three arguments.
	305	*/
	306	#define Simple_vFAIL3(m, a1, a2) STMT_START { \
	307	const IV offset = RExC_parse - RExC_precomp; \
	308	S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, \
	309	(int)offset, RExC_precomp, RExC_precomp + offset); \
	310	} STMT_END
	311
	312	/*
	313	* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
	314	*/
	315	#define vFAIL3(m,a1,a2) STMT_START { \
	316	if (!SIZE_ONLY) \
	317	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \
	318	Simple_vFAIL3(m, a1, a2); \
	319	} STMT_END
	320
	321	/*
	322	* Like Simple_vFAIL(), but accepts four arguments.
	323	*/
	324	#define Simple_vFAIL4(m, a1, a2, a3) STMT_START { \
	325	const IV offset = RExC_parse - RExC_precomp; \
	326	S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3, \
	327	(int)offset, RExC_precomp, RExC_precomp + offset); \
	328	} STMT_END
	329
	330	#define vWARN(loc,m) STMT_START { \
	331	const IV offset = loc - RExC_precomp; \
	332	Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION, \
	333	m, (int)offset, RExC_precomp, RExC_precomp + offset); \
	334	} STMT_END
	335
	336	#define vWARNdep(loc,m) STMT_START { \
	337	const IV offset = loc - RExC_precomp; \
	338	Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP), \
	339	"%s" REPORT_LOCATION, \
	340	m, (int)offset, RExC_precomp, RExC_precomp + offset); \
	341	} STMT_END
	342
	343
	344	#define vWARN2(loc, m, a1) STMT_START { \
	345	const IV offset = loc - RExC_precomp; \
	346	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	347	a1, (int)offset, RExC_precomp, RExC_precomp + offset); \
	348	} STMT_END
	349
	350	#define vWARN3(loc, m, a1, a2) STMT_START { \
	351	const IV offset = loc - RExC_precomp; \
	352	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	353	a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset); \
	354	} STMT_END
	355
	356	#define vWARN4(loc, m, a1, a2, a3) STMT_START { \
	357	const IV offset = loc - RExC_precomp; \
	358	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	359	a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
	360	} STMT_END
	361
	362	#define vWARN5(loc, m, a1, a2, a3, a4) STMT_START { \
	363	const IV offset = loc - RExC_precomp; \
	364	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
	365	a1, a2, a3, a4, (int)offset, RExC_precomp, RExC_precomp + offset); \
	366	} STMT_END
	367
	368
	369	/* Allow for side effects in s */
	370	#define REGC(c,s) STMT_START { \
	371	if (!SIZE_ONLY) *(s) = (c); else (void)(s); \
	372	} STMT_END
	373
	374	/* Macros for recording node offsets. 20001227 mjd@plover.com
	375	* Nodes are numbered 1, 2, 3, 4. Node #n's position is recorded in
	376	* element 2*n-1 of the array. Element #2n holds the byte length node #n.
	377	* Element 0 holds the number n.
	378	*/
	379
	380	#define MJD_OFFSET_DEBUG(x)
	381	/* #define MJD_OFFSET_DEBUG(x) DEBUG_r(Perl_warn_nocontext x) */
	382
	383
	384	#define Set_Node_Offset_To_R(node,byte) STMT_START { \
	385	if (! SIZE_ONLY) { \
	386	MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n", \
	387	__LINE__, (node), (byte))); \
	388	if((node) < 0) { \
	389	Perl_croak(aTHX_ "value of node is %d in Offset macro", (int)(node)); \
	390	} else { \
	391	RExC_offsets[2*(node)-1] = (byte); \
	392	} \
	393	} \
	394	} STMT_END
	395
	396	#define Set_Node_Offset(node,byte) \
	397	Set_Node_Offset_To_R((node)-RExC_emit_start, (byte)-RExC_start)
	398	#define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
	399
	400	#define Set_Node_Length_To_R(node,len) STMT_START { \
	401	if (! SIZE_ONLY) { \
	402	MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n", \
	403	__LINE__, (int)(node), (int)(len))); \
	404	if((node) < 0) { \
	405	Perl_croak(aTHX_ "value of node is %d in Length macro", (int)(node)); \
	406	} else { \
	407	RExC_offsets[2*(node)] = (len); \
	408	} \
	409	} \
	410	} STMT_END
	411
	412	#define Set_Node_Length(node,len) \
	413	Set_Node_Length_To_R((node)-RExC_emit_start, len)
	414	#define Set_Cur_Node_Length(len) Set_Node_Length(RExC_emit, len)
	415	#define Set_Node_Cur_Length(node) \
	416	Set_Node_Length(node, RExC_parse - parse_start)
	417
	418	/* Get offsets and lengths */
	419	#define Node_Offset(n) (RExC_offsets[2*((n)-RExC_emit_start)-1])
	420	#define Node_Length(n) (RExC_offsets[2*((n)-RExC_emit_start)])
	421
	422	static void clear_re(pTHX_ void *r);
	423
	424	/* Mark that we cannot extend a found fixed substring at this point.
	425	Updata the longest found anchored substring and the longest found
	426	floating substrings if needed. */
	427
	428	STATIC void
	429	S_scan_commit(pTHX_ const RExC_state_t pRExC_state, scan_data_t data)
	430	{
	431	const STRLEN l = CHR_SVLEN(data->last_found);
	432	const STRLEN old_l = CHR_SVLEN(*data->longest);
	433
	434	if ((l >= old_l) && ((l > old_l) \|\| (data->flags & SF_BEFORE_EOL))) {
	435	SvSetMagicSV(*data->longest, data->last_found);
	436	if (*data->longest == data->longest_fixed) {
	437	data->offset_fixed = l ? data->last_start_min : data->pos_min;
	438	if (data->flags & SF_BEFORE_EOL)
	439	data->flags
	440	\|= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
	441	else
	442	data->flags &= ~SF_FIX_BEFORE_EOL;
	443	}
	444	else {
	445	data->offset_float_min = l ? data->last_start_min : data->pos_min;
	446	data->offset_float_max = (l
	447	? data->last_start_max
	448	: data->pos_min + data->pos_delta);
	449	if ((U32)data->offset_float_max > (U32)I32_MAX)
	450	data->offset_float_max = I32_MAX;
	451	if (data->flags & SF_BEFORE_EOL)
	452	data->flags
	453	\|= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
	454	else
	455	data->flags &= ~SF_FL_BEFORE_EOL;
	456	}
	457	}
	458	SvCUR_set(data->last_found, 0);
	459	{
	460	SV * const sv = data->last_found;
	461	if (SvUTF8(sv) && SvMAGICAL(sv)) {
	462	MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
	463	if (mg)
	464	mg->mg_len = 0;
	465	}
	466	}
	467	data->last_end = -1;
	468	data->flags &= ~SF_BEFORE_EOL;
	469	}
	470
	471	/* Can match anything (initialization) */
	472	STATIC void
	473	S_cl_anything(const RExC_state_t pRExC_state, struct regnode_charclass_class cl)
	474	{
	475	ANYOF_CLASS_ZERO(cl);
	476	ANYOF_BITMAP_SETALL(cl);
	477	cl->flags = ANYOF_EOS\|ANYOF_UNICODE_ALL;
	478	if (LOC)
	479	cl->flags \|= ANYOF_LOCALE;
	480	}
	481
	482	/* Can match anything (initialization) */
	483	STATIC int
	484	S_cl_is_anything(const struct regnode_charclass_class *cl)
	485	{
	486	int value;
	487
	488	for (value = 0; value <= ANYOF_MAX; value += 2)
	489	if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
	490	return 1;
	491	if (!(cl->flags & ANYOF_UNICODE_ALL))
	492	return 0;
	493	if (!ANYOF_BITMAP_TESTALLSET(cl))
	494	return 0;
	495	return 1;
	496	}
	497
	498	/* Can match anything (initialization) */
	499	STATIC void
	500	S_cl_init(const RExC_state_t pRExC_state, struct regnode_charclass_class cl)
	501	{
	502	Zero(cl, 1, struct regnode_charclass_class);
	503	cl->type = ANYOF;
	504	cl_anything(pRExC_state, cl);
	505	}
	506
	507	STATIC void
	508	S_cl_init_zero(const RExC_state_t pRExC_state, struct regnode_charclass_class cl)
	509	{
	510	Zero(cl, 1, struct regnode_charclass_class);
	511	cl->type = ANYOF;
	512	cl_anything(pRExC_state, cl);
	513	if (LOC)
	514	cl->flags \|= ANYOF_LOCALE;
	515	}
	516
	517	/* 'And' a given class with another one. Can create false positives */
	518	/* We assume that cl is not inverted */
	519	STATIC void
	520	S_cl_and(struct regnode_charclass_class *cl,
	521	const struct regnode_charclass_class *and_with)
	522	{
	523	if (!(and_with->flags & ANYOF_CLASS)
	524	&& !(cl->flags & ANYOF_CLASS)
	525	&& (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
	526	&& !(and_with->flags & ANYOF_FOLD)
	527	&& !(cl->flags & ANYOF_FOLD)) {
	528	int i;
	529
	530	if (and_with->flags & ANYOF_INVERT)
	531	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	532	cl->bitmap[i] &= ~and_with->bitmap[i];
	533	else
	534	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	535	cl->bitmap[i] &= and_with->bitmap[i];
	536	} /* XXXX: logic is complicated otherwise, leave it along for a moment. */
	537	if (!(and_with->flags & ANYOF_EOS))
	538	cl->flags &= ~ANYOF_EOS;
	539
	540	if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE &&
	541	!(and_with->flags & ANYOF_INVERT)) {
	542	cl->flags &= ~ANYOF_UNICODE_ALL;
	543	cl->flags \|= ANYOF_UNICODE;
	544	ARG_SET(cl, ARG(and_with));
	545	}
	546	if (!(and_with->flags & ANYOF_UNICODE_ALL) &&
	547	!(and_with->flags & ANYOF_INVERT))
	548	cl->flags &= ~ANYOF_UNICODE_ALL;
	549	if (!(and_with->flags & (ANYOF_UNICODE\|ANYOF_UNICODE_ALL)) &&
	550	!(and_with->flags & ANYOF_INVERT))
	551	cl->flags &= ~ANYOF_UNICODE;
	552	}
	553
	554	/* 'OR' a given class with another one. Can create false positives */
	555	/* We assume that cl is not inverted */
	556	STATIC void
	557	S_cl_or(const RExC_state_t pRExC_state, struct regnode_charclass_class cl, const struct regnode_charclass_class *or_with)
	558	{
	559	if (or_with->flags & ANYOF_INVERT) {
	560	/* We do not use
	561	* (B1 \| CL1) \| (!B2 & !CL2) = (B1 \| !B2 & !CL2) \| (CL1 \| (!B2 & !CL2))
	562	* <= (B1 \| !B2) \| (CL1 \| !CL2)
	563	* which is wasteful if CL2 is small, but we ignore CL2:
	564	* (B1 \| CL1) \| (!B2 & !CL2) <= (B1 \| CL1) \| !B2 = (B1 \| !B2) \| CL1
	565	* XXXX Can we handle case-fold? Unclear:
	566	* (OK1(i) \| OK1(i')) \| !(OK1(i) \| OK1(i')) =
	567	* (OK1(i) \| OK1(i')) \| (!OK1(i) & !OK1(i'))
	568	*/
	569	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
	570	&& !(or_with->flags & ANYOF_FOLD)
	571	&& !(cl->flags & ANYOF_FOLD) ) {
	572	int i;
	573
	574	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	575	cl->bitmap[i] \|= ~or_with->bitmap[i];
	576	} /* XXXX: logic is complicated otherwise */
	577	else {
	578	cl_anything(pRExC_state, cl);
	579	}
	580	} else {
	581	/* (B1 \| CL1) \| (B2 \| CL2) = (B1 \| B2) \| (CL1 \| CL2)) */
	582	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
	583	&& (!(or_with->flags & ANYOF_FOLD)
	584	\|\| (cl->flags & ANYOF_FOLD)) ) {
	585	int i;
	586
	587	/* OR char bitmap and class bitmap separately */
	588	for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
	589	cl->bitmap[i] \|= or_with->bitmap[i];
	590	if (or_with->flags & ANYOF_CLASS) {
	591	for (i = 0; i < ANYOF_CLASSBITMAP_SIZE; i++)
	592	cl->classflags[i] \|= or_with->classflags[i];
	593	cl->flags \|= ANYOF_CLASS;
	594	}
	595	}
	596	else { /* XXXX: logic is complicated, leave it along for a moment. */
	597	cl_anything(pRExC_state, cl);
	598	}
	599	}
	600	if (or_with->flags & ANYOF_EOS)
	601	cl->flags \|= ANYOF_EOS;
	602
	603	if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE &&
	604	ARG(cl) != ARG(or_with)) {
	605	cl->flags \|= ANYOF_UNICODE_ALL;
	606	cl->flags &= ~ANYOF_UNICODE;
	607	}
	608	if (or_with->flags & ANYOF_UNICODE_ALL) {
	609	cl->flags \|= ANYOF_UNICODE_ALL;
	610	cl->flags &= ~ANYOF_UNICODE;
	611	}
	612	}
	613
	614	/*
	615
	616	make_trie(startbranch,first,last,tail,flags)
	617	startbranch: the first branch in the whole branch sequence
	618	first : start branch of sequence of branch-exact nodes.
	619	May be the same as startbranch
	620	last : Thing following the last branch.
	621	May be the same as tail.
	622	tail : item following the branch sequence
	623	flags : currently the OP() type we will be building one of /EXACT(\|F\|Fl)/
	624
	625	Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
	626
	627	A trie is an N'ary tree where the branches are determined by digital
	628	decomposition of the key. IE, at the root node you look up the 1st character and
	629	follow that branch repeat until you find the end of the branches. Nodes can be
	630	marked as "accepting" meaning they represent a complete word. Eg:
	631
	632	/he\|she\|his\|hers/
	633
	634	would convert into the following structure. Numbers represent states, letters
	635	following numbers represent valid transitions on the letter from that state, if
	636	the number is in square brackets it represents an accepting state, otherwise it
	637	will be in parenthesis.
	638
	639	+-h->+-e->[3]-+-r->(8)-+-s->[9]
	640	\| \|
	641	\| (2)
	642	\| \|
	643	(1) +-i->(6)-+-s->[7]
	644	\|
	645	+-s->(3)-+-h->(4)-+-e->[5]
	646
	647	Accept Word Mapping: 3=>1 (he),5=>2 (she), 7=>3 (his), 9=>4 (hers)
	648
	649	This shows that when matching against the string 'hers' we will begin at state 1
	650	read 'h' and move to state 2, read 'e' and move to state 3 which is accepting,
	651	then read 'r' and go to state 8 followed by 's' which takes us to state 9 which
	652	is also accepting. Thus we know that we can match both 'he' and 'hers' with a
	653	single traverse. We store a mapping from accepting to state to which word was
	654	matched, and then when we have multiple possibilities we try to complete the
	655	rest of the regex in the order in which they occured in the alternation.
	656
	657	The only prior NFA like behaviour that would be changed by the TRIE support is
	658	the silent ignoring of duplicate alternations which are of the form:
	659
	660	/ (DUPE\|DUPE) X? (?{ ... }) Y /x
	661
	662	Thus EVAL blocks follwing a trie may be called a different number of times with
	663	and without the optimisation. With the optimisations dupes will be silently
	664	ignored. This inconsistant behaviour of EVAL type nodes is well established as
	665	the following demonstrates:
	666
	667	'words'=~/(word\|word\|word)(?{ print $1 })[xyz]/
	668
	669	which prints out 'word' three times, but
	670
	671	'words'=~/(word\|word\|word)(?{ print $1 })S/
	672
	673	which doesnt print it out at all. This is due to other optimisations kicking in.
	674
	675	Example of what happens on a structural level:
	676
	677	The regexp /(ac\|ad\|ab)+/ will produce the folowing debug output:
	678
	679	1: CURLYM[1] {1,32767}(18)
	680	5: BRANCH(8)
	681	6: EXACT <ac>(16)
	682	8: BRANCH(11)
	683	9: EXACT <ad>(16)
	684	11: BRANCH(14)
	685	12: EXACT <ab>(16)
	686	16: SUCCEED(0)
	687	17: NOTHING(18)
	688	18: END(0)
	689
	690	This would be optimizable with startbranch=5, first=5, last=16, tail=16
	691	and should turn into:
	692
	693	1: CURLYM[1] {1,32767}(18)
	694	5: TRIE(16)
	695	[Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1]
	696	<ac>
	697	<ad>
	698	<ab>
	699	16: SUCCEED(0)
	700	17: NOTHING(18)
	701	18: END(0)
	702
	703	Cases where tail != last would be like /(?foo\|bar)baz/:
	704
	705	1: BRANCH(4)
	706	2: EXACT <foo>(8)
	707	4: BRANCH(7)
	708	5: EXACT <bar>(8)
	709	7: TAIL(8)
	710	8: EXACT <baz>(10)
	711	10: END(0)
	712
	713	which would be optimizable with startbranch=1, first=1, last=7, tail=8
	714	and would end up looking like:
	715
	716	1: TRIE(8)
	717	[Words:2 Chars Stored:6 Unique Chars:5 States:7 NCP:1]
	718	<foo>
	719	<bar>
	720	7: TAIL(8)
	721	8: EXACT <baz>(10)
	722	10: END(0)
	723
	724	*/
	725
	726	#define TRIE_DEBUG_CHAR \
	727	DEBUG_TRIE_COMPILE_r({ \
	728	SV *tmp; \
	729	if ( UTF ) { \
	730	tmp = newSVpvs( "" ); \
	731	pv_uni_display( tmp, uc, len, 60, UNI_DISPLAY_REGEX ); \
	732	} else { \
	733	tmp = Perl_newSVpvf_nocontext( "%c", (int)uvc ); \
	734	} \
	735	av_push( trie->revcharmap, tmp ); \
	736	})
	737
	738	#define TRIE_READ_CHAR STMT_START { \
	739	if ( UTF ) { \
	740	if ( folder ) { \
	741	if ( foldlen > 0 ) { \
	742	uvc = utf8n_to_uvuni( scan, UTF8_MAXLEN, &len, uniflags ); \
	743	foldlen -= len; \
	744	scan += len; \
	745	len = 0; \
	746	} else { \
	747	uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\
	748	uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
	749	foldlen -= UNISKIP( uvc ); \
	750	scan = foldbuf + UNISKIP( uvc ); \
	751	} \
	752	} else { \
	753	uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\
	754	} \
	755	} else { \
	756	uvc = (U32)*uc; \
	757	len = 1; \
	758	} \
	759	} STMT_END
	760
	761
	762	#define TRIE_LIST_ITEM(state,idx) (trie->states[state].trans.list)[ idx ]
	763	#define TRIE_LIST_CUR(state) ( TRIE_LIST_ITEM( state, 0 ).forid )
	764	#define TRIE_LIST_LEN(state) ( TRIE_LIST_ITEM( state, 0 ).newstate )
	765	#define TRIE_LIST_USED(idx) ( trie->states[state].trans.list ? (TRIE_LIST_CUR( idx ) - 1) : 0 )
	766
	767	#define TRIE_LIST_PUSH(state,fid,ns) STMT_START { \
	768	if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) { \
	769	TRIE_LIST_LEN( state ) *= 2; \
	770	Renew( trie->states[ state ].trans.list, \
	771	TRIE_LIST_LEN( state ), reg_trie_trans_le ); \
	772	} \
	773	TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid; \
	774	TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns; \
	775	TRIE_LIST_CUR( state )++; \
	776	} STMT_END
	777
	778	#define TRIE_LIST_NEW(state) STMT_START { \
	779	Newxz( trie->states[ state ].trans.list, \
	780	4, reg_trie_trans_le ); \
	781	TRIE_LIST_CUR( state ) = 1; \
	782	TRIE_LIST_LEN( state ) = 4; \
	783	} STMT_END
	784
	785	STATIC I32
	786	S_make_trie(pTHX_ RExC_state_t pRExC_state, regnode startbranch, regnode first, regnode last, regnode *tail, U32 flags)
	787	{
	788	dVAR;
	789	/* first pass, loop through and scan words */
	790	reg_trie_data *trie;
	791	regnode *cur;
	792	const U32 uniflags = UTF8_ALLOW_DEFAULT;
	793	STRLEN len = 0;
	794	UV uvc = 0;
	795	U16 curword = 0;
	796	U32 next_alloc = 0;
	797	/* we just use folder as a flag in utf8 */
	798	const U8 * const folder = ( flags == EXACTF
	799	? PL_fold
	800	: ( flags == EXACTFL
	801	? PL_fold_locale
	802	: NULL
	803	)
	804	);
	805
	806	const U32 data_slot = add_data( pRExC_state, 1, "t" );
	807	SV *re_trie_maxbuff;
	808
	809	GET_RE_DEBUG_FLAGS_DECL;
	810
	811	Newxz( trie, 1, reg_trie_data );
	812	trie->refcount = 1;
	813	RExC_rx->data->data[ data_slot ] = (void*)trie;
	814	Newxz( trie->charmap, 256, U16 );
	815	DEBUG_r({
	816	trie->words = newAV();
	817	trie->revcharmap = newAV();
	818	});
	819
	820
	821	re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
	822	if (!SvIOK(re_trie_maxbuff)) {
	823	sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
	824	}
	825
	826	/* -- First loop and Setup --
	827
	828	We first traverse the branches and scan each word to determine if it
	829	contains widechars, and how many unique chars there are, this is
	830	important as we have to build a table with at least as many columns as we
	831	have unique chars.
	832
	833	We use an array of integers to represent the character codes 0..255
	834	(trie->charmap) and we use a an HV* to store unicode characters. We use the
	835	native representation of the character value as the key and IV's for the
	836	coded index.
	837
	838	TODO If we keep track of how many times each character is used we can
	839	remap the columns so that the table compression later on is more
	840	efficient in terms of memory by ensuring most common value is in the
	841	middle and the least common are on the outside. IMO this would be better
	842	than a most to least common mapping as theres a decent chance the most
	843	common letter will share a node with the least common, meaning the node
	844	will not be compressable. With a middle is most common approach the worst
	845	case is when we have the least common nodes twice.
	846
	847	*/
	848
	849
	850	for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
	851	regnode * const noper = NEXTOPER( cur );
	852	const U8 uc = (U8)STRING( noper );
	853	const U8 * const e = uc + STR_LEN( noper );
	854	STRLEN foldlen = 0;
	855	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	856	const U8 scan = (U8)NULL;
	857
	858	for ( ; uc < e ; uc += len ) {
	859	trie->charcount++;
	860	TRIE_READ_CHAR;
	861	if ( uvc < 256 ) {
	862	if ( !trie->charmap[ uvc ] ) {
	863	trie->charmap[ uvc ]=( ++trie->uniquecharcount );
	864	if ( folder )
	865	trie->charmap[ folder[ uvc ] ] = trie->charmap[ uvc ];
	866	TRIE_DEBUG_CHAR;
	867	}
	868	} else {
	869	SV** svpp;
	870	if ( !trie->widecharmap )
	871	trie->widecharmap = newHV();
	872
	873	svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 1 );
	874
	875	if ( !svpp )
	876	Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc );
	877
	878	if ( !SvTRUE( *svpp ) ) {
	879	sv_setiv( *svpp, ++trie->uniquecharcount );
	880	TRIE_DEBUG_CHAR;
	881	}
	882	}
	883	}
	884	trie->wordcount++;
	885	} /* end first pass */
	886	DEBUG_TRIE_COMPILE_r(
	887	PerlIO_printf( Perl_debug_log, "TRIE(%s): W:%d C:%d Uq:%d \n",
	888	( trie->widecharmap ? "UTF8" : "NATIVE" ), trie->wordcount,
	889	(int)trie->charcount, trie->uniquecharcount )
	890	);
	891
	892
	893	/*
	894	We now know what we are dealing with in terms of unique chars and
	895	string sizes so we can calculate how much memory a naive
	896	representation using a flat table will take. If it's over a reasonable
	897	limit (as specified by ${^RE_TRIE_MAXBUF}) we use a more memory
	898	conservative but potentially much slower representation using an array
	899	of lists.
	900
	901	At the end we convert both representations into the same compressed
	902	form that will be used in regexec.c for matching with. The latter
	903	is a form that cannot be used to construct with but has memory
	904	properties similar to the list form and access properties similar
	905	to the table form making it both suitable for fast searches and
	906	small enough that its feasable to store for the duration of a program.
	907
	908	See the comment in the code where the compressed table is produced
	909	inplace from the flat tabe representation for an explanation of how
	910	the compression works.
	911
	912	*/
	913
	914
	915	if ( (IV)( ( trie->charcount + 1 ) * trie->uniquecharcount + 1) > SvIV(re_trie_maxbuff) ) {
	916	/*
	917	Second Pass -- Array Of Lists Representation
	918
	919	Each state will be represented by a list of charid:state records
	920	(reg_trie_trans_le) the first such element holds the CUR and LEN
	921	points of the allocated array. (See defines above).
	922
	923	We build the initial structure using the lists, and then convert
	924	it into the compressed table form which allows faster lookups
	925	(but cant be modified once converted).
	926
	927
	928	*/
	929
	930
	931	STRLEN transcount = 1;
	932
	933	Newxz( trie->states, trie->charcount + 2, reg_trie_state );
	934	TRIE_LIST_NEW(1);
	935	next_alloc = 2;
	936
	937	for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
	938
	939	regnode * const noper = NEXTOPER( cur );
	940	U8 uc = (U8)STRING( noper );
	941	const U8 * const e = uc + STR_LEN( noper );
	942	U32 state = 1; /* required init */
	943	U16 charid = 0; /* sanity init */
	944	U8 scan = (U8)NULL; /* sanity init */
	945	STRLEN foldlen = 0; /* required init */
	946	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	947
	948	for ( ; uc < e ; uc += len ) {
	949
	950	TRIE_READ_CHAR;
	951
	952	if ( uvc < 256 ) {
	953	charid = trie->charmap[ uvc ];
	954	} else {
	955	SV** const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0);
	956	if ( !svpp ) {
	957	charid = 0;
	958	} else {
	959	charid=(U16)SvIV( *svpp );
	960	}
	961	}
	962	if ( charid ) {
	963
	964	U16 check;
	965	U32 newstate = 0;
	966
	967	charid--;
	968	if ( !trie->states[ state ].trans.list ) {
	969	TRIE_LIST_NEW( state );
	970	}
	971	for ( check = 1; check <= TRIE_LIST_USED( state ); check++ ) {
	972	if ( TRIE_LIST_ITEM( state, check ).forid == charid ) {
	973	newstate = TRIE_LIST_ITEM( state, check ).newstate;
	974	break;
	975	}
	976	}
	977	if ( ! newstate ) {
	978	newstate = next_alloc++;
	979	TRIE_LIST_PUSH( state, charid, newstate );
	980	transcount++;
	981	}
	982	state = newstate;
	983	} else {
	984	Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
	985	}
	986	/* charid is now 0 if we dont know the char read, or nonzero if we do */
	987	}
	988
	989	if ( !trie->states[ state ].wordnum ) {
	990	/* we havent inserted this word into the structure yet. */
	991	trie->states[ state ].wordnum = ++curword;
	992
	993	DEBUG_r({
	994	/* store the word for dumping */
	995	SV* tmp = newSVpvn( STRING( noper ), STR_LEN( noper ) );
	996	if ( UTF ) SvUTF8_on( tmp );
	997	av_push( trie->words, tmp );
	998	});
	999
	1000	} else {
	1001	/EMPTY/; /* It's a dupe. So ignore it. */
	1002	}
	1003
	1004	} /* end second pass */
	1005
	1006	trie->laststate = next_alloc;
	1007	Renew( trie->states, next_alloc, reg_trie_state );
	1008
	1009	DEBUG_TRIE_COMPILE_MORE_r({
	1010	U32 state;
	1011
	1012	/* print out the table precompression. */
	1013
	1014	PerlIO_printf( Perl_debug_log, "\nState :Word \| Transition Data\n" );
	1015	PerlIO_printf( Perl_debug_log, "------:-----+-----------------" );
	1016
	1017	for( state=1 ; state < next_alloc ; state ++ ) {
	1018	U16 charid;
	1019
	1020	PerlIO_printf( Perl_debug_log, "\n %04"UVXf" :", (UV)state );
	1021	if ( ! trie->states[ state ].wordnum ) {
	1022	PerlIO_printf( Perl_debug_log, "%5s\| ","");
	1023	} else {
	1024	PerlIO_printf( Perl_debug_log, "W%04x\| ",
	1025	trie->states[ state ].wordnum
	1026	);
	1027	}
	1028	for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
	1029	SV **tmp = av_fetch( trie->revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0);
	1030	PerlIO_printf( Perl_debug_log, "%s:%3X=%04"UVXf" \| ",
	1031	SvPV_nolen_const( *tmp ),
	1032	TRIE_LIST_ITEM(state,charid).forid,
	1033	(UV)TRIE_LIST_ITEM(state,charid).newstate
	1034	);
	1035	}
	1036
	1037	}
	1038	PerlIO_printf( Perl_debug_log, "\n\n" );
	1039	});
	1040
	1041	Newxz( trie->trans, transcount ,reg_trie_trans );
	1042	{
	1043	U32 state;
	1044	U32 tp = 0;
	1045	U32 zp = 0;
	1046
	1047
	1048	for( state=1 ; state < next_alloc ; state ++ ) {
	1049	U32 base=0;
	1050
	1051	/*
	1052	DEBUG_TRIE_COMPILE_MORE_r(
	1053	PerlIO_printf( Perl_debug_log, "tp: %d zp: %d ",tp,zp)
	1054	);
	1055	*/
	1056
	1057	if (trie->states[state].trans.list) {
	1058	U16 minid=TRIE_LIST_ITEM( state, 1).forid;
	1059	U16 maxid=minid;
	1060	U16 idx;
	1061
	1062	for( idx = 2 ; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
	1063	const U16 forid = TRIE_LIST_ITEM( state, idx).forid;
	1064	if ( forid < minid ) {
	1065	minid=forid;
	1066	} else if ( forid > maxid ) {
	1067	maxid=forid;
	1068	}
	1069	}
	1070	if ( transcount < tp + maxid - minid + 1) {
	1071	transcount *= 2;
	1072	Renew( trie->trans, transcount, reg_trie_trans );
	1073	Zero( trie->trans + (transcount / 2), transcount / 2 , reg_trie_trans );
	1074	}
	1075	base = trie->uniquecharcount + tp - minid;
	1076	if ( maxid == minid ) {
	1077	U32 set = 0;
	1078	for ( ; zp < tp ; zp++ ) {
	1079	if ( ! trie->trans[ zp ].next ) {
	1080	base = trie->uniquecharcount + zp - minid;
	1081	trie->trans[ zp ].next = TRIE_LIST_ITEM( state, 1).newstate;
	1082	trie->trans[ zp ].check = state;
	1083	set = 1;
	1084	break;
	1085	}
	1086	}
	1087	if ( !set ) {
	1088	trie->trans[ tp ].next = TRIE_LIST_ITEM( state, 1).newstate;
	1089	trie->trans[ tp ].check = state;
	1090	tp++;
	1091	zp = tp;
	1092	}
	1093	} else {
	1094	for ( idx=1; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
	1095	const U32 tid = base - trie->uniquecharcount + TRIE_LIST_ITEM( state, idx ).forid;
	1096	trie->trans[ tid ].next = TRIE_LIST_ITEM( state, idx ).newstate;
	1097	trie->trans[ tid ].check = state;
	1098	}
	1099	tp += ( maxid - minid + 1 );
	1100	}
	1101	Safefree(trie->states[ state ].trans.list);
	1102	}
	1103	/*
	1104	DEBUG_TRIE_COMPILE_MORE_r(
	1105	PerlIO_printf( Perl_debug_log, " base: %d\n",base);
	1106	);
	1107	*/
	1108	trie->states[ state ].trans.base=base;
	1109	}
	1110	trie->lasttrans = tp + 1;
	1111	}
	1112	} else {
	1113	/*
	1114	Second Pass -- Flat Table Representation.
	1115
	1116	we dont use the 0 slot of either trans[] or states[] so we add 1 to each.
	1117	We know that we will need Charcount+1 trans at most to store the data
	1118	(one row per char at worst case) So we preallocate both structures
	1119	assuming worst case.
	1120
	1121	We then construct the trie using only the .next slots of the entry
	1122	structs.
	1123
	1124	We use the .check field of the first entry of the node temporarily to
	1125	make compression both faster and easier by keeping track of how many non
	1126	zero fields are in the node.
	1127
	1128	Since trans are numbered from 1 any 0 pointer in the table is a FAIL
	1129	transition.
	1130
	1131	There are two terms at use here: state as a TRIE_NODEIDX() which is a
	1132	number representing the first entry of the node, and state as a
	1133	TRIE_NODENUM() which is the trans number. state 1 is TRIE_NODEIDX(1) and
	1134	TRIE_NODENUM(1), state 2 is TRIE_NODEIDX(2) and TRIE_NODENUM(3) if there
	1135	are 2 entrys per node. eg:
	1136
	1137	A B A B
	1138	1. 2 4 1. 3 7
	1139	2. 0 3 3. 0 5
	1140	3. 0 0 5. 0 0
	1141	4. 0 0 7. 0 0
	1142
	1143	The table is internally in the right hand, idx form. However as we also
	1144	have to deal with the states array which is indexed by nodenum we have to
	1145	use TRIE_NODENUM() to convert.
	1146
	1147	*/
	1148
	1149	Newxz( trie->trans, ( trie->charcount + 1 ) * trie->uniquecharcount + 1,
	1150	reg_trie_trans );
	1151	Newxz( trie->states, trie->charcount + 2, reg_trie_state );
	1152	next_alloc = trie->uniquecharcount + 1;
	1153
	1154	for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
	1155
	1156	regnode * const noper = NEXTOPER( cur );
	1157	const U8 uc = (U8)STRING( noper );
	1158	const U8 * const e = uc + STR_LEN( noper );
	1159
	1160	U32 state = 1; /* required init */
	1161
	1162	U16 charid = 0; /* sanity init */
	1163	U32 accept_state = 0; /* sanity init */
	1164	U8 scan = (U8)NULL; /* sanity init */
	1165
	1166	STRLEN foldlen = 0; /* required init */
	1167	U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
	1168
	1169
	1170	for ( ; uc < e ; uc += len ) {
	1171
	1172	TRIE_READ_CHAR;
	1173
	1174	if ( uvc < 256 ) {
	1175	charid = trie->charmap[ uvc ];
	1176	} else {
	1177	SV* const * const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0);
	1178	charid = svpp ? (U16)SvIV(*svpp) : 0;
	1179	}
	1180	if ( charid ) {
	1181	charid--;
	1182	if ( !trie->trans[ state + charid ].next ) {
	1183	trie->trans[ state + charid ].next = next_alloc;
	1184	trie->trans[ state ].check++;
	1185	next_alloc += trie->uniquecharcount;
	1186	}
	1187	state = trie->trans[ state + charid ].next;
	1188	} else {
	1189	Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
	1190	}
	1191	/* charid is now 0 if we dont know the char read, or nonzero if we do */
	1192	}
	1193
	1194	accept_state = TRIE_NODENUM( state );
	1195	if ( !trie->states[ accept_state ].wordnum ) {
	1196	/* we havent inserted this word into the structure yet. */
	1197	trie->states[ accept_state ].wordnum = ++curword;
	1198
	1199	DEBUG_r({
	1200	/* store the word for dumping */
	1201	SV* tmp = newSVpvn( STRING( noper ), STR_LEN( noper ) );
	1202	if ( UTF ) SvUTF8_on( tmp );
	1203	av_push( trie->words, tmp );
	1204	});
	1205
	1206	} else {
	1207	/EMPTY/; /* Its a dupe. So ignore it. */
	1208	}
	1209
	1210	} /* end second pass */
	1211
	1212	DEBUG_TRIE_COMPILE_MORE_r({
	1213	/*
	1214	print out the table precompression so that we can do a visual check
	1215	that they are identical.
	1216	*/
	1217	U32 state;
	1218	U16 charid;
	1219	PerlIO_printf( Perl_debug_log, "\nChar : " );
	1220
	1221	for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
	1222	SV **tmp = av_fetch( trie->revcharmap, charid, 0);
	1223	if ( tmp ) {
	1224	PerlIO_printf( Perl_debug_log, "%4.4s ", SvPV_nolen_const( *tmp ) );
	1225	}
	1226	}
	1227
	1228	PerlIO_printf( Perl_debug_log, "\nState+-" );
	1229
	1230	for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) {
	1231	PerlIO_printf( Perl_debug_log, "%4s-", "----" );
	1232	}
	1233
	1234	PerlIO_printf( Perl_debug_log, "\n" );
	1235
	1236	for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) {
	1237
	1238	PerlIO_printf( Perl_debug_log, "%04"UVXf" : ", (UV)TRIE_NODENUM( state ) );
	1239
	1240	for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
	1241	PerlIO_printf( Perl_debug_log, "%04"UVXf" ",
	1242	(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next ) );
	1243	}
	1244	if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) {
	1245	PerlIO_printf( Perl_debug_log, " (%04"UVXf")\n", (UV)trie->trans[ state ].check );
	1246	} else {
	1247	PerlIO_printf( Perl_debug_log, " (%04"UVXf") W%04X\n", (UV)trie->trans[ state ].check,
	1248	trie->states[ TRIE_NODENUM( state ) ].wordnum );
	1249	}
	1250	}
	1251	PerlIO_printf( Perl_debug_log, "\n\n" );
	1252	});
	1253	{
	1254	/*
	1255	* Inplace compress the table.*
	1256
	1257	For sparse data sets the table constructed by the trie algorithm will
	1258	be mostly 0/FAIL transitions or to put it another way mostly empty.
	1259	(Note that leaf nodes will not contain any transitions.)
	1260
	1261	This algorithm compresses the tables by eliminating most such
	1262	transitions, at the cost of a modest bit of extra work during lookup:
	1263
	1264	- Each states[] entry contains a .base field which indicates the
	1265	index in the state[] array wheres its transition data is stored.
	1266
	1267	- If .base is 0 there are no valid transitions from that node.
	1268
	1269	- If .base is nonzero then charid is added to it to find an entry in
	1270	the trans array.
	1271
	1272	-If trans[states[state].base+charid].check!=state then the
	1273	transition is taken to be a 0/Fail transition. Thus if there are fail
	1274	transitions at the front of the node then the .base offset will point
	1275	somewhere inside the previous nodes data (or maybe even into a node
	1276	even earlier), but the .check field determines if the transition is
	1277	valid.
	1278
	1279	The following process inplace converts the table to the compressed
	1280	table: We first do not compress the root node 1,and mark its all its
	1281	.check pointers as 1 and set its .base pointer as 1 as well. This
	1282	allows to do a DFA construction from the compressed table later, and
	1283	ensures that any .base pointers we calculate later are greater than
	1284	0.
	1285
	1286	- We set 'pos' to indicate the first entry of the second node.
	1287
	1288	- We then iterate over the columns of the node, finding the first and
	1289	last used entry at l and m. We then copy l..m into pos..(pos+m-l),
	1290	and set the .check pointers accordingly, and advance pos
	1291	appropriately and repreat for the next node. Note that when we copy
	1292	the next pointers we have to convert them from the original
	1293	NODEIDX form to NODENUM form as the former is not valid post
	1294	compression.
	1295
	1296	- If a node has no transitions used we mark its base as 0 and do not
	1297	advance the pos pointer.
	1298
	1299	- If a node only has one transition we use a second pointer into the
	1300	structure to fill in allocated fail transitions from other states.
	1301	This pointer is independent of the main pointer and scans forward
	1302	looking for null transitions that are allocated to a state. When it
	1303	finds one it writes the single transition into the "hole". If the
	1304	pointer doesnt find one the single transition is appeneded as normal.
	1305
	1306	- Once compressed we can Renew/realloc the structures to release the
	1307	excess space.
	1308
	1309	See "Table-Compression Methods" in sec 3.9 of the Red Dragon,
	1310	specifically Fig 3.47 and the associated pseudocode.
	1311
	1312	demq
	1313	*/
	1314	const U32 laststate = TRIE_NODENUM( next_alloc );
	1315	U32 state, charid;
	1316	U32 pos = 0, zp=0;
	1317	trie->laststate = laststate;
	1318
	1319	for ( state = 1 ; state < laststate ; state++ ) {
	1320	U8 flag = 0;
	1321	const U32 stateidx = TRIE_NODEIDX( state );
	1322	const U32 o_used = trie->trans[ stateidx ].check;
	1323	U32 used = trie->trans[ stateidx ].check;
	1324	trie->trans[ stateidx ].check = 0;
	1325
	1326	for ( charid = 0 ; used && charid < trie->uniquecharcount ; charid++ ) {
	1327	if ( flag \|\| trie->trans[ stateidx + charid ].next ) {
	1328	if ( trie->trans[ stateidx + charid ].next ) {
	1329	if (o_used == 1) {
	1330	for ( ; zp < pos ; zp++ ) {
	1331	if ( ! trie->trans[ zp ].next ) {
	1332	break;
	1333	}
	1334	}
	1335	trie->states[ state ].trans.base = zp + trie->uniquecharcount - charid ;
	1336	trie->trans[ zp ].next = SAFE_TRIE_NODENUM( trie->trans[ stateidx + charid ].next );
	1337	trie->trans[ zp ].check = state;
	1338	if ( ++zp > pos ) pos = zp;
	1339	break;
	1340	}
	1341	used--;
	1342	}
	1343	if ( !flag ) {
	1344	flag = 1;
	1345	trie->states[ state ].trans.base = pos + trie->uniquecharcount - charid ;
	1346	}
	1347	trie->trans[ pos ].next = SAFE_TRIE_NODENUM( trie->trans[ stateidx + charid ].next );
	1348	trie->trans[ pos ].check = state;
	1349	pos++;
	1350	}
	1351	}
	1352	}
	1353	trie->lasttrans = pos + 1;
	1354	Renew( trie->states, laststate + 1, reg_trie_state);
	1355	DEBUG_TRIE_COMPILE_MORE_r(
	1356	PerlIO_printf( Perl_debug_log,
	1357	" Alloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
	1358	(int)( ( trie->charcount + 1 ) * trie->uniquecharcount + 1 ),
	1359	(IV)next_alloc,
	1360	(IV)pos,
	1361	( ( next_alloc - pos ) * 100 ) / (double)next_alloc );
	1362	);
	1363
	1364	} /* end table compress */
	1365	}
	1366	/* resize the trans array to remove unused space */
	1367	Renew( trie->trans, trie->lasttrans, reg_trie_trans);
	1368
	1369	DEBUG_TRIE_COMPILE_r({
	1370	U32 state;
	1371	/*
	1372	Now we print it out again, in a slightly different form as there is additional
	1373	info we want to be able to see when its compressed. They are close enough for
	1374	visual comparison though.
	1375	*/
	1376	PerlIO_printf( Perl_debug_log, "\nChar : %-6s%-6s%-4s ","Match","Base","Ofs" );
	1377
	1378	for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
	1379	SV **tmp = av_fetch( trie->revcharmap, state, 0);
	1380	if ( tmp ) {
	1381	PerlIO_printf( Perl_debug_log, "%4.4s ", SvPV_nolen_const( *tmp ) );
	1382	}
	1383	}
	1384	PerlIO_printf( Perl_debug_log, "\n-----:-----------------------");
	1385
	1386	for( state = 0 ; state < trie->uniquecharcount ; state++ )
	1387	PerlIO_printf( Perl_debug_log, "-----");
	1388	PerlIO_printf( Perl_debug_log, "\n");
	1389
	1390	for( state = 1 ; state < trie->laststate ; state++ ) {
	1391	const U32 base = trie->states[ state ].trans.base;
	1392
	1393	PerlIO_printf( Perl_debug_log, "#%04"UVXf" ", (UV)state);
	1394
	1395	if ( trie->states[ state ].wordnum ) {
	1396	PerlIO_printf( Perl_debug_log, " W%04X", trie->states[ state ].wordnum );
	1397	} else {
	1398	PerlIO_printf( Perl_debug_log, "%6s", "" );
	1399	}
	1400
	1401	PerlIO_printf( Perl_debug_log, " @%04"UVXf" ", (UV)base );
	1402
	1403	if ( base ) {
	1404	U32 ofs = 0;
	1405
	1406	while( ( base + ofs < trie->uniquecharcount ) \|\|
	1407	( base + ofs - trie->uniquecharcount < trie->lasttrans
	1408	&& trie->trans[ base + ofs - trie->uniquecharcount ].check != state))
	1409	ofs++;
	1410
	1411	PerlIO_printf( Perl_debug_log, "+%02"UVXf"[ ", (UV)ofs);
	1412
	1413	for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
	1414	if ( ( base + ofs >= trie->uniquecharcount ) &&
	1415	( base + ofs - trie->uniquecharcount < trie->lasttrans ) &&
	1416	trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
	1417	{
	1418	PerlIO_printf( Perl_debug_log, "%04"UVXf" ",
	1419	(UV)trie->trans[ base + ofs - trie->uniquecharcount ].next );
	1420	} else {
	1421	PerlIO_printf( Perl_debug_log, "%4s "," 0" );
	1422	}
	1423	}
	1424
	1425	PerlIO_printf( Perl_debug_log, "]");
	1426
	1427	}
	1428	PerlIO_printf( Perl_debug_log, "\n" );
	1429	}
	1430	});
	1431
	1432	{
	1433	/* now finally we "stitch in" the new TRIE node
	1434	This means we convert either the first branch or the first Exact,
	1435	depending on whether the thing following (in 'last') is a branch
	1436	or not and whther first is the startbranch (ie is it a sub part of
	1437	the alternation or is it the whole thing.)
	1438	Assuming its a sub part we conver the EXACT otherwise we convert
	1439	the whole branch sequence, including the first.
	1440	*/
	1441	regnode *convert;
	1442
	1443
	1444
	1445
	1446	if ( first == startbranch && OP( last ) != BRANCH ) {
	1447	convert = first;
	1448	} else {
	1449	convert = NEXTOPER( first );
	1450	NEXT_OFF( first ) = (U16)(last - first);
	1451	}
	1452
	1453	OP( convert ) = TRIE + (U8)( flags - EXACT );
	1454	NEXT_OFF( convert ) = (U16)(tail - convert);
	1455	ARG_SET( convert, data_slot );
	1456
	1457	/* tells us if we need to handle accept buffers specially */
	1458	convert->flags = ( RExC_seen_evals ? 1 : 0 );
	1459
	1460
	1461	/* needed for dumping*/
	1462	DEBUG_r({
	1463	regnode *optimize = convert + NODE_STEP_REGNODE + regarglen[ TRIE ];
	1464	/* We now need to mark all of the space originally used by the
	1465	branches as optimized away. This keeps the dumpuntil from
	1466	throwing a wobbly as it doesnt use regnext() to traverse the
	1467	opcodes.
	1468	*/
	1469	while( optimize < last ) {
	1470	OP( optimize ) = OPTIMIZED;
	1471	optimize++;
	1472	}
	1473	});
	1474	} /* end node insert */
	1475	return 1;
	1476	}
	1477
	1478
	1479
	1480	/*
	1481	* There are strange code-generation bugs caused on sparc64 by gcc-2.95.2.
	1482	* These need to be revisited when a newer toolchain becomes available.
	1483	*/
	1484	#if defined(__sparc64__) && defined(__GNUC__)
	1485	# if __GNUC__ < 2 \|\| (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
	1486	# undef SPARC64_GCC_WORKAROUND
	1487	# define SPARC64_GCC_WORKAROUND 1
	1488	# endif
	1489	#endif
	1490
	1491	/* REx optimizer. Converts nodes into quickier variants "in place".
	1492	Finds fixed substrings. */
	1493
	1494	/* Stops at toplevel WHILEM as well as at "last". At end *scanp is set
	1495	to the position after last scanned or to NULL. */
	1496
	1497
	1498	STATIC I32
	1499	S_study_chunk(pTHX_ RExC_state_t pRExC_state, regnode scanp, I32 deltap,
	1500	regnode last, scan_data_t data, U32 flags, U32 depth)
	1501	/* scanp: Start here (read-write). */
	1502	/* deltap: Write maxlen-minlen here. */
	1503	/* last: Stop before this one. */
	1504	{
	1505	dVAR;
	1506	I32 min = 0, pars = 0, code;
	1507	regnode scan = scanp, *next;
	1508	I32 delta = 0;
	1509	int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
	1510	int is_inf_internal = 0; /* The studied chunk is infinite */
	1511	I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
	1512	scan_data_t data_fake;
	1513	struct regnode_charclass_class and_with; /* Valid if flags & SCF_DO_STCLASS_OR */
	1514	SV *re_trie_maxbuff = NULL;
	1515
	1516	GET_RE_DEBUG_FLAGS_DECL;
	1517
	1518	while (scan && OP(scan) != END && scan < last) {
	1519	/* Peephole optimizer: */
	1520	DEBUG_OPTIMISE_r({
	1521	SV * const mysv=sv_newmortal();
	1522	regprop(RExC_rx, mysv, scan);
	1523	PerlIO_printf(Perl_debug_log, "%*speep: %s (0x%08"UVXf")\n",
	1524	(int)depth*2, "", SvPV_nolen_const(mysv), PTR2UV(scan));
	1525	});
	1526
	1527	if (PL_regkind[(U8)OP(scan)] == EXACT) {
	1528	/* Merge several consecutive EXACTish nodes into one. */
	1529	regnode *n = regnext(scan);
	1530	U32 stringok = 1;
	1531	#ifdef DEBUGGING
	1532	regnode *stop = scan;
	1533	#endif
	1534
	1535	next = scan + NODE_SZ_STR(scan);
	1536	/* Skip NOTHING, merge EXACT. /
	1537	while (n &&
	1538	( PL_regkind[(U8)OP(n)] == NOTHING \|\|
	1539	(stringok && (OP(n) == OP(scan))))
	1540	&& NEXT_OFF(n)
	1541	&& NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
	1542	if (OP(n) == TAIL \|\| n > next)
	1543	stringok = 0;
	1544	if (PL_regkind[(U8)OP(n)] == NOTHING) {
	1545	NEXT_OFF(scan) += NEXT_OFF(n);
	1546	next = n + NODE_STEP_REGNODE;
	1547	#ifdef DEBUGGING
	1548	if (stringok)
	1549	stop = n;
	1550	#endif
	1551	n = regnext(n);
	1552	}
	1553	else if (stringok) {
	1554	const int oldl = STR_LEN(scan);
	1555	regnode * const nnext = regnext(n);
	1556
	1557	if (oldl + STR_LEN(n) > U8_MAX)
	1558	break;
	1559	NEXT_OFF(scan) += NEXT_OFF(n);
	1560	STR_LEN(scan) += STR_LEN(n);
	1561	next = n + NODE_SZ_STR(n);
	1562	/* Now we can overwrite n : /
	1563	Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
	1564	#ifdef DEBUGGING
	1565	stop = next - 1;
	1566	#endif
	1567	n = nnext;
	1568	}
	1569	}
	1570
	1571	if (UTF && ( OP(scan) == EXACTF ) && ( STR_LEN(scan) >= 6 ) ) {
	1572	/*
	1573	Two problematic code points in Unicode casefolding of EXACT nodes:
	1574
	1575	U+0390 - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
	1576	U+03B0 - GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
	1577
	1578	which casefold to
	1579
	1580	Unicode UTF-8
	1581
	1582	U+03B9 U+0308 U+0301 0xCE 0xB9 0xCC 0x88 0xCC 0x81
	1583	U+03C5 U+0308 U+0301 0xCF 0x85 0xCC 0x88 0xCC 0x81
	1584
	1585	This means that in case-insensitive matching (or "loose matching",
	1586	as Unicode calls it), an EXACTF of length six (the UTF-8 encoded byte
	1587	length of the above casefolded versions) can match a target string
	1588	of length two (the byte length of UTF-8 encoded U+0390 or U+03B0).
	1589	This would rather mess up the minimum length computation.
	1590
	1591	What we'll do is to look for the tail four bytes, and then peek
	1592	at the preceding two bytes to see whether we need to decrease
	1593	the minimum length by four (six minus two).
	1594
	1595	Thanks to the design of UTF-8, there cannot be false matches:
	1596	A sequence of valid UTF-8 bytes cannot be a subsequence of
	1597	another valid sequence of UTF-8 bytes.
	1598
	1599	*/
	1600	char * const s0 = STRING(scan), s, t;
	1601	char * const s1 = s0 + STR_LEN(scan) - 1;
	1602	char * const s2 = s1 - 4;
	1603	const char t0[] = "\xcc\x88\xcc\x81";
	1604	const char * const t1 = t0 + 3;
	1605
	1606	for (s = s0 + 2;
	1607	s < s2 && (t = ninstr(s, s1, t0, t1));
	1608	s = t + 4) {
	1609	if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) \|\|
	1610	((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
	1611	min -= 4;
	1612	}
	1613	}
	1614
	1615	#ifdef DEBUGGING
	1616	/* Allow dumping */
	1617	n = scan + NODE_SZ_STR(scan);
	1618	while (n <= stop) {
	1619	if (PL_regkind[(U8)OP(n)] != NOTHING \|\| OP(n) == NOTHING) {
	1620	OP(n) = OPTIMIZED;
	1621	NEXT_OFF(n) = 0;
	1622	}
	1623	n++;
	1624	}
	1625	#endif
	1626	}
	1627
	1628
	1629
	1630	/* Follow the next-chain of the current node and optimize
	1631	away all the NOTHINGs from it. */
	1632	if (OP(scan) != CURLYX) {
	1633	const int max = (reg_off_by_arg[OP(scan)]
	1634	? I32_MAX
	1635	/* I32 may be smaller than U16 on CRAYs! */
	1636	: (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
	1637	int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
	1638	int noff;
	1639	regnode *n = scan;
	1640
	1641	/* Skip NOTHING and LONGJMP. */
	1642	while ((n = regnext(n))
	1643	&& ((PL_regkind[(U8)OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
	1644	\|\| ((OP(n) == LONGJMP) && (noff = ARG(n))))
	1645	&& off + noff < max)
	1646	off += noff;
	1647	if (reg_off_by_arg[OP(scan)])
	1648	ARG(scan) = off;
	1649	else
	1650	NEXT_OFF(scan) = off;
	1651	}
	1652
	1653	/* The principal pseudo-switch. Cannot be a switch, since we
	1654	look into several different things. */
	1655	if (OP(scan) == BRANCH \|\| OP(scan) == BRANCHJ
	1656	\|\| OP(scan) == IFTHEN \|\| OP(scan) == SUSPEND) {
	1657	next = regnext(scan);
	1658	code = OP(scan);
	1659	/* demq: the op(next)==code check is to see if we have "branch-branch" AFAICT */
	1660
	1661	if (OP(next) == code \|\| code == IFTHEN \|\| code == SUSPEND) {
	1662	I32 max1 = 0, min1 = I32_MAX, num = 0;
	1663	struct regnode_charclass_class accum;
	1664	regnode * const startbranch=scan;
	1665
	1666	if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
	1667	scan_commit(pRExC_state, data); /* Cannot merge strings after this. */
	1668	if (flags & SCF_DO_STCLASS)
	1669	cl_init_zero(pRExC_state, &accum);
	1670
	1671	while (OP(scan) == code) {
	1672	I32 deltanext, minnext, f = 0, fake;
	1673	struct regnode_charclass_class this_class;
	1674
	1675	num++;
	1676	data_fake.flags = 0;
	1677	if (data) {
	1678	data_fake.whilem_c = data->whilem_c;
	1679	data_fake.last_closep = data->last_closep;
	1680	}
	1681	else
	1682	data_fake.last_closep = &fake;
	1683	next = regnext(scan);
	1684	scan = NEXTOPER(scan);
	1685	if (code != BRANCH)
	1686	scan = NEXTOPER(scan);
	1687	if (flags & SCF_DO_STCLASS) {
	1688	cl_init(pRExC_state, &this_class);
	1689	data_fake.start_class = &this_class;
	1690	f = SCF_DO_STCLASS_AND;
	1691	}
	1692	if (flags & SCF_WHILEM_VISITED_POS)
	1693	f \|= SCF_WHILEM_VISITED_POS;
	1694
	1695	/* we suppose the run is continuous, last=next...*/
	1696	minnext = study_chunk(pRExC_state, &scan, &deltanext,
	1697	next, &data_fake, f,depth+1);
	1698	if (min1 > minnext)
	1699	min1 = minnext;
	1700	if (max1 < minnext + deltanext)
	1701	max1 = minnext + deltanext;
	1702	if (deltanext == I32_MAX)
	1703	is_inf = is_inf_internal = 1;
	1704	scan = next;
	1705	if (data_fake.flags & (SF_HAS_PAR\|SF_IN_PAR))
	1706	pars++;
	1707	if (data && (data_fake.flags & SF_HAS_EVAL))
	1708	data->flags \|= SF_HAS_EVAL;
	1709	if (data)
	1710	data->whilem_c = data_fake.whilem_c;
	1711	if (flags & SCF_DO_STCLASS)
	1712	cl_or(pRExC_state, &accum, &this_class);
	1713	if (code == SUSPEND)
	1714	break;
	1715	}
	1716	if (code == IFTHEN && num < 2) /* Empty ELSE branch */
	1717	min1 = 0;
	1718	if (flags & SCF_DO_SUBSTR) {
	1719	data->pos_min += min1;
	1720	data->pos_delta += max1 - min1;
	1721	if (max1 != min1 \|\| is_inf)
	1722	data->longest = &(data->longest_float);
	1723	}
	1724	min += min1;
	1725	delta += max1 - min1;
	1726	if (flags & SCF_DO_STCLASS_OR) {
	1727	cl_or(pRExC_state, data->start_class, &accum);
	1728	if (min1) {
	1729	cl_and(data->start_class, &and_with);
	1730	flags &= ~SCF_DO_STCLASS;
	1731	}
	1732	}
	1733	else if (flags & SCF_DO_STCLASS_AND) {
	1734	if (min1) {
	1735	cl_and(data->start_class, &accum);
	1736	flags &= ~SCF_DO_STCLASS;
	1737	}
	1738	else {
	1739	/* Switch to OR mode: cache the old value of
	1740	* data->start_class */
	1741	StructCopy(data->start_class, &and_with,
	1742	struct regnode_charclass_class);
	1743	flags &= ~SCF_DO_STCLASS_AND;
	1744	StructCopy(&accum, data->start_class,
	1745	struct regnode_charclass_class);
	1746	flags \|= SCF_DO_STCLASS_OR;
	1747	data->start_class->flags \|= ANYOF_EOS;
	1748	}
	1749	}
	1750
	1751	/* demq.
	1752
	1753	Assuming this was/is a branch we are dealing with: 'scan' now
	1754	points at the item that follows the branch sequence, whatever
	1755	it is. We now start at the beginning of the sequence and look
	1756	for subsequences of
	1757
	1758	BRANCH->EXACT=>X
	1759	BRANCH->EXACT=>X
	1760
	1761	which would be constructed from a pattern like /A\|LIST\|OF\|WORDS/
	1762
	1763	If we can find such a subseqence we need to turn the first
	1764	element into a trie and then add the subsequent branch exact
	1765	strings to the trie.
	1766
	1767	We have two cases
	1768
	1769	1. patterns where the whole set of branch can be converted to a trie,
	1770
	1771	2. patterns where only a subset of the alternations can be
	1772	converted to a trie.
	1773
	1774	In case 1 we can replace the whole set with a single regop
	1775	for the trie. In case 2 we need to keep the start and end
	1776	branchs so
	1777
	1778	'BRANCH EXACT; BRANCH EXACT; BRANCH X'
	1779	becomes BRANCH TRIE; BRANCH X;
	1780
	1781	Hypthetically when we know the regex isnt anchored we can
	1782	turn a case 1 into a DFA and let it rip... Every time it finds a match
	1783	it would just call its tail, no WHILEM/CURLY needed.
	1784
	1785	*/
	1786	if (DO_TRIE) {
	1787	if (!re_trie_maxbuff) {
	1788	re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
	1789	if (!SvIOK(re_trie_maxbuff))
	1790	sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
	1791	}
	1792	if ( SvIV(re_trie_maxbuff)>=0 && OP( startbranch )==BRANCH ) {
	1793	regnode *cur;
	1794	regnode first = (regnode )NULL;
	1795	regnode last = (regnode )NULL;
	1796	regnode *tail = scan;
	1797	U8 optype = 0;
	1798	U32 count=0;
	1799
	1800	#ifdef DEBUGGING
	1801	SV * const mysv = sv_newmortal(); /* for dumping */
	1802	#endif
	1803	/* var tail is used because there may be a TAIL
	1804	regop in the way. Ie, the exacts will point to the
	1805	thing following the TAIL, but the last branch will
	1806	point at the TAIL. So we advance tail. If we
	1807	have nested (?:) we may have to move through several
	1808	tails.
	1809	*/
	1810
	1811	while ( OP( tail ) == TAIL ) {
	1812	/* this is the TAIL generated by (?:) */
	1813	tail = regnext( tail );
	1814	}
	1815
	1816	DEBUG_OPTIMISE_r({
	1817	regprop(RExC_rx, mysv, tail );
	1818	PerlIO_printf( Perl_debug_log, "%*s%s%s%s\n",
	1819	(int)depth * 2 + 2, "", "Tail node is:", SvPV_nolen_const( mysv ),
	1820	(RExC_seen_evals) ? "[EVAL]" : ""
	1821	);
	1822	});
	1823	/*
	1824
	1825	step through the branches, cur represents each
	1826	branch, noper is the first thing to be matched
	1827	as part of that branch and noper_next is the
	1828	regnext() of that node. if noper is an EXACT
	1829	and noper_next is the same as scan (our current
	1830	position in the regex) then the EXACT branch is
	1831	a possible optimization target. Once we have
	1832	two or more consequetive such branches we can
	1833	create a trie of the EXACT's contents and stich
	1834	it in place. If the sequence represents all of
	1835	the branches we eliminate the whole thing and
	1836	replace it with a single TRIE. If it is a
	1837	subsequence then we need to stitch it in. This
	1838	means the first branch has to remain, and needs
	1839	to be repointed at the item on the branch chain
	1840	following the last branch optimized. This could
	1841	be either a BRANCH, in which case the
	1842	subsequence is internal, or it could be the
	1843	item following the branch sequence in which
	1844	case the subsequence is at the end.
	1845
	1846	*/
	1847
	1848	/* dont use tail as the end marker for this traverse */
	1849	for ( cur = startbranch ; cur != scan ; cur = regnext( cur ) ) {
	1850	regnode * const noper = NEXTOPER( cur );
	1851	regnode * const noper_next = regnext( noper );
	1852
	1853	DEBUG_OPTIMISE_r({
	1854	regprop(RExC_rx, mysv, cur);
	1855	PerlIO_printf( Perl_debug_log, "%*s%s",
	1856	(int)depth * 2 + 2," ", SvPV_nolen_const( mysv ) );
	1857
	1858	regprop(RExC_rx, mysv, noper);
	1859	PerlIO_printf( Perl_debug_log, " -> %s",
	1860	SvPV_nolen_const(mysv));
	1861
	1862	if ( noper_next ) {
	1863	regprop(RExC_rx, mysv, noper_next );
	1864	PerlIO_printf( Perl_debug_log,"\t=> %s\t",
	1865	SvPV_nolen_const(mysv));
	1866	}
	1867	PerlIO_printf( Perl_debug_log, "0x%p,0x%p,0x%p)\n",
	1868	first, last, cur );
	1869	});
	1870	if ( ( first ? OP( noper ) == optype
	1871	: PL_regkind[ (U8)OP( noper ) ] == EXACT )
	1872	&& noper_next == tail && count<U16_MAX)
	1873	{
	1874	count++;
	1875	if ( !first ) {
	1876	first = cur;
	1877	optype = OP( noper );
	1878	} else {
	1879	DEBUG_OPTIMISE_r(
	1880	if (!last ) {
	1881	regprop(RExC_rx, mysv, first);
	1882	PerlIO_printf( Perl_debug_log, "%*s%s",
	1883	(int)depth * 2 + 2, "F:", SvPV_nolen_const( mysv ) );
	1884	regprop(RExC_rx, mysv, NEXTOPER(first) );
	1885	PerlIO_printf( Perl_debug_log, " -> %s\n",
	1886	SvPV_nolen_const( mysv ) );
	1887	}
	1888	);
	1889	last = cur;
	1890	DEBUG_OPTIMISE_r({
	1891	regprop(RExC_rx, mysv, cur);
	1892	PerlIO_printf( Perl_debug_log, "%*s%s",
	1893	(int)depth * 2 + 2, "N:", SvPV_nolen_const( mysv ) );
	1894	regprop(RExC_rx, mysv, noper );
	1895	PerlIO_printf( Perl_debug_log, " -> %s\n",
	1896	SvPV_nolen_const( mysv ) );
	1897	});
	1898	}
	1899	} else {
	1900	if ( last ) {
	1901	DEBUG_OPTIMISE_r(
	1902	PerlIO_printf( Perl_debug_log, "%*s%s\n",
	1903	(int)depth * 2 + 2, "E:", "END" );
	1904	);
	1905	make_trie( pRExC_state, startbranch, first, cur, tail, optype );
	1906	}
	1907	if ( PL_regkind[ (U8)OP( noper ) ] == EXACT
	1908	&& noper_next == tail )
	1909	{
	1910	count = 1;
	1911	first = cur;
	1912	optype = OP( noper );
	1913	} else {
	1914	count = 0;
	1915	first = NULL;
	1916	optype = 0;
	1917	}
	1918	last = NULL;
	1919	}
	1920	}
	1921	DEBUG_OPTIMISE_r({
	1922	regprop(RExC_rx, mysv, cur);
	1923	PerlIO_printf( Perl_debug_log,
	1924	"%s%s\t(0x%p,0x%p,0x%p)\n", (int)depth 2 + 2,
	1925	" ", SvPV_nolen_const( mysv ), first, last, cur);
	1926
	1927	});
	1928	if ( last ) {
	1929	DEBUG_OPTIMISE_r(
	1930	PerlIO_printf( Perl_debug_log, "%*s%s\n",
	1931	(int)depth * 2 + 2, "E:", "==END==" );
	1932	);
	1933	make_trie( pRExC_state, startbranch, first, scan, tail, optype );
	1934	}
	1935	}
	1936	}
	1937	}
	1938	else if ( code == BRANCHJ ) { /* single branch is optimized. */
	1939	scan = NEXTOPER(NEXTOPER(scan));
	1940	} else /* single branch is optimized. */
	1941	scan = NEXTOPER(scan);
	1942	continue;
	1943	}
	1944	else if (OP(scan) == EXACT) {
	1945	I32 l = STR_LEN(scan);
	1946	UV uc;
	1947	if (UTF) {
	1948	const U8 * const s = (U8*)STRING(scan);
	1949	l = utf8_length(s, s + l);
	1950	uc = utf8_to_uvchr(s, NULL);
	1951	} else {
	1952	uc = ((U8)STRING(scan));
	1953	}
	1954	min += l;
	1955	if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
	1956	/* The code below prefers earlier match for fixed
	1957	offset, later match for variable offset. */
	1958	if (data->last_end == -1) { /* Update the start info. */
	1959	data->last_start_min = data->pos_min;
	1960	data->last_start_max = is_inf
	1961	? I32_MAX : data->pos_min + data->pos_delta;
	1962	}
	1963	sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
	1964	if (UTF)
	1965	SvUTF8_on(data->last_found);
	1966	{
	1967	SV * const sv = data->last_found;
	1968	MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
	1969	mg_find(sv, PERL_MAGIC_utf8) : NULL;
	1970	if (mg && mg->mg_len >= 0)
	1971	mg->mg_len += utf8_length((U8*)STRING(scan),
	1972	(U8*)STRING(scan)+STR_LEN(scan));
	1973	}
	1974	data->last_end = data->pos_min + l;
	1975	data->pos_min += l; /* As in the first entry. */
	1976	data->flags &= ~SF_BEFORE_EOL;
	1977	}
	1978	if (flags & SCF_DO_STCLASS_AND) {
	1979	/* Check whether it is compatible with what we know already! */
	1980	int compat = 1;
	1981
	1982	if (uc >= 0x100 \|\|
	1983	(!(data->start_class->flags & (ANYOF_CLASS \| ANYOF_LOCALE))
	1984	&& !ANYOF_BITMAP_TEST(data->start_class, uc)
	1985	&& (!(data->start_class->flags & ANYOF_FOLD)
	1986	\|\| !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
	1987	)
	1988	compat = 0;
	1989	ANYOF_CLASS_ZERO(data->start_class);
	1990	ANYOF_BITMAP_ZERO(data->start_class);
	1991	if (compat)
	1992	ANYOF_BITMAP_SET(data->start_class, uc);
	1993	data->start_class->flags &= ~ANYOF_EOS;
	1994	if (uc < 0x100)
	1995	data->start_class->flags &= ~ANYOF_UNICODE_ALL;
	1996	}
	1997	else if (flags & SCF_DO_STCLASS_OR) {
	1998	/* false positive possible if the class is case-folded */
	1999	if (uc < 0x100)
	2000	ANYOF_BITMAP_SET(data->start_class, uc);
	2001	else
	2002	data->start_class->flags \|= ANYOF_UNICODE_ALL;
	2003	data->start_class->flags &= ~ANYOF_EOS;
	2004	cl_and(data->start_class, &and_with);
	2005	}
	2006	flags &= ~SCF_DO_STCLASS;
	2007	}
	2008	else if (PL_regkind[(U8)OP(scan)] == EXACT) { /* But OP != EXACT! */
	2009	I32 l = STR_LEN(scan);
	2010	UV uc = ((U8)STRING(scan));
	2011
	2012	/* Search for fixed substrings supports EXACT only. */
	2013	if (flags & SCF_DO_SUBSTR) {
	2014	assert(data);
	2015	scan_commit(pRExC_state, data);
	2016	}
	2017	if (UTF) {
	2018	const U8 * const s = (U8 *)STRING(scan);
	2019	l = utf8_length(s, s + l);
	2020	uc = utf8_to_uvchr(s, NULL);
	2021	}
	2022	min += l;
	2023	if (flags & SCF_DO_SUBSTR)
	2024	data->pos_min += l;
	2025	if (flags & SCF_DO_STCLASS_AND) {
	2026	/* Check whether it is compatible with what we know already! */
	2027	int compat = 1;
	2028
	2029	if (uc >= 0x100 \|\|
	2030	(!(data->start_class->flags & (ANYOF_CLASS \| ANYOF_LOCALE))
	2031	&& !ANYOF_BITMAP_TEST(data->start_class, uc)
	2032	&& !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
	2033	compat = 0;
	2034	ANYOF_CLASS_ZERO(data->start_class);
	2035	ANYOF_BITMAP_ZERO(data->start_class);
	2036	if (compat) {
	2037	ANYOF_BITMAP_SET(data->start_class, uc);
	2038	data->start_class->flags &= ~ANYOF_EOS;
	2039	data->start_class->flags \|= ANYOF_FOLD;
	2040	if (OP(scan) == EXACTFL)
	2041	data->start_class->flags \|= ANYOF_LOCALE;
	2042	}
	2043	}
	2044	else if (flags & SCF_DO_STCLASS_OR) {
	2045	if (data->start_class->flags & ANYOF_FOLD) {
	2046	/* false positive possible if the class is case-folded.
	2047	Assume that the locale settings are the same... */
	2048	if (uc < 0x100)
	2049	ANYOF_BITMAP_SET(data->start_class, uc);
	2050	data->start_class->flags &= ~ANYOF_EOS;
	2051	}
	2052	cl_and(data->start_class, &and_with);
	2053	}
	2054	flags &= ~SCF_DO_STCLASS;
	2055	}
	2056	else if (strchr((const char*)PL_varies,OP(scan))) {
	2057	I32 mincount, maxcount, minnext, deltanext, fl = 0;
	2058	I32 f = flags, pos_before = 0;
	2059	regnode * const oscan = scan;
	2060	struct regnode_charclass_class this_class;
	2061	struct regnode_charclass_class *oclass = NULL;
	2062	I32 next_is_eval = 0;
	2063
	2064	switch (PL_regkind[(U8)OP(scan)]) {
	2065	case WHILEM: /* End of (?:...)* . */
	2066	scan = NEXTOPER(scan);
	2067	goto finish;
	2068	case PLUS:
	2069	if (flags & (SCF_DO_SUBSTR \| SCF_DO_STCLASS)) {
	2070	next = NEXTOPER(scan);
	2071	if (OP(next) == EXACT \|\| (flags & SCF_DO_STCLASS)) {
	2072	mincount = 1;
	2073	maxcount = REG_INFTY;
	2074	next = regnext(scan);
	2075	scan = NEXTOPER(scan);
	2076	goto do_curly;
	2077	}
	2078	}
	2079	if (flags & SCF_DO_SUBSTR)
	2080	data->pos_min++;
	2081	min++;
	2082	/* Fall through. */
	2083	case STAR:
	2084	if (flags & SCF_DO_STCLASS) {
	2085	mincount = 0;
	2086	maxcount = REG_INFTY;
	2087	next = regnext(scan);
	2088	scan = NEXTOPER(scan);
	2089	goto do_curly;
	2090	}
	2091	is_inf = is_inf_internal = 1;
	2092	scan = regnext(scan);
	2093	if (flags & SCF_DO_SUBSTR) {
	2094	scan_commit(pRExC_state, data); /* Cannot extend fixed substrings */
	2095	data->longest = &(data->longest_float);
	2096	}
	2097	goto optimize_curly_tail;
	2098	case CURLY:
	2099	mincount = ARG1(scan);
	2100	maxcount = ARG2(scan);
	2101	next = regnext(scan);
	2102	if (OP(scan) == CURLYX) {
	2103	I32 lp = (data ? *(data->last_closep) : 0);
	2104	scan->flags = ((lp <= U8_MAX) ? (U8)lp : U8_MAX);
	2105	}
	2106	scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
	2107	next_is_eval = (OP(scan) == EVAL);
	2108	do_curly:
	2109	if (flags & SCF_DO_SUBSTR) {
	2110	if (mincount == 0) scan_commit(pRExC_state,data); /* Cannot extend fixed substrings */
	2111	pos_before = data->pos_min;
	2112	}
	2113	if (data) {
	2114	fl = data->flags;
	2115	data->flags &= ~(SF_HAS_PAR\|SF_IN_PAR\|SF_HAS_EVAL);
	2116	if (is_inf)
	2117	data->flags \|= SF_IS_INF;
	2118	}
	2119	if (flags & SCF_DO_STCLASS) {
	2120	cl_init(pRExC_state, &this_class);
	2121	oclass = data->start_class;
	2122	data->start_class = &this_class;
	2123	f \|= SCF_DO_STCLASS_AND;
	2124	f &= ~SCF_DO_STCLASS_OR;
	2125	}
	2126	/* These are the cases when once a subexpression
	2127	fails at a particular position, it cannot succeed
	2128	even after backtracking at the enclosing scope.
	2129
	2130	XXXX what if minimal match and we are at the
	2131	initial run of {n,m}? */
	2132	if ((mincount != maxcount - 1) && (maxcount != REG_INFTY))
	2133	f &= ~SCF_WHILEM_VISITED_POS;
	2134
	2135	/* This will finish on WHILEM, setting scan, or on NULL: */
	2136	minnext = study_chunk(pRExC_state, &scan, &deltanext, last, data,
	2137	(mincount == 0
	2138	? (f & ~SCF_DO_SUBSTR) : f),depth+1);
	2139
	2140	if (flags & SCF_DO_STCLASS)
	2141	data->start_class = oclass;
	2142	if (mincount == 0 \|\| minnext == 0) {
	2143	if (flags & SCF_DO_STCLASS_OR) {
	2144	cl_or(pRExC_state, data->start_class, &this_class);
	2145	}
	2146	else if (flags & SCF_DO_STCLASS_AND) {
	2147	/* Switch to OR mode: cache the old value of
	2148	* data->start_class */
	2149	StructCopy(data->start_class, &and_with,
	2150	struct regnode_charclass_class);
	2151	flags &= ~SCF_DO_STCLASS_AND;
	2152	StructCopy(&this_class, data->start_class,
	2153	struct regnode_charclass_class);
	2154	flags \|= SCF_DO_STCLASS_OR;
	2155	data->start_class->flags \|= ANYOF_EOS;
	2156	}
	2157	} else { /* Non-zero len */
	2158	if (flags & SCF_DO_STCLASS_OR) {
	2159	cl_or(pRExC_state, data->start_class, &this_class);
	2160	cl_and(data->start_class, &and_with);
	2161	}
	2162	else if (flags & SCF_DO_STCLASS_AND)
	2163	cl_and(data->start_class, &this_class);
	2164	flags &= ~SCF_DO_STCLASS;
	2165	}
	2166	if (!scan) /* It was not CURLYX, but CURLY. */
	2167	scan = next;
	2168	if ( /* ? quantifier ok, except for (?{ ... }) */
	2169	(next_is_eval \|\| !(mincount == 0 && maxcount == 1))
	2170	&& (minnext == 0) && (deltanext == 0)
	2171	&& data && !(data->flags & (SF_HAS_PAR\|SF_IN_PAR))
	2172	&& maxcount <= REG_INFTY/3 /* Complement check for big count */
	2173	&& ckWARN(WARN_REGEXP))
	2174	{
	2175	vWARN(RExC_parse,
	2176	"Quantifier unexpected on zero-length expression");
	2177	}
	2178
	2179	min += minnext * mincount;
	2180	is_inf_internal \|= ((maxcount == REG_INFTY
	2181	&& (minnext + deltanext) > 0)
	2182	\|\| deltanext == I32_MAX);
	2183	is_inf \|= is_inf_internal;
	2184	delta += (minnext + deltanext) * maxcount - minnext * mincount;
	2185
	2186	/* Try powerful optimization CURLYX => CURLYN. */
	2187	if ( OP(oscan) == CURLYX && data
	2188	&& data->flags & SF_IN_PAR
	2189	&& !(data->flags & SF_HAS_EVAL)
	2190	&& !deltanext && minnext == 1 ) {
	2191	/* Try to optimize to CURLYN. */
	2192	regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
	2193	regnode * const nxt1 = nxt;
	2194	#ifdef DEBUGGING
	2195	regnode *nxt2;
	2196	#endif
	2197
	2198	/* Skip open. */
	2199	nxt = regnext(nxt);
	2200	if (!strchr((const char*)PL_simple,OP(nxt))
	2201	&& !(PL_regkind[(U8)OP(nxt)] == EXACT
	2202	&& STR_LEN(nxt) == 1))
	2203	goto nogo;
	2204	#ifdef DEBUGGING
	2205	nxt2 = nxt;
	2206	#endif
	2207	nxt = regnext(nxt);
	2208	if (OP(nxt) != CLOSE)
	2209	goto nogo;
	2210	/* Now we know that nxt2 is the only contents: */
	2211	oscan->flags = (U8)ARG(nxt);
	2212	OP(oscan) = CURLYN;
	2213	OP(nxt1) = NOTHING; /* was OPEN. */
	2214	#ifdef DEBUGGING
	2215	OP(nxt1 + 1) = OPTIMIZED; /* was count. */
	2216	NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
	2217	NEXT_OFF(nxt2) = 0; /* just for consistancy with CURLY. */
	2218	OP(nxt) = OPTIMIZED; /* was CLOSE. */
	2219	OP(nxt + 1) = OPTIMIZED; /* was count. */
	2220	NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
	2221	#endif
	2222	}
	2223	nogo:
	2224
	2225	/* Try optimization CURLYX => CURLYM. */
	2226	if ( OP(oscan) == CURLYX && data
	2227	&& !(data->flags & SF_HAS_PAR)
	2228	&& !(data->flags & SF_HAS_EVAL)
	2229	&& !deltanext /* atom is fixed width */
	2230	&& minnext != 0 /* CURLYM can't handle zero width */
	2231	) {
	2232	/* XXXX How to optimize if data == 0? */
	2233	/* Optimize to a simpler form. */
	2234	regnode nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; / OPEN */
	2235	regnode *nxt2;
	2236
	2237	OP(oscan) = CURLYM;
	2238	while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
	2239	&& (OP(nxt2) != WHILEM))
	2240	nxt = nxt2;
	2241	OP(nxt2) = SUCCEED; /* Whas WHILEM */
	2242	/* Need to optimize away parenths. */
	2243	if (data->flags & SF_IN_PAR) {
	2244	/* Set the parenth number. */
	2245	regnode nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; / OPEN*/
	2246
	2247	if (OP(nxt) != CLOSE)
	2248	FAIL("Panic opt close");
	2249	oscan->flags = (U8)ARG(nxt);
	2250	OP(nxt1) = OPTIMIZED; /* was OPEN. */
	2251	OP(nxt) = OPTIMIZED; /* was CLOSE. */
	2252	#ifdef DEBUGGING
	2253	OP(nxt1 + 1) = OPTIMIZED; /* was count. */
	2254	OP(nxt + 1) = OPTIMIZED; /* was count. */
	2255	NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
	2256	NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
	2257	#endif
	2258	#if 0
	2259	while ( nxt1 && (OP(nxt1) != WHILEM)) {
	2260	regnode *nnxt = regnext(nxt1);
	2261
	2262	if (nnxt == nxt) {
	2263	if (reg_off_by_arg[OP(nxt1)])
	2264	ARG_SET(nxt1, nxt2 - nxt1);
	2265	else if (nxt2 - nxt1 < U16_MAX)
	2266	NEXT_OFF(nxt1) = nxt2 - nxt1;
	2267	else
	2268	OP(nxt) = NOTHING; /* Cannot beautify */
	2269	}
	2270	nxt1 = nnxt;
	2271	}
	2272	#endif
	2273	/* Optimize again: */
	2274	study_chunk(pRExC_state, &nxt1, &deltanext, nxt,
	2275	NULL, 0,depth+1);
	2276	}
	2277	else
	2278	oscan->flags = 0;
	2279	}
	2280	else if ((OP(oscan) == CURLYX)
	2281	&& (flags & SCF_WHILEM_VISITED_POS)
	2282	/* See the comment on a similar expression above.
	2283	However, this time it not a subexpression
	2284	we care about, but the expression itself. */
	2285	&& (maxcount == REG_INFTY)
	2286	&& data && ++data->whilem_c < 16) {
	2287	/* This stays as CURLYX, we can put the count/of pair. */
	2288	/* Find WHILEM (as in regexec.c) */
	2289	regnode *nxt = oscan + NEXT_OFF(oscan);
	2290
	2291	if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
	2292	nxt += ARG(nxt);
	2293	PREVOPER(nxt)->flags = (U8)(data->whilem_c
	2294	\| (RExC_whilem_seen << 4)); /* On WHILEM */
	2295	}
	2296	if (data && fl & (SF_HAS_PAR\|SF_IN_PAR))
	2297	pars++;
	2298	if (flags & SCF_DO_SUBSTR) {
	2299	SV *last_str = NULL;
	2300	int counted = mincount != 0;
	2301
	2302	if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
	2303	#if defined(SPARC64_GCC_WORKAROUND)
	2304	I32 b = 0;
	2305	STRLEN l = 0;
	2306	const char *s = NULL;
	2307	I32 old = 0;
	2308
	2309	if (pos_before >= data->last_start_min)
	2310	b = pos_before;
	2311	else
	2312	b = data->last_start_min;
	2313
	2314	l = 0;
	2315	s = SvPV_const(data->last_found, l);
	2316	old = b - data->last_start_min;
	2317
	2318	#else
	2319	I32 b = pos_before >= data->last_start_min
	2320	? pos_before : data->last_start_min;
	2321	STRLEN l;
	2322	const char * const s = SvPV_const(data->last_found, l);
	2323	I32 old = b - data->last_start_min;
	2324	#endif
	2325
	2326	if (UTF)
	2327	old = utf8_hop((U8)s, old) - (U8)s;
	2328
	2329	l -= old;
	2330	/* Get the added string: */
	2331	last_str = newSVpvn(s + old, l);
	2332	if (UTF)
	2333	SvUTF8_on(last_str);
	2334	if (deltanext == 0 && pos_before == b) {
	2335	/* What was added is a constant string */
	2336	if (mincount > 1) {
	2337	SvGROW(last_str, (mincount * l) + 1);
	2338	repeatcpy(SvPVX(last_str) + l,
	2339	SvPVX_const(last_str), l, mincount - 1);
	2340	SvCUR_set(last_str, SvCUR(last_str) * mincount);
	2341	/* Add additional parts. */
	2342	SvCUR_set(data->last_found,
	2343	SvCUR(data->last_found) - l);
	2344	sv_catsv(data->last_found, last_str);
	2345	{
	2346	SV * sv = data->last_found;
	2347	MAGIC *mg =
	2348	SvUTF8(sv) && SvMAGICAL(sv) ?
	2349	mg_find(sv, PERL_MAGIC_utf8) : NULL;
	2350	if (mg && mg->mg_len >= 0)
	2351	mg->mg_len += CHR_SVLEN(last_str);
	2352	}
	2353	data->last_end += l * (mincount - 1);
	2354	}
	2355	} else {
	2356	/* start offset must point into the last copy */
	2357	data->last_start_min += minnext * (mincount - 1);
	2358	data->last_start_max += is_inf ? I32_MAX
	2359	: (maxcount - 1) * (minnext + data->pos_delta);
	2360	}
	2361	}
	2362	/* It is counted once already... */
	2363	data->pos_min += minnext * (mincount - counted);
	2364	data->pos_delta += - counted * deltanext +
	2365	(minnext + deltanext) * maxcount - minnext * mincount;
	2366	if (mincount != maxcount) {
	2367	/* Cannot extend fixed substrings found inside
	2368	the group. */
	2369	scan_commit(pRExC_state,data);
	2370	if (mincount && last_str) {
	2371	SV * const sv = data->last_found;
	2372	MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
	2373	mg_find(sv, PERL_MAGIC_utf8) : NULL;
	2374
	2375	if (mg)
	2376	mg->mg_len = -1;
	2377	sv_setsv(sv, last_str);
	2378	data->last_end = data->pos_min;
	2379	data->last_start_min =
	2380	data->pos_min - CHR_SVLEN(last_str);
	2381	data->last_start_max = is_inf
	2382	? I32_MAX
	2383	: data->pos_min + data->pos_delta
	2384	- CHR_SVLEN(last_str);
	2385	}
	2386	data->longest = &(data->longest_float);
	2387	}
	2388	SvREFCNT_dec(last_str);
	2389	}
	2390	if (data && (fl & SF_HAS_EVAL))
	2391	data->flags \|= SF_HAS_EVAL;
	2392	optimize_curly_tail:
	2393	if (OP(oscan) != CURLYX) {
	2394	while (PL_regkind[(U8)OP(next = regnext(oscan))] == NOTHING
	2395	&& NEXT_OFF(next))
	2396	NEXT_OFF(oscan) += NEXT_OFF(next);
	2397	}
	2398	continue;
	2399	default: /* REF and CLUMP only? */
	2400	if (flags & SCF_DO_SUBSTR) {
	2401	scan_commit(pRExC_state,data); /* Cannot expect anything... */
	2402	data->longest = &(data->longest_float);
	2403	}
	2404	is_inf = is_inf_internal = 1;
	2405	if (flags & SCF_DO_STCLASS_OR)
	2406	cl_anything(pRExC_state, data->start_class);
	2407	flags &= ~SCF_DO_STCLASS;
	2408	break;
	2409	}
	2410	}
	2411	else if (strchr((const char*)PL_simple,OP(scan))) {
	2412	int value = 0;
	2413
	2414	if (flags & SCF_DO_SUBSTR) {
	2415	scan_commit(pRExC_state,data);
	2416	data->pos_min++;
	2417	}
	2418	min++;
	2419	if (flags & SCF_DO_STCLASS) {
	2420	data->start_class->flags &= ~ANYOF_EOS; /* No match on empty */
	2421
	2422	/* Some of the logic below assumes that switching
	2423	locale on will only add false positives. */
	2424	switch (PL_regkind[(U8)OP(scan)]) {
	2425	case SANY:
	2426	default:
	2427	do_default:
	2428	/* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */
	2429	if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
	2430	cl_anything(pRExC_state, data->start_class);
	2431	break;
	2432	case REG_ANY:
	2433	if (OP(scan) == SANY)
	2434	goto do_default;
	2435	if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
	2436	value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
	2437	\|\| (data->start_class->flags & ANYOF_CLASS));
	2438	cl_anything(pRExC_state, data->start_class);
	2439	}
	2440	if (flags & SCF_DO_STCLASS_AND \|\| !value)
	2441	ANYOF_BITMAP_CLEAR(data->start_class,'\n');
	2442	break;
	2443	case ANYOF:
	2444	if (flags & SCF_DO_STCLASS_AND)
	2445	cl_and(data->start_class,
	2446	(struct regnode_charclass_class*)scan);
	2447	else
	2448	cl_or(pRExC_state, data->start_class,
	2449	(struct regnode_charclass_class*)scan);
	2450	break;
	2451	case ALNUM:
	2452	if (flags & SCF_DO_STCLASS_AND) {
	2453	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	2454	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
	2455	for (value = 0; value < 256; value++)
	2456	if (!isALNUM(value))
	2457	ANYOF_BITMAP_CLEAR(data->start_class, value);
	2458	}
	2459	}
	2460	else {
	2461	if (data->start_class->flags & ANYOF_LOCALE)
	2462	ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
	2463	else {
	2464	for (value = 0; value < 256; value++)
	2465	if (isALNUM(value))
	2466	ANYOF_BITMAP_SET(data->start_class, value);
	2467	}
	2468	}
	2469	break;
	2470	case ALNUML:
	2471	if (flags & SCF_DO_STCLASS_AND) {
	2472	if (data->start_class->flags & ANYOF_LOCALE)
	2473	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
	2474	}
	2475	else {
	2476	ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
	2477	data->start_class->flags \|= ANYOF_LOCALE;
	2478	}
	2479	break;
	2480	case NALNUM:
	2481	if (flags & SCF_DO_STCLASS_AND) {
	2482	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	2483	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
	2484	for (value = 0; value < 256; value++)
	2485	if (isALNUM(value))
	2486	ANYOF_BITMAP_CLEAR(data->start_class, value);
	2487	}
	2488	}
	2489	else {
	2490	if (data->start_class->flags & ANYOF_LOCALE)
	2491	ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
	2492	else {
	2493	for (value = 0; value < 256; value++)
	2494	if (!isALNUM(value))
	2495	ANYOF_BITMAP_SET(data->start_class, value);
	2496	}
	2497	}
	2498	break;
	2499	case NALNUML:
	2500	if (flags & SCF_DO_STCLASS_AND) {
	2501	if (data->start_class->flags & ANYOF_LOCALE)
	2502	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
	2503	}
	2504	else {
	2505	data->start_class->flags \|= ANYOF_LOCALE;
	2506	ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
	2507	}
	2508	break;
	2509	case SPACE:
	2510	if (flags & SCF_DO_STCLASS_AND) {
	2511	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	2512	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
	2513	for (value = 0; value < 256; value++)
	2514	if (!isSPACE(value))
	2515	ANYOF_BITMAP_CLEAR(data->start_class, value);
	2516	}
	2517	}
	2518	else {
	2519	if (data->start_class->flags & ANYOF_LOCALE)
	2520	ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
	2521	else {
	2522	for (value = 0; value < 256; value++)
	2523	if (isSPACE(value))
	2524	ANYOF_BITMAP_SET(data->start_class, value);
	2525	}
	2526	}
	2527	break;
	2528	case SPACEL:
	2529	if (flags & SCF_DO_STCLASS_AND) {
	2530	if (data->start_class->flags & ANYOF_LOCALE)
	2531	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
	2532	}
	2533	else {
	2534	data->start_class->flags \|= ANYOF_LOCALE;
	2535	ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
	2536	}
	2537	break;
	2538	case NSPACE:
	2539	if (flags & SCF_DO_STCLASS_AND) {
	2540	if (!(data->start_class->flags & ANYOF_LOCALE)) {
	2541	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
	2542	for (value = 0; value < 256; value++)
	2543	if (isSPACE(value))
	2544	ANYOF_BITMAP_CLEAR(data->start_class, value);
	2545	}
	2546	}
	2547	else {
	2548	if (data->start_class->flags & ANYOF_LOCALE)
	2549	ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
	2550	else {
	2551	for (value = 0; value < 256; value++)
	2552	if (!isSPACE(value))
	2553	ANYOF_BITMAP_SET(data->start_class, value);
	2554	}
	2555	}
	2556	break;
	2557	case NSPACEL:
	2558	if (flags & SCF_DO_STCLASS_AND) {
	2559	if (data->start_class->flags & ANYOF_LOCALE) {
	2560	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
	2561	for (value = 0; value < 256; value++)
	2562	if (!isSPACE(value))
	2563	ANYOF_BITMAP_CLEAR(data->start_class, value);
	2564	}
	2565	}
	2566	else {
	2567	data->start_class->flags \|= ANYOF_LOCALE;
	2568	ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
	2569	}
	2570	break;
	2571	case DIGIT:
	2572	if (flags & SCF_DO_STCLASS_AND) {
	2573	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NDIGIT);
	2574	for (value = 0; value < 256; value++)
	2575	if (!isDIGIT(value))
	2576	ANYOF_BITMAP_CLEAR(data->start_class, value);
	2577	}
	2578	else {
	2579	if (data->start_class->flags & ANYOF_LOCALE)
	2580	ANYOF_CLASS_SET(data->start_class,ANYOF_DIGIT);
	2581	else {
	2582	for (value = 0; value < 256; value++)
	2583	if (isDIGIT(value))
	2584	ANYOF_BITMAP_SET(data->start_class, value);
	2585	}
	2586	}
	2587	break;
	2588	case NDIGIT:
	2589	if (flags & SCF_DO_STCLASS_AND) {
	2590	ANYOF_CLASS_CLEAR(data->start_class,ANYOF_DIGIT);
	2591	for (value = 0; value < 256; value++)
	2592	if (isDIGIT(value))
	2593	ANYOF_BITMAP_CLEAR(data->start_class, value);
	2594	}
	2595	else {
	2596	if (data->start_class->flags & ANYOF_LOCALE)
	2597	ANYOF_CLASS_SET(data->start_class,ANYOF_NDIGIT);
	2598	else {
	2599	for (value = 0; value < 256; value++)
	2600	if (!isDIGIT(value))
	2601	ANYOF_BITMAP_SET(data->start_class, value);
	2602	}
	2603	}
	2604	break;
	2605	}
	2606	if (flags & SCF_DO_STCLASS_OR)
	2607	cl_and(data->start_class, &and_with);
	2608	flags &= ~SCF_DO_STCLASS;
	2609	}
	2610	}
	2611	else if (PL_regkind[(U8)OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
	2612	data->flags \|= (OP(scan) == MEOL
	2613	? SF_BEFORE_MEOL
	2614	: SF_BEFORE_SEOL);
	2615	}
	2616	else if ( PL_regkind[(U8)OP(scan)] == BRANCHJ
	2617	/* Lookbehind, or need to calculate parens/evals/stclass: */
	2618	&& (scan->flags \|\| data \|\| (flags & SCF_DO_STCLASS))
	2619	&& (OP(scan) == IFMATCH \|\| OP(scan) == UNLESSM)) {
	2620	/* Lookahead/lookbehind */
	2621	I32 deltanext, minnext, fake = 0;
	2622	regnode *nscan;
	2623	struct regnode_charclass_class intrnl;
	2624	int f = 0;
	2625
	2626	data_fake.flags = 0;
	2627	if (data) {
	2628	data_fake.whilem_c = data->whilem_c;
	2629	data_fake.last_closep = data->last_closep;
	2630	}
	2631	else
	2632	data_fake.last_closep = &fake;
	2633	if ( flags & SCF_DO_STCLASS && !scan->flags
	2634	&& OP(scan) == IFMATCH ) { /* Lookahead */
	2635	cl_init(pRExC_state, &intrnl);
	2636	data_fake.start_class = &intrnl;
	2637	f \|= SCF_DO_STCLASS_AND;
	2638	}
	2639	if (flags & SCF_WHILEM_VISITED_POS)
	2640	f \|= SCF_WHILEM_VISITED_POS;
	2641	next = regnext(scan);
	2642	nscan = NEXTOPER(NEXTOPER(scan));
	2643	minnext = study_chunk(pRExC_state, &nscan, &deltanext, last, &data_fake, f,depth+1);
	2644	if (scan->flags) {
	2645	if (deltanext) {
	2646	vFAIL("Variable length lookbehind not implemented");
	2647	}
	2648	else if (minnext > U8_MAX) {
	2649	vFAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
	2650	}
	2651	scan->flags = (U8)minnext;
	2652	}
	2653	if (data && data_fake.flags & (SF_HAS_PAR\|SF_IN_PAR))
	2654	pars++;
	2655	if (data && (data_fake.flags & SF_HAS_EVAL))
	2656	data->flags \|= SF_HAS_EVAL;
	2657	if (data)
	2658	data->whilem_c = data_fake.whilem_c;
	2659	if (f & SCF_DO_STCLASS_AND) {
	2660	const int was = (data->start_class->flags & ANYOF_EOS);
	2661
	2662	cl_and(data->start_class, &intrnl);
	2663	if (was)
	2664	data->start_class->flags \|= ANYOF_EOS;
	2665	}
	2666	}
	2667	else if (OP(scan) == OPEN) {
	2668	pars++;
	2669	}
	2670	else if (OP(scan) == CLOSE) {
	2671	if ((I32)ARG(scan) == is_par) {
	2672	next = regnext(scan);
	2673
	2674	if ( next && (OP(next) != WHILEM) && next < last)
	2675	is_par = 0; /* Disable optimization */
	2676	}
	2677	if (data)
	2678	*(data->last_closep) = ARG(scan);
	2679	}
	2680	else if (OP(scan) == EVAL) {
	2681	if (data)
	2682	data->flags \|= SF_HAS_EVAL;
	2683	}
	2684	else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded follows */
	2685	if (flags & SCF_DO_SUBSTR) {
	2686	scan_commit(pRExC_state,data);
	2687	data->longest = &(data->longest_float);
	2688	}
	2689	is_inf = is_inf_internal = 1;
	2690	if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
	2691	cl_anything(pRExC_state, data->start_class);
	2692	flags &= ~SCF_DO_STCLASS;
	2693	}
	2694	/* Else: zero-length, ignore. */
	2695	scan = regnext(scan);
	2696	}
	2697
	2698	finish:
	2699	*scanp = scan;
	2700	*deltap = is_inf_internal ? I32_MAX : delta;
	2701	if (flags & SCF_DO_SUBSTR && is_inf)
	2702	data->pos_delta = I32_MAX - data->pos_min;
	2703	if (is_par > U8_MAX)
	2704	is_par = 0;
	2705	if (is_par && pars==1 && data) {
	2706	data->flags \|= SF_IN_PAR;
	2707	data->flags &= ~SF_HAS_PAR;
	2708	}
	2709	else if (pars && data) {
	2710	data->flags \|= SF_HAS_PAR;
	2711	data->flags &= ~SF_IN_PAR;
	2712	}
	2713	if (flags & SCF_DO_STCLASS_OR)
	2714	cl_and(data->start_class, &and_with);
	2715	return min;
	2716	}
	2717
	2718	STATIC I32
	2719	S_add_data(RExC_state_t pRExC_state, I32 n, const char s)
	2720	{
	2721	if (RExC_rx->data) {
	2722	Renewc(RExC_rx->data,
	2723	sizeof(RExC_rx->data) + sizeof(void) * (RExC_rx->data->count + n - 1),
	2724	char, struct reg_data);
	2725	Renew(RExC_rx->data->what, RExC_rx->data->count + n, U8);
	2726	RExC_rx->data->count += n;
	2727	}
	2728	else {
	2729	Newxc(RExC_rx->data, sizeof(RExC_rx->data) + sizeof(void) * (n - 1),
	2730	char, struct reg_data);
	2731	Newx(RExC_rx->data->what, n, U8);
	2732	RExC_rx->data->count = n;
	2733	}
	2734	Copy(s, RExC_rx->data->what + RExC_rx->data->count - n, n, U8);
	2735	return RExC_rx->data->count - n;
	2736	}
	2737
	2738	#ifndef PERL_IN_XSUB_RE
	2739	void
	2740	Perl_reginitcolors(pTHX)
	2741	{
	2742	dVAR;
	2743	const char * const s = PerlEnv_getenv("PERL_RE_COLORS");
	2744	if (s) {
	2745	char *t = savepv(s);
	2746	int i = 0;
	2747	PL_colors[0] = t;
	2748	while (++i < 6) {
	2749	t = strchr(t, '\t');
	2750	if (t) {
	2751	*t = '\0';
	2752	PL_colors[i] = ++t;
	2753	}
	2754	else
	2755	PL_colors[i] = t = (char *)"";
	2756	}
	2757	} else {
	2758	int i = 0;
	2759	while (i < 6)
	2760	PL_colors[i++] = (char *)"";
	2761	}
	2762	PL_colorset = 1;
	2763	}
	2764	#endif
	2765
	2766	/*
	2767	- pregcomp - compile a regular expression into internal code
	2768	*
	2769	* We can't allocate space until we know how big the compiled form will be,
	2770	* but we can't compile it (and thus know how big it is) until we've got a
	2771	* place to put the code. So we cheat: we compile it twice, once with code
	2772	* generation turned off and size counting turned on, and once "for real".
	2773	* This also means that we don't allocate space until we are sure that the
	2774	* thing really will compile successfully, and we never have to move the
	2775	* code and thus invalidate pointers into it. (Note that it has to be in
	2776	* one piece because free() must be able to free it all.) [NB: not true in perl]
	2777	*
	2778	* Beware that the optimization-preparation code in here knows about some
	2779	* of the structure of the compiled regexp. [I'll say.]
	2780	*/
	2781	regexp *
	2782	Perl_pregcomp(pTHX_ char exp, char xend, PMOP *pm)
	2783	{
	2784	dVAR;
	2785	register regexp *r;
	2786	regnode *scan;
	2787	regnode *first;
	2788	I32 flags;
	2789	I32 minlen = 0;
	2790	I32 sawplus = 0;
	2791	I32 sawopen = 0;
	2792	scan_data_t data;
	2793	RExC_state_t RExC_state;
	2794	RExC_state_t *pRExC_state = &RExC_state;
	2795
	2796	GET_RE_DEBUG_FLAGS_DECL;
	2797
	2798	if (exp == NULL)
	2799	FAIL("NULL regexp argument");
	2800
	2801	RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
	2802
	2803	RExC_precomp = exp;
	2804	DEBUG_r(if (!PL_colorset) reginitcolors());
	2805	DEBUG_COMPILE_r({
	2806	PerlIO_printf(Perl_debug_log, "%sCompiling REx%s \"%s%*s%s\"\n",
	2807	PL_colors[4],PL_colors[5],PL_colors[0],
	2808	(int)(xend - exp), RExC_precomp, PL_colors[1]);
	2809	});
	2810	RExC_flags = pm->op_pmflags;
	2811	RExC_sawback = 0;
	2812
	2813	RExC_seen = 0;
	2814	RExC_seen_zerolen = *exp == '^' ? -1 : 0;
	2815	RExC_seen_evals = 0;
	2816	RExC_extralen = 0;
	2817
	2818	/* First pass: determine size, legality. */
	2819	RExC_parse = exp;
	2820	RExC_start = exp;
	2821	RExC_end = xend;
	2822	RExC_naughty = 0;
	2823	RExC_npar = 1;
	2824	RExC_size = 0L;
	2825	RExC_emit = &PL_regdummy;
	2826	RExC_whilem_seen = 0;
	2827	#if 0 /* REGC() is (currently) a NOP at the first pass.
	2828	* Clever compilers notice this and complain. --jhi */
	2829	REGC((U8)REG_MAGIC, (char*)RExC_emit);
	2830	#endif
	2831	if (reg(pRExC_state, 0, &flags) == NULL) {
	2832	RExC_precomp = NULL;
	2833	return(NULL);
	2834	}
	2835	DEBUG_COMPILE_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));
	2836
	2837	/* Small enough for pointer-storage convention?
	2838	If extralen==0, this means that we will not need long jumps. */
	2839	if (RExC_size >= 0x10000L && RExC_extralen)
	2840	RExC_size += RExC_extralen;
	2841	else
	2842	RExC_extralen = 0;
	2843	if (RExC_whilem_seen > 15)
	2844	RExC_whilem_seen = 15;
	2845
	2846	/* Allocate space and initialize. */
	2847	Newxc(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode),
	2848	char, regexp);
	2849	if (r == NULL)
	2850	FAIL("Regexp out of space");
	2851
	2852	#ifdef DEBUGGING
	2853	/* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
	2854	Zero(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode), char);
	2855	#endif
	2856	r->refcnt = 1;
	2857	r->prelen = xend - exp;
	2858	r->precomp = savepvn(RExC_precomp, r->prelen);
	2859	r->subbeg = NULL;
	2860	#ifdef PERL_OLD_COPY_ON_WRITE
	2861	r->saved_copy = NULL;
	2862	#endif
	2863	r->reganch = pm->op_pmflags & PMf_COMPILETIME;
	2864	r->nparens = RExC_npar - 1; /* set early to validate backrefs */
	2865	r->lastparen = 0; /* mg.c reads this. */
	2866
	2867	r->substrs = 0; /* Useful during FAIL. */
	2868	r->startp = 0; /* Useful during FAIL. */
	2869	r->endp = 0; /* Useful during FAIL. */
	2870
	2871	Newxz(r->offsets, 2RExC_size+1, U32); / MJD 20001228 */
	2872	if (r->offsets) {
	2873	r->offsets[0] = RExC_size;
	2874	}
	2875	DEBUG_OFFSETS_r(PerlIO_printf(Perl_debug_log,
	2876	"%s %"UVuf" bytes for offset annotations.\n",
	2877	r->offsets ? "Got" : "Couldn't get",
	2878	(UV)((2RExC_size+1) sizeof(U32))));
	2879
	2880	RExC_rx = r;
	2881
	2882	/* Second pass: emit code. */
	2883	RExC_flags = pm->op_pmflags; /* don't let top level (?i) bleed */
	2884	RExC_parse = exp;
	2885	RExC_end = xend;
	2886	RExC_naughty = 0;
	2887	RExC_npar = 1;
	2888	RExC_emit_start = r->program;
	2889	RExC_emit = r->program;
	2890	/* Store the count of eval-groups for security checks: */
	2891	RExC_emit->next_off = (U16)((RExC_seen_evals > U16_MAX) ? U16_MAX : RExC_seen_evals);
	2892	REGC((U8)REG_MAGIC, (char*) RExC_emit++);
	2893	r->data = 0;
	2894	if (reg(pRExC_state, 0, &flags) == NULL)
	2895	return(NULL);
	2896
	2897
	2898	/* Dig out information for optimizations. */
	2899	r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */
	2900	pm->op_pmflags = RExC_flags;
	2901	if (UTF)
	2902	r->reganch \|= ROPT_UTF8; /* Unicode in it? */
	2903	r->regstclass = NULL;
	2904	if (RExC_naughty >= 10) /* Probably an expensive pattern. */
	2905	r->reganch \|= ROPT_NAUGHTY;
	2906	scan = r->program + 1; /* First BRANCH. */
	2907
	2908	/* XXXX To minimize changes to RE engine we always allocate
	2909	3-units-long substrs field. */
	2910	Newxz(r->substrs, 1, struct reg_substr_data);
	2911
	2912	StructCopy(&zero_scan_data, &data, scan_data_t);
	2913	/* XXXX Should not we check for something else? Usually it is OPEN1... */
	2914	if (OP(scan) != BRANCH) { /* Only one top-level choice. */
	2915	I32 fake;
	2916	STRLEN longest_float_length, longest_fixed_length;
	2917	struct regnode_charclass_class ch_class;
	2918	int stclass_flag;
	2919	I32 last_close = 0;
	2920
	2921	first = scan;
	2922	/* Skip introductions and multiplicators >= 1. */
	2923	while ((OP(first) == OPEN && (sawopen = 1)) \|\|
	2924	/* An OR of one alternative - should not happen now. */
	2925	(OP(first) == BRANCH && OP(regnext(first)) != BRANCH) \|\|
	2926	(OP(first) == PLUS) \|\|
	2927	(OP(first) == MINMOD) \|\|
	2928	/* An {n,m} with n>0 */
	2929	(PL_regkind[(U8)OP(first)] == CURLY && ARG1(first) > 0) ) {
	2930	if (OP(first) == PLUS)
	2931	sawplus = 1;
	2932	else
	2933	first += regarglen[(U8)OP(first)];
	2934	first = NEXTOPER(first);
	2935	}
	2936
	2937	/* Starting-point info. */
	2938	again:
	2939	if (PL_regkind[(U8)OP(first)] == EXACT) {
	2940	if (OP(first) == EXACT)
	2941	/EMPTY/; /* Empty, get anchored substr later. */
	2942	else if ((OP(first) == EXACTF \|\| OP(first) == EXACTFL))
	2943	r->regstclass = first;
	2944	}
	2945	else if (strchr((const char*)PL_simple,OP(first)))
	2946	r->regstclass = first;
	2947	else if (PL_regkind[(U8)OP(first)] == BOUND \|\|
	2948	PL_regkind[(U8)OP(first)] == NBOUND)
	2949	r->regstclass = first;
	2950	else if (PL_regkind[(U8)OP(first)] == BOL) {
	2951	r->reganch \|= (OP(first) == MBOL
	2952	? ROPT_ANCH_MBOL
	2953	: (OP(first) == SBOL
	2954	? ROPT_ANCH_SBOL
	2955	: ROPT_ANCH_BOL));
	2956	first = NEXTOPER(first);
	2957	goto again;
	2958	}
	2959	else if (OP(first) == GPOS) {
	2960	r->reganch \|= ROPT_ANCH_GPOS;
	2961	first = NEXTOPER(first);
	2962	goto again;
	2963	}
	2964	else if (!sawopen && (OP(first) == STAR &&
	2965	PL_regkind[(U8)OP(NEXTOPER(first))] == REG_ANY) &&
	2966	!(r->reganch & ROPT_ANCH) )
	2967	{
	2968	/* turn .* into ^.* with an implied $=1 /
	2969	const int type =
	2970	(OP(NEXTOPER(first)) == REG_ANY)
	2971	? ROPT_ANCH_MBOL
	2972	: ROPT_ANCH_SBOL;
	2973	r->reganch \|= type \| ROPT_IMPLICIT;
	2974	first = NEXTOPER(first);
	2975	goto again;
	2976	}
	2977	if (sawplus && (!sawopen \|\| !RExC_sawback)
	2978	&& !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
	2979	/* x+ must match at the 1st pos of run of x's */
	2980	r->reganch \|= ROPT_SKIP;
	2981
	2982	/* Scan is after the zeroth branch, first is atomic matcher. */
	2983	DEBUG_COMPILE_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
	2984	(IV)(first - scan + 1)));
	2985	/*
	2986	* If there's something expensive in the r.e., find the
	2987	* longest literal string that must appear and make it the
	2988	* regmust. Resolve ties in favor of later strings, since
	2989	* the regstart check works with the beginning of the r.e.
	2990	* and avoiding duplication strengthens checking. Not a
	2991	* strong reason, but sufficient in the absence of others.
	2992	* [Now we resolve ties in favor of the earlier string if
	2993	* it happens that c_offset_min has been invalidated, since the
	2994	* earlier string may buy us something the later one won't.]
	2995	*/
	2996	minlen = 0;
	2997
	2998	data.longest_fixed = newSVpvs("");
	2999	data.longest_float = newSVpvs("");
	3000	data.last_found = newSVpvs("");
	3001	data.longest = &(data.longest_fixed);
	3002	first = scan;
	3003	if (!r->regstclass) {
	3004	cl_init(pRExC_state, &ch_class);
	3005	data.start_class = &ch_class;
	3006	stclass_flag = SCF_DO_STCLASS_AND;
	3007	} else /* XXXX Check for BOUND? */
	3008	stclass_flag = 0;
	3009	data.last_closep = &last_close;
	3010
	3011	minlen = study_chunk(pRExC_state, &first, &fake, scan + RExC_size, /* Up to end */
	3012	&data, SCF_DO_SUBSTR \| SCF_WHILEM_VISITED_POS \| stclass_flag,0);
	3013	if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
	3014	&& data.last_start_min == 0 && data.last_end > 0
	3015	&& !RExC_seen_zerolen
	3016	&& (!(RExC_seen & REG_SEEN_GPOS) \|\| (r->reganch & ROPT_ANCH_GPOS)))
	3017	r->reganch \|= ROPT_CHECK_ALL;
	3018	scan_commit(pRExC_state, &data);
	3019	SvREFCNT_dec(data.last_found);
	3020
	3021	longest_float_length = CHR_SVLEN(data.longest_float);
	3022	if (longest_float_length
	3023	\|\| (data.flags & SF_FL_BEFORE_EOL
	3024	&& (!(data.flags & SF_FL_BEFORE_MEOL)
	3025	\|\| (RExC_flags & PMf_MULTILINE)))) {
	3026	int t;
	3027
	3028	if (SvCUR(data.longest_fixed) /* ok to leave SvCUR */
	3029	&& data.offset_fixed == data.offset_float_min
	3030	&& SvCUR(data.longest_fixed) == SvCUR(data.longest_float))
	3031	goto remove_float; /* As in (a)+. */
	3032
	3033	if (SvUTF8(data.longest_float)) {
	3034	r->float_utf8 = data.longest_float;
	3035	r->float_substr = NULL;
	3036	} else {
	3037	r->float_substr = data.longest_float;
	3038	r->float_utf8 = NULL;
	3039	}
	3040	r->float_min_offset = data.offset_float_min;
	3041	r->float_max_offset = data.offset_float_max;
	3042	t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
	3043	&& (!(data.flags & SF_FL_BEFORE_MEOL)
	3044	\|\| (RExC_flags & PMf_MULTILINE)));
	3045	fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0);
	3046	}
	3047	else {
	3048	remove_float:
	3049	r->float_substr = r->float_utf8 = NULL;
	3050	SvREFCNT_dec(data.longest_float);
	3051	longest_float_length = 0;
	3052	}
	3053
	3054	longest_fixed_length = CHR_SVLEN(data.longest_fixed);
	3055	if (longest_fixed_length
	3056	\|\| (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
	3057	&& (!(data.flags & SF_FIX_BEFORE_MEOL)
	3058	\|\| (RExC_flags & PMf_MULTILINE)))) {
	3059	int t;
	3060
	3061	if (SvUTF8(data.longest_fixed)) {
	3062	r->anchored_utf8 = data.longest_fixed;
	3063	r->anchored_substr = NULL;
	3064	} else {
	3065	r->anchored_substr = data.longest_fixed;
	3066	r->anchored_utf8 = NULL;
	3067	}
	3068	r->anchored_offset = data.offset_fixed;
	3069	t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
	3070	&& (!(data.flags & SF_FIX_BEFORE_MEOL)
	3071	\|\| (RExC_flags & PMf_MULTILINE)));
	3072	fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0);
	3073	}
	3074	else {
	3075	r->anchored_substr = r->anchored_utf8 = NULL;
	3076	SvREFCNT_dec(data.longest_fixed);
	3077	longest_fixed_length = 0;
	3078	}
	3079	if (r->regstclass
	3080	&& (OP(r->regstclass) == REG_ANY \|\| OP(r->regstclass) == SANY))
	3081	r->regstclass = NULL;
	3082	if ((!(r->anchored_substr \|\| r->anchored_utf8) \|\| r->anchored_offset)
	3083	&& stclass_flag
	3084	&& !(data.start_class->flags & ANYOF_EOS)
	3085	&& !cl_is_anything(data.start_class))
	3086	{
	3087	const I32 n = add_data(pRExC_state, 1, "f");
	3088
	3089	Newx(RExC_rx->data->data[n], 1,
	3090	struct regnode_charclass_class);
	3091	StructCopy(data.start_class,
	3092	(struct regnode_charclass_class*)RExC_rx->data->data[n],
	3093	struct regnode_charclass_class);
	3094	r->regstclass = (regnode*)RExC_rx->data->data[n];
	3095	r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */
	3096	DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
	3097	regprop(r, sv, (regnode*)data.start_class);
	3098	PerlIO_printf(Perl_debug_log,
	3099	"synthetic stclass \"%s\".\n",
	3100	SvPVX_const(sv));});
	3101	}
	3102
	3103	/* A temporary algorithm prefers floated substr to fixed one to dig more info. */
	3104	if (longest_fixed_length > longest_float_length) {
	3105	r->check_substr = r->anchored_substr;
	3106	r->check_utf8 = r->anchored_utf8;
	3107	r->check_offset_min = r->check_offset_max = r->anchored_offset;
	3108	if (r->reganch & ROPT_ANCH_SINGLE)
	3109	r->reganch \|= ROPT_NOSCAN;
	3110	}
	3111	else {
	3112	r->check_substr = r->float_substr;
	3113	r->check_utf8 = r->float_utf8;
	3114	r->check_offset_min = data.offset_float_min;
	3115	r->check_offset_max = data.offset_float_max;
	3116	}
	3117	/* XXXX Currently intuiting is not compatible with ANCH_GPOS.
	3118	This should be changed ASAP! */
	3119	if ((r->check_substr \|\| r->check_utf8) && !(r->reganch & ROPT_ANCH_GPOS)) {
	3120	r->reganch \|= RE_USE_INTUIT;
	3121	if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
	3122	r->reganch \|= RE_INTUIT_TAIL;
	3123	}
	3124	}
	3125	else {
	3126	/* Several toplevels. Best we can is to set minlen. */
	3127	I32 fake;
	3128	struct regnode_charclass_class ch_class;
	3129	I32 last_close = 0;
	3130
	3131	DEBUG_COMPILE_r(PerlIO_printf(Perl_debug_log, "\n"));
	3132	scan = r->program + 1;
	3133	cl_init(pRExC_state, &ch_class);
	3134	data.start_class = &ch_class;
	3135	data.last_closep = &last_close;
	3136	minlen = study_chunk(pRExC_state, &scan, &fake, scan + RExC_size, &data, SCF_DO_STCLASS_AND\|SCF_WHILEM_VISITED_POS,0);
	3137	r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
	3138	= r->float_substr = r->float_utf8 = NULL;
	3139	if (!(data.start_class->flags & ANYOF_EOS)
	3140	&& !cl_is_anything(data.start_class))
	3141	{
	3142	const I32 n = add_data(pRExC_state, 1, "f");
	3143
	3144	Newx(RExC_rx->data->data[n], 1,
	3145	struct regnode_charclass_class);
	3146	StructCopy(data.start_class,
	3147	(struct regnode_charclass_class*)RExC_rx->data->data[n],
	3148	struct regnode_charclass_class);
	3149	r->regstclass = (regnode*)RExC_rx->data->data[n];
	3150	r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */
	3151	DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
	3152	regprop(r, sv, (regnode*)data.start_class);
	3153	PerlIO_printf(Perl_debug_log,
	3154	"synthetic stclass \"%s\".\n",
	3155	SvPVX_const(sv));});
	3156	}
	3157	}
	3158
	3159	r->minlen = minlen;
	3160	if (RExC_seen & REG_SEEN_GPOS)
	3161	r->reganch \|= ROPT_GPOS_SEEN;
	3162	if (RExC_seen & REG_SEEN_LOOKBEHIND)
	3163	r->reganch \|= ROPT_LOOKBEHIND_SEEN;
	3164	if (RExC_seen & REG_SEEN_EVAL)
	3165	r->reganch \|= ROPT_EVAL_SEEN;
	3166	if (RExC_seen & REG_SEEN_CANY)
	3167	r->reganch \|= ROPT_CANY_SEEN;
	3168	Newxz(r->startp, RExC_npar, I32);
	3169	Newxz(r->endp, RExC_npar, I32);
	3170	DEBUG_COMPILE_r(regdump(r));
	3171	return(r);
	3172	}
	3173
	3174	/*
	3175	- reg - regular expression, i.e. main body or parenthesized thing
	3176	*
	3177	* Caller must absorb opening parenthesis.
	3178	*
	3179	* Combining parenthesis handling with the base level of regular expression
	3180	* is a trifle forced, but the need to tie the tails of the branches to what
	3181	* follows makes it hard to avoid.
	3182	*/
	3183	STATIC regnode *
	3184	S_reg(pTHX_ RExC_state_t pRExC_state, I32 paren, I32 flagp)
	3185	/* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
	3186	{
	3187	dVAR;
	3188	register regnode ret; / Will be the head of the group. */
	3189	register regnode *br;
	3190	register regnode *lastbr;
	3191	register regnode *ender = NULL;
	3192	register I32 parno = 0;
	3193	I32 flags;
	3194	const I32 oregflags = RExC_flags;
	3195	bool have_branch = 0;
	3196	bool is_open = 0;
	3197
	3198	/* for (?g), (?gc), and (?o) warnings; warning
	3199	about (?c) will warn about (?g) -- japhy */
	3200
	3201	#define WASTED_O 0x01
	3202	#define WASTED_G 0x02
	3203	#define WASTED_C 0x04
	3204	#define WASTED_GC (0x02\|0x04)
	3205	I32 wastedflags = 0x00;
	3206
	3207	char * parse_start = RExC_parse; /* MJD */
	3208	char * const oregcomp_parse = RExC_parse;
	3209
	3210	flagp = 0; / Tentatively. */
	3211
	3212
	3213	/* Make an OPEN node, if parenthesized. */
	3214	if (paren) {
	3215	if (RExC_parse == '?') { / (?...) */
	3216	U32 posflags = 0, negflags = 0;
	3217	U32 *flagsp = &posflags;
	3218	bool is_logical = 0;
	3219	const char * const seqstart = RExC_parse;
	3220
	3221	RExC_parse++;
	3222	paren = *RExC_parse++;
	3223	ret = NULL; /* For look-ahead/behind. */
	3224	switch (paren) {
	3225	case '<': /* (?<...) */
	3226	RExC_seen \|= REG_SEEN_LOOKBEHIND;
	3227	if (*RExC_parse == '!')
	3228	paren = ',';
	3229	if (RExC_parse != '=' && RExC_parse != '!')
	3230	goto unknown;
	3231	RExC_parse++;
	3232	case '=': /* (?=...) */
	3233	case '!': /* (?!...) */
	3234	RExC_seen_zerolen++;
	3235	case ':': /* (?:...) */
	3236	case '>': /* (?>...) */
	3237	break;
	3238	case '$': /* (?$...) */
	3239	case '@': /* (?@...) */
	3240	vFAIL2("Sequence (?%c...) not implemented", (int)paren);
	3241	break;
	3242	case '#': /* (?#...) */
	3243	while (RExC_parse && RExC_parse != ')')
	3244	RExC_parse++;
	3245	if (*RExC_parse != ')')
	3246	FAIL("Sequence (?#... not terminated");
	3247	nextchar(pRExC_state);
	3248	*flagp = TRYAGAIN;
	3249	return NULL;
	3250	case 'p': /* (?p...) */
	3251	if (SIZE_ONLY && ckWARN2(WARN_DEPRECATED, WARN_REGEXP))
	3252	vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})");
	3253	/* FALL THROUGH*/
	3254	case '?': /* (??...) */
	3255	is_logical = 1;
	3256	if (*RExC_parse != '{')
	3257	goto unknown;
	3258	paren = *RExC_parse++;
	3259	/* FALL THROUGH */
	3260	case '{': /* (?{...}) */
	3261	{
	3262	I32 count = 1, n = 0;
	3263	char c;
	3264	char *s = RExC_parse;
	3265
	3266	RExC_seen_zerolen++;
	3267	RExC_seen \|= REG_SEEN_EVAL;
	3268	while (count && (c = *RExC_parse)) {
	3269	if (c == '\\') {
	3270	if (RExC_parse[1])
	3271	RExC_parse++;
	3272	}
	3273	else if (c == '{')
	3274	count++;
	3275	else if (c == '}')
	3276	count--;
	3277	RExC_parse++;
	3278	}
	3279	if (*RExC_parse != ')') {
	3280	RExC_parse = s;
	3281	vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
	3282	}
	3283	if (!SIZE_ONLY) {
	3284	PAD *pad;
	3285	OP_4tree sop, rop;
	3286	SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
	3287
	3288	ENTER;
	3289	Perl_save_re_context(aTHX);
	3290	rop = sv_compile_2op(sv, &sop, "re", &pad);
	3291	sop->op_private \|= OPpREFCOUNTED;
	3292	/* re_dup will OpREFCNT_inc */
	3293	OpREFCNT_set(sop, 1);
	3294	LEAVE;
	3295
	3296	n = add_data(pRExC_state, 3, "nop");
	3297	RExC_rx->data->data[n] = (void*)rop;
	3298	RExC_rx->data->data[n+1] = (void*)sop;
	3299	RExC_rx->data->data[n+2] = (void*)pad;
	3300	SvREFCNT_dec(sv);
	3301	}
	3302	else { /* First pass */
	3303	if (PL_reginterp_cnt < ++RExC_seen_evals
	3304	&& IN_PERL_RUNTIME)
	3305	/* No compiled RE interpolated, has runtime
	3306	components ===> unsafe. */
	3307	FAIL("Eval-group not allowed at runtime, use re 'eval'");
	3308	if (PL_tainting && PL_tainted)
	3309	FAIL("Eval-group in insecure regular expression");
	3310	#if PERL_VERSION > 8
	3311	if (IN_PERL_COMPILETIME)
	3312	PL_cv_has_eval = 1;
	3313	#endif
	3314	}
	3315
	3316	nextchar(pRExC_state);
	3317	if (is_logical) {
	3318	ret = reg_node(pRExC_state, LOGICAL);
	3319	if (!SIZE_ONLY)
	3320	ret->flags = 2;
	3321	regtail(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
	3322	/* deal with the length of this later - MJD */
	3323	return ret;
	3324	}
	3325	ret = reganode(pRExC_state, EVAL, n);
	3326	Set_Node_Length(ret, RExC_parse - parse_start + 1);
	3327	Set_Node_Offset(ret, parse_start);
	3328	return ret;
	3329	}
	3330	case '(': /* (?(?{...})...) and (?(?=...)...) */
	3331	{
	3332	if (RExC_parse[0] == '?') { /* (?(?...)) */
	3333	if (RExC_parse[1] == '=' \|\| RExC_parse[1] == '!'
	3334	\|\| RExC_parse[1] == '<'
	3335	\|\| RExC_parse[1] == '{') { /* Lookahead or eval. */
	3336	I32 flag;
	3337
	3338	ret = reg_node(pRExC_state, LOGICAL);
	3339	if (!SIZE_ONLY)
	3340	ret->flags = 1;
	3341	regtail(pRExC_state, ret, reg(pRExC_state, 1, &flag));
	3342	goto insert_if;
	3343	}
	3344	}
	3345	else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
	3346	/* (?(1)...) */
	3347	char c;
	3348	parno = atoi(RExC_parse++);
	3349
	3350	while (isDIGIT(*RExC_parse))
	3351	RExC_parse++;
	3352	ret = reganode(pRExC_state, GROUPP, parno);
	3353
	3354	if ((c = *nextchar(pRExC_state)) != ')')
	3355	vFAIL("Switch condition not recognized");
	3356	insert_if:
	3357	regtail(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
	3358	br = regbranch(pRExC_state, &flags, 1);
	3359	if (br == NULL)
	3360	br = reganode(pRExC_state, LONGJMP, 0);
	3361	else
	3362	regtail(pRExC_state, br, reganode(pRExC_state, LONGJMP, 0));
	3363	c = *nextchar(pRExC_state);
	3364	if (flags&HASWIDTH)
	3365	*flagp \|= HASWIDTH;
	3366	if (c == '\|') {
	3367	lastbr = reganode(pRExC_state, IFTHEN, 0); /* Fake one for optimizer. */
	3368	regbranch(pRExC_state, &flags, 1);
	3369	regtail(pRExC_state, ret, lastbr);
	3370	if (flags&HASWIDTH)
	3371	*flagp \|= HASWIDTH;
	3372	c = *nextchar(pRExC_state);
	3373	}
	3374	else
	3375	lastbr = NULL;
	3376	if (c != ')')
	3377	vFAIL("Switch (?(condition)... contains too many branches");
	3378	ender = reg_node(pRExC_state, TAIL);
	3379	regtail(pRExC_state, br, ender);
	3380	if (lastbr) {
	3381	regtail(pRExC_state, lastbr, ender);
	3382	regtail(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender);
	3383	}
	3384	else
	3385	regtail(pRExC_state, ret, ender);
	3386	return ret;
	3387	}
	3388	else {
	3389	vFAIL2("Unknown switch condition (?(%.2s", RExC_parse);
	3390	}
	3391	}
	3392	case 0:
	3393	RExC_parse--; /* for vFAIL to print correctly */
	3394	vFAIL("Sequence (? incomplete");
	3395	break;
	3396	default:
	3397	--RExC_parse;
	3398	parse_flags: /* (?i) */
	3399	while (RExC_parse && strchr("iogcmsx", RExC_parse)) {
	3400	/* (?g), (?gc) and (?o) are useless here
	3401	and must be globally applied -- japhy */
	3402
	3403	if (RExC_parse == 'o' \|\| RExC_parse == 'g') {
	3404	if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
	3405	const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
	3406	if (! (wastedflags & wflagbit) ) {
	3407	wastedflags \|= wflagbit;
	3408	vWARN5(
	3409	RExC_parse + 1,
	3410	"Useless (%s%c) - %suse /%c modifier",
	3411	flagsp == &negflags ? "?-" : "?",
	3412	*RExC_parse,
	3413	flagsp == &negflags ? "don't " : "",
	3414	*RExC_parse
	3415	);
	3416	}
	3417	}
	3418	}
	3419	else if (*RExC_parse == 'c') {
	3420	if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
	3421	if (! (wastedflags & WASTED_C) ) {
	3422	wastedflags \|= WASTED_GC;
	3423	vWARN3(
	3424	RExC_parse + 1,
	3425	"Useless (%sc) - %suse /gc modifier",
	3426	flagsp == &negflags ? "?-" : "?",
	3427	flagsp == &negflags ? "don't " : ""
	3428	);
	3429	}
	3430	}
	3431	}
	3432	else { pmflag(flagsp, *RExC_parse); }
	3433
	3434	++RExC_parse;
	3435	}
	3436	if (*RExC_parse == '-') {
	3437	flagsp = &negflags;
	3438	wastedflags = 0; /* reset so (?g-c) warns twice */
	3439	++RExC_parse;
	3440	goto parse_flags;
	3441	}
	3442	RExC_flags \|= posflags;
	3443	RExC_flags &= ~negflags;
	3444	if (*RExC_parse == ':') {
	3445	RExC_parse++;
	3446	paren = ':';
	3447	break;
	3448	}
	3449	unknown:
	3450	if (*RExC_parse != ')') {
	3451	RExC_parse++;
	3452	vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
	3453	}
	3454	nextchar(pRExC_state);
	3455	*flagp = TRYAGAIN;
	3456	return NULL;
	3457	}
	3458	}
	3459	else { /* (...) */
	3460	parno = RExC_npar;
	3461	RExC_npar++;
	3462	ret = reganode(pRExC_state, OPEN, parno);
	3463	Set_Node_Length(ret, 1); /* MJD */
	3464	Set_Node_Offset(ret, RExC_parse); /* MJD */
	3465	is_open = 1;
	3466	}
	3467	}
	3468	else /* ! paren */
	3469	ret = NULL;
	3470
	3471	/* Pick up the branches, linking them together. */
	3472	parse_start = RExC_parse; /* MJD */
	3473	br = regbranch(pRExC_state, &flags, 1);
	3474	/* branch_len = (paren != 0); */
	3475
	3476	if (br == NULL)
	3477	return(NULL);
	3478	if (*RExC_parse == '\|') {
	3479	if (!SIZE_ONLY && RExC_extralen) {
	3480	reginsert(pRExC_state, BRANCHJ, br);
	3481	}
	3482	else { /* MJD */
	3483	reginsert(pRExC_state, BRANCH, br);
	3484	Set_Node_Length(br, paren != 0);
	3485	Set_Node_Offset_To_R(br-RExC_emit_start, parse_start-RExC_start);
	3486	}
	3487	have_branch = 1;
	3488	if (SIZE_ONLY)
	3489	RExC_extralen += 1; /* For BRANCHJ-BRANCH. */
	3490	}
	3491	else if (paren == ':') {
	3492	*flagp \|= flags&SIMPLE;
	3493	}
	3494	if (is_open) { /* Starts with OPEN. */
	3495	regtail(pRExC_state, ret, br); /* OPEN -> first. */
	3496	}
	3497	else if (paren != '?') /* Not Conditional */
	3498	ret = br;
	3499	*flagp \|= flags & (SPSTART \| HASWIDTH);
	3500	lastbr = br;
	3501	while (*RExC_parse == '\|') {
	3502	if (!SIZE_ONLY && RExC_extralen) {
	3503	ender = reganode(pRExC_state, LONGJMP,0);
	3504	regtail(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender); /* Append to the previous. */
	3505	}
	3506	if (SIZE_ONLY)
	3507	RExC_extralen += 2; /* Account for LONGJMP. */
	3508	nextchar(pRExC_state);
	3509	br = regbranch(pRExC_state, &flags, 0);
	3510
	3511	if (br == NULL)
	3512	return(NULL);
	3513	regtail(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */
	3514	lastbr = br;
	3515	if (flags&HASWIDTH)
	3516	*flagp \|= HASWIDTH;
	3517	*flagp \|= flags&SPSTART;
	3518	}
	3519
	3520	if (have_branch \|\| paren != ':') {
	3521	/* Make a closing node, and hook it on the end. */
	3522	switch (paren) {
	3523	case ':':
	3524	ender = reg_node(pRExC_state, TAIL);
	3525	break;
	3526	case 1:
	3527	ender = reganode(pRExC_state, CLOSE, parno);
	3528	Set_Node_Offset(ender,RExC_parse+1); /* MJD */
	3529	Set_Node_Length(ender,1); /* MJD */
	3530	break;
	3531	case '<':
	3532	case ',':
	3533	case '=':
	3534	case '!':
	3535	*flagp &= ~HASWIDTH;
	3536	/* FALL THROUGH */
	3537	case '>':
	3538	ender = reg_node(pRExC_state, SUCCEED);
	3539	break;
	3540	case 0:
	3541	ender = reg_node(pRExC_state, END);
	3542	break;
	3543	}
	3544	regtail(pRExC_state, lastbr, ender);
	3545
	3546	if (have_branch) {
	3547	/* Hook the tails of the branches to the closing node. */
	3548	for (br = ret; br != NULL; br = regnext(br)) {
	3549	regoptail(pRExC_state, br, ender);
	3550	}
	3551	}
	3552	}
	3553
	3554	{
	3555	const char *p;
	3556	static const char parens[] = "=!<,>";
	3557
	3558	if (paren && (p = strchr(parens, paren))) {
	3559	U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
	3560	int flag = (p - parens) > 1;
	3561
	3562	if (paren == '>')
	3563	node = SUSPEND, flag = 0;
	3564	reginsert(pRExC_state, node,ret);
	3565	Set_Node_Cur_Length(ret);
	3566	Set_Node_Offset(ret, parse_start + 1);
	3567	ret->flags = flag;
	3568	regtail(pRExC_state, ret, reg_node(pRExC_state, TAIL));
	3569	}
	3570	}
	3571
	3572	/* Check for proper termination. */
	3573	if (paren) {
	3574	RExC_flags = oregflags;
	3575	if (RExC_parse >= RExC_end \|\| *nextchar(pRExC_state) != ')') {
	3576	RExC_parse = oregcomp_parse;
	3577	vFAIL("Unmatched (");
	3578	}
	3579	}
	3580	else if (!paren && RExC_parse < RExC_end) {
	3581	if (*RExC_parse == ')') {
	3582	RExC_parse++;
	3583	vFAIL("Unmatched )");
	3584	}
	3585	else
	3586	FAIL("Junk on end of regexp"); /* "Can't happen". */
	3587	/* NOTREACHED */
	3588	}
	3589
	3590	return(ret);
	3591	}
	3592
	3593	/*
	3594	- regbranch - one alternative of an \| operator
	3595	*
	3596	* Implements the concatenation operator.
	3597	*/
	3598	STATIC regnode *
	3599	S_regbranch(pTHX_ RExC_state_t pRExC_state, I32 flagp, I32 first)
	3600	{
	3601	dVAR;
	3602	register regnode *ret;
	3603	register regnode *chain = NULL;
	3604	register regnode *latest;
	3605	I32 flags = 0, c = 0;
	3606
	3607	if (first)
	3608	ret = NULL;
	3609	else {
	3610	if (!SIZE_ONLY && RExC_extralen)
	3611	ret = reganode(pRExC_state, BRANCHJ,0);
	3612	else {
	3613	ret = reg_node(pRExC_state, BRANCH);
	3614	Set_Node_Length(ret, 1);
	3615	}
	3616	}
	3617
	3618	if (!first && SIZE_ONLY)
	3619	RExC_extralen += 1; /* BRANCHJ */
	3620
	3621	flagp = WORST; / Tentatively. */
	3622
	3623	RExC_parse--;
	3624	nextchar(pRExC_state);
	3625	while (RExC_parse < RExC_end && RExC_parse != '\|' && RExC_parse != ')') {
	3626	flags &= ~TRYAGAIN;
	3627	latest = regpiece(pRExC_state, &flags);
	3628	if (latest == NULL) {
	3629	if (flags & TRYAGAIN)
	3630	continue;
	3631	return(NULL);
	3632	}
	3633	else if (ret == NULL)
	3634	ret = latest;
	3635	*flagp \|= flags&HASWIDTH;
	3636	if (chain == NULL) /* First piece. */
	3637	*flagp \|= flags&SPSTART;
	3638	else {
	3639	RExC_naughty++;
	3640	regtail(pRExC_state, chain, latest);
	3641	}
	3642	chain = latest;
	3643	c++;
	3644	}
	3645	if (chain == NULL) { /* Loop ran zero times. */
	3646	chain = reg_node(pRExC_state, NOTHING);
	3647	if (ret == NULL)
	3648	ret = chain;
	3649	}
	3650	if (c == 1) {
	3651	*flagp \|= flags&SIMPLE;
	3652	}
	3653
	3654	return ret;
	3655	}
	3656
	3657	/*
	3658	- regpiece - something followed by possible [*+?]
	3659	*
	3660	* Note that the branching code sequences used for ? and the general cases
	3661	* of * and + are somewhat optimized: they use the same NOTHING node as
	3662	* both the endmarker for their branch list and the body of the last branch.
	3663	* It might seem that this node could be dispensed with entirely, but the
	3664	* endmarker role is not redundant.
	3665	*/
	3666	STATIC regnode *
	3667	S_regpiece(pTHX_ RExC_state_t pRExC_state, I32 flagp)
	3668	{
	3669	dVAR;
	3670	register regnode *ret;
	3671	register char op;
	3672	register char *next;
	3673	I32 flags;
	3674	const char * const origparse = RExC_parse;
	3675	char *maxpos;
	3676	I32 min;
	3677	I32 max = REG_INFTY;
	3678	char *parse_start;
	3679
	3680	ret = regatom(pRExC_state, &flags);
	3681	if (ret == NULL) {
	3682	if (flags & TRYAGAIN)
	3683	*flagp \|= TRYAGAIN;
	3684	return(NULL);
	3685	}
	3686
	3687	op = *RExC_parse;
	3688
	3689	if (op == '{' && regcurly(RExC_parse)) {
	3690	parse_start = RExC_parse; /* MJD */
	3691	next = RExC_parse + 1;
	3692	maxpos = NULL;
	3693	while (isDIGIT(next) \|\| next == ',') {
	3694	if (*next == ',') {
	3695	if (maxpos)
	3696	break;
	3697	else
	3698	maxpos = next;
	3699	}
	3700	next++;
	3701	}
	3702	if (next == '}') { / got one */
	3703	if (!maxpos)
	3704	maxpos = next;
	3705	RExC_parse++;
	3706	min = atoi(RExC_parse);
	3707	if (*maxpos == ',')
	3708	maxpos++;
	3709	else
	3710	maxpos = RExC_parse;
	3711	max = atoi(maxpos);
	3712	if (!max && *maxpos != '0')
	3713	max = REG_INFTY; /* meaning "infinity" */
	3714	else if (max >= REG_INFTY)
	3715	vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
	3716	RExC_parse = next;
	3717	nextchar(pRExC_state);
	3718
	3719	do_curly:
	3720	if ((flags&SIMPLE)) {
	3721	RExC_naughty += 2 + RExC_naughty / 2;
	3722	reginsert(pRExC_state, CURLY, ret);
	3723	Set_Node_Offset(ret, parse_start+1); /* MJD */
	3724	Set_Node_Cur_Length(ret);
	3725	}
	3726	else {
	3727	regnode *w = reg_node(pRExC_state, WHILEM);
	3728
	3729	w->flags = 0;
	3730	regtail(pRExC_state, ret, w);
	3731	if (!SIZE_ONLY && RExC_extralen) {
	3732	reginsert(pRExC_state, LONGJMP,ret);
	3733	reginsert(pRExC_state, NOTHING,ret);
	3734	NEXT_OFF(ret) = 3; /* Go over LONGJMP. */
	3735	}
	3736	reginsert(pRExC_state, CURLYX,ret);
	3737	/* MJD hk */
	3738	Set_Node_Offset(ret, parse_start+1);
	3739	Set_Node_Length(ret,
	3740	op == '{' ? (RExC_parse - parse_start) : 1);
	3741
	3742	if (!SIZE_ONLY && RExC_extralen)
	3743	NEXT_OFF(ret) = 3; /* Go over NOTHING to LONGJMP. */
	3744	regtail(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
	3745	if (SIZE_ONLY)
	3746	RExC_whilem_seen++, RExC_extralen += 3;
	3747	RExC_naughty += 4 + RExC_naughty; /* compound interest */
	3748	}
	3749	ret->flags = 0;
	3750
	3751	if (min > 0)
	3752	*flagp = WORST;
	3753	if (max > 0)
	3754	*flagp \|= HASWIDTH;
	3755	if (max && max < min)
	3756	vFAIL("Can't do {n,m} with n > m");
	3757	if (!SIZE_ONLY) {
	3758	ARG1_SET(ret, (U16)min);
	3759	ARG2_SET(ret, (U16)max);
	3760	}
	3761
	3762	goto nest_check;
	3763	}
	3764	}
	3765
	3766	if (!ISMULT1(op)) {
	3767	*flagp = flags;
	3768	return(ret);
	3769	}
	3770
	3771	#if 0 /* Now runtime fix should be reliable. */
	3772
	3773	/* if this is reinstated, don't forget to put this back into perldiag:
	3774
	3775	=item Regexp *+ operand could be empty at {#} in regex m/%s/
	3776
	3777	(F) The part of the regexp subject to either the * or + quantifier
	3778	could match an empty string. The {#} shows in the regular
	3779	expression about where the problem was discovered.
	3780
	3781	*/
	3782
	3783	if (!(flags&HASWIDTH) && op != '?')
	3784	vFAIL("Regexp *+ operand could be empty");
	3785	#endif
	3786
	3787	parse_start = RExC_parse;
	3788	nextchar(pRExC_state);
	3789
	3790	*flagp = (op != '+') ? (WORST\|SPSTART\|HASWIDTH) : (WORST\|HASWIDTH);
	3791
	3792	if (op == '*' && (flags&SIMPLE)) {
	3793	reginsert(pRExC_state, STAR, ret);
	3794	ret->flags = 0;
	3795	RExC_naughty += 4;
	3796	}
	3797	else if (op == '*') {
	3798	min = 0;
	3799	goto do_curly;
	3800	}
	3801	else if (op == '+' && (flags&SIMPLE)) {
	3802	reginsert(pRExC_state, PLUS, ret);
	3803	ret->flags = 0;
	3804	RExC_naughty += 3;
	3805	}
	3806	else if (op == '+') {
	3807	min = 1;
	3808	goto do_curly;
	3809	}
	3810	else if (op == '?') {
	3811	min = 0; max = 1;
	3812	goto do_curly;
	3813	}
	3814	nest_check:
	3815	if (!SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) {
	3816	vWARN3(RExC_parse,
	3817	"%.*s matches null string many times",
	3818	(int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
	3819	origparse);
	3820	}
	3821
	3822	if (*RExC_parse == '?') {
	3823	nextchar(pRExC_state);
	3824	reginsert(pRExC_state, MINMOD, ret);
	3825	regtail(pRExC_state, ret, ret + NODE_STEP_REGNODE);
	3826	}
	3827	if (ISMULT2(RExC_parse)) {
	3828	RExC_parse++;
	3829	vFAIL("Nested quantifiers");
	3830	}
	3831
	3832	return(ret);
	3833	}
	3834
	3835	/*
	3836	- regatom - the lowest level
	3837	*
	3838	* Optimization: gobbles an entire sequence of ordinary characters so that
	3839	* it can turn them into a single node, which is smaller to store and
	3840	* faster to run. Backslashed characters are exceptions, each becoming a
	3841	* separate node; the code is simpler that way and it's not worth fixing.
	3842	*
	3843	* [Yes, it is worth fixing, some scripts can run twice the speed.] */
	3844	STATIC regnode *
	3845	S_regatom(pTHX_ RExC_state_t pRExC_state, I32 flagp)
	3846	{
	3847	dVAR;
	3848	register regnode *ret = NULL;
	3849	I32 flags;
	3850	char *parse_start = RExC_parse;
	3851
	3852	flagp = WORST; / Tentatively. */
	3853
	3854	tryagain:
	3855	switch (*RExC_parse) {
	3856	case '^':
	3857	RExC_seen_zerolen++;
	3858	nextchar(pRExC_state);
	3859	if (RExC_flags & PMf_MULTILINE)
	3860	ret = reg_node(pRExC_state, MBOL);
	3861	else if (RExC_flags & PMf_SINGLELINE)
	3862	ret = reg_node(pRExC_state, SBOL);
	3863	else
	3864	ret = reg_node(pRExC_state, BOL);
	3865	Set_Node_Length(ret, 1); /* MJD */
	3866	break;
	3867	case '$':
	3868	nextchar(pRExC_state);
	3869	if (*RExC_parse)
	3870	RExC_seen_zerolen++;
	3871	if (RExC_flags & PMf_MULTILINE)
	3872	ret = reg_node(pRExC_state, MEOL);
	3873	else if (RExC_flags & PMf_SINGLELINE)
	3874	ret = reg_node(pRExC_state, SEOL);
	3875	else
	3876	ret = reg_node(pRExC_state, EOL);
	3877	Set_Node_Length(ret, 1); /* MJD */
	3878	break;
	3879	case '.':
	3880	nextchar(pRExC_state);
	3881	if (RExC_flags & PMf_SINGLELINE)
	3882	ret = reg_node(pRExC_state, SANY);
	3883	else
	3884	ret = reg_node(pRExC_state, REG_ANY);
	3885	*flagp \|= HASWIDTH\|SIMPLE;
	3886	RExC_naughty++;
	3887	Set_Node_Length(ret, 1); /* MJD */
	3888	break;
	3889	case '[':
	3890	{
	3891	char *oregcomp_parse = ++RExC_parse;
	3892	ret = regclass(pRExC_state);
	3893	if (*RExC_parse != ']') {
	3894	RExC_parse = oregcomp_parse;
	3895	vFAIL("Unmatched [");
	3896	}
	3897	nextchar(pRExC_state);
	3898	*flagp \|= HASWIDTH\|SIMPLE;
	3899	Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */
	3900	break;
	3901	}
	3902	case '(':
	3903	nextchar(pRExC_state);
	3904	ret = reg(pRExC_state, 1, &flags);
	3905	if (ret == NULL) {
	3906	if (flags & TRYAGAIN) {
	3907	if (RExC_parse == RExC_end) {
	3908	/* Make parent create an empty node if needed. */
	3909	*flagp \|= TRYAGAIN;
	3910	return(NULL);
	3911	}
	3912	goto tryagain;
	3913	}
	3914	return(NULL);
	3915	}
	3916	*flagp \|= flags&(HASWIDTH\|SPSTART\|SIMPLE);
	3917	break;
	3918	case '\|':
	3919	case ')':
	3920	if (flags & TRYAGAIN) {
	3921	*flagp \|= TRYAGAIN;
	3922	return NULL;
	3923	}
	3924	vFAIL("Internal urp");
	3925	/* Supposed to be caught earlier. */
	3926	break;
	3927	case '{':
	3928	if (!regcurly(RExC_parse)) {
	3929	RExC_parse++;
	3930	goto defchar;
	3931	}
	3932	/* FALL THROUGH */
	3933	case '?':
	3934	case '+':
	3935	case '*':
	3936	RExC_parse++;
	3937	vFAIL("Quantifier follows nothing");
	3938	break;
	3939	case '\\':
	3940	switch (*++RExC_parse) {
	3941	case 'A':
	3942	RExC_seen_zerolen++;
	3943	ret = reg_node(pRExC_state, SBOL);
	3944	*flagp \|= SIMPLE;
	3945	nextchar(pRExC_state);
	3946	Set_Node_Length(ret, 2); /* MJD */
	3947	break;
	3948	case 'G':
	3949	ret = reg_node(pRExC_state, GPOS);
	3950	RExC_seen \|= REG_SEEN_GPOS;
	3951	*flagp \|= SIMPLE;
	3952	nextchar(pRExC_state);
	3953	Set_Node_Length(ret, 2); /* MJD */
	3954	break;
	3955	case 'Z':
	3956	ret = reg_node(pRExC_state, SEOL);
	3957	*flagp \|= SIMPLE;
	3958	RExC_seen_zerolen++; /* Do not optimize RE away */
	3959	nextchar(pRExC_state);
	3960	break;
	3961	case 'z':
	3962	ret = reg_node(pRExC_state, EOS);
	3963	*flagp \|= SIMPLE;
	3964	RExC_seen_zerolen++; /* Do not optimize RE away */
	3965	nextchar(pRExC_state);
	3966	Set_Node_Length(ret, 2); /* MJD */
	3967	break;
	3968	case 'C':
	3969	ret = reg_node(pRExC_state, CANY);
	3970	RExC_seen \|= REG_SEEN_CANY;
	3971	*flagp \|= HASWIDTH\|SIMPLE;
	3972	nextchar(pRExC_state);
	3973	Set_Node_Length(ret, 2); /* MJD */
	3974	break;
	3975	case 'X':
	3976	ret = reg_node(pRExC_state, CLUMP);
	3977	*flagp \|= HASWIDTH;
	3978	nextchar(pRExC_state);
	3979	Set_Node_Length(ret, 2); /* MJD */
	3980	break;
	3981	case 'w':
	3982	ret = reg_node(pRExC_state, (U8)(LOC ? ALNUML : ALNUM));
	3983	*flagp \|= HASWIDTH\|SIMPLE;
	3984	nextchar(pRExC_state);
	3985	Set_Node_Length(ret, 2); /* MJD */
	3986	break;
	3987	case 'W':
	3988	ret = reg_node(pRExC_state, (U8)(LOC ? NALNUML : NALNUM));
	3989	*flagp \|= HASWIDTH\|SIMPLE;
	3990	nextchar(pRExC_state);
	3991	Set_Node_Length(ret, 2); /* MJD */
	3992	break;
	3993	case 'b':
	3994	RExC_seen_zerolen++;
	3995	RExC_seen \|= REG_SEEN_LOOKBEHIND;
	3996	ret = reg_node(pRExC_state, (U8)(LOC ? BOUNDL : BOUND));
	3997	*flagp \|= SIMPLE;
	3998	nextchar(pRExC_state);
	3999	Set_Node_Length(ret, 2); /* MJD */
	4000	break;
	4001	case 'B':
	4002	RExC_seen_zerolen++;
	4003	RExC_seen \|= REG_SEEN_LOOKBEHIND;
	4004	ret = reg_node(pRExC_state, (U8)(LOC ? NBOUNDL : NBOUND));
	4005	*flagp \|= SIMPLE;
	4006	nextchar(pRExC_state);
	4007	Set_Node_Length(ret, 2); /* MJD */
	4008	break;
	4009	case 's':
	4010	ret = reg_node(pRExC_state, (U8)(LOC ? SPACEL : SPACE));
	4011	*flagp \|= HASWIDTH\|SIMPLE;
	4012	nextchar(pRExC_state);
	4013	Set_Node_Length(ret, 2); /* MJD */
	4014	break;
	4015	case 'S':
	4016	ret = reg_node(pRExC_state, (U8)(LOC ? NSPACEL : NSPACE));
	4017	*flagp \|= HASWIDTH\|SIMPLE;
	4018	nextchar(pRExC_state);
	4019	Set_Node_Length(ret, 2); /* MJD */
	4020	break;
	4021	case 'd':
	4022	ret = reg_node(pRExC_state, DIGIT);
	4023	*flagp \|= HASWIDTH\|SIMPLE;
	4024	nextchar(pRExC_state);
	4025	Set_Node_Length(ret, 2); /* MJD */
	4026	break;
	4027	case 'D':
	4028	ret = reg_node(pRExC_state, NDIGIT);
	4029	*flagp \|= HASWIDTH\|SIMPLE;
	4030	nextchar(pRExC_state);
	4031	Set_Node_Length(ret, 2); /* MJD */
	4032	break;
	4033	case 'p':
	4034	case 'P':
	4035	{
	4036	char* oldregxend = RExC_end;
	4037	char* parse_start = RExC_parse - 2;
	4038
	4039	if (RExC_parse[1] == '{') {
	4040	/* a lovely hack--pretend we saw [\pX] instead */
	4041	RExC_end = strchr(RExC_parse, '}');
	4042	if (!RExC_end) {
	4043	U8 c = (U8)*RExC_parse;
	4044	RExC_parse += 2;
	4045	RExC_end = oldregxend;
	4046	vFAIL2("Missing right brace on \\%c{}", c);
	4047	}
	4048	RExC_end++;
	4049	}
	4050	else {
	4051	RExC_end = RExC_parse + 2;
	4052	if (RExC_end > oldregxend)
	4053	RExC_end = oldregxend;
	4054	}
	4055	RExC_parse--;
	4056
	4057	ret = regclass(pRExC_state);
	4058
	4059	RExC_end = oldregxend;
	4060	RExC_parse--;
	4061
	4062	Set_Node_Offset(ret, parse_start + 2);
	4063	Set_Node_Cur_Length(ret);
	4064	nextchar(pRExC_state);
	4065	*flagp \|= HASWIDTH\|SIMPLE;
	4066	}
	4067	break;
	4068	case 'n':
	4069	case 'r':
	4070	case 't':
	4071	case 'f':
	4072	case 'e':
	4073	case 'a':
	4074	case 'x':
	4075	case 'c':
	4076	case '0':
	4077	goto defchar;
	4078	case '1': case '2': case '3': case '4':
	4079	case '5': case '6': case '7': case '8': case '9':
	4080	{
	4081	const I32 num = atoi(RExC_parse);
	4082
	4083	if (num > 9 && num >= RExC_npar)
	4084	goto defchar;
	4085	else {
	4086	char * parse_start = RExC_parse - 1; /* MJD */
	4087	while (isDIGIT(*RExC_parse))
	4088	RExC_parse++;
	4089
	4090	if (!SIZE_ONLY && num > (I32)RExC_rx->nparens)
	4091	vFAIL("Reference to nonexistent group");
	4092	RExC_sawback = 1;
	4093	ret = reganode(pRExC_state,
	4094	(U8)(FOLD ? (LOC ? REFFL : REFF) : REF),
	4095	num);
	4096	*flagp \|= HASWIDTH;
	4097
	4098	/* override incorrect value set in reganode MJD */
	4099	Set_Node_Offset(ret, parse_start+1);
	4100	Set_Node_Cur_Length(ret); /* MJD */
	4101	RExC_parse--;
	4102	nextchar(pRExC_state);
	4103	}
	4104	}
	4105	break;
	4106	case '\0':
	4107	if (RExC_parse >= RExC_end)
	4108	FAIL("Trailing \\");
	4109	/* FALL THROUGH */
	4110	default:
	4111	/* Do not generate "unrecognized" warnings here, we fall
	4112	back into the quick-grab loop below */
	4113	parse_start--;
	4114	goto defchar;
	4115	}
	4116	break;
	4117
	4118	case '#':
	4119	if (RExC_flags & PMf_EXTENDED) {
	4120	while (RExC_parse < RExC_end && *RExC_parse != '\n') RExC_parse++;
	4121	if (RExC_parse < RExC_end)
	4122	goto tryagain;
	4123	}
	4124	/* FALL THROUGH */
	4125
	4126	default: {
	4127	register STRLEN len;
	4128	register UV ender;
	4129	register char *p;
	4130	char oldp, s;
	4131	STRLEN foldlen;
	4132	U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
	4133
	4134	parse_start = RExC_parse - 1;
	4135
	4136	RExC_parse++;
	4137
	4138	defchar:
	4139	ender = 0;
	4140	ret = reg_node(pRExC_state,
	4141	(U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
	4142	s = STRING(ret);
	4143	for (len = 0, p = RExC_parse - 1;
	4144	len < 127 && p < RExC_end;
	4145	len++)
	4146	{
	4147	oldp = p;
	4148
	4149	if (RExC_flags & PMf_EXTENDED)
	4150	p = regwhite(p, RExC_end);
	4151	switch (*p) {
	4152	case '^':
	4153	case '$':
	4154	case '.':
	4155	case '[':
	4156	case '(':
	4157	case ')':
	4158	case '\|':
	4159	goto loopdone;
	4160	case '\\':
	4161	switch (*++p) {
	4162	case 'A':
	4163	case 'C':
	4164	case 'X':
	4165	case 'G':
	4166	case 'Z':
	4167	case 'z':
	4168	case 'w':
	4169	case 'W':
	4170	case 'b':
	4171	case 'B':
	4172	case 's':
	4173	case 'S':
	4174	case 'd':
	4175	case 'D':
	4176	case 'p':
	4177	case 'P':
	4178	--p;
	4179	goto loopdone;
	4180	case 'n':
	4181	ender = '\n';
	4182	p++;
	4183	break;
	4184	case 'r':
	4185	ender = '\r';
	4186	p++;
	4187	break;
	4188	case 't':
	4189	ender = '\t';
	4190	p++;
	4191	break;
	4192	case 'f':
	4193	ender = '\f';
	4194	p++;
	4195	break;
	4196	case 'e':
	4197	ender = ASCII_TO_NATIVE('\033');
	4198	p++;
	4199	break;
	4200	case 'a':
	4201	ender = ASCII_TO_NATIVE('\007');
	4202	p++;
	4203	break;
	4204	case 'x':
	4205	if (*++p == '{') {
	4206	char* const e = strchr(p, '}');
	4207
	4208	if (!e) {
	4209	RExC_parse = p + 1;
	4210	vFAIL("Missing right brace on \\x{}");
	4211	}
	4212	else {
	4213	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	4214	\| PERL_SCAN_DISALLOW_PREFIX;
	4215	STRLEN numlen = e - p - 1;
	4216	ender = grok_hex(p + 1, &numlen, &flags, NULL);
	4217	if (ender > 0xff)
	4218	RExC_utf8 = 1;
	4219	p = e + 1;
	4220	}
	4221	}
	4222	else {
	4223	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
	4224	STRLEN numlen = 2;
	4225	ender = grok_hex(p, &numlen, &flags, NULL);
	4226	p += numlen;
	4227	}
	4228	break;
	4229	case 'c':
	4230	p++;
	4231	ender = UCHARAT(p++);
	4232	ender = toCTRL(ender);
	4233	break;
	4234	case '0': case '1': case '2': case '3':case '4':
	4235	case '5': case '6': case '7': case '8':case '9':
	4236	if (*p == '0' \|\|
	4237	(isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) {
	4238	I32 flags = 0;
	4239	STRLEN numlen = 3;
	4240	ender = grok_oct(p, &numlen, &flags, NULL);
	4241	p += numlen;
	4242	}
	4243	else {
	4244	--p;
	4245	goto loopdone;
	4246	}
	4247	break;
	4248	case '\0':
	4249	if (p >= RExC_end)
	4250	FAIL("Trailing \\");
	4251	/* FALL THROUGH */
	4252	default:
	4253	if (!SIZE_ONLY&& isALPHA(*p) && ckWARN(WARN_REGEXP))
	4254	vWARN2(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
	4255	goto normal_default;
	4256	}
	4257	break;
	4258	default:
	4259	normal_default:
	4260	if (UTF8_IS_START(*p) && UTF) {
	4261	STRLEN numlen;
	4262	ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
	4263	&numlen, UTF8_ALLOW_DEFAULT);
	4264	p += numlen;
	4265	}
	4266	else
	4267	ender = *p++;
	4268	break;
	4269	}
	4270	if (RExC_flags & PMf_EXTENDED)
	4271	p = regwhite(p, RExC_end);
	4272	if (UTF && FOLD) {
	4273	/* Prime the casefolded buffer. */
	4274	ender = toFOLD_uni(ender, tmpbuf, &foldlen);
	4275	}
	4276	if (ISMULT2(p)) { /* Back off on ?+. /
	4277	if (len)
	4278	p = oldp;
	4279	else if (UTF) {
	4280	STRLEN unilen;
	4281
	4282	if (FOLD) {
	4283	/* Emit all the Unicode characters. */
	4284	STRLEN numlen;
	4285	for (foldbuf = tmpbuf;
	4286	foldlen;
	4287	foldlen -= numlen) {
	4288	ender = utf8_to_uvchr(foldbuf, &numlen);
	4289	if (numlen > 0) {
	4290	reguni(pRExC_state, ender, s, &unilen);
	4291	s += unilen;
	4292	len += unilen;
	4293	/* In EBCDIC the numlen
	4294	* and unilen can differ. */
	4295	foldbuf += numlen;
	4296	if (numlen >= foldlen)
	4297	break;
	4298	}
	4299	else
	4300	break; /* "Can't happen." */
	4301	}
	4302	}
	4303	else {
	4304	reguni(pRExC_state, ender, s, &unilen);
	4305	if (unilen > 0) {
	4306	s += unilen;
	4307	len += unilen;
	4308	}
	4309	}
	4310	}
	4311	else {
	4312	len++;
	4313	REGC((char)ender, s++);
	4314	}
	4315	break;
	4316	}
	4317	if (UTF) {
	4318	STRLEN unilen;
	4319
	4320	if (FOLD) {
	4321	/* Emit all the Unicode characters. */
	4322	STRLEN numlen;
	4323	for (foldbuf = tmpbuf;
	4324	foldlen;
	4325	foldlen -= numlen) {
	4326	ender = utf8_to_uvchr(foldbuf, &numlen);
	4327	if (numlen > 0) {
	4328	reguni(pRExC_state, ender, s, &unilen);
	4329	len += unilen;
	4330	s += unilen;
	4331	/* In EBCDIC the numlen
	4332	* and unilen can differ. */
	4333	foldbuf += numlen;
	4334	if (numlen >= foldlen)
	4335	break;
	4336	}
	4337	else
	4338	break;
	4339	}
	4340	}
	4341	else {
	4342	reguni(pRExC_state, ender, s, &unilen);
	4343	if (unilen > 0) {
	4344	s += unilen;
	4345	len += unilen;
	4346	}
	4347	}
	4348	len--;
	4349	}
	4350	else
	4351	REGC((char)ender, s++);
	4352	}
	4353	loopdone:
	4354	RExC_parse = p - 1;
	4355	Set_Node_Cur_Length(ret); /* MJD */
	4356	nextchar(pRExC_state);
	4357	{
	4358	/* len is STRLEN which is unsigned, need to copy to signed */
	4359	IV iv = len;
	4360	if (iv < 0)
	4361	vFAIL("Internal disaster");
	4362	}
	4363	if (len > 0)
	4364	*flagp \|= HASWIDTH;
	4365	if (len == 1 && UNI_IS_INVARIANT(ender))
	4366	*flagp \|= SIMPLE;
	4367	if (!SIZE_ONLY)
	4368	STR_LEN(ret) = len;
	4369	if (SIZE_ONLY)
	4370	RExC_size += STR_SZ(len);
	4371	else
	4372	RExC_emit += STR_SZ(len);
	4373	}
	4374	break;
	4375	}
	4376
	4377	/* If the encoding pragma is in effect recode the text of
	4378	* any EXACT-kind nodes. */
	4379	if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
	4380	STRLEN oldlen = STR_LEN(ret);
	4381	SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen));
	4382
	4383	if (RExC_utf8)
	4384	SvUTF8_on(sv);
	4385	if (sv_utf8_downgrade(sv, TRUE)) {
	4386	const char * const s = sv_recode_to_utf8(sv, PL_encoding);
	4387	const STRLEN newlen = SvCUR(sv);
	4388
	4389	if (SvUTF8(sv))
	4390	RExC_utf8 = 1;
	4391	if (!SIZE_ONLY) {
	4392	GET_RE_DEBUG_FLAGS_DECL;
	4393	DEBUG_COMPILE_r(PerlIO_printf(Perl_debug_log, "recode %s to %s\n",
	4394	(int)oldlen, STRING(ret),
	4395	(int)newlen, s));
	4396	Copy(s, STRING(ret), newlen, char);
	4397	STR_LEN(ret) += newlen - oldlen;
	4398	RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
	4399	} else
	4400	RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
	4401	}
	4402	}
	4403
	4404	return(ret);
	4405	}
	4406
	4407	STATIC char *
	4408	S_regwhite(char p, const char e)
	4409	{
	4410	while (p < e) {
	4411	if (isSPACE(*p))
	4412	++p;
	4413	else if (*p == '#') {
	4414	do {
	4415	p++;
	4416	} while (p < e && *p != '\n');
	4417	}
	4418	else
	4419	break;
	4420	}
	4421	return p;
	4422	}
	4423
	4424	/* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]].
	4425	Character classes ([:foo:]) can also be negated ([:^foo:]).
	4426	Returns a named class id (ANYOF_XXX) if successful, -1 otherwise.
	4427	Equivalence classes ([=foo=]) and composites ([.foo.]) are parsed,
	4428	but trigger failures because they are currently unimplemented. */
	4429
	4430	#define POSIXCC_DONE(c) ((c) == ':')
	4431	#define POSIXCC_NOTYET(c) ((c) == '=' \|\| (c) == '.')
	4432	#define POSIXCC(c) (POSIXCC_DONE(c) \|\| POSIXCC_NOTYET(c))
	4433
	4434	STATIC I32
	4435	S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
	4436	{
	4437	dVAR;
	4438	I32 namedclass = OOB_NAMEDCLASS;
	4439
	4440	if (value == '[' && RExC_parse + 1 < RExC_end &&
	4441	/* I smell either [: or [= or [. -- POSIX has been here, right? */
	4442	POSIXCC(UCHARAT(RExC_parse))) {
	4443	const char c = UCHARAT(RExC_parse);
	4444	char* const s = RExC_parse++;
	4445
	4446	while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
	4447	RExC_parse++;
	4448	if (RExC_parse == RExC_end)
	4449	/* Grandfather lone [:, [=, [. */
	4450	RExC_parse = s;
	4451	else {
	4452	const char* t = RExC_parse++; /* skip over the c */
	4453	const char *posixcc;
	4454
	4455	assert(*t == c);
	4456
	4457	if (UCHARAT(RExC_parse) == ']') {
	4458	RExC_parse++; /* skip over the ending ] */
	4459	posixcc = s + 1;
	4460	if (*s == ':') {
	4461	const I32 complement = posixcc == '^' ? posixcc++ : 0;
	4462	const I32 skip = t - posixcc;
	4463
	4464	/* Initially switch on the length of the name. */
	4465	switch (skip) {
	4466	case 4:
	4467	if (memEQ(posixcc, "word", 4)) {
	4468	/* this is not POSIX, this is the Perl \w */;
	4469	namedclass
	4470	= complement ? ANYOF_NALNUM : ANYOF_ALNUM;
	4471	}
	4472	break;
	4473	case 5:
	4474	/* Names all of length 5. */
	4475	/* alnum alpha ascii blank cntrl digit graph lower
	4476	print punct space upper */
	4477	/* Offset 4 gives the best switch position. */
	4478	switch (posixcc[4]) {
	4479	case 'a':
	4480	if (memEQ(posixcc, "alph", 4)) {
	4481	/* a */
	4482	namedclass
	4483	= complement ? ANYOF_NALPHA : ANYOF_ALPHA;
	4484	}
	4485	break;
	4486	case 'e':
	4487	if (memEQ(posixcc, "spac", 4)) {
	4488	/* e */
	4489	namedclass
	4490	= complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
	4491	}
	4492	break;
	4493	case 'h':
	4494	if (memEQ(posixcc, "grap", 4)) {
	4495	/* h */
	4496	namedclass
	4497	= complement ? ANYOF_NGRAPH : ANYOF_GRAPH;
	4498	}
	4499	break;
	4500	case 'i':
	4501	if (memEQ(posixcc, "asci", 4)) {
	4502	/* i */
	4503	namedclass
	4504	= complement ? ANYOF_NASCII : ANYOF_ASCII;
	4505	}
	4506	break;
	4507	case 'k':
	4508	if (memEQ(posixcc, "blan", 4)) {
	4509	/* k */
	4510	namedclass
	4511	= complement ? ANYOF_NBLANK : ANYOF_BLANK;
	4512	}
	4513	break;
	4514	case 'l':
	4515	if (memEQ(posixcc, "cntr", 4)) {
	4516	/* l */
	4517	namedclass
	4518	= complement ? ANYOF_NCNTRL : ANYOF_CNTRL;
	4519	}
	4520	break;
	4521	case 'm':
	4522	if (memEQ(posixcc, "alnu", 4)) {
	4523	/* m */
	4524	namedclass
	4525	= complement ? ANYOF_NALNUMC : ANYOF_ALNUMC;
	4526	}
	4527	break;
	4528	case 'r':
	4529	if (memEQ(posixcc, "lowe", 4)) {
	4530	/* r */
	4531	namedclass
	4532	= complement ? ANYOF_NLOWER : ANYOF_LOWER;
	4533	}
	4534	if (memEQ(posixcc, "uppe", 4)) {
	4535	/* r */
	4536	namedclass
	4537	= complement ? ANYOF_NUPPER : ANYOF_UPPER;
	4538	}
	4539	break;
	4540	case 't':
	4541	if (memEQ(posixcc, "digi", 4)) {
	4542	/* t */
	4543	namedclass
	4544	= complement ? ANYOF_NDIGIT : ANYOF_DIGIT;
	4545	}
	4546	if (memEQ(posixcc, "prin", 4)) {
	4547	/* t */
	4548	namedclass
	4549	= complement ? ANYOF_NPRINT : ANYOF_PRINT;
	4550	}
	4551	if (memEQ(posixcc, "punc", 4)) {
	4552	/* t */
	4553	namedclass
	4554	= complement ? ANYOF_NPUNCT : ANYOF_PUNCT;
	4555	}
	4556	break;
	4557	}
	4558	break;
	4559	case 6:
	4560	if (memEQ(posixcc, "xdigit", 6)) {
	4561	namedclass
	4562	= complement ? ANYOF_NXDIGIT : ANYOF_XDIGIT;
	4563	}
	4564	break;
	4565	}
	4566
	4567	if (namedclass == OOB_NAMEDCLASS)
	4568	{
	4569	Simple_vFAIL3("POSIX class [:%.*s:] unknown",
	4570	t - s - 1, s + 1);
	4571	}
	4572	assert (posixcc[skip] == ':');
	4573	assert (posixcc[skip+1] == ']');
	4574	} else if (!SIZE_ONLY) {
	4575	/* [[=foo=]] and [[.foo.]] are still future. */
	4576
	4577	/* adjust RExC_parse so the warning shows after
	4578	the class closes */
	4579	while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse) != ']')
	4580	RExC_parse++;
	4581	Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
	4582	}
	4583	} else {
	4584	/* Maternal grandfather:
	4585	* "[:" ending in ":" but not in ":]" */
	4586	RExC_parse = s;
	4587	}
	4588	}
	4589	}
	4590
	4591	return namedclass;
	4592	}
	4593
	4594	STATIC void
	4595	S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
	4596	{
	4597	dVAR;
	4598	if (!SIZE_ONLY && POSIXCC(UCHARAT(RExC_parse))) {
	4599	const char *s = RExC_parse;
	4600	const char c = *s++;
	4601
	4602	while(s && isALNUM(s))
	4603	s++;
	4604	if (s && c == s && s[1] == ']') {
	4605	if (ckWARN(WARN_REGEXP))
	4606	vWARN3(s+2,
	4607	"POSIX syntax [%c %c] belongs inside character classes",
	4608	c, c);
	4609
	4610	/* [[=foo=]] and [[.foo.]] are still future. */
	4611	if (POSIXCC_NOTYET(c)) {
	4612	/* adjust RExC_parse so the error shows after
	4613	the class closes */
	4614	while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse++) != ']')
	4615	;
	4616	Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
	4617	}
	4618	}
	4619	}
	4620	}
	4621
	4622	STATIC regnode *
	4623	S_regclass(pTHX_ RExC_state_t *pRExC_state)
	4624	{
	4625	dVAR;
	4626	register UV value;
	4627	register UV nextvalue;
	4628	register IV prevvalue = OOB_UNICODE;
	4629	register IV range = 0;
	4630	register regnode *ret;
	4631	STRLEN numlen;
	4632	IV namedclass;
	4633	char *rangebegin = NULL;
	4634	bool need_class = 0;
	4635	SV *listsv = NULL;
	4636	register char *e;
	4637	UV n;
	4638	bool optimize_invert = TRUE;
	4639	AV* unicode_alternate = NULL;
	4640	#ifdef EBCDIC
	4641	UV literal_endpoint = 0;
	4642	#endif
	4643
	4644	ret = reganode(pRExC_state, ANYOF, 0);
	4645
	4646	if (!SIZE_ONLY)
	4647	ANYOF_FLAGS(ret) = 0;
	4648
	4649	if (UCHARAT(RExC_parse) == '^') { /* Complement of range. */
	4650	RExC_naughty++;
	4651	RExC_parse++;
	4652	if (!SIZE_ONLY)
	4653	ANYOF_FLAGS(ret) \|= ANYOF_INVERT;
	4654	}
	4655
	4656	if (SIZE_ONLY) {
	4657	RExC_size += ANYOF_SKIP;
	4658	listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
	4659	}
	4660	else {
	4661	RExC_emit += ANYOF_SKIP;
	4662	if (FOLD)
	4663	ANYOF_FLAGS(ret) \|= ANYOF_FOLD;
	4664	if (LOC)
	4665	ANYOF_FLAGS(ret) \|= ANYOF_LOCALE;
	4666	ANYOF_BITMAP_ZERO(ret);
	4667	listsv = newSVpvs("# comment\n");
	4668	}
	4669
	4670	nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
	4671
	4672	if (!SIZE_ONLY && POSIXCC(nextvalue))
	4673	checkposixcc(pRExC_state);
	4674
	4675	/* allow 1st char to be ] (allowing it to be - is dealt with later) */
	4676	if (UCHARAT(RExC_parse) == ']')
	4677	goto charclassloop;
	4678
	4679	while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
	4680
	4681	charclassloop:
	4682
	4683	namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
	4684
	4685	if (!range)
	4686	rangebegin = RExC_parse;
	4687	if (UTF) {
	4688	value = utf8n_to_uvchr((U8*)RExC_parse,
	4689	RExC_end - RExC_parse,
	4690	&numlen, UTF8_ALLOW_DEFAULT);
	4691	RExC_parse += numlen;
	4692	}
	4693	else
	4694	value = UCHARAT(RExC_parse++);
	4695	nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
	4696	if (value == '[' && POSIXCC(nextvalue))
	4697	namedclass = regpposixcc(pRExC_state, value);
	4698	else if (value == '\\') {
	4699	if (UTF) {
	4700	value = utf8n_to_uvchr((U8*)RExC_parse,
	4701	RExC_end - RExC_parse,
	4702	&numlen, UTF8_ALLOW_DEFAULT);
	4703	RExC_parse += numlen;
	4704	}
	4705	else
	4706	value = UCHARAT(RExC_parse++);
	4707	/* Some compilers cannot handle switching on 64-bit integer
	4708	* values, therefore value cannot be an UV. Yes, this will
	4709	* be a problem later if we want switch on Unicode.
	4710	* A similar issue a little bit later when switching on
	4711	* namedclass. --jhi */
	4712	switch ((I32)value) {
	4713	case 'w': namedclass = ANYOF_ALNUM; break;
	4714	case 'W': namedclass = ANYOF_NALNUM; break;
	4715	case 's': namedclass = ANYOF_SPACE; break;
	4716	case 'S': namedclass = ANYOF_NSPACE; break;
	4717	case 'd': namedclass = ANYOF_DIGIT; break;
	4718	case 'D': namedclass = ANYOF_NDIGIT; break;
	4719	case 'p':
	4720	case 'P':
	4721	if (RExC_parse >= RExC_end)
	4722	vFAIL2("Empty \\%c{}", (U8)value);
	4723	if (*RExC_parse == '{') {
	4724	const U8 c = (U8)value;
	4725	e = strchr(RExC_parse++, '}');
	4726	if (!e)
	4727	vFAIL2("Missing right brace on \\%c{}", c);
	4728	while (isSPACE(UCHARAT(RExC_parse)))
	4729	RExC_parse++;
	4730	if (e == RExC_parse)
	4731	vFAIL2("Empty \\%c{}", c);
	4732	n = e - RExC_parse;
	4733	while (isSPACE(UCHARAT(RExC_parse + n - 1)))
	4734	n--;
	4735	}
	4736	else {
	4737	e = RExC_parse;
	4738	n = 1;
	4739	}
	4740	if (!SIZE_ONLY) {
	4741	if (UCHARAT(RExC_parse) == '^') {
	4742	RExC_parse++;
	4743	n--;
	4744	value = value == 'p' ? 'P' : 'p'; /* toggle */
	4745	while (isSPACE(UCHARAT(RExC_parse))) {
	4746	RExC_parse++;
	4747	n--;
	4748	}
	4749	}
	4750	Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n",
	4751	(value=='p' ? '+' : '!'), (int)n, RExC_parse);
	4752	}
	4753	RExC_parse = e + 1;
	4754	ANYOF_FLAGS(ret) \|= ANYOF_UNICODE;
	4755	namedclass = ANYOF_MAX; /* no official name, but it's named */
	4756	break;
	4757	case 'n': value = '\n'; break;
	4758	case 'r': value = '\r'; break;
	4759	case 't': value = '\t'; break;
	4760	case 'f': value = '\f'; break;
	4761	case 'b': value = '\b'; break;
	4762	case 'e': value = ASCII_TO_NATIVE('\033');break;
	4763	case 'a': value = ASCII_TO_NATIVE('\007');break;
	4764	case 'x':
	4765	if (*RExC_parse == '{') {
	4766	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	4767	\| PERL_SCAN_DISALLOW_PREFIX;
	4768	e = strchr(RExC_parse++, '}');
	4769	if (!e)
	4770	vFAIL("Missing right brace on \\x{}");
	4771
	4772	numlen = e - RExC_parse;
	4773	value = grok_hex(RExC_parse, &numlen, &flags, NULL);
	4774	RExC_parse = e + 1;
	4775	}
	4776	else {
	4777	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
	4778	numlen = 2;
	4779	value = grok_hex(RExC_parse, &numlen, &flags, NULL);
	4780	RExC_parse += numlen;
	4781	}
	4782	break;
	4783	case 'c':
	4784	value = UCHARAT(RExC_parse++);
	4785	value = toCTRL(value);
	4786	break;
	4787	case '0': case '1': case '2': case '3': case '4':
	4788	case '5': case '6': case '7': case '8': case '9':
	4789	{
	4790	I32 flags = 0;
	4791	numlen = 3;
	4792	value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
	4793	RExC_parse += numlen;
	4794	break;
	4795	}
	4796	default:
	4797	if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
	4798	vWARN2(RExC_parse,
	4799	"Unrecognized escape \\%c in character class passed through",
	4800	(int)value);
	4801	break;
	4802	}
	4803	} /* end of \blah */
	4804	#ifdef EBCDIC
	4805	else
	4806	literal_endpoint++;
	4807	#endif
	4808
	4809	if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
	4810
	4811	if (!SIZE_ONLY && !need_class)
	4812	ANYOF_CLASS_ZERO(ret);
	4813
	4814	need_class = 1;
	4815
	4816	/* a bad range like a-\d, a-[:digit:] ? */
	4817	if (range) {
	4818	if (!SIZE_ONLY) {
	4819	if (ckWARN(WARN_REGEXP)) {
	4820	const int w =
	4821	RExC_parse >= rangebegin ?
	4822	RExC_parse - rangebegin : 0;
	4823	vWARN4(RExC_parse,
	4824	"False [] range \"%.s\"",
	4825	w, w, rangebegin);
	4826	}
	4827	if (prevvalue < 256) {
	4828	ANYOF_BITMAP_SET(ret, prevvalue);
	4829	ANYOF_BITMAP_SET(ret, '-');
	4830	}
	4831	else {
	4832	ANYOF_FLAGS(ret) \|= ANYOF_UNICODE;
	4833	Perl_sv_catpvf(aTHX_ listsv,
	4834	"%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
	4835	}
	4836	}
	4837
	4838	range = 0; /* this was not a true range */
	4839	}
	4840
	4841	if (!SIZE_ONLY) {
	4842	const char *what = NULL;
	4843	char yesno = 0;
	4844
	4845	if (namedclass > OOB_NAMEDCLASS)
	4846	optimize_invert = FALSE;
	4847	/* Possible truncation here but in some 64-bit environments
	4848	* the compiler gets heartburn about switch on 64-bit values.
	4849	* A similar issue a little earlier when switching on value.
	4850	* --jhi */
	4851	switch ((I32)namedclass) {
	4852	case ANYOF_ALNUM:
	4853	if (LOC)
	4854	ANYOF_CLASS_SET(ret, ANYOF_ALNUM);
	4855	else {
	4856	for (value = 0; value < 256; value++)
	4857	if (isALNUM(value))
	4858	ANYOF_BITMAP_SET(ret, value);
	4859	}
	4860	yesno = '+';
	4861	what = "Word";
	4862	break;
	4863	case ANYOF_NALNUM:
	4864	if (LOC)
	4865	ANYOF_CLASS_SET(ret, ANYOF_NALNUM);
	4866	else {
	4867	for (value = 0; value < 256; value++)
	4868	if (!isALNUM(value))
	4869	ANYOF_BITMAP_SET(ret, value);
	4870	}
	4871	yesno = '!';
	4872	what = "Word";
	4873	break;
	4874	case ANYOF_ALNUMC:
	4875	if (LOC)
	4876	ANYOF_CLASS_SET(ret, ANYOF_ALNUMC);
	4877	else {
	4878	for (value = 0; value < 256; value++)
	4879	if (isALNUMC(value))
	4880	ANYOF_BITMAP_SET(ret, value);
	4881	}
	4882	yesno = '+';
	4883	what = "Alnum";
	4884	break;
	4885	case ANYOF_NALNUMC:
	4886	if (LOC)
	4887	ANYOF_CLASS_SET(ret, ANYOF_NALNUMC);
	4888	else {
	4889	for (value = 0; value < 256; value++)
	4890	if (!isALNUMC(value))
	4891	ANYOF_BITMAP_SET(ret, value);
	4892	}
	4893	yesno = '!';
	4894	what = "Alnum";
	4895	break;
	4896	case ANYOF_ALPHA:
	4897	if (LOC)
	4898	ANYOF_CLASS_SET(ret, ANYOF_ALPHA);
	4899	else {
	4900	for (value = 0; value < 256; value++)
	4901	if (isALPHA(value))
	4902	ANYOF_BITMAP_SET(ret, value);
	4903	}
	4904	yesno = '+';
	4905	what = "Alpha";
	4906	break;
	4907	case ANYOF_NALPHA:
	4908	if (LOC)
	4909	ANYOF_CLASS_SET(ret, ANYOF_NALPHA);
	4910	else {
	4911	for (value = 0; value < 256; value++)
	4912	if (!isALPHA(value))
	4913	ANYOF_BITMAP_SET(ret, value);
	4914	}
	4915	yesno = '!';
	4916	what = "Alpha";
	4917	break;
	4918	case ANYOF_ASCII:
	4919	if (LOC)
	4920	ANYOF_CLASS_SET(ret, ANYOF_ASCII);
	4921	else {
	4922	#ifndef EBCDIC
	4923	for (value = 0; value < 128; value++)
	4924	ANYOF_BITMAP_SET(ret, value);
	4925	#else /* EBCDIC */
	4926	for (value = 0; value < 256; value++) {
	4927	if (isASCII(value))
	4928	ANYOF_BITMAP_SET(ret, value);
	4929	}
	4930	#endif /* EBCDIC */
	4931	}
	4932	yesno = '+';
	4933	what = "ASCII";
	4934	break;
	4935	case ANYOF_NASCII:
	4936	if (LOC)
	4937	ANYOF_CLASS_SET(ret, ANYOF_NASCII);
	4938	else {
	4939	#ifndef EBCDIC
	4940	for (value = 128; value < 256; value++)
	4941	ANYOF_BITMAP_SET(ret, value);
	4942	#else /* EBCDIC */
	4943	for (value = 0; value < 256; value++) {
	4944	if (!isASCII(value))
	4945	ANYOF_BITMAP_SET(ret, value);
	4946	}
	4947	#endif /* EBCDIC */
	4948	}
	4949	yesno = '!';
	4950	what = "ASCII";
	4951	break;
	4952	case ANYOF_BLANK:
	4953	if (LOC)
	4954	ANYOF_CLASS_SET(ret, ANYOF_BLANK);
	4955	else {
	4956	for (value = 0; value < 256; value++)
	4957	if (isBLANK(value))
	4958	ANYOF_BITMAP_SET(ret, value);
	4959	}
	4960	yesno = '+';
	4961	what = "Blank";
	4962	break;
	4963	case ANYOF_NBLANK:
	4964	if (LOC)
	4965	ANYOF_CLASS_SET(ret, ANYOF_NBLANK);
	4966	else {
	4967	for (value = 0; value < 256; value++)
	4968	if (!isBLANK(value))
	4969	ANYOF_BITMAP_SET(ret, value);
	4970	}
	4971	yesno = '!';
	4972	what = "Blank";
	4973	break;
	4974	case ANYOF_CNTRL:
	4975	if (LOC)
	4976	ANYOF_CLASS_SET(ret, ANYOF_CNTRL);
	4977	else {
	4978	for (value = 0; value < 256; value++)
	4979	if (isCNTRL(value))
	4980	ANYOF_BITMAP_SET(ret, value);
	4981	}
	4982	yesno = '+';
	4983	what = "Cntrl";
	4984	break;
	4985	case ANYOF_NCNTRL:
	4986	if (LOC)
	4987	ANYOF_CLASS_SET(ret, ANYOF_NCNTRL);
	4988	else {
	4989	for (value = 0; value < 256; value++)
	4990	if (!isCNTRL(value))
	4991	ANYOF_BITMAP_SET(ret, value);
	4992	}
	4993	yesno = '!';
	4994	what = "Cntrl";
	4995	break;
	4996	case ANYOF_DIGIT:
	4997	if (LOC)
	4998	ANYOF_CLASS_SET(ret, ANYOF_DIGIT);
	4999	else {
	5000	/* consecutive digits assumed */
	5001	for (value = '0'; value <= '9'; value++)
	5002	ANYOF_BITMAP_SET(ret, value);
	5003	}
	5004	yesno = '+';
	5005	what = "Digit";
	5006	break;
	5007	case ANYOF_NDIGIT:
	5008	if (LOC)
	5009	ANYOF_CLASS_SET(ret, ANYOF_NDIGIT);
	5010	else {
	5011	/* consecutive digits assumed */
	5012	for (value = 0; value < '0'; value++)
	5013	ANYOF_BITMAP_SET(ret, value);
	5014	for (value = '9' + 1; value < 256; value++)
	5015	ANYOF_BITMAP_SET(ret, value);
	5016	}
	5017	yesno = '!';
	5018	what = "Digit";
	5019	break;
	5020	case ANYOF_GRAPH:
	5021	if (LOC)
	5022	ANYOF_CLASS_SET(ret, ANYOF_GRAPH);
	5023	else {
	5024	for (value = 0; value < 256; value++)
	5025	if (isGRAPH(value))
	5026	ANYOF_BITMAP_SET(ret, value);
	5027	}
	5028	yesno = '+';
	5029	what = "Graph";
	5030	break;
	5031	case ANYOF_NGRAPH:
	5032	if (LOC)
	5033	ANYOF_CLASS_SET(ret, ANYOF_NGRAPH);
	5034	else {
	5035	for (value = 0; value < 256; value++)
	5036	if (!isGRAPH(value))
	5037	ANYOF_BITMAP_SET(ret, value);
	5038	}
	5039	yesno = '!';
	5040	what = "Graph";
	5041	break;
	5042	case ANYOF_LOWER:
	5043	if (LOC)
	5044	ANYOF_CLASS_SET(ret, ANYOF_LOWER);
	5045	else {
	5046	for (value = 0; value < 256; value++)
	5047	if (isLOWER(value))
	5048	ANYOF_BITMAP_SET(ret, value);
	5049	}
	5050	yesno = '+';
	5051	what = "Lower";
	5052	break;
	5053	case ANYOF_NLOWER:
	5054	if (LOC)
	5055	ANYOF_CLASS_SET(ret, ANYOF_NLOWER);
	5056	else {
	5057	for (value = 0; value < 256; value++)
	5058	if (!isLOWER(value))
	5059	ANYOF_BITMAP_SET(ret, value);
	5060	}
	5061	yesno = '!';
	5062	what = "Lower";
	5063	break;
	5064	case ANYOF_PRINT:
	5065	if (LOC)
	5066	ANYOF_CLASS_SET(ret, ANYOF_PRINT);
	5067	else {
	5068	for (value = 0; value < 256; value++)
	5069	if (isPRINT(value))
	5070	ANYOF_BITMAP_SET(ret, value);
	5071	}
	5072	yesno = '+';
	5073	what = "Print";
	5074	break;
	5075	case ANYOF_NPRINT:
	5076	if (LOC)
	5077	ANYOF_CLASS_SET(ret, ANYOF_NPRINT);
	5078	else {
	5079	for (value = 0; value < 256; value++)
	5080	if (!isPRINT(value))
	5081	ANYOF_BITMAP_SET(ret, value);
	5082	}
	5083	yesno = '!';
	5084	what = "Print";
	5085	break;
	5086	case ANYOF_PSXSPC:
	5087	if (LOC)
	5088	ANYOF_CLASS_SET(ret, ANYOF_PSXSPC);
	5089	else {
	5090	for (value = 0; value < 256; value++)
	5091	if (isPSXSPC(value))
	5092	ANYOF_BITMAP_SET(ret, value);
	5093	}
	5094	yesno = '+';
	5095	what = "Space";
	5096	break;
	5097	case ANYOF_NPSXSPC:
	5098	if (LOC)
	5099	ANYOF_CLASS_SET(ret, ANYOF_NPSXSPC);
	5100	else {
	5101	for (value = 0; value < 256; value++)
	5102	if (!isPSXSPC(value))
	5103	ANYOF_BITMAP_SET(ret, value);
	5104	}
	5105	yesno = '!';
	5106	what = "Space";
	5107	break;
	5108	case ANYOF_PUNCT:
	5109	if (LOC)
	5110	ANYOF_CLASS_SET(ret, ANYOF_PUNCT);
	5111	else {
	5112	for (value = 0; value < 256; value++)
	5113	if (isPUNCT(value))
	5114	ANYOF_BITMAP_SET(ret, value);
	5115	}
	5116	yesno = '+';
	5117	what = "Punct";
	5118	break;
	5119	case ANYOF_NPUNCT:
	5120	if (LOC)
	5121	ANYOF_CLASS_SET(ret, ANYOF_NPUNCT);
	5122	else {
	5123	for (value = 0; value < 256; value++)
	5124	if (!isPUNCT(value))
	5125	ANYOF_BITMAP_SET(ret, value);
	5126	}
	5127	yesno = '!';
	5128	what = "Punct";
	5129	break;
	5130	case ANYOF_SPACE:
	5131	if (LOC)
	5132	ANYOF_CLASS_SET(ret, ANYOF_SPACE);
	5133	else {
	5134	for (value = 0; value < 256; value++)
	5135	if (isSPACE(value))
	5136	ANYOF_BITMAP_SET(ret, value);
	5137	}
	5138	yesno = '+';
	5139	what = "SpacePerl";
	5140	break;
	5141	case ANYOF_NSPACE:
	5142	if (LOC)
	5143	ANYOF_CLASS_SET(ret, ANYOF_NSPACE);
	5144	else {
	5145	for (value = 0; value < 256; value++)
	5146	if (!isSPACE(value))
	5147	ANYOF_BITMAP_SET(ret, value);
	5148	}
	5149	yesno = '!';
	5150	what = "SpacePerl";
	5151	break;
	5152	case ANYOF_UPPER:
	5153	if (LOC)
	5154	ANYOF_CLASS_SET(ret, ANYOF_UPPER);
	5155	else {
	5156	for (value = 0; value < 256; value++)
	5157	if (isUPPER(value))
	5158	ANYOF_BITMAP_SET(ret, value);
	5159	}
	5160	yesno = '+';
	5161	what = "Upper";
	5162	break;
	5163	case ANYOF_NUPPER:
	5164	if (LOC)
	5165	ANYOF_CLASS_SET(ret, ANYOF_NUPPER);
	5166	else {
	5167	for (value = 0; value < 256; value++)
	5168	if (!isUPPER(value))
	5169	ANYOF_BITMAP_SET(ret, value);
	5170	}
	5171	yesno = '!';
	5172	what = "Upper";
	5173	break;
	5174	case ANYOF_XDIGIT:
	5175	if (LOC)
	5176	ANYOF_CLASS_SET(ret, ANYOF_XDIGIT);
	5177	else {
	5178	for (value = 0; value < 256; value++)
	5179	if (isXDIGIT(value))
	5180	ANYOF_BITMAP_SET(ret, value);
	5181	}
	5182	yesno = '+';
	5183	what = "XDigit";
	5184	break;
	5185	case ANYOF_NXDIGIT:
	5186	if (LOC)
	5187	ANYOF_CLASS_SET(ret, ANYOF_NXDIGIT);
	5188	else {
	5189	for (value = 0; value < 256; value++)
	5190	if (!isXDIGIT(value))
	5191	ANYOF_BITMAP_SET(ret, value);
	5192	}
	5193	yesno = '!';
	5194	what = "XDigit";
	5195	break;
	5196	case ANYOF_MAX:
	5197	/* this is to handle \p and \P */
	5198	break;
	5199	default:
	5200	vFAIL("Invalid [::] class");
	5201	break;
	5202	}
	5203	if (what) {
	5204	/* Strings such as "+utf8::isWord\n" */
	5205	Perl_sv_catpvf(aTHX_ listsv, "%cutf8::Is%s\n", yesno, what);
	5206	}
	5207	if (LOC)
	5208	ANYOF_FLAGS(ret) \|= ANYOF_CLASS;
	5209	continue;
	5210	}
	5211	} /* end of namedclass \blah */
	5212
	5213	if (range) {
	5214	if (prevvalue > (IV)value) /* b-a */ {
	5215	const int w = RExC_parse - rangebegin;
	5216	Simple_vFAIL4("Invalid [] range \"%.s\"", w, w, rangebegin);
	5217	range = 0; /* not a valid range */
	5218	}
	5219	}
	5220	else {
	5221	prevvalue = value; /* save the beginning of the range */
	5222	if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
	5223	RExC_parse[1] != ']') {
	5224	RExC_parse++;
	5225
	5226	/* a bad range like \w-, [:word:]- ? */
	5227	if (namedclass > OOB_NAMEDCLASS) {
	5228	if (ckWARN(WARN_REGEXP)) {
	5229	const int w =
	5230	RExC_parse >= rangebegin ?
	5231	RExC_parse - rangebegin : 0;
	5232	vWARN4(RExC_parse,
	5233	"False [] range \"%.s\"",
	5234	w, w, rangebegin);
	5235	}
	5236	if (!SIZE_ONLY)
	5237	ANYOF_BITMAP_SET(ret, '-');
	5238	} else
	5239	range = 1; /* yeah, it's a range! */
	5240	continue; /* but do it the next time */
	5241	}
	5242	}
	5243
	5244	/* now is the next time */
	5245	if (!SIZE_ONLY) {
	5246	IV i;
	5247
	5248	if (prevvalue < 256) {
	5249	const IV ceilvalue = value < 256 ? value : 255;
	5250
	5251	#ifdef EBCDIC
	5252	/* In EBCDIC [\x89-\x91] should include
	5253	* the \x8e but [i-j] should not. */
	5254	if (literal_endpoint == 2 &&
	5255	((isLOWER(prevvalue) && isLOWER(ceilvalue)) \|\|
	5256	(isUPPER(prevvalue) && isUPPER(ceilvalue))))
	5257	{
	5258	if (isLOWER(prevvalue)) {
	5259	for (i = prevvalue; i <= ceilvalue; i++)
	5260	if (isLOWER(i))
	5261	ANYOF_BITMAP_SET(ret, i);
	5262	} else {
	5263	for (i = prevvalue; i <= ceilvalue; i++)
	5264	if (isUPPER(i))
	5265	ANYOF_BITMAP_SET(ret, i);
	5266	}
	5267	}
	5268	else
	5269	#endif
	5270	for (i = prevvalue; i <= ceilvalue; i++)
	5271	ANYOF_BITMAP_SET(ret, i);
	5272	}
	5273	if (value > 255 \|\| UTF) {
	5274	const UV prevnatvalue = NATIVE_TO_UNI(prevvalue);
	5275	const UV natvalue = NATIVE_TO_UNI(value);
	5276
	5277	ANYOF_FLAGS(ret) \|= ANYOF_UNICODE;
	5278	if (prevnatvalue < natvalue) { /* what about > ? */
	5279	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
	5280	prevnatvalue, natvalue);
	5281	}
	5282	else if (prevnatvalue == natvalue) {
	5283	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", natvalue);
	5284	if (FOLD) {
	5285	U8 foldbuf[UTF8_MAXBYTES_CASE+1];
	5286	STRLEN foldlen;
	5287	const UV f = to_uni_fold(natvalue, foldbuf, &foldlen);
	5288
	5289	/* If folding and foldable and a single
	5290	* character, insert also the folded version
	5291	* to the charclass. */
	5292	if (f != value) {
	5293	if (foldlen == (STRLEN)UNISKIP(f))
	5294	Perl_sv_catpvf(aTHX_ listsv,
	5295	"%04"UVxf"\n", f);
	5296	else {
	5297	/* Any multicharacter foldings
	5298	* require the following transform:
	5299	* [ABCDEF] -> (?:[ABCabcDEFd]\|pq\|rst)
	5300	* where E folds into "pq" and F folds
	5301	* into "rst", all other characters
	5302	* fold to single characters. We save
	5303	* away these multicharacter foldings,
	5304	* to be later saved as part of the
	5305	* additional "s" data. */
	5306	SV *sv;
	5307
	5308	if (!unicode_alternate)
	5309	unicode_alternate = newAV();
	5310	sv = newSVpvn((char*)foldbuf, foldlen);
	5311	SvUTF8_on(sv);
	5312	av_push(unicode_alternate, sv);
	5313	}
	5314	}
	5315
	5316	/* If folding and the value is one of the Greek
	5317	* sigmas insert a few more sigmas to make the
	5318	* folding rules of the sigmas to work right.
	5319	* Note that not all the possible combinations
	5320	* are handled here: some of them are handled
	5321	* by the standard folding rules, and some of
	5322	* them (literal or EXACTF cases) are handled
	5323	* during runtime in regexec.c:S_find_byclass(). */
	5324	if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
	5325	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
	5326	(UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
	5327	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
	5328	(UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
	5329	}
	5330	else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA)
	5331	Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
	5332	(UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
	5333	}
	5334	}
	5335	}
	5336	#ifdef EBCDIC
	5337	literal_endpoint = 0;
	5338	#endif
	5339	}
	5340
	5341	range = 0; /* this range (if it was one) is done now */
	5342	}
	5343
	5344	if (need_class) {
	5345	ANYOF_FLAGS(ret) \|= ANYOF_LARGE;
	5346	if (SIZE_ONLY)
	5347	RExC_size += ANYOF_CLASS_ADD_SKIP;
	5348	else
	5349	RExC_emit += ANYOF_CLASS_ADD_SKIP;
	5350	}
	5351
	5352	/* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
	5353	if (!SIZE_ONLY &&
	5354	/* If the only flag is folding (plus possibly inversion). */
	5355	((ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD)
	5356	) {
	5357	for (value = 0; value < 256; ++value) {
	5358	if (ANYOF_BITMAP_TEST(ret, value)) {
	5359	UV fold = PL_fold[value];
	5360
	5361	if (fold != value)
	5362	ANYOF_BITMAP_SET(ret, fold);
	5363	}
	5364	}
	5365	ANYOF_FLAGS(ret) &= ~ANYOF_FOLD;
	5366	}
	5367
	5368	/* optimize inverted simple patterns (e.g. [^a-z]) */
	5369	if (!SIZE_ONLY && optimize_invert &&
	5370	/* If the only flag is inversion. */
	5371	(ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
	5372	for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
	5373	ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL;
	5374	ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
	5375	}
	5376
	5377	if (!SIZE_ONLY) {
	5378	AV * const av = newAV();
	5379	SV *rv;
	5380
	5381	/* The 0th element stores the character class description
	5382	* in its textual form: used later (regexec.c:Perl_regclass_swash())
	5383	* to initialize the appropriate swash (which gets stored in
	5384	* the 1st element), and also useful for dumping the regnode.
	5385	* The 2nd element stores the multicharacter foldings,
	5386	* used later (regexec.c:S_reginclass()). */
	5387	av_store(av, 0, listsv);
	5388	av_store(av, 1, NULL);
	5389	av_store(av, 2, (SV*)unicode_alternate);
	5390	rv = newRV_noinc((SV*)av);
	5391	n = add_data(pRExC_state, 1, "s");
	5392	RExC_rx->data->data[n] = (void*)rv;
	5393	ARG_SET(ret, n);
	5394	}
	5395
	5396	return ret;
	5397	}
	5398
	5399	STATIC char*
	5400	S_nextchar(pTHX_ RExC_state_t *pRExC_state)
	5401	{
	5402	char* const retval = RExC_parse++;
	5403
	5404	for (;;) {
	5405	if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
	5406	RExC_parse[2] == '#') {
	5407	while (*RExC_parse != ')') {
	5408	if (RExC_parse == RExC_end)
	5409	FAIL("Sequence (?#... not terminated");
	5410	RExC_parse++;
	5411	}
	5412	RExC_parse++;
	5413	continue;
	5414	}
	5415	if (RExC_flags & PMf_EXTENDED) {
	5416	if (isSPACE(*RExC_parse)) {
	5417	RExC_parse++;
	5418	continue;
	5419	}
	5420	else if (*RExC_parse == '#') {
	5421	while (RExC_parse < RExC_end)
	5422	if (*RExC_parse++ == '\n') break;
	5423	continue;
	5424	}
	5425	}
	5426	return retval;
	5427	}
	5428	}
	5429
	5430	/*
	5431	- reg_node - emit a node
	5432	*/
	5433	STATIC regnode * /* Location. */
	5434	S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
	5435	{
	5436	dVAR;
	5437	register regnode *ptr;
	5438	regnode * const ret = RExC_emit;
	5439
	5440	if (SIZE_ONLY) {
	5441	SIZE_ALIGN(RExC_size);
	5442	RExC_size += 1;
	5443	return(ret);
	5444	}
	5445
	5446	NODE_ALIGN_FILL(ret);
	5447	ptr = ret;
	5448	FILL_ADVANCE_NODE(ptr, op);
	5449	if (RExC_offsets) { /* MJD */
	5450	MJD_OFFSET_DEBUG(("%s:%u: (op %s) %s %u <- %u (len %u) (max %u).\n",
	5451	"reg_node", __LINE__,
	5452	reg_name[op],
	5453	RExC_emit - RExC_emit_start > RExC_offsets[0]
	5454	? "Overwriting end of array!\n" : "OK",
	5455	RExC_emit - RExC_emit_start,
	5456	RExC_parse - RExC_start,
	5457	RExC_offsets[0]));
	5458	Set_Node_Offset(RExC_emit, RExC_parse + (op == END));
	5459	}
	5460
	5461	RExC_emit = ptr;
	5462
	5463	return(ret);
	5464	}
	5465
	5466	/*
	5467	- reganode - emit a node with an argument
	5468	*/
	5469	STATIC regnode * /* Location. */
	5470	S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
	5471	{
	5472	dVAR;
	5473	register regnode *ptr;
	5474	regnode * const ret = RExC_emit;
	5475
	5476	if (SIZE_ONLY) {
	5477	SIZE_ALIGN(RExC_size);
	5478	RExC_size += 2;
	5479	return(ret);
	5480	}
	5481
	5482	NODE_ALIGN_FILL(ret);
	5483	ptr = ret;
	5484	FILL_ADVANCE_NODE_ARG(ptr, op, arg);
	5485	if (RExC_offsets) { /* MJD */
	5486	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %u <- %u (max %u).\n",
	5487	"reganode",
	5488	__LINE__,
	5489	reg_name[op],
	5490	RExC_emit - RExC_emit_start > RExC_offsets[0] ?
	5491	"Overwriting end of array!\n" : "OK",
	5492	RExC_emit - RExC_emit_start,
	5493	RExC_parse - RExC_start,
	5494	RExC_offsets[0]));
	5495	Set_Cur_Node_Offset;
	5496	}
	5497
	5498	RExC_emit = ptr;
	5499
	5500	return(ret);
	5501	}
	5502
	5503	/*
	5504	- reguni - emit (if appropriate) a Unicode character
	5505	*/
	5506	STATIC void
	5507	S_reguni(pTHX_ const RExC_state_t pRExC_state, UV uv, char s, STRLEN* lenp)
	5508	{
	5509	dVAR;
	5510	lenp = SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8)s, uv) - (U8*)s);
	5511	}
	5512
	5513	/*
	5514	- reginsert - insert an operator in front of already-emitted operand
	5515	*
	5516	* Means relocating the operand.
	5517	*/
	5518	STATIC void
	5519	S_reginsert(pTHX_ RExC_state_t pRExC_state, U8 op, regnode opnd)
	5520	{
	5521	dVAR;
	5522	register regnode *src;
	5523	register regnode *dst;
	5524	register regnode *place;
	5525	const int offset = regarglen[(U8)op];
	5526
	5527	/* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
	5528
	5529	if (SIZE_ONLY) {
	5530	RExC_size += NODE_STEP_REGNODE + offset;
	5531	return;
	5532	}
	5533
	5534	src = RExC_emit;
	5535	RExC_emit += NODE_STEP_REGNODE + offset;
	5536	dst = RExC_emit;
	5537	while (src > opnd) {
	5538	StructCopy(--src, --dst, regnode);
	5539	if (RExC_offsets) { /* MJD 20010112 */
	5540	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s copy %u -> %u (max %u).\n",
	5541	"reg_insert",
	5542	__LINE__,
	5543	reg_name[op],
	5544	dst - RExC_emit_start > RExC_offsets[0]
	5545	? "Overwriting end of array!\n" : "OK",
	5546	src - RExC_emit_start,
	5547	dst - RExC_emit_start,
	5548	RExC_offsets[0]));
	5549	Set_Node_Offset_To_R(dst-RExC_emit_start, Node_Offset(src));
	5550	Set_Node_Length_To_R(dst-RExC_emit_start, Node_Length(src));
	5551	}
	5552	}
	5553
	5554
	5555	place = opnd; /* Op node, where operand used to be. */
	5556	if (RExC_offsets) { /* MJD */
	5557	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %u <- %u (max %u).\n",
	5558	"reginsert",
	5559	__LINE__,
	5560	reg_name[op],
	5561	place - RExC_emit_start > RExC_offsets[0]
	5562	? "Overwriting end of array!\n" : "OK",
	5563	place - RExC_emit_start,
	5564	RExC_parse - RExC_start,
	5565	RExC_offsets[0]));
	5566	Set_Node_Offset(place, RExC_parse);
	5567	Set_Node_Length(place, 1);
	5568	}
	5569	src = NEXTOPER(place);
	5570	FILL_ADVANCE_NODE(place, op);
	5571	Zero(src, offset, regnode);
	5572	}
	5573
	5574	/*
	5575	- regtail - set the next-pointer at the end of a node chain of p to val.
	5576	*/
	5577	/* TODO: All three parms should be const */
	5578	STATIC void
	5579	S_regtail(pTHX_ const RExC_state_t pRExC_state, regnode p, const regnode *val)
	5580	{
	5581	dVAR;
	5582	register regnode *scan;
	5583
	5584	if (SIZE_ONLY)
	5585	return;
	5586
	5587	/* Find last node. */
	5588	scan = p;
	5589	for (;;) {
	5590	regnode * const temp = regnext(scan);
	5591	if (temp == NULL)
	5592	break;
	5593	scan = temp;
	5594	}
	5595
	5596	if (reg_off_by_arg[OP(scan)]) {
	5597	ARG_SET(scan, val - scan);
	5598	}
	5599	else {
	5600	NEXT_OFF(scan) = val - scan;
	5601	}
	5602	}
	5603
	5604	/*
	5605	- regoptail - regtail on operand of first argument; nop if operandless
	5606	*/
	5607	/* TODO: All three parms should be const */
	5608	STATIC void
	5609	S_regoptail(pTHX_ const RExC_state_t pRExC_state, regnode p, const regnode *val)
	5610	{
	5611	dVAR;
	5612	/* "Operandless" and "op != BRANCH" are synonymous in practice. */
	5613	if (p == NULL \|\| SIZE_ONLY)
	5614	return;
	5615	if (PL_regkind[(U8)OP(p)] == BRANCH) {
	5616	regtail(pRExC_state, NEXTOPER(p), val);
	5617	}
	5618	else if ( PL_regkind[(U8)OP(p)] == BRANCHJ) {
	5619	regtail(pRExC_state, NEXTOPER(NEXTOPER(p)), val);
	5620	}
	5621	else
	5622	return;
	5623	}
	5624
	5625	/*
	5626	- regcurly - a little FSA that accepts {\d+,?\d*}
	5627	*/
	5628	STATIC I32
	5629	S_regcurly(register const char *s)
	5630	{
	5631	if (*s++ != '{')
	5632	return FALSE;
	5633	if (!isDIGIT(*s))
	5634	return FALSE;
	5635	while (isDIGIT(*s))
	5636	s++;
	5637	if (*s == ',')
	5638	s++;
	5639	while (isDIGIT(*s))
	5640	s++;
	5641	if (*s != '}')
	5642	return FALSE;
	5643	return TRUE;
	5644	}
	5645
	5646
	5647	/*
	5648	- regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
	5649	*/
	5650	void
	5651	Perl_regdump(pTHX_ const regexp *r)
	5652	{
	5653	#ifdef DEBUGGING
	5654	dVAR;
	5655	SV * const sv = sv_newmortal();
	5656
	5657	(void)dumpuntil(r, r->program, r->program + 1, NULL, sv, 0);
	5658
	5659	/* Header fields of interest. */
	5660	if (r->anchored_substr)
	5661	PerlIO_printf(Perl_debug_log,
	5662	"anchored \"%s%.*s%s\"%s at %"IVdf" ",
	5663	PL_colors[0],
	5664	(int)(SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0)),
	5665	SvPVX_const(r->anchored_substr),
	5666	PL_colors[1],
	5667	SvTAIL(r->anchored_substr) ? "$" : "",
	5668	(IV)r->anchored_offset);
	5669	else if (r->anchored_utf8)
	5670	PerlIO_printf(Perl_debug_log,
	5671	"anchored utf8 \"%s%.*s%s\"%s at %"IVdf" ",
	5672	PL_colors[0],
	5673	(int)(SvCUR(r->anchored_utf8) - (SvTAIL(r->anchored_utf8)!=0)),
	5674	SvPVX_const(r->anchored_utf8),
	5675	PL_colors[1],
	5676	SvTAIL(r->anchored_utf8) ? "$" : "",
	5677	(IV)r->anchored_offset);
	5678	if (r->float_substr)
	5679	PerlIO_printf(Perl_debug_log,
	5680	"floating \"%s%.*s%s\"%s at %"IVdf"..%"UVuf" ",
	5681	PL_colors[0],
	5682	(int)(SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0)),
	5683	SvPVX_const(r->float_substr),
	5684	PL_colors[1],
	5685	SvTAIL(r->float_substr) ? "$" : "",
	5686	(IV)r->float_min_offset, (UV)r->float_max_offset);
	5687	else if (r->float_utf8)
	5688	PerlIO_printf(Perl_debug_log,
	5689	"floating utf8 \"%s%.*s%s\"%s at %"IVdf"..%"UVuf" ",
	5690	PL_colors[0],
	5691	(int)(SvCUR(r->float_utf8) - (SvTAIL(r->float_utf8)!=0)),
	5692	SvPVX_const(r->float_utf8),
	5693	PL_colors[1],
	5694	SvTAIL(r->float_utf8) ? "$" : "",
	5695	(IV)r->float_min_offset, (UV)r->float_max_offset);
	5696	if (r->check_substr \|\| r->check_utf8)
	5697	PerlIO_printf(Perl_debug_log,
	5698	r->check_substr == r->float_substr
	5699	&& r->check_utf8 == r->float_utf8
	5700	? "(checking floating" : "(checking anchored");
	5701	if (r->reganch & ROPT_NOSCAN)
	5702	PerlIO_printf(Perl_debug_log, " noscan");
	5703	if (r->reganch & ROPT_CHECK_ALL)
	5704	PerlIO_printf(Perl_debug_log, " isall");
	5705	if (r->check_substr \|\| r->check_utf8)
	5706	PerlIO_printf(Perl_debug_log, ") ");
	5707
	5708	if (r->regstclass) {
	5709	regprop(r, sv, r->regstclass);
	5710	PerlIO_printf(Perl_debug_log, "stclass \"%s\" ", SvPVX_const(sv));
	5711	}
	5712	if (r->reganch & ROPT_ANCH) {
	5713	PerlIO_printf(Perl_debug_log, "anchored");
	5714	if (r->reganch & ROPT_ANCH_BOL)
	5715	PerlIO_printf(Perl_debug_log, "(BOL)");
	5716	if (r->reganch & ROPT_ANCH_MBOL)
	5717	PerlIO_printf(Perl_debug_log, "(MBOL)");
	5718	if (r->reganch & ROPT_ANCH_SBOL)
	5719	PerlIO_printf(Perl_debug_log, "(SBOL)");
	5720	if (r->reganch & ROPT_ANCH_GPOS)
	5721	PerlIO_printf(Perl_debug_log, "(GPOS)");
	5722	PerlIO_putc(Perl_debug_log, ' ');
	5723	}
	5724	if (r->reganch & ROPT_GPOS_SEEN)
	5725	PerlIO_printf(Perl_debug_log, "GPOS ");
	5726	if (r->reganch & ROPT_SKIP)
	5727	PerlIO_printf(Perl_debug_log, "plus ");
	5728	if (r->reganch & ROPT_IMPLICIT)
	5729	PerlIO_printf(Perl_debug_log, "implicit ");
	5730	PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen);
	5731	if (r->reganch & ROPT_EVAL_SEEN)
	5732	PerlIO_printf(Perl_debug_log, "with eval ");
	5733	PerlIO_printf(Perl_debug_log, "\n");
	5734	if (r->offsets) {
	5735	const U32 len = r->offsets[0];
	5736	GET_RE_DEBUG_FLAGS_DECL;
	5737	DEBUG_OFFSETS_r({
	5738	U32 i;
	5739	PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)r->offsets[0]);
	5740	for (i = 1; i <= len; i++)
	5741	PerlIO_printf(Perl_debug_log, "%"UVuf"[%"UVuf"] ",
	5742	(UV)r->offsets[i2-1], (UV)r->offsets[i2]);
	5743	PerlIO_printf(Perl_debug_log, "\n");
	5744	});
	5745	}
	5746	#else
	5747	PERL_UNUSED_CONTEXT;
	5748	PERL_UNUSED_ARG(r);
	5749	#endif /* DEBUGGING */
	5750	}
	5751
	5752	/*
	5753	- regprop - printable representation of opcode
	5754	*/
	5755	void
	5756	Perl_regprop(pTHX_ const regexp prog, SV sv, const regnode *o)
	5757	{
	5758	#ifdef DEBUGGING
	5759	dVAR;
	5760	register int k;
	5761
	5762	sv_setpvn(sv, "", 0);
	5763	if (OP(o) >= reg_num) /* regnode.type is unsigned */
	5764	/* It would be nice to FAIL() here, but this may be called from
	5765	regexec.c, and it would be hard to supply pRExC_state. */
	5766	Perl_croak(aTHX_ "Corrupted regexp opcode");
	5767	sv_catpv(sv, reg_name[OP(o)]); /* Take off const! */
	5768
	5769	k = PL_regkind[(U8)OP(o)];
	5770
	5771	if (k == EXACT) {
	5772	SV * const dsv = sv_2mortal(newSVpvs(""));
	5773	/* Using is_utf8_string() is a crude hack but it may
	5774	* be the best for now since we have no flag "this EXACTish
	5775	* node was UTF-8" --jhi */
	5776	const bool do_utf8 = is_utf8_string((U8*)STRING(o), STR_LEN(o));
	5777	const char * const s = do_utf8 ?
	5778	pv_uni_display(dsv, (U8*)STRING(o), STR_LEN(o), 60,
	5779	UNI_DISPLAY_REGEX) :
	5780	STRING(o);
	5781	const int len = do_utf8 ?
	5782	strlen(s) :
	5783	STR_LEN(o);
	5784	Perl_sv_catpvf(aTHX_ sv, " <%s%.*s%s>",
	5785	PL_colors[0],
	5786	len, s,
	5787	PL_colors[1]);
	5788	} else if (k == TRIE) {
	5789	/EMPTY/;
	5790	/* print the details od the trie in dumpuntil instead, as
	5791	* prog->data isn't available here */
	5792	} else if (k == CURLY) {
	5793	if (OP(o) == CURLYM \|\| OP(o) == CURLYN \|\| OP(o) == CURLYX)
	5794	Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
	5795	Perl_sv_catpvf(aTHX_ sv, " {%d,%d}", ARG1(o), ARG2(o));
	5796	}
	5797	else if (k == WHILEM && o->flags) /* Ordinal/of */
	5798	Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
	5799	else if (k == REF \|\| k == OPEN \|\| k == CLOSE \|\| k == GROUPP )
	5800	Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o)); /* Parenth number */
	5801	else if (k == LOGICAL)
	5802	Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */
	5803	else if (k == ANYOF) {
	5804	int i, rangestart = -1;
	5805	const U8 flags = ANYOF_FLAGS(o);
	5806
	5807	/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
	5808	static const char * const anyofs[] = {
	5809	"\\w",
	5810	"\\W",
	5811	"\\s",
	5812	"\\S",
	5813	"\\d",
	5814	"\\D",
	5815	"[:alnum:]",
	5816	"[:^alnum:]",
	5817	"[:alpha:]",
	5818	"[:^alpha:]",
	5819	"[:ascii:]",
	5820	"[:^ascii:]",
	5821	"[:ctrl:]",
	5822	"[:^ctrl:]",
	5823	"[:graph:]",
	5824	"[:^graph:]",
	5825	"[:lower:]",
	5826	"[:^lower:]",
	5827	"[:print:]",
	5828	"[:^print:]",
	5829	"[:punct:]",
	5830	"[:^punct:]",
	5831	"[:upper:]",
	5832	"[:^upper:]",
	5833	"[:xdigit:]",
	5834	"[:^xdigit:]",
	5835	"[:space:]",
	5836	"[:^space:]",
	5837	"[:blank:]",
	5838	"[:^blank:]"
	5839	};
	5840
	5841	if (flags & ANYOF_LOCALE)
	5842	sv_catpvs(sv, "{loc}");
	5843	if (flags & ANYOF_FOLD)
	5844	sv_catpvs(sv, "{i}");
	5845	Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
	5846	if (flags & ANYOF_INVERT)
	5847	sv_catpvs(sv, "^");
	5848	for (i = 0; i <= 256; i++) {
	5849	if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
	5850	if (rangestart == -1)
	5851	rangestart = i;
	5852	} else if (rangestart != -1) {
	5853	if (i <= rangestart + 3)
	5854	for (; rangestart < i; rangestart++)
	5855	put_byte(sv, rangestart);
	5856	else {
	5857	put_byte(sv, rangestart);
	5858	sv_catpvs(sv, "-");
	5859	put_byte(sv, i - 1);
	5860	}
	5861	rangestart = -1;
	5862	}
	5863	}
	5864
	5865	if (o->flags & ANYOF_CLASS)
	5866	for (i = 0; i < sizeof(anyofs)/sizeof(char*); i++)
	5867	if (ANYOF_CLASS_TEST(o,i))
	5868	sv_catpv(sv, anyofs[i]);
	5869
	5870	if (flags & ANYOF_UNICODE)
	5871	sv_catpvs(sv, "{unicode}");
	5872	else if (flags & ANYOF_UNICODE_ALL)
	5873	sv_catpvs(sv, "{unicode_all}");
	5874
	5875	{
	5876	SV *lv;
	5877	SV * const sw = regclass_swash(prog, o, FALSE, &lv, 0);
	5878
	5879	if (lv) {
	5880	if (sw) {
	5881	U8 s[UTF8_MAXBYTES_CASE+1];
	5882
	5883	for (i = 0; i <= 256; i++) { /* just the first 256 */
	5884	uvchr_to_utf8(s, i);
	5885
	5886	if (i < 256 && swash_fetch(sw, s, TRUE)) {
	5887	if (rangestart == -1)
	5888	rangestart = i;
	5889	} else if (rangestart != -1) {
	5890	if (i <= rangestart + 3)
	5891	for (; rangestart < i; rangestart++) {
	5892	const U8 * const e = uvchr_to_utf8(s,rangestart);
	5893	U8 *p;
	5894	for(p = s; p < e; p++)
	5895	put_byte(sv, *p);
	5896	}
	5897	else {
	5898	const U8 *e = uvchr_to_utf8(s,rangestart);
	5899	U8 *p;
	5900	for (p = s; p < e; p++)
	5901	put_byte(sv, *p);
	5902	sv_catpvs(sv, "-");
	5903	e = uvchr_to_utf8(s, i-1);
	5904	for (p = s; p < e; p++)
	5905	put_byte(sv, *p);
	5906	}
	5907	rangestart = -1;
	5908	}
	5909	}
	5910
	5911	sv_catpvs(sv, "..."); /* et cetera */
	5912	}
	5913
	5914	{
	5915	char *s = savesvpv(lv);
	5916	char * const origs = s;
	5917
	5918	while(s && s != '\n') s++;
	5919
	5920	if (*s == '\n') {
	5921	const char * const t = ++s;
	5922
	5923	while (*s) {
	5924	if (*s == '\n')
	5925	*s = ' ';
	5926	s++;
	5927	}
	5928	if (s[-1] == ' ')
	5929	s[-1] = 0;
	5930
	5931	sv_catpv(sv, t);
	5932	}
	5933
	5934	Safefree(origs);
	5935	}
	5936	}
	5937	}
	5938
	5939	Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
	5940	}
	5941	else if (k == BRANCHJ && (OP(o) == UNLESSM \|\| OP(o) == IFMATCH))
	5942	Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags);
	5943	#else
	5944	PERL_UNUSED_CONTEXT;
	5945	PERL_UNUSED_ARG(sv);
	5946	PERL_UNUSED_ARG(o);
	5947	#endif /* DEBUGGING */
	5948	}
	5949
	5950	SV *
	5951	Perl_re_intuit_string(pTHX_ regexp *prog)
	5952	{ /* Assume that RE_INTUIT is set */
	5953	dVAR;
	5954	GET_RE_DEBUG_FLAGS_DECL;
	5955	PERL_UNUSED_CONTEXT;
	5956
	5957	DEBUG_COMPILE_r(
	5958	{
	5959	const char * const s = SvPV_nolen_const(prog->check_substr
	5960	? prog->check_substr : prog->check_utf8);
	5961
	5962	if (!PL_colorset) reginitcolors();
	5963	PerlIO_printf(Perl_debug_log,
	5964	"%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
	5965	PL_colors[4],
	5966	prog->check_substr ? "" : "utf8 ",
	5967	PL_colors[5],PL_colors[0],
	5968	s,
	5969	PL_colors[1],
	5970	(strlen(s) > 60 ? "..." : ""));
	5971	} );
	5972
	5973	return prog->check_substr ? prog->check_substr : prog->check_utf8;
	5974	}
	5975
	5976	void
	5977	Perl_pregfree(pTHX_ struct regexp *r)
	5978	{
	5979	dVAR;
	5980	#ifdef DEBUGGING
	5981	SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
	5982	#endif
	5983	GET_RE_DEBUG_FLAGS_DECL;
	5984
	5985	if (!r \|\| (--r->refcnt > 0))
	5986	return;
	5987	DEBUG_r(if (re_debug_flags && (SvIV(re_debug_flags) & RE_DEBUG_COMPILE)) {
	5988	const char * const s = (r->reganch & ROPT_UTF8)
	5989	? pv_uni_display(dsv, (U8*)r->precomp, r->prelen, 60, UNI_DISPLAY_REGEX)
	5990	: pv_display(dsv, r->precomp, r->prelen, 0, 60);
	5991	const int len = SvCUR(dsv);
	5992	if (!PL_colorset)
	5993	reginitcolors();
	5994	PerlIO_printf(Perl_debug_log,
	5995	"%sFreeing REx:%s %s%.s%s%s\n",
	5996	PL_colors[4],PL_colors[5],PL_colors[0],
	5997	len, len, s,
	5998	PL_colors[1],
	5999	len > 60 ? "..." : "");
	6000	});
	6001
	6002	/* gcov results gave these as non-null 100% of the time, so there's no
	6003	optimisation in checking them before calling Safefree */
	6004	Safefree(r->precomp);
	6005	Safefree(r->offsets); /* 20010421 MJD */
	6006	RX_MATCH_COPY_FREE(r);
	6007	#ifdef PERL_OLD_COPY_ON_WRITE
	6008	if (r->saved_copy)
	6009	SvREFCNT_dec(r->saved_copy);
	6010	#endif
	6011	if (r->substrs) {
	6012	if (r->anchored_substr)
	6013	SvREFCNT_dec(r->anchored_substr);
	6014	if (r->anchored_utf8)
	6015	SvREFCNT_dec(r->anchored_utf8);
	6016	if (r->float_substr)
	6017	SvREFCNT_dec(r->float_substr);
	6018	if (r->float_utf8)
	6019	SvREFCNT_dec(r->float_utf8);
	6020	Safefree(r->substrs);
	6021	}
	6022	if (r->data) {
	6023	int n = r->data->count;
	6024	PAD* new_comppad = NULL;
	6025	PAD* old_comppad;
	6026	PADOFFSET refcnt;
	6027
	6028	while (--n >= 0) {
	6029	/* If you add a ->what type here, update the comment in regcomp.h */
	6030	switch (r->data->what[n]) {
	6031	case 's':
	6032	SvREFCNT_dec((SV*)r->data->data[n]);
	6033	break;
	6034	case 'f':
	6035	Safefree(r->data->data[n]);
	6036	break;
	6037	case 'p':
	6038	new_comppad = (AV*)r->data->data[n];
	6039	break;
	6040	case 'o':
	6041	if (new_comppad == NULL)
	6042	Perl_croak(aTHX_ "panic: pregfree comppad");
	6043	PAD_SAVE_LOCAL(old_comppad,
	6044	/* Watch out for global destruction's random ordering. */
	6045	(SvTYPE(new_comppad) == SVt_PVAV) ? new_comppad : NULL
	6046	);
	6047	OP_REFCNT_LOCK;
	6048	refcnt = OpREFCNT_dec((OP_4tree*)r->data->data[n]);
	6049	OP_REFCNT_UNLOCK;
	6050	if (!refcnt)
	6051	op_free((OP_4tree*)r->data->data[n]);
	6052
	6053	PAD_RESTORE_LOCAL(old_comppad);
	6054	SvREFCNT_dec((SV*)new_comppad);
	6055	new_comppad = NULL;
	6056	break;
	6057	case 'n':
	6058	break;
	6059	case 't':
	6060	{
	6061	reg_trie_data * const trie=(reg_trie_data*)r->data->data[n];
	6062	U32 refcount;
	6063	OP_REFCNT_LOCK;
	6064	refcount = --trie->refcount;
	6065	OP_REFCNT_UNLOCK;
	6066	if ( !refcount ) {
	6067	Safefree(trie->charmap);
	6068	if (trie->widecharmap)
	6069	SvREFCNT_dec((SV*)trie->widecharmap);
	6070	Safefree(trie->states);
	6071	Safefree(trie->trans);
	6072	#ifdef DEBUGGING
	6073	if (trie->words)
	6074	SvREFCNT_dec((SV*)trie->words);
	6075	if (trie->revcharmap)
	6076	SvREFCNT_dec((SV*)trie->revcharmap);
	6077	#endif
	6078	Safefree(r->data->data[n]); /* do this last!!!! */
	6079	}
	6080	break;
	6081	}
	6082	default:
	6083	Perl_croak(aTHX_ "panic: regfree data code '%c'", r->data->what[n]);
	6084	}
	6085	}
	6086	Safefree(r->data->what);
	6087	Safefree(r->data);
	6088	}
	6089	Safefree(r->startp);
	6090	Safefree(r->endp);
	6091	Safefree(r);
	6092	}
	6093
	6094	#ifndef PERL_IN_XSUB_RE
	6095	/*
	6096	- regnext - dig the "next" pointer out of a node
	6097	*/
	6098	regnode *
	6099	Perl_regnext(pTHX_ register regnode *p)
	6100	{
	6101	dVAR;
	6102	register I32 offset;
	6103
	6104	if (p == &PL_regdummy)
	6105	return(NULL);
	6106
	6107	offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
	6108	if (offset == 0)
	6109	return(NULL);
	6110
	6111	return(p+offset);
	6112	}
	6113	#endif
	6114
	6115	STATIC void
	6116	S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
	6117	{
	6118	va_list args;
	6119	STRLEN l1 = strlen(pat1);
	6120	STRLEN l2 = strlen(pat2);
	6121	char buf[512];
	6122	SV *msv;
	6123	const char *message;
	6124
	6125	if (l1 > 510)
	6126	l1 = 510;
	6127	if (l1 + l2 > 510)
	6128	l2 = 510 - l1;
	6129	Copy(pat1, buf, l1 , char);
	6130	Copy(pat2, buf + l1, l2 , char);
	6131	buf[l1 + l2] = '\n';
	6132	buf[l1 + l2 + 1] = '\0';
	6133	#ifdef I_STDARG
	6134	/* ANSI variant takes additional second argument */
	6135	va_start(args, pat2);
	6136	#else
	6137	va_start(args);
	6138	#endif
	6139	msv = vmess(buf, &args);
	6140	va_end(args);
	6141	message = SvPV_const(msv,l1);
	6142	if (l1 > 512)
	6143	l1 = 512;
	6144	Copy(message, buf, l1 , char);
	6145	buf[l1-1] = '\0'; /* Overwrite \n */
	6146	Perl_croak(aTHX_ "%s", buf);
	6147	}
	6148
	6149	/* XXX Here's a total kludge. But we need to re-enter for swash routines. */
	6150
	6151	#ifndef PERL_IN_XSUB_RE
	6152	void
	6153	Perl_save_re_context(pTHX)
	6154	{
	6155	dVAR;
	6156
	6157	struct re_save_state *state;
	6158
	6159	SAVEVPTR(PL_curcop);
	6160	SSGROW(SAVESTACK_ALLOC_FOR_RE_SAVE_STATE + 1);
	6161
	6162	state = (struct re_save_state *)(PL_savestack + PL_savestack_ix);
	6163	PL_savestack_ix += SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
	6164	SSPUSHINT(SAVEt_RE_STATE);
	6165
	6166	Copy(&PL_reg_state, state, 1, struct re_save_state);
	6167
	6168	PL_reg_start_tmp = 0;
	6169	PL_reg_start_tmpl = 0;
	6170	PL_reg_oldsaved = NULL;
	6171	PL_reg_oldsavedlen = 0;
	6172	PL_reg_maxiter = 0;
	6173	PL_reg_leftiter = 0;
	6174	PL_reg_poscache = NULL;
	6175	PL_reg_poscache_size = 0;
	6176	#ifdef PERL_OLD_COPY_ON_WRITE
	6177	PL_nrs = NULL;
	6178	#endif
	6179
	6180	/* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
	6181	if (PL_curpm) {
	6182	const REGEXP * const rx = PM_GETRE(PL_curpm);
	6183	if (rx) {
	6184	U32 i;
	6185	for (i = 1; i <= rx->nparens; i++) {
	6186	char digits[TYPE_CHARS(long)];
	6187	const STRLEN len = my_sprintf(digits, "%lu", (long)i);
	6188	GV const const gvp
	6189	= (GV**)hv_fetch(PL_defstash, digits, len, 0);
	6190
	6191	if (gvp) {
	6192	GV * const gv = *gvp;
	6193	if (SvTYPE(gv) == SVt_PVGV && GvSV(gv))
	6194	save_scalar(gv);
	6195	}
	6196	}
	6197	}
	6198	}
	6199	}
	6200	#endif
	6201
	6202	static void
	6203	clear_re(pTHX_ void *r)
	6204	{
	6205	dVAR;
	6206	ReREFCNT_dec((regexp *)r);
	6207	}
	6208
	6209	#ifdef DEBUGGING
	6210
	6211	STATIC void
	6212	S_put_byte(pTHX_ SV *sv, int c)
	6213	{
	6214	if (isCNTRL(c) \|\| c == 255 \|\| !isPRINT(c))
	6215	Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
	6216	else if (c == '-' \|\| c == ']' \|\| c == '\\' \|\| c == '^')
	6217	Perl_sv_catpvf(aTHX_ sv, "\\%c", c);
	6218	else
	6219	Perl_sv_catpvf(aTHX_ sv, "%c", c);
	6220	}
	6221
	6222
	6223	STATIC const regnode *
	6224	S_dumpuntil(pTHX_ const regexp r, const regnode start, const regnode *node,
	6225	const regnode last, SV sv, I32 l)
	6226	{
	6227	dVAR;
	6228	register U8 op = EXACT; /* Arbitrary non-END op. */
	6229	register const regnode *next;
	6230
	6231	while (op != END && (!last \|\| node < last)) {
	6232	/* While that wasn't END last time... */
	6233
	6234	NODE_ALIGN(node);
	6235	op = OP(node);
	6236	if (op == CLOSE)
	6237	l--;
	6238	next = regnext((regnode *)node);
	6239	/* Where, what. */
	6240	if (OP(node) == OPTIMIZED)
	6241	goto after_print;
	6242	regprop(r, sv, node);
	6243	PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
	6244	(int)(2*l + 1), "", SvPVX_const(sv));
	6245	if (next == NULL) /* Next ptr. */
	6246	PerlIO_printf(Perl_debug_log, "(0)");
	6247	else
	6248	PerlIO_printf(Perl_debug_log, "(%"IVdf")", (IV)(next - start));
	6249	(void)PerlIO_putc(Perl_debug_log, '\n');
	6250	after_print:
	6251	if (PL_regkind[(U8)op] == BRANCHJ) {
	6252	register const regnode *nnode = (OP(next) == LONGJMP
	6253	? regnext((regnode *)next)
	6254	: next);
	6255	if (last && nnode > last)
	6256	nnode = last;
	6257	node = dumpuntil(r, start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
	6258	}
	6259	else if (PL_regkind[(U8)op] == BRANCH) {
	6260	node = dumpuntil(r, start, NEXTOPER(node), next, sv, l + 1);
	6261	}
	6262	else if ( PL_regkind[(U8)op] == TRIE ) {
	6263	const I32 n = ARG(node);
	6264	const reg_trie_data * const trie = (reg_trie_data*)r->data->data[n];
	6265	const I32 arry_len = av_len(trie->words)+1;
	6266	I32 word_idx;
	6267	PerlIO_printf(Perl_debug_log,
	6268	"%*s[Words:%d Chars Stored:%d Unique Chars:%d States:%"IVdf"%s]\n",
	6269	(int)(2*(l+3)),
	6270	"",
	6271	trie->wordcount,
	6272	(int)trie->charcount,
	6273	trie->uniquecharcount,
	6274	(IV)trie->laststate-1,
	6275	node->flags ? " EVAL mode" : "");
	6276
	6277	for (word_idx=0; word_idx < arry_len; word_idx++) {
	6278	SV ** const elem_ptr = av_fetch(trie->words,word_idx,0);
	6279	if (elem_ptr) {
	6280	PerlIO_printf(Perl_debug_log, "%*s<%s%s%s>\n",
	6281	(int)(2*(l+4)), "",
	6282	PL_colors[0],
	6283	SvPV_nolen_const(*elem_ptr),
	6284	PL_colors[1]
	6285	);
	6286	/*
	6287	if (next == NULL)
	6288	PerlIO_printf(Perl_debug_log, "(0)\n");
	6289	else
	6290	PerlIO_printf(Perl_debug_log, "(%"IVdf")\n", (IV)(next - start));
	6291	*/
	6292	}
	6293
	6294	}
	6295
	6296	node = NEXTOPER(node);
	6297	node += regarglen[(U8)op];
	6298
	6299	}
	6300	else if ( op == CURLY) { /* "next" might be very big: optimizer */
	6301	node = dumpuntil(r, start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
	6302	NEXTOPER(node) + EXTRA_STEP_2ARGS + 1, sv, l + 1);
	6303	}
	6304	else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
	6305	node = dumpuntil(r, start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
	6306	next, sv, l + 1);
	6307	}
	6308	else if ( op == PLUS \|\| op == STAR) {
	6309	node = dumpuntil(r, start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
	6310	}
	6311	else if (op == ANYOF) {
	6312	/* arglen 1 + class block */
	6313	node += 1 + ((ANYOF_FLAGS(node) & ANYOF_LARGE)
	6314	? ANYOF_CLASS_SKIP : ANYOF_SKIP);
	6315	node = NEXTOPER(node);
	6316	}
	6317	else if (PL_regkind[(U8)op] == EXACT) {
	6318	/* Literal string, where present. */
	6319	node += NODE_SZ_STR(node) - 1;
	6320	node = NEXTOPER(node);
	6321	}
	6322	else {
	6323	node = NEXTOPER(node);
	6324	node += regarglen[(U8)op];
	6325	}
	6326	if (op == CURLYX \|\| op == OPEN)
	6327	l++;
	6328	else if (op == WHILEM)
	6329	l--;
	6330	}
	6331	return node;
	6332	}
	6333
	6334	#endif /* DEBUGGING */
	6335
	6336	/*
	6337	* Local variables:
	6338	* c-indentation-style: bsd
	6339	* c-basic-offset: 4
	6340	* indent-tabs-mode: t
	6341	* End:
	6342	*
	6343	* ex: set ts=8 sts=4 sw=4 noet:
	6344	*/