X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/3c13cae629d936c43bca9d992cc445d93287af8e..13c3945fa95c36eaef31b4e8262adcfb27142c4b:/regcomp.c

diff --git a/regcomp.c b/regcomp.c
index 021e14e..d09d89f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -81,6 +81,7 @@
 #define REG_COMP_C
 #ifdef PERL_IN_XSUB_RE
 #  include "re_comp.h"
+extern const struct regexp_engine my_reg_engine;
 #else
 #  include "regcomp.h"
 #endif
@@ -133,7 +134,6 @@ typedef struct RExC_state_t {
     I32		nestroot;		/* root parens we are in - used by accept */
     I32		extralen;
     I32		seen_zerolen;
-    I32		seen_evals;
     regnode	**open_parens;		/* pointers to open parens */
     regnode	**close_parens;		/* pointers to close parens */
     regnode	*opend;			/* END node in program */
@@ -159,6 +159,7 @@ typedef struct RExC_state_t {
     char 	*starttry;		/* -Dr: where regtry was called. */
 #define RExC_starttry	(pRExC_state->starttry)
 #endif
+    SV		*runtime_code_qr;	/* qr with the runtime code blocks */
 #ifdef DEBUGGING
     const char  *lastparse;
     I32         lastnum;
@@ -193,7 +194,6 @@ typedef struct RExC_state_t {
 #define RExC_nestroot   (pRExC_state->nestroot)
 #define RExC_extralen	(pRExC_state->extralen)
 #define RExC_seen_zerolen	(pRExC_state->seen_zerolen)
-#define RExC_seen_evals	(pRExC_state->seen_evals)
 #define RExC_utf8	(pRExC_state->utf8)
 #define RExC_uni_semantics	(pRExC_state->uni_semantics)
 #define RExC_orig_utf8	(pRExC_state->orig_utf8)
@@ -221,7 +221,7 @@ typedef struct RExC_state_t {
 #define	WORST		0	/* Worst case. */
 #define	HASWIDTH	0x01	/* Known to match non-null strings. */
 
-/* Simple enough to be STAR/PLUS operand, in an EXACT node must be a single
+/* Simple enough to be STAR/PLUS operand; in an EXACT node must be a single
  * character, and if utf8, must be invariant.  Note that this is not the same
  * thing as REGNODE_SIMPLE */
 #define	SIMPLE		0x02
@@ -399,14 +399,18 @@ static const scan_data_t zero_scan_data =
 #define UNI_SEMANTICS (get_regex_charset(RExC_flags) == REGEX_UNICODE_CHARSET)
 #define AT_LEAST_UNI_SEMANTICS (get_regex_charset(RExC_flags) >= REGEX_UNICODE_CHARSET)
 #define ASCII_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_RESTRICTED_CHARSET)
-#define MORE_ASCII_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
 #define AT_LEAST_ASCII_RESTRICTED (get_regex_charset(RExC_flags) >= REGEX_ASCII_RESTRICTED_CHARSET)
+#define ASCII_FOLD_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
 
 #define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
 
-#define OOB_UNICODE		12345678
 #define OOB_NAMEDCLASS		-1
 
+/* There is no code point that is out-of-bounds, so this is problematic.  But
+ * its only current use is to initialize a variable that is always set before
+ * looked at. */
+#define OOB_UNICODE		0xDEADBEEF
+
 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
 
@@ -2631,9 +2635,9 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
  *      problematic sequences.  This delta is used by the caller to adjust the
  *      min length of the match, and the delta between min and max, so that the
  *      optimizer doesn't reject these possibilities based on size constraints.
- * 2)   These sequences are not currently correctly handled by the trie code
- *      either, so it changes the joined node type to ops that are not handled
- *      by trie's, those new ops being EXACTFU_SS and EXACTFU_TRICKYFOLD.
+ * 2)   These sequences require special handling by the trie code, so it
+ *      changes the joined node type to ops for the trie's benefit, those new
+ *      ops being EXACTFU_SS and EXACTFU_TRICKYFOLD.
  * 3)   This is sufficient for the two Greek sequences (described below), but
  *      the one involving the Sharp s (\xDF) needs more.  The node type
  *      EXACTFU_SS is used for an EXACTFU node that contains at least one "ss"
@@ -3380,7 +3384,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  * the end pointer. */
                                 if ( !first ) {
                                     first = cur;
-                                    trietype = noper_trietype;
 				    if ( noper_trietype == NOTHING ) {
 #if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
 					regnode * const noper_next = regnext( noper );
@@ -3388,8 +3391,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 					U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
 #endif
 
-                                        if ( noper_next_trietype )
+                                        if ( noper_next_trietype ) {
 					    trietype = noper_next_trietype;
+                                        } else if (noper_next_type)  {
+                                            /* a NOTHING regop is 1 regop wide. We need at least two
+                                             * for a trie so we can't merge this in */
+                                            first = NULL;
+                                        }
+                                    } else {
+                                        trietype = noper_trietype;
                                     }
                                 } else {
                                     if ( trietype == NOTHING )
@@ -4399,6 +4409,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 	    data->flags |= (OP(scan) == MEOL
 			    ? SF_BEFORE_MEOL
 			    : SF_BEFORE_SEOL);
+	    SCAN_COMMIT(pRExC_state, data, minlenp);
+
 	}
 	else if (  PL_regkind[OP(scan)] == BRANCHJ
 		 /* Lookbehind, or need to calculate parens/evals/stclass: */
@@ -4950,16 +4962,246 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
 }
 #endif
 
-/* public(ish) wrapper for Perl_re_op_compile that only takes an SV
- * pattern rather than a list of OPs */
+/* public(ish) entry point for the perl core's own regex compiling code.
+ * It's actually a wrapper for Perl_re_op_compile that only takes an SV
+ * pattern rather than a list of OPs, and uses the internal engine rather
+ * than the current one */
 
 REGEXP *
 Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
 {
     SV *pat = pattern; /* defeat constness! */
     PERL_ARGS_ASSERT_RE_COMPILE;
-    return Perl_re_op_compile(aTHX_ &pat, 1, NULL, current_re_engine(),
-				NULL, NULL, rx_flags, 0);
+    return Perl_re_op_compile(aTHX_ &pat, 1, NULL,
+#ifdef PERL_IN_XSUB_RE
+                                &my_reg_engine,
+#else
+                                &PL_core_reg_engine,
+#endif
+                                NULL, NULL, rx_flags, 0);
+}
+
+/* see if there are any run-time code blocks in the pattern.
+ * False positives are allowed */
+
+static bool
+S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, OP *expr,
+		    U32 pm_flags, char *pat, STRLEN plen)
+{
+    int n = 0;
+    STRLEN s;
+
+    /* avoid infinitely recursing when we recompile the pattern parcelled up
+     * as qr'...'. A single constant qr// string can't have have any
+     * run-time component in it, and thus, no runtime code. (A non-qr
+     * string, however, can, e.g. $x =~ '(?{})') */
+    if  ((pm_flags & PMf_IS_QR) && expr && expr->op_type == OP_CONST)
+	return 0;
+
+    for (s = 0; s < plen; s++) {
+	if (n < pRExC_state->num_code_blocks
+	    && s == pRExC_state->code_blocks[n].start)
+	{
+	    s = pRExC_state->code_blocks[n].end;
+	    n++;
+	    continue;
+	}
+	/* TODO ideally should handle [..], (#..), /#.../x to reduce false
+	 * positives here */
+	if (pat[s] == '(' && pat[s+1] == '?' &&
+	    (pat[s+2] == '{' || (pat[s+2] == '?' && pat[s+3] == '{'))
+	)
+	    return 1;
+    }
+    return 0;
+}
+
+/* Handle run-time code blocks. We will already have compiled any direct
+ * or indirect literal code blocks. Now, take the pattern 'pat' and make a
+ * copy of it, but with any literal code blocks blanked out and
+ * appropriate chars escaped; then feed it into
+ *
+ *    eval "qr'modified_pattern'"
+ *
+ * For example,
+ *
+ *       a\bc(?{"this was literal"})def'ghi\\jkl(?{"this is runtime"})mno
+ *
+ * becomes
+ *
+ *    qr'a\\bc                       def\'ghi\\\\jkl(?{"this is runtime"})mno'
+ *
+ * After eval_sv()-ing that, grab any new code blocks from the returned qr
+ * and merge them with any code blocks of the original regexp.
+ *
+ * If the pat is non-UTF8, while the evalled qr is UTF8, don't merge;
+ * instead, just save the qr and return FALSE; this tells our caller that
+ * the original pattern needs upgrading to utf8.
+ */
+
+static bool
+S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
+    char *pat, STRLEN plen)
+{
+    SV *qr;
+
+    GET_RE_DEBUG_FLAGS_DECL;
+
+    if (pRExC_state->runtime_code_qr) {
+	/* this is the second time we've been called; this should
+	 * only happen if the main pattern got upgraded to utf8
+	 * during compilation; re-use the qr we compiled first time
+	 * round (which should be utf8 too)
+	 */
+	qr = pRExC_state->runtime_code_qr;
+	pRExC_state->runtime_code_qr = NULL;
+	assert(RExC_utf8 && SvUTF8(qr));
+    }
+    else {
+	int n = 0;
+	STRLEN s;
+	char *p, *newpat;
+	int newlen = plen + 6; /* allow for "qr''x\0" extra chars */
+	SV *sv, *qr_ref;
+	dSP;
+
+	/* determine how many extra chars we need for ' and \ escaping */
+	for (s = 0; s < plen; s++) {
+	    if (pat[s] == '\'' || pat[s] == '\\')
+		newlen++;
+	}
+
+	Newx(newpat, newlen, char);
+	p = newpat;
+	*p++ = 'q'; *p++ = 'r'; *p++ = '\'';
+
+	for (s = 0; s < plen; s++) {
+	    if (n < pRExC_state->num_code_blocks
+		&& s == pRExC_state->code_blocks[n].start)
+	    {
+		/* blank out literal code block */
+		assert(pat[s] == '(');
+		while (s <= pRExC_state->code_blocks[n].end) {
+		    *p++ = ' ';
+		    s++;
+		}
+		s--;
+		n++;
+		continue;
+	    }
+	    if (pat[s] == '\'' || pat[s] == '\\')
+		*p++ = '\\';
+	    *p++ = pat[s];
+	}
+	*p++ = '\'';
+	if (pRExC_state->pm_flags & RXf_PMf_EXTENDED)
+	    *p++ = 'x';
+	*p++ = '\0';
+	DEBUG_COMPILE_r({
+	    PerlIO_printf(Perl_debug_log,
+		"%sre-parsing pattern for runtime code:%s %s\n",
+		PL_colors[4],PL_colors[5],newpat);
+	});
+
+	sv = newSVpvn_flags(newpat, p-newpat-1, RExC_utf8 ? SVf_UTF8 : 0);
+	Safefree(newpat);
+
+	ENTER;
+	SAVETMPS;
+	save_re_context();
+	PUSHSTACKi(PERLSI_REQUIRE);
+	/* this causes the toker to collapse \\ into \ when parsing
+	 * qr''; normally only q'' does this. It also alters hints
+	 * handling */
+	PL_reg_state.re_reparsing = TRUE;
+	eval_sv(sv, G_SCALAR);
+	SvREFCNT_dec(sv);
+	SPAGAIN;
+	qr_ref = POPs;
+	PUTBACK;
+	if (SvTRUE(ERRSV))
+	    Perl_croak(aTHX_ "%s", SvPVx_nolen_const(ERRSV));
+	assert(SvROK(qr_ref));
+	qr = SvRV(qr_ref);
+	assert(SvTYPE(qr) == SVt_REGEXP && RX_ENGINE((REGEXP*)qr)->op_comp);
+	/* the leaving below frees the tmp qr_ref.
+	 * Give qr a life of its own */
+	SvREFCNT_inc(qr);
+	POPSTACK;
+	FREETMPS;
+	LEAVE;
+
+    }
+
+    if (!RExC_utf8 && SvUTF8(qr)) {
+	/* first time through; the pattern got upgraded; save the
+	 * qr for the next time through */
+	assert(!pRExC_state->runtime_code_qr);
+	pRExC_state->runtime_code_qr = qr;
+	return 0;
+    }
+
+
+    /* extract any code blocks within the returned qr//  */
+
+
+    /* merge the main (r1) and run-time (r2) code blocks into one */
+    {
+	RXi_GET_DECL(((struct regexp*)SvANY(qr)), r2);
+	struct reg_code_block *new_block, *dst;
+	RExC_state_t * const r1 = pRExC_state; /* convenient alias */
+	int i1 = 0, i2 = 0;
+
+	if (!r2->num_code_blocks) /* we guessed wrong */
+	    return 1;
+
+	Newx(new_block,
+	    r1->num_code_blocks + r2->num_code_blocks,
+	    struct reg_code_block);
+	dst = new_block;
+
+	while (    i1 < r1->num_code_blocks
+		|| i2 < r2->num_code_blocks)
+	{
+	    struct reg_code_block *src;
+	    bool is_qr = 0;
+
+	    if (i1 == r1->num_code_blocks) {
+		src = &r2->code_blocks[i2++];
+		is_qr = 1;
+	    }
+	    else if (i2 == r2->num_code_blocks)
+		src = &r1->code_blocks[i1++];
+	    else if (  r1->code_blocks[i1].start
+	             < r2->code_blocks[i2].start)
+	    {
+		src = &r1->code_blocks[i1++];
+		assert(src->end < r2->code_blocks[i2].start);
+	    }
+	    else {
+		assert(  r1->code_blocks[i1].start
+		       > r2->code_blocks[i2].start);
+		src = &r2->code_blocks[i2++];
+		is_qr = 1;
+		assert(src->end < r1->code_blocks[i1].start);
+	    }
+
+	    assert(pat[src->start] == '(');
+	    assert(pat[src->end]   == ')');
+	    dst->start	    = src->start;
+	    dst->end	    = src->end;
+	    dst->block	    = src->block;
+	    dst->src_regex  = is_qr ? (REGEXP*) SvREFCNT_inc( (SV*) qr)
+				    : src->src_regex;
+	    dst++;
+	}
+	r1->num_code_blocks += r2->num_code_blocks;
+	Safefree(r1->code_blocks);
+	r1->code_blocks = new_block;
+    }
+
+    SvREFCNT_dec(qr);
+    return 1;
 }
 
 
@@ -4990,8 +5232,9 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
  *
  * orig_rx_flags contains RXf_* flags. See perlreapi.pod for more details.
  *
- * pm_flags contains the PMf_* flags from the calling PMOP. Currently
- * we're only interested in PMf_HAS_CV and PMf_IS_QR.
+ * pm_flags contains the PMf_* flags, typically based on those from the
+ * pm_flags field of the related PMOP. Currently we're only interested in
+ * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL.
  *
  * We can't allocate space until we know how big the compiled form will be,
  * but we can't compile it (and thus know how big it is) until we've got a
@@ -5009,7 +5252,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
 REGEXP *
 Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 		    OP *expr, const regexp_engine* eng, REGEXP *VOL old_re,
-		     int *is_bare_re, U32 orig_rx_flags, U32 pm_flags)
+		     bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags)
 {
     dVAR;
     REGEXP *rx;
@@ -5032,7 +5275,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     bool used_setjump = FALSE;
     regex_charset initial_charset = get_regex_charset(orig_rx_flags);
     bool code_is_utf8 = 0;
-
+    bool VOL recompile = 0;
+    bool runtime_code = 0;
     U8 jump_ret = 0;
     dJMPENV;
     scan_data_t data;
@@ -5075,9 +5319,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 	PL_L1PosixGraph = _new_invlist_C_array(L1PosixGraph_invlist);
 	PL_PosixGraph = _new_invlist_C_array(PosixGraph_invlist);
 
-	PL_L1PosixAlnum = _new_invlist_C_array(L1PosixAlnum_invlist);
-	PL_PosixAlnum = _new_invlist_C_array(PosixAlnum_invlist);
-
 	PL_L1PosixLower = _new_invlist_C_array(L1PosixLower_invlist);
 	PL_PosixLower = _new_invlist_C_array(PosixLower_invlist);
 
@@ -5110,7 +5351,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     pRExC_state->num_code_blocks = 0;
 
     if (is_bare_re)
-	*is_bare_re = 0;
+	*is_bare_re = FALSE;
 
     if (expr && (expr->op_type == OP_LIST ||
 		(expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) {
@@ -5159,6 +5400,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 	    OP *o = NULL;
 	    int n = 0;
 	    bool utf8 = 0;
+            STRLEN orig_patlen = 0;
 
 	    if (pRExC_state->num_code_blocks) {
 		o = cLISTOPx(expr)->op_first;
@@ -5200,24 +5442,54 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 		    o = o->op_sibling;;
 		}
 
+		if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
+			(sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
+		{
+		    sv_setsv(pat, sv);
+		    /* overloading involved: all bets are off over literal
+		     * code. Pretend we haven't seen it */
+		    pRExC_state->num_code_blocks -= n;
+		    n = 0;
+                    rx = NULL;
+
+		}
+		else  {
+                    while (SvAMAGIC(msv)
+                            && (sv = AMG_CALLunary(msv, string_amg))
+                            && sv != msv)
+                    {
+                        msv = sv;
+                        SvGETMAGIC(msv);
+                    }
+                    if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP)
+                        msv = SvRV(msv);
+                    orig_patlen = SvCUR(pat);
+                    sv_catsv_nomg(pat, msv);
+                    rx = msv;
+                    if (code)
+                        pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
+                }
+
 		/* extract any code blocks within any embedded qr//'s */
-		rx = msv;
-		if (SvROK(rx))
-		    rx = SvRV(rx);
-		if (SvTYPE(rx) == SVt_REGEXP
+		if (rx && SvTYPE(rx) == SVt_REGEXP
 		    && RX_ENGINE((REGEXP*)rx)->op_comp)
 		{
 
 		    RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri);
 		    if (ri->num_code_blocks) {
 			int i;
+			/* the presence of an embedded qr// with code means
+			 * we should always recompile: the text of the
+			 * qr// may not have changed, but it may be a
+			 * different closure than last time */
+			recompile = 1;
 			Renew(pRExC_state->code_blocks,
 			    pRExC_state->num_code_blocks + ri->num_code_blocks,
 			    struct reg_code_block);
 			pRExC_state->num_code_blocks += ri->num_code_blocks;
 			for (i=0; i < ri->num_code_blocks; i++) {
 			    struct reg_code_block *src, *dst;
-			    STRLEN offset =  SvCUR(pat)
+			    STRLEN offset =  orig_patlen
 				+ ((struct regexp *)SvANY(rx))->pre_prefix;
 			    assert(n < pRExC_state->num_code_blocks);
 			    src = &ri->code_blocks[i];
@@ -5233,27 +5505,20 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 			}
 		    }
 		}
-
-		if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
-			(sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
-		{
-		    sv_setsv(pat, sv);
-		    /* overloading involved: all bets are off over literal
-		     * code. Pretend we haven't seen it */
-		    pRExC_state->num_code_blocks -= n;
-		    n = 0;
-
-		}
-		else {
-		    sv_catsv_nomg(pat, msv);
-		    if (code)
-			pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
-		}
 	    }
 	    SvSETMAGIC(pat);
 	}
-	else
+	else {
+            SV *sv;
 	    pat = *patternp;
+            while (SvAMAGIC(pat)
+                    && (sv = AMG_CALLunary(pat, string_amg))
+                    && sv != pat)
+            {
+                pat = sv;
+                SvGETMAGIC(pat);
+            }
+        }
 
 	/* handle bare regex: foo =~ $re */
 	{
@@ -5262,7 +5527,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 		re = SvRV(re);
 	    if (SvTYPE(re) == SVt_REGEXP) {
 		if (is_bare_re)
-		    *is_bare_re = 1;
+		    *is_bare_re = TRUE;
 		SvREFCNT_inc(re);
 		Safefree(pRExC_state->code_blocks);
 		return (REGEXP*)re;
@@ -5330,6 +5595,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
     RExC_uni_semantics = 0;
     RExC_contains_locale = 0;
+    pRExC_state->runtime_code_qr = NULL;
 
     /****************** LONG JUMP TARGET HERE***********************/
     /* Longjmp back to here if have to switch in midstream to utf8 */
@@ -5417,18 +5683,31 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /* return old regex if pattern hasn't changed */
 
     if (   old_re
+        && !recompile
 	&& !!RX_UTF8(old_re) == !!RExC_utf8
 	&& RX_PRECOMP(old_re)
 	&& RX_PRELEN(old_re) == plen
 	&& memEQ(RX_PRECOMP(old_re), exp, plen))
     {
-	ReREFCNT_inc(old_re);
-	if (used_setjump) {
-	    JMPENV_POP;
+	/* with runtime code, always recompile */
+	runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
+					    exp, plen);
+	if (!runtime_code) {
+	    if (used_setjump) {
+		JMPENV_POP;
+	    }
+	    Safefree(pRExC_state->code_blocks);
+	    return old_re;
 	}
-	Safefree(pRExC_state->code_blocks);
-	return old_re;
     }
+    else if ((pm_flags & PMf_USE_RE_EVAL)
+		/* this second condition covers the non-regex literal case,
+		 * i.e.  $foo =~ '(?{})'. */
+		|| ( !PL_reg_state.re_reparsing && IN_PERL_COMPILETIME
+		    && (PL_hints & HINT_RE_EVAL))
+    )
+	runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
+			    exp, plen);
 
 #ifdef TRIE_STUDY_OPT
     restudied = 0;
@@ -5449,12 +5728,24 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_precomp = exp;
     RExC_flags = rx_flags;
     RExC_pm_flags = pm_flags;
+
+    if (runtime_code) {
+	if (PL_tainting && PL_tainted)
+	    Perl_croak(aTHX_ "Eval-group in insecure regular expression");
+
+	if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
+	    /* whoops, we have a non-utf8 pattern, whilst run-time code
+	     * got compiled as utf8. Try again with a utf8 pattern */
+	     JMPENV_JUMP(UTF8_LONGJMP);
+	}
+    }
+    assert(!pRExC_state->runtime_code_qr);
+
     RExC_sawback = 0;
 
     RExC_seen = 0;
     RExC_in_lookbehind = 0;
     RExC_seen_zerolen = *exp == '^' ? -1 : 0;
-    RExC_seen_evals = 0;
     RExC_extralen = 0;
     RExC_override_recoding = 0;
 
@@ -5483,7 +5774,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        * Clever compilers notice this and complain. --jhi */
     REGC((U8)REG_MAGIC, (char*)RExC_emit);
 #endif
-    DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n"));
+    DEBUG_PARSE_r(
+	PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n");
+        RExC_lastnum=0;
+        RExC_lastparse=NULL;
+    );
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
 	RExC_precomp = NULL;
 	Safefree(pRExC_state->code_blocks);
@@ -5651,8 +5946,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_emit_bound = ri->program + RExC_size + 1;
     pRExC_state->code_index = 0;
 
-    /* Store the count of eval-groups for security checks: */
-    RExC_rx->seen_evals = RExC_seen_evals;
     REGC((U8)REG_MAGIC, (char*) RExC_emit++);
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
 	ReREFCNT_dec(rx);   
@@ -5805,7 +6098,7 @@ reStudy:
 	else if ((!sawopen || !RExC_sawback) &&
 	    (OP(first) == STAR &&
 	    PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
-	    !(r->extflags & RXf_ANCH) && !(RExC_seen & REG_SEEN_EVAL))
+	    !(r->extflags & RXf_ANCH) && !pRExC_state->num_code_blocks)
 	{
 	    /* turn .* into ^.* with an implied $*=1 */
 	    const int type =
@@ -5818,7 +6111,7 @@ reStudy:
 	    goto again;
 	}
 	if (sawplus && !sawlookahead && (!sawopen || !RExC_sawback)
-	    && !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
+	    && !pRExC_state->num_code_blocks) /* May examine pos and $& */
 	    /* x+ must match at the 1st pos of run of x's */
 	    r->intflags |= PREGf_SKIP;
 
@@ -6091,7 +6384,7 @@ reStudy:
 	r->extflags |= RXf_GPOS_SEEN;
     if (RExC_seen & REG_SEEN_LOOKBEHIND)
 	r->extflags |= RXf_LOOKBEHIND_SEEN;
-    if (RExC_seen & REG_SEEN_EVAL)
+    if (pRExC_state->num_code_blocks)
 	r->extflags |= RXf_EVAL_SEEN;
     if (RExC_seen & REG_SEEN_CANY)
 	r->extflags |= RXf_CANY_SEEN;
@@ -6099,6 +6392,8 @@ reStudy:
 	r->intflags |= PREGf_VERBARG_SEEN;
     if (RExC_seen & REG_SEEN_CUTGROUP)
 	r->intflags |= PREGf_CUTGROUP_SEEN;
+    if (pm_flags & PMf_USE_RE_EVAL)
+	r->intflags |= PREGf_USE_RE_EVAL;
     if (RExC_paren_names)
         RXp_PAREN_NAMES(r) = MUTABLE_HV(SvREFCNT_inc(RExC_paren_names));
     else
@@ -6586,8 +6881,10 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
 	    do {
 		RExC_parse++;
 	    } while (isALNUM(*RExC_parse));
+    } else {
+	RExC_parse++; /* so the <- from the vFAIL is after the offending character */
+        vFAIL("Group name must start with a non-digit word character");
     }
-
     if ( flags ) {
         SV* sv_name
 	    = newSVpvn_flags(name_start, (int)(RExC_parse - name_start),
@@ -6611,7 +6908,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
             Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
 		       (unsigned long) flags);
         }
-        /* NOT REACHED */
+        assert(0); /* NOT REACHED */
     }
     return NULL;
 }
@@ -7017,8 +7314,6 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
     }
 }
 
-#ifndef PERL_IN_XSUB_RE
-
 STATIC IV
 S_invlist_search(pTHX_ SV* const invlist, const UV cp)
 {
@@ -7061,6 +7356,8 @@ S_invlist_search(pTHX_ SV* const invlist, const UV cp)
     return high - 1;
 }
 
+#ifndef PERL_IN_XSUB_RE
+
 void
 Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV end, U8* swatch)
 {
@@ -7144,7 +7441,6 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
     return;
 }
 
-
 void
 Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
 {
@@ -7649,6 +7945,18 @@ Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
 
 #endif
 
+STATIC bool
+S__invlist_contains_cp(pTHX_ SV* const invlist, const UV cp)
+{
+    /* Does <invlist> contain code point <cp> as part of the set? */
+
+    IV index = invlist_search(invlist, cp);
+
+    PERL_ARGS_ASSERT__INVLIST_CONTAINS_CP;
+
+    return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index);
+}
+
 PERL_STATIC_INLINE SV*
 S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) {
     return _add_range_to_invlist(invlist, cp, cp);
@@ -7806,6 +8114,36 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
     return TRUE;
 }
 
+PERL_STATIC_INLINE UV
+S_invlist_highest(pTHX_ SV* const invlist)
+{
+    /* Returns the highest code point that matches an inversion list.  This API
+     * has an ambiguity, as it returns 0 under either the highest is actually
+     * 0, or if the list is empty.  If this distinction matters to you, check
+     * for emptiness before calling this function */
+
+    UV len = invlist_len(invlist);
+    UV *array;
+
+    PERL_ARGS_ASSERT_INVLIST_HIGHEST;
+
+    if (len == 0) {
+	return 0;
+    }
+
+    array = invlist_array(invlist);
+
+    /* The last element in the array in the inversion list always starts a
+     * range that goes to infinity.  That range may be for code points that are
+     * matched in the inversion list, or it may be for ones that aren't
+     * matched.  In the latter case, the highest code point in the set is one
+     * less than the beginning of this range; otherwise it is the final element
+     * of this range: infinity */
+    return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
+           ? UV_MAX
+           : array[len - 1] - 1;
+}
+
 #ifndef PERL_IN_XSUB_RE
 SV *
 Perl__invlist_contents(pTHX_ SV* const invlist)
@@ -7861,6 +8199,77 @@ S_invlist_dump(pTHX_ SV* const invlist, const char * const header)
 }
 #endif
 
+#if 0
+bool
+S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b)
+{
+    /* Return a boolean as to if the two passed in inversion lists are
+     * identical.  The final argument, if TRUE, says to take the complement of
+     * the second inversion list before doing the comparison */
+
+    UV* array_a = invlist_array(a);
+    UV* array_b = invlist_array(b);
+    UV len_a = invlist_len(a);
+    UV len_b = invlist_len(b);
+
+    UV i = 0;		    /* current index into the arrays */
+    bool retval = TRUE;     /* Assume are identical until proven otherwise */
+
+    PERL_ARGS_ASSERT__INVLISTEQ;
+
+    /* If are to compare 'a' with the complement of b, set it
+     * up so are looking at b's complement. */
+    if (complement_b) {
+
+        /* The complement of nothing is everything, so <a> would have to have
+         * just one element, starting at zero (ending at infinity) */
+        if (len_b == 0) {
+            return (len_a == 1 && array_a[0] == 0);
+        }
+        else if (array_b[0] == 0) {
+
+            /* Otherwise, to complement, we invert.  Here, the first element is
+             * 0, just remove it.  To do this, we just pretend the array starts
+             * one later, and clear the flag as we don't have to do anything
+             * else later */
+
+            array_b++;
+            len_b--;
+            complement_b = FALSE;
+        }
+        else {
+
+            /* But if the first element is not zero, we unshift a 0 before the
+             * array.  The data structure reserves a space for that 0 (which
+             * should be a '1' right now), so physical shifting is unneeded,
+             * but temporarily change that element to 0.  Before exiting the
+             * routine, we must restore the element to '1' */
+            array_b--;
+            len_b++;
+            array_b[0] = 0;
+        }
+    }
+
+    /* Make sure that the lengths are the same, as well as the final element
+     * before looping through the remainder.  (Thus we test the length, final,
+     * and first elements right off the bat) */
+    if (len_a != len_b || array_a[len_a-1] != array_b[len_a-1]) {
+        retval = FALSE;
+    }
+    else for (i = 0; i < len_a - 1; i++) {
+        if (array_a[i] != array_b[i]) {
+            retval = FALSE;
+            break;
+        }
+    }
+
+    if (complement_b) {
+        array_b[0] = 1;
+    }
+    return retval;
+}
+#endif
+
 #undef HEADER_LENGTH
 #undef INVLIST_INITIAL_LENGTH
 #undef TO_INTERNAL_SIZE
@@ -8068,7 +8477,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 		    ret = reganode(pRExC_state,
 				   ((! FOLD)
 				     ? NREF
-				     : (MORE_ASCII_RESTRICTED)
+				     : (ASCII_FOLD_RESTRICTED)
 				       ? NREFFA
                                        : (AT_LEAST_UNI_SEMANTICS)
                                          ? NREFFU
@@ -8225,7 +8634,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     		     num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
                 }
                 goto gen_recurse_regop;
-                /* NOT REACHED */
+                assert(0); /* NOT REACHED */
             case '+':
                 if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
                     RExC_parse++;
@@ -8295,7 +8704,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 nextchar(pRExC_state);
                 return ret;
             } /* named and numeric backreferences */
-            /* NOT REACHED */
+            assert(0); /* NOT REACHED */
 
 	    case '?':           /* (??...) */
 		is_logical = 1;
@@ -8309,98 +8718,51 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 		/* FALL THROUGH */
 	    case '{':           /* (?{...}) */
 	    {
-		I32 count = 1;
 		U32 n = 0;
-		char c;
-		char *s = RExC_parse;
+		struct reg_code_block *cb;
 
 		RExC_seen_zerolen++;
-		RExC_seen |= REG_SEEN_EVAL;
 
-		if (   pRExC_state->num_code_blocks
-		    && pRExC_state->code_index < pRExC_state->num_code_blocks
-		    && pRExC_state->code_blocks[pRExC_state->code_index].start
-			== (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
+		if (   !pRExC_state->num_code_blocks
+		    || pRExC_state->code_index >= pRExC_state->num_code_blocks
+		    || pRExC_state->code_blocks[pRExC_state->code_index].start
+			!= (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
 			    - RExC_start)
 		) {
-		    /* this is a pre-compiled literal (?{}) */
-		    struct reg_code_block *cb =
-			&pRExC_state->code_blocks[pRExC_state->code_index];
-		    RExC_parse = RExC_start + cb->end;
-		    if (SIZE_ONLY)
-			RExC_seen_evals++;
-		    else {
-			OP *o = cb->block;
-			if (cb->src_regex) {
-			    n = add_data(pRExC_state, 2, "rl");
-			    RExC_rxi->data->data[n] =
-				(void*)SvREFCNT_inc((SV*)cb->src_regex);
-			RExC_rxi->data->data[n+1] = (void*)o->op_next;
-			}
-			else {
-			    n = add_data(pRExC_state, 1,
-				   (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l");
-			    RExC_rxi->data->data[n] = (void*)o->op_next;
-			}
-		    }
-		    pRExC_state->code_index++;
+		    if (RExC_pm_flags & PMf_USE_RE_EVAL)
+			FAIL("panic: Sequence (?{...}): no code block found\n");
+		    FAIL("Eval-group not allowed at runtime, use re 'eval'");
 		}
-		else {
-		    while (count && (c = *RExC_parse)) {
-			if (c == '\\') {
-			    if (RExC_parse[1])
-				RExC_parse++;
-			}
-			else if (c == '{')
-			    count++;
-			else if (c == '}')
-			    count--;
-			RExC_parse++;
-		    }
-		    if (*RExC_parse != ')') {
-			RExC_parse = s;
-			vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
-		    }
-		    if (!SIZE_ONLY) {
-			PAD *pad;
-			OP_4tree *sop, *rop;
-			SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
-
-			ENTER;
-			Perl_save_re_context(aTHX);
-			rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
-			sop->op_private |= OPpREFCOUNTED;
-			/* re_dup will OpREFCNT_inc */
-			OpREFCNT_set(sop, 1);
-			LEAVE;
-
-			n = add_data(pRExC_state, 3, "nop");
-			RExC_rxi->data->data[n] = (void*)rop;
-			RExC_rxi->data->data[n+1] = (void*)sop;
-			RExC_rxi->data->data[n+2] = (void*)pad;
-			SvREFCNT_dec(sv);
+		/* this is a pre-compiled code block (?{...}) */
+		cb = &pRExC_state->code_blocks[pRExC_state->code_index];
+		RExC_parse = RExC_start + cb->end;
+		if (!SIZE_ONLY) {
+		    OP *o = cb->block;
+		    if (cb->src_regex) {
+			n = add_data(pRExC_state, 2, "rl");
+			RExC_rxi->data->data[n] =
+			    (void*)SvREFCNT_inc((SV*)cb->src_regex);
+			RExC_rxi->data->data[n+1] = (void*)o;
 		    }
-		    else {						/* First pass */
-			if (PL_reginterp_cnt < ++RExC_seen_evals
-			    && IN_PERL_RUNTIME)
-			    /* No compiled RE interpolated, has runtime
-			       components ===> unsafe.  */
-			    FAIL("Eval-group not allowed at runtime, use re 'eval'");
-			if (PL_tainting && PL_tainted)
-			    FAIL("Eval-group in insecure regular expression");
-    #if PERL_VERSION > 8
-			if (IN_PERL_COMPILETIME)
-			    PL_cv_has_eval = 1;
-    #endif
+		    else {
+			n = add_data(pRExC_state, 1,
+			       (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l");
+			RExC_rxi->data->data[n] = (void*)o;
 		    }
 		}
+		pRExC_state->code_index++;
 		nextchar(pRExC_state);
 
 		if (is_logical) {
+                    regnode *eval;
 		    ret = reg_node(pRExC_state, LOGICAL);
-		    if (!SIZE_ONLY)
+                    eval = reganode(pRExC_state, EVAL, n);
+		    if (!SIZE_ONLY) {
 			ret->flags = 2;
-                    REGTAIL(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
+                        /* for later propagation into (??{}) return value */
+                        eval->flags = (U8) (RExC_flags & RXf_PMf_COMPILETIME);
+                    }
+                    REGTAIL(pRExC_state, ret, eval);
                     /* deal with the length of this later - MJD */
 		    return ret;
 		}
@@ -8930,7 +9292,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 	}
 	else
 	    FAIL("Junk on end of regexp");	/* "Can't happen". */
-	/* NOTREACHED */
+	assert(0); /* NOTREACHED */
     }
 
     if (RExC_in_lookbehind) {
@@ -9463,6 +9825,81 @@ S_reg_recode(pTHX_ const char value, SV **encp)
     return uv;
 }
 
+PERL_STATIC_INLINE U8
+S_compute_EXACTish(pTHX_ RExC_state_t *pRExC_state)
+{
+    U8 op;
+
+    PERL_ARGS_ASSERT_COMPUTE_EXACTISH;
+
+    if (! FOLD) {
+        return EXACT;
+    }
+
+    op = get_regex_charset(RExC_flags);
+    if (op >= REGEX_ASCII_RESTRICTED_CHARSET) {
+        op--; /* /a is same as /u, and map /aa's offset to what /a's would have
+                 been, so there is no hole */
+    }
+
+    return op + EXACTF;
+}
+
+PERL_STATIC_INLINE void
+S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, STRLEN len, UV code_point)
+{
+    /* This knows the details about sizing an EXACTish node, and potentially
+     * populating it with a single character.  If <len> is non-zero, it assumes
+     * that the node has already been populated, and just does the sizing,
+     * ignoring <code_point>.  Otherwise it looks at <code_point> and
+     * calculates what <len> should be.  In pass 1, it sizes the node
+     * appropriately.  In pass 2, it additionally will populate the node's
+     * STRING with <code_point>, if <len> is 0.
+     *
+     * It knows that under FOLD, UTF characters and the Latin Sharp S must be
+     * folded (the latter only when the rules indicate it can match 'ss') */
+
+    bool len_passed_in = cBOOL(len != 0);
+    U8 character[UTF8_MAXBYTES_CASE+1];
+
+    PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT;
+
+    if (! len_passed_in) {
+        if (UTF) {
+            if (FOLD) {
+                to_uni_fold(NATIVE_TO_UNI(code_point), character, &len);
+            }
+            else {
+                uvchr_to_utf8( character, code_point);
+                len = UTF8SKIP(character);
+            }
+        }
+        else if (! FOLD
+                 || code_point != LATIN_SMALL_LETTER_SHARP_S
+                 || ASCII_FOLD_RESTRICTED
+                 || ! AT_LEAST_UNI_SEMANTICS)
+        {
+            *character = (U8) code_point;
+            len = 1;
+        }
+        else {
+            *character = 's';
+            *(character + 1) = 's';
+            len = 2;
+        }
+    }
+
+    if (SIZE_ONLY) {
+        RExC_size += STR_SZ(len);
+    }
+    else {
+        RExC_emit += STR_SZ(len);
+        STR_LEN(node) = len;
+        if (! len_passed_in) {
+            Copy((char *) character, STRING(node), len, char);
+        }
+    }
+}
 
 /*
  - regatom - the lowest level
@@ -9479,8 +9916,46 @@ S_reg_recode(pTHX_ const char value, SV **encp)
    sequence, we return.
 
    Note: we have to be careful with escapes, as they can be both literal
-   and special, and in the case of \10 and friends can either, depending
-   on context. Specifically there are two separate switches for handling
+   and special, and in the case of \10 and friends, context determines which.
+
+   A summary of the code structure is:
+
+   switch (first_byte) {
+	cases for each special:
+	    handle this special;
+	    break;
+	case '\\':
+	    switch (2nd byte) {
+		cases for each unambiguous special:
+		    handle this special;
+		    break;
+		cases for each ambigous special/literal:
+		    disambiguate;
+		    if (special)  handle here
+		    else goto defchar;
+		default: // unambiguously literal:
+		    goto defchar;
+	    }
+	default:  // is a literal char
+	    // FALL THROUGH
+	defchar:
+	    create EXACTish node for literal;
+	    while (more input and node isn't full) {
+		switch (input_byte) {
+		   cases for each special;
+                       make sure parse pointer is set so that the next call to
+                           regatom will see this special first
+                       goto loopdone; // EXACTish node terminated by prev. char
+		   default:
+		       append char to EXACTISH node;
+		}
+	        get next input byte;
+	    }
+        loopdone:
+   }
+   return the generated node;
+
+   Specifically there are two separate switches for handling
    escape sequences, with the one for handling literal escapes requiring
    a dummy entry for all of the special escapes that are actually handled
    by the other.
@@ -9634,43 +10109,17 @@ tryagain:
 	    *flagp |= HASWIDTH;
 	    goto finish_meta_pat;
 	case 'w':
-	    switch (get_regex_charset(RExC_flags)) {
-		case REGEX_LOCALE_CHARSET:
-		    op = ALNUML;
-		    break;
-		case REGEX_UNICODE_CHARSET:
-		    op = ALNUMU;
-		    break;
-		case REGEX_ASCII_RESTRICTED_CHARSET:
-		case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-		    op = ALNUMA;
-		    break;
-		case REGEX_DEPENDS_CHARSET:
-		    op = ALNUM;
-		    break;
-		default:
-		    goto bad_charset;
+	    op = ALNUM + get_regex_charset(RExC_flags);
+            if (op > ALNUMA) {  /* /aa is same as /a */
+                op = ALNUMA;
             }
 	    ret = reg_node(pRExC_state, op);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    goto finish_meta_pat;
 	case 'W':
-	    switch (get_regex_charset(RExC_flags)) {
-		case REGEX_LOCALE_CHARSET:
-		    op = NALNUML;
-		    break;
-		case REGEX_UNICODE_CHARSET:
-		    op = NALNUMU;
-		    break;
-		case REGEX_ASCII_RESTRICTED_CHARSET:
-		case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-		    op = NALNUMA;
-		    break;
-		case REGEX_DEPENDS_CHARSET:
-		    op = NALNUM;
-		    break;
-		default:
-		    goto bad_charset;
+	    op = NALNUM + get_regex_charset(RExC_flags);
+            if (op > NALNUMA) { /* /aa is same as /a */
+                op = NALNUMA;
             }
 	    ret = reg_node(pRExC_state, op);
 	    *flagp |= HASWIDTH|SIMPLE;
@@ -9678,22 +10127,9 @@ tryagain:
 	case 'b':
 	    RExC_seen_zerolen++;
 	    RExC_seen |= REG_SEEN_LOOKBEHIND;
-	    switch (get_regex_charset(RExC_flags)) {
-		case REGEX_LOCALE_CHARSET:
-		    op = BOUNDL;
-		    break;
-		case REGEX_UNICODE_CHARSET:
-		    op = BOUNDU;
-		    break;
-		case REGEX_ASCII_RESTRICTED_CHARSET:
-		case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-		    op = BOUNDA;
-		    break;
-		case REGEX_DEPENDS_CHARSET:
-		    op = BOUND;
-		    break;
-		default:
-		    goto bad_charset;
+	    op = BOUND + get_regex_charset(RExC_flags);
+            if (op > BOUNDA) {  /* /aa is same as /a */
+                op = BOUNDA;
             }
 	    ret = reg_node(pRExC_state, op);
 	    FLAGS(ret) = get_regex_charset(RExC_flags);
@@ -9702,103 +10138,45 @@ tryagain:
 	case 'B':
 	    RExC_seen_zerolen++;
 	    RExC_seen |= REG_SEEN_LOOKBEHIND;
-	    switch (get_regex_charset(RExC_flags)) {
-		case REGEX_LOCALE_CHARSET:
-		    op = NBOUNDL;
-		    break;
-		case REGEX_UNICODE_CHARSET:
-		    op = NBOUNDU;
-		    break;
-		case REGEX_ASCII_RESTRICTED_CHARSET:
-		case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-		    op = NBOUNDA;
-		    break;
-		case REGEX_DEPENDS_CHARSET:
-		    op = NBOUND;
-		    break;
-		default:
-		    goto bad_charset;
+	    op = NBOUND + get_regex_charset(RExC_flags);
+            if (op > NBOUNDA) { /* /aa is same as /a */
+                op = NBOUNDA;
             }
 	    ret = reg_node(pRExC_state, op);
 	    FLAGS(ret) = get_regex_charset(RExC_flags);
 	    *flagp |= SIMPLE;
 	    goto finish_meta_pat;
 	case 's':
-	    switch (get_regex_charset(RExC_flags)) {
-		case REGEX_LOCALE_CHARSET:
-		    op = SPACEL;
-		    break;
-		case REGEX_UNICODE_CHARSET:
-		    op = SPACEU;
-		    break;
-		case REGEX_ASCII_RESTRICTED_CHARSET:
-		case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-		    op = SPACEA;
-		    break;
-		case REGEX_DEPENDS_CHARSET:
-		    op = SPACE;
-		    break;
-		default:
-		    goto bad_charset;
+	    op = SPACE + get_regex_charset(RExC_flags);
+            if (op > SPACEA) {  /* /aa is same as /a */
+                op = SPACEA;
             }
 	    ret = reg_node(pRExC_state, op);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    goto finish_meta_pat;
 	case 'S':
-	    switch (get_regex_charset(RExC_flags)) {
-		case REGEX_LOCALE_CHARSET:
-		    op = NSPACEL;
-		    break;
-		case REGEX_UNICODE_CHARSET:
-		    op = NSPACEU;
-		    break;
-		case REGEX_ASCII_RESTRICTED_CHARSET:
-		case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-		    op = NSPACEA;
-		    break;
-		case REGEX_DEPENDS_CHARSET:
-		    op = NSPACE;
-		    break;
-		default:
-		    goto bad_charset;
-            }
-	    ret = reg_node(pRExC_state, op);
-	    *flagp |= HASWIDTH|SIMPLE;
-	    goto finish_meta_pat;
-	case 'd':
-	    switch (get_regex_charset(RExC_flags)) {
-		case REGEX_LOCALE_CHARSET:
-		    op = DIGITL;
-		    break;
-		case REGEX_ASCII_RESTRICTED_CHARSET:
-		case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-		    op = DIGITA;
-		    break;
-		case REGEX_DEPENDS_CHARSET: /* No difference between these */
-		case REGEX_UNICODE_CHARSET:
-		    op = DIGIT;
-		    break;
-		default:
-		    goto bad_charset;
+	    op = NSPACE + get_regex_charset(RExC_flags);
+            if (op > NSPACEA) { /* /aa is same as /a */
+                op = NSPACEA;
             }
 	    ret = reg_node(pRExC_state, op);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    goto finish_meta_pat;
 	case 'D':
-	    switch (get_regex_charset(RExC_flags)) {
-		case REGEX_LOCALE_CHARSET:
-		    op = NDIGITL;
-		    break;
-		case REGEX_ASCII_RESTRICTED_CHARSET:
-		case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-		    op = NDIGITA;
-		    break;
-		case REGEX_DEPENDS_CHARSET: /* No difference between these */
-		case REGEX_UNICODE_CHARSET:
-		    op = NDIGIT;
-		    break;
-		default:
-		    goto bad_charset;
+            op = NDIGIT;
+            goto join_D_and_d;
+	case 'd':
+            op = DIGIT;
+        join_D_and_d:
+            {
+                U8 offset = get_regex_charset(RExC_flags);
+                if (offset == REGEX_UNICODE_CHARSET) {
+                    offset = REGEX_DEPENDS_CHARSET;
+                }
+                else if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
+                    offset = REGEX_ASCII_RESTRICTED_CHARSET;
+                }
+                op += offset;
             }
 	    ret = reg_node(pRExC_state, op);
 	    *flagp |= HASWIDTH|SIMPLE;
@@ -9899,7 +10277,7 @@ tryagain:
                 ret = reganode(pRExC_state,
                                ((! FOLD)
                                  ? NREF
-				 : (MORE_ASCII_RESTRICTED)
+				 : (ASCII_FOLD_RESTRICTED)
 				   ? NREFFA
                                    : (AT_LEAST_UNI_SEMANTICS)
                                      ? NREFFU
@@ -9949,6 +10327,7 @@ tryagain:
                         vFAIL("Reference to nonexistent or unclosed group");
                 }
 		if (!isg && num > 9 && num >= RExC_npar)
+                    /* Probably a character specified in octal, e.g. \35 */
 		    goto defchar;
 		else {
 		    char * const parse_start = RExC_parse - 1; /* MJD */
@@ -9969,7 +10348,7 @@ tryagain:
 		    ret = reganode(pRExC_state,
 				   ((! FOLD)
 				     ? REF
-				     : (MORE_ASCII_RESTRICTED)
+				     : (ASCII_FOLD_RESTRICTED)
 				       ? REFFA
                                        : (AT_LEAST_UNI_SEMANTICS)
                                          ? REFFU
@@ -10020,20 +10399,14 @@ tryagain:
 	    STRLEN foldlen;
 	    U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
             U8 node_type;
+            bool next_is_quantifier;
 
 	    /* Is this a LATIN LOWER CASE SHARP S in an EXACTFU node?  If so,
 	     * it is folded to 'ss' even if not utf8 */
 	    bool is_exactfu_sharp_s;
 
 	    ender = 0;
-            node_type = ((! FOLD) ? EXACT
-		        : (LOC)
-			  ? EXACTFL
-			  : (MORE_ASCII_RESTRICTED)
-			    ? EXACTFA
-			    : (AT_LEAST_UNI_SEMANTICS)
-			      ? EXACTFU
-			      : EXACTF);
+            node_type = compute_EXACTish(pRExC_state);
 	    ret = reg_node(pRExC_state, node_type);
 	    s = STRING(ret);
 
@@ -10163,38 +10536,39 @@ tryagain:
 			    break;
 			}
 		    case 'x':
-			if (*++p == '{') {
-			    char* const e = strchr(p, '}');
+			{
+			    STRLEN brace_len = len;
+			    UV result;
+			    const char* error_msg;
 
-			    if (!e) {
-				RExC_parse = p + 1;
-				vFAIL("Missing right brace on \\x{}");
+			    bool valid = grok_bslash_x(p,
+						       &result,
+						       &brace_len,
+						       &error_msg,
+						       1);
+			    p += brace_len;
+			    if (! valid) {
+				RExC_parse = p;	/* going to die anyway; point
+						   to exact spot of failure */
+				vFAIL(error_msg);
 			    }
 			    else {
-                                I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                                    | PERL_SCAN_DISALLOW_PREFIX;
-                                STRLEN numlen = e - p - 1;
-				ender = grok_hex(p + 1, &numlen, &flags, NULL);
-				if (ender > 0xff)
-				    REQUIRE_UTF8;
-				p = e + 1;
+				ender = result;
 			    }
+			    if (PL_encoding && ender < 0x100) {
+				goto recode_encoding;
+			    }
+			    if (ender > 0xff) {
+				REQUIRE_UTF8;
+			    }
+			    break;
 			}
-			else {
-                            I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-			    STRLEN numlen = 2;
-			    ender = grok_hex(p, &numlen, &flags, NULL);
-			    p += numlen;
-			}
-			if (PL_encoding && ender < 0x100)
-			    goto recode_encoding;
-			break;
 		    case 'c':
 			p++;
 			ender = grok_bslash_c(*p++, UTF, SIZE_ONLY);
 			break;
 		    case '0': case '1': case '2': case '3':case '4':
-		    case '5': case '6': case '7': case '8':case '9':
+		    case '5': case '6': case '7':
 			if (*p == '0' ||
 			    (isDIGIT(p[1]) && atoi(p) >= RExC_npar))
 			{
@@ -10227,7 +10601,7 @@ tryagain:
 			    FAIL("Trailing \\");
 			/* FALL THROUGH */
 		    default:
-			if (!SIZE_ONLY&& isALPHA(*p)) {
+			if (!SIZE_ONLY&& isALNUMC(*p)) {
 			    ckWARN2reg(p + 1, "Unrecognized escape \\%.1s passed through", p);
 			}
 			goto normal_default;
@@ -10261,6 +10635,20 @@ tryagain:
 			              && ender == LATIN_SMALL_LETTER_SHARP_S);
 		if ( RExC_flags & RXf_PMf_EXTENDED)
 		    p = regwhite( pRExC_state, p );
+
+                /* If the next thing is a quantifier, it applies to this
+                 * character only, which means that this character has to be in
+                 * its own node and can't just be appended to the string in an
+                 * existing node, so if there are already other characters in
+                 * the node, close the node with just them, and set up to do
+                 * this character again next time through, when it will be the
+                 * only thing in its new node */
+                if ((next_is_quantifier = (p < RExC_end && ISMULT2(p))) && len)
+		{
+                    p = oldp;
+                    goto loopdone;
+                }
+
 		if ((UTF && FOLD) || is_exactfu_sharp_s) {
 		    /* Prime the casefolded buffer.  Locale rules, which apply
 		     * only to code points < 256, aren't known until execution,
@@ -10277,114 +10665,24 @@ tryagain:
 			    foldlen = 2;
 			}
 		    }
-		    else if (isASCII(ender)) {	/* Note: Here can't also be LOC
-						 */
-			ender = toLOWER(ender);
-			*tmpbuf = (U8) ender;
-			foldlen = 1;
-		    }
-		    else if (! MORE_ASCII_RESTRICTED && ! LOC) {
-
-			/* Locale and /aa require more selectivity about the
-			 * fold, so are handled below.  Otherwise, here, just
-			 * use the fold */
-			ender = toFOLD_uni(ender, tmpbuf, &foldlen);
-		    }
-		    else {
-			/* Under locale rules or /aa we are not to mix,
-			 * respectively, ords < 256 or ASCII with non-.  So
-			 * reject folds that mix them, using only the
-			 * non-folded code point.  So do the fold to a
-			 * temporary, and inspect each character in it. */
-			U8 trialbuf[UTF8_MAXBYTES_CASE+1];
-			U8* s = trialbuf;
-			UV tmpender = toFOLD_uni(ender, trialbuf, &foldlen);
-			U8* e = s + foldlen;
-			bool fold_ok = TRUE;
-
-			while (s < e) {
-			    if (isASCII(*s)
-				|| (LOC && (UTF8_IS_INVARIANT(*s)
-					   || UTF8_IS_DOWNGRADEABLE_START(*s))))
-			    {
-				fold_ok = FALSE;
-				break;
-			    }
-			    s += UTF8SKIP(s);
-			}
-			if (fold_ok) {
-			    Copy(trialbuf, tmpbuf, foldlen, U8);
-			    ender = tmpender;
-			}
-			else {
-			    uvuni_to_utf8(tmpbuf, ender);
-			    foldlen = UNISKIP(ender);
-			}
-		    }
-		}
-		if (p < RExC_end && ISMULT2(p)) { /* Back off on ?+*. */
-		    if (len)
-			p = oldp;
-		    else if (UTF || is_exactfu_sharp_s) {
-			 if (FOLD) {
-			      /* Emit all the Unicode characters. */
-			      STRLEN numlen;
-			      for (foldbuf = tmpbuf;
-				   foldlen;
-				   foldlen -= numlen) {
-
-				   /* tmpbuf has been constructed by us, so we
-				    * know it is valid utf8 */
-				   ender = valid_utf8_to_uvchr(foldbuf, &numlen);
-				   if (numlen > 0) {
-					const STRLEN unilen = reguni(pRExC_state, ender, s);
-					s       += unilen;
-					len     += unilen;
-					/* In EBCDIC the numlen
-					 * and unilen can differ. */
-					foldbuf += numlen;
-					if (numlen >= foldlen)
-					     break;
-				   }
-				   else
-					break; /* "Can't happen." */
-			      }
-			 }
-			 else {
-			      const STRLEN unilen = reguni(pRExC_state, ender, s);
-			      if (unilen > 0) {
-				   s   += unilen;
-				   len += unilen;
-			      }
-			 }
-		    }
 		    else {
-			len++;
-			REGC((char)ender, s++);
+			ender = _to_uni_fold_flags(ender, tmpbuf, &foldlen,
+                                FOLD_FLAGS_FULL
+                                 | ((LOC) ?  FOLD_FLAGS_LOCALE
+                                          : (ASCII_FOLD_RESTRICTED)
+                                            ? FOLD_FLAGS_NOMIX_ASCII
+                                            : 0)
+                            );
 		    }
-		    break;
 		}
                 if (UTF || is_exactfu_sharp_s) {
-		     if (FOLD) {
-		          /* Emit all the Unicode characters. */
-			  STRLEN numlen;
-			  for (foldbuf = tmpbuf;
-			       foldlen;
-			       foldlen -= numlen) {
-			       ender = valid_utf8_to_uvchr(foldbuf, &numlen);
-			       if (numlen > 0) {
-				    const STRLEN unilen = reguni(pRExC_state, ender, s);
-				    len     += unilen;
-				    s       += unilen;
-				    /* In EBCDIC the numlen
-				     * and unilen can differ. */
-				    foldbuf += numlen;
-				    if (numlen >= foldlen)
-					 break;
-			       }
-			       else
-				    break;
-			  }
+                    if (FOLD) {
+                        if (! SIZE_ONLY) {
+                            /* Emit all the Unicode characters. */
+                            Copy(tmpbuf, s, foldlen, char);
+                        }
+                        len += foldlen;
+                        s += foldlen;
 		     }
 		     else {
 			  const STRLEN unilen = reguni(pRExC_state, ender, s);
@@ -10393,10 +10691,26 @@ tryagain:
 			       len += unilen;
 			  }
 		     }
+
+                     /* The loop increments <len> each time, as all but this
+                      * path through it add a single byte to the EXACTish node.
+                      * But this one has changed len to be the correct final
+                      * value, so subtract one to cancel out the increment that
+                      * follows */
 		     len--;
 		}
 		else {
 		    REGC((char)ender, s++);
+                }
+
+		if (next_is_quantifier) {
+
+                    /* Here, the next input is a quantifier, and to get here,
+                     * the current character is the only one in the node.
+                     * Also, here <len> doesn't include the final byte for this
+                     * character */
+                    len++;
+                    goto loopdone;
 		}
 	    }
 	loopdone:   /* Jumped to when encounters something that shouldn't be in
@@ -10415,22 +10729,12 @@ tryagain:
 	    if (len == 1 && UNI_IS_INVARIANT(ender))
 		*flagp |= SIMPLE;
 
-	    if (SIZE_ONLY)
-		RExC_size += STR_SZ(len);
-	    else {
-		STR_LEN(ret) = len;
-		RExC_emit += STR_SZ(len);
-            }
+            alloc_maybe_populate_EXACT(pRExC_state, ret, len, 0);
 	}
 	break;
     }
 
     return(ret);
-
-/* Jumped to when an unrecognized character set is encountered */
-bad_charset:
-    Perl_croak(aTHX_ "panic: Unknown regex character set encoding: %u", get_regex_charset(RExC_flags));
-    return(NULL);
 }
 
 STATIC char *
@@ -10505,7 +10809,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 		    switch (skip) {
 		    case 4:
 			if (memEQ(posixcc, "word", 4)) /* this is not POSIX, this is the Perl \w */
-			    namedclass = complement ? ANYOF_NALNUM : ANYOF_ALNUM;
+			    namedclass = ANYOF_ALNUM;
 			break;
 		    case 5:
 			/* Names all of length 5.  */
@@ -10515,57 +10819,63 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 			switch (posixcc[4]) {
 			case 'a':
 			    if (memEQ(posixcc, "alph", 4)) /* alpha */
-				namedclass = complement ? ANYOF_NALPHA : ANYOF_ALPHA;
+				namedclass = ANYOF_ALPHA;
 			    break;
 			case 'e':
 			    if (memEQ(posixcc, "spac", 4)) /* space */
-				namedclass = complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
+				namedclass = ANYOF_PSXSPC;
 			    break;
 			case 'h':
 			    if (memEQ(posixcc, "grap", 4)) /* graph */
-				namedclass = complement ? ANYOF_NGRAPH : ANYOF_GRAPH;
+				namedclass = ANYOF_GRAPH;
 			    break;
 			case 'i':
 			    if (memEQ(posixcc, "asci", 4)) /* ascii */
-				namedclass = complement ? ANYOF_NASCII : ANYOF_ASCII;
+				namedclass = ANYOF_ASCII;
 			    break;
 			case 'k':
 			    if (memEQ(posixcc, "blan", 4)) /* blank */
-				namedclass = complement ? ANYOF_NBLANK : ANYOF_BLANK;
+				namedclass = ANYOF_BLANK;
 			    break;
 			case 'l':
 			    if (memEQ(posixcc, "cntr", 4)) /* cntrl */
-				namedclass = complement ? ANYOF_NCNTRL : ANYOF_CNTRL;
+				namedclass = ANYOF_CNTRL;
 			    break;
 			case 'm':
 			    if (memEQ(posixcc, "alnu", 4)) /* alnum */
-				namedclass = complement ? ANYOF_NALNUMC : ANYOF_ALNUMC;
+				namedclass = ANYOF_ALNUMC;
 			    break;
 			case 'r':
 			    if (memEQ(posixcc, "lowe", 4)) /* lower */
-				namedclass = complement ? ANYOF_NLOWER : ANYOF_LOWER;
+				namedclass = ANYOF_LOWER;
 			    else if (memEQ(posixcc, "uppe", 4)) /* upper */
-				namedclass = complement ? ANYOF_NUPPER : ANYOF_UPPER;
+				namedclass = ANYOF_UPPER;
 			    break;
 			case 't':
 			    if (memEQ(posixcc, "digi", 4)) /* digit */
-				namedclass = complement ? ANYOF_NDIGIT : ANYOF_DIGIT;
+				namedclass = ANYOF_DIGIT;
 			    else if (memEQ(posixcc, "prin", 4)) /* print */
-				namedclass = complement ? ANYOF_NPRINT : ANYOF_PRINT;
+				namedclass = ANYOF_PRINT;
 			    else if (memEQ(posixcc, "punc", 4)) /* punct */
-				namedclass = complement ? ANYOF_NPUNCT : ANYOF_PUNCT;
+				namedclass = ANYOF_PUNCT;
 			    break;
 			}
 			break;
 		    case 6:
 			if (memEQ(posixcc, "xdigit", 6))
-			    namedclass = complement ? ANYOF_NXDIGIT : ANYOF_XDIGIT;
+			    namedclass = ANYOF_XDIGIT;
 			break;
 		    }
 
 		    if (namedclass == OOB_NAMEDCLASS)
 			Simple_vFAIL3("POSIX class [:%.*s:] unknown",
 				      t - s - 1, s + 1);
+
+                    /* The #defines are structured so each complement is +1 to
+                     * the normal one */
+                    if (complement) {
+                        namedclass++;
+                    }
 		    assert (posixcc[skip] == ':');
 		    assert (posixcc[skip+1] == ']');
 		} else if (!SIZE_ONLY) {
@@ -10696,7 +11006,7 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
  *                determined at run-time
  * run_time_list  is a SV* that contains text names of properties that are to
  *                be computed at run time.  This concatenates <Xpropertyname>
- *                to it, apppropriately
+ *                to it, appropriately
  * This is essentially DO_POSIX, but we know only the Latin1 values at compile
  * time */
 #define DO_POSIX_LATIN1_ONLY_KNOWN(node, class, destlist, sourcelist,      \
@@ -10722,16 +11032,18 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
     }
 
 /* Like DO_POSIX_LATIN1_ONLY_KNOWN, but for the complement.  A combination of
- * this and DO_N_POSIX */
+ * this and DO_N_POSIX.  Sets <matches_above_unicode> only if it can; unchanged
+ * otherwise */
 #define DO_N_POSIX_LATIN1_ONLY_KNOWN(node, class, destlist, sourcelist,    \
-                              l1_sourcelist, Xpropertyname, run_time_list) \
+       l1_sourcelist, Xpropertyname, run_time_list, matches_above_unicode) \
     if (AT_LEAST_ASCII_RESTRICTED) {                                       \
         _invlist_union_complement_2nd(destlist, sourcelist, &destlist);    \
     }                                                                      \
     else {                                                                 \
         Perl_sv_catpvf(aTHX_ run_time_list, "!utf8::%s\n", Xpropertyname); \
+        matches_above_unicode = TRUE;                                      \
 	if (LOC) {                                                         \
-	    ANYOF_CLASS_SET(node, namedclass);				   \
+            ANYOF_CLASS_SET(node, namedclass);				   \
 	}                                                                  \
 	else {                                                             \
             SV* scratch_list = NULL;                                       \
@@ -10749,171 +11061,6 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
 	}                                                                  \
     }
 
-STATIC U8
-S_set_regclass_bit_fold(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr)
-{
-
-    /* Handle the setting of folds in the bitmap for non-locale ANYOF nodes.
-     * Locale folding is done at run-time, so this function should not be
-     * called for nodes that are for locales.
-     *
-     * This function sets the bit corresponding to the fold of the input
-     * 'value', if not already set.  The fold of 'f' is 'F', and the fold of
-     * 'F' is 'f'.
-     *
-     * It also knows about the characters that are in the bitmap that have
-     * folds that are matchable only outside it, and sets the appropriate lists
-     * and flags.
-     *
-     * It returns the number of bits that actually changed from 0 to 1 */
-
-    U8 stored = 0;
-    U8 fold;
-
-    PERL_ARGS_ASSERT_SET_REGCLASS_BIT_FOLD;
-
-    fold = (AT_LEAST_UNI_SEMANTICS) ? PL_fold_latin1[value]
-                                    : PL_fold[value];
-
-    /* It assumes the bit for 'value' has already been set */
-    if (fold != value && ! ANYOF_BITMAP_TEST(node, fold)) {
-        ANYOF_BITMAP_SET(node, fold);
-        stored++;
-    }
-    if (_HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value) && (! isASCII(value) || ! MORE_ASCII_RESTRICTED)) {
-	/* Certain Latin1 characters have matches outside the bitmap.  To get
-	 * here, 'value' is one of those characters.   None of these matches is
-	 * valid for ASCII characters under /aa, which have been excluded by
-	 * the 'if' above.  The matches fall into three categories:
-	 * 1) They are singly folded-to or -from an above 255 character, as
-	 *    LATIN SMALL LETTER Y WITH DIAERESIS and LATIN CAPITAL LETTER Y
-	 *    WITH DIAERESIS;
-	 * 2) They are part of a multi-char fold with another character in the
-	 *    bitmap, only LATIN SMALL LETTER SHARP S => "ss" fits that bill;
-	 * 3) They are part of a multi-char fold with a character not in the
-	 *    bitmap, such as various ligatures.
-	 * We aren't dealing fully with multi-char folds, except we do deal
-	 * with the pattern containing a character that has a multi-char fold
-	 * (not so much the inverse).
-	 * For types 1) and 3), the matches only happen when the target string
-	 * is utf8; that's not true for 2), and we set a flag for it.
-	 *
-	 * The code below adds to the passed in inversion list the single fold
-	 * closures for 'value'.  The values are hard-coded here so that an
-	 * innocent-looking character class, like /[ks]/i won't have to go out
-	 * to disk to find the possible matches.  XXX It would be better to
-	 * generate these via regen, in case a new version of the Unicode
-	 * standard adds new mappings, though that is not really likely. */
-	switch (value) {
-	    case 'k':
-	    case 'K':
-		/* KELVIN SIGN */
-		*invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212A);
-		break;
-	    case 's':
-	    case 'S':
-		/* LATIN SMALL LETTER LONG S */
-		*invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x017F);
-		break;
-	    case MICRO_SIGN:
-		*invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-						 GREEK_SMALL_LETTER_MU);
-		*invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-						 GREEK_CAPITAL_LETTER_MU);
-		break;
-	    case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
-	    case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
-		/* ANGSTROM SIGN */
-		*invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212B);
-		if (DEPENDS_SEMANTICS) {    /* See DEPENDS comment below */
-		    *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-						     PL_fold_latin1[value]);
-		}
-		break;
-	    case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
-		*invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-					LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
-		break;
-	    case LATIN_SMALL_LETTER_SHARP_S:
-		*invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-					LATIN_CAPITAL_LETTER_SHARP_S);
-
-		/* Under /a, /d, and /u, this can match the two chars "ss" */
-		if (! MORE_ASCII_RESTRICTED) {
-		    add_alternate(alternate_ptr, (U8 *) "ss", 2);
-
-		    /* And under /u or /a, it can match even if the target is
-		     * not utf8 */
-		    if (AT_LEAST_UNI_SEMANTICS) {
-			ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8;
-		    }
-		}
-		break;
-	    case 'F': case 'f':
-	    case 'I': case 'i':
-	    case 'L': case 'l':
-	    case 'T': case 't':
-	    case 'A': case 'a':
-	    case 'H': case 'h':
-	    case 'J': case 'j':
-	    case 'N': case 'n':
-	    case 'W': case 'w':
-	    case 'Y': case 'y':
-                /* These all are targets of multi-character folds from code
-                 * points that require UTF8 to express, so they can't match
-                 * unless the target string is in UTF-8, so no action here is
-                 * necessary, as regexec.c properly handles the general case
-                 * for UTF-8 matching */
-		break;
-	    default:
-		/* Use deprecated warning to increase the chances of this
-		 * being output */
-		ckWARN2regdep(RExC_parse, "Perl folding rules are not up-to-date for 0x%x; please use the perlbug utility to report;", value);
-		break;
-	}
-    }
-    else if (DEPENDS_SEMANTICS
-	    && ! isASCII(value)
-	    && PL_fold_latin1[value] != value)
-    {
-	   /* Under DEPENDS rules, non-ASCII Latin1 characters match their
-	    * folds only when the target string is in UTF-8.  We add the fold
-	    * here to the list of things to match outside the bitmap, which
-	    * won't be looked at unless it is UTF8 (or else if something else
-	    * says to look even if not utf8, but those things better not happen
-	    * under DEPENDS semantics. */
-	*invlist_ptr = add_cp_to_invlist(*invlist_ptr, PL_fold_latin1[value]);
-    }
-
-    return stored;
-}
-
-
-PERL_STATIC_INLINE U8
-S_set_regclass_bit(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr)
-{
-    /* This inline function sets a bit in the bitmap if not already set, and if
-     * appropriate, its fold, returning the number of bits that actually
-     * changed from 0 to 1 */
-
-    U8 stored;
-
-    PERL_ARGS_ASSERT_SET_REGCLASS_BIT;
-
-    if (ANYOF_BITMAP_TEST(node, value)) {   /* Already set */
-	return 0;
-    }
-
-    ANYOF_BITMAP_SET(node, value);
-    stored = 1;
-
-    if (FOLD && ! LOC) {	/* Locale folds aren't known until runtime */
-	stored += set_regclass_bit_fold(pRExC_state, node, value, invlist_ptr, alternate_ptr);
-    }
-
-    return stored;
-}
-
 STATIC void
 S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len)
 {
@@ -10932,6 +11079,15 @@ S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len)
     return;
 }
 
+/* The names of properties whose definitions are not known at compile time are
+ * stored in this SV, after a constant heading.  So if the length has been
+ * changed since initialization, then there is a run-time definition. */
+#define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len)
+
+/* This converts the named class defined in regcomp.h to its equivalent class
+ * number defined in handy.h. */
+#define namedclass_to_classnum(class)  ((class) / 2)
+
 /*
    parse a class specification and produce either an ANYOF node that
    matches the pattern or perhaps will be optimized into an EXACTish node
@@ -10944,12 +11100,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
 {
     dVAR;
     register UV nextvalue;
-    register IV prevvalue = OOB_UNICODE;
+    register UV prevvalue = OOB_UNICODE;
     register IV range = 0;
     UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
     register regnode *ret;
     STRLEN numlen;
-    IV namedclass;
+    IV namedclass = OOB_NAMEDCLASS;
     char *rangebegin = NULL;
     bool need_class = 0;
     bool allow_full_fold = TRUE;   /* Assume wants multi-char folding */
@@ -10957,6 +11113,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
     STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
 				      than just initialized.  */
     SV* properties = NULL;    /* Code points that match \p{} \P{} */
+    SV* posixes = NULL;     /* Code points that match classes like, [:word:],
+                               extended beyond the Latin1 range */
     UV element_count = 0;   /* Number of distinct elements in the class.
 			       Optimizations may be possible if this is tiny */
     UV n;
@@ -10971,37 +11129,34 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
 
     /* Set if a component of this character class is user-defined; just passed
      * on to the engine */
-    UV has_user_defined_property = 0;
-
-    /* code points this node matches that can't be stored in the bitmap */
-    SV* nonbitmap = NULL;
-
-    /* The items that are to match that aren't stored in the bitmap, but are a
-     * result of things that are stored there.  This is the fold closure of
-     * such a character, either because it has DEPENDS semantics and shouldn't
-     * be matched unless the target string is utf8, or is a code point that is
-     * too large for the bit map, as for example, the fold of the MICRO SIGN is
-     * above 255.  This all is solely for performance reasons.  By having this
-     * code know the outside-the-bitmap folds that the bitmapped characters are
-     * involved with, we don't have to go out to disk to find the list of
-     * matches, unless the character class includes code points that aren't
-     * storable in the bit map.  That means that a character class with an 's'
-     * in it, for example, doesn't need to go out to disk to find everything
-     * that matches.  A 2nd list is used so that the 'nonbitmap' list is kept
-     * empty unless there is something whose fold we don't know about, and will
-     * have to go out to the disk to find. */
-    SV* l1_fold_invlist = NULL;
+    bool has_user_defined_property = FALSE;
+
+    /* inversion list of code points this node matches only when the target
+     * string is in UTF-8.  (Because is under /d) */
+    SV* depends_list = NULL;
+
+    /* inversion list of code points this node matches.  For much of the
+     * function, it includes only those that match regardless of the utf8ness
+     * of the target string */
+    SV* cp_list = NULL;
 
     /* List of multi-character folds that are matched by this node */
     AV* unicode_alternate  = NULL;
 #ifdef EBCDIC
+    /* In a range, counts how many 0-2 of the ends of it came from literals,
+     * not escapes.  Thus we can tell if 'A' was input vs \x{C1} */
     UV literal_endpoint = 0;
 #endif
-    UV stored = 0;  /* how many chars stored in the bitmap */
+    bool invert = FALSE;    /* Is this class to be complemented */
+
+    /* Is there any thing like \W or [:^digit:] that matches above the legal
+     * Unicode range? */
+    bool runtime_posix_matches_above_Unicode = FALSE;
 
     regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
         case we need to change the emitted regop to an EXACT. */
     const char * orig_parse = RExC_parse;
+    const I32 orig_size = RExC_size;
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_REGCLASS;
@@ -11022,8 +11177,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
     if (UCHARAT(RExC_parse) == '^') {	/* Complement of range. */
 	RExC_naughty++;
 	RExC_parse++;
-	if (!SIZE_ONLY)
-	    ANYOF_FLAGS(ret) |= ANYOF_INVERT;
+        invert = TRUE;
 
         /* We have decided to not allow multi-char folds in inverted character
 	 * classes, due to the confusion that can happen, especially with
@@ -11045,7 +11199,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
 	if (LOC) {
 	    ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
 	}
-	ANYOF_BITMAP_ZERO(ret);
 	listsv = newSVpvs("# comment\n");
 	initial_listsv_len = SvCUR(listsv);
     }
@@ -11145,9 +11298,9 @@ parseit:
 		    n = 1;
 		}
 		if (!SIZE_ONLY) {
-                    SV** invlistsvp;
                     SV* invlist;
                     char* name;
+
 		    if (UCHARAT(RExC_parse) == '^') {
 			 RExC_parse++;
 			 n--;
@@ -11186,10 +11339,7 @@ parseit:
                     if (   ! swash
                         || ! SvROK(swash)
                         || ! SvTYPE(SvRV(swash)) == SVt_PVHV
-                        || ! (invlistsvp =
-				hv_fetchs(MUTABLE_HV(SvRV(swash)),
-                                "INVLIST", FALSE))
-                        || ! (invlist = *invlistsvp))
+                        || ! (invlist = _get_swash_invlist(swash)))
 		    {
                         if (swash) {
                             SvREFCNT_dec(swash);
@@ -11202,11 +11352,15 @@ parseit:
                         Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n",
                                         (value == 'p' ? '+' : '!'),
                                         name);
-                        has_user_defined_property = 1;
+                        has_user_defined_property = TRUE;
 
                         /* We don't know yet, so have to assume that the
                          * property could match something in the Latin1 range,
-                         * hence something that isn't utf8 */
+                         * hence something that isn't utf8.  Note that this
+                         * would cause things in <depends_list> to match
+                         * inappropriately, except that any \p{}, including
+                         * this one forces Unicode semantics, which means there
+                         * is <no depends_list> */
                         ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP_NON_UTF8;
                     }
                     else {
@@ -11214,17 +11368,14 @@ parseit:
                         /* Here, did get the swash and its inversion list.  If
                          * the swash is from a user-defined property, then this
                          * whole character class should be regarded as such */
-                        SV** user_defined_svp =
-                                            hv_fetchs(MUTABLE_HV(SvRV(swash)),
-                                                        "USER_DEFINED", FALSE);
-                        if (user_defined_svp) {
-                            has_user_defined_property
-                                                    |= SvUV(*user_defined_svp);
-                        }
+                        has_user_defined_property =
+                                                _is_swash_user_defined(swash);
 
                         /* Invert if asking for the complement */
                         if (value == 'P') {
-			    _invlist_union_complement_2nd(properties, invlist, &properties);
+			    _invlist_union_complement_2nd(properties,
+                                                          invlist,
+                                                          &properties);
 
                             /* The swash can't be used as-is, because we've
 			     * inverted things; delay removing it to here after
@@ -11271,22 +11422,18 @@ parseit:
 		}
 		break;
 	    case 'x':
-		if (*RExC_parse == '{') {
-                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                        | PERL_SCAN_DISALLOW_PREFIX;
-		    char * const e = strchr(RExC_parse++, '}');
-                    if (!e)
-                        vFAIL("Missing right brace on \\x{}");
-
-		    numlen = e - RExC_parse;
-		    value = grok_hex(RExC_parse, &numlen, &flags, NULL);
-		    RExC_parse = e + 1;
-		}
-		else {
-                    I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-		    numlen = 2;
-		    value = grok_hex(RExC_parse, &numlen, &flags, NULL);
+		RExC_parse--;	/* function expects to be pointed at the 'x' */
+		{
+		    const char* error_msg;
+		    bool valid = grok_bslash_x(RExC_parse,
+					       &value,
+					       &numlen,
+					       &error_msg,
+					       1);
 		    RExC_parse += numlen;
+		    if (! valid) {
+			vFAIL(error_msg);
+		    }
 		}
 		if (PL_encoding && value < 0x100)
 		    goto recode_encoding;
@@ -11330,15 +11477,20 @@ parseit:
 	    literal_endpoint++;
 #endif
 
-	if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
-
-	    /* What matches in a locale is not known until runtime, so need to
-	     * (one time per class) allocate extra space to pass to regexec.
-	     * The space will contain a bit for each named class that is to be
-	     * matched against.  This isn't needed for \p{} and pseudo-classes,
-	     * as they are not affected by locale, and hence are dealt with
-	     * separately */
-	    if (LOC && namedclass < ANYOF_MAX && ! need_class) {
+            /* What matches in a locale is not known until runtime.  This
+             * includes what the Posix classes (like \w, [:space:]) match.
+             * Room must be reserved (one time per class) to store such
+             * classes, either if Perl is compiled so that locale nodes always
+             * should have this space, or if there is such class info to be
+             * stored.  The space will contain a bit for each named class that
+             * is to be matched against.  This isn't needed for \p{} and
+             * pseudo-classes, as they are not affected by locale, and hence
+             * are dealt with separately */
+	    if (LOC
+                && ! need_class
+                && (ANYOF_LOCALE == ANYOF_CLASS
+                    || (namedclass > OOB_NAMEDCLASS && namedclass < ANYOF_MAX)))
+            {
 		need_class = 1;
 		if (SIZE_ONLY) {
 		    RExC_size += ANYOF_CLASS_SKIP - ANYOF_SKIP;
@@ -11350,6 +11502,8 @@ parseit:
 		ANYOF_FLAGS(ret) |= ANYOF_CLASS;
 	    }
 
+	if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
+
 	    /* a bad range like a-\d, a-[:digit:].  The '-' is taken as a
 	     * literal, as is the character that began the false range, i.e.
 	     * the 'a' in the examples */
@@ -11361,51 +11515,41 @@ parseit:
 		    ckWARN4reg(RExC_parse,
 			       "False [] range \"%*.*s\"",
 			       w, w, rangebegin);
-
-		    stored +=
-                         set_regclass_bit(pRExC_state, ret, '-', &l1_fold_invlist, &unicode_alternate);
-		    if (prevvalue < 256) {
-			stored +=
-                         set_regclass_bit(pRExC_state, ret, (U8) prevvalue, &l1_fold_invlist, &unicode_alternate);
-		    }
-		    else {
-			nonbitmap = add_cp_to_invlist(nonbitmap, prevvalue);
-		    }
+                    cp_list = add_cp_to_invlist(cp_list, '-');
+                    cp_list = add_cp_to_invlist(cp_list, prevvalue);
 		}
 
 		range = 0; /* this was not a true range */
+                element_count += 2; /* So counts for three values */
 	    }
 
-	    if (!SIZE_ONLY) {
-
-		/* Possible truncation here but in some 64-bit environments
-		 * the compiler gets heartburn about switch on 64-bit values.
-		 * A similar issue a little earlier when switching on value.
-		 * --jhi */
+	    if (! SIZE_ONLY) {
 		switch ((I32)namedclass) {
 
 		case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */
-		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                         PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
 		    break;
 		case ANYOF_NALNUMC:
-		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
+		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv,
+                        runtime_posix_matches_above_Unicode);
 		    break;
 		case ANYOF_ALPHA:
-		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                         PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
 		    break;
 		case ANYOF_NALPHA:
-		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
+		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv,
+                        runtime_posix_matches_above_Unicode);
 		    break;
 		case ANYOF_ASCII:
 		    if (LOC) {
 			ANYOF_CLASS_SET(ret, namedclass);
 		    }
                     else {
-                        _invlist_union(properties, PL_ASCII, &properties);
+                        _invlist_union(posixes, PL_ASCII, &posixes);
                     }
 		    break;
 		case ANYOF_NASCII:
@@ -11413,59 +11557,61 @@ parseit:
 			ANYOF_CLASS_SET(ret, namedclass);
 		    }
                     else {
-                        _invlist_union_complement_2nd(properties,
-                                                    PL_ASCII, &properties);
+                        _invlist_union_complement_2nd(posixes,
+                                                    PL_ASCII, &posixes);
                         if (DEPENDS_SEMANTICS) {
                             ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
                         }
                     }
 		    break;
 		case ANYOF_BLANK:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                             PL_PosixBlank, PL_XPosixBlank);
 		    break;
 		case ANYOF_NBLANK:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PosixBlank, PL_XPosixBlank);
 		    break;
 		case ANYOF_CNTRL:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                             PL_PosixCntrl, PL_XPosixCntrl);
 		    break;
 		case ANYOF_NCNTRL:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PosixCntrl, PL_XPosixCntrl);
 		    break;
 		case ANYOF_DIGIT:
 		    /* There are no digits in the Latin1 range outside of
 		     * ASCII, so call the macro that doesn't have to resolve
 		     * them */
-		    DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, properties,
+		    DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, posixes,
                         PL_PosixDigit, "XPosixDigit", listsv);
 		    break;
 		case ANYOF_NDIGIT:
-		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv);
+		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv,
+                        runtime_posix_matches_above_Unicode);
 		    break;
 		case ANYOF_GRAPH:
-		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                         PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
 		    break;
 		case ANYOF_NGRAPH:
-		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
+		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv,
+                        runtime_posix_matches_above_Unicode);
 		    break;
 		case ANYOF_HORIZWS:
-		    /* For these, we use the nonbitmap, as /d doesn't make a
+		    /* For these, we use the cp_list, as /d doesn't make a
 		     * difference in what these match.  There would be problems
 		     * if these characters had folds other than themselves, as
-		     * nonbitmap is subject to folding.  It turns out that \h
+		     * cp_list is subject to folding.  It turns out that \h
 		     * is just a synonym for XPosixBlank */
-		    _invlist_union(nonbitmap, PL_XPosixBlank, &nonbitmap);
+		    _invlist_union(cp_list, PL_XPosixBlank, &cp_list);
 		    break;
 		case ANYOF_NHORIZWS:
-                    _invlist_union_complement_2nd(nonbitmap,
-                                                 PL_XPosixBlank, &nonbitmap);
+                    _invlist_union_complement_2nd(cp_list,
+                                                 PL_XPosixBlank, &cp_list);
 		    break;
 		case ANYOF_LOWER:
 		case ANYOF_NLOWER:
@@ -11488,45 +11634,48 @@ parseit:
 			Xname = "XPosixLower";
 		    }
 		    if (namedclass == ANYOF_LOWER) {
-			DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+			DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                                     ascii_source, l1_source, Xname, listsv);
 		    }
 		    else {
 			DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
-                            properties, ascii_source, l1_source, Xname, listsv);
+                            posixes, ascii_source, l1_source, Xname, listsv,
+                            runtime_posix_matches_above_Unicode);
 		    }
 		    break;
 		}
 		case ANYOF_PRINT:
-		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                         PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
 		    break;
 		case ANYOF_NPRINT:
-		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
+		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv,
+                        runtime_posix_matches_above_Unicode);
 		    break;
 		case ANYOF_PUNCT:
-		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                         PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
 		    break;
 		case ANYOF_NPUNCT:
-		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
+		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv,
+                        runtime_posix_matches_above_Unicode);
 		    break;
 		case ANYOF_PSXSPC:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                             PL_PosixSpace, PL_XPosixSpace);
 		    break;
 		case ANYOF_NPSXSPC:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PosixSpace, PL_XPosixSpace);
 		    break;
 		case ANYOF_SPACE:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                             PL_PerlSpace, PL_XPerlSpace);
 		    break;
 		case ANYOF_NSPACE:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PerlSpace, PL_XPerlSpace);
 		    break;
 		case ANYOF_UPPER:   /* Same as LOWER, above */
@@ -11547,40 +11696,42 @@ parseit:
 			Xname = "XPosixUpper";
 		    }
 		    if (namedclass == ANYOF_UPPER) {
-			DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+			DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                                     ascii_source, l1_source, Xname, listsv);
 		    }
 		    else {
 			DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
-                        properties, ascii_source, l1_source, Xname, listsv);
+                        posixes, ascii_source, l1_source, Xname, listsv,
+                        runtime_posix_matches_above_Unicode);
 		    }
 		    break;
 		}
 		case ANYOF_ALNUM:   /* Really is 'Word' */
-		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                             PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
 		    break;
 		case ANYOF_NALNUM:
-		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                            PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
+		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                            PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv,
+                            runtime_posix_matches_above_Unicode);
 		    break;
 		case ANYOF_VERTWS:
-		    /* For these, we use the nonbitmap, as /d doesn't make a
+		    /* For these, we use the cp_list, as /d doesn't make a
 		     * difference in what these match.  There would be problems
 		     * if these characters had folds other than themselves, as
-		     * nonbitmap is subject to folding */
-		    _invlist_union(nonbitmap, PL_VertSpace, &nonbitmap);
+		     * cp_list is subject to folding */
+		    _invlist_union(cp_list, PL_VertSpace, &cp_list);
 		    break;
 		case ANYOF_NVERTWS:
-                    _invlist_union_complement_2nd(nonbitmap,
-                                                    PL_VertSpace, &nonbitmap);
+                    _invlist_union_complement_2nd(cp_list,
+                                                    PL_VertSpace, &cp_list);
 		    break;
 		case ANYOF_XDIGIT:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                             PL_PosixXDigit, PL_XPosixXDigit);
 		    break;
 		case ANYOF_NXDIGIT:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PosixXDigit, PL_XPosixXDigit);
 		    break;
 		case ANYOF_MAX:
@@ -11591,19 +11742,19 @@ parseit:
 		    break;
 		}
 
-		continue;
+		continue;   /* Go get next character */
 	    }
 	} /* end of namedclass \blah */
 
 	if (range) {
-	    if (prevvalue > (IV)value) /* b-a */ {
+	    if (prevvalue > value) /* b-a */ {
 		const int w = RExC_parse - rangebegin;
 		Simple_vFAIL4("Invalid [] range \"%*.*s\"", w, w, rangebegin);
 		range = 0; /* not a valid range */
 	    }
 	}
 	else {
-	    prevvalue = value; /* save the beginning of the range */
+            prevvalue = value; /* save the beginning of the potential range */
 	    if (RExC_parse+1 < RExC_end
 		&& *RExC_parse == '-'
 		&& RExC_parse[1] != ']')
@@ -11620,135 +11771,452 @@ parseit:
 			       "False [] range \"%*.*s\"",
 			       w, w, rangebegin);
 		    }
-		    if (!SIZE_ONLY)
-			stored +=
-                            set_regclass_bit(pRExC_state, ret, '-', &l1_fold_invlist, &unicode_alternate);
+                    if (!SIZE_ONLY) {
+                        cp_list = add_cp_to_invlist(cp_list, '-');
+                        element_count++;
+                    }
 		} else
 		    range = 1;	/* yeah, it's a range! */
 		continue;	/* but do it the next time */
 	    }
 	}
 
+        /* Here, <prevvalue> is the beginning of the range, if any; or <value>
+         * if not */
+
 	/* non-Latin1 code point implies unicode semantics.  Must be set in
 	 * pass1 so is there for the whole of pass 2 */
 	if (value > 255) {
 	    RExC_uni_semantics = 1;
 	}
 
-	/* now is the next time */
+        /* Ready to process either the single value, or the completed range */
 	if (!SIZE_ONLY) {
-	    if (prevvalue < 256) {
-	        const IV ceilvalue = value < 256 ? value : 255;
-		IV i;
-#ifdef EBCDIC
-		/* In EBCDIC [\x89-\x91] should include
-		 * the \x8e but [i-j] should not. */
-		if (literal_endpoint == 2 &&
-		    ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
-		     (isUPPER(prevvalue) && isUPPER(ceilvalue))))
-		{
-		    if (isLOWER(prevvalue)) {
-			for (i = prevvalue; i <= ceilvalue; i++)
-			    if (isLOWER(i) && !ANYOF_BITMAP_TEST(ret,i)) {
-				stored +=
-                                  set_regclass_bit(pRExC_state, ret, (U8) i, &l1_fold_invlist, &unicode_alternate);
-			    }
-		    } else {
-			for (i = prevvalue; i <= ceilvalue; i++)
-			    if (isUPPER(i) && !ANYOF_BITMAP_TEST(ret,i)) {
-				stored +=
-                                  set_regclass_bit(pRExC_state, ret, (U8) i, &l1_fold_invlist, &unicode_alternate);
-			    }
-		    }
-		}
-		else
-#endif
-		      for (i = prevvalue; i <= ceilvalue; i++) {
-			stored += set_regclass_bit(pRExC_state, ret, (U8) i, &l1_fold_invlist, &unicode_alternate);
-	              }
-	  }
-	  if (value > 255) {
-	    const UV prevnatvalue  = NATIVE_TO_UNI(prevvalue);
-	    const UV natvalue      = NATIVE_TO_UNI(value);
-	    nonbitmap = _add_range_to_invlist(nonbitmap, prevnatvalue, natvalue);
-	}
-#ifdef EBCDIC
-	    literal_endpoint = 0;
+#ifndef EBCDIC
+            cp_list = _add_range_to_invlist(cp_list, prevvalue, value);
+#else
+            UV* this_range = _new_invlist(1);
+            _append_range_to_invlist(this_range, prevvalue, value);
+
+            /* In EBCDIC, the ranges 'A-Z' and 'a-z' are each not contiguous.
+             * If this range was specified using something like 'i-j', we want
+             * to include only the 'i' and the 'j', and not anything in
+             * between, so exclude non-ASCII, non-alphabetics from it.
+             * However, if the range was specified with something like
+             * [\x89-\x91] or [\x89-j], all code points within it should be
+             * included.  literal_endpoint==2 means both ends of the range used
+             * a literal character, not \x{foo} */
+	    if (literal_endpoint == 2
+                && (prevvalue >= 'a' && value <= 'z')
+                    || (prevvalue >= 'A' && value <= 'Z'))
+            {
+                _invlist_intersection(this_range, PL_ASCII, &this_range, );
+                _invlist_intersection(this_range, PL_Alpha, &this_range, );
+            }
+            _invlist_union(cp_list, this_range, &cp_list);
+            literal_endpoint = 0;
 #endif
         }
 
 	range = 0; /* this range (if it was one) is done now */
-    }
+    } /* End of loop through all the text within the brackets */
+
+    /* If the character class contains only a single element, it may be
+     * optimizable into another node type which is smaller and runs faster.
+     * Check if this is the case for this class */
+    if (element_count == 1) {
+        U8 op = END;
+        U8 arg = 0;
+
+        if (namedclass > OOB_NAMEDCLASS) { /* this is a named class, like \w or
+                                              [:digit:] or \p{foo} */
+
+            /* Certain named classes have equivalents that can appear outside a
+             * character class, e.g. \w, \H.  We use these instead of a
+             * character class. */
+            switch ((I32)namedclass) {
+                U8 offset;
+
+                /* The first group is for node types that depend on the charset
+                 * modifier to the regex.  We first calculate the base node
+                 * type, and if it should be inverted */
+
+                case ANYOF_NALNUM:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_ALNUM:
+                    op = ALNUM;
+                    goto join_charset_classes;
+
+                case ANYOF_NSPACE:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_SPACE:
+                    op = SPACE;
+                    goto join_charset_classes;
+
+                case ANYOF_NDIGIT:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_DIGIT:
+                    op = DIGIT;
+
+                  join_charset_classes:
+
+                    /* Now that we have the base node type, we take advantage
+                     * of the enum ordering of the charset modifiers to get the
+                     * exact node type,  For example the base SPACE also has
+                     * SPACEL, SPACEU, and SPACEA */
+
+                    offset = get_regex_charset(RExC_flags);
+
+                    /* /aa is the same as /a for these */
+                    if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
+                        offset = REGEX_ASCII_RESTRICTED_CHARSET;
+                    }
+                    else if (op == DIGIT && offset == REGEX_UNICODE_CHARSET) {
+                        offset = REGEX_DEPENDS_CHARSET; /* There is no DIGITU */
+                    }
 
+                    op += offset;
+
+                    /* The number of varieties of each of these is the same,
+                     * hence, so is the delta between the normal and
+                     * complemented nodes */
+                    if (invert) {
+                        op += NALNUM - ALNUM;
+                    }
+                    break;
 
+                /* The second group doesn't depend of the charset modifiers.
+                 * We just have normal and complemented */
+                case ANYOF_NHORIZWS:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_HORIZWS:
+                  is_horizws:
+                    op = (invert) ? NHORIZWS : HORIZWS;
+                    break;
+
+                case ANYOF_NVERTWS:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_VERTWS:
+                    op = (invert) ? NVERTWS : VERTWS;
+                    break;
+
+                case ANYOF_MAX:
+                    break;
+
+                case ANYOF_NBLANK:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_BLANK:
+                    if (AT_LEAST_UNI_SEMANTICS && ! AT_LEAST_ASCII_RESTRICTED) {
+                        goto is_horizws;
+                    }
+                    /* FALLTHROUGH */
+                default:
+                    /* A generic posix class.  All the /a ones can be handled
+                     * by the POSIXA opcode.  And all are closed under folding
+                     * in the ASCII range, so FOLD doesn't matter */
+                    if (AT_LEAST_ASCII_RESTRICTED
+                        || (! LOC && namedclass == ANYOF_ASCII))
+                    {
+                        /* The odd numbered ones are the complements of the
+                         * next-lower even number one */
+                        if (namedclass % 2 == 1) {
+                            invert = ! invert;
+                            namedclass--;
+                        }
+                        arg = namedclass_to_classnum(namedclass);
+                        op = (invert) ? NPOSIXA : POSIXA;
+                    }
+                    break;
+            }
+        }
+        else if (value == prevvalue) {
+
+            /* Here, the class consists of just a single code point */
+
+            if (invert) {
+                if (! LOC && value == '\n') {
+                    op = REG_ANY; /* Optimize [^\n] */
+                }
+            }
+            else if (value < 256 || UTF) {
+
+                /* Optimize a single value into an EXACTish node, but not if it
+                 * would require converting the pattern to UTF-8. */
+                op = compute_EXACTish(pRExC_state);
+            }
+        } /* Otherwise is a range */
+        else if (! LOC) {   /* locale could vary these */
+            if (prevvalue == '0') {
+                if (value == '9') {
+                    op = (invert) ? NDIGITA : DIGITA;
+                }
+            }
+        }
+
+        /* Here, we have changed <op> away from its initial value iff we found
+         * an optimization */
+        if (op != END) {
+
+            /* Throw away this ANYOF regnode, and emit the calculated one,
+             * which should correspond to the beginning, not current, state of
+             * the parse */
+            const char * cur_parse = RExC_parse;
+            RExC_parse = (char *)orig_parse;
+            if ( SIZE_ONLY) {
+                if (! LOC) {
+
+                    /* To get locale nodes to not use the full ANYOF size would
+                     * require moving the code above that writes the portions
+                     * of it that aren't in other nodes to after this point.
+                     * e.g.  ANYOF_CLASS_SET */
+                    RExC_size = orig_size;
+                }
+            }
+            else {
+                RExC_emit = (regnode *)orig_emit;
+            }
+
+            ret = reg_node(pRExC_state, op);
+
+            if (PL_regkind[op] == POSIXD) {
+                if (! SIZE_ONLY) {
+                    FLAGS(ret) = arg;
+                }
+            }
+            else if (PL_regkind[op] == EXACT) {
+                alloc_maybe_populate_EXACT(pRExC_state, ret, 0, value);
+            }
+
+            RExC_parse = (char *) cur_parse;
+
+            SvREFCNT_dec(listsv);
+            return ret;
+        }
+    }
 
     if (SIZE_ONLY)
         return ret;
     /****** !SIZE_ONLY AFTER HERE *********/
 
-    /* If folding and there are code points above 255, we calculate all
-     * characters that could fold to or from the ones already on the list */
-    if (FOLD && nonbitmap) {
+    /* If folding, we calculate all characters that could fold to or from the
+     * ones already on the list */
+    if (FOLD && cp_list) {
 	UV start, end;	/* End points of code point ranges */
 
 	SV* fold_intersection = NULL;
 
-	/* This is a list of all the characters that participate in folds
-	    * (except marks, etc in multi-char folds */
-	if (! PL_utf8_foldable) {
-	    SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0);
-	    PL_utf8_foldable = _swash_to_invlist(swash);
-            SvREFCNT_dec(swash);
-	}
+        /* In the Latin1 range, the characters that can be folded-to or -from
+         * are precisely the alphabetic characters.  If the highest code point
+         * is within Latin1, we can use the compiled-in list, and not have to
+         * go out to disk. */
+        if (invlist_highest(cp_list) < 256) {
+            _invlist_intersection(PL_L1PosixAlpha, cp_list, &fold_intersection);
+        }
+        else {
 
-	/* This is a hash that for a particular fold gives all characters
-	    * that are involved in it */
-	if (! PL_utf8_foldclosures) {
+            /* Here, there are non-Latin1 code points, so we will have to go
+             * fetch the list of all the characters that participate in folds
+             */
+            if (! PL_utf8_foldable) {
+                SV* swash = swash_init("utf8", "_Perl_Any_Folds",
+                                       &PL_sv_undef, 1, 0);
+                PL_utf8_foldable = _get_swash_invlist(swash);
+                SvREFCNT_dec(swash);
+            }
 
-	    /* If we were unable to find any folds, then we likely won't be
-	     * able to find the closures.  So just create an empty list.
-	     * Folding will effectively be restricted to the non-Unicode rules
-	     * hard-coded into Perl.  (This case happens legitimately during
-	     * compilation of Perl itself before the Unicode tables are
-	     * generated) */
-	    if (invlist_len(PL_utf8_foldable) == 0) {
-		PL_utf8_foldclosures = newHV();
-	    } else {
-		/* If the folds haven't been read in, call a fold function
-		    * to force that */
-		if (! PL_utf8_tofold) {
-		    U8 dummy[UTF8_MAXBYTES+1];
-		    STRLEN dummy_len;
-
-		    /* This particular string is above \xff in both UTF-8 and
-		     * UTFEBCDIC */
-		    to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
-		    assert(PL_utf8_tofold); /* Verify that worked */
-		}
-		PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
-	    }
-	}
+            /* This is a hash that for a particular fold gives all characters
+             * that are involved in it */
+            if (! PL_utf8_foldclosures) {
+
+                /* If we were unable to find any folds, then we likely won't be
+                 * able to find the closures.  So just create an empty list.
+                 * Folding will effectively be restricted to the non-Unicode
+                 * rules hard-coded into Perl.  (This case happens legitimately
+                 * during compilation of Perl itself before the Unicode tables
+                 * are generated) */
+                if (invlist_len(PL_utf8_foldable) == 0) {
+                    PL_utf8_foldclosures = newHV();
+                }
+                else {
+                    /* If the folds haven't been read in, call a fold function
+                     * to force that */
+                    if (! PL_utf8_tofold) {
+                        U8 dummy[UTF8_MAXBYTES+1];
+                        STRLEN dummy_len;
+
+                        /* This particular string is above \xff in both UTF-8
+                         * and UTFEBCDIC */
+                        to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
+                        assert(PL_utf8_tofold); /* Verify that worked */
+                    }
+                    PL_utf8_foldclosures =
+                                        _swash_inversion_hash(PL_utf8_tofold);
+                }
+            }
 
-	/* Only the characters in this class that participate in folds need be
-	 * checked.  Get the intersection of this class and all the possible
-	 * characters that are foldable.  This can quickly narrow down a large
-	 * class */
-	_invlist_intersection(PL_utf8_foldable, nonbitmap, &fold_intersection);
+            /* Only the characters in this class that participate in folds need
+             * be checked.  Get the intersection of this class and all the
+             * possible characters that are foldable.  This can quickly narrow
+             * down a large class */
+            _invlist_intersection(PL_utf8_foldable, cp_list,
+                                  &fold_intersection);
+        }
 
 	/* Now look at the foldable characters in this class individually */
 	invlist_iterinit(fold_intersection);
 	while (invlist_iternext(fold_intersection, &start, &end)) {
 	    UV j;
 
+            /* Locale folding for Latin1 characters is deferred until runtime */
+            if (LOC && start < 256) {
+                start = 256;
+            }
+
 	    /* Look at every character in the range */
 	    for (j = start; j <= end; j++) {
 
-		/* Get its fold */
 		U8 foldbuf[UTF8_MAXBYTES_CASE+1];
 		STRLEN foldlen;
-		const UV f =
-                    _to_uni_fold_flags(j, foldbuf, &foldlen,
-                                       (allow_full_fold) ? FOLD_FLAGS_FULL : 0);
+                UV f;
+
+                if (j < 256) {
+
+                    /* We have the latin1 folding rules hard-coded here so that
+                     * an innocent-looking character class, like /[ks]/i won't
+                     * have to go out to disk to find the possible matches.
+                     * XXX It would be better to generate these via regen, in
+                     * case a new version of the Unicode standard adds new
+                     * mappings, though that is not really likely, and may be
+                     * caught by the default: case of the switch below. */
+
+                    if (PL_fold_latin1[j] != j) {
+
+                        /* ASCII is always matched; non-ASCII is matched only
+                         * under Unicode rules */
+                        if (isASCII(j) || AT_LEAST_UNI_SEMANTICS) {
+                            cp_list =
+                                add_cp_to_invlist(cp_list, PL_fold_latin1[j]);
+                        }
+                        else {
+                            depends_list =
+                             add_cp_to_invlist(depends_list, PL_fold_latin1[j]);
+                        }
+                    }
+
+                    if (HAS_NONLATIN1_FOLD_CLOSURE(j)
+                        && (! isASCII(j) || ! ASCII_FOLD_RESTRICTED))
+                    {
+                        /* Certain Latin1 characters have matches outside
+                         * Latin1, or are multi-character.  To get here, 'j' is
+                         * one of those characters.   None of these matches is
+                         * valid for ASCII characters under /aa, which is why
+                         * the 'if' just above excludes those.  The matches
+                         * fall into three categories:
+                         * 1) They are singly folded-to or -from an above 255
+                         *    character, e.g., LATIN SMALL LETTER Y WITH
+                         *    DIAERESIS and LATIN CAPITAL LETTER Y WITH
+                         *    DIAERESIS;
+                         * 2) They are part of a multi-char fold with another
+                         *    latin1 character; only LATIN SMALL LETTER
+                         *    SHARP S => "ss" fits this;
+                         * 3) They are part of a multi-char fold with a
+                         *    character outside of Latin1, such as various
+                         *    ligatures.
+                        * We aren't dealing fully with multi-char folds, except
+                        * we do deal with the pattern containing a character
+                        * that has a multi-char fold (not so much the inverse).
+                        * For types 1) and 3), the matches only happen when the
+                        * target string is utf8; that's not true for 2), and we
+                        * set a flag for it.
+                        *
+                        * The code below adds the single fold closures for 'j'
+                        * to the inversion list. */
+                        switch (j) {
+                            case 'k':
+                            case 'K':
+                                cp_list =
+                                    add_cp_to_invlist(cp_list, KELVIN_SIGN);
+                                break;
+                            case 's':
+                            case 'S':
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    LATIN_SMALL_LETTER_LONG_S);
+                                break;
+                            case MICRO_SIGN:
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    GREEK_CAPITAL_LETTER_MU);
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    GREEK_SMALL_LETTER_MU);
+                                break;
+                            case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
+                            case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
+                                cp_list =
+                                    add_cp_to_invlist(cp_list, ANGSTROM_SIGN);
+                                break;
+                            case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
+                                cp_list = add_cp_to_invlist(cp_list,
+                                        LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
+                                break;
+                            case LATIN_SMALL_LETTER_SHARP_S:
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                LATIN_CAPITAL_LETTER_SHARP_S);
+
+                                /* Under /a, /d, and /u, this can match the two
+                                 * chars "ss" */
+                                if (! ASCII_FOLD_RESTRICTED) {
+                                    add_alternate(&unicode_alternate,
+                                                  (U8 *) "ss", 2);
+
+                                    /* And under /u or /a, it can match even if
+                                     * the target is not utf8 */
+                                    if (AT_LEAST_UNI_SEMANTICS) {
+                                        ANYOF_FLAGS(ret) |=
+                                                    ANYOF_NONBITMAP_NON_UTF8;
+                                    }
+                                }
+                                break;
+                            case 'F': case 'f':
+                            case 'I': case 'i':
+                            case 'L': case 'l':
+                            case 'T': case 't':
+                            case 'A': case 'a':
+                            case 'H': case 'h':
+                            case 'J': case 'j':
+                            case 'N': case 'n':
+                            case 'W': case 'w':
+                            case 'Y': case 'y':
+                                /* These all are targets of multi-character
+                                 * folds from code points that require UTF8 to
+                                 * express, so they can't match unless the
+                                 * target string is in UTF-8, so no action here
+                                 * is necessary, as regexec.c properly handles
+                                 * the general case for UTF-8 matching */
+                                break;
+                            default:
+                                /* Use deprecated warning to increase the
+                                 * chances of this being output */
+                                ckWARN2regdep(RExC_parse, "Perl folding rules are not up-to-date for 0x%"UVXf"; please use the perlbug utility to report;", j);
+                                break;
+                        }
+                    }
+                    continue;
+                }
+
+                /* Here is an above Latin1 character.  We don't have the rules
+                 * hard-coded for it.  First, get its fold */
+		f = _to_uni_fold_flags(j, foldbuf, &foldlen,
+                                    ((allow_full_fold) ? FOLD_FLAGS_FULL : 0)
+                                    | ((LOC)
+                                        ? FOLD_FLAGS_LOCALE
+                                        : (ASCII_FOLD_RESTRICTED)
+                                            ? FOLD_FLAGS_NOMIX_ASCII
+                                            : 0));
 
 		if (foldlen > (STRLEN)UNISKIP(f)) {
 
@@ -11764,54 +12232,25 @@ parseit:
 
 			/* If any of the folded characters of this are in the
 			 * Latin1 range, tell the regex engine that this can
-			 * match a non-utf8 target string.  The only multi-byte
-			 * fold whose source is in the Latin1 range (U+00DF)
-			 * applies only when the target string is utf8, or
-			 * under unicode rules */
-			if (j > 255 || AT_LEAST_UNI_SEMANTICS) {
-			    while (loc < e) {
-
-				/* Can't mix ascii with non- under /aa */
-				if (MORE_ASCII_RESTRICTED
-				    && (isASCII(*loc) != isASCII(j)))
-				{
-				    goto end_multi_fold;
-				}
-				if (UTF8_IS_INVARIANT(*loc)
-				    || UTF8_IS_DOWNGRADEABLE_START(*loc))
-				{
-                                    /* Can't mix above and below 256 under LOC
-                                     */
-				    if (LOC) {
-					goto end_multi_fold;
-				    }
-				    ANYOF_FLAGS(ret)
-					    |= ANYOF_NONBITMAP_NON_UTF8;
-				    break;
-				}
-				loc += UTF8SKIP(loc);
-			    }
-			}
+			 * match a non-utf8 target string.  */
+                        while (loc < e) {
+                            if (UTF8_IS_INVARIANT(*loc)
+                                || UTF8_IS_DOWNGRADEABLE_START(*loc))
+                            {
+                                ANYOF_FLAGS(ret)
+                                        |= ANYOF_NONBITMAP_NON_UTF8;
+                                break;
+                            }
+                            loc += UTF8SKIP(loc);
+                        }
 
 			add_alternate(&unicode_alternate, foldbuf, foldlen);
-		    end_multi_fold: ;
-		    }
-
-		    /* This is special-cased, as it is the only letter which
-		     * has both a multi-fold and single-fold in Latin1.  All
-		     * the other chars that have single and multi-folds are
-		     * always in utf8, and the utf8 folding algorithm catches
-		     * them */
-		    if (! LOC && j == LATIN_CAPITAL_LETTER_SHARP_S) {
-			stored += set_regclass_bit(pRExC_state,
-					ret,
-					LATIN_SMALL_LETTER_SHARP_S,
-					&l1_fold_invlist, &unicode_alternate);
 		    }
 		}
-		else {
-		    /* Single character fold.  Add everything in its fold
-		     * closure to the list that this node should match */
+                else {
+                    /* Single character fold of above Latin1.  Add everything
+                     * in its fold closure to the list that this node should
+                     * match */
 		    SV** listp;
 
 		    /* The fold closures data structure is a hash with the keys
@@ -11834,94 +12273,317 @@ parseit:
 			    /* /aa doesn't allow folds between ASCII and non-;
 			     * /l doesn't allow them between above and below
 			     * 256 */
-			    if ((MORE_ASCII_RESTRICTED
-				 && (isASCII(c) != isASCII(j)))
-				    || (LOC && ((c < 256) != (j < 256))))
+			    if ((ASCII_FOLD_RESTRICTED
+                                      && (isASCII(c) != isASCII(j)))
+				|| (LOC && ((c < 256) != (j < 256))))
 			    {
 				continue;
 			    }
 
-			    if (c < 256 && AT_LEAST_UNI_SEMANTICS) {
-				stored += set_regclass_bit(pRExC_state,
-					ret,
-					(U8) c,
-					&l1_fold_invlist, &unicode_alternate);
-			    }
-				/* It may be that the code point is already in
-				 * this range or already in the bitmap, in
-				 * which case we need do nothing */
-			    else if ((c < start || c > end)
-					&& (c > 255
-					    || ! ANYOF_BITMAP_TEST(ret, c)))
-			    {
-				nonbitmap = add_cp_to_invlist(nonbitmap, c);
+                            /* Folds involving non-ascii Latin1 characters
+                             * under /d are added to a separate list */
+			    if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
+                            {
+				cp_list = add_cp_to_invlist(cp_list, c);
+                            }
+                            else {
+                              depends_list = add_cp_to_invlist(depends_list, c);
 			    }
 			}
 		    }
 		}
-	    }
+            }
 	}
 	SvREFCNT_dec(fold_intersection);
     }
 
-    /* Combine the two lists into one. */
-    if (l1_fold_invlist) {
-	if (nonbitmap) {
-	    _invlist_union(nonbitmap, l1_fold_invlist, &nonbitmap);
-	    SvREFCNT_dec(l1_fold_invlist);
-	}
-	else {
-	    nonbitmap = l1_fold_invlist;
-	}
+    /* And combine the result (if any) with any inversion list from posix
+     * classes.  The lists are kept separate up to now because we don't want to
+     * fold the classes (folding of those is automatically handled by the swash
+     * fetching code) */
+    if (posixes) {
+        if (! DEPENDS_SEMANTICS) {
+            if (cp_list) {
+                _invlist_union(cp_list, posixes, &cp_list);
+                SvREFCNT_dec(posixes);
+            }
+            else {
+                cp_list = posixes;
+            }
+        }
+        else {
+            /* Under /d, we put into a separate list the Latin1 things that
+             * match only when the target string is utf8 */
+            SV* nonascii_but_latin1_properties = NULL;
+            _invlist_intersection(posixes, PL_Latin1,
+                                  &nonascii_but_latin1_properties);
+            _invlist_subtract(nonascii_but_latin1_properties, PL_ASCII,
+                              &nonascii_but_latin1_properties);
+            _invlist_subtract(posixes, nonascii_but_latin1_properties,
+                              &posixes);
+            if (cp_list) {
+                _invlist_union(cp_list, posixes, &cp_list);
+                SvREFCNT_dec(posixes);
+            }
+            else {
+                cp_list = posixes;
+            }
+
+            if (depends_list) {
+                _invlist_union(depends_list, nonascii_but_latin1_properties,
+                               &depends_list);
+                SvREFCNT_dec(nonascii_but_latin1_properties);
+            }
+            else {
+                depends_list = nonascii_but_latin1_properties;
+            }
+        }
     }
 
     /* And combine the result (if any) with any inversion list from properties.
-     * The lists are kept separate up to now because we don't want to fold the
-     * properties */
+     * The lists are kept separate up to now so that we can distinguish the two
+     * in regards to matching above-Unicode.  A run-time warning is generated
+     * if a Unicode property is matched against a non-Unicode code point. But,
+     * we allow user-defined properties to match anything, without any warning,
+     * and we also suppress the warning if there is a portion of the character
+     * class that isn't a Unicode property, and which matches above Unicode, \W
+     * or [\x{110000}] for example.
+     * (Note that in this case, unlike the Posix one above, there is no
+     * <depends_list>, because having a Unicode property forces Unicode
+     * semantics */
     if (properties) {
-	if (nonbitmap) {
-	    _invlist_union(nonbitmap, properties, &nonbitmap);
-	    SvREFCNT_dec(properties);
-	}
-	else {
-	    nonbitmap = properties;
-	}
+        bool warn_super = ! has_user_defined_property;
+        if (cp_list) {
+
+            /* If it matters to the final outcome, see if a non-property
+             * component of the class matches above Unicode.  If so, the
+             * warning gets suppressed.  This is true even if just a single
+             * such code point is specified, as though not strictly correct if
+             * another such code point is matched against, the fact that they
+             * are using above-Unicode code points indicates they should know
+             * the issues involved */
+            if (warn_super) {
+                bool non_prop_matches_above_Unicode =
+                            runtime_posix_matches_above_Unicode
+                            | (invlist_highest(cp_list) > PERL_UNICODE_MAX);
+                if (invert) {
+                    non_prop_matches_above_Unicode =
+                                            !  non_prop_matches_above_Unicode;
+                }
+                warn_super = ! non_prop_matches_above_Unicode;
+            }
+
+            _invlist_union(properties, cp_list, &cp_list);
+            SvREFCNT_dec(properties);
+        }
+        else {
+            cp_list = properties;
+        }
+
+        if (warn_super) {
+            ANYOF_FLAGS(ret) |= ANYOF_WARN_SUPER;
+        }
     }
 
-    /* Here, <nonbitmap> contains all the code points we can determine at
-     * compile time that we haven't put into the bitmap.  Go through it, and
-     * for things that belong in the bitmap, put them there, and delete from
-     * <nonbitmap> */
-    if (nonbitmap) {
+    /* Here, we have calculated what code points should be in the character
+     * class.
+     *
+     * Now we can see about various optimizations.  Fold calculation (which we
+     * did above) needs to take place before inversion.  Otherwise /[^k]/i
+     * would invert to include K, which under /i would match k, which it
+     * shouldn't.  Therefore we can't invert folded locale now, as it won't be
+     * folded until runtime */
+
+    /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
+     * at compile time.  Besides not inverting folded locale now, we can't invert
+     * if there are things such as \w, which aren't known until runtime */
+    if (invert
+        && ! (LOC && (FOLD || (ANYOF_FLAGS(ret) & ANYOF_CLASS)))
+	&& ! depends_list
+	&& ! unicode_alternate
+	&& ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+    {
+        _invlist_invert(cp_list);
 
-	/* Above-ASCII code points in /d have to stay in <nonbitmap>, as they
-	 * possibly only should match when the target string is UTF-8 */
-	UV max_cp_to_set = (DEPENDS_SEMANTICS) ? 127 : 255;
+        /* Any swash can't be used as-is, because we've inverted things */
+        if (swash) {
+            SvREFCNT_dec(swash);
+            swash = NULL;
+        }
+
+	/* Clear the invert flag since have just done it here */
+	invert = FALSE;
+    }
+
+    /* If we didn't do folding, it's because some information isn't available
+     * until runtime; set the run-time fold flag for these.  (We don't have to
+     * worry about properties folding, as that is taken care of by the swash
+     * fetching) */
+    if (FOLD && (LOC || unicode_alternate))
+    {
+       ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD;
+    }
+
+    /* Some character classes are equivalent to other nodes.  Such nodes take
+     * up less room and generally fewer operations to execute than ANYOF nodes.
+     * Above, we checked for and optimized into some such equivalents for
+     * certain common classes that are easy to test.  Getting to this point in
+     * the code means that the class didn't get optimized there.  Since this
+     * code is only executed in Pass 2, it is too late to save space--it has
+     * been allocated in Pass 1, and currently isn't given back.  But turning
+     * things into an EXACTish node can allow the optimizer to join it to any
+     * adjacent such nodes.  And if the class is equivalent to things like /./,
+     * expensive run-time swashes can be avoided.  Now that we have more
+     * complete information, we can find things necessarily missed by the
+     * earlier code.  I (khw) am not sure how much to look for here.  It would
+     * be easy, but perhaps too slow, to check any candidates against all the
+     * node types they could possibly match using _invlistEQ(). */
+
+    if (cp_list
+        && ! unicode_alternate
+        && ! invert
+        && ! depends_list
+        && ! (ANYOF_FLAGS(ret) & ANYOF_CLASS)
+        && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+    {
+       UV start, end;
+       U8 op = END;  /* The optimzation node-type */
+        const char * cur_parse= RExC_parse;
+
+       invlist_iterinit(cp_list);
+       if (! invlist_iternext(cp_list, &start, &end)) {
+
+            /* Here, the list is empty.  This happens, for example, when a
+             * Unicode property is the only thing in the character class, and
+             * it doesn't match anything.  (perluniprops.pod notes such
+             * properties) */
+            op = OPFAIL;
+        }
+        else if (start == end) {    /* The range is a single code point */
+            if (! invlist_iternext(cp_list, &start, &end)
+
+                    /* Don't do this optimization if it would require changing
+                     * the pattern to UTF-8 */
+                && (start < 256 || UTF))
+            {
+                /* Here, the list contains a single code point.  Can optimize
+                 * into an EXACT node */
+
+                value = start;
+
+                if (! FOLD) {
+                    op = EXACT;
+                }
+                else if (LOC) {
+
+                    /* A locale node under folding with one code point can be
+                     * an EXACTFL, as its fold won't be calculated until
+                     * runtime */
+                    op = EXACTFL;
+                }
+                else {
+
+                    /* Here, we are generally folding, but there is only one
+                     * code point to match.  If we have to, we use an EXACT
+                     * node, but it would be better for joining with adjacent
+                     * nodes in the optimization pass if we used the same
+                     * EXACTFish node that any such are likely to be.  We can
+                     * do this iff the code point doesn't participate in any
+                     * folds.  For example, an EXACTF of a colon is the same as
+                     * an EXACT one, since nothing folds to or from a colon.
+                     * In the Latin1 range, being an alpha means that the
+                     * character participates in a fold (except for the
+                     * feminine and masculine ordinals, which I (khw) don't
+                     * think are worrying about optimizing for). */
+                    if (value < 256) {
+                        if (isALPHA_L1(value)) {
+                            op = EXACT;
+                        }
+                    }
+                    else {
+                        if (! PL_utf8_foldable) {
+                            SV* swash = swash_init("utf8", "_Perl_Any_Folds",
+                                                &PL_sv_undef, 1, 0);
+                            PL_utf8_foldable = _get_swash_invlist(swash);
+                            SvREFCNT_dec(swash);
+                        }
+                        if (_invlist_contains_cp(PL_utf8_foldable, value)) {
+                            op = EXACT;
+                        }
+                    }
+
+                    /* If we haven't found the node type, above, it means we
+                     * can use the prevailing one */
+                    if (op == END) {
+                        op = compute_EXACTish(pRExC_state);
+                    }
+                }
+            }
+        }
+        else if (start == 0) {
+            if (end == UV_MAX) {
+                op = SANY;
+            }
+            else if (end == '\n' - 1
+                    && invlist_iternext(cp_list, &start, &end)
+                    && start == '\n' + 1 && end == UV_MAX)
+            {
+                op = REG_ANY;
+            }
+        }
+
+        if (op != END) {
+            RExC_parse = (char *)orig_parse;
+            RExC_emit = (regnode *)orig_emit;
+
+            ret = reg_node(pRExC_state, op);
+
+            RExC_parse = (char *)cur_parse;
+
+            if (PL_regkind[op] == EXACT) {
+                alloc_maybe_populate_EXACT(pRExC_state, ret, 0, value);
+            }
+
+            SvREFCNT_dec(listsv);
+            return ret;
+        }
+    }
+
+    /* Here, <cp_list> contains all the code points we can determine at
+     * compile time that match under all conditions.  Go through it, and
+     * for things that belong in the bitmap, put them there, and delete from
+     * <cp_list>.  While we are at it, see if everything above 255 is in the
+     * list, and if so, set a flag to speed up execution */
+    ANYOF_BITMAP_ZERO(ret);
+    if (cp_list) {
 
 	/* This gets set if we actually need to modify things */
 	bool change_invlist = FALSE;
 
 	UV start, end;
 
-	/* Start looking through <nonbitmap> */
-	invlist_iterinit(nonbitmap);
-	while (invlist_iternext(nonbitmap, &start, &end)) {
+	/* Start looking through <cp_list> */
+	invlist_iterinit(cp_list);
+	while (invlist_iternext(cp_list, &start, &end)) {
 	    UV high;
 	    int i;
 
+            if (end == UV_MAX && start <= 256) {
+                ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
+            }
+
 	    /* Quit if are above what we should change */
-	    if (start > max_cp_to_set) {
+	    if (start > 255) {
 		break;
 	    }
 
 	    change_invlist = TRUE;
 
 	    /* Set all the bits in the range, up to the max that we are doing */
-	    high = (end < max_cp_to_set) ? end : max_cp_to_set;
+	    high = (end < 255) ? end : 255;
 	    for (i = start; i <= (int) high; i++) {
 		if (! ANYOF_BITMAP_TEST(ret, i)) {
 		    ANYOF_BITMAP_SET(ret, i);
-		    stored++;
 		    prevvalue = value;
 		    value = i;
 		}
@@ -11929,249 +12591,33 @@ parseit:
 	}
 
         /* Done with loop; remove any code points that are in the bitmap from
-         * <nonbitmap> */
+         * <cp_list> */
 	if (change_invlist) {
-	    _invlist_subtract(nonbitmap,
-		              (DEPENDS_SEMANTICS)
-			        ? PL_ASCII
-			        : PL_Latin1,
-                              &nonbitmap);
+	    _invlist_subtract(cp_list, PL_Latin1, &cp_list);
 	}
 
 	/* If have completely emptied it, remove it completely */
-	if (invlist_len(nonbitmap) == 0) {
-	    SvREFCNT_dec(nonbitmap);
-	    nonbitmap = NULL;
+	if (invlist_len(cp_list) == 0) {
+	    SvREFCNT_dec(cp_list);
+	    cp_list = NULL;
 	}
     }
 
-    /* Here, we have calculated what code points should be in the character
-     * class.  <nonbitmap> does not overlap the bitmap except possibly in the
-     * case of DEPENDS rules.
-     *
-     * Now we can see about various optimizations.  Fold calculation (which we
-     * did above) needs to take place before inversion.  Otherwise /[^k]/i
-     * would invert to include K, which under /i would match k, which it
-     * shouldn't. */
-
-    /* Optimize inverted simple patterns (e.g. [^a-z]).  Note that we haven't
-     * set the FOLD flag yet, so this does optimize those.  It doesn't
-     * optimize locale.  Doing so perhaps could be done as long as there is
-     * nothing like \w in it; some thought also would have to be given to the
-     * interaction with above 0x100 chars */
-    if ((ANYOF_FLAGS(ret) & ANYOF_INVERT)
-        && ! LOC
-	&& ! unicode_alternate
-	/* In case of /d, there are some things that should match only when in
-	 * not in the bitmap, i.e., they require UTF8 to match.  These are
-	 * listed in nonbitmap, but if ANYOF_NONBITMAP_NON_UTF8 is set in this
-	 * case, they don't require UTF8, so can invert here */
-	&& (! nonbitmap
-	    || ! DEPENDS_SEMANTICS
-	    || (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
-	&& SvCUR(listsv) == initial_listsv_len)
-    {
-	int i;
-	if (! nonbitmap) {
-	    for (i = 0; i < 256; ++i) {
-		if (ANYOF_BITMAP_TEST(ret, i)) {
-		    ANYOF_BITMAP_CLEAR(ret, i);
-		}
-		else {
-		    ANYOF_BITMAP_SET(ret, i);
-		    prevvalue = value;
-		    value = i;
-		}
-	    }
-	    /* The inversion means that everything above 255 is matched */
-	    ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
-	}
-	else {
-	    /* Here, also has things outside the bitmap that may overlap with
-	     * the bitmap.  We have to sync them up, so that they get inverted
-	     * in both places.  Earlier, we removed all overlaps except in the
-	     * case of /d rules, so no syncing is needed except for this case
-	     */
-	    SV *remove_list = NULL;
-
-	    if (DEPENDS_SEMANTICS) {
-		UV start, end;
-
-		/* Set the bits that correspond to the ones that aren't in the
-		 * bitmap.  Otherwise, when we invert, we'll miss these.
-		 * Earlier, we removed from the nonbitmap all code points
-		 * < 128, so there is no extra work here */
-		invlist_iterinit(nonbitmap);
-		while (invlist_iternext(nonbitmap, &start, &end)) {
-		    if (start > 255) {  /* The bit map goes to 255 */
-			break;
-		    }
-		    if (end > 255) {
-			end = 255;
-		    }
-		    for (i = start; i <= (int) end; ++i) {
-			ANYOF_BITMAP_SET(ret, i);
-			prevvalue = value;
-			value = i;
-		    }
-		}
-	    }
-
-	    /* Now invert both the bitmap and the nonbitmap.  Anything in the
-	     * bitmap has to also be removed from the non-bitmap, but again,
-	     * there should not be overlap unless is /d rules. */
-	    _invlist_invert(nonbitmap);
-
-	    /* Any swash can't be used as-is, because we've inverted things */
-	    if (swash) {
-		SvREFCNT_dec(swash);
-		swash = NULL;
-	    }
-
-	    for (i = 0; i < 256; ++i) {
-		if (ANYOF_BITMAP_TEST(ret, i)) {
-		    ANYOF_BITMAP_CLEAR(ret, i);
-		    if (DEPENDS_SEMANTICS) {
-			if (! remove_list) {
-			    remove_list = _new_invlist(2);
-			}
-			remove_list = add_cp_to_invlist(remove_list, i);
-		    }
-		}
-		else {
-		    ANYOF_BITMAP_SET(ret, i);
-		    prevvalue = value;
-		    value = i;
-		}
-	    }
-
-	    /* And do the removal */
-	    if (DEPENDS_SEMANTICS) {
-		if (remove_list) {
-		    _invlist_subtract(nonbitmap, remove_list, &nonbitmap);
-		    SvREFCNT_dec(remove_list);
-		}
-	    }
-	    else {
-		/* There is no overlap for non-/d, so just delete anything
-		 * below 256 */
-		_invlist_intersection(nonbitmap, PL_AboveLatin1, &nonbitmap);
-	    }
-	}
-
-	stored = 256 - stored;
-
-	/* Clear the invert flag since have just done it here */
-	ANYOF_FLAGS(ret) &= ~ANYOF_INVERT;
-    }
-
-    /* Folding in the bitmap is taken care of above, but not for locale (for
-     * which we have to wait to see what folding is in effect at runtime), and
-     * for some things not in the bitmap (only the upper latin folds in this
-     * case, as all other single-char folding has been set above).  Set
-     * run-time fold flag for these */
-    if (FOLD && (LOC
-		|| (DEPENDS_SEMANTICS
-		    && nonbitmap
-		    && ! (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
-		|| unicode_alternate))
-    {
-	ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD;
+    if (invert) {
+        ANYOF_FLAGS(ret) |= ANYOF_INVERT;
     }
 
-    /* A single character class can be "optimized" into an EXACTish node.
-     * Note that since we don't currently count how many characters there are
-     * outside the bitmap, we are XXX missing optimization possibilities for
-     * them.  This optimization can't happen unless this is a truly single
-     * character class, which means that it can't be an inversion into a
-     * many-character class, and there must be no possibility of there being
-     * things outside the bitmap.  'stored' (only) for locales doesn't include
-     * \w, etc, so have to make a special test that they aren't present
-     *
-     * Similarly A 2-character class of the very special form like [bB] can be
-     * optimized into an EXACTFish node, but only for non-locales, and for
-     * characters which only have the two folds; so things like 'fF' and 'Ii'
-     * wouldn't work because they are part of the fold of 'LATIN SMALL LIGATURE
-     * FI'. */
-    if (! nonbitmap
-	&& ! unicode_alternate
-	&& SvCUR(listsv) == initial_listsv_len
-	&& ! (ANYOF_FLAGS(ret) & (ANYOF_INVERT|ANYOF_UNICODE_ALL))
-        && (((stored == 1 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
-                              || (! ANYOF_CLASS_TEST_ANY_SET(ret)))))
-	    || (stored == 2 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
-				 && (! _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value))
-				 /* If the latest code point has a fold whose
-				  * bit is set, it must be the only other one */
-				&& ((prevvalue = PL_fold_latin1[value]) != (IV)value)
-				 && ANYOF_BITMAP_TEST(ret, prevvalue)))))
-    {
-        /* Note that the information needed to decide to do this optimization
-         * is not currently available until the 2nd pass, and that the actually
-	 * used EXACTish node takes less space than the calculated ANYOF node,
-	 * and hence the amount of space calculated in the first pass is larger
-         * than actually used, so this optimization doesn't gain us any space.
-	 * But an EXACT node is faster than an ANYOF node, and can be combined
-	 * with any adjacent EXACT nodes later by the optimizer for further
-	 * gains.  The speed of executing an EXACTF is similar to an ANYOF
-	 * node, so the optimization advantage comes from the ability to join
-	 * it to adjacent EXACT nodes */
-
-        const char * cur_parse= RExC_parse;
-	U8 op;
-        RExC_emit = (regnode *)orig_emit;
-        RExC_parse = (char *)orig_parse;
-
-	if (stored == 1) {
-
-	    /* A locale node with one point can be folded; all the other cases
-	     * with folding will have two points, since we calculate them above
-	     */
-	    if (ANYOF_FLAGS(ret) & ANYOF_LOC_NONBITMAP_FOLD) {
-		 op = EXACTFL;
-	    }
-	    else {
-		op = EXACT;
-	    }
-	}
-	else {   /* else 2 chars in the bit map: the folds of each other */
-
-	    /* Use the folded value, which for the cases where we get here,
-	     * is just the lower case of the current one (which may resolve to
-	     * itself, or to the other one */
-	    value = toLOWER_LATIN1(value);
-
-	    /* To join adjacent nodes, they must be the exact EXACTish type.
-	     * Try to use the most likely type, by using EXACTFA if possible,
-	     * then EXACTFU if the regex calls for it, or is required because
-	     * the character is non-ASCII.  (If <value> is ASCII, its fold is
-	     * also ASCII for the cases where we get here.) */
-	    if (MORE_ASCII_RESTRICTED && isASCII(value)) {
-		op = EXACTFA;
-	    }
-	    else if (AT_LEAST_UNI_SEMANTICS || !isASCII(value)) {
-		op = EXACTFU;
-	    }
-	    else {    /* Otherwise, more likely to be EXACTF type */
-		op = EXACTF;
-	    }
-	}
-
-	ret = reg_node(pRExC_state, op);
-        RExC_parse = (char *)cur_parse;
-	if (UTF && ! NATIVE_IS_INVARIANT(value)) {
-	    *STRING(ret)= UTF8_EIGHT_BIT_HI((U8) value);
-	    *(STRING(ret) + 1)= UTF8_EIGHT_BIT_LO((U8) value);
-	    STR_LEN(ret)= 2;
-	    RExC_emit += STR_SZ(2);
+    /* Here, the bitmap has been populated with all the Latin1 code points that
+     * always match.  Can now add to the overall list those that match only
+     * when the target string is UTF-8 (<depends_list>). */
+    if (depends_list) {
+	if (cp_list) {
+	    _invlist_union(cp_list, depends_list, &cp_list);
+	    SvREFCNT_dec(depends_list);
 	}
 	else {
-	    *STRING(ret)= (char)value;
-	    STR_LEN(ret)= 1;
-	    RExC_emit += STR_SZ(1);
+	    cp_list = depends_list;
 	}
-	SvREFCNT_dec(listsv);
-        return ret;
     }
 
     /* If there is a swash and more than one element, we can't use the swash in
@@ -12180,8 +12626,9 @@ parseit:
 	SvREFCNT_dec(swash);
 	swash = NULL;
     }
-    if (! nonbitmap
-	&& SvCUR(listsv) == initial_listsv_len
+
+    if (! cp_list
+	&& ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
 	&& ! unicode_alternate)
     {
 	ARG_SET(ret, ANYOF_NONBITMAP_EMPTY);
@@ -12197,24 +12644,24 @@ parseit:
 	 *       swash is stored there now.
 	 * av[2] stores the multicharacter foldings, used later in
 	 *       regexec.c:S_reginclass().
-	 * av[3] stores the nonbitmap inversion list for use in addition or
-	 *       instead of av[0]; not used if av[1] isn't NULL
+	 * av[3] stores the cp_list inversion list for use in addition or
+	 *       instead of av[0]; used only if av[1] is NULL
 	 * av[4] is set if any component of the class is from a user-defined
-	 *       property; not used if av[1] isn't NULL */
+	 *       property; used only if av[1] is NULL */
 	AV * const av = newAV();
 	SV *rv;
 
-	av_store(av, 0, (SvCUR(listsv) == initial_listsv_len)
-			? &PL_sv_undef
-			: listsv);
+	av_store(av, 0, (HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+			? listsv
+			: &PL_sv_undef);
 	if (swash) {
 	    av_store(av, 1, swash);
-	    SvREFCNT_dec(nonbitmap);
+	    SvREFCNT_dec(cp_list);
 	}
 	else {
 	    av_store(av, 1, NULL);
-	    if (nonbitmap) {
-		av_store(av, 3, nonbitmap);
+	    if (cp_list) {
+		av_store(av, 3, cp_list);
 		av_store(av, 4, newSVuv(has_user_defined_property));
 	    }
 	}
@@ -12237,6 +12684,7 @@ parseit:
     }
     return ret;
 }
+#undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
 
 
 /* reg_skipcomment()
@@ -12830,6 +13278,40 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 #ifdef DEBUGGING
     dVAR;
     register int k;
+
+    /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
+    static const char * const anyofs[] = {
+        "\\w",
+        "\\W",
+        "\\s",
+        "\\S",
+        "\\d",
+        "\\D",
+        "[:alnum:]",
+        "[:^alnum:]",
+        "[:alpha:]",
+        "[:^alpha:]",
+        "[:ascii:]",
+        "[:^ascii:]",
+        "[:cntrl:]",
+        "[:^cntrl:]",
+        "[:graph:]",
+        "[:^graph:]",
+        "[:lower:]",
+        "[:^lower:]",
+        "[:print:]",
+        "[:^print:]",
+        "[:punct:]",
+        "[:^punct:]",
+        "[:upper:]",
+        "[:^upper:]",
+        "[:xdigit:]",
+        "[:^xdigit:]",
+        "[:space:]",
+        "[:^space:]",
+        "[:blank:]",
+        "[:^blank:]"
+    };
     RXi_GET_DECL(prog,progi);
     GET_RE_DEBUG_FLAGS_DECL;
     
@@ -12950,39 +13432,6 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 	const U8 flags = ANYOF_FLAGS(o);
 	int do_sep = 0;
 
-	/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
-	static const char * const anyofs[] = {
-	    "\\w",
-	    "\\W",
-	    "\\s",
-	    "\\S",
-	    "\\d",
-	    "\\D",
-	    "[:alnum:]",
-	    "[:^alnum:]",
-	    "[:alpha:]",
-	    "[:^alpha:]",
-	    "[:ascii:]",
-	    "[:^ascii:]",
-	    "[:cntrl:]",
-	    "[:^cntrl:]",
-	    "[:graph:]",
-	    "[:^graph:]",
-	    "[:lower:]",
-	    "[:^lower:]",
-	    "[:print:]",
-	    "[:^print:]",
-	    "[:punct:]",
-	    "[:^punct:]",
-	    "[:upper:]",
-	    "[:^upper:]",
-	    "[:xdigit:]",
-	    "[:^xdigit:]",
-	    "[:space:]",
-	    "[:^space:]",
-	    "[:blank:]",
-	    "[:^blank:]"
-	};
 
 	if (flags & ANYOF_LOCALE)
 	    sv_catpvs(sv, "{loc}");
@@ -13120,6 +13569,15 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 
 	Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
     }
+    else if (k == POSIXD) {
+        U8 index = FLAGS(o) * 2;
+        if (index > (sizeof(anyofs) / sizeof(anyofs[0]))) {
+            Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
+        }
+        else {
+            sv_catpv(sv, anyofs[index]);
+        }
+    }
     else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
 	Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
 #else
@@ -13230,7 +13688,6 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
 {
     struct regexp *ret;
     struct regexp *const r = (struct regexp *)SvANY(rx);
-    register const I32 npar = r->nparens+1;
 
     PERL_ARGS_ASSERT_REG_TEMP_COPY;
 
@@ -13250,8 +13707,11 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
     SvLEN_set(ret_x, 0);
     SvSTASH_set(ret_x, NULL);
     SvMAGIC_set(ret_x, NULL);
-    Newx(ret->offs, npar, regexp_paren_pair);
-    Copy(r->offs, ret->offs, npar, regexp_paren_pair);
+    if (r->offs) {
+        const I32 npar = r->nparens+1;
+        Newx(ret->offs, npar, regexp_paren_pair);
+        Copy(r->offs, ret->offs, npar, regexp_paren_pair);
+    }
     if (r->substrs) {
         Newx(ret->substrs, 1, struct reg_substr_data);
 	StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
@@ -13321,9 +13781,6 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
 
     if (ri->data) {
 	int n = ri->data->count;
-	PAD* new_comppad = NULL;
-	PAD* old_comppad;
-	PADOFFSET refcnt;
 
 	while (--n >= 0) {
           /* If you add a ->what type here, update the comment in regcomp.h */
@@ -13338,29 +13795,8 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
 	    case 'f':
 		Safefree(ri->data->data[n]);
 		break;
-	    case 'p':
-		new_comppad = MUTABLE_AV(ri->data->data[n]);
-		break;
-	    case 'o':
-		if (new_comppad == NULL)
-		    Perl_croak(aTHX_ "panic: pregfree comppad");
-		PAD_SAVE_LOCAL(old_comppad,
-		    /* Watch out for global destruction's random ordering. */
-		    (SvTYPE(new_comppad) == SVt_PVAV) ? new_comppad : NULL
-		);
-		OP_REFCNT_LOCK;
-		refcnt = OpREFCNT_dec((OP_4tree*)ri->data->data[n]);
-		OP_REFCNT_UNLOCK;
-		if (!refcnt)
-                    op_free((OP_4tree*)ri->data->data[n]);
-
-		PAD_RESTORE_LOCAL(old_comppad);
-		SvREFCNT_dec(MUTABLE_SV(new_comppad));
-		new_comppad = NULL;
-		break;
 	    case 'l':
 	    case 'L':
-	    case 'n':
 	        break;
             case 'T':	        
                 { /* Aho Corasick add-on structure for a trie node.
@@ -13579,13 +14015,11 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
 	for (i = 0; i < count; i++) {
 	    d->what[i] = ri->data->what[i];
 	    switch (d->what[i]) {
-	        /* legal options are one of: sSfpontTua
-	           see also regcomp.h and pregfree() */
+	        /* see also regcomp.h and regfree_internal() */
 	    case 'a': /* actually an AV, but the dup function is identical.  */
 	    case 'r':
 	    case 's':
 	    case 'S':
-	    case 'p': /* actually an AV, but the dup function is identical.  */
 	    case 'u': /* actually an HV, but the dup function is identical.  */
 		d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param);
 		break;
@@ -13596,13 +14030,6 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
 			    struct regnode_charclass_class);
 		reti->regstclass = (regnode*)d->data[i];
 		break;
-	    case 'o':
-		/* Compiled op trees are readonly and in shared memory,
-		   and can thus be shared without duplication. */
-		OP_REFCNT_LOCK;
-		d->data[i] = (void*)OpREFCNT_inc((OP*)ri->data->data[i]);
-		OP_REFCNT_UNLOCK;
-		break;
 	    case 'T':
 		/* Trie stclasses are readonly and can thus be shared
 		 * without duplication. We free the stclass in pregfree
@@ -13617,7 +14044,6 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
 		/* Fall through */
 	    case 'l':
 	    case 'L':
-	    case 'n':
 		d->data[i] = ri->data->data[i];
 		break;
             default:
@@ -13727,8 +14153,6 @@ Perl_save_re_context(pTHX)
 
     Copy(&PL_reg_state, state, 1, struct re_save_state);
 
-    PL_reg_start_tmp = 0;
-    PL_reg_start_tmpl = 0;
     PL_reg_oldsaved = NULL;
     PL_reg_oldsavedlen = 0;
     PL_reg_maxiter = 0;