This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Change and comment a problematic #define
[perl5.git] / regcomp.c
index b106cc1..e2218bd 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -90,6 +90,8 @@
 #  include "charclass_invlists.h"
 #endif
 
+#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
+
 #ifdef op
 #undef op
 #endif /* op */
 #define        STATIC  static
 #endif
 
+
 typedef struct RExC_state_t {
-    U32                flags;                  /* are we folding, multilining? */
+    U32                flags;                  /* RXf_* are we folding, multilining? */
+    U32                pm_flags;               /* PMf_* stuff from the calling PMOP */
     char       *precomp;               /* uncompiled string. */
     REGEXP     *rx_sv;                 /* The SV that is the regexp. */
     regexp     *rx;                    /* perl core regexp structure */
@@ -129,7 +133,6 @@ typedef struct RExC_state_t {
     I32                nestroot;               /* root parens we are in - used by accept */
     I32                extralen;
     I32                seen_zerolen;
-    I32                seen_evals;
     regnode    **open_parens;          /* pointers to open parens */
     regnode    **close_parens;         /* pointers to close parens */
     regnode    *opend;                 /* END node in program */
@@ -147,10 +150,15 @@ typedef struct RExC_state_t {
     I32                in_lookbehind;
     I32                contains_locale;
     I32                override_recoding;
+    struct reg_code_block *code_blocks;        /* positions of literal (?{})
+                                           within pattern */
+    int                num_code_blocks;        /* size of code_blocks[] */
+    int                code_index;             /* next code_blocks[] slot */
 #if ADD_TO_REGEXEC
     char       *starttry;              /* -Dr: where regtry was called. */
 #define RExC_starttry  (pRExC_state->starttry)
 #endif
+    SV         *runtime_code_qr;       /* qr with the runtime code blocks */
 #ifdef DEBUGGING
     const char  *lastparse;
     I32         lastnum;
@@ -162,6 +170,7 @@ typedef struct RExC_state_t {
 } RExC_state_t;
 
 #define RExC_flags     (pRExC_state->flags)
+#define RExC_pm_flags  (pRExC_state->pm_flags)
 #define RExC_precomp   (pRExC_state->precomp)
 #define RExC_rx_sv     (pRExC_state->rx_sv)
 #define RExC_rx                (pRExC_state->rx)
@@ -184,7 +193,6 @@ typedef struct RExC_state_t {
 #define RExC_nestroot   (pRExC_state->nestroot)
 #define RExC_extralen  (pRExC_state->extralen)
 #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
-#define RExC_seen_evals        (pRExC_state->seen_evals)
 #define RExC_utf8      (pRExC_state->utf8)
 #define RExC_uni_semantics     (pRExC_state->uni_semantics)
 #define RExC_orig_utf8 (pRExC_state->orig_utf8)
@@ -213,7 +221,8 @@ typedef struct RExC_state_t {
 #define        HASWIDTH        0x01    /* Known to match non-null strings. */
 
 /* Simple enough to be STAR/PLUS operand, in an EXACT node must be a single
- * character, and if utf8, must be invariant.  Note that this is not the same thing as REGNODE_SIMPLE */
+ * character, and if utf8, must be invariant.  Note that this is not the same
+ * thing as REGNODE_SIMPLE */
 #define        SIMPLE          0x02
 #define        SPSTART         0x04    /* Starts with * or +. */
 #define TRYAGAIN       0x08    /* Weeded out a declaration. */
@@ -394,9 +403,13 @@ static const scan_data_t zero_scan_data =
 
 #define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
 
-#define OOB_UNICODE            12345678
 #define OOB_NAMEDCLASS         -1
 
+/* There is no code point that is out-of-bounds, so this is problematic.  But
+ * its only current use is to initialize a variable that is always set before
+ * looked at. */
+#define OOB_UNICODE            0xDEADBEEF
+
 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
 
@@ -1552,7 +1565,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     if (!SvIOK(re_trie_maxbuff)) {
         sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
     }
-    DEBUG_OPTIMISE_r({
+    DEBUG_TRIE_COMPILE_r({
                 PerlIO_printf( Perl_debug_log,
                   "%*smake_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n",
                   (int)depth * 2 + 2, "", 
@@ -1594,9 +1607,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
      */
 
     for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
-        regnode * const noper = NEXTOPER( cur );
+        regnode *noper = NEXTOPER( cur );
         const U8 *uc = (U8*)STRING( noper );
-        const U8 * const e  = uc + STR_LEN( noper );
+        const U8 *e  = uc + STR_LEN( noper );
         STRLEN foldlen = 0;
         U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
         STRLEN skiplen = 0;
@@ -1606,9 +1619,18 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
         bool set_bit = trie->bitmap ? 1 : 0; /*store the first char in the bitmap?*/
 
         if (OP(noper) == NOTHING) {
-            trie->minlen= 0;
-            continue;
+            regnode *noper_next= regnext(noper);
+            if (noper_next != tail && OP(noper_next) == flags) {
+                noper = noper_next;
+                uc= (U8*)STRING(noper);
+                e= uc + STR_LEN(noper);
+               trie->minlen= STR_LEN(noper);
+            } else {
+               trie->minlen= 0;
+               continue;
+           }
         }
+
         if ( set_bit ) { /* bitmap only alloced when !(UTF&&Folding) */
             TRIE_BITMAP_SET(trie,*uc); /* store the raw first byte
                                           regardless of encoding */
@@ -1747,9 +1769,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 
         for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
 
-           regnode * const noper = NEXTOPER( cur );
+            regnode *noper   = NEXTOPER( cur );
            U8 *uc           = (U8*)STRING( noper );
-           const U8 * const e = uc + STR_LEN( noper );
+            const U8 *e      = uc + STR_LEN( noper );
            U32 state        = 1;         /* required init */
            U16 charid       = 0;         /* sanity init */
            U8 *scan         = (U8*)NULL; /* sanity init */
@@ -1758,6 +1780,15 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
            U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
             STRLEN skiplen   = 0;
 
+            if (OP(noper) == NOTHING) {
+                regnode *noper_next= regnext(noper);
+                if (noper_next != tail && OP(noper_next) == flags) {
+                    noper = noper_next;
+                    uc= (U8*)STRING(noper);
+                    e= uc + STR_LEN(noper);
+                }
+            }
+
             if (OP(noper) != NOTHING) {
                 for ( ; uc < e ; uc += len ) {
 
@@ -1945,9 +1976,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 
         for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
 
-           regnode * const noper   = NEXTOPER( cur );
+            regnode *noper   = NEXTOPER( cur );
            const U8 *uc     = (U8*)STRING( noper );
-           const U8 * const e = uc + STR_LEN( noper );
+            const U8 *e      = uc + STR_LEN( noper );
 
             U32 state        = 1;         /* required init */
 
@@ -1960,6 +1991,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
             STRLEN skiplen   = 0;
             U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
 
+            if (OP(noper) == NOTHING) {
+                regnode *noper_next= regnext(noper);
+                if (noper_next != tail && OP(noper_next) == flags) {
+                    noper = noper_next;
+                    uc= (U8*)STRING(noper);
+                    e= uc + STR_LEN(noper);
+                }
+            }
 
             if ( OP(noper) != NOTHING ) {
                 for ( ; uc < e ; uc += len ) {
@@ -3234,7 +3273,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         }
 
                         
-                        DEBUG_OPTIMISE_r({
+                        DEBUG_TRIE_COMPILE_r({
                             regprop(RExC_rx, mysv, tail );
                             PerlIO_printf( Perl_debug_log, "%*s%s%s\n",
                                 (int)depth * 2 + 2, "", 
@@ -3300,9 +3339,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                             U8 noper_trietype = TRIE_TYPE( noper_type );
 #if defined(DEBUGGING) || defined(NOJUMPTRIE)
                             regnode * const noper_next = regnext( noper );
+                           U8 noper_next_type = (noper_next && noper_next != tail) ? OP(noper_next) : 0;
+                           U8 noper_next_trietype = (noper_next && noper_next != tail) ? TRIE_TYPE( noper_next_type ) :0;
 #endif
 
-                            DEBUG_OPTIMISE_r({
+                            DEBUG_TRIE_COMPILE_r({
                                 regprop(RExC_rx, mysv, cur);
                                 PerlIO_printf( Perl_debug_log, "%*s- %s (%d)",
                                    (int)depth * 2 + 2,"", SvPV_nolen_const( mysv ), REG_NODE_NUM(cur) );
@@ -3316,8 +3357,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                   PerlIO_printf( Perl_debug_log,"\t=> %s\t",
                                     SvPV_nolen_const(mysv));
                                 }
-                                PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d)\n",
-                                   REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur) );
+                                PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d,tt==%s,nt==%s,nnt==%s)\n",
+                                   REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur),
+                                  PL_reg_name[trietype], PL_reg_name[noper_trietype], PL_reg_name[noper_next_trietype] 
+                               );
                             });
 
                             /* Is noper a trieable nodetype that can be merged with the
@@ -3325,22 +3368,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                             if ( noper_trietype
                                   &&
                                   (
-                                        /* XXX: Currently we cannot allow a NOTHING node to be the first element
-                                         * of a TRIEABLE sequence, Otherwise we will overwrite the regop following
-                                         * the NOTHING with the TRIE regop later on. This is because a NOTHING node
-                                         * is only one regnode wide, and a TRIE is two regnodes. An example of a
-                                         * problematic pattern is: "x" =~ /\A(?>(?:(?:)A|B|C?x))\z/
-                                         * At a later point of time we can somewhat workaround this by handling
-                                         * NOTHING -> EXACT sequences as generated by /(?:)A|(?:)B/ type patterns,
-                                         * as we can effectively ignore the NOTHING regop in that case.
-                                         * This clause, which allows NOTHING to start a sequence is left commented
-                                         * out as a reference.
-                                         * - Yves
-
-                                           ( noper_trietype == NOTHING)
-                                           || ( trietype == NOTHING )
-                                        */
-                                        ( noper_trietype == NOTHING && trietype )
+                                        ( noper_trietype == NOTHING)
+                                        || ( trietype == NOTHING )
                                         || ( trietype == noper_trietype )
                                   )
 #ifdef NOJUMPTRIE
@@ -3352,15 +3381,32 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  * Either we are the first node in a new trieable sequence,
                                  * in which case we do some bookkeeping, otherwise we update
                                  * the end pointer. */
-                                count++;
                                 if ( !first ) {
                                     first = cur;
-                                    trietype = noper_trietype;
+                                   if ( noper_trietype == NOTHING ) {
+#if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
+                                       regnode * const noper_next = regnext( noper );
+                                        U8 noper_next_type = (noper_next && noper_next!=tail) ? OP(noper_next) : 0;
+                                       U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
+#endif
+
+                                        if ( noper_next_trietype ) {
+                                           trietype = noper_next_trietype;
+                                        } else if (noper_next_type)  {
+                                            /* a NOTHING regop is 1 regop wide. We need at least two
+                                             * for a trie so we can't merge this in */
+                                            first = NULL;
+                                        }
+                                    } else {
+                                        trietype = noper_trietype;
+                                    }
                                 } else {
                                     if ( trietype == NOTHING )
                                         trietype = noper_trietype;
                                     last = cur;
                                 }
+                               if (first)
+                                   count++;
                             } /* end handle mergable triable node */
                             else {
                                 /* handle unmergable node -
@@ -3396,31 +3442,53 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                 }
                             } /* end handle unmergable node */
                         } /* loop over branches */
-                        DEBUG_OPTIMISE_r({
+                        DEBUG_TRIE_COMPILE_r({
                             regprop(RExC_rx, mysv, cur);
                             PerlIO_printf( Perl_debug_log,
                               "%*s- %s (%d) <SCAN FINISHED>\n", (int)depth * 2 + 2,
                               "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
 
                         });
-                        if ( last && trietype != NOTHING ) {
-                            /* the last branch of the sequence was part of a trie,
-                             * so we have to construct it here outside of the loop
-                             */
-                            made= make_trie( pRExC_state, startbranch, first, scan, tail, count, trietype, depth+1 );
+                        if ( last ) {
+                            if ( trietype != NOTHING ) {
+                                /* the last branch of the sequence was part of a trie,
+                                 * so we have to construct it here outside of the loop
+                                 */
+                                made= make_trie( pRExC_state, startbranch, first, scan, tail, count, trietype, depth+1 );
 #ifdef TRIE_STUDY_OPT
-                            if ( ((made == MADE_EXACT_TRIE && 
-                                 startbranch == first) 
-                                 || ( first_non_open == first )) && 
-                                 depth==0 ) {
-                                flags |= SCF_TRIE_RESTUDY;
-                                if ( startbranch == first 
-                                     && scan == tail ) 
-                                {
-                                    RExC_seen &=~REG_TOP_LEVEL_BRANCHES;
+                                if ( ((made == MADE_EXACT_TRIE &&
+                                     startbranch == first)
+                                     || ( first_non_open == first )) &&
+                                     depth==0 ) {
+                                    flags |= SCF_TRIE_RESTUDY;
+                                    if ( startbranch == first
+                                         && scan == tail )
+                                    {
+                                        RExC_seen &=~REG_TOP_LEVEL_BRANCHES;
+                                    }
                                 }
-                            }
 #endif
+                            } else {
+                                /* at this point we know whatever we have is a NOTHING sequence/branch
+                                 * AND if 'startbranch' is 'first' then we can turn the whole thing into a NOTHING
+                                 */
+                                if ( startbranch == first ) {
+                                    regnode *opt;
+                                    /* the entire thing is a NOTHING sequence, something like this:
+                                     * (?:|) So we can turn it into a plain NOTHING op. */
+                                    DEBUG_TRIE_COMPILE_r({
+                                        regprop(RExC_rx, mysv, cur);
+                                        PerlIO_printf( Perl_debug_log,
+                                          "%*s- %s (%d) <NOTHING BRANCH SEQUENCE>\n", (int)depth * 2 + 2,
+                                          "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
+
+                                    });
+                                    OP(startbranch)= NOTHING;
+                                    NEXT_OFF(startbranch)= tail - startbranch;
+                                    for ( opt= startbranch + 1; opt < tail ; opt++ )
+                                        OP(opt)= OPTIMIZED;
+                                }
+                            }
                         } /* end if ( last) */
                     } /* TRIE_MAXBUF is non zero */
                     
@@ -3559,7 +3627,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     * elsewhere that does compute the fold settles down, it
                     * can be extracted out and re-used here */
                    for (i = 0; i < 256; i++){
-                       if (_HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)) {
+                       if (HAS_NONLATIN1_FOLD_CLOSURE(i)) {
                            ANYOF_BITMAP_SET(data->start_class, i);
                        }
                    }
@@ -4340,11 +4408,39 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            data->flags |= (OP(scan) == MEOL
                            ? SF_BEFORE_MEOL
                            : SF_BEFORE_SEOL);
+           SCAN_COMMIT(pRExC_state, data, minlenp);
+
        }
        else if (  PL_regkind[OP(scan)] == BRANCHJ
                 /* Lookbehind, or need to calculate parens/evals/stclass: */
                   && (scan->flags || data || (flags & SCF_DO_STCLASS))
                   && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) {
+            if ( OP(scan) == UNLESSM &&
+                 scan->flags == 0 &&
+                 OP(NEXTOPER(NEXTOPER(scan))) == NOTHING &&
+                 OP(regnext(NEXTOPER(NEXTOPER(scan)))) == SUCCEED
+            ) {
+                regnode *opt;
+                regnode *upto= regnext(scan);
+                DEBUG_PARSE_r({
+                    SV * const mysv_val=sv_newmortal();
+                    DEBUG_STUDYDATA("OPFAIL",data,depth);
+
+                    /*DEBUG_PARSE_MSG("opfail");*/
+                    regprop(RExC_rx, mysv_val, upto);
+                    PerlIO_printf(Perl_debug_log, "~ replace with OPFAIL pointed at %s (%"IVdf") offset %"IVdf"\n",
+                                  SvPV_nolen_const(mysv_val),
+                                  (IV)REG_NODE_NUM(upto),
+                                  (IV)(upto - scan)
+                    );
+                });
+                OP(scan) = OPFAIL;
+                NEXT_OFF(scan) = upto - scan;
+                for (opt= scan + 1; opt < upto ; opt++)
+                    OP(opt) = OPTIMIZED;
+                scan= upto;
+                continue;
+            }
             if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY 
                 || OP(scan) == UNLESSM )
             {
@@ -4509,8 +4605,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         }
                     }
                 }
-
-
            }
 #endif
        }
@@ -4811,71 +4905,359 @@ Perl_reginitcolors(pTHX)
 #endif        
 
 /*
- pregcomp - compile a regular expression into internal code
* pregcomp - compile a regular expression into internal code
  *
- * We can't allocate space until we know how big the compiled form will be,
- * but we can't compile it (and thus know how big it is) until we've got a
- * place to put the code.  So we cheat:  we compile it twice, once with code
- * generation turned off and size counting turned on, and once "for real".
- * This also means that we don't allocate space until we are sure that the
- * thing really will compile successfully, and we never have to move the
- * code and thus invalidate pointers into it.  (Note that it has to be in
- * one piece because free() must be able to free it all.) [NB: not true in perl]
- *
- * Beware that the optimization-preparation code in here knows about some
- * of the structure of the compiled regexp.  [I'll say.]
+ * Decides which engine's compiler to call based on the hint currently in
+ * scope
  */
 
+#ifndef PERL_IN_XSUB_RE 
 
+/* return the currently in-scope regex engine (or the default if none)  */
+
+regexp_engine const *
+Perl_current_re_engine(pTHX)
+{
+    dVAR;
+
+    if (IN_PERL_COMPILETIME) {
+       HV * const table = GvHV(PL_hintgv);
+       SV **ptr;
+
+       if (!table)
+           return &PL_core_reg_engine;
+       ptr = hv_fetchs(table, "regcomp", FALSE);
+       if ( !(ptr && SvIOK(*ptr) && SvIV(*ptr)))
+           return &PL_core_reg_engine;
+       return INT2PTR(regexp_engine*,SvIV(*ptr));
+    }
+    else {
+       SV *ptr;
+       if (!PL_curcop->cop_hints_hash)
+           return &PL_core_reg_engine;
+       ptr = cop_hints_fetch_pvs(PL_curcop, "regcomp", 0);
+       if ( !(ptr && SvIOK(ptr) && SvIV(ptr)))
+           return &PL_core_reg_engine;
+       return INT2PTR(regexp_engine*,SvIV(ptr));
+    }
+}
 
-#ifndef PERL_IN_XSUB_RE
-#define RE_ENGINE_PTR &PL_core_reg_engine
-#else
-extern const struct regexp_engine my_reg_engine;
-#define RE_ENGINE_PTR &my_reg_engine
-#endif
 
-#ifndef PERL_IN_XSUB_RE 
 REGEXP *
 Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
 {
     dVAR;
-    HV * const table = GvHV(PL_hintgv);
+    regexp_engine const *eng = current_re_engine();
+    GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_PREGCOMP;
 
-    /* Dispatch a request to compile a regexp to correct 
-       regexp engine. */
-    if (table) {
-        SV **ptr= hv_fetchs(table, "regcomp", FALSE);
-        GET_RE_DEBUG_FLAGS_DECL;
-        if (ptr && SvIOK(*ptr) && SvIV(*ptr)) {
-            const regexp_engine *eng=INT2PTR(regexp_engine*,SvIV(*ptr));
-            DEBUG_COMPILE_r({
-                PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
-                    SvIV(*ptr));
-            });            
-            return CALLREGCOMP_ENG(eng, pattern, flags);
-        } 
-    }
-    return Perl_re_compile(aTHX_ pattern, flags);
+    /* Dispatch a request to compile a regexp to correct regexp engine. */
+    DEBUG_COMPILE_r({
+       PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
+                       PTR2UV(eng));
+    });
+    return CALLREGCOMP_ENG(eng, pattern, flags);
 }
 #endif
 
+/* public(ish) wrapper for Perl_re_op_compile that only takes an SV
+ * pattern rather than a list of OPs */
+
+REGEXP *
+Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
+{
+    SV *pat = pattern; /* defeat constness! */
+    PERL_ARGS_ASSERT_RE_COMPILE;
+    return Perl_re_op_compile(aTHX_ &pat, 1, NULL, current_re_engine(),
+                               NULL, NULL, rx_flags, 0);
+}
+
+/* see if there are any run-time code blocks in the pattern.
+ * False positives are allowed */
+
+static bool
+S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, OP *expr,
+                   U32 pm_flags, char *pat, STRLEN plen)
+{
+    int n = 0;
+    STRLEN s;
+
+    /* avoid infinitely recursing when we recompile the pattern parcelled up
+     * as qr'...'. A single constant qr// string can't have have any
+     * run-time component in it, and thus, no runtime code. (A non-qr
+     * string, however, can, e.g. $x =~ '(?{})') */
+    if  ((pm_flags & PMf_IS_QR) && expr && expr->op_type == OP_CONST)
+       return 0;
+
+    for (s = 0; s < plen; s++) {
+       if (n < pRExC_state->num_code_blocks
+           && s == pRExC_state->code_blocks[n].start)
+       {
+           s = pRExC_state->code_blocks[n].end;
+           n++;
+           continue;
+       }
+       /* TODO ideally should handle [..], (#..), /#.../x to reduce false
+        * positives here */
+       if (pat[s] == '(' && pat[s+1] == '?' &&
+           (pat[s+2] == '{' || (pat[s+2] == '?' && pat[s+3] == '{'))
+       )
+           return 1;
+    }
+    return 0;
+}
+
+/* Handle run-time code blocks. We will already have compiled any direct
+ * or indirect literal code blocks. Now, take the pattern 'pat' and make a
+ * copy of it, but with any literal code blocks blanked out and
+ * appropriate chars escaped; then feed it into
+ *
+ *    eval "qr'modified_pattern'"
+ *
+ * For example,
+ *
+ *       a\bc(?{"this was literal"})def'ghi\\jkl(?{"this is runtime"})mno
+ *
+ * becomes
+ *
+ *    qr'a\\bc                       def\'ghi\\\\jkl(?{"this is runtime"})mno'
+ *
+ * After eval_sv()-ing that, grab any new code blocks from the returned qr
+ * and merge them with any code blocks of the original regexp.
+ *
+ * If the pat is non-UTF8, while the evalled qr is UTF8, don't merge;
+ * instead, just save the qr and return FALSE; this tells our caller that
+ * the original pattern needs upgrading to utf8.
+ */
+
+static bool
+S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
+    char *pat, STRLEN plen)
+{
+    SV *qr;
+
+    GET_RE_DEBUG_FLAGS_DECL;
+
+    if (pRExC_state->runtime_code_qr) {
+       /* this is the second time we've been called; this should
+        * only happen if the main pattern got upgraded to utf8
+        * during compilation; re-use the qr we compiled first time
+        * round (which should be utf8 too)
+        */
+       qr = pRExC_state->runtime_code_qr;
+       pRExC_state->runtime_code_qr = NULL;
+       assert(RExC_utf8 && SvUTF8(qr));
+    }
+    else {
+       int n = 0;
+       STRLEN s;
+       char *p, *newpat;
+       int newlen = plen + 6; /* allow for "qr''x\0" extra chars */
+       SV *sv, *qr_ref;
+       dSP;
+
+       /* determine how many extra chars we need for ' and \ escaping */
+       for (s = 0; s < plen; s++) {
+           if (pat[s] == '\'' || pat[s] == '\\')
+               newlen++;
+       }
+
+       Newx(newpat, newlen, char);
+       p = newpat;
+       *p++ = 'q'; *p++ = 'r'; *p++ = '\'';
+
+       for (s = 0; s < plen; s++) {
+           if (n < pRExC_state->num_code_blocks
+               && s == pRExC_state->code_blocks[n].start)
+           {
+               /* blank out literal code block */
+               assert(pat[s] == '(');
+               while (s <= pRExC_state->code_blocks[n].end) {
+                   *p++ = ' ';
+                   s++;
+               }
+               s--;
+               n++;
+               continue;
+           }
+           if (pat[s] == '\'' || pat[s] == '\\')
+               *p++ = '\\';
+           *p++ = pat[s];
+       }
+       *p++ = '\'';
+       if (pRExC_state->pm_flags & RXf_PMf_EXTENDED)
+           *p++ = 'x';
+       *p++ = '\0';
+       DEBUG_COMPILE_r({
+           PerlIO_printf(Perl_debug_log,
+               "%sre-parsing pattern for runtime code:%s %s\n",
+               PL_colors[4],PL_colors[5],newpat);
+       });
+
+       sv = newSVpvn_flags(newpat, p-newpat-1, RExC_utf8 ? SVf_UTF8 : 0);
+       Safefree(newpat);
+
+       ENTER;
+       SAVETMPS;
+       save_re_context();
+       PUSHSTACKi(PERLSI_REQUIRE);
+       /* this causes the toker to collapse \\ into \ when parsing
+        * qr''; normally only q'' does this. It also alters hints
+        * handling */
+       PL_reg_state.re_reparsing = TRUE;
+       eval_sv(sv, G_SCALAR);
+       SvREFCNT_dec(sv);
+       SPAGAIN;
+       qr_ref = POPs;
+       PUTBACK;
+       if (SvTRUE(ERRSV))
+           Perl_croak(aTHX_ "%s", SvPVx_nolen_const(ERRSV));
+       assert(SvROK(qr_ref));
+       qr = SvRV(qr_ref);
+       assert(SvTYPE(qr) == SVt_REGEXP && RX_ENGINE((REGEXP*)qr)->op_comp);
+       /* the leaving below frees the tmp qr_ref.
+        * Give qr a life of its own */
+       SvREFCNT_inc(qr);
+       POPSTACK;
+       FREETMPS;
+       LEAVE;
+
+    }
+
+    if (!RExC_utf8 && SvUTF8(qr)) {
+       /* first time through; the pattern got upgraded; save the
+        * qr for the next time through */
+       assert(!pRExC_state->runtime_code_qr);
+       pRExC_state->runtime_code_qr = qr;
+       return 0;
+    }
+
+
+    /* extract any code blocks within the returned qr//  */
+
+
+    /* merge the main (r1) and run-time (r2) code blocks into one */
+    {
+       RXi_GET_DECL(((struct regexp*)SvANY(qr)), r2);
+       struct reg_code_block *new_block, *dst;
+       RExC_state_t * const r1 = pRExC_state; /* convenient alias */
+       int i1 = 0, i2 = 0;
+
+       if (!r2->num_code_blocks) /* we guessed wrong */
+           return 1;
+
+       Newx(new_block,
+           r1->num_code_blocks + r2->num_code_blocks,
+           struct reg_code_block);
+       dst = new_block;
+
+       while (    i1 < r1->num_code_blocks
+               || i2 < r2->num_code_blocks)
+       {
+           struct reg_code_block *src;
+           bool is_qr = 0;
+
+           if (i1 == r1->num_code_blocks) {
+               src = &r2->code_blocks[i2++];
+               is_qr = 1;
+           }
+           else if (i2 == r2->num_code_blocks)
+               src = &r1->code_blocks[i1++];
+           else if (  r1->code_blocks[i1].start
+                    < r2->code_blocks[i2].start)
+           {
+               src = &r1->code_blocks[i1++];
+               assert(src->end < r2->code_blocks[i2].start);
+           }
+           else {
+               assert(  r1->code_blocks[i1].start
+                      > r2->code_blocks[i2].start);
+               src = &r2->code_blocks[i2++];
+               is_qr = 1;
+               assert(src->end < r1->code_blocks[i1].start);
+           }
+
+           assert(pat[src->start] == '(');
+           assert(pat[src->end]   == ')');
+           dst->start      = src->start;
+           dst->end        = src->end;
+           dst->block      = src->block;
+           dst->src_regex  = is_qr ? (REGEXP*) SvREFCNT_inc( (SV*) qr)
+                                   : src->src_regex;
+           dst++;
+       }
+       r1->num_code_blocks += r2->num_code_blocks;
+       Safefree(r1->code_blocks);
+       r1->code_blocks = new_block;
+    }
+
+    SvREFCNT_dec(qr);
+    return 1;
+}
+
+
+/*
+ * Perl_re_op_compile - the perl internal RE engine's function to compile a
+ * regular expression into internal code.
+ * The pattern may be passed either as:
+ *    a list of SVs (patternp plus pat_count)
+ *    a list of OPs (expr)
+ * If both are passed, the SV list is used, but the OP list indicates
+ * which SVs are actually pre-compiled code blocks
+ *
+ * The SVs in the list have magic and qr overloading applied to them (and
+ * the list may be modified in-place with replacement SVs in the latter
+ * case).
+ *
+ * If the pattern hasn't changed from old_re, then old_re will be
+ * returned.
+ *
+ * eng is the current engine. If that engine has an op_comp method, then
+ * handle directly (i.e. we assume that op_comp was us); otherwise, just
+ * do the initial concatenation of arguments and pass on to the external
+ * engine.
+ *
+ * If is_bare_re is not null, set it to a boolean indicating whether the
+ * arg list reduced (after overloading) to a single bare regex which has
+ * been returned (i.e. /$qr/).
+ *
+ * orig_rx_flags contains RXf_* flags. See perlreapi.pod for more details.
+ *
+ * pm_flags contains the PMf_* flags, typically based on those from the
+ * pm_flags field of the related PMOP. Currently we're only interested in
+ * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL.
+ *
+ * We can't allocate space until we know how big the compiled form will be,
+ * but we can't compile it (and thus know how big it is) until we've got a
+ * place to put the code.  So we cheat:  we compile it twice, once with code
+ * generation turned off and size counting turned on, and once "for real".
+ * This also means that we don't allocate space until we are sure that the
+ * thing really will compile successfully, and we never have to move the
+ * code and thus invalidate pointers into it.  (Note that it has to be in
+ * one piece because free() must be able to free it all.) [NB: not true in perl]
+ *
+ * Beware that the optimization-preparation code in here knows about some
+ * of the structure of the compiled regexp.  [I'll say.]
+ */
+
 REGEXP *
-Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
+Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
+                   OP *expr, const regexp_engine* eng, REGEXP *VOL old_re,
+                    bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags)
 {
     dVAR;
     REGEXP *rx;
     struct regexp *r;
     register regexp_internal *ri;
     STRLEN plen;
-    char* VOL exp;
+    char  * VOL exp;
     char* xend;
     regnode *scan;
     I32 flags;
     I32 minlen = 0;
-    U32 pm_flags;
+    U32 rx_flags;
+    SV * VOL pat;
 
     /* these are all flags - maybe they should be turned
      * into a single int with different bit masks */
@@ -4883,8 +5265,10 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     I32 sawplus = 0;
     I32 sawopen = 0;
     bool used_setjump = FALSE;
-    regex_charset initial_charset = get_regex_charset(orig_pm_flags);
-
+    regex_charset initial_charset = get_regex_charset(orig_rx_flags);
+    bool code_is_utf8 = 0;
+    bool VOL recompile = 0;
+    bool runtime_code = 0;
     U8 jump_ret = 0;
     dJMPENV;
     scan_data_t data;
@@ -4896,7 +5280,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 #endif    
     GET_RE_DEBUG_FLAGS_DECL;
 
-    PERL_ARGS_ASSERT_RE_COMPILE;
+    PERL_ARGS_ASSERT_RE_OP_COMPILE;
 
     DEBUG_r(if (!PL_colorset) reginitcolors());
 
@@ -4958,16 +5342,255 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     }
 #endif
 
-    exp = SvPV(pattern, plen);
+    pRExC_state->code_blocks = NULL;
+    pRExC_state->num_code_blocks = 0;
+
+    if (is_bare_re)
+       *is_bare_re = FALSE;
+
+    if (expr && (expr->op_type == OP_LIST ||
+               (expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) {
+
+       /* is the source UTF8, and how many code blocks are there? */
+       OP *o;
+       int ncode = 0;
+
+       for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+           if (o->op_type == OP_CONST && SvUTF8(cSVOPo_sv))
+               code_is_utf8 = 1;
+           else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
+               /* count of DO blocks */
+               ncode++;
+       }
+       if (ncode) {
+           pRExC_state->num_code_blocks = ncode;
+           Newx(pRExC_state->code_blocks, ncode, struct reg_code_block);
+       }
+    }
+
+    if (pat_count) {
+       /* handle a list of SVs */
+
+       SV **svp;
+
+       /* apply magic and RE overloading to each arg */
+       for (svp = patternp; svp < patternp + pat_count; svp++) {
+           SV *rx = *svp;
+           SvGETMAGIC(rx);
+           if (SvROK(rx) && SvAMAGIC(rx)) {
+               SV *sv = AMG_CALLunary(rx, regexp_amg);
+               if (sv) {
+                   if (SvROK(sv))
+                       sv = SvRV(sv);
+                   if (SvTYPE(sv) != SVt_REGEXP)
+                       Perl_croak(aTHX_ "Overloaded qr did not return a REGEXP");
+                   *svp = sv;
+               }
+           }
+       }
+
+       if (pat_count > 1) {
+           /* concat multiple args and find any code block indexes */
+
+           OP *o = NULL;
+           int n = 0;
+           bool utf8 = 0;
+            STRLEN orig_patlen = 0;
+
+           if (pRExC_state->num_code_blocks) {
+               o = cLISTOPx(expr)->op_first;
+               assert(o->op_type == OP_PUSHMARK);
+               o = o->op_sibling;
+           }
+
+           pat = newSVpvn("", 0);
+           SAVEFREESV(pat);
+
+           /* determine if the pattern is going to be utf8 (needed
+            * in advance to align code block indices correctly).
+            * XXX This could fail to be detected for an arg with
+            * overloading but not concat overloading; but the main effect
+            * in this obscure case is to need a 'use re eval' for a
+            * literal code block */
+           for (svp = patternp; svp < patternp + pat_count; svp++) {
+               if (SvUTF8(*svp))
+                   utf8 = 1;
+           }
+           if (utf8)
+               SvUTF8_on(pat);
+
+           for (svp = patternp; svp < patternp + pat_count; svp++) {
+               SV *sv, *msv = *svp;
+               SV *rx;
+               bool code = 0;
+               if (o) {
+                   if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+                       assert(n < pRExC_state->num_code_blocks);
+                       pRExC_state->code_blocks[n].start = SvCUR(pat);
+                       pRExC_state->code_blocks[n].block = o;
+                       pRExC_state->code_blocks[n].src_regex = NULL;
+                       n++;
+                       code = 1;
+                       o = o->op_sibling; /* skip CONST */
+                       assert(o);
+                   }
+                   o = o->op_sibling;;
+               }
+
+               if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
+                       (sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
+               {
+                   sv_setsv(pat, sv);
+                   /* overloading involved: all bets are off over literal
+                    * code. Pretend we haven't seen it */
+                   pRExC_state->num_code_blocks -= n;
+                   n = 0;
+                    rx = NULL;
+
+               }
+               else  {
+                    while (SvAMAGIC(msv)
+                            && (sv = AMG_CALLunary(msv, string_amg))
+                            && sv != msv)
+                    {
+                        msv = sv;
+                        SvGETMAGIC(msv);
+                    }
+                    if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP)
+                        msv = SvRV(msv);
+                    orig_patlen = SvCUR(pat);
+                    sv_catsv_nomg(pat, msv);
+                    rx = msv;
+                    if (code)
+                        pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
+                }
+
+               /* extract any code blocks within any embedded qr//'s */
+               if (rx && SvTYPE(rx) == SVt_REGEXP
+                   && RX_ENGINE((REGEXP*)rx)->op_comp)
+               {
+
+                   RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri);
+                   if (ri->num_code_blocks) {
+                       int i;
+                       /* the presence of an embedded qr// with code means
+                        * we should always recompile: the text of the
+                        * qr// may not have changed, but it may be a
+                        * different closure than last time */
+                       recompile = 1;
+                       Renew(pRExC_state->code_blocks,
+                           pRExC_state->num_code_blocks + ri->num_code_blocks,
+                           struct reg_code_block);
+                       pRExC_state->num_code_blocks += ri->num_code_blocks;
+                       for (i=0; i < ri->num_code_blocks; i++) {
+                           struct reg_code_block *src, *dst;
+                           STRLEN offset =  orig_patlen
+                               + ((struct regexp *)SvANY(rx))->pre_prefix;
+                           assert(n < pRExC_state->num_code_blocks);
+                           src = &ri->code_blocks[i];
+                           dst = &pRExC_state->code_blocks[n];
+                           dst->start      = src->start + offset;
+                           dst->end        = src->end   + offset;
+                           dst->block      = src->block;
+                           dst->src_regex  = (REGEXP*) SvREFCNT_inc( (SV*)
+                                                   src->src_regex
+                                                       ? src->src_regex
+                                                       : (REGEXP*)rx);
+                           n++;
+                       }
+                   }
+               }
+           }
+           SvSETMAGIC(pat);
+       }
+       else {
+            SV *sv;
+           pat = *patternp;
+            while (SvAMAGIC(pat)
+                    && (sv = AMG_CALLunary(pat, string_amg))
+                    && sv != pat)
+            {
+                pat = sv;
+                SvGETMAGIC(pat);
+            }
+        }
 
-    if (plen == 0) { /* ignore the utf8ness if the pattern is 0 length */
-       RExC_utf8 = RExC_orig_utf8 = 0;
+       /* handle bare regex: foo =~ $re */
+       {
+           SV *re = pat;
+           if (SvROK(re))
+               re = SvRV(re);
+           if (SvTYPE(re) == SVt_REGEXP) {
+               if (is_bare_re)
+                   *is_bare_re = TRUE;
+               SvREFCNT_inc(re);
+               Safefree(pRExC_state->code_blocks);
+               return (REGEXP*)re;
+           }
+       }
     }
     else {
-       RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
+       /* not a list of SVs, so must be a list of OPs */
+       assert(expr);
+       if (expr->op_type == OP_LIST) {
+           int i = -1;
+           bool is_code = 0;
+           OP *o;
+
+           pat = newSVpvn("", 0);
+           SAVEFREESV(pat);
+           if (code_is_utf8)
+               SvUTF8_on(pat);
+
+           /* given a list of CONSTs and DO blocks in expr, append all
+            * the CONSTs to pat, and record the start and end of each
+            * code block in code_blocks[] (each DO{} op is followed by an
+            * OP_CONST containing the corresponding literal '(?{...})
+            * text)
+            */
+           for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+               if (o->op_type == OP_CONST) {
+                   sv_catsv(pat, cSVOPo_sv);
+                   if (is_code) {
+                       pRExC_state->code_blocks[i].end = SvCUR(pat)-1;
+                       is_code = 0;
+                   }
+               }
+               else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+                   assert(i+1 < pRExC_state->num_code_blocks);
+                   pRExC_state->code_blocks[++i].start = SvCUR(pat);
+                   pRExC_state->code_blocks[i].block = o;
+                   pRExC_state->code_blocks[i].src_regex = NULL;
+                   is_code = 1;
+               }
+           }
+       }
+       else {
+           assert(expr->op_type == OP_CONST);
+           pat = cSVOPx_sv(expr);
+       }
+    }
+
+    exp = SvPV_nomg(pat, plen);
+
+    if (!eng->op_comp) {
+       if ((SvUTF8(pat) && IN_BYTES)
+               || SvGMAGICAL(pat) || SvAMAGIC(pat))
+       {
+           /* make a temporary copy; either to convert to bytes,
+            * or to avoid repeating get-magic / overloaded stringify */
+           pat = newSVpvn_flags(exp, plen, SVs_TEMP |
+                                       (IN_BYTES ? 0 : SvUTF8(pat)));
+       }
+       Safefree(pRExC_state->code_blocks);
+       return CALLREGCOMP_ENG(eng, pat, orig_rx_flags);
     }
+
+    /* ignore the utf8ness if the pattern is 0 length */
+    RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
     RExC_uni_semantics = 0;
     RExC_contains_locale = 0;
+    pRExC_state->runtime_code_qr = NULL;
 
     /****************** LONG JUMP TARGET HERE***********************/
     /* Longjmp back to here if have to switch in midstream to utf8 */
@@ -4988,7 +5611,10 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
         });
     }
     else {  /* longjumped back */
-        STRLEN len = plen;
+       U8 *src, *dst;
+       int n=0;
+       STRLEN s = 0, d = 0;
+       bool do_end = 0;
 
         /* If the cause for the longjmp was other than changing to utf8, pop
          * our own setjmp, and longjmp to the correct handler */
@@ -5009,19 +5635,81 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
         -- dmq */
         DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
            "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
-        exp = (char*)Perl_bytes_to_utf8(aTHX_
-                                       (U8*)SvPV_nomg(pattern, plen),
-                                       &len);
-        xend = exp + len;
-        RExC_orig_utf8 = RExC_utf8 = 1;
-        SAVEFREEPV(exp);
+
+       /* upgrade pattern to UTF8, and if there are code blocks,
+        * recalculate the indices.
+        * This is essentially an unrolled Perl_bytes_to_utf8() */
+
+       src = (U8*)SvPV_nomg(pat, plen);
+       Newx(dst, plen * 2 + 1, U8);
+
+       while (s < plen) {
+           const UV uv = NATIVE_TO_ASCII(src[s]);
+           if (UNI_IS_INVARIANT(uv))
+               dst[d]   = (U8)UTF_TO_NATIVE(uv);
+           else {
+               dst[d++] = (U8)UTF8_EIGHT_BIT_HI(uv);
+               dst[d]   = (U8)UTF8_EIGHT_BIT_LO(uv);
+           }
+           if (n < pRExC_state->num_code_blocks) {
+               if (!do_end && pRExC_state->code_blocks[n].start == s) {
+                   pRExC_state->code_blocks[n].start = d;
+                   assert(dst[d] == '(');
+                   do_end = 1;
+               }
+               else if (do_end && pRExC_state->code_blocks[n].end == s) {
+                   pRExC_state->code_blocks[n].end = d;
+                   assert(dst[d] == ')');
+                   do_end = 0;
+                   n++;
+               }
+           }
+           s++;
+           d++;
+       }
+       dst[d] = '\0';
+       plen = d;
+       exp = (char*) dst;
+       xend = exp + plen;
+       SAVEFREEPV(exp);
+       RExC_orig_utf8 = RExC_utf8 = 1;
+    }
+
+    /* return old regex if pattern hasn't changed */
+
+    if (   old_re
+        && !recompile
+       && !!RX_UTF8(old_re) == !!RExC_utf8
+       && RX_PRECOMP(old_re)
+       && RX_PRELEN(old_re) == plen
+       && memEQ(RX_PRECOMP(old_re), exp, plen))
+    {
+       /* with runtime code, always recompile */
+       runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
+                                           exp, plen);
+       if (!runtime_code) {
+           ReREFCNT_inc(old_re);
+           if (used_setjump) {
+               JMPENV_POP;
+           }
+           Safefree(pRExC_state->code_blocks);
+           return old_re;
+       }
     }
+    else if ((pm_flags & PMf_USE_RE_EVAL)
+               /* this second condition covers the non-regex literal case,
+                * i.e.  $foo =~ '(?{})'. */
+               || ( !PL_reg_state.re_reparsing && IN_PERL_COMPILETIME
+                   && (PL_hints & HINT_RE_EVAL))
+    )
+       runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
+                           exp, plen);
 
 #ifdef TRIE_STUDY_OPT
     restudied = 0;
 #endif
 
-    pm_flags = orig_pm_flags;
+    rx_flags = orig_rx_flags;
 
     if (initial_charset == REGEX_LOCALE_CHARSET) {
        RExC_contains_locale = 1;
@@ -5030,17 +5718,30 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 
        /* Set to use unicode semantics if the pattern is in utf8 and has the
         * 'depends' charset specified, as it means unicode when utf8  */
-       set_regex_charset(&pm_flags, REGEX_UNICODE_CHARSET);
+       set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
     }
 
     RExC_precomp = exp;
-    RExC_flags = pm_flags;
+    RExC_flags = rx_flags;
+    RExC_pm_flags = pm_flags;
+
+    if (runtime_code) {
+       if (PL_tainting && PL_tainted)
+           Perl_croak(aTHX_ "Eval-group in insecure regular expression");
+
+       if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
+           /* whoops, we have a non-utf8 pattern, whilst run-time code
+            * got compiled as utf8. Try again with a utf8 pattern */
+            JMPENV_JUMP(UTF8_LONGJMP);
+       }
+    }
+    assert(!pRExC_state->runtime_code_qr);
+
     RExC_sawback = 0;
 
     RExC_seen = 0;
     RExC_in_lookbehind = 0;
     RExC_seen_zerolen = *exp == '^' ? -1 : 0;
-    RExC_seen_evals = 0;
     RExC_extralen = 0;
     RExC_override_recoding = 0;
 
@@ -5063,6 +5764,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 #endif
     RExC_recurse = NULL;
     RExC_recurse_count = 0;
+    pRExC_state->code_index = 0;
 
 #if 0 /* REGC() is (currently) a NOP at the first pass.
        * Clever compilers notice this and complain. --jhi */
@@ -5075,6 +5777,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     );
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
        RExC_precomp = NULL;
+       Safefree(pRExC_state->code_blocks);
        return(NULL);
     }
 
@@ -5094,9 +5797,9 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 
     /* The first pass could have found things that force Unicode semantics */
     if ((RExC_utf8 || RExC_uni_semantics)
-        && get_regex_charset(pm_flags) == REGEX_DEPENDS_CHARSET)
+        && get_regex_charset(rx_flags) == REGEX_DEPENDS_CHARSET)
     {
-       set_regex_charset(&pm_flags, REGEX_UNICODE_CHARSET);
+       set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
     }
 
     /* Small enough for pointer-storage convention?
@@ -5127,8 +5830,15 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 
     /* non-zero initialization begins here */
     RXi_SET( r, ri );
-    r->engine= RE_ENGINE_PTR;
-    r->extflags = pm_flags;
+    r->engine= eng;
+    r->extflags = rx_flags;
+    if (pm_flags & PMf_IS_QR) {
+       ri->code_blocks = pRExC_state->code_blocks;
+       ri->num_code_blocks = pRExC_state->num_code_blocks;
+    }
+    else
+       SAVEFREEPV(pRExC_state->code_blocks);
+
     {
         bool has_p     = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
         bool has_charset = (get_regex_charset(r->extflags) != REGEX_DEPENDS_CHARSET);
@@ -5160,7 +5870,8 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 
         p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
        SvPOK_on(rx);
-       SvFLAGS(rx) |= SvUTF8(pattern);
+       if (RExC_utf8)
+           SvFLAGS(rx) |= SVf_UTF8;
         *p++='('; *p++='?';
 
         /* If a default, cover it using the caret */
@@ -5220,7 +5931,8 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     RExC_rxi = ri;
 
     /* Second pass: emit code. */
-    RExC_flags = pm_flags;     /* don't let top level (?i) bleed */
+    RExC_flags = rx_flags;     /* don't let top level (?i) bleed */
+    RExC_pm_flags = pm_flags;
     RExC_parse = exp;
     RExC_end = xend;
     RExC_naughty = 0;
@@ -5228,9 +5940,8 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     RExC_emit_start = ri->program;
     RExC_emit = ri->program;
     RExC_emit_bound = ri->program + RExC_size + 1;
+    pRExC_state->code_index = 0;
 
-    /* Store the count of eval-groups for security checks: */
-    RExC_rx->seen_evals = RExC_seen_evals;
     REGC((U8)REG_MAGIC, (char*) RExC_emit++);
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
        ReREFCNT_dec(rx);   
@@ -5383,7 +6094,7 @@ reStudy:
        else if ((!sawopen || !RExC_sawback) &&
            (OP(first) == STAR &&
            PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
-           !(r->extflags & RXf_ANCH) && !(RExC_seen & REG_SEEN_EVAL))
+           !(r->extflags & RXf_ANCH) && !pRExC_state->num_code_blocks)
        {
            /* turn .* into ^.* with an implied $*=1 */
            const int type =
@@ -5396,7 +6107,7 @@ reStudy:
            goto again;
        }
        if (sawplus && !sawlookahead && (!sawopen || !RExC_sawback)
-           && !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
+           && !pRExC_state->num_code_blocks) /* May examine pos and $& */
            /* x+ must match at the 1st pos of run of x's */
            r->intflags |= PREGf_SKIP;
 
@@ -5669,7 +6380,7 @@ reStudy:
        r->extflags |= RXf_GPOS_SEEN;
     if (RExC_seen & REG_SEEN_LOOKBEHIND)
        r->extflags |= RXf_LOOKBEHIND_SEEN;
-    if (RExC_seen & REG_SEEN_EVAL)
+    if (pRExC_state->num_code_blocks)
        r->extflags |= RXf_EVAL_SEEN;
     if (RExC_seen & REG_SEEN_CANY)
        r->extflags |= RXf_CANY_SEEN;
@@ -5677,6 +6388,8 @@ reStudy:
        r->intflags |= PREGf_VERBARG_SEEN;
     if (RExC_seen & REG_SEEN_CUTGROUP)
        r->intflags |= PREGf_CUTGROUP_SEEN;
+    if (pm_flags & PMf_USE_RE_EVAL)
+       r->intflags |= PREGf_USE_RE_EVAL;
     if (RExC_paren_names)
         RXp_PAREN_NAMES(r) = MUTABLE_HV(SvREFCNT_inc(RExC_paren_names));
     else
@@ -5747,8 +6460,6 @@ reStudy:
     return rx;
 }
 
-#undef RE_ENGINE_PTR
-
 
 SV*
 Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
@@ -6166,8 +6877,10 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
            do {
                RExC_parse++;
            } while (isALNUM(*RExC_parse));
+    } else {
+       RExC_parse++; /* so the <- from the vFAIL is after the offending character */
+        vFAIL("Group name must start with a non-digit word character");
     }
-
     if ( flags ) {
         SV* sv_name
            = newSVpvn_flags(name_start, (int)(RExC_parse - name_start),
@@ -6191,7 +6904,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
             Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
                       (unsigned long) flags);
         }
-        /* NOT REACHED */
+        assert(0); /* NOT REACHED */
     }
     return NULL;
 }
@@ -6518,9 +7231,8 @@ S_invlist_trim(pTHX_ SV* const invlist)
 
 #define _invlist_union_complement_2nd(a, b, output) _invlist_union_maybe_complement_2nd(a, b, TRUE, output)
 
-#ifndef PERL_IN_XSUB_RE
-void
-Perl__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end)
+STATIC void
+S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end)
 {
    /* Subject to change or removal.  Append the range from 'start' to 'end' at
     * the end of the inversion list.  The range must be above any existing
@@ -6598,6 +7310,8 @@ Perl__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV
     }
 }
 
+#ifndef PERL_IN_XSUB_RE
+
 STATIC IV
 S_invlist_search(pTHX_ SV* const invlist, const UV cp)
 {
@@ -7184,10 +7898,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
     return;
 }
 
-#endif
-
-STATIC SV*
-S_add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
+SV*
+Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
 {
     /* Add the range from 'start' to 'end' inclusive to the inversion list's
      * set.  A pointer to the inversion list is returned.  This may actually be
@@ -7228,9 +7940,11 @@ S_add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
     return invlist;
 }
 
+#endif
+
 PERL_STATIC_INLINE SV*
 S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) {
-    return add_range_to_invlist(invlist, cp, cp);
+    return _add_range_to_invlist(invlist, cp, cp);
 }
 
 #ifndef PERL_IN_XSUB_RE
@@ -7385,16 +8099,46 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
     return TRUE;
 }
 
-#ifndef PERL_IN_XSUB_RE
-SV *
-Perl__invlist_contents(pTHX_ SV* const invlist)
+PERL_STATIC_INLINE UV
+S_invlist_highest(pTHX_ SV* const invlist)
 {
-    /* Get the contents of an inversion list into a string SV so that they can
-     * be printed out.  It uses the format traditionally done for debug tracing
-     */
-
-    UV start, end;
-    SV* output = newSVpvs("\n");
+    /* Returns the highest code point that matches an inversion list.  This API
+     * has an ambiguity, as it returns 0 under either the highest is actually
+     * 0, or if the list is empty.  If this distinction matters to you, check
+     * for emptiness before calling this function */
+
+    UV len = invlist_len(invlist);
+    UV *array;
+
+    PERL_ARGS_ASSERT_INVLIST_HIGHEST;
+
+    if (len == 0) {
+       return 0;
+    }
+
+    array = invlist_array(invlist);
+
+    /* The last element in the array in the inversion list always starts a
+     * range that goes to infinity.  That range may be for code points that are
+     * matched in the inversion list, or it may be for ones that aren't
+     * matched.  In the latter case, the highest code point in the set is one
+     * less than the beginning of this range; otherwise it is the final element
+     * of this range: infinity */
+    return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
+           ? UV_MAX
+           : array[len - 1] - 1;
+}
+
+#ifndef PERL_IN_XSUB_RE
+SV *
+Perl__invlist_contents(pTHX_ SV* const invlist)
+{
+    /* Get the contents of an inversion list into a string SV so that they can
+     * be printed out.  It uses the format traditionally done for debug tracing
+     */
+
+    UV start, end;
+    SV* output = newSVpvs("\n");
 
     PERL_ARGS_ASSERT__INVLIST_CONTENTS;
 
@@ -7804,7 +8548,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
                 }
                 goto gen_recurse_regop;
-                /* NOT REACHED */
+                assert(0); /* NOT REACHED */
             case '+':
                 if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
                     RExC_parse++;
@@ -7874,7 +8618,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 nextchar(pRExC_state);
                 return ret;
             } /* named and numeric backreferences */
-            /* NOT REACHED */
+            assert(0); /* NOT REACHED */
 
            case '?':           /* (??...) */
                is_logical = 1;
@@ -7888,67 +8632,51 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                /* FALL THROUGH */
            case '{':           /* (?{...}) */
            {
-               I32 count = 1;
                U32 n = 0;
-               char c;
-               char *s = RExC_parse;
+               struct reg_code_block *cb;
 
                RExC_seen_zerolen++;
-               RExC_seen |= REG_SEEN_EVAL;
-               while (count && (c = *RExC_parse)) {
-                   if (c == '\\') {
-                       if (RExC_parse[1])
-                           RExC_parse++;
-                   }
-                   else if (c == '{')
-                       count++;
-                   else if (c == '}')
-                       count--;
-                   RExC_parse++;
-               }
-               if (*RExC_parse != ')') {
-                   RExC_parse = s;
-                   vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
+
+               if (   !pRExC_state->num_code_blocks
+                   || pRExC_state->code_index >= pRExC_state->num_code_blocks
+                   || pRExC_state->code_blocks[pRExC_state->code_index].start
+                       != (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
+                           - RExC_start)
+               ) {
+                   if (RExC_pm_flags & PMf_USE_RE_EVAL)
+                       FAIL("panic: Sequence (?{...}): no code block found\n");
+                   FAIL("Eval-group not allowed at runtime, use re 'eval'");
                }
+               /* this is a pre-compiled code block (?{...}) */
+               cb = &pRExC_state->code_blocks[pRExC_state->code_index];
+               RExC_parse = RExC_start + cb->end;
                if (!SIZE_ONLY) {
-                   PAD *pad;
-                   OP_4tree *sop, *rop;
-                   SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
-
-                   ENTER;
-                   Perl_save_re_context(aTHX);
-                   rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
-                   sop->op_private |= OPpREFCOUNTED;
-                   /* re_dup will OpREFCNT_inc */
-                   OpREFCNT_set(sop, 1);
-                   LEAVE;
-
-                   n = add_data(pRExC_state, 3, "nop");
-                   RExC_rxi->data->data[n] = (void*)rop;
-                   RExC_rxi->data->data[n+1] = (void*)sop;
-                   RExC_rxi->data->data[n+2] = (void*)pad;
-                   SvREFCNT_dec(sv);
-               }
-               else {                                          /* First pass */
-                   if (PL_reginterp_cnt < ++RExC_seen_evals
-                       && IN_PERL_RUNTIME)
-                       /* No compiled RE interpolated, has runtime
-                          components ===> unsafe.  */
-                       FAIL("Eval-group not allowed at runtime, use re 'eval'");
-                   if (PL_tainting && PL_tainted)
-                       FAIL("Eval-group in insecure regular expression");
-#if PERL_VERSION > 8
-                   if (IN_PERL_COMPILETIME)
-                       PL_cv_has_eval = 1;
-#endif
+                   OP *o = cb->block;
+                   if (cb->src_regex) {
+                       n = add_data(pRExC_state, 2, "rl");
+                       RExC_rxi->data->data[n] =
+                           (void*)SvREFCNT_inc((SV*)cb->src_regex);
+                       RExC_rxi->data->data[n+1] = (void*)o;
+                   }
+                   else {
+                       n = add_data(pRExC_state, 1,
+                              (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l");
+                       RExC_rxi->data->data[n] = (void*)o;
+                   }
                }
-
+               pRExC_state->code_index++;
                nextchar(pRExC_state);
+
                if (is_logical) {
+                    regnode *eval;
                    ret = reg_node(pRExC_state, LOGICAL);
-                   if (!SIZE_ONLY)
+                    eval = reganode(pRExC_state, EVAL, n);
+                   if (!SIZE_ONLY) {
                        ret->flags = 2;
-                    REGTAIL(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
+                        /* for later propagation into (??{}) return value */
+                        eval->flags = (U8) (RExC_flags & RXf_PMf_COMPILETIME);
+                    }
+                    REGTAIL(pRExC_state, ret, eval);
                     /* deal with the length of this later - MJD */
                    return ret;
                }
@@ -8378,9 +9106,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             }
            break;
        }
+        DEBUG_PARSE_r(if (!SIZE_ONLY) {
+            SV * const mysv_val1=sv_newmortal();
+            SV * const mysv_val2=sv_newmortal();
+            DEBUG_PARSE_MSG("lsbr");
+            regprop(RExC_rx, mysv_val1, lastbr);
+            regprop(RExC_rx, mysv_val2, ender);
+            PerlIO_printf(Perl_debug_log, "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+                          SvPV_nolen_const(mysv_val1),
+                          (IV)REG_NODE_NUM(lastbr),
+                          SvPV_nolen_const(mysv_val2),
+                          (IV)REG_NODE_NUM(ender),
+                          (IV)(ender - lastbr)
+            );
+        });
         REGTAIL(pRExC_state, lastbr, ender);
 
        if (have_branch && !SIZE_ONLY) {
+            char is_nothing= 1;
            if (depth==1)
                RExC_seen |= REG_TOP_LEVEL_BRANCHES;
 
@@ -8389,11 +9132,44 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                const U8 op = PL_regkind[OP(br)];
                if (op == BRANCH) {
                     REGTAIL_STUDY(pRExC_state, NEXTOPER(br), ender);
+                    if (OP(NEXTOPER(br)) != NOTHING || regnext(NEXTOPER(br)) != ender)
+                        is_nothing= 0;
                }
                else if (op == BRANCHJ) {
                     REGTAIL_STUDY(pRExC_state, NEXTOPER(NEXTOPER(br)), ender);
+                    /* for now we always disable this optimisation * /
+                    if (OP(NEXTOPER(NEXTOPER(br))) != NOTHING || regnext(NEXTOPER(NEXTOPER(br))) != ender)
+                    */
+                        is_nothing= 0;
                }
            }
+            if (is_nothing) {
+                br= PL_regkind[OP(ret)] != BRANCH ? regnext(ret) : ret;
+                DEBUG_PARSE_r(if (!SIZE_ONLY) {
+                    SV * const mysv_val1=sv_newmortal();
+                    SV * const mysv_val2=sv_newmortal();
+                    DEBUG_PARSE_MSG("NADA");
+                    regprop(RExC_rx, mysv_val1, ret);
+                    regprop(RExC_rx, mysv_val2, ender);
+                    PerlIO_printf(Perl_debug_log, "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+                                  SvPV_nolen_const(mysv_val1),
+                                  (IV)REG_NODE_NUM(ret),
+                                  SvPV_nolen_const(mysv_val2),
+                                  (IV)REG_NODE_NUM(ender),
+                                  (IV)(ender - ret)
+                    );
+                });
+                OP(br)= NOTHING;
+                if (OP(ender) == TAIL) {
+                    NEXT_OFF(br)= 0;
+                    RExC_emit= br + 1;
+                } else {
+                    regnode *opt;
+                    for ( opt= br + 1; opt < ender ; opt++ )
+                        OP(opt)= OPTIMIZED;
+                    NEXT_OFF(br)= ender - br;
+                }
+            }
        }
     }
 
@@ -8430,7 +9206,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
        }
        else
            FAIL("Junk on end of regexp");      /* "Can't happen". */
-       /* NOTREACHED */
+       assert(0); /* NOTREACHED */
     }
 
     if (RExC_in_lookbehind) {
@@ -8979,8 +9755,46 @@ S_reg_recode(pTHX_ const char value, SV **encp)
    sequence, we return.
 
    Note: we have to be careful with escapes, as they can be both literal
-   and special, and in the case of \10 and friends can either, depending
-   on context. Specifically there are two separate switches for handling
+   and special, and in the case of \10 and friends, context determines which.
+
+   A summary of the code structure is:
+
+   switch (first_byte) {
+       cases for each special:
+           handle this special;
+           break;
+       case '\\':
+           switch (2nd byte) {
+               cases for each unambiguous special:
+                   handle this special;
+                   break;
+               cases for each ambigous special/literal:
+                   disambiguate;
+                   if (special)  handle here
+                   else goto defchar;
+               default: // unambiguously literal:
+                   goto defchar;
+           }
+       default:  // is a literal char
+           // FALL THROUGH
+       defchar:
+           create EXACTish node for literal;
+           while (more input and node isn't full) {
+               switch (input_byte) {
+                  cases for each special;
+                       make sure parse pointer is set so that the next call to
+                           regatom will see this special first
+                       goto loopdone; // EXACTish node terminated by prev. char
+                  default:
+                      append char to EXACTISH node;
+               }
+               get next input byte;
+           }
+        loopdone:
+   }
+   return the generated node;
+
+   Specifically there are two separate switches for handling
    escape sequences, with the one for handling literal escapes requiring
    a dummy entry for all of the special escapes that are actually handled
    by the other.
@@ -9073,12 +9887,6 @@ tryagain:
        vFAIL("Internal urp");
                                /* Supposed to be caught earlier. */
        break;
-    case '{':
-       if (!regcurly(RExC_parse)) {
-           RExC_parse++;
-           goto defchar;
-       }
-       /* FALL THROUGH */
     case '?':
     case '+':
     case '*':
@@ -9140,43 +9948,17 @@ tryagain:
            *flagp |= HASWIDTH;
            goto finish_meta_pat;
        case 'w':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = ALNUML;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = ALNUMU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = ALNUMA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = ALNUM;
-                   break;
-               default:
-                   goto bad_charset;
+           op = ALNUM + get_regex_charset(RExC_flags);
+            if (op > ALNUMA) {  /* /aa is same as /a */
+                op = ALNUMA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'W':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NALNUML;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NALNUMU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NALNUMA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NALNUM;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NALNUM + get_regex_charset(RExC_flags);
+            if (op > NALNUMA) { /* /aa is same as /a */
+                op = NALNUMA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
@@ -9184,133 +9966,56 @@ tryagain:
        case 'b':
            RExC_seen_zerolen++;
            RExC_seen |= REG_SEEN_LOOKBEHIND;
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = BOUNDL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = BOUNDU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = BOUNDA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = BOUND;
-                   break;
-               default:
-                   goto bad_charset;
+           op = BOUND + get_regex_charset(RExC_flags);
+            if (op > BOUNDA) {  /* /aa is same as /a */
+                op = BOUNDA;
             }
            ret = reg_node(pRExC_state, op);
            FLAGS(ret) = get_regex_charset(RExC_flags);
            *flagp |= SIMPLE;
-           if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') {
-               ckWARNregdep(RExC_parse, "\"\\b{\" is deprecated; use \"\\b\\{\" instead");
-           }
            goto finish_meta_pat;
        case 'B':
            RExC_seen_zerolen++;
            RExC_seen |= REG_SEEN_LOOKBEHIND;
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NBOUNDL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NBOUNDU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NBOUNDA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NBOUND;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NBOUND + get_regex_charset(RExC_flags);
+            if (op > NBOUNDA) { /* /aa is same as /a */
+                op = NBOUNDA;
             }
            ret = reg_node(pRExC_state, op);
            FLAGS(ret) = get_regex_charset(RExC_flags);
            *flagp |= SIMPLE;
-           if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') {
-               ckWARNregdep(RExC_parse, "\"\\B{\" is deprecated; use \"\\B\\{\" instead");
-           }
            goto finish_meta_pat;
        case 's':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = SPACEL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = SPACEU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = SPACEA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = SPACE;
-                   break;
-               default:
-                   goto bad_charset;
+           op = SPACE + get_regex_charset(RExC_flags);
+            if (op > SPACEA) {  /* /aa is same as /a */
+                op = SPACEA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'S':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NSPACEL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NSPACEU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NSPACEA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NSPACE;
-                   break;
-               default:
-                   goto bad_charset;
-            }
-           ret = reg_node(pRExC_state, op);
-           *flagp |= HASWIDTH|SIMPLE;
-           goto finish_meta_pat;
-       case 'd':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = DIGITL;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = DIGITA;
-                   break;
-               case REGEX_DEPENDS_CHARSET: /* No difference between these */
-               case REGEX_UNICODE_CHARSET:
-                   op = DIGIT;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NSPACE + get_regex_charset(RExC_flags);
+            if (op > NSPACEA) { /* /aa is same as /a */
+                op = NSPACEA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'D':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NDIGITL;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NDIGITA;
-                   break;
-               case REGEX_DEPENDS_CHARSET: /* No difference between these */
-               case REGEX_UNICODE_CHARSET:
-                   op = NDIGIT;
-                   break;
-               default:
-                   goto bad_charset;
+            op = NDIGIT;
+            goto join_D_and_d;
+       case 'd':
+            op = DIGIT;
+        join_D_and_d:
+            {
+                U8 offset = get_regex_charset(RExC_flags);
+                if (offset == REGEX_UNICODE_CHARSET) {
+                    offset = REGEX_DEPENDS_CHARSET;
+                }
+                else if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
+                    offset = REGEX_ASCII_RESTRICTED_CHARSET;
+                }
+                op += offset;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
@@ -9461,6 +10166,7 @@ tryagain:
                         vFAIL("Reference to nonexistent or unclosed group");
                 }
                if (!isg && num > 9 && num >= RExC_npar)
+                    /* Probably a character specified in octal, e.g. \35 */
                    goto defchar;
                else {
                    char * const parse_start = RExC_parse - 1; /* MJD */
@@ -9538,14 +10244,18 @@ tryagain:
            bool is_exactfu_sharp_s;
 
            ender = 0;
-            node_type = ((! FOLD) ? EXACT
-                       : (LOC)
-                         ? EXACTFL
-                         : (MORE_ASCII_RESTRICTED)
-                           ? EXACTFA
-                           : (AT_LEAST_UNI_SEMANTICS)
-                             ? EXACTFU
-                             : EXACTF);
+            if (! FOLD) {
+                node_type = EXACT;
+            }
+            else {
+                node_type = get_regex_charset(RExC_flags);
+                if (node_type >= REGEX_ASCII_RESTRICTED_CHARSET) {
+                    node_type--; /* /a is same as /u, and map /aa's offset to
+                                    what /a's would have been, so there is no
+                                    hole */
+                }
+                node_type += EXACTF;
+            }
            ret = reg_node(pRExC_state, node_type);
            s = STRING(ret);
 
@@ -9675,38 +10385,39 @@ tryagain:
                            break;
                        }
                    case 'x':
-                       if (*++p == '{') {
-                           char* const e = strchr(p, '}');
+                       {
+                           STRLEN brace_len = len;
+                           UV result;
+                           const char* error_msg;
 
-                           if (!e) {
-                               RExC_parse = p + 1;
-                               vFAIL("Missing right brace on \\x{}");
+                           bool valid = grok_bslash_x(p,
+                                                      &result,
+                                                      &brace_len,
+                                                      &error_msg,
+                                                      1);
+                           p += brace_len;
+                           if (! valid) {
+                               RExC_parse = p; /* going to die anyway; point
+                                                  to exact spot of failure */
+                               vFAIL(error_msg);
                            }
                            else {
-                                I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                                    | PERL_SCAN_DISALLOW_PREFIX;
-                                STRLEN numlen = e - p - 1;
-                               ender = grok_hex(p + 1, &numlen, &flags, NULL);
-                               if (ender > 0xff)
-                                   REQUIRE_UTF8;
-                               p = e + 1;
+                               ender = result;
                            }
+                           if (PL_encoding && ender < 0x100) {
+                               goto recode_encoding;
+                           }
+                           if (ender > 0xff) {
+                               REQUIRE_UTF8;
+                           }
+                           break;
                        }
-                       else {
-                            I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-                           STRLEN numlen = 2;
-                           ender = grok_hex(p, &numlen, &flags, NULL);
-                           p += numlen;
-                       }
-                       if (PL_encoding && ender < 0x100)
-                           goto recode_encoding;
-                       break;
                    case 'c':
                        p++;
                        ender = grok_bslash_c(*p++, UTF, SIZE_ONLY);
                        break;
                    case '0': case '1': case '2': case '3':case '4':
-                   case '5': case '6': case '7': case '8':case '9':
+                   case '5': case '6': case '7':
                        if (*p == '0' ||
                            (isDIGIT(p[1]) && atoi(p) >= RExC_npar))
                        {
@@ -9739,16 +10450,23 @@ tryagain:
                            FAIL("Trailing \\");
                        /* FALL THROUGH */
                    default:
-                       if (!SIZE_ONLY&& isALPHA(*p)) {
-                           /* Include any { following the alpha to emphasize
-                            * that it could be part of an escape at some point
-                            * in the future */
-                           int len = (*(p + 1) == '{') ? 2 : 1;
-                           ckWARN3reg(p + len, "Unrecognized escape \\%.*s passed through", len, p);
+                       if (!SIZE_ONLY&& isALNUMC(*p)) {
+                           ckWARN2reg(p + 1, "Unrecognized escape \\%.1s passed through", p);
                        }
                        goto normal_default;
                    }
                    break;
+               case '{':
+                   /* Currently we don't warn when the lbrace is at the start
+                    * of a construct.  This catches it in the middle of a
+                    * literal string, or when its the first thing after
+                    * something like "\b" */
+                   if (! SIZE_ONLY
+                       && (len || (p > RExC_start && isALPHA_A(*(p -1)))))
+                   {
+                       ckWARNregdep(p + 1, "Unescaped left brace in regex is deprecated, passed through");
+                   }
+                   /*FALLTHROUGH*/
                default:
                  normal_default:
                    if (UTF8_IS_START(*p) && UTF) {
@@ -9931,11 +10649,6 @@ tryagain:
     }
 
     return(ret);
-
-/* Jumped to when an unrecognized character set is encountered */
-bad_charset:
-    Perl_croak(aTHX_ "panic: Unknown regex character set encoding: %u", get_regex_charset(RExC_flags));
-    return(NULL);
 }
 
 STATIC char *
@@ -10206,6 +10919,14 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
  * time */
 #define DO_POSIX_LATIN1_ONLY_KNOWN(node, class, destlist, sourcelist,      \
                               l1_sourcelist, Xpropertyname, run_time_list) \
+       /* First, resolve whether to use the ASCII-only list or the L1     \
+        * list */                                                         \
+        DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(node, class, destlist,      \
+                ((AT_LEAST_ASCII_RESTRICTED) ? sourcelist : l1_sourcelist),\
+                Xpropertyname, run_time_list)
+
+#define DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(node, class, destlist, sourcelist, \
+                Xpropertyname, run_time_list)                              \
     /* If not /a matching, there are going to be code points we will have  \
      * to defer to runtime to look-up */                                   \
     if (! AT_LEAST_ASCII_RESTRICTED) {                                     \
@@ -10215,24 +10936,22 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
         ANYOF_CLASS_SET(node, class);                                      \
     }                                                                      \
     else {                                                                 \
-        _invlist_union(destlist,                                           \
-                       (AT_LEAST_ASCII_RESTRICTED)                         \
-                           ? sourcelist                                    \
-                           : l1_sourcelist,                                \
-                       &destlist);                                         \
+        _invlist_union(destlist, sourcelist, &destlist);                   \
     }
 
 /* Like DO_POSIX_LATIN1_ONLY_KNOWN, but for the complement.  A combination of
- * this and DO_N_POSIX */
+ * this and DO_N_POSIX.  Sets <matches_above_unicode> only if it can; unchanged
+ * otherwise */
 #define DO_N_POSIX_LATIN1_ONLY_KNOWN(node, class, destlist, sourcelist,    \
-                              l1_sourcelist, Xpropertyname, run_time_list) \
+       l1_sourcelist, Xpropertyname, run_time_list, matches_above_unicode) \
     if (AT_LEAST_ASCII_RESTRICTED) {                                       \
         _invlist_union_complement_2nd(destlist, sourcelist, &destlist);    \
     }                                                                      \
     else {                                                                 \
         Perl_sv_catpvf(aTHX_ run_time_list, "!utf8::%s\n", Xpropertyname); \
+        matches_above_unicode = TRUE;                                      \
        if (LOC) {                                                         \
-           ANYOF_CLASS_SET(node, namedclass);                             \
+            ANYOF_CLASS_SET(node, namedclass);                            \
        }                                                                  \
        else {                                                             \
             SV* scratch_list = NULL;                                       \
@@ -10250,171 +10969,6 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
        }                                                                  \
     }
 
-STATIC U8
-S_set_regclass_bit_fold(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr)
-{
-
-    /* Handle the setting of folds in the bitmap for non-locale ANYOF nodes.
-     * Locale folding is done at run-time, so this function should not be
-     * called for nodes that are for locales.
-     *
-     * This function sets the bit corresponding to the fold of the input
-     * 'value', if not already set.  The fold of 'f' is 'F', and the fold of
-     * 'F' is 'f'.
-     *
-     * It also knows about the characters that are in the bitmap that have
-     * folds that are matchable only outside it, and sets the appropriate lists
-     * and flags.
-     *
-     * It returns the number of bits that actually changed from 0 to 1 */
-
-    U8 stored = 0;
-    U8 fold;
-
-    PERL_ARGS_ASSERT_SET_REGCLASS_BIT_FOLD;
-
-    fold = (AT_LEAST_UNI_SEMANTICS) ? PL_fold_latin1[value]
-                                    : PL_fold[value];
-
-    /* It assumes the bit for 'value' has already been set */
-    if (fold != value && ! ANYOF_BITMAP_TEST(node, fold)) {
-        ANYOF_BITMAP_SET(node, fold);
-        stored++;
-    }
-    if (_HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value) && (! isASCII(value) || ! MORE_ASCII_RESTRICTED)) {
-       /* Certain Latin1 characters have matches outside the bitmap.  To get
-        * here, 'value' is one of those characters.   None of these matches is
-        * valid for ASCII characters under /aa, which have been excluded by
-        * the 'if' above.  The matches fall into three categories:
-        * 1) They are singly folded-to or -from an above 255 character, as
-        *    LATIN SMALL LETTER Y WITH DIAERESIS and LATIN CAPITAL LETTER Y
-        *    WITH DIAERESIS;
-        * 2) They are part of a multi-char fold with another character in the
-        *    bitmap, only LATIN SMALL LETTER SHARP S => "ss" fits that bill;
-        * 3) They are part of a multi-char fold with a character not in the
-        *    bitmap, such as various ligatures.
-        * We aren't dealing fully with multi-char folds, except we do deal
-        * with the pattern containing a character that has a multi-char fold
-        * (not so much the inverse).
-        * For types 1) and 3), the matches only happen when the target string
-        * is utf8; that's not true for 2), and we set a flag for it.
-        *
-        * The code below adds to the passed in inversion list the single fold
-        * closures for 'value'.  The values are hard-coded here so that an
-        * innocent-looking character class, like /[ks]/i won't have to go out
-        * to disk to find the possible matches.  XXX It would be better to
-        * generate these via regen, in case a new version of the Unicode
-        * standard adds new mappings, though that is not really likely. */
-       switch (value) {
-           case 'k':
-           case 'K':
-               /* KELVIN SIGN */
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212A);
-               break;
-           case 's':
-           case 'S':
-               /* LATIN SMALL LETTER LONG S */
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x017F);
-               break;
-           case MICRO_SIGN:
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                                GREEK_SMALL_LETTER_MU);
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                                GREEK_CAPITAL_LETTER_MU);
-               break;
-           case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
-           case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
-               /* ANGSTROM SIGN */
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212B);
-               if (DEPENDS_SEMANTICS) {    /* See DEPENDS comment below */
-                   *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                                    PL_fold_latin1[value]);
-               }
-               break;
-           case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                       LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
-               break;
-           case LATIN_SMALL_LETTER_SHARP_S:
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                       LATIN_CAPITAL_LETTER_SHARP_S);
-
-               /* Under /a, /d, and /u, this can match the two chars "ss" */
-               if (! MORE_ASCII_RESTRICTED) {
-                   add_alternate(alternate_ptr, (U8 *) "ss", 2);
-
-                   /* And under /u or /a, it can match even if the target is
-                    * not utf8 */
-                   if (AT_LEAST_UNI_SEMANTICS) {
-                       ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8;
-                   }
-               }
-               break;
-           case 'F': case 'f':
-           case 'I': case 'i':
-           case 'L': case 'l':
-           case 'T': case 't':
-           case 'A': case 'a':
-           case 'H': case 'h':
-           case 'J': case 'j':
-           case 'N': case 'n':
-           case 'W': case 'w':
-           case 'Y': case 'y':
-                /* These all are targets of multi-character folds from code
-                 * points that require UTF8 to express, so they can't match
-                 * unless the target string is in UTF-8, so no action here is
-                 * necessary, as regexec.c properly handles the general case
-                 * for UTF-8 matching */
-               break;
-           default:
-               /* Use deprecated warning to increase the chances of this
-                * being output */
-               ckWARN2regdep(RExC_parse, "Perl folding rules are not up-to-date for 0x%x; please use the perlbug utility to report;", value);
-               break;
-       }
-    }
-    else if (DEPENDS_SEMANTICS
-           && ! isASCII(value)
-           && PL_fold_latin1[value] != value)
-    {
-          /* Under DEPENDS rules, non-ASCII Latin1 characters match their
-           * folds only when the target string is in UTF-8.  We add the fold
-           * here to the list of things to match outside the bitmap, which
-           * won't be looked at unless it is UTF8 (or else if something else
-           * says to look even if not utf8, but those things better not happen
-           * under DEPENDS semantics. */
-       *invlist_ptr = add_cp_to_invlist(*invlist_ptr, PL_fold_latin1[value]);
-    }
-
-    return stored;
-}
-
-
-PERL_STATIC_INLINE U8
-S_set_regclass_bit(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr)
-{
-    /* This inline function sets a bit in the bitmap if not already set, and if
-     * appropriate, its fold, returning the number of bits that actually
-     * changed from 0 to 1 */
-
-    U8 stored;
-
-    PERL_ARGS_ASSERT_SET_REGCLASS_BIT;
-
-    if (ANYOF_BITMAP_TEST(node, value)) {   /* Already set */
-       return 0;
-    }
-
-    ANYOF_BITMAP_SET(node, value);
-    stored = 1;
-
-    if (FOLD && ! LOC) {       /* Locale folds aren't known until runtime */
-       stored += set_regclass_bit_fold(pRExC_state, node, value, invlist_ptr, alternate_ptr);
-    }
-
-    return stored;
-}
-
 STATIC void
 S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len)
 {
@@ -10450,7 +11004,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
     UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
     register regnode *ret;
     STRLEN numlen;
-    IV namedclass;
+    IV namedclass = OOB_NAMEDCLASS;
     char *rangebegin = NULL;
     bool need_class = 0;
     bool allow_full_fold = TRUE;   /* Assume wants multi-char folding */
@@ -10458,10 +11012,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
     STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
                                      than just initialized.  */
     SV* properties = NULL;    /* Code points that match \p{} \P{} */
+    SV* posixes = NULL;     /* Code points that match classes like, [:word:],
+                               extended beyond the Latin1 range */
     UV element_count = 0;   /* Number of distinct elements in the class.
                               Optimizations may be possible if this is tiny */
     UV n;
 
+    /* Certain named classes have equivalents that can appear outside a
+     * character class, e.g. \w.  These flags are set for these classes.  The
+     * first flag indicates the op depends on the character set modifier, like
+     * /d, /u....  The second is for those that don't have this dependency. */
+    bool has_special_charset_op = FALSE;
+    bool has_special_non_charset_op = FALSE;
+
     /* Unicode properties are stored in a swash; this holds the current one
      * being parsed.  If this swash is the only above-latin1 component of the
      * character class, an optimization is to pass it directly on to the
@@ -10472,33 +11035,30 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
 
     /* Set if a component of this character class is user-defined; just passed
      * on to the engine */
-    UV has_user_defined_property = 0;
-
-    /* code points this node matches that can't be stored in the bitmap */
-    SV* nonbitmap = NULL;
-
-    /* The items that are to match that aren't stored in the bitmap, but are a
-     * result of things that are stored there.  This is the fold closure of
-     * such a character, either because it has DEPENDS semantics and shouldn't
-     * be matched unless the target string is utf8, or is a code point that is
-     * too large for the bit map, as for example, the fold of the MICRO SIGN is
-     * above 255.  This all is solely for performance reasons.  By having this
-     * code know the outside-the-bitmap folds that the bitmapped characters are
-     * involved with, we don't have to go out to disk to find the list of
-     * matches, unless the character class includes code points that aren't
-     * storable in the bit map.  That means that a character class with an 's'
-     * in it, for example, doesn't need to go out to disk to find everything
-     * that matches.  A 2nd list is used so that the 'nonbitmap' list is kept
-     * empty unless there is something whose fold we don't know about, and will
-     * have to go out to the disk to find. */
-    SV* l1_fold_invlist = NULL;
+    bool has_user_defined_property = FALSE;
+
+    /* inversion list of code points this node matches only when the target
+     * string is in UTF-8.  (Because is under /d) */
+    SV* depends_list = NULL;
+
+    /* inversion list of code points this node matches.  For much of the
+     * function, it includes only those that match regardless of the utf8ness
+     * of the target string */
+    SV* cp_list = NULL;
 
     /* List of multi-character folds that are matched by this node */
     AV* unicode_alternate  = NULL;
 #ifdef EBCDIC
+    /* In a range, counts how many 0-2 of the ends of it came from literals,
+     * not escapes.  Thus we can tell if 'A' was input vs \x{C1} */
     UV literal_endpoint = 0;
 #endif
     UV stored = 0;  /* how many chars stored in the bitmap */
+    bool invert = FALSE;    /* Is this class to be complemented */
+
+    /* Is there any thing like \W or [:^digit:] that matches above the legal
+     * Unicode range? */
+    bool runtime_posix_matches_above_Unicode = FALSE;
 
     regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
         case we need to change the emitted regop to an EXACT. */
@@ -10523,8 +11083,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
     if (UCHARAT(RExC_parse) == '^') {  /* Complement of range. */
        RExC_naughty++;
        RExC_parse++;
-       if (!SIZE_ONLY)
-           ANYOF_FLAGS(ret) |= ANYOF_INVERT;
+        invert = TRUE;
 
         /* We have decided to not allow multi-char folds in inverted character
         * classes, due to the confusion that can happen, especially with
@@ -10649,6 +11208,7 @@ parseit:
                     SV** invlistsvp;
                     SV* invlist;
                     char* name;
+
                    if (UCHARAT(RExC_parse) == '^') {
                         RExC_parse++;
                         n--;
@@ -10703,7 +11263,7 @@ parseit:
                         Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n",
                                         (value == 'p' ? '+' : '!'),
                                         name);
-                        has_user_defined_property = 1;
+                        has_user_defined_property = TRUE;
 
                         /* We don't know yet, so have to assume that the
                          * property could match something in the Latin1 range,
@@ -10715,17 +11275,14 @@ parseit:
                         /* Here, did get the swash and its inversion list.  If
                          * the swash is from a user-defined property, then this
                          * whole character class should be regarded as such */
-                        SV** user_defined_svp =
-                                            hv_fetchs(MUTABLE_HV(SvRV(swash)),
-                                                        "USER_DEFINED", FALSE);
-                        if (user_defined_svp) {
-                            has_user_defined_property
-                                                    |= SvUV(*user_defined_svp);
-                        }
+                        has_user_defined_property =
+                                                _is_swash_user_defined(swash);
 
                         /* Invert if asking for the complement */
                         if (value == 'P') {
-                           _invlist_union_complement_2nd(properties, invlist, &properties);
+                           _invlist_union_complement_2nd(properties,
+                                                          invlist,
+                                                          &properties);
 
                             /* The swash can't be used as-is, because we've
                             * inverted things; delay removing it to here after
@@ -10772,22 +11329,18 @@ parseit:
                }
                break;
            case 'x':
-               if (*RExC_parse == '{') {
-                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                        | PERL_SCAN_DISALLOW_PREFIX;
-                   char * const e = strchr(RExC_parse++, '}');
-                    if (!e)
-                        vFAIL("Missing right brace on \\x{}");
-
-                   numlen = e - RExC_parse;
-                   value = grok_hex(RExC_parse, &numlen, &flags, NULL);
-                   RExC_parse = e + 1;
-               }
-               else {
-                    I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-                   numlen = 2;
-                   value = grok_hex(RExC_parse, &numlen, &flags, NULL);
+               RExC_parse--;   /* function expects to be pointed at the 'x' */
+               {
+                   const char* error_msg;
+                   bool valid = grok_bslash_x(RExC_parse,
+                                              &value,
+                                              &numlen,
+                                              &error_msg,
+                                              1);
                    RExC_parse += numlen;
+                   if (! valid) {
+                       vFAIL(error_msg);
+                   }
                }
                if (PL_encoding && value < 0x100)
                    goto recode_encoding;
@@ -10831,15 +11384,20 @@ parseit:
            literal_endpoint++;
 #endif
 
-       if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
-
-           /* What matches in a locale is not known until runtime, so need to
-            * (one time per class) allocate extra space to pass to regexec.
-            * The space will contain a bit for each named class that is to be
-            * matched against.  This isn't needed for \p{} and pseudo-classes,
-            * as they are not affected by locale, and hence are dealt with
-            * separately */
-           if (LOC && namedclass < ANYOF_MAX && ! need_class) {
+            /* What matches in a locale is not known until runtime.  This
+             * includes what the Posix classes (like \w, [:space:]) match.
+             * Room must be reserved (one time per class) to store such
+             * classes, either if Perl is compiled so that locale nodes always
+             * should have this space, or if there is such class info to be
+             * stored.  The space will contain a bit for each named class that
+             * is to be matched against.  This isn't needed for \p{} and
+             * pseudo-classes, as they are not affected by locale, and hence
+             * are dealt with separately */
+           if (LOC
+                && ! need_class
+                && (ANYOF_LOCALE == ANYOF_CLASS
+                    || (namedclass > OOB_NAMEDCLASS && namedclass < ANYOF_MAX)))
+            {
                need_class = 1;
                if (SIZE_ONLY) {
                    RExC_size += ANYOF_CLASS_SKIP - ANYOF_SKIP;
@@ -10851,6 +11409,8 @@ parseit:
                ANYOF_FLAGS(ret) |= ANYOF_CLASS;
            }
 
+       if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
+
            /* a bad range like a-\d, a-[:digit:].  The '-' is taken as a
             * literal, as is the character that began the false range, i.e.
             * the 'a' in the examples */
@@ -10862,51 +11422,75 @@ parseit:
                    ckWARN4reg(RExC_parse,
                               "False [] range \"%*.*s\"",
                               w, w, rangebegin);
-
-                   stored +=
-                         set_regclass_bit(pRExC_state, ret, '-', &l1_fold_invlist, &unicode_alternate);
-                   if (prevvalue < 256) {
-                       stored +=
-                         set_regclass_bit(pRExC_state, ret, (U8) prevvalue, &l1_fold_invlist, &unicode_alternate);
-                   }
-                   else {
-                       nonbitmap = add_cp_to_invlist(nonbitmap, prevvalue);
-                   }
+                    cp_list = add_cp_to_invlist(cp_list, '-');
+                    cp_list = add_cp_to_invlist(cp_list, prevvalue);
                }
 
                range = 0; /* this was not a true range */
+                element_count += 2; /* So counts for three values */
            }
 
-           if (!SIZE_ONLY) {
+           if (SIZE_ONLY) {
+
+                /* In the first pass, do a little extra work so below can
+                 * possibly optimize the whole node to one of the nodes that
+                 * correspond to the classes given below */
+
+                /* The optimization will only take place if there is a single
+                 * element in the class, so can skip if there is more than one
+                 */
+                if (element_count == 1) {
 
                /* Possible truncation here but in some 64-bit environments
                 * the compiler gets heartburn about switch on 64-bit values.
                 * A similar issue a little earlier when switching on value.
                 * --jhi */
+                    switch ((I32)namedclass) {
+                        case ANYOF_ALNUM:
+                        case ANYOF_NALNUM:
+                        case ANYOF_DIGIT:
+                        case ANYOF_NDIGIT:
+                        case ANYOF_SPACE:
+                        case ANYOF_NSPACE:
+                            has_special_charset_op = TRUE;
+                            break;
+
+                        case ANYOF_HORIZWS:
+                        case ANYOF_NHORIZWS:
+                        case ANYOF_VERTWS:
+                        case ANYOF_NVERTWS:
+                            has_special_non_charset_op = TRUE;
+                            break;
+                    }
+                }
+            }
+            else {
                switch ((I32)namedclass) {
 
                case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                         PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
                    break;
                case ANYOF_NALNUMC:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv,
+                        runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_ALPHA:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                         PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
                    break;
                case ANYOF_NALPHA:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv,
+                        runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_ASCII:
                    if (LOC) {
                        ANYOF_CLASS_SET(ret, namedclass);
                    }
                     else {
-                        _invlist_union(properties, PL_ASCII, &properties);
+                        _invlist_union(posixes, PL_ASCII, &posixes);
                     }
                    break;
                case ANYOF_NASCII:
@@ -10914,58 +11498,65 @@ parseit:
                        ANYOF_CLASS_SET(ret, namedclass);
                    }
                     else {
-                        _invlist_union_complement_2nd(properties,
-                                                    PL_ASCII, &properties);
+                        _invlist_union_complement_2nd(posixes,
+                                                    PL_ASCII, &posixes);
                         if (DEPENDS_SEMANTICS) {
                             ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
                         }
                     }
                    break;
                case ANYOF_BLANK:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                             PL_PosixBlank, PL_XPosixBlank);
                    break;
                case ANYOF_NBLANK:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PosixBlank, PL_XPosixBlank);
                    break;
                case ANYOF_CNTRL:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                             PL_PosixCntrl, PL_XPosixCntrl);
                    break;
                case ANYOF_NCNTRL:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PosixCntrl, PL_XPosixCntrl);
                    break;
                case ANYOF_DIGIT:
-                   /* Ignore the compiler warning for this macro, planned to
-                    * be eliminated later */
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv);
+                   /* There are no digits in the Latin1 range outside of
+                    * ASCII, so call the macro that doesn't have to resolve
+                    * them */
+                   DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, posixes,
+                        PL_PosixDigit, "XPosixDigit", listsv);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_NDIGIT:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv,
+                        runtime_posix_matches_above_Unicode);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_GRAPH:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                         PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
                    break;
                case ANYOF_NGRAPH:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv,
+                        runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_HORIZWS:
-                   /* For these, we use the nonbitmap, as /d doesn't make a
+                   /* For these, we use the cp_list, as /d doesn't make a
                     * difference in what these match.  There would be problems
                     * if these characters had folds other than themselves, as
-                    * nonbitmap is subject to folding.  It turns out that \h
+                    * cp_list is subject to folding.  It turns out that \h
                     * is just a synonym for XPosixBlank */
-                   _invlist_union(nonbitmap, PL_XPosixBlank, &nonbitmap);
+                   _invlist_union(cp_list, PL_XPosixBlank, &cp_list);
+                    has_special_non_charset_op = TRUE;
                    break;
                case ANYOF_NHORIZWS:
-                    _invlist_union_complement_2nd(nonbitmap,
-                                                 PL_XPosixBlank, &nonbitmap);
+                    _invlist_union_complement_2nd(cp_list,
+                                                 PL_XPosixBlank, &cp_list);
+                    has_special_non_charset_op = TRUE;
                    break;
                case ANYOF_LOWER:
                case ANYOF_NLOWER:
@@ -10988,46 +11579,51 @@ parseit:
                        Xname = "XPosixLower";
                    }
                    if (namedclass == ANYOF_LOWER) {
-                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                                     ascii_source, l1_source, Xname, listsv);
                    }
                    else {
                        DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
-                            properties, ascii_source, l1_source, Xname, listsv);
+                            posixes, ascii_source, l1_source, Xname, listsv,
+                            runtime_posix_matches_above_Unicode);
                    }
                    break;
                }
                case ANYOF_PRINT:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                         PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
                    break;
                case ANYOF_NPRINT:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv,
+                        runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_PUNCT:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                         PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
                    break;
                case ANYOF_NPUNCT:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv,
+                        runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_PSXSPC:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                             PL_PosixSpace, PL_XPosixSpace);
                    break;
                case ANYOF_NPSXSPC:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PosixSpace, PL_XPosixSpace);
                    break;
                case ANYOF_SPACE:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                             PL_PerlSpace, PL_XPerlSpace);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_NSPACE:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PerlSpace, PL_XPerlSpace);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_UPPER:   /* Same as LOWER, above */
                case ANYOF_NUPPER:
@@ -11047,40 +11643,46 @@ parseit:
                        Xname = "XPosixUpper";
                    }
                    if (namedclass == ANYOF_UPPER) {
-                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                                     ascii_source, l1_source, Xname, listsv);
                    }
                    else {
                        DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
-                        properties, ascii_source, l1_source, Xname, listsv);
+                        posixes, ascii_source, l1_source, Xname, listsv,
+                        runtime_posix_matches_above_Unicode);
                    }
                    break;
                }
                case ANYOF_ALNUM:   /* Really is 'Word' */
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                             PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_NALNUM:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                            PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                            PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv,
+                            runtime_posix_matches_above_Unicode);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_VERTWS:
-                   /* For these, we use the nonbitmap, as /d doesn't make a
+                   /* For these, we use the cp_list, as /d doesn't make a
                     * difference in what these match.  There would be problems
                     * if these characters had folds other than themselves, as
-                    * nonbitmap is subject to folding */
-                   _invlist_union(nonbitmap, PL_VertSpace, &nonbitmap);
+                    * cp_list is subject to folding */
+                   _invlist_union(cp_list, PL_VertSpace, &cp_list);
+                    has_special_non_charset_op = TRUE;
                    break;
                case ANYOF_NVERTWS:
-                    _invlist_union_complement_2nd(nonbitmap,
-                                                    PL_VertSpace, &nonbitmap);
+                    _invlist_union_complement_2nd(cp_list,
+                                                    PL_VertSpace, &cp_list);
+                    has_special_non_charset_op = TRUE;
                    break;
                case ANYOF_XDIGIT:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                             PL_PosixXDigit, PL_XPosixXDigit);
                    break;
                case ANYOF_NXDIGIT:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PosixXDigit, PL_XPosixXDigit);
                    break;
                case ANYOF_MAX:
@@ -11120,9 +11722,8 @@ parseit:
                               "False [] range \"%*.*s\"",
                               w, w, rangebegin);
                    }
-                   if (!SIZE_ONLY)
-                       stored +=
-                            set_regclass_bit(pRExC_state, ret, '-', &l1_fold_invlist, &unicode_alternate);
+                    if (!SIZE_ONLY)
+                        cp_list = add_cp_to_invlist(cp_list, '-');
                } else
                    range = 1;  /* yeah, it's a range! */
                continue;       /* but do it the next time */
@@ -11137,117 +11738,344 @@ parseit:
 
        /* now is the next time */
        if (!SIZE_ONLY) {
-           if (prevvalue < 256) {
-               const IV ceilvalue = value < 256 ? value : 255;
-               IV i;
-#ifdef EBCDIC
-               /* In EBCDIC [\x89-\x91] should include
-                * the \x8e but [i-j] should not. */
-               if (literal_endpoint == 2 &&
-                   ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
-                    (isUPPER(prevvalue) && isUPPER(ceilvalue))))
-               {
-                   if (isLOWER(prevvalue)) {
-                       for (i = prevvalue; i <= ceilvalue; i++)
-                           if (isLOWER(i) && !ANYOF_BITMAP_TEST(ret,i)) {
-                               stored +=
-                                  set_regclass_bit(pRExC_state, ret, (U8) i, &l1_fold_invlist, &unicode_alternate);
-                           }
-                   } else {
-                       for (i = prevvalue; i <= ceilvalue; i++)
-                           if (isUPPER(i) && !ANYOF_BITMAP_TEST(ret,i)) {
-                               stored +=
-                                  set_regclass_bit(pRExC_state, ret, (U8) i, &l1_fold_invlist, &unicode_alternate);
-                           }
-                   }
-               }
-               else
-#endif
-                     for (i = prevvalue; i <= ceilvalue; i++) {
-                       stored += set_regclass_bit(pRExC_state, ret, (U8) i, &l1_fold_invlist, &unicode_alternate);
-                     }
-         }
-         if (value > 255) {
-           const UV prevnatvalue  = NATIVE_TO_UNI(prevvalue);
-           const UV natvalue      = NATIVE_TO_UNI(value);
-           nonbitmap = add_range_to_invlist(nonbitmap, prevnatvalue, natvalue);
-       }
-#ifdef EBCDIC
-           literal_endpoint = 0;
+#ifndef EBCDIC
+            cp_list = _add_range_to_invlist(cp_list, prevvalue, value);
+#else
+            UV* this_range = _new_invlist(1);
+            _append_range_to_invlist(this_range, prevvalue, value);
+
+            /* In EBCDIC, the ranges 'A-Z' and 'a-z' are each not contiguous.
+             * If this range was specified using something like 'i-j', we want
+             * to include only the 'i' and the 'j', and not anything in
+             * between, so exclude non-ASCII, non-alphabetics from it.
+             * However, if the range was specified with something like
+             * [\x89-\x91] or [\x89-j], all code points within it should be
+             * included.  literal_endpoint==2 means both ends of the range used
+             * a literal character, not \x{foo} */
+           if (literal_endpoint == 2
+                && (prevvalue >= 'a' && value <= 'z')
+                    || (prevvalue >= 'A' && value <= 'Z'))
+            {
+                _invlist_intersection(this_range, PL_ASCII, &this_range, );
+                _invlist_intersection(this_range, PL_Alpha, &this_range, );
+            }
+            _invlist_union(cp_list, this_range, &cp_list);
+            literal_endpoint = 0;
 #endif
         }
 
        range = 0; /* this range (if it was one) is done now */
     }
 
+    /* [\w] can be optimized into \w, but not if there is anything else in the
+     * brackets (except for an initial '^' which indictes omplementing).  We
+     * also can optimize the common special case /[0-9]/ into /\d/a */
+    if (element_count == 1 &&
+        (has_special_charset_op
+         || has_special_non_charset_op
+         || (prevvalue == '0' && value == '9')))
+    {
+        U8 op;
+        const char * cur_parse = RExC_parse;
+
+        if (has_special_charset_op) {
+            U8 offset = get_regex_charset(RExC_flags);
+
+            /* /aa is the same as /a for these */
+            if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
+                offset = REGEX_ASCII_RESTRICTED_CHARSET;
+            }
+            switch ((I32)namedclass) {
+                case ANYOF_NALNUM:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_ALNUM:
+                    op = ALNUM;
+                    break;
+                case ANYOF_NSPACE:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_SPACE:
+                    op = SPACE;
+                    break;
+                case ANYOF_NDIGIT:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_DIGIT:
+                    op = DIGIT;
+
+                    /* There is no DIGITU */
+                    if (offset == REGEX_UNICODE_CHARSET) {
+                        offset = REGEX_DEPENDS_CHARSET;
+                    }
+                    break;
+                default:
+                    Perl_croak(aTHX_ "panic: Named character class %"IVdf" is not expected to have a non-[...] version", namedclass);
+            }
+
+            /* The number of varieties of each of these is the same, hence, so
+             * is the delta between the normal and complemented nodes */
+            if (invert) {
+                offset += NALNUM - ALNUM;
+            }
+
+            op += offset;
+        }
+        else if (has_special_non_charset_op) {
+            switch ((I32)namedclass) {
+                case ANYOF_NHORIZWS:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_HORIZWS:
+                    op = HORIZWS;
+                    break;
+                case ANYOF_NVERTWS:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_VERTWS:
+                    op = VERTWS;
+                    break;
+                default:
+                    Perl_croak(aTHX_ "panic: Named character class %"IVdf" is not expected to have a non-[...] version", namedclass);
+            }
+
+            /* The complement version of each of these nodes is adjacently next
+             * */
+            if (invert) {
+                op++;
+            }
+        }
+        else {  /* The remaining possibility is [0-9] */
+            op = (invert) ? NDIGITA : DIGITA;
+        }
 
+        /* Throw away this ANYOF regnode, and emit the calculated one, which
+         * should correspond to the beginning, not current, state of the parse
+         */
+        RExC_parse = (char *)orig_parse;
+        RExC_emit = (regnode *)orig_emit;
+        ret = reg_node(pRExC_state, op);
+        RExC_parse = (char *) cur_parse;
+
+        SvREFCNT_dec(listsv);
+        return ret;
+    }
 
     if (SIZE_ONLY)
         return ret;
     /****** !SIZE_ONLY AFTER HERE *********/
 
-    /* If folding and there are code points above 255, we calculate all
-     * characters that could fold to or from the ones already on the list */
-    if (FOLD && nonbitmap) {
+    /* If folding, we calculate all characters that could fold to or from the
+     * ones already on the list */
+    if (FOLD && cp_list) {
        UV start, end;  /* End points of code point ranges */
 
        SV* fold_intersection = NULL;
 
-       /* This is a list of all the characters that participate in folds
-           * (except marks, etc in multi-char folds */
-       if (! PL_utf8_foldable) {
-           SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0);
-           PL_utf8_foldable = _swash_to_invlist(swash);
-            SvREFCNT_dec(swash);
-       }
+        /* In the Latin1 range, the characters that can be folded-to or -from
+         * are precisely the alphabetic characters.  If the highest code point
+         * is within Latin1, we can use the compiled-in list, and not have to
+         * go out to disk. */
+        if (invlist_highest(cp_list) < 256) {
+            _invlist_intersection(PL_L1PosixAlpha, cp_list, &fold_intersection);
+        }
+        else {
 
-       /* This is a hash that for a particular fold gives all characters
-           * that are involved in it */
-       if (! PL_utf8_foldclosures) {
+            /* This is a list of all the characters that participate in folds
+             * (except marks, etc in multi-char folds */
+            if (! PL_utf8_foldable) {
+                SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0);
+                PL_utf8_foldable = _swash_to_invlist(swash);
+                SvREFCNT_dec(swash);
+            }
 
-           /* If we were unable to find any folds, then we likely won't be
-            * able to find the closures.  So just create an empty list.
-            * Folding will effectively be restricted to the non-Unicode rules
-            * hard-coded into Perl.  (This case happens legitimately during
-            * compilation of Perl itself before the Unicode tables are
-            * generated) */
-           if (invlist_len(PL_utf8_foldable) == 0) {
-               PL_utf8_foldclosures = newHV();
-           } else {
-               /* If the folds haven't been read in, call a fold function
-                   * to force that */
-               if (! PL_utf8_tofold) {
-                   U8 dummy[UTF8_MAXBYTES+1];
-                   STRLEN dummy_len;
-
-                   /* This particular string is above \xff in both UTF-8 and
-                    * UTFEBCDIC */
-                   to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
-                   assert(PL_utf8_tofold); /* Verify that worked */
-               }
-               PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
-           }
-       }
+            /* This is a hash that for a particular fold gives all characters
+             * that are involved in it */
+            if (! PL_utf8_foldclosures) {
+
+                /* If we were unable to find any folds, then we likely won't be
+                 * able to find the closures.  So just create an empty list.
+                 * Folding will effectively be restricted to the non-Unicode
+                 * rules hard-coded into Perl.  (This case happens legitimately
+                 * during compilation of Perl itself before the Unicode tables
+                 * are generated) */
+                if (invlist_len(PL_utf8_foldable) == 0) {
+                    PL_utf8_foldclosures = newHV();
+                }
+                else {
+                    /* If the folds haven't been read in, call a fold function
+                     * to force that */
+                    if (! PL_utf8_tofold) {
+                        U8 dummy[UTF8_MAXBYTES+1];
+                        STRLEN dummy_len;
+
+                        /* This particular string is above \xff in both UTF-8
+                         * and UTFEBCDIC */
+                        to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
+                        assert(PL_utf8_tofold); /* Verify that worked */
+                    }
+                    PL_utf8_foldclosures =
+                                        _swash_inversion_hash(PL_utf8_tofold);
+                }
+            }
 
-       /* Only the characters in this class that participate in folds need be
-        * checked.  Get the intersection of this class and all the possible
-        * characters that are foldable.  This can quickly narrow down a large
-        * class */
-       _invlist_intersection(PL_utf8_foldable, nonbitmap, &fold_intersection);
+            /* Only the characters in this class that participate in folds need
+             * be checked.  Get the intersection of this class and all the
+             * possible characters that are foldable.  This can quickly narrow
+             * down a large class */
+            _invlist_intersection(PL_utf8_foldable, cp_list,
+                                  &fold_intersection);
+        }
 
        /* Now look at the foldable characters in this class individually */
        invlist_iterinit(fold_intersection);
        while (invlist_iternext(fold_intersection, &start, &end)) {
            UV j;
 
+            /* Locale folding for Latin1 characters is deferred until runtime */
+            if (LOC && start < 256) {
+                start = 256;
+            }
+
            /* Look at every character in the range */
            for (j = start; j <= end; j++) {
 
-               /* Get its fold */
                U8 foldbuf[UTF8_MAXBYTES_CASE+1];
                STRLEN foldlen;
-               const UV f =
-                    _to_uni_fold_flags(j, foldbuf, &foldlen, allow_full_fold);
+                UV f;
+
+                if (j < 256) {
+
+                    /* We have the latin1 folding rules hard-coded here so that
+                     * an innocent-looking character class, like /[ks]/i won't
+                     * have to go out to disk to find the possible matches.
+                     * XXX It would be better to generate these via regen, in
+                     * case a new version of the Unicode standard adds new
+                     * mappings, though that is not really likely, and may be
+                     * caught by the default: case of the switch below. */
+
+                    if (PL_fold_latin1[j] != j) {
+
+                        /* ASCII is always matched; non-ASCII is matched only
+                         * under Unicode rules */
+                        if (isASCII(j) || AT_LEAST_UNI_SEMANTICS) {
+                            cp_list =
+                                add_cp_to_invlist(cp_list, PL_fold_latin1[j]);
+                        }
+                        else {
+                            depends_list =
+                                add_cp_to_invlist(depends_list, PL_fold_latin1[j]);
+                        }
+                    }
+
+                    if (HAS_NONLATIN1_FOLD_CLOSURE(j)
+                        && (! isASCII(j) || ! MORE_ASCII_RESTRICTED))
+                    {
+                        /* Certain Latin1 characters have matches outside
+                         * Latin1, or are multi-character.  To get here, 'j' is
+                         * one of those characters.   None of these matches is
+                         * valid for ASCII characters under /aa, which is why
+                         * the 'if' just above excludes those.  The matches
+                         * fall into three categories:
+                         * 1) They are singly folded-to or -from an above 255
+                         *    character, e.g., LATIN SMALL LETTER Y WITH
+                         *    DIAERESIS and LATIN CAPITAL LETTER Y WITH
+                         *    DIAERESIS;
+                         * 2) They are part of a multi-char fold with another
+                         *    latin1 character; only LATIN SMALL LETTER
+                         *    SHARP S => "ss" fits this;
+                         * 3) They are part of a multi-char fold with a
+                         *    character outside of Latin1, such as various
+                         *    ligatures.
+                        * We aren't dealing fully with multi-char folds, except
+                        * we do deal with the pattern containing a character
+                        * that has a multi-char fold (not so much the inverse).
+                        * For types 1) and 3), the matches only happen when the
+                        * target string is utf8; that's not true for 2), and we
+                        * set a flag for it.
+                        *
+                        * The code below adds the single fold closures for 'j'
+                        * to the inversion list. */
+                        switch (j) {
+                            case 'k':
+                            case 'K':
+                                cp_list =
+                                    add_cp_to_invlist(cp_list, KELVIN_SIGN);
+                                break;
+                            case 's':
+                            case 'S':
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    LATIN_SMALL_LETTER_LONG_S);
+                                break;
+                            case MICRO_SIGN:
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    GREEK_SMALL_LETTER_MU);
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    GREEK_CAPITAL_LETTER_MU);
+                                break;
+                            case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
+                            case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
+                                cp_list =
+                                    add_cp_to_invlist(cp_list, ANGSTROM_SIGN);
+                                break;
+                            case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
+                                cp_list = add_cp_to_invlist(cp_list,
+                                        LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
+                                break;
+                            case LATIN_SMALL_LETTER_SHARP_S:
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                LATIN_CAPITAL_LETTER_SHARP_S);
+
+                                /* Under /a, /d, and /u, this can match the two
+                                 * chars "ss" */
+                                if (! MORE_ASCII_RESTRICTED) {
+                                    add_alternate(&unicode_alternate,
+                                                  (U8 *) "ss", 2);
+
+                                    /* And under /u or /a, it can match even if
+                                     * the target is not utf8 */
+                                    if (AT_LEAST_UNI_SEMANTICS) {
+                                        ANYOF_FLAGS(ret) |=
+                                                    ANYOF_NONBITMAP_NON_UTF8;
+                                    }
+                                }
+                                break;
+                            case 'F': case 'f':
+                            case 'I': case 'i':
+                            case 'L': case 'l':
+                            case 'T': case 't':
+                            case 'A': case 'a':
+                            case 'H': case 'h':
+                            case 'J': case 'j':
+                            case 'N': case 'n':
+                            case 'W': case 'w':
+                            case 'Y': case 'y':
+                                /* These all are targets of multi-character
+                                 * folds from code points that require UTF8 to
+                                 * express, so they can't match unless the
+                                 * target string is in UTF-8, so no action here
+                                 * is necessary, as regexec.c properly handles
+                                 * the general case for UTF-8 matching */
+                                break;
+                            default:
+                                /* Use deprecated warning to increase the
+                                 * chances of this being output */
+                                ckWARN2regdep(RExC_parse, "Perl folding rules are not up-to-date for 0x%"UVXf"; please use the perlbug utility to report;", j);
+                                break;
+                        }
+                    }
+                    continue;
+                }
+
+                /* Here is an above Latin1 character.  We don't have the rules
+                 * hard-coded for it.  First, get its fold */
+               f = _to_uni_fold_flags(j, foldbuf, &foldlen,
+                                    ((allow_full_fold) ? FOLD_FLAGS_FULL : 0)
+                                    | ((LOC)
+                                        ? FOLD_FLAGS_LOCALE
+                                        : (MORE_ASCII_RESTRICTED)
+                                            ? FOLD_FLAGS_NOMIX_ASCII
+                                            : 0));
 
                if (foldlen > (STRLEN)UNISKIP(f)) {
 
@@ -11263,54 +12091,25 @@ parseit:
 
                        /* If any of the folded characters of this are in the
                         * Latin1 range, tell the regex engine that this can
-                        * match a non-utf8 target string.  The only multi-byte
-                        * fold whose source is in the Latin1 range (U+00DF)
-                        * applies only when the target string is utf8, or
-                        * under unicode rules */
-                       if (j > 255 || AT_LEAST_UNI_SEMANTICS) {
-                           while (loc < e) {
-
-                               /* Can't mix ascii with non- under /aa */
-                               if (MORE_ASCII_RESTRICTED
-                                   && (isASCII(*loc) != isASCII(j)))
-                               {
-                                   goto end_multi_fold;
-                               }
-                               if (UTF8_IS_INVARIANT(*loc)
-                                   || UTF8_IS_DOWNGRADEABLE_START(*loc))
-                               {
-                                    /* Can't mix above and below 256 under LOC
-                                     */
-                                   if (LOC) {
-                                       goto end_multi_fold;
-                                   }
-                                   ANYOF_FLAGS(ret)
-                                           |= ANYOF_NONBITMAP_NON_UTF8;
-                                   break;
-                               }
-                               loc += UTF8SKIP(loc);
-                           }
-                       }
+                        * match a non-utf8 target string.  */
+                        while (loc < e) {
+                            if (UTF8_IS_INVARIANT(*loc)
+                                || UTF8_IS_DOWNGRADEABLE_START(*loc))
+                            {
+                                ANYOF_FLAGS(ret)
+                                        |= ANYOF_NONBITMAP_NON_UTF8;
+                                break;
+                            }
+                            loc += UTF8SKIP(loc);
+                        }
 
                        add_alternate(&unicode_alternate, foldbuf, foldlen);
-                   end_multi_fold: ;
-                   }
-
-                   /* This is special-cased, as it is the only letter which
-                    * has both a multi-fold and single-fold in Latin1.  All
-                    * the other chars that have single and multi-folds are
-                    * always in utf8, and the utf8 folding algorithm catches
-                    * them */
-                   if (! LOC && j == LATIN_CAPITAL_LETTER_SHARP_S) {
-                       stored += set_regclass_bit(pRExC_state,
-                                       ret,
-                                       LATIN_SMALL_LETTER_SHARP_S,
-                                       &l1_fold_invlist, &unicode_alternate);
                    }
                }
-               else {
-                   /* Single character fold.  Add everything in its fold
-                    * closure to the list that this node should match */
+                else {
+                    /* Single character fold of above Latin1.  Add everything
+                     * in its fold closure to the list that this node should
+                     * match */
                    SV** listp;
 
                    /* The fold closures data structure is a hash with the keys
@@ -11333,90 +12132,174 @@ parseit:
                            /* /aa doesn't allow folds between ASCII and non-;
                             * /l doesn't allow them between above and below
                             * 256 */
-                           if ((MORE_ASCII_RESTRICTED
-                                && (isASCII(c) != isASCII(j)))
-                                   || (LOC && ((c < 256) != (j < 256))))
+                           if ((MORE_ASCII_RESTRICTED && (isASCII(c) != isASCII(j)))
+                               || (LOC && ((c < 256) != (j < 256))))
                            {
                                continue;
                            }
 
-                           if (c < 256 && AT_LEAST_UNI_SEMANTICS) {
-                               stored += set_regclass_bit(pRExC_state,
-                                       ret,
-                                       (U8) c,
-                                       &l1_fold_invlist, &unicode_alternate);
-                           }
-                               /* It may be that the code point is already in
-                                * this range or already in the bitmap, in
-                                * which case we need do nothing */
-                           else if ((c < start || c > end)
-                                       && (c > 255
-                                           || ! ANYOF_BITMAP_TEST(ret, c)))
-                           {
-                               nonbitmap = add_cp_to_invlist(nonbitmap, c);
+                            /* Folds involving non-ascii Latin1 characters
+                             * under /d are added to a separate list */
+                           if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
+                            {
+                               cp_list = add_cp_to_invlist(cp_list, c);
+                            }
+                            else {
+                                depends_list = add_cp_to_invlist(depends_list, c);
                            }
                        }
                    }
                }
-           }
+            }
        }
        SvREFCNT_dec(fold_intersection);
     }
 
-    /* Combine the two lists into one. */
-    if (l1_fold_invlist) {
-       if (nonbitmap) {
-           _invlist_union(nonbitmap, l1_fold_invlist, &nonbitmap);
-           SvREFCNT_dec(l1_fold_invlist);
-       }
-       else {
-           nonbitmap = l1_fold_invlist;
-       }
+    /* And combine the result (if any) with any inversion list from posix
+     * classes.  The lists are kept separate up to now because we don't want to
+     * fold the classes */
+    if (posixes) {
+        if (AT_LEAST_UNI_SEMANTICS) {
+            if (cp_list) {
+                _invlist_union(cp_list, posixes, &cp_list);
+                SvREFCNT_dec(posixes);
+            }
+            else {
+                cp_list = posixes;
+            }
+        }
+        else {
+
+            /* Under /d, we put into a separate list the Latin1 things that
+             * match only when the target string is utf8 */
+            SV* nonascii_but_latin1_properties = NULL;
+            _invlist_intersection(posixes, PL_Latin1,
+                                  &nonascii_but_latin1_properties);
+            _invlist_subtract(nonascii_but_latin1_properties, PL_ASCII,
+                              &nonascii_but_latin1_properties);
+            _invlist_subtract(posixes, nonascii_but_latin1_properties,
+                              &posixes);
+            if (cp_list) {
+                _invlist_union(cp_list, posixes, &cp_list);
+                SvREFCNT_dec(posixes);
+            }
+            else {
+                cp_list = posixes;
+            }
+
+            if (depends_list) {
+                _invlist_union(depends_list, nonascii_but_latin1_properties,
+                               &depends_list);
+                SvREFCNT_dec(nonascii_but_latin1_properties);
+            }
+            else {
+                depends_list = nonascii_but_latin1_properties;
+            }
+        }
     }
 
     /* And combine the result (if any) with any inversion list from properties.
-     * The lists are kept separate up to now because we don't want to fold the
-     * properties */
+     * The lists are kept separate up to now so that we can distinguish the two
+     * in regards to matching above-Unicode.  A run-time warning is generated
+     * if a Unicode property is matched against a non-Unicode code point. But,
+     * we allow user-defined properties to match anything, without any warning,
+     * and we also suppress the warning if there is a portion of the character
+     * class that isn't a Unicode property, and which matches above Unicode, \W
+     * or [\x{110000}] for example.
+     * (Note that in this case, unlike the Posix one above, there is no
+     * <depends_list>, because having a Unicode property forces Unicode
+     * semantics */
     if (properties) {
-       if (nonbitmap) {
-           _invlist_union(nonbitmap, properties, &nonbitmap);
-           SvREFCNT_dec(properties);
-       }
-       else {
-           nonbitmap = properties;
-       }
+        bool warn_super = ! has_user_defined_property;
+        if (cp_list) {
+
+            /* If it matters to the final outcome, see if a non-property
+             * component of the class matches above Unicode.  If so, the
+             * warning gets suppressed.  This is true even if just a single
+             * such code point is specified, as though not strictly correct if
+             * another such code point is matched against, the fact that they
+             * are using above-Unicode code points indicates they should know
+             * the issues involved */
+            if (warn_super) {
+                bool non_prop_matches_above_Unicode =
+                            runtime_posix_matches_above_Unicode
+                            | (invlist_highest(cp_list) > PERL_UNICODE_MAX);
+                if (invert) {
+                    non_prop_matches_above_Unicode =
+                                            !  non_prop_matches_above_Unicode;
+                }
+                warn_super = ! non_prop_matches_above_Unicode;
+            }
+
+            _invlist_union(properties, cp_list, &cp_list);
+            SvREFCNT_dec(properties);
+        }
+        else {
+            cp_list = properties;
+        }
+
+        if (warn_super) {
+            ANYOF_FLAGS(ret) |= ANYOF_WARN_SUPER;
+        }
     }
 
-    /* Here, <nonbitmap> contains all the code points we can determine at
-     * compile time that we haven't put into the bitmap.  Go through it, and
-     * for things that belong in the bitmap, put them there, and delete from
-     * <nonbitmap> */
-    if (nonbitmap) {
+    /* Here, we have calculated what code points should be in the character
+     * class.
+     *
+     * Now we can see about various optimizations.  Fold calculation (which we
+     * did above) needs to take place before inversion.  Otherwise /[^k]/i
+     * would invert to include K, which under /i would match k, which it
+     * shouldn't. */
+
+    /* Optimize inverted simple patterns (e.g. [^a-z]).  Note that we haven't
+     * set the FOLD flag yet, so this does optimize those.  It doesn't
+     * optimize locale.  Doing so perhaps could be done as long as there is
+     * nothing like \w in it; some thought also would have to be given to the
+     * interaction with above 0x100 chars */
+    if (invert
+        && ! LOC
+       && ! depends_list
+       && ! unicode_alternate
+       && SvCUR(listsv) == initial_listsv_len)
+    {
+        _invlist_invert(cp_list);
 
-       /* Above-ASCII code points in /d have to stay in <nonbitmap>, as they
-        * possibly only should match when the target string is UTF-8 */
-       UV max_cp_to_set = (DEPENDS_SEMANTICS) ? 127 : 255;
+        /* Any swash can't be used as-is, because we've inverted things */
+        if (swash) {
+            SvREFCNT_dec(swash);
+            swash = NULL;
+        }
+
+       /* Clear the invert flag since have just done it here */
+       invert = FALSE;
+    }
+
+    /* Here, <cp_list> contains all the code points we can determine at
+     * compile time that match under all conditions.  Go through it, and
+     * for things that belong in the bitmap, put them there, and delete from
+     * <cp_list> */
+    if (cp_list) {
 
        /* This gets set if we actually need to modify things */
        bool change_invlist = FALSE;
 
        UV start, end;
 
-       /* Start looking through <nonbitmap> */
-       invlist_iterinit(nonbitmap);
-       while (invlist_iternext(nonbitmap, &start, &end)) {
+       /* Start looking through <cp_list> */
+       invlist_iterinit(cp_list);
+       while (invlist_iternext(cp_list, &start, &end)) {
            UV high;
            int i;
 
            /* Quit if are above what we should change */
-           if (start > max_cp_to_set) {
+           if (start > 255) {
                break;
            }
 
            change_invlist = TRUE;
 
            /* Set all the bits in the range, up to the max that we are doing */
-           high = (end < max_cp_to_set) ? end : max_cp_to_set;
+           high = (end < 255) ? end : 255;
            for (i = start; i <= (int) high; i++) {
                if (! ANYOF_BITMAP_TEST(ret, i)) {
                    ANYOF_BITMAP_SET(ret, i);
@@ -11428,140 +12311,31 @@ parseit:
        }
 
         /* Done with loop; remove any code points that are in the bitmap from
-         * <nonbitmap> */
+         * <cp_list> */
        if (change_invlist) {
-           _invlist_subtract(nonbitmap,
-                             (DEPENDS_SEMANTICS)
-                               ? PL_ASCII
-                               : PL_Latin1,
-                              &nonbitmap);
+           _invlist_subtract(cp_list, PL_Latin1, &cp_list);
        }
 
        /* If have completely emptied it, remove it completely */
-       if (invlist_len(nonbitmap) == 0) {
-           SvREFCNT_dec(nonbitmap);
-           nonbitmap = NULL;
+       if (invlist_len(cp_list) == 0) {
+           SvREFCNT_dec(cp_list);
+           cp_list = NULL;
        }
     }
 
-    /* Here, we have calculated what code points should be in the character
-     * class.  <nonbitmap> does not overlap the bitmap except possibly in the
-     * case of DEPENDS rules.
-     *
-     * Now we can see about various optimizations.  Fold calculation (which we
-     * did above) needs to take place before inversion.  Otherwise /[^k]/i
-     * would invert to include K, which under /i would match k, which it
-     * shouldn't. */
+    if (invert) {
+        ANYOF_FLAGS(ret) |= ANYOF_INVERT;
+    }
 
-    /* Optimize inverted simple patterns (e.g. [^a-z]).  Note that we haven't
-     * set the FOLD flag yet, so this does optimize those.  It doesn't
-     * optimize locale.  Doing so perhaps could be done as long as there is
-     * nothing like \w in it; some thought also would have to be given to the
-     * interaction with above 0x100 chars */
-    if ((ANYOF_FLAGS(ret) & ANYOF_INVERT)
-        && ! LOC
-       && ! unicode_alternate
-       /* In case of /d, there are some things that should match only when in
-        * not in the bitmap, i.e., they require UTF8 to match.  These are
-        * listed in nonbitmap, but if ANYOF_NONBITMAP_NON_UTF8 is set in this
-        * case, they don't require UTF8, so can invert here */
-       && (! nonbitmap
-           || ! DEPENDS_SEMANTICS
-           || (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
-       && SvCUR(listsv) == initial_listsv_len)
-    {
-       int i;
-       if (! nonbitmap) {
-           for (i = 0; i < 256; ++i) {
-               if (ANYOF_BITMAP_TEST(ret, i)) {
-                   ANYOF_BITMAP_CLEAR(ret, i);
-               }
-               else {
-                   ANYOF_BITMAP_SET(ret, i);
-                   prevvalue = value;
-                   value = i;
-               }
-           }
-           /* The inversion means that everything above 255 is matched */
-           ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
+    /* Combine the two lists into one. */
+    if (depends_list) {
+       if (cp_list) {
+           _invlist_union(cp_list, depends_list, &cp_list);
+           SvREFCNT_dec(depends_list);
        }
        else {
-           /* Here, also has things outside the bitmap that may overlap with
-            * the bitmap.  We have to sync them up, so that they get inverted
-            * in both places.  Earlier, we removed all overlaps except in the
-            * case of /d rules, so no syncing is needed except for this case
-            */
-           SV *remove_list = NULL;
-
-           if (DEPENDS_SEMANTICS) {
-               UV start, end;
-
-               /* Set the bits that correspond to the ones that aren't in the
-                * bitmap.  Otherwise, when we invert, we'll miss these.
-                * Earlier, we removed from the nonbitmap all code points
-                * < 128, so there is no extra work here */
-               invlist_iterinit(nonbitmap);
-               while (invlist_iternext(nonbitmap, &start, &end)) {
-                   if (start > 255) {  /* The bit map goes to 255 */
-                       break;
-                   }
-                   if (end > 255) {
-                       end = 255;
-                   }
-                   for (i = start; i <= (int) end; ++i) {
-                       ANYOF_BITMAP_SET(ret, i);
-                       prevvalue = value;
-                       value = i;
-                   }
-               }
-           }
-
-           /* Now invert both the bitmap and the nonbitmap.  Anything in the
-            * bitmap has to also be removed from the non-bitmap, but again,
-            * there should not be overlap unless is /d rules. */
-           _invlist_invert(nonbitmap);
-
-           /* Any swash can't be used as-is, because we've inverted things */
-           if (swash) {
-               SvREFCNT_dec(swash);
-               swash = NULL;
-           }
-
-           for (i = 0; i < 256; ++i) {
-               if (ANYOF_BITMAP_TEST(ret, i)) {
-                   ANYOF_BITMAP_CLEAR(ret, i);
-                   if (DEPENDS_SEMANTICS) {
-                       if (! remove_list) {
-                           remove_list = _new_invlist(2);
-                       }
-                       remove_list = add_cp_to_invlist(remove_list, i);
-                   }
-               }
-               else {
-                   ANYOF_BITMAP_SET(ret, i);
-                   prevvalue = value;
-                   value = i;
-               }
-           }
-
-           /* And do the removal */
-           if (DEPENDS_SEMANTICS) {
-               if (remove_list) {
-                   _invlist_subtract(nonbitmap, remove_list, &nonbitmap);
-                   SvREFCNT_dec(remove_list);
-               }
-           }
-           else {
-               /* There is no overlap for non-/d, so just delete anything
-                * below 256 */
-               _invlist_intersection(nonbitmap, PL_AboveLatin1, &nonbitmap);
-           }
+           cp_list = depends_list;
        }
-
-       stored = 256 - stored;
-
-       /* Clear the invert flag since have just done it here */
-       ANYOF_FLAGS(ret) &= ~ANYOF_INVERT;
     }
 
     /* Folding in the bitmap is taken care of above, but not for locale (for
@@ -11571,7 +12345,7 @@ parseit:
      * run-time fold flag for these */
     if (FOLD && (LOC
                || (DEPENDS_SEMANTICS
-                   && nonbitmap
+                   && cp_list
                    && ! (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
                || unicode_alternate))
     {
@@ -11592,7 +12366,7 @@ parseit:
      * characters which only have the two folds; so things like 'fF' and 'Ii'
      * wouldn't work because they are part of the fold of 'LATIN SMALL LIGATURE
      * FI'. */
-    if (! nonbitmap
+    if (! cp_list
        && ! unicode_alternate
        && SvCUR(listsv) == initial_listsv_len
        && ! (ANYOF_FLAGS(ret) & (ANYOF_INVERT|ANYOF_UNICODE_ALL))
@@ -11679,7 +12453,7 @@ parseit:
        SvREFCNT_dec(swash);
        swash = NULL;
     }
-    if (! nonbitmap
+    if (! cp_list
        && SvCUR(listsv) == initial_listsv_len
        && ! unicode_alternate)
     {
@@ -11696,10 +12470,10 @@ parseit:
         *       swash is stored there now.
         * av[2] stores the multicharacter foldings, used later in
         *       regexec.c:S_reginclass().
-        * av[3] stores the nonbitmap inversion list for use in addition or
-        *       instead of av[0]; not used if av[1] isn't NULL
+        * av[3] stores the cp_list inversion list for use in addition or
+        *       instead of av[0]; used only if av[1] is NULL
         * av[4] is set if any component of the class is from a user-defined
-        *       property; not used if av[1] isn't NULL */
+        *       property; used only if av[1] is NULL */
        AV * const av = newAV();
        SV *rv;
 
@@ -11708,12 +12482,12 @@ parseit:
                        : listsv);
        if (swash) {
            av_store(av, 1, swash);
-           SvREFCNT_dec(nonbitmap);
+           SvREFCNT_dec(cp_list);
        }
        else {
            av_store(av, 1, NULL);
-           if (nonbitmap) {
-               av_store(av, 3, nonbitmap);
+           if (cp_list) {
+               av_store(av, 3, cp_list);
                av_store(av, 4, newSVuv(has_user_defined_property));
            }
        }
@@ -12704,6 +13478,7 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
     SvREFCNT_dec(r->saved_copy);
 #endif
     Safefree(r->offs);
+    SvREFCNT_dec(r->qr_anoncv);
 }
 
 /*  reg_temp_copy()
@@ -12728,7 +13503,6 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
 {
     struct regexp *ret;
     struct regexp *const r = (struct regexp *)SvANY(rx);
-    register const I32 npar = r->nparens+1;
 
     PERL_ARGS_ASSERT_REG_TEMP_COPY;
 
@@ -12748,8 +13522,11 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
     SvLEN_set(ret_x, 0);
     SvSTASH_set(ret_x, NULL);
     SvMAGIC_set(ret_x, NULL);
-    Newx(ret->offs, npar, regexp_paren_pair);
-    Copy(r->offs, ret->offs, npar, regexp_paren_pair);
+    if (r->offs) {
+        const I32 npar = r->nparens+1;
+        Newx(ret->offs, npar, regexp_paren_pair);
+        Copy(r->offs, ret->offs, npar, regexp_paren_pair);
+    }
     if (r->substrs) {
         Newx(ret->substrs, 1, struct reg_substr_data);
        StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
@@ -12767,6 +13544,7 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
     ret->saved_copy = NULL;
 #endif
     ret->mother_re = rx;
+    SvREFCNT_inc_void(ret->qr_anoncv);
     
     return ret_x;
 }
@@ -12809,16 +13587,21 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
     if (ri->u.offsets)
         Safefree(ri->u.offsets);             /* 20010421 MJD */
 #endif
+    if (ri->code_blocks) {
+       int n;
+       for (n = 0; n < ri->num_code_blocks; n++)
+           SvREFCNT_dec(ri->code_blocks[n].src_regex);
+       Safefree(ri->code_blocks);
+    }
+
     if (ri->data) {
        int n = ri->data->count;
-       PAD* new_comppad = NULL;
-       PAD* old_comppad;
-       PADOFFSET refcnt;
 
        while (--n >= 0) {
           /* If you add a ->what type here, update the comment in regcomp.h */
            switch (ri->data->what[n]) {
            case 'a':
+           case 'r':
            case 's':
            case 'S':
            case 'u':
@@ -12827,27 +13610,8 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
            case 'f':
                Safefree(ri->data->data[n]);
                break;
-           case 'p':
-               new_comppad = MUTABLE_AV(ri->data->data[n]);
-               break;
-           case 'o':
-               if (new_comppad == NULL)
-                   Perl_croak(aTHX_ "panic: pregfree comppad");
-               PAD_SAVE_LOCAL(old_comppad,
-                   /* Watch out for global destruction's random ordering. */
-                   (SvTYPE(new_comppad) == SVt_PVAV) ? new_comppad : NULL
-               );
-               OP_REFCNT_LOCK;
-               refcnt = OpREFCNT_dec((OP_4tree*)ri->data->data[n]);
-               OP_REFCNT_UNLOCK;
-               if (!refcnt)
-                    op_free((OP_4tree*)ri->data->data[n]);
-
-               PAD_RESTORE_LOCAL(old_comppad);
-               SvREFCNT_dec(MUTABLE_SV(new_comppad));
-               new_comppad = NULL;
-               break;
-           case 'n':
+           case 'l':
+           case 'L':
                break;
             case 'T':          
                 { /* Aho Corasick add-on structure for a trie node.
@@ -12975,6 +13739,7 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
     }
 
     RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
+    ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param));
 
     if (ret->pprivate)
        RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
@@ -13035,7 +13800,20 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
     
     Newxc(reti, sizeof(regexp_internal) + len*sizeof(regnode), char, regexp_internal);
     Copy(ri->program, reti->program, len+1, regnode);
-    
+
+    reti->num_code_blocks = ri->num_code_blocks;
+    if (ri->code_blocks) {
+       int n;
+       Newxc(reti->code_blocks, ri->num_code_blocks, struct reg_code_block,
+               struct reg_code_block);
+       Copy(ri->code_blocks, reti->code_blocks, ri->num_code_blocks,
+               struct reg_code_block);
+       for (n = 0; n < ri->num_code_blocks; n++)
+            reti->code_blocks[n].src_regex = (REGEXP*)
+                   sv_dup_inc((SV*)(ri->code_blocks[n].src_regex), param);
+    }
+    else
+       reti->code_blocks = NULL;
 
     reti->regstclass = NULL;
 
@@ -13052,12 +13830,11 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
        for (i = 0; i < count; i++) {
            d->what[i] = ri->data->what[i];
            switch (d->what[i]) {
-               /* legal options are one of: sSfpontTua
-                  see also regcomp.h and pregfree() */
+               /* see also regcomp.h and regfree_internal() */
            case 'a': /* actually an AV, but the dup function is identical.  */
+           case 'r':
            case 's':
            case 'S':
-           case 'p': /* actually an AV, but the dup function is identical.  */
            case 'u': /* actually an HV, but the dup function is identical.  */
                d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param);
                break;
@@ -13068,13 +13845,6 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
                            struct regnode_charclass_class);
                reti->regstclass = (regnode*)d->data[i];
                break;
-           case 'o':
-               /* Compiled op trees are readonly and in shared memory,
-                  and can thus be shared without duplication. */
-               OP_REFCNT_LOCK;
-               d->data[i] = (void*)OpREFCNT_inc((OP*)ri->data->data[i]);
-               OP_REFCNT_UNLOCK;
-               break;
            case 'T':
                /* Trie stclasses are readonly and can thus be shared
                 * without duplication. We free the stclass in pregfree
@@ -13087,7 +13857,8 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
                ((reg_trie_data*)ri->data->data[i])->refcount++;
                OP_REFCNT_UNLOCK;
                /* Fall through */
-           case 'n':
+           case 'l':
+           case 'L':
                d->data[i] = ri->data->data[i];
                break;
             default:
@@ -13197,8 +13968,6 @@ Perl_save_re_context(pTHX)
 
     Copy(&PL_reg_state, state, 1, struct re_save_state);
 
-    PL_reg_start_tmp = 0;
-    PL_reg_start_tmpl = 0;
     PL_reg_oldsaved = NULL;
     PL_reg_oldsavedlen = 0;
     PL_reg_maxiter = 0;
@@ -13440,8 +14209,8 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
  * Local variables:
  * c-indentation-style: bsd
  * c-basic-offset: 4
- * indent-tabs-mode: t
+ * indent-tabs-mode: nil
  * End:
  *
- * ex: set ts=8 sts=4 sw=4 noet:
+ * ex: set ts=8 sts=4 sw=4 et:
  */