Add caching to inversion list searches

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index f2b82a8..28f2ecb 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -81,16 +81,17 @@
  #define REG_COMP_C
  #ifdef PERL_IN_XSUB_RE
  #  include "re_comp.h"
+extern const struct regexp_engine my_reg_engine;
  #else
  #  include "regcomp.h"
  #endif
  
  #include "dquote_static.c"
-#ifndef PERL_IN_XSUB_RE
-#  include "charclass_invlists.h"
-#endif
+#include "charclass_invlists.h"
+#include "inline_invlist.c"
  
  #define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
+#define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
  
  #ifdef op
  #undef op
@@ -220,7 +221,7 @@ typedef struct RExC_state_t {
  #define        WORST           0       /* Worst case. */
  #define        HASWIDTH        0x01    /* Known to match non-null strings. */
  
-/* Simple enough to be STAR/PLUS operand, in an EXACT node must be a single
+/* Simple enough to be STAR/PLUS operand; in an EXACT node must be a single
   * character, and if utf8, must be invariant.  Note that this is not the same
   * thing as REGNODE_SIMPLE */
  #define        SIMPLE          0x02
@@ -398,14 +399,18 @@ static const scan_data_t zero_scan_data =
  #define UNI_SEMANTICS (get_regex_charset(RExC_flags) == REGEX_UNICODE_CHARSET)
  #define AT_LEAST_UNI_SEMANTICS (get_regex_charset(RExC_flags) >= REGEX_UNICODE_CHARSET)
  #define ASCII_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_RESTRICTED_CHARSET)
-#define MORE_ASCII_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
  #define AT_LEAST_ASCII_RESTRICTED (get_regex_charset(RExC_flags) >= REGEX_ASCII_RESTRICTED_CHARSET)
+#define ASCII_FOLD_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
  
  #define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
  
-#define OOB_UNICODE            12345678
  #define OOB_NAMEDCLASS         -1
  
+/* There is no code point that is out-of-bounds, so this is problematic.  But
+ * its only current use is to initialize a variable that is always set before
+ * looked at. */
+#define OOB_UNICODE            0xDEADBEEF
+
  #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
  #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
  
@@ -2578,12 +2583,12 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
   * one, and looks for problematic sequences of characters whose folds vs.
   * non-folds have sufficiently different lengths, that the optimizer would be
   * fooled into rejecting legitimate matches of them, and the trie construction
- * code can't cope with them.  The joining is only done if:
+ * code needs to handle specially.  The joining is only done if:
   * 1) there is room in the current conglomerated node to entirely contain the
   *    next one.
   * 2) they are the exact same node type
   *
- * The adjacent nodes actually may be separated by NOTHING kind nodes, and
+ * The adjacent nodes actually may be separated by NOTHING-kind nodes, and
   * these get optimized out
   *
   * If there are problematic code sequences, *min_subtract is set to the delta
@@ -2596,26 +2601,27 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
   *
   * This is as good a place as any to discuss the design of handling these
   * problematic sequences.  It's been wrong in Perl for a very long time.  There
- * are three code points in Unicode whose folded lengths differ so much from
- * the un-folded lengths that it causes problems for the optimizer and trie
- * construction.  Why only these are problematic, and not others where lengths
- * also differ is something I (khw) do not understand.  New versions of Unicode
- * might add more such code points.  Hopefully the logic in fold_grind.t that
- * figures out what to test (in part by verifying that each size-combination
- * gets tested) will catch any that do come along, so they can be added to the
- * special handling below.  The chances of new ones are actually rather small,
- * as most, if not all, of the world's scripts that have casefolding have
- * already been encoded by Unicode.  Also, a number of Unicode's decisions were
- * made to allow compatibility with pre-existing standards, and almost all of
- * those have already been dealt with.  These would otherwise be the most
- * likely candidates for generating further tricky sequences.  In other words,
- * Unicode by itself is unlikely to add new ones unless it is for compatibility
- * with pre-existing standards, and there aren't many of those left.
+ * are three code points currently in Unicode whose folded lengths differ so
+ * much from the un-folded lengths that it causes problems for the optimizer
+ * and trie construction.  Why only these are problematic, and not others where
+ * lengths also differ is something I (khw) do not understand.  New versions of
+ * Unicode might add more such code points.  Hopefully the logic in
+ * fold_grind.t that figures out what to test (in part by verifying that each
+ * size-combination gets tested) will catch any that do come along, so they can
+ * be added to the special handling below.  The chances of new ones are
+ * actually rather small, as most, if not all, of the world's scripts that have
+ * casefolding have already been encoded by Unicode.  Also, a number of
+ * Unicode's decisions were made to allow compatibility with pre-existing
+ * standards, and almost all of those have already been dealt with.  These
+ * would otherwise be the most likely candidates for generating further tricky
+ * sequences.  In other words, Unicode by itself is unlikely to add new ones
+ * unless it is for compatibility with pre-existing standards, and there aren't
+ * many of those left.
   *
   * The previous designs for dealing with these involved assigning a special
   * node for them.  This approach doesn't work, as evidenced by this example:
   *      "\xDFs" =~ /s\xDF/ui    # Used to fail before these patches
- * Both these fold to "sss", but if the pattern is parsed to create a node of
+ * Both these fold to "sss", but if the pattern is parsed to create a node
   * that would match just the \xDF, it won't be able to handle the case where a
   * successful match would have to cross the node's boundary.  The new approach
   * that hopefully generally solves the problem generates an EXACTFU_SS node
@@ -2630,9 +2636,9 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
   *      problematic sequences.  This delta is used by the caller to adjust the
   *      min length of the match, and the delta between min and max, so that the
   *      optimizer doesn't reject these possibilities based on size constraints.
- * 2)   These sequences are not currently correctly handled by the trie code
- *      either, so it changes the joined node type to ops that are not handled
- *      by trie's, those new ops being EXACTFU_SS and EXACTFU_TRICKYFOLD.
+ * 2)   These sequences require special handling by the trie code, so this code
+ *      changes the joined node type to special ops: EXACTFU_TRICKYFOLD and
+ *      EXACTFU_SS.
   * 3)   This is sufficient for the two Greek sequences (described below), but
   *      the one involving the Sharp s (\xDF) needs more.  The node type
   *      EXACTFU_SS is used for an EXACTFU node that contains at least one "ss"
@@ -2642,22 +2648,21 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
   *      itself with length changes, and so can be processed faster.  regexec.c
   *      takes advantage of this.  Generally, an EXACTFish node that is in UTF-8
   *      is pre-folded by regcomp.c.  This saves effort in regex matching.
- *      However, probably mostly for historical reasons, the pre-folding isn't
- *      done for non-UTF8 patterns (and it can't be for EXACTF and EXACTFL
- *      nodes, as what they fold to isn't known until runtime.)  The fold
- *      possibilities for the non-UTF8 patterns are quite simple, except for
- *      the sharp s.  All the ones that don't involve a UTF-8 target string
- *      are members of a fold-pair, and arrays are set up for all of them
- *      that quickly find the other member of the pair.  It might actually
- *      be faster to pre-fold these, but it isn't currently done, except for
- *      the sharp s.  Code elsewhere in this file makes sure that it gets
- *      folded to 'ss', even if the pattern isn't UTF-8.  This avoids the
- *      issues described in the next item.
+ *      However, the pre-folding isn't done for non-UTF8 patterns because the
+ *      fold of the MICRO SIGN requires UTF-8.  Also what EXACTF and EXACTFL
+ *      nodes fold to isn't known until runtime.  The fold possibilities for
+ *      the non-UTF8 patterns are quite simple, except for the sharp s.  All
+ *      the ones that don't involve a UTF-8 target string are members of a
+ *      fold-pair, and arrays are set up for all of them so that the other
+ *      member of the pair can be found quickly.  Code elsewhere in this file
+ *      makes sure that in EXACTFU nodes, the sharp s gets folded to 'ss', even
+ *      if the pattern isn't UTF-8.  This avoids the issues described in the
+ *      next item.
   * 4)   A problem remains for the sharp s in EXACTF nodes.  Whether it matches
   *      'ss' or not is not knowable at compile time.  It will match iff the
   *      target string is in UTF-8, unlike the EXACTFU nodes, where it always
   *      matches; and the EXACTFL and EXACTFA nodes where it never does.  Thus
- *      it can't be folded to "ss" at compile time, unlike EXACTFU does as
+ *      it can't be folded to "ss" at compile time, unlike EXACTFU does (as
   *      described in item 3).  An assumption that the optimizer part of
   *      regexec.c (probably unwittingly) makes is that a character in the
   *      pattern corresponds to at most a single character in the target string.
@@ -2729,6 +2734,8 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, b
              const unsigned int oldl = STR_LEN(scan);
              regnode * const nnext = regnext(n);
  
+            /* XXX I (khw) kind of doubt that this works on platforms where
+             * U8_MAX is above 255 because of lots of other assumptions */
              if (oldl + STR_LEN(n) > U8_MAX)
                  break;
              
@@ -2879,7 +2886,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, b
                       greek_sequence:
                         *min_subtract += 4;
  
-                       /* This can't currently be handled by trie's, so change
+                       /* This requires special handling by trie's, so change
                          * the node type to indicate this.  If EXACTFA and
                          * EXACTFL were ever to be handled by trie's, this
                          * would have to be changed.  If this node has already
@@ -2915,9 +2922,9 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, b
                             /* EXACTF nodes need to know that the minimum
                              * length changed so that a sharp s in the string
                              * can match this ss in the pattern, but they
-                            * remain EXACTF nodes, as they are not trie'able,
-                            * so don't have to invent a new node type to
-                            * exclude them from the trie code */
+                             * remain EXACTF nodes, as they won't match this
+                             * unless the target string is is UTF-8, which we
+                             * don't know until runtime */
                             if (OP(scan) != EXACTF) {
                                 OP(scan) = EXACTFU_SS;
                             }
@@ -3379,7 +3386,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                   * the end pointer. */
                                  if ( !first ) {
                                      first = cur;
-                                    trietype = noper_trietype;
                                     if ( noper_trietype == NOTHING ) {
  #if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
                                         regnode * const noper_next = regnext( noper );
@@ -3387,8 +3393,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                         U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
  #endif
  
-                                        if ( noper_next_trietype )
+                                        if ( noper_next_trietype ) {
                                             trietype = noper_next_trietype;
+                                        } else if (noper_next_type)  {
+                                            /* a NOTHING regop is 1 regop wide. We need at least two
+                                             * for a trie so we can't merge this in */
+                                            first = NULL;
+                                        }
+                                    } else {
+                                        trietype = noper_trietype;
                                      }
                                  } else {
                                      if ( trietype == NOTHING )
@@ -3651,7 +3664,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 uc = utf8_to_uvchr_buf(s, s + l, NULL);
                 l = utf8_length(s, s + l);
             }
-           else if (has_exactf_sharp_s) {
+           if (has_exactf_sharp_s) {
                 RExC_seen |= REG_SEEN_EXACTF_SHARP_S;
             }
             min += l - min_subtract;
@@ -3948,6 +3961,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                       && !(data->flags & SF_HAS_EVAL)
                       && !deltanext     /* atom is fixed width */
                       && minnext != 0   /* CURLYM can't handle zero width */
+                      && ! (RExC_seen & REG_SEEN_EXACTF_SHARP_S) /* Nor \xDF */
                 ) {
                     /* XXXX How to optimize if data == 0? */
                     /* Optimize to a simpler form.  */
@@ -4398,6 +4412,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             data->flags |= (OP(scan) == MEOL
                             ? SF_BEFORE_MEOL
                             : SF_BEFORE_SEOL);
+           SCAN_COMMIT(pRExC_state, data, minlenp);
+
         }
         else if (  PL_regkind[OP(scan)] == BRANCHJ
                  /* Lookbehind, or need to calculate parens/evals/stclass: */
@@ -4949,16 +4965,23 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
  }
  #endif
  
-/* public(ish) wrapper for Perl_re_op_compile that only takes an SV
- * pattern rather than a list of OPs */
+/* public(ish) entry point for the perl core's own regex compiling code.
+ * It's actually a wrapper for Perl_re_op_compile that only takes an SV
+ * pattern rather than a list of OPs, and uses the internal engine rather
+ * than the current one */
  
  REGEXP *
  Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
  {
      SV *pat = pattern; /* defeat constness! */
      PERL_ARGS_ASSERT_RE_COMPILE;
-    return Perl_re_op_compile(aTHX_ &pat, 1, NULL, current_re_engine(),
-                               NULL, NULL, rx_flags, 0);
+    return Perl_re_op_compile(aTHX_ &pat, 1, NULL,
+#ifdef PERL_IN_XSUB_RE
+                                &my_reg_engine,
+#else
+                                &PL_core_reg_engine,
+#endif
+                                NULL, NULL, rx_flags, 0);
  }
  
  /* see if there are any run-time code blocks in the pattern.
@@ -5019,7 +5042,7 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, OP *expr,
   * the original pattern needs upgrading to utf8.
   */
  
-bool
+static bool
  S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
      char *pat, STRLEN plen)
  {
@@ -5041,7 +5064,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
         int n = 0;
         STRLEN s;
         char *p, *newpat;
-       int newlen = plen + 5; /* allow for "qr''x" extra chars */
+       int newlen = plen + 6; /* allow for "qr''x\0" extra chars */
         SV *sv, *qr_ref;
         dSP;
  
@@ -5185,6 +5208,50 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
  }
  
  
+STATIC bool
+S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest, SV** rx_utf8, SV** rx_substr, I32* rx_end_shift, I32 lookbehind, I32 offset, I32 *minlen, STRLEN longest_length, bool eol, bool meol)
+{
+    /* This is the common code for setting up the floating and fixed length
+     * string data extracted from Perlre_op_compile() below.  Returns a boolean
+     * as to whether succeeded or not */
+
+    I32 t,ml;
+
+    if (! (longest_length
+           || (eol /* Can't have SEOL and MULTI */
+               && (! meol || (RExC_flags & RXf_PMf_MULTILINE)))
+          )
+            /* See comments for join_exact for why REG_SEEN_EXACTF_SHARP_S */
+        || (RExC_seen & REG_SEEN_EXACTF_SHARP_S))
+    {
+        return FALSE;
+    }
+
+    /* copy the information about the longest from the reg_scan_data
+        over to the program. */
+    if (SvUTF8(sv_longest)) {
+        *rx_utf8 = sv_longest;
+        *rx_substr = NULL;
+    } else {
+        *rx_substr = sv_longest;
+        *rx_utf8 = NULL;
+    }
+    /* end_shift is how many chars that must be matched that
+        follow this item. We calculate it ahead of time as once the
+        lookbehind offset is added in we lose the ability to correctly
+        calculate it.*/
+    ml = minlen ? *(minlen) : (I32)longest_length;
+    *rx_end_shift = ml - offset
+        - longest_length + (SvTAIL(sv_longest) != 0)
+        + lookbehind;
+
+    t = (eol/* Can't have SEOL and MULTI */
+         && (! meol || (RExC_flags & RXf_PMf_MULTILINE)));
+    fbm_compile(sv_longest, t ? FBMcf_TAIL : 0);
+
+    return TRUE;
+}
+
  /*
   * Perl_re_op_compile - the perl internal RE engine's function to compile a
   * regular expression into internal code.
@@ -5237,7 +5304,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      dVAR;
      REGEXP *rx;
      struct regexp *r;
-    register regexp_internal *ri;
+    regexp_internal *ri;
      STRLEN plen;
      char  * VOL exp;
      char* xend;
@@ -5299,9 +5366,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         PL_L1PosixGraph = _new_invlist_C_array(L1PosixGraph_invlist);
         PL_PosixGraph = _new_invlist_C_array(PosixGraph_invlist);
  
-       PL_L1PosixAlnum = _new_invlist_C_array(L1PosixAlnum_invlist);
-       PL_PosixAlnum = _new_invlist_C_array(PosixAlnum_invlist);
-
         PL_L1PosixLower = _new_invlist_C_array(L1PosixLower_invlist);
         PL_PosixLower = _new_invlist_C_array(PosixLower_invlist);
  
@@ -5383,6 +5447,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             OP *o = NULL;
             int n = 0;
             bool utf8 = 0;
+            STRLEN orig_patlen = 0;
  
             if (pRExC_state->num_code_blocks) {
                 o = cLISTOPx(expr)->op_first;
@@ -5424,11 +5489,36 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                     o = o->op_sibling;;
                 }
  
+               if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
+                       (sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
+               {
+                   sv_setsv(pat, sv);
+                   /* overloading involved: all bets are off over literal
+                    * code. Pretend we haven't seen it */
+                   pRExC_state->num_code_blocks -= n;
+                   n = 0;
+                    rx = NULL;
+
+               }
+               else  {
+                    while (SvAMAGIC(msv)
+                            && (sv = AMG_CALLunary(msv, string_amg))
+                            && sv != msv)
+                    {
+                        msv = sv;
+                        SvGETMAGIC(msv);
+                    }
+                    if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP)
+                        msv = SvRV(msv);
+                    orig_patlen = SvCUR(pat);
+                    sv_catsv_nomg(pat, msv);
+                    rx = msv;
+                    if (code)
+                        pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
+                }
+
                 /* extract any code blocks within any embedded qr//'s */
-               rx = msv;
-               if (SvROK(rx))
-                   rx = SvRV(rx);
-               if (SvTYPE(rx) == SVt_REGEXP
+               if (rx && SvTYPE(rx) == SVt_REGEXP
                     && RX_ENGINE((REGEXP*)rx)->op_comp)
                 {
  
@@ -5446,7 +5536,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                         pRExC_state->num_code_blocks += ri->num_code_blocks;
                         for (i=0; i < ri->num_code_blocks; i++) {
                             struct reg_code_block *src, *dst;
-                           STRLEN offset =  SvCUR(pat)
+                           STRLEN offset =  orig_patlen
                                 + ((struct regexp *)SvANY(rx))->pre_prefix;
                             assert(n < pRExC_state->num_code_blocks);
                             src = &ri->code_blocks[i];
@@ -5462,29 +5552,20 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                         }
                     }
                 }
-
-               if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
-                       (sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
-               {
-                   sv_setsv(pat, sv);
-                   /* overloading involved: all bets are off over literal
-                    * code. Pretend we haven't seen it */
-                   pRExC_state->num_code_blocks -= n;
-                   n = 0;
-
-               }
-               else {
-                   if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP)
-                       msv = SvRV(msv);
-                   sv_catsv_nomg(pat, msv);
-                   if (code)
-                       pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
-               }
             }
             SvSETMAGIC(pat);
         }
-       else
+       else {
+            SV *sv;
             pat = *patternp;
+            while (SvAMAGIC(pat)
+                    && (sv = AMG_CALLunary(pat, string_amg))
+                    && sv != pat)
+            {
+                pat = sv;
+                SvGETMAGIC(pat);
+            }
+        }
  
         /* handle bare regex: foo =~ $re */
         {
@@ -5659,7 +5740,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
                                             exp, plen);
         if (!runtime_code) {
-           ReREFCNT_inc(old_re);
             if (used_setjump) {
                 JMPENV_POP;
             }
@@ -6139,105 +6219,56 @@ reStudy:
         scan_commit(pRExC_state, &data,&minlen,0);
         SvREFCNT_dec(data.last_found);
  
-        /* Note that code very similar to this but for anchored string 
-           follows immediately below, changes may need to be made to both. 
-           Be careful. 
-         */
         longest_float_length = CHR_SVLEN(data.longest_float);
-       if (longest_float_length
-           || (data.flags & SF_FL_BEFORE_EOL
-               && (!(data.flags & SF_FL_BEFORE_MEOL)
-                   || (RExC_flags & RXf_PMf_MULTILINE)))) 
-        {
-            I32 t,ml;
  
-            /* See comments for join_exact for why REG_SEEN_EXACTF_SHARP_S */
-           if ((RExC_seen & REG_SEEN_EXACTF_SHARP_S)
-               || (SvCUR(data.longest_fixed)  /* ok to leave SvCUR */
-                   && data.offset_fixed == data.offset_float_min
-                   && SvCUR(data.longest_fixed) == SvCUR(data.longest_float)))
-                   goto remove_float;          /* As in (a)+. */
-
-            /* copy the information about the longest float from the reg_scan_data
-               over to the program. */
-           if (SvUTF8(data.longest_float)) {
-               r->float_utf8 = data.longest_float;
-               r->float_substr = NULL;
-           } else {
-               r->float_substr = data.longest_float;
-               r->float_utf8 = NULL;
-           }
-           /* float_end_shift is how many chars that must be matched that 
-              follow this item. We calculate it ahead of time as once the
-              lookbehind offset is added in we lose the ability to correctly
-              calculate it.*/
-           ml = data.minlen_float ? *(data.minlen_float) 
-                                  : (I32)longest_float_length;
-           r->float_end_shift = ml - data.offset_float_min
-               - longest_float_length + (SvTAIL(data.longest_float) != 0)
-               + data.lookbehind_float;
+        if (! ((SvCUR(data.longest_fixed)  /* ok to leave SvCUR */
+                   && data.offset_fixed == data.offset_float_min
+                   && SvCUR(data.longest_fixed) == SvCUR(data.longest_float)))
+            && S_setup_longest (aTHX_ pRExC_state,
+                                    data.longest_float,
+                                    &(r->float_utf8),
+                                    &(r->float_substr),
+                                    &(r->float_end_shift),
+                                    data.lookbehind_float,
+                                    data.offset_float_min,
+                                    data.minlen_float,
+                                    longest_float_length,
+                                    data.flags & SF_FL_BEFORE_EOL,
+                                    data.flags & SF_FL_BEFORE_MEOL))
+        {
             r->float_min_offset = data.offset_float_min - data.lookbehind_float;
             r->float_max_offset = data.offset_float_max;
             if (data.offset_float_max < I32_MAX) /* Don't offset infinity */
                 r->float_max_offset -= data.lookbehind_float;
-           
-           t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
-                      && (!(data.flags & SF_FL_BEFORE_MEOL)
-                          || (RExC_flags & RXf_PMf_MULTILINE)));
-           fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0);
         }
         else {
-         remove_float:
             r->float_substr = r->float_utf8 = NULL;
             SvREFCNT_dec(data.longest_float);
             longest_float_length = 0;
         }
  
-        /* Note that code very similar to this but for floating string 
-           is immediately above, changes may need to be made to both. 
-           Be careful. 
-         */
         longest_fixed_length = CHR_SVLEN(data.longest_fixed);
  
-        /* See comments for join_exact for why REG_SEEN_EXACTF_SHARP_S */
-       if (! (RExC_seen & REG_SEEN_EXACTF_SHARP_S)
-           && (longest_fixed_length
-               || (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
-                   && (!(data.flags & SF_FIX_BEFORE_MEOL)
-                       || (RExC_flags & RXf_PMf_MULTILINE)))) )
+        if (S_setup_longest (aTHX_ pRExC_state,
+                                data.longest_fixed,
+                                &(r->anchored_utf8),
+                                &(r->anchored_substr),
+                                &(r->anchored_end_shift),
+                                data.lookbehind_fixed,
+                                data.offset_fixed,
+                                data.minlen_fixed,
+                                longest_fixed_length,
+                                data.flags & SF_FIX_BEFORE_EOL,
+                                data.flags & SF_FIX_BEFORE_MEOL))
          {
-            I32 t,ml;
-
-            /* copy the information about the longest fixed 
-               from the reg_scan_data over to the program. */
-           if (SvUTF8(data.longest_fixed)) {
-               r->anchored_utf8 = data.longest_fixed;
-               r->anchored_substr = NULL;
-           } else {
-               r->anchored_substr = data.longest_fixed;
-               r->anchored_utf8 = NULL;
-           }
-           /* fixed_end_shift is how many chars that must be matched that 
-              follow this item. We calculate it ahead of time as once the
-              lookbehind offset is added in we lose the ability to correctly
-              calculate it.*/
-            ml = data.minlen_fixed ? *(data.minlen_fixed) 
-                                   : (I32)longest_fixed_length;
-            r->anchored_end_shift = ml - data.offset_fixed
-               - longest_fixed_length + (SvTAIL(data.longest_fixed) != 0)
-               + data.lookbehind_fixed;
             r->anchored_offset = data.offset_fixed - data.lookbehind_fixed;
-
-           t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
-                && (!(data.flags & SF_FIX_BEFORE_MEOL)
-                    || (RExC_flags & RXf_PMf_MULTILINE)));
-           fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0);
         }
         else {
             r->anchored_substr = r->anchored_utf8 = NULL;
             SvREFCNT_dec(data.longest_fixed);
             longest_fixed_length = 0;
         }
+
         if (ri->regstclass
             && (OP(ri->regstclass) == REG_ANY || OP(ri->regstclass) == SANY))
             ri->regstclass = NULL;
@@ -6359,6 +6390,8 @@ reStudy:
         r->intflags |= PREGf_VERBARG_SEEN;
      if (RExC_seen & REG_SEEN_CUTGROUP)
         r->intflags |= PREGf_CUTGROUP_SEEN;
+    if (pm_flags & PMf_USE_RE_EVAL)
+       r->intflags |= PREGf_USE_RE_EVAL;
      if (RExC_paren_names)
          RXp_PAREN_NAMES(r) = MUTABLE_HV(SvREFCNT_inc(RExC_paren_names));
      else
@@ -6846,8 +6879,10 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
             do {
                 RExC_parse++;
             } while (isALNUM(*RExC_parse));
+    } else {
+       RExC_parse++; /* so the <- from the vFAIL is after the offending character */
+        vFAIL("Group name must start with a non-digit word character");
      }
-
      if ( flags ) {
          SV* sv_name
             = newSVpvn_flags(name_start, (int)(RExC_parse - name_start),
@@ -6871,7 +6906,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
              Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
                        (unsigned long) flags);
          }
-        /* NOT REACHED */
+        assert(0); /* NOT REACHED */
      }
      return NULL;
  }
@@ -6962,32 +6997,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
   * Some of the methods should always be private to the implementation, and some
   * should eventually be made public */
  
-#define INVLIST_LEN_OFFSET 0   /* Number of elements in the inversion list */
-#define INVLIST_ITER_OFFSET 1  /* Current iteration position */
-
-/* This is a combination of a version and data structure type, so that one
- * being passed in can be validated to be an inversion list of the correct
- * vintage.  When the structure of the header is changed, a new random number
- * in the range 2**31-1 should be generated and the new() method changed to
- * insert that at this location.  Then, if an auxiliary program doesn't change
- * correspondingly, it will be discovered immediately */
-#define INVLIST_VERSION_ID_OFFSET 2
-#define INVLIST_VERSION_ID 1064334010
+/* The header definitions are in F<inline_invlist.c> */
  
-/* For safety, when adding new elements, remember to #undef them at the end of
- * the inversion list code section */
-
-#define INVLIST_ZERO_OFFSET 3  /* 0 or 1; must be last element in header */
-/* The UV at position ZERO contains either 0 or 1.  If 0, the inversion list
- * contains the code point U+00000, and begins here.  If 1, the inversion list
- * doesn't contain U+0000, and it begins at the next UV in the array.
- * Inverting an inversion list consists of adding or removing the 0 at the
- * beginning of it.  By reserving a space for that 0, inversion can be made
- * very fast */
-
-#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 1)
-
-/* Internally things are UVs */
  #define TO_INTERNAL_SIZE(x) ((x + HEADER_LENGTH) * sizeof(UV))
  #define FROM_INTERNAL_SIZE(x) ((x / sizeof(UV)) - HEADER_LENGTH)
  
@@ -7009,7 +7020,7 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
      PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT;
  
      /* Must be empty */
-    assert(! *get_invlist_len_addr(invlist));
+    assert(! *_get_invlist_len_addr(invlist));
  
      /* 1^1 = 0; 1^0 = 1 */
      *zero = 1 ^ will_have_0;
@@ -7027,7 +7038,7 @@ S_invlist_array(pTHX_ SV* const invlist)
  
      /* Must not be empty.  If these fail, you probably didn't check for <len>
       * being non-zero before trying to get the array */
-    assert(*get_invlist_len_addr(invlist));
+    assert(*_get_invlist_len_addr(invlist));
      assert(*get_invlist_zero_addr(invlist) == 0
            || *get_invlist_zero_addr(invlist) == 1);
  
@@ -7038,28 +7049,6 @@ S_invlist_array(pTHX_ SV* const invlist)
                    + *get_invlist_zero_addr(invlist));
  }
  
-PERL_STATIC_INLINE UV*
-S_get_invlist_len_addr(pTHX_ SV* invlist)
-{
-    /* Return the address of the UV that contains the current number
-     * of used elements in the inversion list */
-
-    PERL_ARGS_ASSERT_GET_INVLIST_LEN_ADDR;
-
-    return (UV *) (SvPVX(invlist) + (INVLIST_LEN_OFFSET * sizeof (UV)));
-}
-
-PERL_STATIC_INLINE UV
-S_invlist_len(pTHX_ SV* const invlist)
-{
-    /* Returns the current number of elements stored in the inversion list's
-     * array */
-
-    PERL_ARGS_ASSERT_INVLIST_LEN;
-
-    return *get_invlist_len_addr(invlist);
-}
-
  PERL_STATIC_INLINE void
  S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
  {
@@ -7067,7 +7056,7 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
  
      PERL_ARGS_ASSERT_INVLIST_SET_LEN;
  
-    *get_invlist_len_addr(invlist) = len;
+    *_get_invlist_len_addr(invlist) = len;
  
      assert(len <= SvLEN(invlist));
  
@@ -7087,6 +7076,39 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
       * Note that when inverting, SvCUR shouldn't change */
  }
  
+PERL_STATIC_INLINE IV*
+S_get_invlist_previous_index_addr(pTHX_ SV* invlist)
+{
+    /* Return the address of the UV that is reserved to hold the cached index
+     * */
+
+    PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
+
+    return (IV *) (SvPVX(invlist) + (INVLIST_PREVIOUS_INDEX_OFFSET * sizeof (UV)));
+}
+
+PERL_STATIC_INLINE IV
+S_invlist_previous_index(pTHX_ SV* const invlist)
+{
+    /* Returns cached index of previous search */
+
+    PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX;
+
+    return *get_invlist_previous_index_addr(invlist);
+}
+
+PERL_STATIC_INLINE void
+S_invlist_set_previous_index(pTHX_ SV* const invlist, const IV index)
+{
+    /* Caches <index> for later retrieval */
+
+    PERL_ARGS_ASSERT_INVLIST_SET_PREVIOUS_INDEX;
+
+    assert(index == 0 || index < (int) _invlist_len(invlist));
+
+    *get_invlist_previous_index_addr(invlist) = index;
+}
+
  PERL_STATIC_INLINE UV
  S_invlist_max(pTHX_ SV* const invlist)
  {
@@ -7137,8 +7159,9 @@ Perl__new_invlist(pTHX_ IV initial_size)
       * properly */
      *get_invlist_zero_addr(new_list) = UV_MAX;
  
+    *get_invlist_previous_index_addr(new_list) = 0;
      *get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID;
-#if HEADER_LENGTH != 4
+#if HEADER_LENGTH != 5
  #   error Need to regenerate VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length
  #endif
  
@@ -7161,7 +7184,7 @@ S__new_invlist_C_array(pTHX_ UV* list)
      SvPV_set(invlist, (char *) list);
      SvLEN_set(invlist, 0);  /* Means we own the contents, and the system
                                shouldn't touch it */
-    SvCUR_set(invlist, TO_INTERNAL_SIZE(invlist_len(invlist)));
+    SvCUR_set(invlist, TO_INTERNAL_SIZE(_invlist_len(invlist)));
  
      if (*get_invlist_version_id_addr(invlist) != INVLIST_VERSION_ID) {
          Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
@@ -7191,11 +7214,6 @@ S_invlist_trim(pTHX_ SV* const invlist)
      SvPV_shrink_to_cur((SV *) invlist);
  }
  
-/* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
- * etc */
-#define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1))
-#define PREV_RANGE_MATCHES_INVLIST(i) (! ELEMENT_RANGE_MATCHES_INVLIST(i))
-
  #define _invlist_union_complement_2nd(a, b, output) _invlist_union_maybe_complement_2nd(a, b, TRUE, output)
  
  STATIC void
@@ -7207,7 +7225,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
  
      UV* array;
      UV max = invlist_max(invlist);
-    UV len = invlist_len(invlist);
+    UV len = _invlist_len(invlist);
  
      PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST;
  
@@ -7279,8 +7297,8 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
  
  #ifndef PERL_IN_XSUB_RE
  
-STATIC IV
-S_invlist_search(pTHX_ SV* const invlist, const UV cp)
+IV
+Perl__invlist_search(pTHX_ SV* const invlist, const UV cp)
  {
      /* Searches the inversion list for the entry that contains the input code
       * point <cp>.  If <cp> is not in the list, -1 is returned.  Otherwise, the
@@ -7288,23 +7306,68 @@ S_invlist_search(pTHX_ SV* const invlist, const UV cp)
       * contains <cp> */
  
      IV low = 0;
-    IV high = invlist_len(invlist);
-    const UV * const array = invlist_array(invlist);
+    IV mid;
+    IV high = _invlist_len(invlist);
+    const IV highest_element = high - 1;
+    const UV* array;
  
-    PERL_ARGS_ASSERT_INVLIST_SEARCH;
+    PERL_ARGS_ASSERT__INVLIST_SEARCH;
  
-    /* If list is empty or the code point is before the first element, return
-     * failure. */
-    if (high == 0 || cp < array[0]) {
+    /* If list is empty, return failure. */
+    if (high == 0) {
         return -1;
      }
  
+    /* If the code point is before the first element, return failure.  (We
+     * can't combine this with the test above, because we can't get the array
+     * unless we know the list is non-empty) */
+    array = invlist_array(invlist);
+
+    mid = invlist_previous_index(invlist);
+    assert(mid >=0 && mid <= highest_element);
+
+    /* <mid> contains the cache of the result of the previous call to this
+     * function (0 the first time).  See if this call is for the same result,
+     * or if it is for mid-1.  This is under the theory that calls to this
+     * function will often be for related code points that are near each other.
+     * And benchmarks show that caching gives better results.  We also test
+     * here if the code point is within the bounds of the list.  These tests
+     * replace others that would have had to be made anyway to make sure that
+     * the array bounds were not exceeded, and give us extra information at the
+     * same time */
+    if (cp >= array[mid]) {
+        if (cp >= array[highest_element]) {
+            return highest_element;
+        }
+
+        /* Here, array[mid] <= cp < array[highest_element].  This means that
+         * the final element is not the answer, so can exclude it; it also
+         * means that <mid> is not the final element, so can refer to 'mid + 1'
+         * safely */
+        if (cp < array[mid + 1]) {
+            return mid;
+        }
+        high--;
+        low = mid + 1;
+    }
+    else { /* cp < aray[mid] */
+        if (cp < array[0]) { /* Fail if outside the array */
+            return -1;
+        }
+        high = mid;
+        if (cp >= array[mid - 1]) {
+            goto found_entry;
+        }
+    }
+
      /* Binary search.  What we are looking for is <i> such that
       * array[i] <= cp < array[i+1]
-     * The loop below converges on the i+1. */
+     * The loop below converges on the i+1.  Note that there may not be an
+     * (i+1)th element in the array, and things work nonetheless */
      while (low < high) {
-       IV mid = (low + high) / 2;
-       if (array[mid] <= cp) {
+       mid = (low + high) / 2;
+        assert(mid <= highest_element);
+       if (array[mid] <= cp) { /* cp >= array[mid] */
             low = mid + 1;
  
             /* We could do this extra test to exit the loop early.
@@ -7318,7 +7381,10 @@ S_invlist_search(pTHX_ SV* const invlist, const UV cp)
         }
      }
  
-    return high - 1;
+  found_entry:
+    high--;
+    invlist_set_previous_index(invlist, high);
+    return high;
  }
  
  void
@@ -7332,7 +7398,7 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
       * that <swatch> is all 0's on input */
  
      UV current = start;
-    const IV len = invlist_len(invlist);
+    const IV len = _invlist_len(invlist);
      IV i;
      const UV * array;
  
@@ -7345,7 +7411,7 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
      array = invlist_array(invlist);
  
      /* Find which element it is */
-    i = invlist_search(invlist, start);
+    i = _invlist_search(invlist, start);
  
      /* We populate from <start> to <end> */
      while (current < end) {
@@ -7364,7 +7430,15 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
              current = array[i];
             if (current >= end) {   /* Finished if beyond the end of what we
                                        are populating */
-                return;
+                if (LIKELY(end < UV_MAX)) {
+                    return;
+                }
+
+                /* We get here when the upper bound is the maximum
+                 * representable on the machine, and we are looking for just
+                 * that code point.  Have to special case it */
+                i = len;
+                goto join_end_of_list;
              }
          }
          assert(current >= start);
@@ -7381,6 +7455,8 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
              swatch[offset >> 3] |= 1 << (offset & 7);
          }
  
+    join_end_of_list:
+
         /* Quit if at the end of the list */
          if (i >= len) {
  
@@ -7404,7 +7480,6 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
      return;
  }
  
-
  void
  Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
  {
@@ -7453,7 +7528,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
      assert(a != b);
  
      /* If either one is empty, the union is the other one */
-    if (a == NULL || ((len_a = invlist_len(a)) == 0)) {
+    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
         if (*output == a) {
              if (a != NULL) {
                  SvREFCNT_dec(a);
@@ -7467,7 +7542,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
         } /* else *output already = b; */
         return;
      }
-    else if ((len_b = invlist_len(b)) == 0) {
+    else if ((len_b = _invlist_len(b)) == 0) {
         if (*output == b) {
             SvREFCNT_dec(b);
         }
@@ -7608,7 +7683,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
  
      /* Set result to final length, which can change the pointer to array_u, so
       * re-find it */
-    if (len_u != invlist_len(u)) {
+    if (len_u != _invlist_len(u)) {
         invlist_set_len(u, len_u);
         invlist_trim(u);
         array_u = invlist_array(u);
@@ -7687,8 +7762,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      assert(a != b);
  
      /* Special case if either one is empty */
-    len_a = invlist_len(a);
-    if ((len_a == 0) || ((len_b = invlist_len(b)) == 0)) {
+    len_a = _invlist_len(a);
+    if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
  
          if (len_a != 0 && complement_b) {
  
@@ -7834,7 +7909,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
      /* Set result to final length, which can change the pointer to array_r, so
       * re-find it */
-    if (len_r != invlist_len(r)) {
+    if (len_r != _invlist_len(r)) {
         invlist_set_len(r, len_r);
         invlist_trim(r);
         array_r = invlist_array(r);
@@ -7882,13 +7957,13 @@ Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
         len = 0;
      }
      else {
-       len = invlist_len(invlist);
+       len = _invlist_len(invlist);
      }
  
      /* If comes after the final entry, can just append it to the end */
      if (len == 0
         || start >= invlist_array(invlist)
-                                   [invlist_len(invlist) - 1])
+                                   [_invlist_len(invlist) - 1])
      {
         _append_range_to_invlist(invlist, start, end);
         return invlist;
@@ -7922,7 +7997,7 @@ Perl__invlist_invert(pTHX_ SV* const invlist)
       * have a zero; removes it otherwise.  As described above, the data
       * structure is set up so that this is very efficient */
  
-    UV* len_pos = get_invlist_len_addr(invlist);
+    UV* len_pos = _get_invlist_len_addr(invlist);
  
      PERL_ARGS_ASSERT__INVLIST_INVERT;
  
@@ -7959,7 +8034,7 @@ Perl__invlist_invert_prop(pTHX_ SV* const invlist)
  
      _invlist_invert(invlist);
  
-    len = invlist_len(invlist);
+    len = _invlist_len(invlist);
  
      if (len != 0) { /* If empty do nothing */
         array = invlist_array(invlist);
@@ -7991,7 +8066,7 @@ S_invlist_clone(pTHX_ SV* const invlist)
  
      /* Need to allocate extra space to accommodate Perl's addition of a
       * trailing NUL to SvPV's, since it thinks they are always strings */
-    SV* new_invlist = _new_invlist(invlist_len(invlist) + 1);
+    SV* new_invlist = _new_invlist(_invlist_len(invlist) + 1);
      STRLEN length = SvCUR(invlist);
  
      PERL_ARGS_ASSERT_INVLIST_CLONE;
@@ -8042,7 +8117,7 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
       * will start over at the beginning of the list */
  
      UV* pos = get_invlist_iter_addr(invlist);
-    UV len = invlist_len(invlist);
+    UV len = _invlist_len(invlist);
      UV *array;
  
      PERL_ARGS_ASSERT_INVLIST_ITERNEXT;
@@ -8066,6 +8141,36 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
      return TRUE;
  }
  
+PERL_STATIC_INLINE UV
+S_invlist_highest(pTHX_ SV* const invlist)
+{
+    /* Returns the highest code point that matches an inversion list.  This API
+     * has an ambiguity, as it returns 0 under either the highest is actually
+     * 0, or if the list is empty.  If this distinction matters to you, check
+     * for emptiness before calling this function */
+
+    UV len = _invlist_len(invlist);
+    UV *array;
+
+    PERL_ARGS_ASSERT_INVLIST_HIGHEST;
+
+    if (len == 0) {
+       return 0;
+    }
+
+    array = invlist_array(invlist);
+
+    /* The last element in the array in the inversion list always starts a
+     * range that goes to infinity.  That range may be for code points that are
+     * matched in the inversion list, or it may be for ones that aren't
+     * matched.  In the latter case, the highest code point in the set is one
+     * less than the beginning of this range; otherwise it is the final element
+     * of this range: infinity */
+    return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
+           ? UV_MAX
+           : array[len - 1] - 1;
+}
+
  #ifndef PERL_IN_XSUB_RE
  SV *
  Perl__invlist_contents(pTHX_ SV* const invlist)
@@ -8121,6 +8226,77 @@ S_invlist_dump(pTHX_ SV* const invlist, const char * const header)
  }
  #endif
  
+#if 0
+bool
+S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b)
+{
+    /* Return a boolean as to if the two passed in inversion lists are
+     * identical.  The final argument, if TRUE, says to take the complement of
+     * the second inversion list before doing the comparison */
+
+    UV* array_a = invlist_array(a);
+    UV* array_b = invlist_array(b);
+    UV len_a = _invlist_len(a);
+    UV len_b = _invlist_len(b);
+
+    UV i = 0;              /* current index into the arrays */
+    bool retval = TRUE;     /* Assume are identical until proven otherwise */
+
+    PERL_ARGS_ASSERT__INVLISTEQ;
+
+    /* If are to compare 'a' with the complement of b, set it
+     * up so are looking at b's complement. */
+    if (complement_b) {
+
+        /* The complement of nothing is everything, so <a> would have to have
+         * just one element, starting at zero (ending at infinity) */
+        if (len_b == 0) {
+            return (len_a == 1 && array_a[0] == 0);
+        }
+        else if (array_b[0] == 0) {
+
+            /* Otherwise, to complement, we invert.  Here, the first element is
+             * 0, just remove it.  To do this, we just pretend the array starts
+             * one later, and clear the flag as we don't have to do anything
+             * else later */
+
+            array_b++;
+            len_b--;
+            complement_b = FALSE;
+        }
+        else {
+
+            /* But if the first element is not zero, we unshift a 0 before the
+             * array.  The data structure reserves a space for that 0 (which
+             * should be a '1' right now), so physical shifting is unneeded,
+             * but temporarily change that element to 0.  Before exiting the
+             * routine, we must restore the element to '1' */
+            array_b--;
+            len_b++;
+            array_b[0] = 0;
+        }
+    }
+
+    /* Make sure that the lengths are the same, as well as the final element
+     * before looping through the remainder.  (Thus we test the length, final,
+     * and first elements right off the bat) */
+    if (len_a != len_b || array_a[len_a-1] != array_b[len_a-1]) {
+        retval = FALSE;
+    }
+    else for (i = 0; i < len_a - 1; i++) {
+        if (array_a[i] != array_b[i]) {
+            retval = FALSE;
+            break;
+        }
+    }
+
+    if (complement_b) {
+        array_b[0] = 1;
+    }
+    return retval;
+}
+#endif
+
  #undef HEADER_LENGTH
  #undef INVLIST_INITIAL_LENGTH
  #undef TO_INTERNAL_SIZE
@@ -8153,11 +8329,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
      /* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
  {
      dVAR;
-    register regnode *ret;             /* Will be the head of the group. */
-    register regnode *br;
-    register regnode *lastbr;
-    register regnode *ender = NULL;
-    register I32 parno = 0;
+    regnode *ret;              /* Will be the head of the group. */
+    regnode *br;
+    regnode *lastbr;
+    regnode *ender = NULL;
+    I32 parno = 0;
      I32 flags;
      U32 oregflags = RExC_flags;
      bool have_branch = 0;
@@ -8328,7 +8504,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     ret = reganode(pRExC_state,
                                    ((! FOLD)
                                      ? NREF
-                                    : (MORE_ASCII_RESTRICTED)
+                                    : (ASCII_FOLD_RESTRICTED)
                                        ? NREFFA
                                         : (AT_LEAST_UNI_SEMANTICS)
                                           ? NREFFU
@@ -8485,7 +8661,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
                  }
                  goto gen_recurse_regop;
-                /* NOT REACHED */
+                assert(0); /* NOT REACHED */
              case '+':
                  if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
                      RExC_parse++;
@@ -8555,7 +8731,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  nextchar(pRExC_state);
                  return ret;
              } /* named and numeric backreferences */
-            /* NOT REACHED */
+            assert(0); /* NOT REACHED */
  
             case '?':           /* (??...) */
                 is_logical = 1;
@@ -8605,10 +8781,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 nextchar(pRExC_state);
  
                 if (is_logical) {
+                    regnode *eval;
                     ret = reg_node(pRExC_state, LOGICAL);
-                   if (!SIZE_ONLY)
+                    eval = reganode(pRExC_state, EVAL, n);
+                   if (!SIZE_ONLY) {
                         ret->flags = 2;
-                    REGTAIL(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
+                        /* for later propagation into (??{}) return value */
+                        eval->flags = (U8) (RExC_flags & RXf_PMf_COMPILETIME);
+                    }
+                    REGTAIL(pRExC_state, ret, eval);
                      /* deal with the length of this later - MJD */
                     return ret;
                 }
@@ -9138,7 +9319,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
         }
         else
             FAIL("Junk on end of regexp");      /* "Can't happen". */
-       /* NOTREACHED */
+       assert(0); /* NOTREACHED */
      }
  
      if (RExC_in_lookbehind) {
@@ -9158,9 +9339,9 @@ STATIC regnode *
  S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
  {
      dVAR;
-    register regnode *ret;
-    register regnode *chain = NULL;
-    register regnode *latest;
+    regnode *ret;
+    regnode *chain = NULL;
+    regnode *latest;
      I32 flags = 0, c = 0;
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -9231,9 +9412,9 @@ STATIC regnode *
  S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  {
      dVAR;
-    register regnode *ret;
-    register char op;
-    register char *next;
+    regnode *ret;
+    char op;
+    char *next;
      I32 flags;
      const char * const origparse = RExC_parse;
      I32 min;
@@ -9422,86 +9603,95 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      return(ret);
  }
  
-
-/* reg_namedseq(pRExC_state,UVp, UV depth)
+STATIC bool
+S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p, UV *valuep, I32 *flagp, U32 depth, bool in_char_class)
+{
     
-   This is expected to be called by a parser routine that has 
-   recognized '\N' and needs to handle the rest. RExC_parse is
-   expected to point at the first char following the N at the time
-   of the call.
+ /* This is expected to be called by a parser routine that has recognized '\N'
+   and needs to handle the rest. RExC_parse is expected to point at the first
+   char following the N at the time of the call.  On successful return,
+   RExC_parse has been updated to point to just after the sequence identified
+   by this routine, and <*flagp> has been updated.
  
-   The \N may be inside (indicated by valuep not being NULL) or outside a
+   The \N may be inside (indicated by the boolean <in_char_class>) or outside a
     character class.
  
     \N may begin either a named sequence, or if outside a character class, mean
     to match a non-newline.  For non single-quoted regexes, the tokenizer has
-   attempted to decide which, and in the case of a named sequence converted it
+   attempted to decide which, and in the case of a named sequence, converted it
     into one of the forms: \N{} (if the sequence is null), or \N{U+c1.c2...},
     where c1... are the characters in the sequence.  For single-quoted regexes,
     the tokenizer passes the \N sequence through unchanged; this code will not
-   attempt to determine this nor expand those.  The net effect is that if the
-   beginning of the passed-in pattern isn't '{U+' or there is no '}', it
-   signals that this \N occurrence means to match a non-newline.
-   
+   attempt to determine this nor expand those, instead raising a syntax error.
+   The net effect is that if the beginning of the passed-in pattern isn't '{U+'
+   or there is no '}', it signals that this \N occurrence means to match a
+   non-newline.
+
     Only the \N{U+...} form should occur in a character class, for the same
     reason that '.' inside a character class means to just match a period: it
     just doesn't make sense.
-   
-   If valuep is non-null then it is assumed that we are parsing inside 
-   of a charclass definition and the first codepoint in the resolved
-   string is returned via *valuep and the routine will return NULL. 
-   In this mode if a multichar string is returned from the charnames 
-   handler, a warning will be issued, and only the first char in the 
-   sequence will be examined. If the string returned is zero length
-   then the value of *valuep is undefined and NON-NULL will 
-   be returned to indicate failure. (This will NOT be a valid pointer 
-   to a regnode.)
-   
-   If valuep is null then it is assumed that we are parsing normal text and a
-   new EXACT node is inserted into the program containing the resolved string,
-   and a pointer to the new node is returned.  But if the string is zero length
-   a NOTHING node is emitted instead.
  
-   On success RExC_parse is set to the char following the endbrace.
-   Parsing failures will generate a fatal error via vFAIL(...)
+   The function raises an error (via vFAIL), and doesn't return for various
+   syntax errors.  Otherwise it returns TRUE and sets <node_p> or <valuep> on
+   success; it returns FALSE otherwise.
+
+   If <valuep> is non-null, it means the caller can accept an input sequence
+   consisting of a just a single code point; <*valuep> is set to that value
+   if the input is such.
+
+   If <node_p> is non-null it signifies that the caller can accept any other
+   legal sequence (i.e., one that isn't just a single code point).  <*node_p>
+   is set as follows:
+    1) \N means not-a-NL: points to a newly created REG_ANY node;
+    2) \N{}:              points to a new NOTHING node;
+    3) otherwise:         points to a new EXACT node containing the resolved
+                          string.
+   Note that FALSE is returned for single code point sequences if <valuep> is
+   null.
   */
-STATIC regnode *
-S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 depth)
-{
+
      char * endbrace;    /* '}' following the name */
-    regnode *ret = NULL;
      char* p;
+    char *endchar;     /* Points to '.' or '}' ending cur char in the input
+                           stream */
+    bool has_multiple_chars; /* true if the input stream contains a sequence of
+                                more than one character */
  
      GET_RE_DEBUG_FLAGS_DECL;
   
-    PERL_ARGS_ASSERT_REG_NAMEDSEQ;
+    PERL_ARGS_ASSERT_GROK_BSLASH_N;
  
      GET_RE_DEBUG_FLAGS;
  
+    assert(cBOOL(node_p) ^ cBOOL(valuep));  /* Exactly one should be set */
+
      /* The [^\n] meaning of \N ignores spaces and comments under the /x
       * modifier.  The other meaning does not */
      p = (RExC_flags & RXf_PMf_EXTENDED)
         ? regwhite( pRExC_state, RExC_parse )
         : RExC_parse;
-   
+
      /* Disambiguate between \N meaning a named character versus \N meaning
       * [^\n].  The former is assumed when it can't be the latter. */
      if (*p != '{' || regcurly(p)) {
         RExC_parse = p;
-       if (valuep) {
+       if (! node_p) {
             /* no bare \N in a charclass */
-           vFAIL("\\N in a character class must be a named character: \\N{...}");
-       }
+            if (in_char_class) {
+                vFAIL("\\N in a character class must be a named character: \\N{...}");
+            }
+            return FALSE;
+        }
         nextchar(pRExC_state);
-       ret = reg_node(pRExC_state, REG_ANY);
+       *node_p = reg_node(pRExC_state, REG_ANY);
         *flagp |= HASWIDTH|SIMPLE;
         RExC_naughty++;
         RExC_parse--;
-        Set_Node_Length(ret, 1); /* MJD */
-       return ret;
+        Set_Node_Length(*node_p, 1); /* MJD */
+       return TRUE;
      }
  
-    /* Here, we have decided it should be a named sequence */
+    /* Here, we have decided it should be a named character or sequence */
  
      /* The test above made sure that the next real character is a '{', but
       * under the /x modifier, it could be separated by space (or a comment and
@@ -9523,44 +9713,48 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 dept
      }
  
      if (endbrace == RExC_parse) {   /* empty: \N{} */
-       if (! valuep) {
-           RExC_parse = endbrace + 1;  
-           return reg_node(pRExC_state,NOTHING);
-       }
-
-       if (SIZE_ONLY) {
-           ckWARNreg(RExC_parse,
-                   "Ignoring zero length \\N{} in character class"
-           );
-           RExC_parse = endbrace + 1;  
+        bool ret = TRUE;
+       if (node_p) {
+           *node_p = reg_node(pRExC_state,NOTHING);
+       }
+        else if (in_char_class) {
+            if (SIZE_ONLY && in_char_class) {
+                ckWARNreg(RExC_parse,
+                        "Ignoring zero length \\N{} in character class"
+                );
+            }
+            ret = FALSE;
         }
-       *valuep = 0;
-       return (regnode *) &RExC_parse; /* Invalid regnode pointer */
+        else {
+            return FALSE;
+        }
+        nextchar(pRExC_state);
+        return ret;
      }
  
-    REQUIRE_UTF8;      /* named sequences imply Unicode semantics */
+    RExC_uni_semantics = 1; /* Unicode named chars imply Unicode semantics */
      RExC_parse += 2;   /* Skip past the 'U+' */
  
-    if (valuep) {   /* In a bracketed char class */
-       /* We only pay attention to the first char of 
-       multichar strings being returned. I kinda wonder
+    endchar = RExC_parse + strcspn(RExC_parse, ".}");
+
+    /* Code points are separated by dots.  If none, there is only one code
+     * point, and is terminated by the brace */
+    has_multiple_chars = (endchar < endbrace);
+
+    if (valuep && (! has_multiple_chars || in_char_class)) {
+       /* We only pay attention to the first char of
+        multichar strings being returned in char classes. I kinda wonder
         if this makes sense as it does change the behaviour
         from earlier versions, OTOH that behaviour was broken
         as well. XXX Solution is to recharacterize as
         [rest-of-class]|multi1|multi2... */
  
-       STRLEN length_of_hex;
-       I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+       STRLEN length_of_hex = (STRLEN)(endchar - RExC_parse);
+       I32 grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
             | PERL_SCAN_DISALLOW_PREFIX
             | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
-    
-       char * endchar = RExC_parse + strcspn(RExC_parse, ".}");
-       if (endchar < endbrace) {
-           ckWARNreg(endchar, "Using just the first character returned by \\N{} in character class");
-       }
  
-       length_of_hex = (STRLEN)(endchar - RExC_parse);
-       *valuep = grok_hex(RExC_parse, &length_of_hex, &flags, NULL);
+       *valuep = grok_hex(RExC_parse, &length_of_hex, &grok_hex_flags, NULL);
  
         /* The tokenizer should have guaranteed validity, but it's possible to
          * bypass it by using single quoting, so check */
@@ -9572,16 +9766,26 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 dept
                             ? UTF8SKIP(RExC_parse)
                             : 1;
             /* Guard against malformed utf8 */
-           if (RExC_parse >= endchar) RExC_parse = endchar;
+           if (RExC_parse >= endchar) {
+                RExC_parse = endchar;
+            }
             vFAIL("Invalid hexadecimal number in \\N{U+...}");
-       }    
+       }
  
-       RExC_parse = endbrace + 1;
-       if (endchar == endbrace) return NULL;
+        if (in_char_class && has_multiple_chars) {
+           ckWARNreg(endchar, "Using just the first character returned by \\N{} in character class");
+        }
+        RExC_parse = endbrace + 1;
+    }
+    else if (! node_p || ! has_multiple_chars) {
  
-        ret = (regnode *) &RExC_parse; /* Invalid regnode pointer */
+        /* Here, the input is legal, but not according to the caller's
+         * options.  We fail without advancing the parse, so that the
+         * caller can try again */
+        RExC_parse = p;
+        return FALSE;
      }
-    else {     /* Not a char class */
+    else {
  
         /* What is done here is to convert this to a sub-pattern of the form
          * (?:\x{char1}\x{char2}...)
@@ -9594,16 +9798,11 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 dept
  
         SV * substitute_parse = newSVpvn_flags("?:", 2, SVf_UTF8|SVs_TEMP);
         STRLEN len;
-       char *endchar;      /* Points to '.' or '}' ending cur char in the input
-                              stream */
         char *orig_end = RExC_end;
+        I32 flags;
  
         while (RExC_parse < endbrace) {
  
-           /* Code points are separated by dots.  If none, there is only one
-            * code point, and is terminated by the brace */
-           endchar = RExC_parse + strcspn(RExC_parse, ".}");
-
             /* Convert to notation the rest of the code understands */
             sv_catpv(substitute_parse, "\\x{");
             sv_catpvn(substitute_parse, RExC_parse, endchar - RExC_parse);
@@ -9611,6 +9810,7 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 dept
  
             /* Point to the beginning of the next character in the sequence. */
             RExC_parse = endchar + 1;
+           endchar = RExC_parse + strcspn(RExC_parse, ".}");
         }
         sv_catpv(substitute_parse, ")");
  
@@ -9625,16 +9825,17 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 dept
         /* The values are Unicode, and therefore not subject to recoding */
         RExC_override_recoding = 1;
  
-       ret = reg(pRExC_state, 1, flagp, depth+1);
+       *node_p = reg(pRExC_state, 1, &flags, depth+1);
+       *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
  
         RExC_parse = endbrace;
         RExC_end = orig_end;
         RExC_override_recoding = 0;
  
-       nextchar(pRExC_state);
+        nextchar(pRExC_state);
      }
  
-    return ret;
+    return TRUE;
  }
  
  
@@ -9671,38 +9872,163 @@ S_reg_recode(pTHX_ const char value, SV **encp)
      return uv;
  }
  
+PERL_STATIC_INLINE U8
+S_compute_EXACTish(pTHX_ RExC_state_t *pRExC_state)
+{
+    U8 op;
  
-/*
- - regatom - the lowest level
-
-   Try to identify anything special at the start of the pattern. If there
-   is, then handle it as required. This may involve generating a single regop,
-   such as for an assertion; or it may involve recursing, such as to
-   handle a () structure.
+    PERL_ARGS_ASSERT_COMPUTE_EXACTISH;
  
-   If the string doesn't start with something special then we gobble up
-   as much literal text as we can.
+    if (! FOLD) {
+        return EXACT;
+    }
  
-   Once we have been able to handle whatever type of thing started the
-   sequence, we return.
+    op = get_regex_charset(RExC_flags);
+    if (op >= REGEX_ASCII_RESTRICTED_CHARSET) {
+        op--; /* /a is same as /u, and map /aa's offset to what /a's would have
+                 been, so there is no hole */
+    }
  
-   Note: we have to be careful with escapes, as they can be both literal
-   and special, and in the case of \10 and friends can either, depending
-   on context. Specifically there are two separate switches for handling
-   escape sequences, with the one for handling literal escapes requiring
-   a dummy entry for all of the special escapes that are actually handled
-   by the other.
-*/
+    return op + EXACTF;
+}
  
-STATIC regnode *
-S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
+PERL_STATIC_INLINE void
+S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, I32* flagp, STRLEN len, UV code_point)
  {
-    dVAR;
-    register regnode *ret = NULL;
-    I32 flags;
-    char *parse_start = RExC_parse;
-    U8 op;
-    GET_RE_DEBUG_FLAGS_DECL;
+    /* This knows the details about sizing an EXACTish node, setting flags for
+     * it (by setting <*flagp>, and potentially populating it with a single
+     * character.
+     *
+     * If <len> is non-zero, this function assumes that the node has already
+     * been populated, and just does the sizing.  In this case <code_point>
+     * should be the final code point that has already been placed into the
+     * node.  This value will be ignored except that under some circumstances
+     * <*flagp> is set based on it.
+     *
+     * If <len is zero, the function assumes that the node is to contain only
+     * the single character given by <code_point> and calculates what <len>
+     * should be.  In pass 1, it sizes the node appropriately.  In pass 2, it
+     * additionally will populate the node's STRING with <code_point>, if <len>
+     * is 0.  In both cases <*flagp> is appropriately set
+     *
+     * It knows that under FOLD, UTF characters and the Latin Sharp S must be
+     * folded (the latter only when the rules indicate it can match 'ss') */
+
+    bool len_passed_in = cBOOL(len != 0);
+    U8 character[UTF8_MAXBYTES_CASE+1];
+
+    PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT;
+
+    if (! len_passed_in) {
+        if (UTF) {
+            if (FOLD) {
+                to_uni_fold(NATIVE_TO_UNI(code_point), character, &len);
+            }
+            else {
+                uvchr_to_utf8( character, code_point);
+                len = UTF8SKIP(character);
+            }
+        }
+        else if (! FOLD
+                 || code_point != LATIN_SMALL_LETTER_SHARP_S
+                 || ASCII_FOLD_RESTRICTED
+                 || ! AT_LEAST_UNI_SEMANTICS)
+        {
+            *character = (U8) code_point;
+            len = 1;
+        }
+        else {
+            *character = 's';
+            *(character + 1) = 's';
+            len = 2;
+        }
+    }
+
+    if (SIZE_ONLY) {
+        RExC_size += STR_SZ(len);
+    }
+    else {
+        RExC_emit += STR_SZ(len);
+        STR_LEN(node) = len;
+        if (! len_passed_in) {
+            Copy((char *) character, STRING(node), len, char);
+        }
+    }
+
+    *flagp |= HASWIDTH;
+    if (len == 1 && UNI_IS_INVARIANT(code_point))
+        *flagp |= SIMPLE;
+}
+
+/*
+ - regatom - the lowest level
+
+   Try to identify anything special at the start of the pattern. If there
+   is, then handle it as required. This may involve generating a single regop,
+   such as for an assertion; or it may involve recursing, such as to
+   handle a () structure.
+
+   If the string doesn't start with something special then we gobble up
+   as much literal text as we can.
+
+   Once we have been able to handle whatever type of thing started the
+   sequence, we return.
+
+   Note: we have to be careful with escapes, as they can be both literal
+   and special, and in the case of \10 and friends, context determines which.
+
+   A summary of the code structure is:
+
+   switch (first_byte) {
+       cases for each special:
+           handle this special;
+           break;
+       case '\\':
+           switch (2nd byte) {
+               cases for each unambiguous special:
+                   handle this special;
+                   break;
+               cases for each ambigous special/literal:
+                   disambiguate;
+                   if (special)  handle here
+                   else goto defchar;
+               default: // unambiguously literal:
+                   goto defchar;
+           }
+       default:  // is a literal char
+           // FALL THROUGH
+       defchar:
+           create EXACTish node for literal;
+           while (more input and node isn't full) {
+               switch (input_byte) {
+                  cases for each special;
+                       make sure parse pointer is set so that the next call to
+                           regatom will see this special first
+                       goto loopdone; // EXACTish node terminated by prev. char
+                  default:
+                      append char to EXACTISH node;
+               }
+               get next input byte;
+           }
+        loopdone:
+   }
+   return the generated node;
+
+   Specifically there are two separate switches for handling
+   escape sequences, with the one for handling literal escapes requiring
+   a dummy entry for all of the special escapes that are actually handled
+   by the other.
+*/
+
+STATIC regnode *
+S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
+{
+    dVAR;
+    regnode *ret = NULL;
+    I32 flags;
+    char *parse_start = RExC_parse;
+    U8 op;
+    GET_RE_DEBUG_FLAGS_DECL;
      DEBUG_PARSE("atom");
      *flagp = WORST;            /* Tentatively. */
  
@@ -9746,13 +10072,12 @@ tryagain:
      case '[':
      {
         char * const oregcomp_parse = ++RExC_parse;
-        ret = regclass(pRExC_state,depth+1);
+        ret = regclass(pRExC_state, flagp,depth+1);
         if (*RExC_parse != ']') {
             RExC_parse = oregcomp_parse;
             vFAIL("Unmatched [");
         }
         nextchar(pRExC_state);
-       *flagp |= HASWIDTH|SIMPLE;
          Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */
         break;
      }
@@ -9842,43 +10167,17 @@ tryagain:
             *flagp |= HASWIDTH;
             goto finish_meta_pat;
         case 'w':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = ALNUML;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = ALNUMU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = ALNUMA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = ALNUM;
-                   break;
-               default:
-                   goto bad_charset;
+           op = ALNUM + get_regex_charset(RExC_flags);
+            if (op > ALNUMA) {  /* /aa is same as /a */
+                op = ALNUMA;
              }
             ret = reg_node(pRExC_state, op);
             *flagp |= HASWIDTH|SIMPLE;
             goto finish_meta_pat;
         case 'W':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NALNUML;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NALNUMU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NALNUMA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NALNUM;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NALNUM + get_regex_charset(RExC_flags);
+            if (op > NALNUMA) { /* /aa is same as /a */
+                op = NALNUMA;
              }
             ret = reg_node(pRExC_state, op);
             *flagp |= HASWIDTH|SIMPLE;
@@ -9886,22 +10185,9 @@ tryagain:
         case 'b':
             RExC_seen_zerolen++;
             RExC_seen |= REG_SEEN_LOOKBEHIND;
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = BOUNDL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = BOUNDU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = BOUNDA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = BOUND;
-                   break;
-               default:
-                   goto bad_charset;
+           op = BOUND + get_regex_charset(RExC_flags);
+            if (op > BOUNDA) {  /* /aa is same as /a */
+                op = BOUNDA;
              }
             ret = reg_node(pRExC_state, op);
             FLAGS(ret) = get_regex_charset(RExC_flags);
@@ -9910,103 +10196,45 @@ tryagain:
         case 'B':
             RExC_seen_zerolen++;
             RExC_seen |= REG_SEEN_LOOKBEHIND;
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NBOUNDL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NBOUNDU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NBOUNDA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NBOUND;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NBOUND + get_regex_charset(RExC_flags);
+            if (op > NBOUNDA) { /* /aa is same as /a */
+                op = NBOUNDA;
              }
             ret = reg_node(pRExC_state, op);
             FLAGS(ret) = get_regex_charset(RExC_flags);
             *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 's':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = SPACEL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = SPACEU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = SPACEA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = SPACE;
-                   break;
-               default:
-                   goto bad_charset;
+           op = SPACE + get_regex_charset(RExC_flags);
+            if (op > SPACEA) {  /* /aa is same as /a */
+                op = SPACEA;
              }
             ret = reg_node(pRExC_state, op);
             *flagp |= HASWIDTH|SIMPLE;
             goto finish_meta_pat;
         case 'S':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NSPACEL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NSPACEU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NSPACEA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NSPACE;
-                   break;
-               default:
-                   goto bad_charset;
-            }
-           ret = reg_node(pRExC_state, op);
-           *flagp |= HASWIDTH|SIMPLE;
-           goto finish_meta_pat;
-       case 'd':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = DIGITL;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = DIGITA;
-                   break;
-               case REGEX_DEPENDS_CHARSET: /* No difference between these */
-               case REGEX_UNICODE_CHARSET:
-                   op = DIGIT;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NSPACE + get_regex_charset(RExC_flags);
+            if (op > NSPACEA) { /* /aa is same as /a */
+                op = NSPACEA;
              }
             ret = reg_node(pRExC_state, op);
             *flagp |= HASWIDTH|SIMPLE;
             goto finish_meta_pat;
         case 'D':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NDIGITL;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NDIGITA;
-                   break;
-               case REGEX_DEPENDS_CHARSET: /* No difference between these */
-               case REGEX_UNICODE_CHARSET:
-                   op = NDIGIT;
-                   break;
-               default:
-                   goto bad_charset;
+            op = NDIGIT;
+            goto join_D_and_d;
+       case 'd':
+            op = DIGIT;
+        join_D_and_d:
+            {
+                U8 offset = get_regex_charset(RExC_flags);
+                if (offset == REGEX_UNICODE_CHARSET) {
+                    offset = REGEX_DEPENDS_CHARSET;
+                }
+                else if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
+                    offset = REGEX_ASCII_RESTRICTED_CHARSET;
+                }
+                op += offset;
              }
             ret = reg_node(pRExC_state, op);
             *flagp |= HASWIDTH|SIMPLE;
@@ -10060,7 +10288,7 @@ tryagain:
                 }
                 RExC_parse--;
  
-                ret = regclass(pRExC_state,depth+1);
+                ret = regclass(pRExC_state, flagp,depth+1);
  
                 RExC_end = oldregxend;
                 RExC_parse--;
@@ -10068,16 +10296,24 @@ tryagain:
                 Set_Node_Offset(ret, parse_start + 2);
                 Set_Node_Cur_Length(ret);
                 nextchar(pRExC_state);
-               *flagp |= HASWIDTH|SIMPLE;
             }
             break;
          case 'N': 
-            /* Handle \N and \N{NAME} here and not below because it can be
-            multicharacter. join_exact() will join them up later on. 
-            Also this makes sure that things like /\N{BLAH}+/ and 
-            \N{BLAH} being multi char Just Happen. dmq*/
+            /* Handle \N and \N{NAME} with multiple code points here and not
+             * below because it can be multicharacter. join_exact() will join
+             * them up later on.  Also this makes sure that things like
+             * /\N{BLAH}+/ and \N{BLAH} being multi char Just Happen. dmq.
+             * The options to the grok function call causes it to fail if the
+             * sequence is just a single code point.  We then go treat it as
+             * just another character in the current EXACT node, and hence it
+             * gets uniform treatment with all the other characters.  The
+             * special treatment for quantifiers is not needed for such single
+             * character sequences */
              ++RExC_parse;
-            ret= reg_namedseq(pRExC_state, NULL, flagp, depth);
+            if (! grok_bslash_N(pRExC_state, &ret, NULL, flagp, depth, FALSE)) {
+                RExC_parse--;
+                goto defchar;
+            }
              break;
         case 'k':    /* Handle \k<NAME> and \k'NAME' */
         parse_named_seq:
@@ -10107,7 +10343,7 @@ tryagain:
                  ret = reganode(pRExC_state,
                                 ((! FOLD)
                                   ? NREF
-                                : (MORE_ASCII_RESTRICTED)
+                                : (ASCII_FOLD_RESTRICTED)
                                    ? NREFFA
                                     : (AT_LEAST_UNI_SEMANTICS)
                                       ? NREFFU
@@ -10157,6 +10393,7 @@ tryagain:
                          vFAIL("Reference to nonexistent or unclosed group");
                  }
                 if (!isg && num > 9 && num >= RExC_npar)
+                    /* Probably a character specified in octal, e.g. \35 */
                     goto defchar;
                 else {
                     char * const parse_start = RExC_parse - 1; /* MJD */
@@ -10177,7 +10414,7 @@ tryagain:
                     ret = reganode(pRExC_state,
                                    ((! FOLD)
                                      ? REF
-                                    : (MORE_ASCII_RESTRICTED)
+                                    : (ASCII_FOLD_RESTRICTED)
                                        ? REFFA
                                         : (AT_LEAST_UNI_SEMANTICS)
                                           ? REFFU
@@ -10221,29 +10458,30 @@ tryagain:
             RExC_parse++;
  
         defchar: {
-           register STRLEN len;
-           register UV ender;
-           register char *p;
+           STRLEN len = 0;
+           UV ender;
+           char *p;
             char *s;
+#define MAX_NODE_STRING_SIZE 127
+           char foldbuf[MAX_NODE_STRING_SIZE+UTF8_MAXBYTES_CASE];
+           char *s0;
+           U8 upper_parse = MAX_NODE_STRING_SIZE;
             STRLEN foldlen;
-           U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
              U8 node_type;
-
-           /* Is this a LATIN LOWER CASE SHARP S in an EXACTFU node?  If so,
-            * it is folded to 'ss' even if not utf8 */
-           bool is_exactfu_sharp_s;
+            bool next_is_quantifier;
+            char * oldp = NULL;
  
             ender = 0;
-            node_type = ((! FOLD) ? EXACT
-                       : (LOC)
-                         ? EXACTFL
-                         : (MORE_ASCII_RESTRICTED)
-                           ? EXACTFA
-                           : (AT_LEAST_UNI_SEMANTICS)
-                             ? EXACTFU
-                             : EXACTF);
+            node_type = compute_EXACTish(pRExC_state);
             ret = reg_node(pRExC_state, node_type);
-           s = STRING(ret);
+
+            /* In pass1, folded, we use a temporary buffer instead of the
+             * actual node, as the node doesn't exist yet */
+           s = (SIZE_ONLY && FOLD) ? foldbuf : STRING(ret);
+
+            s0 = s;
+
+       reparse:
  
             /* XXX The node can hold up to 255 bytes, yet this only goes to
               * 127.  I (khw) do not know why.  Keeping it somewhat less than
@@ -10263,11 +10501,11 @@ tryagain:
               * could back off to end with only a code point that isn't such a
               * non-final, but it is possible for there not to be any in the
               * entire node. */
-           for (len = 0, p = RExC_parse - 1;
-                len < 127 && p < RExC_end;
+           for (p = RExC_parse - 1;
+                len < upper_parse && p < RExC_end;
                  len++)
             {
-               char * const oldp = p;
+               oldp = p;
  
                 if (RExC_flags & RXf_PMf_EXTENDED)
                     p = regwhite( pRExC_state, p );
@@ -10303,7 +10541,6 @@ tryagain:
                     case 'g': case 'G':   /* generic-backref, pos assertion */
                     case 'h': case 'H':   /* HORIZWS */
                     case 'k': case 'K':   /* named backref, keep marker */
-                   case 'N':             /* named char sequence */
                     case 'p': case 'P':   /* Unicode property */
                               case 'R':   /* LNBREAK */
                     case 's': case 'S':   /* space class */
@@ -10321,6 +10558,22 @@ tryagain:
                         ender = '\n';
                         p++;
                         break;
+                   case 'N': /* Handle a single-code point named character. */
+                        /* The options cause it to fail if a multiple code
+                         * point sequence.  Handle those in the switch() above
+                         * */
+                        RExC_parse = p + 1;
+                        if (! grok_bslash_N(pRExC_state, NULL, &ender,
+                                            flagp, depth, FALSE))
+                        {
+                            RExC_parse = p = oldp;
+                            goto loopdone;
+                        }
+                        p = RExC_parse;
+                        if (ender > 0xff) {
+                            REQUIRE_UTF8;
+                        }
+                        break;
                     case 'r':
                         ender = '\r';
                         p++;
@@ -10371,38 +10624,39 @@ tryagain:
                             break;
                         }
                     case 'x':
-                       if (*++p == '{') {
-                           char* const e = strchr(p, '}');
+                       {
+                           STRLEN brace_len = len;
+                           UV result;
+                           const char* error_msg;
  
-                           if (!e) {
-                               RExC_parse = p + 1;
-                               vFAIL("Missing right brace on \\x{}");
+                           bool valid = grok_bslash_x(p,
+                                                      &result,
+                                                      &brace_len,
+                                                      &error_msg,
+                                                      1);
+                           p += brace_len;
+                           if (! valid) {
+                               RExC_parse = p; /* going to die anyway; point
+                                                  to exact spot of failure */
+                               vFAIL(error_msg);
                             }
                             else {
-                                I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                                    | PERL_SCAN_DISALLOW_PREFIX;
-                                STRLEN numlen = e - p - 1;
-                               ender = grok_hex(p + 1, &numlen, &flags, NULL);
-                               if (ender > 0xff)
-                                   REQUIRE_UTF8;
-                               p = e + 1;
+                               ender = result;
                             }
+                           if (PL_encoding && ender < 0x100) {
+                               goto recode_encoding;
+                           }
+                           if (ender > 0xff) {
+                               REQUIRE_UTF8;
+                           }
+                           break;
                         }
-                       else {
-                            I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-                           STRLEN numlen = 2;
-                           ender = grok_hex(p, &numlen, &flags, NULL);
-                           p += numlen;
-                       }
-                       if (PL_encoding && ender < 0x100)
-                           goto recode_encoding;
-                       break;
                     case 'c':
                         p++;
                         ender = grok_bslash_c(*p++, UTF, SIZE_ONLY);
                         break;
                     case '0': case '1': case '2': case '3':case '4':
-                   case '5': case '6': case '7': case '8':case '9':
+                   case '5': case '6': case '7':
                         if (*p == '0' ||
                             (isDIGIT(p[1]) && atoi(p) >= RExC_npar))
                         {
@@ -10435,7 +10689,7 @@ tryagain:
                             FAIL("Trailing \\");
                         /* FALL THROUGH */
                     default:
-                       if (!SIZE_ONLY&& isALPHA(*p)) {
+                       if (!SIZE_ONLY&& isALNUMC(*p)) {
                             ckWARN2reg(p + 1, "Unrecognized escape \\%.1s passed through", p);
                         }
                         goto normal_default;
@@ -10465,150 +10719,270 @@ tryagain:
                     break;
                 } /* End of switch on the literal */
  
-                is_exactfu_sharp_s = (node_type == EXACTFU
-                                     && ender == LATIN_SMALL_LETTER_SHARP_S);
+               /* Here, have looked at the literal character and <ender>
+                * contains its ordinal, <p> points to the character after it
+                */
+
                 if ( RExC_flags & RXf_PMf_EXTENDED)
                     p = regwhite( pRExC_state, p );
-               if ((UTF && FOLD) || is_exactfu_sharp_s) {
-                   /* Prime the casefolded buffer.  Locale rules, which apply
-                    * only to code points < 256, aren't known until execution,
-                    * so for them, just output the original character using
-                     * utf8.  If we start to fold non-UTF patterns, be sure to
-                     * update join_exact() */
-                   if (LOC && ender < 256) {
-                       if (UNI_IS_INVARIANT(ender)) {
-                           *tmpbuf = (U8) ender;
-                           foldlen = 1;
-                       } else {
-                           *tmpbuf = UTF8_TWO_BYTE_HI(ender);
-                           *(tmpbuf + 1) = UTF8_TWO_BYTE_LO(ender);
-                           foldlen = 2;
-                       }
-                   }
-                   else if (isASCII(ender)) {  /* Note: Here can't also be LOC
-                                                */
-                       ender = toLOWER(ender);
-                       *tmpbuf = (U8) ender;
-                       foldlen = 1;
-                   }
-                   else if (! MORE_ASCII_RESTRICTED && ! LOC) {
  
-                       /* Locale and /aa require more selectivity about the
-                        * fold, so are handled below.  Otherwise, here, just
-                        * use the fold */
-                       ender = toFOLD_uni(ender, tmpbuf, &foldlen);
-                   }
-                   else {
-                       /* Under locale rules or /aa we are not to mix,
-                        * respectively, ords < 256 or ASCII with non-.  So
-                        * reject folds that mix them, using only the
-                        * non-folded code point.  So do the fold to a
-                        * temporary, and inspect each character in it. */
-                       U8 trialbuf[UTF8_MAXBYTES_CASE+1];
-                       U8* s = trialbuf;
-                       UV tmpender = toFOLD_uni(ender, trialbuf, &foldlen);
-                       U8* e = s + foldlen;
-                       bool fold_ok = TRUE;
-
-                       while (s < e) {
-                           if (isASCII(*s)
-                               || (LOC && (UTF8_IS_INVARIANT(*s)
-                                          || UTF8_IS_DOWNGRADEABLE_START(*s))))
-                           {
-                               fold_ok = FALSE;
-                               break;
-                           }
-                           s += UTF8SKIP(s);
-                       }
-                       if (fold_ok) {
-                           Copy(trialbuf, tmpbuf, foldlen, U8);
-                           ender = tmpender;
-                       }
-                       else {
-                           uvuni_to_utf8(tmpbuf, ender);
-                           foldlen = UNISKIP(ender);
-                       }
-                   }
-               }
-               if (p < RExC_end && ISMULT2(p)) { /* Back off on ?+*. */
-                   if (len)
-                       p = oldp;
-                   else if (UTF || is_exactfu_sharp_s) {
-                        if (FOLD) {
-                             /* Emit all the Unicode characters. */
-                             STRLEN numlen;
-                             for (foldbuf = tmpbuf;
-                                  foldlen;
-                                  foldlen -= numlen) {
-
-                                  /* tmpbuf has been constructed by us, so we
-                                   * know it is valid utf8 */
-                                  ender = valid_utf8_to_uvchr(foldbuf, &numlen);
-                                  if (numlen > 0) {
-                                       const STRLEN unilen = reguni(pRExC_state, ender, s);
-                                       s       += unilen;
-                                       len     += unilen;
-                                       /* In EBCDIC the numlen
-                                        * and unilen can differ. */
-                                       foldbuf += numlen;
-                                       if (numlen >= foldlen)
-                                            break;
-                                  }
-                                  else
-                                       break; /* "Can't happen." */
-                             }
-                        }
-                        else {
-                             const STRLEN unilen = reguni(pRExC_state, ender, s);
-                             if (unilen > 0) {
-                                  s   += unilen;
-                                  len += unilen;
-                             }
-                        }
-                   }
-                   else {
-                       len++;
-                       REGC((char)ender, s++);
-                   }
-                   break;
+                /* If the next thing is a quantifier, it applies to this
+                 * character only, which means that this character has to be in
+                 * its own node and can't just be appended to the string in an
+                 * existing node, so if there are already other characters in
+                 * the node, close the node with just them, and set up to do
+                 * this character again next time through, when it will be the
+                 * only thing in its new node */
+                if ((next_is_quantifier = (p < RExC_end && ISMULT2(p))) && len)
+               {
+                    p = oldp;
+                    goto loopdone;
+                }
+
+               if (FOLD) {
+                    if (UTF
+                            /* See comments for join_exact() as to why we fold
+                             * this non-UTF at compile time */
+                        || (node_type == EXACTFU
+                            && ender == LATIN_SMALL_LETTER_SHARP_S))
+                    {
+
+
+                        /* Prime the casefolded buffer.  Locale rules, which
+                         * apply only to code points < 256, aren't known until
+                         * execution, so for them, just output the original
+                         * character using utf8.  If we start to fold non-UTF
+                         * patterns, be sure to update join_exact() */
+                        if (LOC && ender < 256) {
+                            if (UNI_IS_INVARIANT(ender)) {
+                                *s = (U8) ender;
+                                foldlen = 1;
+                            } else {
+                                *s = UTF8_TWO_BYTE_HI(ender);
+                                *(s + 1) = UTF8_TWO_BYTE_LO(ender);
+                                foldlen = 2;
+                            }
+                        }
+                        else {
+                            ender = _to_uni_fold_flags(ender, (U8 *) s, &foldlen,
+                                    FOLD_FLAGS_FULL
+                                     | ((LOC) ?  FOLD_FLAGS_LOCALE
+                                              : (ASCII_FOLD_RESTRICTED)
+                                                ? FOLD_FLAGS_NOMIX_ASCII
+                                                : 0)
+                                );
+                        }
+                       s += foldlen;
+
+                       /* The loop increments <len> each time, as all but this
+                        * path (and the one just below for UTF) through it add
+                        * a single byte to the EXACTish node.  But this one
+                        * has changed len to be the correct final value, so
+                        * subtract one to cancel out the increment that
+                        * follows */
+                       len += foldlen - 1;
+                    }
+                    else {
+                        *(s++) = ender;
+                    }
                 }
-                if (UTF || is_exactfu_sharp_s) {
-                    if (FOLD) {
-                         /* Emit all the Unicode characters. */
-                         STRLEN numlen;
-                         for (foldbuf = tmpbuf;
-                              foldlen;
-                              foldlen -= numlen) {
-                              ender = valid_utf8_to_uvchr(foldbuf, &numlen);
-                              if (numlen > 0) {
-                                   const STRLEN unilen = reguni(pRExC_state, ender, s);
-                                   len     += unilen;
-                                   s       += unilen;
-                                   /* In EBCDIC the numlen
-                                    * and unilen can differ. */
-                                   foldbuf += numlen;
-                                   if (numlen >= foldlen)
-                                        break;
-                              }
-                              else
-                                   break;
-                         }
-                    }
-                    else {
-                         const STRLEN unilen = reguni(pRExC_state, ender, s);
-                         if (unilen > 0) {
-                              s   += unilen;
-                              len += unilen;
-                         }
-                    }
-                    len--;
+               else if (UTF) {
+                    const STRLEN unilen = reguni(pRExC_state, ender, s);
+                    if (unilen > 0) {
+                       s   += unilen;
+                       len += unilen;
+                    }
+
+                   /* See comment just above for - 1 */
+                   len--;
                 }
                 else {
                     REGC((char)ender, s++);
+                }
+
+               if (next_is_quantifier) {
+
+                    /* Here, the next input is a quantifier, and to get here,
+                     * the current character is the only one in the node.
+                     * Also, here <len> doesn't include the final byte for this
+                     * character */
+                    len++;
+                    goto loopdone;
                 }
-           }
+
+           } /* End of loop through literal characters */
+
+            /* Here we have either exhausted the input or ran out of room in
+             * the node.  (If we encountered a character that can't be in the
+             * node, transfer is made directly to <loopdone>, and so we
+             * wouldn't have fallen off the end of the loop.)  In the latter
+             * case, we artificially have to split the node into two, because
+             * we just don't have enough space to hold everything.  This
+             * creates a problem if the final character participates in a
+             * multi-character fold in the non-final position, as a match that
+             * should have occurred won't, due to the way nodes are matched,
+             * and our artificial boundary.  So back off until we find a non-
+             * problematic character -- one that isn't at the beginning or
+             * middle of such a fold.  (Either it doesn't participate in any
+             * folds, or appears only in the final position of all the folds it
+             * does participate in.)  A better solution with far fewer false
+             * positives, and that would fill the nodes more completely, would
+             * be to actually have available all the multi-character folds to
+             * test against, and to back-off only far enough to be sure that
+             * this node isn't ending with a partial one.  <upper_parse> is set
+             * further below (if we need to reparse the node) to include just
+             * up through that final non-problematic character that this code
+             * identifies, so when it is set to less than the full node, we can
+             * skip the rest of this */
+            if (FOLD && p < RExC_end && upper_parse == MAX_NODE_STRING_SIZE) {
+
+                const STRLEN full_len = len;
+
+               assert(len >= MAX_NODE_STRING_SIZE);
+
+                /* Here, <s> points to the final byte of the final character.
+                 * Look backwards through the string until find a non-
+                 * problematic character */
+
+               if (! UTF) {
+
+                    /* These two have no multi-char folds to non-UTF characters
+                     */
+                    if (ASCII_FOLD_RESTRICTED || LOC) {
+                        goto loopdone;
+                    }
+
+                    while (--s >= s0 && IS_NON_FINAL_FOLD(*s)) { }
+                    len = s - s0 + 1;
+               }
+                else {
+                    if (!  PL_NonL1NonFinalFold) {
+                        PL_NonL1NonFinalFold = _new_invlist_C_array(
+                                        NonL1_Perl_Non_Final_Folds_invlist);
+                    }
+
+                    /* Point to the first byte of the final character */
+                    s = (char *) utf8_hop((U8 *) s, -1);
+
+                    while (s >= s0) {   /* Search backwards until find
+                                           non-problematic char */
+                        if (UTF8_IS_INVARIANT(*s)) {
+
+                            /* There are no ascii characters that participate
+                             * in multi-char folds under /aa.  In EBCDIC, the
+                             * non-ascii invariants are all control characters,
+                             * so don't ever participate in any folds. */
+                            if (ASCII_FOLD_RESTRICTED
+                                || ! IS_NON_FINAL_FOLD(*s))
+                            {
+                                break;
+                            }
+                        }
+                        else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+
+                            /* No Latin1 characters participate in multi-char
+                             * folds under /l */
+                            if (LOC
+                                || ! IS_NON_FINAL_FOLD(TWO_BYTE_UTF8_TO_UNI(
+                                                                *s, *(s+1))))
+                            {
+                                break;
+                            }
+                        }
+                        else if (! _invlist_contains_cp(
+                                        PL_NonL1NonFinalFold,
+                                        valid_utf8_to_uvchr((U8 *) s, NULL)))
+                        {
+                            break;
+                        }
+
+                        /* Here, the current character is problematic in that
+                         * it does occur in the non-final position of some
+                         * fold, so try the character before it, but have to
+                         * special case the very first byte in the string, so
+                         * we don't read outside the string */
+                        s = (s == s0) ? s -1 : (char *) utf8_hop((U8 *) s, -1);
+                    } /* End of loop backwards through the string */
+
+                    /* If there were only problematic characters in the string,
+                     * <s> will point to before s0, in which case the length
+                     * should be 0, otherwise include the length of the
+                     * non-problematic character just found */
+                    len = (s < s0) ? 0 : s - s0 + UTF8SKIP(s);
+               }
+
+                /* Here, have found the final character, if any, that is
+                 * non-problematic as far as ending the node without splitting
+                 * it across a potential multi-char fold.  <len> contains the
+                 * number of bytes in the node up-to and including that
+                 * character, or is 0 if there is no such character, meaning
+                 * the whole node contains only problematic characters.  In
+                 * this case, give up and just take the node as-is.  We can't
+                 * do any better */
+                if (len == 0) {
+                    len = full_len;
+                } else {
+
+                    /* Here, the node does contain some characters that aren't
+                     * problematic.  If one such is the final character in the
+                     * node, we are done */
+                    if (len == full_len) {
+                        goto loopdone;
+                    }
+                    else if (len + ((UTF) ? UTF8SKIP(s) : 1) == full_len) {
+
+                        /* If the final character is problematic, but the
+                         * penultimate is not, back-off that last character to
+                         * later start a new node with it */
+                        p = oldp;
+                        goto loopdone;
+                    }
+
+                    /* Here, the final non-problematic character is earlier
+                     * in the input than the penultimate character.  What we do
+                     * is reparse from the beginning, going up only as far as
+                     * this final ok one, thus guaranteeing that the node ends
+                     * in an acceptable character.  The reason we reparse is
+                     * that we know how far in the character is, but we don't
+                     * know how to correlate its position with the input parse.
+                     * An alternate implementation would be to build that
+                     * correlation as we go along during the original parse,
+                     * but that would entail extra work for every node, whereas
+                     * this code gets executed only when the string is too
+                     * large for the node, and the final two characters are
+                     * problematic, an infrequent occurrence.  Yet another
+                     * possible strategy would be to save the tail of the
+                     * string, and the next time regatom is called, initialize
+                     * with that.  The problem with this is that unless you
+                     * back off one more character, you won't be guaranteed
+                     * regatom will get called again, unless regbranch,
+                     * regpiece ... are also changed.  If you do back off that
+                     * extra character, so that there is input guaranteed to
+                     * force calling regatom, you can't handle the case where
+                     * just the first character in the node is acceptable.  I
+                     * (khw) decided to try this method which doesn't have that
+                     * pitfall; if performance issues are found, we can do a
+                     * combination of the current approach plus that one */
+                    upper_parse = len;
+                    len = 0;
+                    s = s0;
+                    goto reparse;
+                }
+           }   /* End of verifying node ends with an appropriate char */
+
         loopdone:   /* Jumped to when encounters something that shouldn't be in
                        the node */
+
+            /* I (khw) don't know if you can get here with zero length, but the
+             * old code handled this situation by creating a zero-length EXACT
+             * node.  Might as well be NOTHING instead */
+            if (len == 0) {
+                OP(ret) = NOTHING;
+            }
+            else{
+                alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender);
+            }
+
             RExC_parse = p - 1;
              Set_Node_Cur_Length(ret); /* MJD */
             nextchar(pRExC_state);
@@ -10618,27 +10992,12 @@ tryagain:
                 if (iv < 0)
                     vFAIL("Internal disaster");
             }
-           if (len > 0)
-               *flagp |= HASWIDTH;
-           if (len == 1 && UNI_IS_INVARIANT(ender))
-               *flagp |= SIMPLE;
  
-           if (SIZE_ONLY)
-               RExC_size += STR_SZ(len);
-           else {
-               STR_LEN(ret) = len;
-               RExC_emit += STR_SZ(len);
-            }
-       }
+       } /* End of label 'defchar:' */
         break;
-    }
+    } /* End of giant switch on input character */
  
      return(ret);
-
-/* Jumped to when an unrecognized character set is encountered */
-bad_charset:
-    Perl_croak(aTHX_ "panic: Unknown regex character set encoding: %u", get_regex_charset(RExC_flags));
-    return(NULL);
  }
  
  STATIC char *
@@ -10713,7 +11072,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
                     switch (skip) {
                     case 4:
                         if (memEQ(posixcc, "word", 4)) /* this is not POSIX, this is the Perl \w */
-                           namedclass = complement ? ANYOF_NALNUM : ANYOF_ALNUM;
+                           namedclass = ANYOF_ALNUM;
                         break;
                     case 5:
                         /* Names all of length 5.  */
@@ -10723,57 +11082,63 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
                         switch (posixcc[4]) {
                         case 'a':
                             if (memEQ(posixcc, "alph", 4)) /* alpha */
-                               namedclass = complement ? ANYOF_NALPHA : ANYOF_ALPHA;
+                               namedclass = ANYOF_ALPHA;
                             break;
                         case 'e':
                             if (memEQ(posixcc, "spac", 4)) /* space */
-                               namedclass = complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
+                               namedclass = ANYOF_PSXSPC;
                             break;
                         case 'h':
                             if (memEQ(posixcc, "grap", 4)) /* graph */
-                               namedclass = complement ? ANYOF_NGRAPH : ANYOF_GRAPH;
+                               namedclass = ANYOF_GRAPH;
                             break;
                         case 'i':
                             if (memEQ(posixcc, "asci", 4)) /* ascii */
-                               namedclass = complement ? ANYOF_NASCII : ANYOF_ASCII;
+                               namedclass = ANYOF_ASCII;
                             break;
                         case 'k':
                             if (memEQ(posixcc, "blan", 4)) /* blank */
-                               namedclass = complement ? ANYOF_NBLANK : ANYOF_BLANK;
+                               namedclass = ANYOF_BLANK;
                             break;
                         case 'l':
                             if (memEQ(posixcc, "cntr", 4)) /* cntrl */
-                               namedclass = complement ? ANYOF_NCNTRL : ANYOF_CNTRL;
+                               namedclass = ANYOF_CNTRL;
                             break;
                         case 'm':
                             if (memEQ(posixcc, "alnu", 4)) /* alnum */
-                               namedclass = complement ? ANYOF_NALNUMC : ANYOF_ALNUMC;
+                               namedclass = ANYOF_ALNUMC;
                             break;
                         case 'r':
                             if (memEQ(posixcc, "lowe", 4)) /* lower */
-                               namedclass = complement ? ANYOF_NLOWER : ANYOF_LOWER;
+                               namedclass = ANYOF_LOWER;
                             else if (memEQ(posixcc, "uppe", 4)) /* upper */
-                               namedclass = complement ? ANYOF_NUPPER : ANYOF_UPPER;
+                               namedclass = ANYOF_UPPER;
                             break;
                         case 't':
                             if (memEQ(posixcc, "digi", 4)) /* digit */
-                               namedclass = complement ? ANYOF_NDIGIT : ANYOF_DIGIT;
+                               namedclass = ANYOF_DIGIT;
                             else if (memEQ(posixcc, "prin", 4)) /* print */
-                               namedclass = complement ? ANYOF_NPRINT : ANYOF_PRINT;
+                               namedclass = ANYOF_PRINT;
                             else if (memEQ(posixcc, "punc", 4)) /* punct */
-                               namedclass = complement ? ANYOF_NPUNCT : ANYOF_PUNCT;
+                               namedclass = ANYOF_PUNCT;
                             break;
                         }
                         break;
                     case 6:
                         if (memEQ(posixcc, "xdigit", 6))
-                           namedclass = complement ? ANYOF_NXDIGIT : ANYOF_XDIGIT;
+                           namedclass = ANYOF_XDIGIT;
                         break;
                     }
  
                     if (namedclass == OOB_NAMEDCLASS)
                         Simple_vFAIL3("POSIX class [:%.*s:] unknown",
                                       t - s - 1, s + 1);
+
+                    /* The #defines are structured so each complement is +1 to
+                     * the normal one */
+                    if (complement) {
+                        namedclass++;
+                    }
                     assert (posixcc[skip] == ':');
                     assert (posixcc[skip+1] == ']');
                 } else if (!SIZE_ONLY) {
@@ -10904,7 +11269,7 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
   *                determined at run-time
   * run_time_list  is a SV* that contains text names of properties that are to
   *                be computed at run time.  This concatenates <Xpropertyname>
- *                to it, apppropriately
+ *                to it, appropriately
   * This is essentially DO_POSIX, but we know only the Latin1 values at compile
   * time */
  #define DO_POSIX_LATIN1_ONLY_KNOWN(node, class, destlist, sourcelist,      \
@@ -10930,16 +11295,18 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
      }
  
  /* Like DO_POSIX_LATIN1_ONLY_KNOWN, but for the complement.  A combination of
- * this and DO_N_POSIX */
+ * this and DO_N_POSIX.  Sets <matches_above_unicode> only if it can; unchanged
+ * otherwise */
  #define DO_N_POSIX_LATIN1_ONLY_KNOWN(node, class, destlist, sourcelist,    \
-                              l1_sourcelist, Xpropertyname, run_time_list) \
+       l1_sourcelist, Xpropertyname, run_time_list, matches_above_unicode) \
      if (AT_LEAST_ASCII_RESTRICTED) {                                       \
          _invlist_union_complement_2nd(destlist, sourcelist, &destlist);    \
      }                                                                      \
      else {                                                                 \
          Perl_sv_catpvf(aTHX_ run_time_list, "!utf8::%s\n", Xpropertyname); \
+        matches_above_unicode = TRUE;                                      \
         if (LOC) {                                                         \
-           ANYOF_CLASS_SET(node, namedclass);                             \
+            ANYOF_CLASS_SET(node, namedclass);                            \
         }                                                                  \
         else {                                                             \
              SV* scratch_list = NULL;                                       \
@@ -10957,171 +11324,6 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
         }                                                                  \
      }
  
-STATIC U8
-S_set_regclass_bit_fold(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr)
-{
-
-    /* Handle the setting of folds in the bitmap for non-locale ANYOF nodes.
-     * Locale folding is done at run-time, so this function should not be
-     * called for nodes that are for locales.
-     *
-     * This function sets the bit corresponding to the fold of the input
-     * 'value', if not already set.  The fold of 'f' is 'F', and the fold of
-     * 'F' is 'f'.
-     *
-     * It also knows about the characters that are in the bitmap that have
-     * folds that are matchable only outside it, and sets the appropriate lists
-     * and flags.
-     *
-     * It returns the number of bits that actually changed from 0 to 1 */
-
-    U8 stored = 0;
-    U8 fold;
-
-    PERL_ARGS_ASSERT_SET_REGCLASS_BIT_FOLD;
-
-    fold = (AT_LEAST_UNI_SEMANTICS) ? PL_fold_latin1[value]
-                                    : PL_fold[value];
-
-    /* It assumes the bit for 'value' has already been set */
-    if (fold != value && ! ANYOF_BITMAP_TEST(node, fold)) {
-        ANYOF_BITMAP_SET(node, fold);
-        stored++;
-    }
-    if (_HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value) && (! isASCII(value) || ! MORE_ASCII_RESTRICTED)) {
-       /* Certain Latin1 characters have matches outside the bitmap.  To get
-        * here, 'value' is one of those characters.   None of these matches is
-        * valid for ASCII characters under /aa, which have been excluded by
-        * the 'if' above.  The matches fall into three categories:
-        * 1) They are singly folded-to or -from an above 255 character, as
-        *    LATIN SMALL LETTER Y WITH DIAERESIS and LATIN CAPITAL LETTER Y
-        *    WITH DIAERESIS;
-        * 2) They are part of a multi-char fold with another character in the
-        *    bitmap, only LATIN SMALL LETTER SHARP S => "ss" fits that bill;
-        * 3) They are part of a multi-char fold with a character not in the
-        *    bitmap, such as various ligatures.
-        * We aren't dealing fully with multi-char folds, except we do deal
-        * with the pattern containing a character that has a multi-char fold
-        * (not so much the inverse).
-        * For types 1) and 3), the matches only happen when the target string
-        * is utf8; that's not true for 2), and we set a flag for it.
-        *
-        * The code below adds to the passed in inversion list the single fold
-        * closures for 'value'.  The values are hard-coded here so that an
-        * innocent-looking character class, like /[ks]/i won't have to go out
-        * to disk to find the possible matches.  XXX It would be better to
-        * generate these via regen, in case a new version of the Unicode
-        * standard adds new mappings, though that is not really likely. */
-       switch (value) {
-           case 'k':
-           case 'K':
-               /* KELVIN SIGN */
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212A);
-               break;
-           case 's':
-           case 'S':
-               /* LATIN SMALL LETTER LONG S */
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x017F);
-               break;
-           case MICRO_SIGN:
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                                GREEK_SMALL_LETTER_MU);
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                                GREEK_CAPITAL_LETTER_MU);
-               break;
-           case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
-           case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
-               /* ANGSTROM SIGN */
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212B);
-               if (DEPENDS_SEMANTICS) {    /* See DEPENDS comment below */
-                   *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                                    PL_fold_latin1[value]);
-               }
-               break;
-           case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                       LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
-               break;
-           case LATIN_SMALL_LETTER_SHARP_S:
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                       LATIN_CAPITAL_LETTER_SHARP_S);
-
-               /* Under /a, /d, and /u, this can match the two chars "ss" */
-               if (! MORE_ASCII_RESTRICTED) {
-                   add_alternate(alternate_ptr, (U8 *) "ss", 2);
-
-                   /* And under /u or /a, it can match even if the target is
-                    * not utf8 */
-                   if (AT_LEAST_UNI_SEMANTICS) {
-                       ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8;
-                   }
-               }
-               break;
-           case 'F': case 'f':
-           case 'I': case 'i':
-           case 'L': case 'l':
-           case 'T': case 't':
-           case 'A': case 'a':
-           case 'H': case 'h':
-           case 'J': case 'j':
-           case 'N': case 'n':
-           case 'W': case 'w':
-           case 'Y': case 'y':
-                /* These all are targets of multi-character folds from code
-                 * points that require UTF8 to express, so they can't match
-                 * unless the target string is in UTF-8, so no action here is
-                 * necessary, as regexec.c properly handles the general case
-                 * for UTF-8 matching */
-               break;
-           default:
-               /* Use deprecated warning to increase the chances of this
-                * being output */
-               ckWARN2regdep(RExC_parse, "Perl folding rules are not up-to-date for 0x%x; please use the perlbug utility to report;", value);
-               break;
-       }
-    }
-    else if (DEPENDS_SEMANTICS
-           && ! isASCII(value)
-           && PL_fold_latin1[value] != value)
-    {
-          /* Under DEPENDS rules, non-ASCII Latin1 characters match their
-           * folds only when the target string is in UTF-8.  We add the fold
-           * here to the list of things to match outside the bitmap, which
-           * won't be looked at unless it is UTF8 (or else if something else
-           * says to look even if not utf8, but those things better not happen
-           * under DEPENDS semantics. */
-       *invlist_ptr = add_cp_to_invlist(*invlist_ptr, PL_fold_latin1[value]);
-    }
-
-    return stored;
-}
-
-
-PERL_STATIC_INLINE U8
-S_set_regclass_bit(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr)
-{
-    /* This inline function sets a bit in the bitmap if not already set, and if
-     * appropriate, its fold, returning the number of bits that actually
-     * changed from 0 to 1 */
-
-    U8 stored;
-
-    PERL_ARGS_ASSERT_SET_REGCLASS_BIT;
-
-    if (ANYOF_BITMAP_TEST(node, value)) {   /* Already set */
-       return 0;
-    }
-
-    ANYOF_BITMAP_SET(node, value);
-    stored = 1;
-
-    if (FOLD && ! LOC) {       /* Locale folds aren't known until runtime */
-       stored += set_regclass_bit_fold(pRExC_state, node, value, invlist_ptr, alternate_ptr);
-    }
-
-    return stored;
-}
-
  STATIC void
  S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len)
  {
@@ -11140,6 +11342,15 @@ S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len)
      return;
  }
  
+/* The names of properties whose definitions are not known at compile time are
+ * stored in this SV, after a constant heading.  So if the length has been
+ * changed since initialization, then there is a run-time definition. */
+#define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len)
+
+/* This converts the named class defined in regcomp.h to its equivalent class
+ * number defined in handy.h. */
+#define namedclass_to_classnum(class)  ((class) / 2)
+
  /*
     parse a class specification and produce either an ANYOF node that
     matches the pattern or perhaps will be optimized into an EXACTish node
@@ -11148,16 +11359,16 @@ S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len)
     above 255, a range list is used */
  
  STATIC regnode *
-S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
+S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  {
      dVAR;
-    register UV nextvalue;
-    register IV prevvalue = OOB_UNICODE;
-    register IV range = 0;
-    UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
-    register regnode *ret;
+    UV nextvalue;
+    UV prevvalue = OOB_UNICODE;
+    IV range = 0;
+    UV value = 0;
+    regnode *ret;
      STRLEN numlen;
-    IV namedclass;
+    IV namedclass = OOB_NAMEDCLASS;
      char *rangebegin = NULL;
      bool need_class = 0;
      bool allow_full_fold = TRUE;   /* Assume wants multi-char folding */
@@ -11165,6 +11376,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
      STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
                                       than just initialized.  */
      SV* properties = NULL;    /* Code points that match \p{} \P{} */
+    SV* posixes = NULL;     /* Code points that match classes like, [:word:],
+                               extended beyond the Latin1 range */
      UV element_count = 0;   /* Number of distinct elements in the class.
                                Optimizations may be possible if this is tiny */
      UV n;
@@ -11179,37 +11392,34 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
  
      /* Set if a component of this character class is user-defined; just passed
       * on to the engine */
-    UV has_user_defined_property = 0;
-
-    /* code points this node matches that can't be stored in the bitmap */
-    SV* nonbitmap = NULL;
-
-    /* The items that are to match that aren't stored in the bitmap, but are a
-     * result of things that are stored there.  This is the fold closure of
-     * such a character, either because it has DEPENDS semantics and shouldn't
-     * be matched unless the target string is utf8, or is a code point that is
-     * too large for the bit map, as for example, the fold of the MICRO SIGN is
-     * above 255.  This all is solely for performance reasons.  By having this
-     * code know the outside-the-bitmap folds that the bitmapped characters are
-     * involved with, we don't have to go out to disk to find the list of
-     * matches, unless the character class includes code points that aren't
-     * storable in the bit map.  That means that a character class with an 's'
-     * in it, for example, doesn't need to go out to disk to find everything
-     * that matches.  A 2nd list is used so that the 'nonbitmap' list is kept
-     * empty unless there is something whose fold we don't know about, and will
-     * have to go out to the disk to find. */
-    SV* l1_fold_invlist = NULL;
+    bool has_user_defined_property = FALSE;
+
+    /* inversion list of code points this node matches only when the target
+     * string is in UTF-8.  (Because is under /d) */
+    SV* depends_list = NULL;
+
+    /* inversion list of code points this node matches.  For much of the
+     * function, it includes only those that match regardless of the utf8ness
+     * of the target string */
+    SV* cp_list = NULL;
  
      /* List of multi-character folds that are matched by this node */
      AV* unicode_alternate  = NULL;
  #ifdef EBCDIC
+    /* In a range, counts how many 0-2 of the ends of it came from literals,
+     * not escapes.  Thus we can tell if 'A' was input vs \x{C1} */
      UV literal_endpoint = 0;
  #endif
-    UV stored = 0;  /* how many chars stored in the bitmap */
+    bool invert = FALSE;    /* Is this class to be complemented */
+
+    /* Is there any thing like \W or [:^digit:] that matches above the legal
+     * Unicode range? */
+    bool runtime_posix_matches_above_Unicode = FALSE;
  
      regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
          case we need to change the emitted regop to an EXACT. */
      const char * orig_parse = RExC_parse;
+    const I32 orig_size = RExC_size;
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_REGCLASS;
@@ -11230,8 +11440,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
      if (UCHARAT(RExC_parse) == '^') {  /* Complement of range. */
         RExC_naughty++;
         RExC_parse++;
-       if (!SIZE_ONLY)
-           ANYOF_FLAGS(ret) |= ANYOF_INVERT;
+        invert = TRUE;
  
          /* We have decided to not allow multi-char folds in inverted character
          * classes, due to the confusion that can happen, especially with
@@ -11253,7 +11462,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
         if (LOC) {
             ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
         }
-       ANYOF_BITMAP_ZERO(ret);
         listsv = newSVpvs("# comment\n");
         initial_listsv_len = SvCUR(listsv);
      }
@@ -11322,11 +11530,11 @@ parseit:
                      if this makes sense as it does change the behaviour
                      from earlier versions, OTOH that behaviour was broken
                      as well. */
-                    UV v; /* value is register so we cant & it /grrr */
-                    if (reg_namedseq(pRExC_state, &v, NULL, depth)) {
+                    if (! grok_bslash_N(pRExC_state, NULL, &value, flagp, depth,
+                                      TRUE /* => charclass */))
+                    {
                          goto parseit;
                      }
-                    value= v; 
                  }
                  break;
             case 'p':
@@ -11353,9 +11561,9 @@ parseit:
                     n = 1;
                 }
                 if (!SIZE_ONLY) {
-                    SV** invlistsvp;
                      SV* invlist;
                      char* name;
+
                     if (UCHARAT(RExC_parse) == '^') {
                          RExC_parse++;
                          n--;
@@ -11394,10 +11602,7 @@ parseit:
                      if (   ! swash
                          || ! SvROK(swash)
                          || ! SvTYPE(SvRV(swash)) == SVt_PVHV
-                        || ! (invlistsvp =
-                               hv_fetchs(MUTABLE_HV(SvRV(swash)),
-                                "INVLIST", FALSE))
-                        || ! (invlist = *invlistsvp))
+                        || ! (invlist = _get_swash_invlist(swash)))
                     {
                          if (swash) {
                              SvREFCNT_dec(swash);
@@ -11410,11 +11615,15 @@ parseit:
                          Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n",
                                          (value == 'p' ? '+' : '!'),
                                          name);
-                        has_user_defined_property = 1;
+                        has_user_defined_property = TRUE;
  
                          /* We don't know yet, so have to assume that the
                           * property could match something in the Latin1 range,
-                         * hence something that isn't utf8 */
+                         * hence something that isn't utf8.  Note that this
+                         * would cause things in <depends_list> to match
+                         * inappropriately, except that any \p{}, including
+                         * this one forces Unicode semantics, which means there
+                         * is <no depends_list> */
                          ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP_NON_UTF8;
                      }
                      else {
@@ -11422,17 +11631,14 @@ parseit:
                          /* Here, did get the swash and its inversion list.  If
                           * the swash is from a user-defined property, then this
                           * whole character class should be regarded as such */
-                        SV** user_defined_svp =
-                                            hv_fetchs(MUTABLE_HV(SvRV(swash)),
-                                                        "USER_DEFINED", FALSE);
-                        if (user_defined_svp) {
-                            has_user_defined_property
-                                                    |= SvUV(*user_defined_svp);
-                        }
+                        has_user_defined_property =
+                                                _is_swash_user_defined(swash);
  
                          /* Invert if asking for the complement */
                          if (value == 'P') {
-                           _invlist_union_complement_2nd(properties, invlist, &properties);
+                           _invlist_union_complement_2nd(properties,
+                                                          invlist,
+                                                          &properties);
  
                              /* The swash can't be used as-is, because we've
                              * inverted things; delay removing it to here after
@@ -11479,22 +11685,18 @@ parseit:
                 }
                 break;
             case 'x':
-               if (*RExC_parse == '{') {
-                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
-                        | PERL_SCAN_DISALLOW_PREFIX;
-                   char * const e = strchr(RExC_parse++, '}');
-                    if (!e)
-                        vFAIL("Missing right brace on \\x{}");
-
-                   numlen = e - RExC_parse;
-                   value = grok_hex(RExC_parse, &numlen, &flags, NULL);
-                   RExC_parse = e + 1;
-               }
-               else {
-                    I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-                   numlen = 2;
-                   value = grok_hex(RExC_parse, &numlen, &flags, NULL);
+               RExC_parse--;   /* function expects to be pointed at the 'x' */
+               {
+                   const char* error_msg;
+                   bool valid = grok_bslash_x(RExC_parse,
+                                              &value,
+                                              &numlen,
+                                              &error_msg,
+                                              1);
                     RExC_parse += numlen;
+                   if (! valid) {
+                       vFAIL(error_msg);
+                   }
                 }
                 if (PL_encoding && value < 0x100)
                     goto recode_encoding;
@@ -11538,15 +11740,20 @@ parseit:
             literal_endpoint++;
  #endif
  
-       if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
-
-           /* What matches in a locale is not known until runtime, so need to
-            * (one time per class) allocate extra space to pass to regexec.
-            * The space will contain a bit for each named class that is to be
-            * matched against.  This isn't needed for \p{} and pseudo-classes,
-            * as they are not affected by locale, and hence are dealt with
-            * separately */
-           if (LOC && namedclass < ANYOF_MAX && ! need_class) {
+            /* What matches in a locale is not known until runtime.  This
+             * includes what the Posix classes (like \w, [:space:]) match.
+             * Room must be reserved (one time per class) to store such
+             * classes, either if Perl is compiled so that locale nodes always
+             * should have this space, or if there is such class info to be
+             * stored.  The space will contain a bit for each named class that
+             * is to be matched against.  This isn't needed for \p{} and
+             * pseudo-classes, as they are not affected by locale, and hence
+             * are dealt with separately */
+           if (LOC
+                && ! need_class
+                && (ANYOF_LOCALE == ANYOF_CLASS
+                    || (namedclass > OOB_NAMEDCLASS && namedclass < ANYOF_MAX)))
+            {
                 need_class = 1;
                 if (SIZE_ONLY) {
                     RExC_size += ANYOF_CLASS_SKIP - ANYOF_SKIP;
@@ -11558,6 +11765,8 @@ parseit:
                 ANYOF_FLAGS(ret) |= ANYOF_CLASS;
             }
  
+       if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
+
             /* a bad range like a-\d, a-[:digit:].  The '-' is taken as a
              * literal, as is the character that began the false range, i.e.
              * the 'a' in the examples */
@@ -11569,51 +11778,41 @@ parseit:
                     ckWARN4reg(RExC_parse,
                                "False [] range \"%*.*s\"",
                                w, w, rangebegin);
-
-                   stored +=
-                         set_regclass_bit(pRExC_state, ret, '-', &l1_fold_invlist, &unicode_alternate);
-                   if (prevvalue < 256) {
-                       stored +=
-                         set_regclass_bit(pRExC_state, ret, (U8) prevvalue, &l1_fold_invlist, &unicode_alternate);
-                   }
-                   else {
-                       nonbitmap = add_cp_to_invlist(nonbitmap, prevvalue);
-                   }
+                    cp_list = add_cp_to_invlist(cp_list, '-');
+                    cp_list = add_cp_to_invlist(cp_list, prevvalue);
                 }
  
                 range = 0; /* this was not a true range */
+                element_count += 2; /* So counts for three values */
             }
  
-           if (!SIZE_ONLY) {
-
-               /* Possible truncation here but in some 64-bit environments
-                * the compiler gets heartburn about switch on 64-bit values.
-                * A similar issue a little earlier when switching on value.
-                * --jhi */
+           if (! SIZE_ONLY) {
                 switch ((I32)namedclass) {
  
                 case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
                     break;
                 case ANYOF_NALNUMC:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_ALPHA:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
                     break;
                 case ANYOF_NALPHA:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_ASCII:
                     if (LOC) {
                         ANYOF_CLASS_SET(ret, namedclass);
                     }
                      else {
-                        _invlist_union(properties, PL_ASCII, &properties);
+                        _invlist_union(posixes, PL_ASCII, &posixes);
                      }
                     break;
                 case ANYOF_NASCII:
@@ -11621,59 +11820,61 @@ parseit:
                         ANYOF_CLASS_SET(ret, namedclass);
                     }
                      else {
-                        _invlist_union_complement_2nd(properties,
-                                                    PL_ASCII, &properties);
+                        _invlist_union_complement_2nd(posixes,
+                                                    PL_ASCII, &posixes);
                          if (DEPENDS_SEMANTICS) {
                              ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
                          }
                      }
                     break;
                 case ANYOF_BLANK:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                              PL_PosixBlank, PL_XPosixBlank);
                     break;
                 case ANYOF_NBLANK:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PosixBlank, PL_XPosixBlank);
                     break;
                 case ANYOF_CNTRL:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                              PL_PosixCntrl, PL_XPosixCntrl);
                     break;
                 case ANYOF_NCNTRL:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PosixCntrl, PL_XPosixCntrl);
                     break;
                 case ANYOF_DIGIT:
                     /* There are no digits in the Latin1 range outside of
                      * ASCII, so call the macro that doesn't have to resolve
                      * them */
-                   DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, posixes,
                          PL_PosixDigit, "XPosixDigit", listsv);
                     break;
                 case ANYOF_NDIGIT:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_GRAPH:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
                     break;
                 case ANYOF_NGRAPH:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_HORIZWS:
-                   /* For these, we use the nonbitmap, as /d doesn't make a
+                   /* For these, we use the cp_list, as /d doesn't make a
                      * difference in what these match.  There would be problems
                      * if these characters had folds other than themselves, as
-                    * nonbitmap is subject to folding.  It turns out that \h
+                    * cp_list is subject to folding.  It turns out that \h
                      * is just a synonym for XPosixBlank */
-                   _invlist_union(nonbitmap, PL_XPosixBlank, &nonbitmap);
+                   _invlist_union(cp_list, PL_XPosixBlank, &cp_list);
                     break;
                 case ANYOF_NHORIZWS:
-                    _invlist_union_complement_2nd(nonbitmap,
-                                                 PL_XPosixBlank, &nonbitmap);
+                    _invlist_union_complement_2nd(cp_list,
+                                                 PL_XPosixBlank, &cp_list);
                     break;
                 case ANYOF_LOWER:
                 case ANYOF_NLOWER:
@@ -11696,45 +11897,48 @@ parseit:
                         Xname = "XPosixLower";
                     }
                     if (namedclass == ANYOF_LOWER) {
-                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                                      ascii_source, l1_source, Xname, listsv);
                     }
                     else {
                         DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
-                            properties, ascii_source, l1_source, Xname, listsv);
+                            posixes, ascii_source, l1_source, Xname, listsv,
+                            runtime_posix_matches_above_Unicode);
                     }
                     break;
                 }
                 case ANYOF_PRINT:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
                     break;
                 case ANYOF_NPRINT:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_PUNCT:
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                          PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
                     break;
                 case ANYOF_NPUNCT:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                        PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                        PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_PSXSPC:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                              PL_PosixSpace, PL_XPosixSpace);
                     break;
                 case ANYOF_NPSXSPC:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PosixSpace, PL_XPosixSpace);
                     break;
                 case ANYOF_SPACE:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                              PL_PerlSpace, PL_XPerlSpace);
                     break;
                 case ANYOF_NSPACE:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PerlSpace, PL_XPerlSpace);
                     break;
                 case ANYOF_UPPER:   /* Same as LOWER, above */
@@ -11755,40 +11959,42 @@ parseit:
                         Xname = "XPosixUpper";
                     }
                     if (namedclass == ANYOF_UPPER) {
-                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                       DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                                      ascii_source, l1_source, Xname, listsv);
                     }
                     else {
                         DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
-                        properties, ascii_source, l1_source, Xname, listsv);
+                        posixes, ascii_source, l1_source, Xname, listsv,
+                        runtime_posix_matches_above_Unicode);
                     }
                     break;
                 }
                 case ANYOF_ALNUM:   /* Really is 'Word' */
-                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+                   DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                              PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
                     break;
                 case ANYOF_NALNUM:
-                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
-                            PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
+                   DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+                            PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv,
+                            runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_VERTWS:
-                   /* For these, we use the nonbitmap, as /d doesn't make a
+                   /* For these, we use the cp_list, as /d doesn't make a
                      * difference in what these match.  There would be problems
                      * if these characters had folds other than themselves, as
-                    * nonbitmap is subject to folding */
-                   _invlist_union(nonbitmap, PL_VertSpace, &nonbitmap);
+                    * cp_list is subject to folding */
+                   _invlist_union(cp_list, PL_VertSpace, &cp_list);
                     break;
                 case ANYOF_NVERTWS:
-                    _invlist_union_complement_2nd(nonbitmap,
-                                                    PL_VertSpace, &nonbitmap);
+                    _invlist_union_complement_2nd(cp_list,
+                                                    PL_VertSpace, &cp_list);
                     break;
                 case ANYOF_XDIGIT:
-                    DO_POSIX(ret, namedclass, properties,
+                    DO_POSIX(ret, namedclass, posixes,
                                              PL_PosixXDigit, PL_XPosixXDigit);
                     break;
                 case ANYOF_NXDIGIT:
-                    DO_N_POSIX(ret, namedclass, properties,
+                    DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PosixXDigit, PL_XPosixXDigit);
                     break;
                 case ANYOF_MAX:
@@ -11799,19 +12005,19 @@ parseit:
                     break;
                 }
  
-               continue;
+               continue;   /* Go get next character */
             }
         } /* end of namedclass \blah */
  
         if (range) {
-           if (prevvalue > (IV)value) /* b-a */ {
+           if (prevvalue > value) /* b-a */ {
                 const int w = RExC_parse - rangebegin;
                 Simple_vFAIL4("Invalid [] range \"%*.*s\"", w, w, rangebegin);
                 range = 0; /* not a valid range */
             }
         }
         else {
-           prevvalue = value; /* save the beginning of the range */
+            prevvalue = value; /* save the beginning of the potential range */
             if (RExC_parse+1 < RExC_end
                 && *RExC_parse == '-'
                 && RExC_parse[1] != ']')
@@ -11828,135 +12034,459 @@ parseit:
                                "False [] range \"%*.*s\"",
                                w, w, rangebegin);
                     }
-                   if (!SIZE_ONLY)
-                       stored +=
-                            set_regclass_bit(pRExC_state, ret, '-', &l1_fold_invlist, &unicode_alternate);
+                    if (!SIZE_ONLY) {
+                        cp_list = add_cp_to_invlist(cp_list, '-');
+                    }
+                    element_count++;
                 } else
                     range = 1;  /* yeah, it's a range! */
                 continue;       /* but do it the next time */
             }
         }
  
+        /* Here, <prevvalue> is the beginning of the range, if any; or <value>
+         * if not */
+
         /* non-Latin1 code point implies unicode semantics.  Must be set in
          * pass1 so is there for the whole of pass 2 */
         if (value > 255) {
             RExC_uni_semantics = 1;
         }
  
-       /* now is the next time */
-       if (!SIZE_ONLY) {
-           if (prevvalue < 256) {
-               const IV ceilvalue = value < 256 ? value : 255;
-               IV i;
-#ifdef EBCDIC
-               /* In EBCDIC [\x89-\x91] should include
-                * the \x8e but [i-j] should not. */
-               if (literal_endpoint == 2 &&
-                   ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
-                    (isUPPER(prevvalue) && isUPPER(ceilvalue))))
-               {
-                   if (isLOWER(prevvalue)) {
-                       for (i = prevvalue; i <= ceilvalue; i++)
-                           if (isLOWER(i) && !ANYOF_BITMAP_TEST(ret,i)) {
-                               stored +=
-                                  set_regclass_bit(pRExC_state, ret, (U8) i, &l1_fold_invlist, &unicode_alternate);
-                           }
-                   } else {
-                       for (i = prevvalue; i <= ceilvalue; i++)
-                           if (isUPPER(i) && !ANYOF_BITMAP_TEST(ret,i)) {
-                               stored +=
-                                  set_regclass_bit(pRExC_state, ret, (U8) i, &l1_fold_invlist, &unicode_alternate);
-                           }
-                   }
-               }
-               else
-#endif
-                     for (i = prevvalue; i <= ceilvalue; i++) {
-                       stored += set_regclass_bit(pRExC_state, ret, (U8) i, &l1_fold_invlist, &unicode_alternate);
-                     }
-         }
-         if (value > 255) {
-           const UV prevnatvalue  = NATIVE_TO_UNI(prevvalue);
-           const UV natvalue      = NATIVE_TO_UNI(value);
-           nonbitmap = _add_range_to_invlist(nonbitmap, prevnatvalue, natvalue);
-       }
-#ifdef EBCDIC
-           literal_endpoint = 0;
-#endif
+        /* Ready to process either the single value, or the completed range */
+       if (!SIZE_ONLY) {
+#ifndef EBCDIC
+            cp_list = _add_range_to_invlist(cp_list, prevvalue, value);
+#else
+            UV* this_range = _new_invlist(1);
+            _append_range_to_invlist(this_range, prevvalue, value);
+
+            /* In EBCDIC, the ranges 'A-Z' and 'a-z' are each not contiguous.
+             * If this range was specified using something like 'i-j', we want
+             * to include only the 'i' and the 'j', and not anything in
+             * between, so exclude non-ASCII, non-alphabetics from it.
+             * However, if the range was specified with something like
+             * [\x89-\x91] or [\x89-j], all code points within it should be
+             * included.  literal_endpoint==2 means both ends of the range used
+             * a literal character, not \x{foo} */
+           if (literal_endpoint == 2
+                && (prevvalue >= 'a' && value <= 'z')
+                    || (prevvalue >= 'A' && value <= 'Z'))
+            {
+                _invlist_intersection(this_range, PL_ASCII, &this_range, );
+                _invlist_intersection(this_range, PL_Alpha, &this_range, );
+            }
+            _invlist_union(cp_list, this_range, &cp_list);
+            literal_endpoint = 0;
+#endif
+        }
+
+       range = 0; /* this range (if it was one) is done now */
+    } /* End of loop through all the text within the brackets */
+
+    /* If the character class contains only a single element, it may be
+     * optimizable into another node type which is smaller and runs faster.
+     * Check if this is the case for this class */
+    if (element_count == 1) {
+        U8 op = END;
+        U8 arg = 0;
+
+        if (namedclass > OOB_NAMEDCLASS) { /* this is a named class, like \w or
+                                              [:digit:] or \p{foo} */
+
+            /* Certain named classes have equivalents that can appear outside a
+             * character class, e.g. \w, \H.  We use these instead of a
+             * character class. */
+            switch ((I32)namedclass) {
+                U8 offset;
+
+                /* The first group is for node types that depend on the charset
+                 * modifier to the regex.  We first calculate the base node
+                 * type, and if it should be inverted */
+
+                case ANYOF_NALNUM:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_ALNUM:
+                    op = ALNUM;
+                    goto join_charset_classes;
+
+                case ANYOF_NSPACE:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_SPACE:
+                    op = SPACE;
+                    goto join_charset_classes;
+
+                case ANYOF_NDIGIT:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_DIGIT:
+                    op = DIGIT;
+
+                  join_charset_classes:
+
+                    /* Now that we have the base node type, we take advantage
+                     * of the enum ordering of the charset modifiers to get the
+                     * exact node type,  For example the base SPACE also has
+                     * SPACEL, SPACEU, and SPACEA */
+
+                    offset = get_regex_charset(RExC_flags);
+
+                    /* /aa is the same as /a for these */
+                    if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
+                        offset = REGEX_ASCII_RESTRICTED_CHARSET;
+                    }
+                    else if (op == DIGIT && offset == REGEX_UNICODE_CHARSET) {
+                        offset = REGEX_DEPENDS_CHARSET; /* There is no DIGITU */
+                    }
+
+                    op += offset;
+
+                    /* The number of varieties of each of these is the same,
+                     * hence, so is the delta between the normal and
+                     * complemented nodes */
+                    if (invert) {
+                        op += NALNUM - ALNUM;
+                    }
+                    *flagp |= HASWIDTH|SIMPLE;
+                    break;
+
+                /* The second group doesn't depend of the charset modifiers.
+                 * We just have normal and complemented */
+                case ANYOF_NHORIZWS:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_HORIZWS:
+                  is_horizws:
+                    op = (invert) ? NHORIZWS : HORIZWS;
+                    *flagp |= HASWIDTH|SIMPLE;
+                    break;
+
+                case ANYOF_NVERTWS:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_VERTWS:
+                    op = (invert) ? NVERTWS : VERTWS;
+                    *flagp |= HASWIDTH|SIMPLE;
+                    break;
+
+                case ANYOF_MAX:
+                    break;
+
+                case ANYOF_NBLANK:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_BLANK:
+                    if (AT_LEAST_UNI_SEMANTICS && ! AT_LEAST_ASCII_RESTRICTED) {
+                        goto is_horizws;
+                    }
+                    /* FALLTHROUGH */
+                default:
+                    /* A generic posix class.  All the /a ones can be handled
+                     * by the POSIXA opcode.  And all are closed under folding
+                     * in the ASCII range, so FOLD doesn't matter */
+                    if (AT_LEAST_ASCII_RESTRICTED
+                        || (! LOC && namedclass == ANYOF_ASCII))
+                    {
+                        /* The odd numbered ones are the complements of the
+                         * next-lower even number one */
+                        if (namedclass % 2 == 1) {
+                            invert = ! invert;
+                            namedclass--;
+                        }
+                        arg = namedclass_to_classnum(namedclass);
+                        op = (invert) ? NPOSIXA : POSIXA;
+                    }
+                    break;
+            }
+        }
+        else if (value == prevvalue) {
+
+            /* Here, the class consists of just a single code point */
+
+            if (invert) {
+                if (! LOC && value == '\n') {
+                    op = REG_ANY; /* Optimize [^\n] */
+                    *flagp |= HASWIDTH|SIMPLE;
+                    RExC_naughty++;
+                }
+            }
+            else if (value < 256 || UTF) {
+
+                /* Optimize a single value into an EXACTish node, but not if it
+                 * would require converting the pattern to UTF-8. */
+                op = compute_EXACTish(pRExC_state);
+            }
+        } /* Otherwise is a range */
+        else if (! LOC) {   /* locale could vary these */
+            if (prevvalue == '0') {
+                if (value == '9') {
+                    op = (invert) ? NDIGITA : DIGITA;
+                    *flagp |= HASWIDTH|SIMPLE;
+                }
+            }
          }
  
-       range = 0; /* this range (if it was one) is done now */
-    }
+        /* Here, we have changed <op> away from its initial value iff we found
+         * an optimization */
+        if (op != END) {
+
+            /* Throw away this ANYOF regnode, and emit the calculated one,
+             * which should correspond to the beginning, not current, state of
+             * the parse */
+            const char * cur_parse = RExC_parse;
+            RExC_parse = (char *)orig_parse;
+            if ( SIZE_ONLY) {
+                if (! LOC) {
+
+                    /* To get locale nodes to not use the full ANYOF size would
+                     * require moving the code above that writes the portions
+                     * of it that aren't in other nodes to after this point.
+                     * e.g.  ANYOF_CLASS_SET */
+                    RExC_size = orig_size;
+                }
+            }
+            else {
+                RExC_emit = (regnode *)orig_emit;
+            }
+
+            ret = reg_node(pRExC_state, op);
+
+            if (PL_regkind[op] == POSIXD) {
+                if (! SIZE_ONLY) {
+                    FLAGS(ret) = arg;
+                }
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (PL_regkind[op] == EXACT) {
+                alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value);
+            }
  
+            RExC_parse = (char *) cur_parse;
  
+            SvREFCNT_dec(listsv);
+            return ret;
+        }
+    }
  
      if (SIZE_ONLY)
          return ret;
-    /****** !SIZE_ONLY AFTER HERE *********/
+    /****** !SIZE_ONLY (Pass 2) AFTER HERE *********/
  
-    /* If folding and there are code points above 255, we calculate all
-     * characters that could fold to or from the ones already on the list */
-    if (FOLD && nonbitmap) {
+    /* If folding, we calculate all characters that could fold to or from the
+     * ones already on the list */
+    if (FOLD && cp_list) {
         UV start, end;  /* End points of code point ranges */
  
         SV* fold_intersection = NULL;
  
-       /* This is a list of all the characters that participate in folds
-           * (except marks, etc in multi-char folds */
-       if (! PL_utf8_foldable) {
-           SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0);
-           PL_utf8_foldable = _swash_to_invlist(swash);
-            SvREFCNT_dec(swash);
-       }
+        /* In the Latin1 range, the characters that can be folded-to or -from
+         * are precisely the alphabetic characters.  If the highest code point
+         * is within Latin1, we can use the compiled-in list, and not have to
+         * go out to disk. */
+        if (invlist_highest(cp_list) < 256) {
+            _invlist_intersection(PL_L1PosixAlpha, cp_list, &fold_intersection);
+        }
+        else {
  
-       /* This is a hash that for a particular fold gives all characters
-           * that are involved in it */
-       if (! PL_utf8_foldclosures) {
+            /* Here, there are non-Latin1 code points, so we will have to go
+             * fetch the list of all the characters that participate in folds
+             */
+            if (! PL_utf8_foldable) {
+                SV* swash = swash_init("utf8", "_Perl_Any_Folds",
+                                       &PL_sv_undef, 1, 0);
+                PL_utf8_foldable = _get_swash_invlist(swash);
+                SvREFCNT_dec(swash);
+            }
  
-           /* If we were unable to find any folds, then we likely won't be
-            * able to find the closures.  So just create an empty list.
-            * Folding will effectively be restricted to the non-Unicode rules
-            * hard-coded into Perl.  (This case happens legitimately during
-            * compilation of Perl itself before the Unicode tables are
-            * generated) */
-           if (invlist_len(PL_utf8_foldable) == 0) {
-               PL_utf8_foldclosures = newHV();
-           } else {
-               /* If the folds haven't been read in, call a fold function
-                   * to force that */
-               if (! PL_utf8_tofold) {
-                   U8 dummy[UTF8_MAXBYTES+1];
-                   STRLEN dummy_len;
-
-                   /* This particular string is above \xff in both UTF-8 and
-                    * UTFEBCDIC */
-                   to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
-                   assert(PL_utf8_tofold); /* Verify that worked */
-               }
-               PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
-           }
-       }
+            /* This is a hash that for a particular fold gives all characters
+             * that are involved in it */
+            if (! PL_utf8_foldclosures) {
+
+                /* If we were unable to find any folds, then we likely won't be
+                 * able to find the closures.  So just create an empty list.
+                 * Folding will effectively be restricted to the non-Unicode
+                 * rules hard-coded into Perl.  (This case happens legitimately
+                 * during compilation of Perl itself before the Unicode tables
+                 * are generated) */
+                if (_invlist_len(PL_utf8_foldable) == 0) {
+                    PL_utf8_foldclosures = newHV();
+                }
+                else {
+                    /* If the folds haven't been read in, call a fold function
+                     * to force that */
+                    if (! PL_utf8_tofold) {
+                        U8 dummy[UTF8_MAXBYTES+1];
+                        STRLEN dummy_len;
+
+                        /* This particular string is above \xff in both UTF-8
+                         * and UTFEBCDIC */
+                        to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
+                        assert(PL_utf8_tofold); /* Verify that worked */
+                    }
+                    PL_utf8_foldclosures =
+                                        _swash_inversion_hash(PL_utf8_tofold);
+                }
+            }
  
-       /* Only the characters in this class that participate in folds need be
-        * checked.  Get the intersection of this class and all the possible
-        * characters that are foldable.  This can quickly narrow down a large
-        * class */
-       _invlist_intersection(PL_utf8_foldable, nonbitmap, &fold_intersection);
+            /* Only the characters in this class that participate in folds need
+             * be checked.  Get the intersection of this class and all the
+             * possible characters that are foldable.  This can quickly narrow
+             * down a large class */
+            _invlist_intersection(PL_utf8_foldable, cp_list,
+                                  &fold_intersection);
+        }
  
         /* Now look at the foldable characters in this class individually */
         invlist_iterinit(fold_intersection);
         while (invlist_iternext(fold_intersection, &start, &end)) {
             UV j;
  
+            /* Locale folding for Latin1 characters is deferred until runtime */
+            if (LOC && start < 256) {
+                start = 256;
+            }
+
             /* Look at every character in the range */
             for (j = start; j <= end; j++) {
  
-               /* Get its fold */
                 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
                 STRLEN foldlen;
-               const UV f =
-                    _to_uni_fold_flags(j, foldbuf, &foldlen,
-                                       (allow_full_fold) ? FOLD_FLAGS_FULL : 0);
+                UV f;
+
+                if (j < 256) {
+
+                    /* We have the latin1 folding rules hard-coded here so that
+                     * an innocent-looking character class, like /[ks]/i won't
+                     * have to go out to disk to find the possible matches.
+                     * XXX It would be better to generate these via regen, in
+                     * case a new version of the Unicode standard adds new
+                     * mappings, though that is not really likely, and may be
+                     * caught by the default: case of the switch below. */
+
+                    if (PL_fold_latin1[j] != j) {
+
+                        /* ASCII is always matched; non-ASCII is matched only
+                         * under Unicode rules */
+                        if (isASCII(j) || AT_LEAST_UNI_SEMANTICS) {
+                            cp_list =
+                                add_cp_to_invlist(cp_list, PL_fold_latin1[j]);
+                        }
+                        else {
+                            depends_list =
+                             add_cp_to_invlist(depends_list, PL_fold_latin1[j]);
+                        }
+                    }
+
+                    if (HAS_NONLATIN1_FOLD_CLOSURE(j)
+                        && (! isASCII(j) || ! ASCII_FOLD_RESTRICTED))
+                    {
+                        /* Certain Latin1 characters have matches outside
+                         * Latin1, or are multi-character.  To get here, 'j' is
+                         * one of those characters.   None of these matches is
+                         * valid for ASCII characters under /aa, which is why
+                         * the 'if' just above excludes those.  The matches
+                         * fall into three categories:
+                         * 1) They are singly folded-to or -from an above 255
+                         *    character, e.g., LATIN SMALL LETTER Y WITH
+                         *    DIAERESIS and LATIN CAPITAL LETTER Y WITH
+                         *    DIAERESIS;
+                         * 2) They are part of a multi-char fold with another
+                         *    latin1 character; only LATIN SMALL LETTER
+                         *    SHARP S => "ss" fits this;
+                         * 3) They are part of a multi-char fold with a
+                         *    character outside of Latin1, such as various
+                         *    ligatures.
+                        * We aren't dealing fully with multi-char folds, except
+                        * we do deal with the pattern containing a character
+                        * that has a multi-char fold (not so much the inverse).
+                        * For types 1) and 3), the matches only happen when the
+                        * target string is utf8; that's not true for 2), and we
+                        * set a flag for it.
+                        *
+                        * The code below adds the single fold closures for 'j'
+                        * to the inversion list. */
+                        switch (j) {
+                            case 'k':
+                            case 'K':
+                                cp_list =
+                                    add_cp_to_invlist(cp_list, KELVIN_SIGN);
+                                break;
+                            case 's':
+                            case 'S':
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    LATIN_SMALL_LETTER_LONG_S);
+                                break;
+                            case MICRO_SIGN:
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    GREEK_CAPITAL_LETTER_MU);
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    GREEK_SMALL_LETTER_MU);
+                                break;
+                            case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
+                            case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
+                                cp_list =
+                                    add_cp_to_invlist(cp_list, ANGSTROM_SIGN);
+                                break;
+                            case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
+                                cp_list = add_cp_to_invlist(cp_list,
+                                        LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
+                                break;
+                            case LATIN_SMALL_LETTER_SHARP_S:
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                LATIN_CAPITAL_LETTER_SHARP_S);
+
+                                /* Under /a, /d, and /u, this can match the two
+                                 * chars "ss" */
+                                if (! ASCII_FOLD_RESTRICTED) {
+                                    add_alternate(&unicode_alternate,
+                                                  (U8 *) "ss", 2);
+
+                                    /* And under /u or /a, it can match even if
+                                     * the target is not utf8 */
+                                    if (AT_LEAST_UNI_SEMANTICS) {
+                                        ANYOF_FLAGS(ret) |=
+                                                    ANYOF_NONBITMAP_NON_UTF8;
+                                    }
+                                }
+                                break;
+                            case 'F': case 'f':
+                            case 'I': case 'i':
+                            case 'L': case 'l':
+                            case 'T': case 't':
+                            case 'A': case 'a':
+                            case 'H': case 'h':
+                            case 'J': case 'j':
+                            case 'N': case 'n':
+                            case 'W': case 'w':
+                            case 'Y': case 'y':
+                                /* These all are targets of multi-character
+                                 * folds from code points that require UTF8 to
+                                 * express, so they can't match unless the
+                                 * target string is in UTF-8, so no action here
+                                 * is necessary, as regexec.c properly handles
+                                 * the general case for UTF-8 matching */
+                                break;
+                            default:
+                                /* Use deprecated warning to increase the
+                                 * chances of this being output */
+                                ckWARN2regdep(RExC_parse, "Perl folding rules are not up-to-date for 0x%"UVXf"; please use the perlbug utility to report;", j);
+                                break;
+                        }
+                    }
+                    continue;
+                }
+
+                /* Here is an above Latin1 character.  We don't have the rules
+                 * hard-coded for it.  First, get its fold */
+               f = _to_uni_fold_flags(j, foldbuf, &foldlen,
+                                    ((allow_full_fold) ? FOLD_FLAGS_FULL : 0)
+                                    | ((LOC)
+                                        ? FOLD_FLAGS_LOCALE
+                                        : (ASCII_FOLD_RESTRICTED)
+                                            ? FOLD_FLAGS_NOMIX_ASCII
+                                            : 0));
  
                 if (foldlen > (STRLEN)UNISKIP(f)) {
  
@@ -11972,54 +12502,25 @@ parseit:
  
                         /* If any of the folded characters of this are in the
                          * Latin1 range, tell the regex engine that this can
-                        * match a non-utf8 target string.  The only multi-byte
-                        * fold whose source is in the Latin1 range (U+00DF)
-                        * applies only when the target string is utf8, or
-                        * under unicode rules */
-                       if (j > 255 || AT_LEAST_UNI_SEMANTICS) {
-                           while (loc < e) {
-
-                               /* Can't mix ascii with non- under /aa */
-                               if (MORE_ASCII_RESTRICTED
-                                   && (isASCII(*loc) != isASCII(j)))
-                               {
-                                   goto end_multi_fold;
-                               }
-                               if (UTF8_IS_INVARIANT(*loc)
-                                   || UTF8_IS_DOWNGRADEABLE_START(*loc))
-                               {
-                                    /* Can't mix above and below 256 under LOC
-                                     */
-                                   if (LOC) {
-                                       goto end_multi_fold;
-                                   }
-                                   ANYOF_FLAGS(ret)
-                                           |= ANYOF_NONBITMAP_NON_UTF8;
-                                   break;
-                               }
-                               loc += UTF8SKIP(loc);
-                           }
-                       }
+                        * match a non-utf8 target string.  */
+                        while (loc < e) {
+                            if (UTF8_IS_INVARIANT(*loc)
+                                || UTF8_IS_DOWNGRADEABLE_START(*loc))
+                            {
+                                ANYOF_FLAGS(ret)
+                                        |= ANYOF_NONBITMAP_NON_UTF8;
+                                break;
+                            }
+                            loc += UTF8SKIP(loc);
+                        }
  
                         add_alternate(&unicode_alternate, foldbuf, foldlen);
-                   end_multi_fold: ;
-                   }
-
-                   /* This is special-cased, as it is the only letter which
-                    * has both a multi-fold and single-fold in Latin1.  All
-                    * the other chars that have single and multi-folds are
-                    * always in utf8, and the utf8 folding algorithm catches
-                    * them */
-                   if (! LOC && j == LATIN_CAPITAL_LETTER_SHARP_S) {
-                       stored += set_regclass_bit(pRExC_state,
-                                       ret,
-                                       LATIN_SMALL_LETTER_SHARP_S,
-                                       &l1_fold_invlist, &unicode_alternate);
                     }
                 }
-               else {
-                   /* Single character fold.  Add everything in its fold
-                    * closure to the list that this node should match */
+                else {
+                    /* Single character fold of above Latin1.  Add everything
+                     * in its fold closure to the list that this node should
+                     * match */
                     SV** listp;
  
                     /* The fold closures data structure is a hash with the keys
@@ -12042,94 +12543,322 @@ parseit:
                             /* /aa doesn't allow folds between ASCII and non-;
                              * /l doesn't allow them between above and below
                              * 256 */
-                           if ((MORE_ASCII_RESTRICTED
-                                && (isASCII(c) != isASCII(j)))
-                                   || (LOC && ((c < 256) != (j < 256))))
+                           if ((ASCII_FOLD_RESTRICTED
+                                      && (isASCII(c) != isASCII(j)))
+                               || (LOC && ((c < 256) != (j < 256))))
                             {
                                 continue;
                             }
  
-                           if (c < 256 && AT_LEAST_UNI_SEMANTICS) {
-                               stored += set_regclass_bit(pRExC_state,
-                                       ret,
-                                       (U8) c,
-                                       &l1_fold_invlist, &unicode_alternate);
-                           }
-                               /* It may be that the code point is already in
-                                * this range or already in the bitmap, in
-                                * which case we need do nothing */
-                           else if ((c < start || c > end)
-                                       && (c > 255
-                                           || ! ANYOF_BITMAP_TEST(ret, c)))
-                           {
-                               nonbitmap = add_cp_to_invlist(nonbitmap, c);
+                            /* Folds involving non-ascii Latin1 characters
+                             * under /d are added to a separate list */
+                           if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
+                            {
+                               cp_list = add_cp_to_invlist(cp_list, c);
+                            }
+                            else {
+                              depends_list = add_cp_to_invlist(depends_list, c);
                             }
                         }
                     }
                 }
-           }
+            }
         }
         SvREFCNT_dec(fold_intersection);
      }
  
-    /* Combine the two lists into one. */
-    if (l1_fold_invlist) {
-       if (nonbitmap) {
-           _invlist_union(nonbitmap, l1_fold_invlist, &nonbitmap);
-           SvREFCNT_dec(l1_fold_invlist);
-       }
-       else {
-           nonbitmap = l1_fold_invlist;
-       }
+    /* And combine the result (if any) with any inversion list from posix
+     * classes.  The lists are kept separate up to now because we don't want to
+     * fold the classes (folding of those is automatically handled by the swash
+     * fetching code) */
+    if (posixes) {
+        if (! DEPENDS_SEMANTICS) {
+            if (cp_list) {
+                _invlist_union(cp_list, posixes, &cp_list);
+                SvREFCNT_dec(posixes);
+            }
+            else {
+                cp_list = posixes;
+            }
+        }
+        else {
+            /* Under /d, we put into a separate list the Latin1 things that
+             * match only when the target string is utf8 */
+            SV* nonascii_but_latin1_properties = NULL;
+            _invlist_intersection(posixes, PL_Latin1,
+                                  &nonascii_but_latin1_properties);
+            _invlist_subtract(nonascii_but_latin1_properties, PL_ASCII,
+                              &nonascii_but_latin1_properties);
+            _invlist_subtract(posixes, nonascii_but_latin1_properties,
+                              &posixes);
+            if (cp_list) {
+                _invlist_union(cp_list, posixes, &cp_list);
+                SvREFCNT_dec(posixes);
+            }
+            else {
+                cp_list = posixes;
+            }
+
+            if (depends_list) {
+                _invlist_union(depends_list, nonascii_but_latin1_properties,
+                               &depends_list);
+                SvREFCNT_dec(nonascii_but_latin1_properties);
+            }
+            else {
+                depends_list = nonascii_but_latin1_properties;
+            }
+        }
      }
  
      /* And combine the result (if any) with any inversion list from properties.
-     * The lists are kept separate up to now because we don't want to fold the
-     * properties */
+     * The lists are kept separate up to now so that we can distinguish the two
+     * in regards to matching above-Unicode.  A run-time warning is generated
+     * if a Unicode property is matched against a non-Unicode code point. But,
+     * we allow user-defined properties to match anything, without any warning,
+     * and we also suppress the warning if there is a portion of the character
+     * class that isn't a Unicode property, and which matches above Unicode, \W
+     * or [\x{110000}] for example.
+     * (Note that in this case, unlike the Posix one above, there is no
+     * <depends_list>, because having a Unicode property forces Unicode
+     * semantics */
      if (properties) {
-       if (nonbitmap) {
-           _invlist_union(nonbitmap, properties, &nonbitmap);
-           SvREFCNT_dec(properties);
-       }
-       else {
-           nonbitmap = properties;
-       }
+        bool warn_super = ! has_user_defined_property;
+        if (cp_list) {
+
+            /* If it matters to the final outcome, see if a non-property
+             * component of the class matches above Unicode.  If so, the
+             * warning gets suppressed.  This is true even if just a single
+             * such code point is specified, as though not strictly correct if
+             * another such code point is matched against, the fact that they
+             * are using above-Unicode code points indicates they should know
+             * the issues involved */
+            if (warn_super) {
+                bool non_prop_matches_above_Unicode =
+                            runtime_posix_matches_above_Unicode
+                            | (invlist_highest(cp_list) > PERL_UNICODE_MAX);
+                if (invert) {
+                    non_prop_matches_above_Unicode =
+                                            !  non_prop_matches_above_Unicode;
+                }
+                warn_super = ! non_prop_matches_above_Unicode;
+            }
+
+            _invlist_union(properties, cp_list, &cp_list);
+            SvREFCNT_dec(properties);
+        }
+        else {
+            cp_list = properties;
+        }
+
+        if (warn_super) {
+            ANYOF_FLAGS(ret) |= ANYOF_WARN_SUPER;
+        }
      }
  
-    /* Here, <nonbitmap> contains all the code points we can determine at
-     * compile time that we haven't put into the bitmap.  Go through it, and
-     * for things that belong in the bitmap, put them there, and delete from
-     * <nonbitmap> */
-    if (nonbitmap) {
+    /* Here, we have calculated what code points should be in the character
+     * class.
+     *
+     * Now we can see about various optimizations.  Fold calculation (which we
+     * did above) needs to take place before inversion.  Otherwise /[^k]/i
+     * would invert to include K, which under /i would match k, which it
+     * shouldn't.  Therefore we can't invert folded locale now, as it won't be
+     * folded until runtime */
+
+    /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
+     * at compile time.  Besides not inverting folded locale now, we can't invert
+     * if there are things such as \w, which aren't known until runtime */
+    if (invert
+        && ! (LOC && (FOLD || (ANYOF_FLAGS(ret) & ANYOF_CLASS)))
+       && ! depends_list
+       && ! unicode_alternate
+       && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+    {
+        _invlist_invert(cp_list);
  
-       /* Above-ASCII code points in /d have to stay in <nonbitmap>, as they
-        * possibly only should match when the target string is UTF-8 */
-       UV max_cp_to_set = (DEPENDS_SEMANTICS) ? 127 : 255;
+        /* Any swash can't be used as-is, because we've inverted things */
+        if (swash) {
+            SvREFCNT_dec(swash);
+            swash = NULL;
+        }
+
+       /* Clear the invert flag since have just done it here */
+       invert = FALSE;
+    }
+
+    /* If we didn't do folding, it's because some information isn't available
+     * until runtime; set the run-time fold flag for these.  (We don't have to
+     * worry about properties folding, as that is taken care of by the swash
+     * fetching) */
+    if (FOLD && (LOC || unicode_alternate))
+    {
+       ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD;
+    }
+
+    /* Some character classes are equivalent to other nodes.  Such nodes take
+     * up less room and generally fewer operations to execute than ANYOF nodes.
+     * Above, we checked for and optimized into some such equivalents for
+     * certain common classes that are easy to test.  Getting to this point in
+     * the code means that the class didn't get optimized there.  Since this
+     * code is only executed in Pass 2, it is too late to save space--it has
+     * been allocated in Pass 1, and currently isn't given back.  But turning
+     * things into an EXACTish node can allow the optimizer to join it to any
+     * adjacent such nodes.  And if the class is equivalent to things like /./,
+     * expensive run-time swashes can be avoided.  Now that we have more
+     * complete information, we can find things necessarily missed by the
+     * earlier code.  I (khw) am not sure how much to look for here.  It would
+     * be easy, but perhaps too slow, to check any candidates against all the
+     * node types they could possibly match using _invlistEQ(). */
+
+    if (cp_list
+        && ! unicode_alternate
+        && ! invert
+        && ! depends_list
+        && ! (ANYOF_FLAGS(ret) & ANYOF_CLASS)
+        && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+    {
+       UV start, end;
+       U8 op = END;  /* The optimzation node-type */
+        const char * cur_parse= RExC_parse;
+
+       invlist_iterinit(cp_list);
+       if (! invlist_iternext(cp_list, &start, &end)) {
+
+            /* Here, the list is empty.  This happens, for example, when a
+             * Unicode property is the only thing in the character class, and
+             * it doesn't match anything.  (perluniprops.pod notes such
+             * properties) */
+            op = OPFAIL;
+            *flagp |= HASWIDTH|SIMPLE;
+        }
+        else if (start == end) {    /* The range is a single code point */
+            if (! invlist_iternext(cp_list, &start, &end)
+
+                    /* Don't do this optimization if it would require changing
+                     * the pattern to UTF-8 */
+                && (start < 256 || UTF))
+            {
+                /* Here, the list contains a single code point.  Can optimize
+                 * into an EXACT node */
+
+                value = start;
+
+                if (! FOLD) {
+                    op = EXACT;
+                }
+                else if (LOC) {
+
+                    /* A locale node under folding with one code point can be
+                     * an EXACTFL, as its fold won't be calculated until
+                     * runtime */
+                    op = EXACTFL;
+                }
+                else {
+
+                    /* Here, we are generally folding, but there is only one
+                     * code point to match.  If we have to, we use an EXACT
+                     * node, but it would be better for joining with adjacent
+                     * nodes in the optimization pass if we used the same
+                     * EXACTFish node that any such are likely to be.  We can
+                     * do this iff the code point doesn't participate in any
+                     * folds.  For example, an EXACTF of a colon is the same as
+                     * an EXACT one, since nothing folds to or from a colon.
+                     * In the Latin1 range, being an alpha means that the
+                     * character participates in a fold (except for the
+                     * feminine and masculine ordinals, which I (khw) don't
+                     * think are worrying about optimizing for). */
+                    if (value < 256) {
+                        if (isALPHA_L1(value)) {
+                            op = EXACT;
+                        }
+                    }
+                    else {
+                        if (! PL_utf8_foldable) {
+                            SV* swash = swash_init("utf8", "_Perl_Any_Folds",
+                                                &PL_sv_undef, 1, 0);
+                            PL_utf8_foldable = _get_swash_invlist(swash);
+                            SvREFCNT_dec(swash);
+                        }
+                        if (_invlist_contains_cp(PL_utf8_foldable, value)) {
+                            op = EXACT;
+                        }
+                    }
+
+                    /* If we haven't found the node type, above, it means we
+                     * can use the prevailing one */
+                    if (op == END) {
+                        op = compute_EXACTish(pRExC_state);
+                    }
+                }
+            }
+        }
+        else if (start == 0) {
+            if (end == UV_MAX) {
+                op = SANY;
+                *flagp |= HASWIDTH|SIMPLE;
+                RExC_naughty++;
+            }
+            else if (end == '\n' - 1
+                    && invlist_iternext(cp_list, &start, &end)
+                    && start == '\n' + 1 && end == UV_MAX)
+            {
+                op = REG_ANY;
+                *flagp |= HASWIDTH|SIMPLE;
+                RExC_naughty++;
+            }
+        }
+
+        if (op != END) {
+            RExC_parse = (char *)orig_parse;
+            RExC_emit = (regnode *)orig_emit;
+
+            ret = reg_node(pRExC_state, op);
+
+            RExC_parse = (char *)cur_parse;
+
+            if (PL_regkind[op] == EXACT) {
+                alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value);
+            }
+
+            SvREFCNT_dec(listsv);
+            return ret;
+        }
+    }
+
+    /* Here, <cp_list> contains all the code points we can determine at
+     * compile time that match under all conditions.  Go through it, and
+     * for things that belong in the bitmap, put them there, and delete from
+     * <cp_list>.  While we are at it, see if everything above 255 is in the
+     * list, and if so, set a flag to speed up execution */
+    ANYOF_BITMAP_ZERO(ret);
+    if (cp_list) {
  
         /* This gets set if we actually need to modify things */
         bool change_invlist = FALSE;
  
         UV start, end;
  
-       /* Start looking through <nonbitmap> */
-       invlist_iterinit(nonbitmap);
-       while (invlist_iternext(nonbitmap, &start, &end)) {
+       /* Start looking through <cp_list> */
+       invlist_iterinit(cp_list);
+       while (invlist_iternext(cp_list, &start, &end)) {
             UV high;
             int i;
  
+            if (end == UV_MAX && start <= 256) {
+                ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
+            }
+
             /* Quit if are above what we should change */
-           if (start > max_cp_to_set) {
+           if (start > 255) {
                 break;
             }
  
             change_invlist = TRUE;
  
             /* Set all the bits in the range, up to the max that we are doing */
-           high = (end < max_cp_to_set) ? end : max_cp_to_set;
+           high = (end < 255) ? end : 255;
             for (i = start; i <= (int) high; i++) {
                 if (! ANYOF_BITMAP_TEST(ret, i)) {
                     ANYOF_BITMAP_SET(ret, i);
-                   stored++;
                     prevvalue = value;
                     value = i;
                 }
@@ -12137,249 +12866,33 @@ parseit:
         }
  
          /* Done with loop; remove any code points that are in the bitmap from
-         * <nonbitmap> */
+         * <cp_list> */
         if (change_invlist) {
-           _invlist_subtract(nonbitmap,
-                             (DEPENDS_SEMANTICS)
-                               ? PL_ASCII
-                               : PL_Latin1,
-                              &nonbitmap);
+           _invlist_subtract(cp_list, PL_Latin1, &cp_list);
         }
  
         /* If have completely emptied it, remove it completely */
-       if (invlist_len(nonbitmap) == 0) {
-           SvREFCNT_dec(nonbitmap);
-           nonbitmap = NULL;
+       if (_invlist_len(cp_list) == 0) {
+           SvREFCNT_dec(cp_list);
+           cp_list = NULL;
         }
      }
  
-    /* Here, we have calculated what code points should be in the character
-     * class.  <nonbitmap> does not overlap the bitmap except possibly in the
-     * case of DEPENDS rules.
-     *
-     * Now we can see about various optimizations.  Fold calculation (which we
-     * did above) needs to take place before inversion.  Otherwise /[^k]/i
-     * would invert to include K, which under /i would match k, which it
-     * shouldn't. */
-
-    /* Optimize inverted simple patterns (e.g. [^a-z]).  Note that we haven't
-     * set the FOLD flag yet, so this does optimize those.  It doesn't
-     * optimize locale.  Doing so perhaps could be done as long as there is
-     * nothing like \w in it; some thought also would have to be given to the
-     * interaction with above 0x100 chars */
-    if ((ANYOF_FLAGS(ret) & ANYOF_INVERT)
-        && ! LOC
-       && ! unicode_alternate
-       /* In case of /d, there are some things that should match only when in
-        * not in the bitmap, i.e., they require UTF8 to match.  These are
-        * listed in nonbitmap, but if ANYOF_NONBITMAP_NON_UTF8 is set in this
-        * case, they don't require UTF8, so can invert here */
-       && (! nonbitmap
-           || ! DEPENDS_SEMANTICS
-           || (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
-       && SvCUR(listsv) == initial_listsv_len)
-    {
-       int i;
-       if (! nonbitmap) {
-           for (i = 0; i < 256; ++i) {
-               if (ANYOF_BITMAP_TEST(ret, i)) {
-                   ANYOF_BITMAP_CLEAR(ret, i);
-               }
-               else {
-                   ANYOF_BITMAP_SET(ret, i);
-                   prevvalue = value;
-                   value = i;
-               }
-           }
-           /* The inversion means that everything above 255 is matched */
-           ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
-       }
-       else {
-           /* Here, also has things outside the bitmap that may overlap with
-            * the bitmap.  We have to sync them up, so that they get inverted
-            * in both places.  Earlier, we removed all overlaps except in the
-            * case of /d rules, so no syncing is needed except for this case
-            */
-           SV *remove_list = NULL;
-
-           if (DEPENDS_SEMANTICS) {
-               UV start, end;
-
-               /* Set the bits that correspond to the ones that aren't in the
-                * bitmap.  Otherwise, when we invert, we'll miss these.
-                * Earlier, we removed from the nonbitmap all code points
-                * < 128, so there is no extra work here */
-               invlist_iterinit(nonbitmap);
-               while (invlist_iternext(nonbitmap, &start, &end)) {
-                   if (start > 255) {  /* The bit map goes to 255 */
-                       break;
-                   }
-                   if (end > 255) {
-                       end = 255;
-                   }
-                   for (i = start; i <= (int) end; ++i) {
-                       ANYOF_BITMAP_SET(ret, i);
-                       prevvalue = value;
-                       value = i;
-                   }
-               }
-           }
-
-           /* Now invert both the bitmap and the nonbitmap.  Anything in the
-            * bitmap has to also be removed from the non-bitmap, but again,
-            * there should not be overlap unless is /d rules. */
-           _invlist_invert(nonbitmap);
-
-           /* Any swash can't be used as-is, because we've inverted things */
-           if (swash) {
-               SvREFCNT_dec(swash);
-               swash = NULL;
-           }
-
-           for (i = 0; i < 256; ++i) {
-               if (ANYOF_BITMAP_TEST(ret, i)) {
-                   ANYOF_BITMAP_CLEAR(ret, i);
-                   if (DEPENDS_SEMANTICS) {
-                       if (! remove_list) {
-                           remove_list = _new_invlist(2);
-                       }
-                       remove_list = add_cp_to_invlist(remove_list, i);
-                   }
-               }
-               else {
-                   ANYOF_BITMAP_SET(ret, i);
-                   prevvalue = value;
-                   value = i;
-               }
-           }
-
-           /* And do the removal */
-           if (DEPENDS_SEMANTICS) {
-               if (remove_list) {
-                   _invlist_subtract(nonbitmap, remove_list, &nonbitmap);
-                   SvREFCNT_dec(remove_list);
-               }
-           }
-           else {
-               /* There is no overlap for non-/d, so just delete anything
-                * below 256 */
-               _invlist_intersection(nonbitmap, PL_AboveLatin1, &nonbitmap);
-           }
-       }
-
-       stored = 256 - stored;
-
-       /* Clear the invert flag since have just done it here */
-       ANYOF_FLAGS(ret) &= ~ANYOF_INVERT;
-    }
-
-    /* Folding in the bitmap is taken care of above, but not for locale (for
-     * which we have to wait to see what folding is in effect at runtime), and
-     * for some things not in the bitmap (only the upper latin folds in this
-     * case, as all other single-char folding has been set above).  Set
-     * run-time fold flag for these */
-    if (FOLD && (LOC
-               || (DEPENDS_SEMANTICS
-                   && nonbitmap
-                   && ! (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
-               || unicode_alternate))
-    {
-       ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD;
+    if (invert) {
+        ANYOF_FLAGS(ret) |= ANYOF_INVERT;
      }
  
-    /* A single character class can be "optimized" into an EXACTish node.
-     * Note that since we don't currently count how many characters there are
-     * outside the bitmap, we are XXX missing optimization possibilities for
-     * them.  This optimization can't happen unless this is a truly single
-     * character class, which means that it can't be an inversion into a
-     * many-character class, and there must be no possibility of there being
-     * things outside the bitmap.  'stored' (only) for locales doesn't include
-     * \w, etc, so have to make a special test that they aren't present
-     *
-     * Similarly A 2-character class of the very special form like [bB] can be
-     * optimized into an EXACTFish node, but only for non-locales, and for
-     * characters which only have the two folds; so things like 'fF' and 'Ii'
-     * wouldn't work because they are part of the fold of 'LATIN SMALL LIGATURE
-     * FI'. */
-    if (! nonbitmap
-       && ! unicode_alternate
-       && SvCUR(listsv) == initial_listsv_len
-       && ! (ANYOF_FLAGS(ret) & (ANYOF_INVERT|ANYOF_UNICODE_ALL))
-        && (((stored == 1 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
-                              || (! ANYOF_CLASS_TEST_ANY_SET(ret)))))
-           || (stored == 2 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
-                                && (! _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value))
-                                /* If the latest code point has a fold whose
-                                 * bit is set, it must be the only other one */
-                               && ((prevvalue = PL_fold_latin1[value]) != (IV)value)
-                                && ANYOF_BITMAP_TEST(ret, prevvalue)))))
-    {
-        /* Note that the information needed to decide to do this optimization
-         * is not currently available until the 2nd pass, and that the actually
-        * used EXACTish node takes less space than the calculated ANYOF node,
-        * and hence the amount of space calculated in the first pass is larger
-         * than actually used, so this optimization doesn't gain us any space.
-        * But an EXACT node is faster than an ANYOF node, and can be combined
-        * with any adjacent EXACT nodes later by the optimizer for further
-        * gains.  The speed of executing an EXACTF is similar to an ANYOF
-        * node, so the optimization advantage comes from the ability to join
-        * it to adjacent EXACT nodes */
-
-        const char * cur_parse= RExC_parse;
-       U8 op;
-        RExC_emit = (regnode *)orig_emit;
-        RExC_parse = (char *)orig_parse;
-
-       if (stored == 1) {
-
-           /* A locale node with one point can be folded; all the other cases
-            * with folding will have two points, since we calculate them above
-            */
-           if (ANYOF_FLAGS(ret) & ANYOF_LOC_NONBITMAP_FOLD) {
-                op = EXACTFL;
-           }
-           else {
-               op = EXACT;
-           }
-       }
-       else {   /* else 2 chars in the bit map: the folds of each other */
-
-           /* Use the folded value, which for the cases where we get here,
-            * is just the lower case of the current one (which may resolve to
-            * itself, or to the other one */
-           value = toLOWER_LATIN1(value);
-
-           /* To join adjacent nodes, they must be the exact EXACTish type.
-            * Try to use the most likely type, by using EXACTFA if possible,
-            * then EXACTFU if the regex calls for it, or is required because
-            * the character is non-ASCII.  (If <value> is ASCII, its fold is
-            * also ASCII for the cases where we get here.) */
-           if (MORE_ASCII_RESTRICTED && isASCII(value)) {
-               op = EXACTFA;
-           }
-           else if (AT_LEAST_UNI_SEMANTICS || !isASCII(value)) {
-               op = EXACTFU;
-           }
-           else {    /* Otherwise, more likely to be EXACTF type */
-               op = EXACTF;
-           }
-       }
-
-       ret = reg_node(pRExC_state, op);
-        RExC_parse = (char *)cur_parse;
-       if (UTF && ! NATIVE_IS_INVARIANT(value)) {
-           *STRING(ret)= UTF8_EIGHT_BIT_HI((U8) value);
-           *(STRING(ret) + 1)= UTF8_EIGHT_BIT_LO((U8) value);
-           STR_LEN(ret)= 2;
-           RExC_emit += STR_SZ(2);
+    /* Here, the bitmap has been populated with all the Latin1 code points that
+     * always match.  Can now add to the overall list those that match only
+     * when the target string is UTF-8 (<depends_list>). */
+    if (depends_list) {
+       if (cp_list) {
+           _invlist_union(cp_list, depends_list, &cp_list);
+           SvREFCNT_dec(depends_list);
         }
         else {
-           *STRING(ret)= (char)value;
-           STR_LEN(ret)= 1;
-           RExC_emit += STR_SZ(1);
+           cp_list = depends_list;
         }
-       SvREFCNT_dec(listsv);
-        return ret;
      }
  
      /* If there is a swash and more than one element, we can't use the swash in
@@ -12388,8 +12901,9 @@ parseit:
         SvREFCNT_dec(swash);
         swash = NULL;
      }
-    if (! nonbitmap
-       && SvCUR(listsv) == initial_listsv_len
+
+    if (! cp_list
+       && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
         && ! unicode_alternate)
      {
         ARG_SET(ret, ANYOF_NONBITMAP_EMPTY);
@@ -12405,24 +12919,24 @@ parseit:
          *       swash is stored there now.
          * av[2] stores the multicharacter foldings, used later in
          *       regexec.c:S_reginclass().
-        * av[3] stores the nonbitmap inversion list for use in addition or
-        *       instead of av[0]; not used if av[1] isn't NULL
+        * av[3] stores the cp_list inversion list for use in addition or
+        *       instead of av[0]; used only if av[1] is NULL
          * av[4] is set if any component of the class is from a user-defined
-        *       property; not used if av[1] isn't NULL */
+        *       property; used only if av[1] is NULL */
         AV * const av = newAV();
         SV *rv;
  
-       av_store(av, 0, (SvCUR(listsv) == initial_listsv_len)
-                       ? &PL_sv_undef
-                       : listsv);
+       av_store(av, 0, (HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+                       ? listsv
+                       : &PL_sv_undef);
         if (swash) {
             av_store(av, 1, swash);
-           SvREFCNT_dec(nonbitmap);
+           SvREFCNT_dec(cp_list);
         }
         else {
             av_store(av, 1, NULL);
-           if (nonbitmap) {
-               av_store(av, 3, nonbitmap);
+           if (cp_list) {
+               av_store(av, 3, cp_list);
                 av_store(av, 4, newSVuv(has_user_defined_property));
             }
         }
@@ -12443,8 +12957,11 @@ parseit:
         RExC_rxi->data->data[n] = (void*)rv;
         ARG_SET(ret, n);
      }
+
+    *flagp |= HASWIDTH|SIMPLE;
      return ret;
  }
+#undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
  
  
  /* reg_skipcomment()
@@ -12535,7 +13052,7 @@ STATIC regnode *                        /* Location. */
  S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
  {
      dVAR;
-    register regnode *ptr;
+    regnode *ptr;
      regnode * const ret = RExC_emit;
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -12577,7 +13094,7 @@ STATIC regnode *                        /* Location. */
  S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
  {
      dVAR;
-    register regnode *ptr;
+    regnode *ptr;
      regnode * const ret = RExC_emit;
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -12649,9 +13166,9 @@ STATIC void
  S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
  {
      dVAR;
-    register regnode *src;
-    register regnode *dst;
-    register regnode *place;
+    regnode *src;
+    regnode *dst;
+    regnode *place;
      const int offset = regarglen[(U8)op];
      const int size = NODE_STEP_REGNODE + offset;
      GET_RE_DEBUG_FLAGS_DECL;
@@ -12737,7 +13254,7 @@ STATIC void
  S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,U32 depth)
  {
      dVAR;
-    register regnode *scan;
+    regnode *scan;
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_REGTAIL;
@@ -12796,7 +13313,7 @@ STATIC U8
  S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,U32 depth)
  {
      dVAR;
-    register regnode *scan;
+    regnode *scan;
      U8 exact = PSEUDO;
  #ifdef EXPERIMENTAL_INPLACESCAN
      I32 min = 0;
@@ -13037,7 +13554,41 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
  {
  #ifdef DEBUGGING
      dVAR;
-    register int k;
+    int k;
+
+    /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
+    static const char * const anyofs[] = {
+        "\\w",
+        "\\W",
+        "\\s",
+        "\\S",
+        "\\d",
+        "\\D",
+        "[:alnum:]",
+        "[:^alnum:]",
+        "[:alpha:]",
+        "[:^alpha:]",
+        "[:ascii:]",
+        "[:^ascii:]",
+        "[:cntrl:]",
+        "[:^cntrl:]",
+        "[:graph:]",
+        "[:^graph:]",
+        "[:lower:]",
+        "[:^lower:]",
+        "[:print:]",
+        "[:^print:]",
+        "[:punct:]",
+        "[:^punct:]",
+        "[:upper:]",
+        "[:^upper:]",
+        "[:xdigit:]",
+        "[:^xdigit:]",
+        "[:space:]",
+        "[:^space:]",
+        "[:blank:]",
+        "[:^blank:]"
+    };
      RXi_GET_DECL(prog,progi);
      GET_RE_DEBUG_FLAGS_DECL;
      
@@ -13158,39 +13709,6 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
         const U8 flags = ANYOF_FLAGS(o);
         int do_sep = 0;
  
-       /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
-       static const char * const anyofs[] = {
-           "\\w",
-           "\\W",
-           "\\s",
-           "\\S",
-           "\\d",
-           "\\D",
-           "[:alnum:]",
-           "[:^alnum:]",
-           "[:alpha:]",
-           "[:^alpha:]",
-           "[:ascii:]",
-           "[:^ascii:]",
-           "[:cntrl:]",
-           "[:^cntrl:]",
-           "[:graph:]",
-           "[:^graph:]",
-           "[:lower:]",
-           "[:^lower:]",
-           "[:print:]",
-           "[:^print:]",
-           "[:punct:]",
-           "[:^punct:]",
-           "[:upper:]",
-           "[:^upper:]",
-           "[:xdigit:]",
-           "[:^xdigit:]",
-           "[:space:]",
-           "[:^space:]",
-           "[:blank:]",
-           "[:^blank:]"
-       };
  
         if (flags & ANYOF_LOCALE)
             sv_catpvs(sv, "{loc}");
@@ -13328,6 +13846,15 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
  
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
      }
+    else if (k == POSIXD) {
+        U8 index = FLAGS(o) * 2;
+        if (index > (sizeof(anyofs) / sizeof(anyofs[0]))) {
+            Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
+        }
+        else {
+            sv_catpv(sv, anyofs[index]);
+        }
+    }
      else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
         Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
  #else
@@ -13438,7 +13965,6 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
  {
      struct regexp *ret;
      struct regexp *const r = (struct regexp *)SvANY(rx);
-    register const I32 npar = r->nparens+1;
  
      PERL_ARGS_ASSERT_REG_TEMP_COPY;
  
@@ -13458,8 +13984,11 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
      SvLEN_set(ret_x, 0);
      SvSTASH_set(ret_x, NULL);
      SvMAGIC_set(ret_x, NULL);
-    Newx(ret->offs, npar, regexp_paren_pair);
-    Copy(r->offs, ret->offs, npar, regexp_paren_pair);
+    if (r->offs) {
+        const I32 npar = r->nparens+1;
+        Newx(ret->offs, npar, regexp_paren_pair);
+        Copy(r->offs, ret->offs, npar, regexp_paren_pair);
+    }
      if (r->substrs) {
          Newx(ret->substrs, 1, struct reg_substr_data);
         StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
@@ -13829,7 +14358,7 @@ regnode *
  Perl_regnext(pTHX_ register regnode *p)
  {
      dVAR;
-    register I32 offset;
+    I32 offset;
  
      if (!p)
         return(NULL);
@@ -13901,8 +14430,6 @@ Perl_save_re_context(pTHX)
  
      Copy(&PL_reg_state, state, 1, struct re_save_state);
  
-    PL_reg_start_tmp = 0;
-    PL_reg_start_tmpl = 0;
      PL_reg_oldsaved = NULL;
      PL_reg_oldsavedlen = 0;
      PL_reg_maxiter = 0;
@@ -13990,8 +14517,8 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
             SV* sv, I32 indent, U32 depth)
  {
      dVAR;
-    register U8 op = PSEUDO;   /* Arbitrary non-END op. */
-    register const regnode *next;
+    U8 op = PSEUDO;    /* Arbitrary non-END op. */
+    const regnode *next;
      const regnode *optstart= NULL;
      
      RXi_GET_DECL(r,ri);
@@ -14042,9 +14569,9 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
         if (PL_regkind[(U8)op] == BRANCHJ) {
             assert(next);
             {
-                register const regnode *nnode = (OP(next) == LONGJMP
-                                            ? regnext((regnode *)next)
-                                            : next);
+                const regnode *nnode = (OP(next) == LONGJMP
+                                       ? regnext((regnode *)next)
+                                       : next);
                  if (last && nnode > last)
                      nnode = last;
                  DUMPUNTIL(NEXTOPER(NEXTOPER(node)), nnode);