Implement variable length lookbehind in regex patterns

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 59ef468..275945c 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -74,10 +74,6 @@
  #define PERL_IN_REGCOMP_C
  #include "perl.h"
  
-#ifndef PERL_IN_XSUB_RE
-#  include "INTERN.h"
-#endif
-
  #define REG_COMP_C
  #ifdef PERL_IN_XSUB_RE
  #  include "re_comp.h"
@@ -167,6 +163,7 @@ struct RExC_state_t {
      I32                seen_zerolen;
      regnode_offset *open_parens;       /* offsets to open parens */
      regnode_offset *close_parens;      /* offsets to close parens */
+    I32      parens_buf_size;           /* #slots malloced open/close_parens */
      regnode     *end_op;                /* END node in program */
      I32                utf8;           /* whether the pattern is utf8 or not */
      I32                orig_utf8;      /* whether the pattern was originally in utf8 */
@@ -197,6 +194,7 @@ struct RExC_state_t {
      scan_frame *frame_last;
      U32         frame_count;
      AV         *warn_text;
+    HV         *unlexed_names;
  #ifdef ADD_TO_REGEXEC
      char       *starttry;              /* -Dr: where regtry was called. */
  #define RExC_starttry  (pRExC_state->starttry)
@@ -256,6 +254,7 @@ struct RExC_state_t {
  #define RExC_maxlen        (pRExC_state->maxlen)
  #define RExC_npar      (pRExC_state->npar)
  #define RExC_total_parens      (pRExC_state->total_par)
+#define RExC_parens_buf_size   (pRExC_state->parens_buf_size)
  #define RExC_nestroot   (pRExC_state->nestroot)
  #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
  #define RExC_utf8      (pRExC_state->utf8)
@@ -284,6 +283,7 @@ struct RExC_state_t {
  #define RExC_warn_text (pRExC_state->warn_text)
  #define RExC_in_script_run      (pRExC_state->in_script_run)
  #define RExC_use_BRANCHJ        (pRExC_state->use_BRANCHJ)
+#define RExC_unlexed_names (pRExC_state->unlexed_names)
  
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
   * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -347,13 +347,15 @@ struct RExC_state_t {
                               } STMT_END
  
  /* Change from /d into /u rules, and restart the parse.  RExC_uni_semantics is
- * a flag that indicates we've changed to /u during the parse.  */
+ * a flag that indicates we need to override /d with /u as a result of
+ * something in the pattern.  It should only be used in regards to calling
+ * set_regex_charset() or get_regex_charse() */
  #define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
      STMT_START {                                                            \
              if (DEPENDS_SEMANTICS) {                                        \
                  set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
                  RExC_uni_semantics = 1;                                     \
-                if (RExC_seen_d_op && LIKELY(RExC_total_parens >= 0)) {     \
+                if (RExC_seen_d_op && LIKELY(! IN_PARENS_PASS)) {           \
                      /* No need to restart the parse if we haven't seen      \
                       * anything that differs between /u and /d, and no need \
                       * to restart immediately if we're going to reparse     \
@@ -364,11 +366,10 @@ struct RExC_state_t {
              }                                                               \
      } STMT_END
  
-#define BRANCH_MAX_OFFSET   U16_MAX
  #define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
      STMT_START {                                                            \
                  RExC_use_BRANCHJ = 1;                                       \
-                if (LIKELY(RExC_total_parens >= 0)) {                       \
+                if (LIKELY(! IN_PARENS_PASS)) {                             \
                      /* No need to restart the parse immediately if we're    \
                       * going to reparse anyway to count parens */           \
                      *flagp |= RESTART_PARSE;                                \
@@ -376,10 +377,19 @@ struct RExC_state_t {
                  }                                                           \
      } STMT_END
  
+/* Until we have completed the parse, we leave RExC_total_parens at 0 or
+ * less.  After that, it must always be positive, because the whole re is
+ * considered to be surrounded by virtual parens.  Setting it to negative
+ * indicates there is some construct that needs to know the actual number of
+ * parens to be properly handled.  And that means an extra pass will be
+ * required after we've counted them all */
+#define ALL_PARENS_COUNTED (RExC_total_parens > 0)
  #define REQUIRE_PARENS_PASS                                                 \
-    STMT_START {                                                            \
-                    if (RExC_total_parens == 0) RExC_total_parens = -1;     \
+    STMT_START {  /* No-op if have completed a pass */                      \
+                    if (! ALL_PARENS_COUNTED) RExC_total_parens = -1;       \
      } STMT_END
+#define IN_PARENS_PASS (RExC_total_parens < 0)
+
  
  /* This is used to return failure (zero) early from the calling function if
   * various flags in 'flags' are set.  Two flags always cause a return:
@@ -1544,6 +1554,10 @@ S_ssc_is_cp_posixl_init(const RExC_state_t *pRExC_state,
      return TRUE;
  }
  
+#define INVLIST_INDEX 0
+#define ONLY_LOCALE_MATCHES_INDEX 1
+#define DEFERRED_USER_DEFINED_INDEX 2
+
  STATIC SV*
  S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
                                 const regnode_charclass* const node)
@@ -1554,6 +1568,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
       * returned list must, and will, contain every code point that is a
       * possibility. */
  
+    dVAR;
      SV* invlist = NULL;
      SV* only_utf8_locale_invlist = NULL;
      unsigned int i;
@@ -1569,28 +1584,24 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
          SV **const ary = AvARRAY(av);
          assert(RExC_rxi->data->what[n] == 's');
  
-        if (ary[1] && ary[1] != &PL_sv_undef) { /* Has compile-time swash */
-            invlist = sv_2mortal(invlist_clone(_get_swash_invlist(ary[1]), NULL));
-        }
-        else if (ary[0] && ary[0] != &PL_sv_undef) {
+        if (av_tindex_skip_len_mg(av) >= DEFERRED_USER_DEFINED_INDEX) {
  
-            /* Here, no compile-time swash, and there are things that won't be
-             * known until runtime -- we have to assume it could be anything */
+            /* Here there are things that won't be known until runtime -- we
+             * have to assume it could be anything */
              invlist = sv_2mortal(_new_invlist(1));
              return _add_range_to_invlist(invlist, 0, UV_MAX);
          }
-        else if (ary[3] && ary[3] != &PL_sv_undef) {
+        else if (ary[INVLIST_INDEX]) {
  
-            /* Here no compile-time swash, and no run-time only data.  Use the
-             * node's inversion list */
-            invlist = sv_2mortal(invlist_clone(ary[3], NULL));
+            /* Use the node's inversion list */
+            invlist = sv_2mortal(invlist_clone(ary[INVLIST_INDEX], NULL));
          }
  
          /* Get the code points valid only under UTF-8 locales */
-        if ((ANYOF_FLAGS(node) & ANYOFL_FOLD)
-            && ary[2] && ary[2] != &PL_sv_undef)
+        if (   (ANYOF_FLAGS(node) & ANYOFL_FOLD)
+            &&  av_tindex_skip_len_mg(av) >= ONLY_LOCALE_MATCHES_INDEX)
          {
-            only_utf8_locale_invlist = ary[2];
+            only_utf8_locale_invlist = ary[ONLY_LOCALE_MATCHES_INDEX];
          }
      }
  
@@ -1615,15 +1626,19 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      }
  
      /* Add in the points from the bit map */
-    for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
-        if (ANYOF_BITMAP_TEST(node, i)) {
-            unsigned int start = i++;
+    if (OP(node) != ANYOFH) {
+        for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
+            if (ANYOF_BITMAP_TEST(node, i)) {
+                unsigned int start = i++;
  
-            for (; i < NUM_ANYOF_CODE_POINTS && ANYOF_BITMAP_TEST(node, i); ++i) {
-                /* empty */
+                for (;    i < NUM_ANYOF_CODE_POINTS
+                       && ANYOF_BITMAP_TEST(node, i); ++i)
+                {
+                    /* empty */
+                }
+                invlist = _add_range_to_invlist(invlist, start, i-1);
+                new_node_has_latin1 = TRUE;
              }
-            invlist = _add_range_to_invlist(invlist, start, i-1);
-            new_node_has_latin1 = TRUE;
          }
      }
  
@@ -1645,11 +1660,26 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
          _invlist_invert(invlist);
      }
-    else if (new_node_has_latin1 && ANYOF_FLAGS(node) & ANYOFL_FOLD) {
+    else if (ANYOF_FLAGS(node) & ANYOFL_FOLD) {
+        if (new_node_has_latin1) {
  
-        /* Under /li, any 0-255 could fold to any other 0-255, depending on the
-         * locale.  We can skip this if there are no 0-255 at all. */
-        _invlist_union(invlist, PL_Latin1, &invlist);
+            /* Under /li, any 0-255 could fold to any other 0-255, depending on
+             * the locale.  We can skip this if there are no 0-255 at all. */
+            _invlist_union(invlist, PL_Latin1, &invlist);
+
+            invlist = add_cp_to_invlist(invlist, LATIN_SMALL_LETTER_DOTLESS_I);
+            invlist = add_cp_to_invlist(invlist, LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+        }
+        else {
+            if (_invlist_contains_cp(invlist, LATIN_SMALL_LETTER_DOTLESS_I)) {
+                invlist = add_cp_to_invlist(invlist, 'I');
+            }
+            if (_invlist_contains_cp(invlist,
+                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE))
+            {
+                invlist = add_cp_to_invlist(invlist, 'i');
+            }
+        }
      }
  
      /* Similarly add the UTF-8 locale possible matches.  These have to be
@@ -2036,11 +2066,11 @@ S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
      U32 count = 0;      /* Running total of number of code points matched by
                             'ssc' */
      UV start, end;      /* Start and end points of current range in inversion
-                           list */
+                           XXX outdated.  UTF-8 locales are common, what about invert? list */
      const U32 max_code_points = (LOC)
                                  ?  256
-                                : ((   ! UNI_SEMANTICS
-                                     || invlist_highest(ssc->invlist) < 256)
+                                : ((  ! UNI_SEMANTICS
+                                    ||  invlist_highest(ssc->invlist) < 256)
                                    ? 128
                                    : NON_OTHER_COUNT);
      const U32 max_match = max_code_points / 2;
@@ -2088,8 +2118,7 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
  
      populate_ANYOF_from_invlist( (regnode *) ssc, &invlist);
  
-    set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist,
-                                NULL, NULL, NULL, FALSE);
+    set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL);
  
      /* Make sure is clone-safe */
      ssc->invlist = NULL;
@@ -2651,7 +2680,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
      switch (flags) {
          case EXACT: case EXACT_ONLY8: case EXACTL: break;
         case EXACTFAA:
-        case EXACTFU_SS:
+        case EXACTFUP:
         case EXACTFU:
         case EXACTFLU8: folder = PL_fold_latin1; break;
         case EXACTF:  folder = PL_fold; break;
@@ -2673,7 +2702,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          trie_words = newAV();
      });
  
-    re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
+    re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, GV_ADD);
      assert(re_trie_maxbuff);
      if (!SvIOK(re_trie_maxbuff)) {
          sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
@@ -2742,7 +2771,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
              && (    OP(noper) == flags
                  || (flags == EXACT && OP(noper) == EXACT_ONLY8)
                  || (flags == EXACTFU && (   OP(noper) == EXACTFU_ONLY8
-                                         || OP(noper) == EXACTFU_SS))) )
+                                         || OP(noper) == EXACTFUP))))
          {
              uc= (U8*)STRING(noper);
              e= uc + STR_LEN(noper);
@@ -2755,7 +2784,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          if ( set_bit ) { /* bitmap only alloced when !(UTF&&Folding) */
              TRIE_BITMAP_SET(trie,*uc); /* store the raw first byte
                                            regardless of encoding */
-            if (OP( noper ) == EXACTFU_SS) {
+            if (OP( noper ) == EXACTFUP) {
                  /* false positives are ok, so just set this */
                  TRIE_BITMAP_SET(trie, LATIN_SMALL_LETTER_SHARP_S);
              }
@@ -2960,7 +2989,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                  && (    OP(noper) == flags
                      || (flags == EXACT && OP(noper) == EXACT_ONLY8)
                      || (flags == EXACTFU && (   OP(noper) == EXACTFU_ONLY8
-                                             || OP(noper) == EXACTFU_SS))) )
+                                             || OP(noper) == EXACTFUP))))
              {
                  const U8 *uc= (U8*)STRING(noper);
                  const U8 *e= uc + STR_LEN(noper);
@@ -3185,7 +3214,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                  && (    OP(noper) == flags
                      || (flags == EXACT && OP(noper) == EXACT_ONLY8)
                      || (flags == EXACTFU && (   OP(noper) == EXACTFU_ONLY8
-                                             || OP(noper) == EXACTFU_SS))) )
+                                             || OP(noper) == EXACTFUP))))
              {
                  const U8 *uc= (U8*)STRING(noper);
                  const U8 *e= uc + STR_LEN(noper);
@@ -3800,7 +3829,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   * require special handling.  The joining is only done if:
   * 1) there is room in the current conglomerated node to entirely contain the
   *    next one.
- * 2) they are the exact same node type
+ * 2) they are compatible node types
   *
   * The adjacent nodes actually may be separated by NOTHING-kind nodes, and
   * these get optimized out
@@ -3837,7 +3866,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   * Both sides fold to "sss", but if the pattern is parsed to create a node that
   * would match just the \xDF, it won't be able to handle the case where a
   * successful match would have to cross the node's boundary.  The new approach
- * that hopefully generally solves the problem generates an EXACTFU_SS node
+ * that hopefully generally solves the problem generates an EXACTFUP node
   * that is "sss" in this case.
   *
   * It turns out that there are problems with all multi-character folds, and not
@@ -3853,26 +3882,46 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   *      adjust the min length of the match, and the delta between min and max,
   *      so that the optimizer doesn't reject these possibilities based on size
   *      constraints.
- * 2)   For the sequence involving the Sharp s (\xDF), the node type EXACTFU_SS
- *      is used for an EXACTFU node that contains at least one "ss" sequence in
- *      it.  For non-UTF-8 patterns and strings, this is the only case where
- *      there is a possible fold length change.  That means that a regular
- *      EXACTFU node without UTF-8 involvement doesn't have to concern itself
- *      with length changes, and so can be processed faster.  regexec.c takes
- *      advantage of this.  Generally, an EXACTFish node that is in UTF-8 is
- *      pre-folded by regcomp.c (except EXACTFL, some of whose folds aren't
- *      known until runtime).  This saves effort in regex matching.  However,
- *      the pre-folding isn't done for non-UTF8 patterns because the fold of
- *      the MICRO SIGN requires UTF-8, and we don't want to slow things down by
- *      forcing the pattern into UTF8 unless necessary.  Also what EXACTF (and,
- *      again, EXACTFL) nodes fold to isn't known until runtime.  The fold
- *      possibilities for the non-UTF8 patterns are quite simple, except for
- *      the sharp s.  All the ones that don't involve a UTF-8 target string are
- *      members of a fold-pair, and arrays are set up for all of them so that
- *      the other member of the pair can be found quickly.  Code elsewhere in
- *      this file makes sure that in EXACTFU nodes, the sharp s gets folded to
- *      'ss', even if the pattern isn't UTF-8.  This avoids the issues
- *      described in the next item.
+ *
+ * 2)   For the sequence involving the LATIN SMALL LETTER SHARP S (U+00DF)
+ *      under /u, we fold it to 'ss' in regatom(), and in this routine, after
+ *      joining, we scan for occurrences of the sequence 'ss' in non-UTF-8
+ *      EXACTFU nodes.  The node type of such nodes is then changed to
+ *      EXACTFUP, indicating it is problematic, and needs careful handling.
+ *      (The procedures in step 1) above are sufficient to handle this case in
+ *      UTF-8 encoded nodes.)  The reason this is problematic is that this is
+ *      the only case where there is a possible fold length change in non-UTF-8
+ *      patterns.  By reserving a special node type for problematic cases, the
+ *      far more common regular EXACTFU nodes can be processed faster.
+ *      regexec.c takes advantage of this.
+ *
+ *      EXACTFUP has been created as a grab-bag for (hopefully uncommon)
+ *      problematic cases.   These all only occur when the pattern is not
+ *      UTF-8.  In addition to the 'ss' sequence where there is a possible fold
+ *      length change, it handles the situation where the string cannot be
+ *      entirely folded.  The strings in an EXACTFish node are folded as much
+ *      as possible during compilation in regcomp.c.  This saves effort in
+ *      regex matching.  By using an EXACTFUP node when it is not possible to
+ *      fully fold at compile time, regexec.c can know that everything in an
+ *      EXACTFU node is folded, so folding can be skipped at runtime.  The only
+ *      case where folding in EXACTFU nodes can't be done at compile time is
+ *      the presumably uncommon MICRO SIGN, when the pattern isn't UTF-8.  This
+ *      is because its fold requires UTF-8 to represent.  Thus EXACTFUP nodes
+ *      handle two very different cases.  Alternatively, there could have been
+ *      a node type where there are length changes, one for unfolded, and one
+ *      for both.  If yet another special case needed to be created, the number
+ *      of required node types would have to go to 7.  khw figures that even
+ *      though there are plenty of node types to spare, that the maintenance
+ *      cost wasn't worth the small speedup of doing it that way, especially
+ *      since he thinks the MICRO SIGN is rarely encountered in practice.
+ *
+ *      There are other cases where folding isn't done at compile time, but
+ *      none of them are under /u, and hence not for EXACTFU nodes.  The folds
+ *      in EXACTFL nodes aren't known until runtime, and vary as the locale
+ *      changes.  Some folds in EXACTF depend on if the runtime target string
+ *      is UTF-8 or not.  (regatom() will create an EXACTFU node even under /di
+ *      when no fold in it depends on the UTF-8ness of the target string.)
+ *
   * 3)   A problem remains for unfolded multi-char folds. (These occur when the
   *      validity of the fold won't be known until runtime, and so must remain
   *      unfolded for now.  This happens for the sharp s in EXACTF and EXACTFAA
@@ -3884,7 +3933,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   *      that a character in the pattern corresponds to at most a single
   *      character in the target string.  (And I do mean character, and not byte
   *      here, unlike other parts of the documentation that have never been
- *      updated to account for multibyte Unicode.)  sharp s in EXACTF and
+ *      updated to account for multibyte Unicode.)  Sharp s in EXACTF and
   *      EXACTFL nodes can match the two character string 'ss'; in EXACTFAA
   *      nodes it can match "\x{17F}\x{17F}".  These, along with other ones in
   *      EXACTFL nodes, violate the assumption, and they are the only instances
@@ -3931,6 +3980,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                     U32 flags, regnode *val, U32 depth)
  {
      /* Merge several consecutive EXACTish nodes into one. */
+
      regnode *n = regnext(scan);
      U32 stringok = 1;
      regnode *next = scan + NODE_SZ_STR(scan);
@@ -3950,11 +4000,13 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
  #endif
      DEBUG_PEEP("join", scan, depth, 0);
  
+    assert(PL_regkind[OP(scan)] == EXACT);
+
      /* Look through the subsequent nodes in the chain.  Skip NOTHING, merge
       * EXACT ones that are mergeable to the current one. */
-    while (n
-           && (PL_regkind[OP(n)] == NOTHING
-               || (stringok && OP(n) == OP(scan)))
+    while (    n
+           && (    PL_regkind[OP(n)] == NOTHING
+               || (stringok && PL_regkind[OP(n)] == EXACT))
             && NEXT_OFF(n)
             && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX)
      {
@@ -3982,6 +4034,105 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
              if (oldl + STR_LEN(n) > U8_MAX)
                  break;
  
+            /* Joining something that requires UTF-8 with something that
+             * doesn't, means the result requires UTF-8. */
+            if (OP(scan) == EXACT && (OP(n) == EXACT_ONLY8)) {
+                OP(scan) = EXACT_ONLY8;
+            }
+            else if (OP(scan) == EXACT_ONLY8 && (OP(n) == EXACT)) {
+                ;   /* join is compatible, no need to change OP */
+            }
+            else if ((OP(scan) == EXACTFU) && (OP(n) == EXACTFU_ONLY8)) {
+                OP(scan) = EXACTFU_ONLY8;
+            }
+            else if ((OP(scan) == EXACTFU_ONLY8) && (OP(n) == EXACTFU)) {
+                ;   /* join is compatible, no need to change OP */
+            }
+            else if (OP(scan) == EXACTFU && OP(n) == EXACTFU) {
+                ;   /* join is compatible, no need to change OP */
+            }
+            else if (OP(scan) == EXACTFU && OP(n) == EXACTFU_S_EDGE) {
+
+                 /* Under /di, temporary EXACTFU_S_EDGE nodes are generated,
+                  * which can join with EXACTFU ones.  We check for this case
+                  * here.  These need to be resolved to either EXACTFU or
+                  * EXACTF at joining time.  They have nothing in them that
+                  * would forbid them from being the more desirable EXACTFU
+                  * nodes except that they begin and/or end with a single [Ss].
+                  * The reason this is problematic is because they could be
+                  * joined in this loop with an adjacent node that ends and/or
+                  * begins with [Ss] which would then form the sequence 'ss',
+                  * which matches differently under /di than /ui, in which case
+                  * EXACTFU can't be used.  If the 'ss' sequence doesn't get
+                  * formed, the nodes get absorbed into any adjacent EXACTFU
+                  * node.  And if the only adjacent node is EXACTF, they get
+                  * absorbed into that, under the theory that a longer node is
+                  * better than two shorter ones, even if one is EXACTFU.  Note
+                  * that EXACTFU_ONLY8 is generated only for UTF-8 patterns,
+                  * and the EXACTFU_S_EDGE ones only for non-UTF-8.  */
+
+                if (STRING(n)[STR_LEN(n)-1] == 's') {
+
+                    /* Here the joined node would end with 's'.  If the node
+                     * following the combination is an EXACTF one, it's better to
+                     * join this trailing edge 's' node with that one, leaving the
+                     * current one in 'scan' be the more desirable EXACTFU */
+                    if (OP(nnext) == EXACTF) {
+                        break;
+                    }
+
+                    OP(scan) = EXACTFU_S_EDGE;
+
+                }   /* Otherwise, the beginning 's' of the 2nd node just
+                       becomes an interior 's' in 'scan' */
+            }
+            else if (OP(scan) == EXACTF && OP(n) == EXACTF) {
+                ;   /* join is compatible, no need to change OP */
+            }
+            else if (OP(scan) == EXACTF && OP(n) == EXACTFU_S_EDGE) {
+
+                /* EXACTF nodes are compatible for joining with EXACTFU_S_EDGE
+                 * nodes.  But the latter nodes can be also joined with EXACTFU
+                 * ones, and that is a better outcome, so if the node following
+                 * 'n' is EXACTFU, quit now so that those two can be joined
+                 * later */
+                if (OP(nnext) == EXACTFU) {
+                    break;
+                }
+
+                /* The join is compatible, and the combined node will be
+                 * EXACTF.  (These don't care if they begin or end with 's' */
+            }
+            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU_S_EDGE) {
+                if (   STRING(scan)[STR_LEN(scan)-1] == 's'
+                    && STRING(n)[0] == 's')
+                {
+                    /* When combined, we have the sequence 'ss', which means we
+                     * have to remain /di */
+                    OP(scan) = EXACTF;
+                }
+            }
+            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU) {
+                if (STRING(n)[0] == 's') {
+                    ;   /* Here the join is compatible and the combined node
+                           starts with 's', no need to change OP */
+                }
+                else {  /* Now the trailing 's' is in the interior */
+                    OP(scan) = EXACTFU;
+                }
+            }
+            else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTF) {
+
+                /* The join is compatible, and the combined node will be
+                 * EXACTF.  (These don't care if they begin or end with 's' */
+                OP(scan) = EXACTF;
+            }
+            else if (OP(scan) != OP(n)) {
+
+                /* The only other compatible joinings are the same node type */
+                break;
+            }
+
              DEBUG_PEEP("merg", n, depth, 0);
              merged++;
  
@@ -4011,6 +4162,12 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
  #endif
      }
  
+    /* This temporary node can now be turned into EXACTFU, and must, as
+     * regexec.c doesn't handle it */
+    if (OP(scan) == EXACTFU_S_EDGE) {
+        OP(scan) = EXACTFU;
+    }
+
      *min_subtract = 0;
      *unfolded_multi_char = FALSE;
  
@@ -4096,19 +4253,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                      continue;
                  }
  
-                /* Nodes with 'ss' require special handling, except for
-                 * EXACTFAA-ish for which there is no multi-char fold to this */
-                if (len == 2 && *s == 's' && *(s+1) == 's'
-                    && OP(scan) != EXACTFAA
-                    && OP(scan) != EXACTFAA_NO_TRIE)
-                {
-                    count = 2;
-                    if (OP(scan) != EXACTFL) {
-                        OP(scan) = EXACTFU_SS;
-                    }
-                    s += 2;
-                }
-                else { /* Here is a generic multi-char fold. */
+                { /* Here is a generic multi-char fold. */
                      U8* multi_end  = s + len;
  
                      /* Count how many characters are in it.  In the case of
@@ -4208,7 +4353,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                       * which we don't know until runtime.  EXACTFL nodes can't
                       * transform into EXACTFU nodes */
                      if (OP(scan) != EXACTF && OP(scan) != EXACTFL) {
-                        OP(scan) = EXACTFU_SS;
+                        OP(scan) = EXACTFUP;
                      }
                 }
  
@@ -4217,6 +4362,23 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
             }
  #endif
         }
+
+        if (     STR_LEN(scan) == 1
+            &&   isALPHA_A(* STRING(scan))
+            &&  (         OP(scan) == EXACTFAA
+                 || (     OP(scan) == EXACTFU
+                     && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(scan)))))
+        {
+            U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
+
+            /* Replace a length 1 ASCII fold pair node with an ANYOFM node,
+             * with the mask set to the complement of the bit that differs
+             * between upper and lower case, and the lowest code point of the
+             * pair (which the '&' forces) */
+            OP(scan) = ANYOFM;
+            ARG_SET(scan, *STRING(scan) & mask);
+            FLAGS(scan) = mask;
+        }
      }
  
  #ifdef DEBUGGING
@@ -4275,6 +4437,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         /* recursed: which subroutines have we recursed into */
                         /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
  {
+    dVAR;
      /* There must be at least this number of characters to match */
      SSize_t min = 0;
      I32 pars = 0, code;
@@ -4675,7 +4838,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  EXACT_ONLY8     | EXACT
                                  EXACTFU         | EXACTFU
                                  EXACTFU_ONLY8   | EXACTFU
-                                EXACTFU_SS      | EXACTFU
+                                EXACTFUP        | EXACTFU
                                  EXACTFAA        | EXACTFAA
                                  EXACTL          | EXACTL
                                  EXACTFLU8       | EXACTFLU8
@@ -4688,7 +4851,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                           ? EXACT                                            \
                           : (     EXACTFU == (X)                             \
                                || EXACTFU_ONLY8 == (X)                       \
-                              || EXACTFU_SS == (X) )                        \
+                              || EXACTFUP == (X) )                          \
                             ? EXACTFU                                        \
                             : ( EXACTFAA == (X) )                            \
                               ? EXACTFAA                                     \
@@ -5149,6 +5312,35 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 min++;
                 /* FALLTHROUGH */
             case STAR:
+                next = NEXTOPER(scan);
+
+                /* This temporary node can now be turned into EXACTFU, and
+                 * must, as regexec.c doesn't handle it */
+                if (OP(next) == EXACTFU_S_EDGE) {
+                    OP(next) = EXACTFU;
+                }
+
+                if (     STR_LEN(next) == 1
+                    &&   isALPHA_A(* STRING(next))
+                    && (         OP(next) == EXACTFAA
+                        || (     OP(next) == EXACTFU
+                            && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next)))))
+                {
+                    /* These differ in just one bit */
+                    U8 mask = ~ ('A' ^ 'a');
+
+                    assert(isALPHA_A(* STRING(next)));
+
+                    /* Then replace it by an ANYOFM node, with
+                    * the mask set to the complement of the
+                    * bit that differs between upper and lower
+                    * case, and the lowest code point of the
+                    * pair (which the '&' forces) */
+                    OP(next) = ANYOFM;
+                    ARG_SET(next, *STRING(next) & mask);
+                    FLAGS(next) = mask;
+                }
+
                 if (flags & SCF_DO_STCLASS) {
                     mincount = 0;
                     maxcount = REG_INFTY;
@@ -5637,6 +5829,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  case ANYOFD:
                  case ANYOFL:
                  case ANYOFPOSIXL:
+                case ANYOFH:
                  case ANYOF:
                     if (flags & SCF_DO_STCLASS_AND)
                         ssc_and(pRExC_state, data->start_class,
@@ -5702,21 +5895,11 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      }
                      break;
  
-                case NASCII:
-                    invert = 1;
-                    /* FALLTHROUGH */
-               case ASCII:
-                    my_invlist = invlist_clone(PL_Posix_ptrs[_CC_ASCII], NULL);
-
-                    /* This can be handled as a Posix class */
-                    goto join_posix_and_ascii;
-
                  case NPOSIXA:   /* For these, we always know the exact set of
                                     what's matched */
                      invert = 1;
                      /* FALLTHROUGH */
                 case POSIXA:
-                    assert(FLAGS(scan) != _CC_ASCII);
                      my_invlist = invlist_clone(PL_Posix_ptrs[FLAGS(scan)], NULL);
                      goto join_posix_and_ascii;
  
@@ -5803,14 +5986,27 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                        last, &data_fake, stopparen,
                                        recursed_depth, NULL, f, depth+1);
                  if (scan->flags) {
-                    if (deltanext) {
-                       FAIL("Variable length lookbehind not implemented");
-                    }
-                    else if (minnext > (I32)U8_MAX) {
+                    if (   deltanext < 0
+                        || deltanext > (I32) U8_MAX
+                        || minnext > (I32)U8_MAX
+                        || minnext + deltanext > (I32)U8_MAX)
+                    {
                         FAIL2("Lookbehind longer than %" UVuf " not implemented",
                                (UV)U8_MAX);
                      }
-                    scan->flags = (U8)minnext;
+
+                    /* The 'next_off' field has been repurposed to count the
+                     * additional starting positions to try beyond the initial
+                     * one.  (This leaves it at 0 for non-variable length
+                     * matches to avoid breakage for those not using this
+                     * extension) */
+                    if (deltanext) {
+                        scan->next_off = deltanext;
+                        ckWARNexperimental(RExC_parse,
+                            WARN_EXPERIMENTAL__VLB,
+                            "Variable length lookbehind is experimental");
+                    }
+                    scan->flags = (U8)minnext + deltanext;
                  }
                  if (data) {
                      if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
@@ -5895,14 +6091,21 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                          stopparen, recursed_depth, NULL,
                                          f, depth+1);
                  if (scan->flags) {
-                    if (deltanext) {
-                       FAIL("Variable length lookbehind not implemented");
-                    }
-                    else if (*minnextp > (I32)U8_MAX) {
+                    assert(0);  /* This code has never been tested since this
+                                   is normally not compiled */
+                    if (   deltanext < 0
+                        || deltanext > (I32) U8_MAX
+                        || *minnextp > (I32)U8_MAX
+                        || *minnextp + deltanext > (I32)U8_MAX)
+                    {
                         FAIL2("Lookbehind longer than %" UVuf " not implemented",
                                (UV)U8_MAX);
                      }
-                    scan->flags = (U8)*minnextp;
+
+                    if (deltanext) {
+                        scan->next_off = deltanext;
+                    }
+                    scan->flags = (U8)*minnextp + deltanext;
                  }
  
                  *minnextp += min;
@@ -7128,6 +7331,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                     OP *expr, const regexp_engine* eng, REGEXP *old_re,
                      bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags)
  {
+    dVAR;
      REGEXP *Rx;         /* Capital 'R' means points to a REGEXP */
      STRLEN plen;
      char *exp;
@@ -7185,6 +7389,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      }
  
      pRExC_state->warn_text = NULL;
+    pRExC_state->unlexed_names = NULL;
      pRExC_state->code_blocks = NULL;
  
      if (is_bare_re)
@@ -7291,10 +7496,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      /* ignore the utf8ness if the pattern is 0 length */
      RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
-
-    RExC_uni_semantics = RExC_utf8; /* UTF-8 implies unicode semantics;
-                                       otherwise we may find later this should
-                                       be 1 */
+    RExC_uni_semantics = 0;
      RExC_contains_locale = 0;
      RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
      RExC_in_script_run = 0;
@@ -7365,11 +7567,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      rx_flags = orig_rx_flags;
  
-    if (initial_charset == REGEX_DEPENDS_CHARSET && RExC_uni_semantics) {
+    if (   (UTF || RExC_uni_semantics)
+        && initial_charset == REGEX_DEPENDS_CHARSET)
+    {
  
         /* Set to use unicode semantics if the pattern is in utf8 and has the
          * 'depends' charset specified, as it means unicode when utf8  */
         set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
+        RExC_uni_semantics = 1;
      }
  
      RExC_pm_flags = pm_flags;
@@ -7483,6 +7688,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      RExC_naughty = 0;
      RExC_npar = 1;
+    RExC_parens_buf_size = 0;
      RExC_emit_start = RExC_rxi->program;
      pRExC_state->code_index = 0;
  
@@ -7492,9 +7698,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      /* Do the parse */
      if (reg(pRExC_state, 0, &flags, 1)) {
  
-        /* Success!, But if RExC_total_parens < 0, we need to redo the parse
-         * knowing how many parens there actually are */
-        if (RExC_total_parens < 0) {
+        /* Success!, But we may need to redo the parse knowing how many parens
+         * there actually are */
+        if (IN_PARENS_PASS) {
              flags |= RESTART_PARSE;
          }
  
@@ -7536,7 +7742,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
              DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse\n"));
          }
  
-        if (RExC_total_parens > 0) {
+        if (ALL_PARENS_COUNTED) {
              /* Make enough room for all the known parens, and zero it */
              Renew(RExC_open_parens, RExC_total_parens, regnode_offset);
              Zero(RExC_open_parens, RExC_total_parens, regnode_offset);
@@ -8634,7 +8840,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
              /* It might be a forward reference; we can't fail until we
                  * know, by completing the parse to get all the groups, and
                  * then reparsing */
-            if (RExC_total_parens > 0)  {
+            if (ALL_PARENS_COUNTED)  {
                  vFAIL("Reference to nonexistent named group");
              }
              else {
@@ -8649,7 +8855,6 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  }
  
  #define DEBUG_PARSE_MSG(funcname)     DEBUG_PARSE_r({           \
-    int num;                                                    \
      if (RExC_lastparse!=RExC_parse) {                           \
          Perl_re_printf( aTHX_  "%s",                            \
              Perl_pv_pretty(aTHX_ RExC_mysv1, RExC_parse,        \
@@ -8665,16 +8870,15 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
      } else                                                      \
          Perl_re_printf( aTHX_ "%16s","");                       \
                                                                  \
-    num=REG_NODE_NUM(REGNODE_p(RExC_emit));                     \
-    if (RExC_lastnum!=num)                                      \
-       Perl_re_printf( aTHX_ "|%4d", num);                      \
+    if (RExC_lastnum!=RExC_emit)                                \
+       Perl_re_printf( aTHX_ "|%4d", RExC_emit);                \
      else                                                        \
         Perl_re_printf( aTHX_ "|%4s","");                        \
      Perl_re_printf( aTHX_ "|%*s%-4s",                           \
          (int)((depth*2)), "",                                   \
          (funcname)                                              \
      );                                                          \
-    RExC_lastnum=num;                                           \
+    RExC_lastnum=RExC_emit;                                     \
      RExC_lastparse=RExC_parse;                                  \
  })
  
@@ -8958,9 +9162,7 @@ Perl__new_invlist(pTHX_ IV initial_size)
         initial_size = 10;
      }
  
-    /* Allocate the initial space */
      new_list = newSV_type(SVt_INVLIST);
-
      initialize_invlist_guts(new_list, initial_size);
  
      return new_list;
@@ -9216,100 +9418,6 @@ Perl__invlist_search(SV* const invlist, const UV cp)
  }
  
  void
-Perl__invlist_populate_swatch(SV* const invlist,
-                              const UV start, const UV end, U8* swatch)
-{
-    /* populates a swatch of a swash the same way swatch_get() does in utf8.c,
-     * but is used when the swash has an inversion list.  This makes this much
-     * faster, as it uses a binary search instead of a linear one.  This is
-     * intimately tied to that function, and perhaps should be in utf8.c,
-     * except it is intimately tied to inversion lists as well.  It assumes
-     * that <swatch> is all 0's on input */
-
-    UV current = start;
-    const IV len = _invlist_len(invlist);
-    IV i;
-    const UV * array;
-
-    PERL_ARGS_ASSERT__INVLIST_POPULATE_SWATCH;
-
-    if (len == 0) { /* Empty inversion list */
-        return;
-    }
-
-    array = invlist_array(invlist);
-
-    /* Find which element it is */
-    i = _invlist_search(invlist, start);
-
-    /* We populate from <start> to <end> */
-    while (current < end) {
-        UV upper;
-
-       /* The inversion list gives the results for every possible code point
-        * after the first one in the list.  Only those ranges whose index is
-        * even are ones that the inversion list matches.  For the odd ones,
-        * and if the initial code point is not in the list, we have to skip
-        * forward to the next element */
-        if (i == -1 || ! ELEMENT_RANGE_MATCHES_INVLIST(i)) {
-            i++;
-            if (i >= len) { /* Finished if beyond the end of the array */
-                return;
-            }
-            current = array[i];
-           if (current >= end) {   /* Finished if beyond the end of what we
-                                      are populating */
-                if (LIKELY(end < UV_MAX)) {
-                    return;
-                }
-
-                /* We get here when the upper bound is the maximum
-                 * representable on the machine, and we are looking for just
-                 * that code point.  Have to special case it */
-                i = len;
-                goto join_end_of_list;
-            }
-        }
-        assert(current >= start);
-
-       /* The current range ends one below the next one, except don't go past
-        * <end> */
-        i++;
-        upper = (i < len && array[i] < end) ? array[i] : end;
-
-       /* Here we are in a range that matches.  Populate a bit in the 3-bit U8
-        * for each code point in it */
-        for (; current < upper; current++) {
-            const STRLEN offset = (STRLEN)(current - start);
-            swatch[offset >> 3] |= 1 << (offset & 7);
-        }
-
-      join_end_of_list:
-
-       /* Quit if at the end of the list */
-        if (i >= len) {
-
-           /* But first, have to deal with the highest possible code point on
-            * the platform.  The previous code assumes that <end> is one
-            * beyond where we want to populate, but that is impossible at the
-            * platform's infinity, so have to handle it specially */
-            if (UNLIKELY(end == UV_MAX && ELEMENT_RANGE_MATCHES_INVLIST(len-1)))
-           {
-                const STRLEN offset = (STRLEN)(end - start);
-                swatch[offset >> 3] |= 1 << (offset & 7);
-            }
-            return;
-        }
-
-       /* Advance to the next range, which will be for code points not in the
-        * inversion list */
-        current = array[i];
-    }
-
-    return;
-}
-
-void
  Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
                                           const bool complement_b, SV** output)
  {
@@ -10145,18 +10253,15 @@ Perl__invlist_invert(pTHX_ SV* const invlist)
  SV*
  Perl_invlist_clone(pTHX_ SV* const invlist, SV* new_invlist)
  {
-
      /* Return a new inversion list that is a copy of the input one, which is
       * unchanged.  The new list will not be mortal even if the old one was. */
  
-    const STRLEN nominal_length = _invlist_len(invlist);    /* Why not +1 XXX */
+    const STRLEN nominal_length = _invlist_len(invlist);
      const STRLEN physical_length = SvCUR(invlist);
      const bool offset = *(get_invlist_offset_addr(invlist));
  
      PERL_ARGS_ASSERT_INVLIST_CLONE;
  
-    /* Need to allocate extra space to accommodate Perl's addition of a
-     * trailing NUL to SvPV's, since it thinks they are always strings */
      if (new_invlist == NULL) {
          new_invlist = _new_invlist(nominal_length);
      }
@@ -10304,7 +10409,7 @@ S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
      invlist_iterinit(invlist);
      while (invlist_iternext(invlist, &start, &end)) {
         if (end == UV_MAX) {
-           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFINITY%c",
+           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c",
                                            start, intra_range_delimiter,
                                                   inter_range_delimiter);
         }
@@ -10338,7 +10443,7 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
           [0] 0x000A .. 0x000D
           [2] 0x0085
           [4] 0x2028 .. 0x2029
-         [6] 0x3104 .. INFINITY
+         [6] 0x3104 .. INFTY
       * This means that the first range of code points matched by the list are
       * 0xA through 0xD; the second range contains only the single code point
       * 0x85, etc.  An inversion list is an array of UVs.  Two array elements
@@ -10362,7 +10467,7 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
      while (invlist_iternext(invlist, &start, &end)) {
         if (end == UV_MAX) {
             Perl_dump_indent(aTHX_ level, file,
-                                       "%s[%" UVuf "] 0x%04" UVXf " .. INFINITY\n",
+                                       "%s[%" UVuf "] 0x%04" UVXf " .. INFTY\n",
                                     indent, (UV)count, start);
         }
         else if (end != start) {
@@ -10388,13 +10493,30 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
       * identical.  The final argument, if TRUE, says to take the complement of
       * the second inversion list before doing the comparison */
  
-    const UV* array_a = invlist_array(a);
-    const UV* array_b = invlist_array(b);
-    UV len_a = _invlist_len(a);
+    const UV len_a = _invlist_len(a);
      UV len_b = _invlist_len(b);
  
+    const UV* array_a = NULL;
+    const UV* array_b = NULL;
+
      PERL_ARGS_ASSERT__INVLISTEQ;
  
+    /* This code avoids accessing the arrays unless it knows the length is
+     * non-zero */
+
+    if (len_a == 0) {
+        if (len_b == 0) {
+            return ! complement_b;
+        }
+    }
+    else {
+        array_a = invlist_array(a);
+    }
+
+    if (len_b != 0) {
+        array_b = invlist_array(b);
+    }
+
      /* If are to compare 'a' with the complement of b, set it
       * up so are looking at b's complement. */
      if (complement_b) {
@@ -10404,7 +10526,7 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
          if (len_b == 0) {
              return (len_a == 1 && array_a[0] == 0);
          }
-        else if (array_b[0] == 0) {
+        if (array_b[0] == 0) {
  
              /* Otherwise, to complement, we invert.  Here, the first element is
               * 0, just remove it.  To do this, we just pretend the array starts
@@ -10430,7 +10552,8 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
  
  /*
   * As best we can, determine the characters that can match the start of
- * the given EXACTF-ish node.
+ * the given EXACTF-ish node.  This is for use in creating ssc nodes, so there
+ * can be false positive matches
   *
   * Returns the invlist as a new SV*; it is the caller's responsibility to
   * call SvREFCNT_dec() when done with it.
@@ -10438,6 +10561,7 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
  STATIC SV*
  S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
  {
+    dVAR;
      const U8 * s = (U8*)STRING(node);
      SSize_t bytelen = STR_LEN(node);
      UV uc;
@@ -10462,9 +10586,14 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
          }
          else {
              /* Any Latin1 range character can potentially match any
-             * other depending on the locale */
+             * other depending on the locale, and in Turkic locales, U+130 and
+             * U+131 */
              if (OP(node) == EXACTFL) {
                  _invlist_union(invlist, PL_Latin1, &invlist);
+                invlist = add_cp_to_invlist(invlist,
+                                                LATIN_SMALL_LETTER_DOTLESS_I);
+                invlist = add_cp_to_invlist(invlist,
+                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
              }
              else {
                  /* But otherwise, it matches at least itself.  We can
@@ -10545,19 +10674,19 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
          }
          else {  /* Single char fold */
              unsigned int k;
-            unsigned int first_folds_to;
-            const unsigned int * remaining_folds_to_list;
-            Size_t folds_to_count;
+            unsigned int first_fold;
+            const unsigned int * remaining_folds;
+            Size_t folds_count;
  
              /* It matches itself */
              invlist = add_cp_to_invlist(invlist, fc);
  
              /* ... plus all the things that fold to it, which are found in
               * PL_utf8_foldclosures */
-            folds_to_count = _inverse_folds(fc, &first_folds_to,
-                                                &remaining_folds_to_list);
-            for (k = 0; k < folds_to_count; k++) {
-                UV c = (k == 0) ? first_folds_to : remaining_folds_to_list[k-1];
+            folds_count = _inverse_folds(fc, &first_fold,
+                                                &remaining_folds);
+            for (k = 0; k < folds_count; k++) {
+                UV c = (k == 0) ? first_fold : remaining_folds[k-1];
  
                  /* /aa doesn't allow folds between ASCII and non- */
                  if (   (OP(node) == EXACTFAA || OP(node) == EXACTFAA_NO_TRIE)
@@ -10568,6 +10697,26 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
  
                  invlist = add_cp_to_invlist(invlist, c);
              }
+
+            if (OP(node) == EXACTFL) {
+
+                /* If either [iI] are present in an EXACTFL node the above code
+                 * should have added its normal case pair, but under a Turkish
+                 * locale they could match instead the case pairs from it.  Add
+                 * those as potential matches as well */
+                if (isALPHA_FOLD_EQ(fc, 'I')) {
+                    invlist = add_cp_to_invlist(invlist,
+                                                LATIN_SMALL_LETTER_DOTLESS_I);
+                    invlist = add_cp_to_invlist(invlist,
+                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                }
+                else if (fc == LATIN_SMALL_LETTER_DOTLESS_I) {
+                    invlist = add_cp_to_invlist(invlist, 'I');
+                }
+                else if (fc == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
+                    invlist = add_cp_to_invlist(invlist, 'i');
+                }
+            }
          }
      }
  
@@ -10613,16 +10762,18 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
          RExC_parse++;
          has_use_defaults = TRUE;
          STD_PMMOD_FLAGS_CLEAR(&RExC_flags);
-        set_regex_charset(&RExC_flags, (RExC_uni_semantics)
-                                        ? REGEX_UNICODE_CHARSET
-                                        : REGEX_DEPENDS_CHARSET);
+        cs = (RExC_uni_semantics)
+             ? REGEX_UNICODE_CHARSET
+             : REGEX_DEPENDS_CHARSET;
+        set_regex_charset(&RExC_flags, cs);
      }
-
-    cs = get_regex_charset(RExC_flags);
-    if (cs == REGEX_DEPENDS_CHARSET
-        && (RExC_uni_semantics))
-    {
-        cs = REGEX_UNICODE_CHARSET;
+    else {
+        cs = get_regex_charset(RExC_flags);
+        if (   cs == REGEX_DEPENDS_CHARSET
+            && RExC_uni_semantics)
+        {
+            cs = REGEX_UNICODE_CHARSET;
+        }
      }
  
      while (RExC_parse < RExC_end) {
@@ -10881,6 +11032,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
      I32 freeze_paren = 0;
      I32 after_freeze = 0;
      I32 num; /* numeric backreferences */
+    SV * max_open;  /* Max number of unclosed parens */
  
      char * parse_start = RExC_parse; /* MJD */
      char * const oregcomp_parse = RExC_parse;
@@ -10890,6 +11042,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
      PERL_ARGS_ASSERT_REG;
      DEBUG_PARSE("reg ");
  
+
+    max_open = get_sv(RE_COMPILE_RECURSION_LIMIT, GV_ADD);
+    assert(max_open);
+    if (!SvIOK(max_open)) {
+        sv_setiv(max_open, RE_COMPILE_RECURSION_INIT);
+    }
+    if (depth > 4 * SvIV(max_open)) { /* We increase depth by 4 for each open
+                                         paren */
+        vFAIL("Too many nested open parens");
+    }
+
      *flagp = 0;                                /* Tentatively. */
  
      /* Having this true makes it feasible to have a lot fewer tests for the
@@ -11475,7 +11638,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                          /* It might be a forward reference; we can't fail until
                           * we know, by completing the parse to get all the
                           * groups, and then reparsing */
-                        if (RExC_total_parens > 0)  {
+                        if (ALL_PARENS_COUNTED)  {
                              RExC_parse++;
                              vFAIL("Reference to nonexistent group");
                          }
@@ -11501,7 +11664,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      /* It might be a forward reference; we can't fail until we
                       * know, by completing the parse to get all the groups, and
                       * then reparsing */
-                    if (RExC_total_parens > 0)  {
+                    if (ALL_PARENS_COUNTED)  {
                          if (num >= RExC_total_parens) {
                              RExC_parse++;
                              vFAIL("Reference to nonexistent group");
@@ -11832,34 +11995,47 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
           capturing_parens:
             parno = RExC_npar;
             RExC_npar++;
-            if (RExC_total_parens <= 0) {
+            if (! ALL_PARENS_COUNTED) {
                  /* If we are in our first pass through (and maybe only pass),
                   * we  need to allocate memory for the capturing parentheses
-                 * data structures.  Since we start at npar=1, when it reaches
-                 * 2, for the first time it has something to put in it.  Above
-                 * 2 means we extend what we already have */
-                if (RExC_npar == 2) {
+                 * data structures.
+                 */
+
+                if (!RExC_parens_buf_size) {
+                    /* first guess at number of parens we might encounter */
+                    RExC_parens_buf_size = 10;
+
                      /* setup RExC_open_parens, which holds the address of each
                       * OPEN tag, and to make things simpler for the 0 index the
                       * start of the program - this is used later for offsets */
-                    Newxz(RExC_open_parens, RExC_npar, regnode_offset);
+                    Newxz(RExC_open_parens, RExC_parens_buf_size,
+                            regnode_offset);
                      RExC_open_parens[0] = 1;    /* +1 for REG_MAGIC */
  
                      /* setup RExC_close_parens, which holds the address of each
                       * CLOSE tag, and to make things simpler for the 0 index
                       * the end of the program - this is used later for offsets
                       * */
-                    Newxz(RExC_close_parens, RExC_npar, regnode_offset);
+                    Newxz(RExC_close_parens, RExC_parens_buf_size,
+                            regnode_offset);
                      /* we dont know where end op starts yet, so we dont need to
                       * set RExC_close_parens[0] like we do RExC_open_parens[0]
                       * above */
                  }
-                else {
-                    Renew(RExC_open_parens, RExC_npar, regnode_offset);
-                    Zero(RExC_open_parens + RExC_npar - 1, 1, regnode_offset);
+                else if (RExC_npar > RExC_parens_buf_size) {
+                    I32 old_size = RExC_parens_buf_size;
  
-                    Renew(RExC_close_parens, RExC_npar, regnode_offset);
-                    Zero(RExC_close_parens + RExC_npar - 1, 1, regnode_offset);
+                    RExC_parens_buf_size *= 2;
+
+                    Renew(RExC_open_parens, RExC_parens_buf_size,
+                            regnode_offset);
+                    Zero(RExC_open_parens + old_size,
+                            RExC_parens_buf_size - old_size, regnode_offset);
+
+                    Renew(RExC_close_parens, RExC_parens_buf_size,
+                            regnode_offset);
+                    Zero(RExC_close_parens + old_size,
+                            RExC_parens_buf_size - old_size, regnode_offset);
                  }
              }
  
@@ -11871,7 +12047,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
                      "%*s%*s Setting open paren #%" IVdf " to %d\n",
                      22, "|    |", (int)(depth * 2 + 1), "",
-                    (IV)parno, REG_NODE_NUM(REGNODE_p(ret))));
+                    (IV)parno, ret));
                  RExC_open_parens[parno]= ret;
              }
  
@@ -11941,7 +12117,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              RETURN_FAIL_ON_RESTART(flags, flagp);
              FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags);
          }
-        REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
+        if (!  REGTAIL(pRExC_state, lastbr, br)) {  /* BRANCH -> BRANCH. */
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
         lastbr = br;
         *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
      }
@@ -11960,7 +12138,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
                          "%*s%*s Setting close paren #%" IVdf " to %d\n",
                          22, "|    |", (int)(depth * 2 + 1), "",
-                        (IV)parno, REG_NODE_NUM(REGNODE_p(ender))));
+                        (IV)parno, ender));
                  RExC_close_parens[parno]= ender;
                 if (RExC_nestroot == parno)
                     RExC_nestroot = 0;
@@ -11994,7 +12172,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
                      "%*s%*s Setting close paren #0 (END) to %d\n",
                      22, "|    |", (int)(depth * 2 + 1), "",
-                    REG_NODE_NUM(REGNODE_p(ender))));
+                    ender));
  
                  RExC_close_parens[0]= ender;
              }
@@ -12006,13 +12184,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              regprop(RExC_rx, RExC_mysv2, REGNODE_p(ender), NULL, pRExC_state);
              Perl_re_printf( aTHX_  "~ tying lastbr %s (%" IVdf ") to ender %s (%" IVdf ") offset %" IVdf "\n",
                            SvPV_nolen_const(RExC_mysv1),
-                          (IV)REG_NODE_NUM(REGNODE_p(lastbr)),
+                          (IV)lastbr,
                            SvPV_nolen_const(RExC_mysv2),
-                          (IV)REG_NODE_NUM(REGNODE_p(ender)),
+                          (IV)ender,
                            (IV)(ender - lastbr)
              );
          );
-        REGTAIL(pRExC_state, lastbr, ender);
+        if (! REGTAIL(pRExC_state, lastbr, ender)) {
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
  
         if (have_branch) {
              char is_nothing= 1;
@@ -12023,9 +12203,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             for (br = REGNODE_p(ret); br; br = regnext(br)) {
                 const U8 op = PL_regkind[OP(br)];
                 if (op == BRANCH) {
-                    REGTAIL_STUDY(pRExC_state,
-                                  REGNODE_OFFSET(NEXTOPER(br)),
-                                  ender);
+                    if (! REGTAIL_STUDY(pRExC_state,
+                                        REGNODE_OFFSET(NEXTOPER(br)),
+                                        ender))
+                    {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                      if ( OP(NEXTOPER(br)) != NOTHING
                           || regnext(NEXTOPER(br)) != REGNODE_p(ender))
                          is_nothing= 0;
@@ -12056,7 +12239,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                    SvPV_nolen_const(RExC_mysv1),
                                    (IV)REG_NODE_NUM(ret_as_regnode),
                                    SvPV_nolen_const(RExC_mysv2),
-                                  (IV)REG_NODE_NUM(REGNODE_p(ender)),
+                                  (IV)ender,
                                    (IV)(ender - ret)
                      );
                  );
@@ -12092,14 +12275,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
             Set_Node_Offset(REGNODE_p(ret), parse_start + 1);
             FLAGS(REGNODE_p(ret)) = flag;
-            REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL));
+            if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL)))
+            {
+                REQUIRE_BRANCHJ(flagp, 0);
+            }
         }
      }
  
      /* Check for proper termination. */
      if (paren) {
-        /* restore original flags, but keep (?p) and, if we've changed from /d
-         * rules to /u, keep the /u */
+        /* restore original flags, but keep (?p) and, if we've encountered
+         * something in the parse that changes /d rules into /u, keep the /u */
         RExC_flags = oregflags | (RExC_flags & RXf_PMf_KEEPCOPY);
          if (DEPENDS_SEMANTICS && RExC_uni_semantics) {
              set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
@@ -12186,14 +12372,12 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
             /* FIXME adding one for every branch after the first is probably
              * excessive now we have TRIE support. (hv) */
             MARK_NAUGHTY(1);
-            if (     chain > (SSize_t) BRANCH_MAX_OFFSET
-                && ! RExC_use_BRANCHJ)
-            {
+            if (! REGTAIL(pRExC_state, chain, latest)) {
                  /* XXX We could just redo this branch, but figuring out what
-                 * bookkeeping needs to be reset is a pain */
+                 * bookkeeping needs to be reset is a pain, and it's likely
+                 * that other branches that goto END will also be too large */
                  REQUIRE_BRANCHJ(flagp, 0);
              }
-            REGTAIL(pRExC_state, chain, latest);
         }
         chain = latest;
         c++;
@@ -12493,20 +12677,23 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
    * points) that this \N sequence matches.  This is set, and the input is
    * parsed for errors, even if the function returns FALSE, as detailed below.
    *
-  * There are 5 possibilities here, as detailed in the next 5 paragraphs.
+  * There are 6 possibilities here, as detailed in the next 6 paragraphs.
    *
    * Probably the most common case is for the \N to specify a single code point.
    * *cp_count will be set to 1, and *code_point_p will be set to that code
    * point.
    *
-  * Another possibility is for the input to be an empty \N{}, which for
-  * backwards compatibility we accept.  *cp_count will be set to 0. *node_p
-  * will be set to a generated NOTHING node.
+  * Another possibility is for the input to be an empty \N{}.  This is no
+  * longer accepted, and will generate a fatal error.
+  *
+  * Another possibility is for a custom charnames handler to be in effect which
+  * translates the input name to an empty string.  *cp_count will be set to 0.
+  * *node_p will be set to a generated NOTHING node.
    *
    * Still another possibility is for the \N to mean [^\n]. *cp_count will be
    * set to 0. *node_p will be set to a generated REG_ANY node.
    *
-  * The fourth possibility is that \N resolves to a sequence of more than one
+  * The fifth possibility is that \N resolves to a sequence of more than one
    * code points.  *cp_count will be set to the number of code points in the
    * sequence. *node_p will be set to a generated node returned by this
    * function calling S_reg().
@@ -12514,7 +12701,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
    * The final possibility is that it is premature to be calling this function;
    * the parse needs to be restarted.  This can happen when this changes from
    * /d to /u rules, or when the pattern needs to be upgraded to UTF-8.  The
-  * latter occurs only when the fourth possibility would otherwise be in
+  * latter occurs only when the fifth possibility would otherwise be in
    * effect, and is because one of those code points requires the pattern to be
    * recompiled as UTF-8.  The function returns FALSE, and sets the
    * RESTART_PARSE and NEED_UTF8 flags in *flagp, as appropriate.  When this
@@ -12531,12 +12718,11 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
    * so we need a way to take a snapshot of what they resolve to at the time of
    * the original parse. [perl #56444].
    *
-  * That parsing is skipped for single-quoted regexes, so we may here get
-  * '\N{NAME}'.  This is a fatal error.  These names have to be resolved by the
-  * parser.  But if the single-quoted regex is something like '\N{U+41}', that
-  * is legal and handled here.  The code point is Unicode, and has to be
-  * translated into the native character set for non-ASCII platforms.
-  */
+  * That parsing is skipped for single-quoted regexes, so here we may get
+  * '\N{NAME}', which is parsed now.  If the single-quoted regex is something
+  * like '\N{U+41}', that code point is Unicode, and has to be translated into
+  * the native character set for non-ASCII platforms.  The other possibilities
+  * are already native, so no translation is done. */
  
      char * endbrace;    /* points to '}' following the name */
      char* p = RExC_parse; /* Temporary */
@@ -12545,7 +12731,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
      char *orig_end;
      char *save_start;
      I32 flags;
-    Size_t count = 0;   /* code point count kept internally by this function */
  
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -12568,7 +12753,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
      /* Disambiguate between \N meaning a named character versus \N meaning
       * [^\n].  The latter is assumed when the {...} following the \N is a legal
-     * quantifier, or there is no '{' at all */
+     * quantifier, or if there is no '{' at all */
      if (*p != '{' || regcurly(p)) {
          RExC_parse = p;
          if (cp_count) {
@@ -12601,15 +12786,19 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          vFAIL2("Missing right brace on \\%c{}", 'N');
      }
  
-    /* Here, we have decided it should be a named character or sequence */
-    REQUIRE_UNI_RULES(flagp, FALSE); /* Unicode named chars imply Unicode
-                                        semantics */
+    /* Here, we have decided it should be a named character or sequence.  These
+     * imply Unicode semantics */
+    REQUIRE_UNI_RULES(flagp, FALSE);
  
-    if (endbrace == RExC_parse) {   /* empty: \N{} */
+    /* \N{_} is what toke.c returns to us to indicate a name that evaluates to
+     * nothing at all (not allowed under strict) */
+    if (endbrace - RExC_parse == 1 && *RExC_parse == '_') {
+        RExC_parse = endbrace;
          if (strict) {
              RExC_parse++;   /* Position after the "}" */
              vFAIL("Zero length \\N{}");
          }
+
          if (cp_count) {
              *cp_count = 0;
          }
@@ -12622,15 +12811,122 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          return TRUE;
      }
  
-    /* If we haven't got something that begins with 'U+', then it didn't get lexed. */
-    if (   endbrace - RExC_parse < 2
-        || strnNE(RExC_parse, "U+", 2))
-    {
-        RExC_parse = endbrace;  /* position msg's '<--HERE' */
-        vFAIL("\\N{NAME} must be resolved by the lexer");
-    }
+    if (endbrace - RExC_parse < 2 || ! strBEGINs(RExC_parse, "U+")) {
+
+        /* Here, the name isn't of the form  U+....  This can happen if the
+         * pattern is single-quoted, so didn't get evaluated in toke.c.  Now
+         * is the time to find out what the name means */
+
+        const STRLEN name_len = endbrace - RExC_parse;
+        SV *  value_sv;     /* What does this name evaluate to */
+        SV ** value_svp;
+        const U8 * value;   /* string of name's value */
+        STRLEN value_len;   /* and its length */
+
+        /*  RExC_unlexed_names is a hash of names that weren't evaluated by
+         *  toke.c, and their values. Make sure is initialized */
+        if (! RExC_unlexed_names) {
+            RExC_unlexed_names = newHV();
+        }
+
+        /* If we have already seen this name in this pattern, use that.  This
+         * allows us to only call the charnames handler once per name per
+         * pattern.  A broken or malicious handler could return something
+         * different each time, which could cause the results to vary depending
+         * on if something gets added or subtracted from the pattern that
+         * causes the number of passes to change, for example */
+        if ((value_svp = hv_fetch(RExC_unlexed_names, RExC_parse,
+                                                      name_len, 0)))
+        {
+            value_sv = *value_svp;
+        }
+        else { /* Otherwise we have to go out and get the name */
+            const char * error_msg = NULL;
+            value_sv = get_and_check_backslash_N_name(RExC_parse, endbrace,
+                                                      UTF,
+                                                      &error_msg);
+            if (error_msg) {
+                RExC_parse = endbrace;
+                vFAIL(error_msg);
+            }
+
+            /* If no error message, should have gotten a valid return */
+            assert (value_sv);
+
+            /* Save the name's meaning for later use */
+            if (! hv_store(RExC_unlexed_names, RExC_parse, name_len,
+                           value_sv, 0))
+            {
+                Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed");
+            }
+        }
+
+        /* Here, we have the value the name evaluates to in 'value_sv' */
+        value = (U8 *) SvPV(value_sv, value_len);
+
+        /* See if the result is one code point vs 0 or multiple */
+        if (value_len > 0 && value_len <= ((SvUTF8(value_sv))
+                                           ? UTF8SKIP(value)
+                                           : 1))
+        {
+            /* Here, exactly one code point.  If that isn't what is wanted,
+             * fail */
+            if (! code_point_p) {
+                RExC_parse = p;
+                return FALSE;
+            }
+
+            /* Convert from string to numeric code point */
+            *code_point_p = (SvUTF8(value_sv))
+                            ? valid_utf8_to_uvchr(value, NULL)
+                            : *value;
+
+            /* Have parsed this entire single code point \N{...}.  *cp_count
+             * has already been set to 1, so don't do it again. */
+            RExC_parse = endbrace;
+            nextchar(pRExC_state);
+            return TRUE;
+        } /* End of is a single code point */
+
+        /* Count the code points, if caller desires.  The API says to do this
+         * even if we will later return FALSE */
+        if (cp_count) {
+            *cp_count = 0;
+
+            *cp_count = (SvUTF8(value_sv))
+                        ? utf8_length(value, value + value_len)
+                        : value_len;
+        }
+
+        /* Fail if caller doesn't want to handle a multi-code-point sequence.
+         * But don't back the pointer up if the caller wants to know how many
+         * code points there are (they need to handle it themselves in this
+         * case).  */
+        if (! node_p) {
+            if (! cp_count) {
+                RExC_parse = p;
+            }
+            return FALSE;
+        }
+
+        /* Convert this to a sub-pattern of the form "(?: ... )", and then call
+         * reg recursively to parse it.  That way, it retains its atomicness,
+         * while not having to worry about any special handling that some code
+         * points may have. */
  
-        /* This code purposely indented below because of future changes coming */
+        substitute_parse = newSVpvs("?:");
+        sv_catsv(substitute_parse, value_sv);
+        sv_catpv(substitute_parse, ")");
+
+#ifdef EBCDIC
+        /* The value should already be native, so no need to convert on EBCDIC
+         * platforms.*/
+        assert(! RExC_recode_x_to_native);
+#endif
+
+    }
+    else {   /* \N{U+...} */
+        Size_t count = 0;   /* code point count kept internally */
  
          /* We can get to here when the input is \N{U+...} or when toke.c has
           * converted a name to the \N{U+...} form.  This include changing a
@@ -12765,6 +13061,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          RExC_recode_x_to_native = 1;
  #endif
  
+    }
+
      /* Here, we have the string the name evaluates to, ready to be parsed,
       * stored in 'substitute_parse' as a series of valid "\x{...}\x{...}"
       * constructs.  This can be called from within a substitute parse already.
@@ -12828,185 +13126,24 @@ S_compute_EXACTish(RExC_state_t *pRExC_state)
      return op + EXACTF;
  }
  
-PERL_STATIC_INLINE void
-S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
-                         regnode_offset node, I32* flagp, STRLEN len,
-                         UV code_point, bool downgradable)
+STATIC bool
+S_new_regcurly(const char *s, const char *e)
  {
-    /* This knows the details about sizing an EXACTish node, setting flags for
-     * it (by setting <*flagp>, and potentially populating it with a single
-     * character.
-     *
-     * If <len> (the length in bytes) is non-zero, this function assumes that
-     * the node has already been populated, and just does the sizing.  In this
-     * case <code_point> should be the final code point that has already been
-     * placed into the node.  This value will be ignored except that under some
-     * circumstances <*flagp> is set based on it.
-     *
-     * If <len> is zero, the function assumes that the node is to contain only
-     * the single character given by <code_point> and calculates what <len>
-     * should be.  It populates the node's STRING with <code_point> or its
-     * fold if folding.
-     *
-     * In both cases <*flagp> is appropriately set
-     *
-     * It knows that under FOLD, the Latin Sharp S and UTF characters above
-     * 255, must be folded (the former only when the rules indicate it can
-     * match 'ss')
+    /* This is a temporary function designed to match the most lenient form of
+     * a {m,n} quantifier we ever envision, with either number omitted, and
+     * spaces anywhere between/before/after them.
       *
-     * When it does the populating, it looks at the flag 'downgradable'.  If
-     * true with a node that folds, it checks if the single code point
-     * participates in a fold, and if not downgrades the node to an EXACT.
-     * This helps the optimizer */
-
-    bool len_passed_in = cBOOL(len != 0);
-    U8 character[UTF8_MAXBYTES_CASE+1];
-
-    PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT;
-
-    if (! len_passed_in) {
-        if (UTF) {
-            if (UVCHR_IS_INVARIANT(code_point)) {
-                if (LOC || ! FOLD) {    /* /l defers folding until runtime */
-                    *character = (U8) code_point;
-                }
-                else { /* Here is /i and not /l. */
-                    *character = toFOLD((U8) code_point);
-
-                    /* We can downgrade to an EXACT node if this character
-                     * isn't a folding one.  Note that this assumes that
-                     * nothing above Latin1 folds to some other invariant than
-                     * one of these alphabetics; otherwise we would also have
-                     * to check:
-                     *  && (! HAS_NONLATIN1_FOLD_CLOSURE(code_point)
-                     *      || ASCII_FOLD_RESTRICTED))
-                     */
-                    if (downgradable && PL_fold[code_point] == code_point) {
-                        OP(REGNODE_p(node)) = EXACT;
-                    }
-                }
-                len = 1;
-            }
-            else if (FOLD && (   ! LOC
-                              || ! is_PROBLEMATIC_LOCALE_FOLD_cp(code_point)))
-            {   /* Folding, and ok to do so now */
-                UV folded = _to_uni_fold_flags(
-                                   code_point,
-                                   character,
-                                   &len,
-                                   FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
-                                                      ? FOLD_FLAGS_NOMIX_ASCII
-                                                      : 0));
-                if (downgradable
-                    && folded == code_point /* This quickly rules out many
-                                               cases, avoiding the
-                                               _invlist_contains_cp() overhead
-                                               for those.  */
-                    && ! _invlist_contains_cp(PL_utf8_foldable, code_point))
-                {
-                    OP(REGNODE_p(node)) = (LOC)
-                               ? EXACTL
-                               : EXACT;
-                }
-            }
-            else if (code_point <= MAX_UTF8_TWO_BYTE) {
+     * If this function fails, then the string it matches is very unlikely to
+     * ever be considered a valid quantifier, so we can allow the '{' that
+     * begins it to be considered as a literal */
  
-                /* Not folding this cp, and can output it directly */
-                *character = UTF8_TWO_BYTE_HI(code_point);
-                *(character + 1) = UTF8_TWO_BYTE_LO(code_point);
-                len = 2;
-            }
-            else {
-                uvchr_to_utf8( character, code_point);
-                len = UTF8SKIP(character);
-            }
-        } /* Else pattern isn't UTF8.  */
-        else if (! FOLD) {
-            *character = (U8) code_point;
-            len = 1;
-        } /* Else is folded non-UTF8 */
-#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
-   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
-                                      || UNICODE_DOT_DOT_VERSION > 0)
-        else if (LIKELY(code_point != LATIN_SMALL_LETTER_SHARP_S)) {
-#else
-        else if (1) {
-#endif
-            /* We don't fold any non-UTF8 except possibly the Sharp s  (see
-             * comments at join_exact()); */
-            *character = (U8) code_point;
-            len = 1;
-
-            /* Can turn into an EXACT node if we know the fold at compile time,
-             * and it folds to itself and doesn't particpate in other folds */
-            if (downgradable
-                && ! LOC
-                && PL_fold_latin1[code_point] == code_point
-                && (! HAS_NONLATIN1_FOLD_CLOSURE(code_point)
-                    || (isASCII(code_point) && ASCII_FOLD_RESTRICTED)))
-            {
-                OP(REGNODE_p(node)) = EXACT;
-            }
-        } /* else is Sharp s.  May need to fold it */
-        else if (AT_LEAST_UNI_SEMANTICS && ! ASCII_FOLD_RESTRICTED) {
-            *character = 's';
-            *(character + 1) = 's';
-            len = 2;
-        }
-        else {
-            *character = LATIN_SMALL_LETTER_SHARP_S;
-            len = 1;
-        }
-    }
+    bool has_min = FALSE;
+    bool has_max = FALSE;
  
-    if (downgradable) {
-        change_engine_size(pRExC_state, STR_SZ(len));
-    }
+    PERL_ARGS_ASSERT_NEW_REGCURLY;
  
-    RExC_emit += STR_SZ(len);
-    STR_LEN(REGNODE_p(node)) = len;
-    if (! len_passed_in) {
-        Copy((char *) character, STRING(REGNODE_p(node)), len, char);
-    }
-
-    *flagp |= HASWIDTH;
-
-    /* A single character node is SIMPLE, except for the special-cased SHARP S
-     * under /di. */
-    if ((len == 1 || (UTF && len == UVCHR_SKIP(code_point)))
-#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
-   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
-                                      || UNICODE_DOT_DOT_VERSION > 0)
-        && ( code_point != LATIN_SMALL_LETTER_SHARP_S
-            || ! FOLD || ! DEPENDS_SEMANTICS)
-#endif
-    ) {
-        *flagp |= SIMPLE;
-    }
-
-    if (OP(REGNODE_p(node)) == EXACTFL) {
-        RExC_contains_locale = 1;
-    }
-}
-
-STATIC bool
-S_new_regcurly(const char *s, const char *e)
-{
-    /* This is a temporary function designed to match the most lenient form of
-     * a {m,n} quantifier we ever envision, with either number omitted, and
-     * spaces anywhere between/before/after them.
-     *
-     * If this function fails, then the string it matches is very unlikely to
-     * ever be considered a valid quantifier, so we can allow the '{' that
-     * begins it to be considered as a literal */
-
-    bool has_min = FALSE;
-    bool has_max = FALSE;
-
-    PERL_ARGS_ASSERT_NEW_REGCURLY;
-
-    if (s >= e || *s++ != '{')
-       return FALSE;
+    if (s >= e || *s++ != '{')
+       return FALSE;
  
      while (s < e && isSPACE(*s)) {
          s++;
@@ -13122,6 +13259,7 @@ S_backref_value(char *p, char *e)
  STATIC regnode_offset
  S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  {
+    dVAR;
      regnode_offset ret = 0;
      I32 flags = 0;
      char *parse_start;
@@ -13294,25 +13432,17 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              /* FALLTHROUGH */
         case 'b':
            {
+            U8 flags = 0;
             regex_charset charset = get_regex_charset(RExC_flags);
  
             RExC_seen_zerolen++;
              RExC_seen |= REG_LOOKBEHIND_SEEN;
             op = BOUND + charset;
  
-            if (op == BOUND) {
-                RExC_seen_d_op = TRUE;
-            }
-            else if (op == BOUNDL) {
-                RExC_contains_locale = 1;
-            }
-
-           ret = reg_node(pRExC_state, op);
-           *flagp |= SIMPLE;
             if (RExC_parse >= RExC_end || *(RExC_parse + 1) != '{') {
-                FLAGS(REGNODE_p(ret)) = TRADITIONAL_BOUND;
+                flags = TRADITIONAL_BOUND;
                  if (op > BOUNDA) {  /* /aa is same as /a */
-                    OP(REGNODE_p(ret)) = BOUNDA;
+                    op = BOUNDA;
                  }
              }
              else {
@@ -13320,9 +13450,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  char name = *RExC_parse;
                  char * endbrace = NULL;
                  RExC_parse += 2;
-                if (RExC_parse < RExC_end) {
-                    endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
-                }
+                endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
  
                  if (! endbrace) {
                      vFAIL2("Missing right brace on \\%c{}", name);
@@ -13348,25 +13476,25 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          {
                              goto bad_bound_type;
                          }
-                        FLAGS(REGNODE_p(ret)) = GCB_BOUND;
+                        flags = GCB_BOUND;
                          break;
                      case 'l':
                          if (length != 2 || *(RExC_parse + 1) != 'b') {
                              goto bad_bound_type;
                          }
-                        FLAGS(REGNODE_p(ret)) = LB_BOUND;
+                        flags = LB_BOUND;
                          break;
                      case 's':
                          if (length != 2 || *(RExC_parse + 1) != 'b') {
                              goto bad_bound_type;
                          }
-                        FLAGS(REGNODE_p(ret)) = SB_BOUND;
+                        flags = SB_BOUND;
                          break;
                      case 'w':
                          if (length != 2 || *(RExC_parse + 1) != 'b') {
                              goto bad_bound_type;
                          }
-                        FLAGS(REGNODE_p(ret)) = WB_BOUND;
+                        flags = WB_BOUND;
                          break;
                      default:
                        bad_bound_type:
@@ -13379,8 +13507,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  RExC_parse = endbrace;
                  REQUIRE_UNI_RULES(flagp, 0);
  
-                if (op >= BOUNDA) {  /* /aa is same as /a */
-                    OP(REGNODE_p(ret)) = BOUNDU;
+                if (op == BOUND) {
+                    op = BOUNDU;
+                }
+                else if (op >= BOUNDA) {  /* /aa is same as /a */
+                    op = BOUNDU;
                      length += 4;
  
                      /* Don't have to worry about UTF-8, in this message because
@@ -13395,9 +13526,22 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  }
             }
  
+            if (op == BOUND) {
+                RExC_seen_d_op = TRUE;
+            }
+            else if (op == BOUNDL) {
+                RExC_contains_locale = 1;
+            }
+
              if (invert) {
-                OP(REGNODE_p(ret)) += NBOUND - BOUND;
+                op += NBOUND - BOUND;
              }
+
+           ret = reg_node(pRExC_state, op);
+            FLAGS(REGNODE_p(ret)) = flags;
+
+           *flagp |= SIMPLE;
+
             goto finish_meta_pat;
            }
  
@@ -13651,7 +13795,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      /* It might be a forward reference; we can't fail until we
                       * know, by completing the parse to get all the groups, and
                       * then reparsing */
-                    if (RExC_total_parens > 0)  {
+                    if (ALL_PARENS_COUNTED)  {
                          if (num >= RExC_total_parens)  {
                              vFAIL("Reference to nonexistent group");
                          }
@@ -13753,18 +13897,27 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              /* We can convert EXACTF nodes to EXACTFU if they contain only
               * characters that match identically regardless of the target
               * string's UTF8ness.  The reason to do this is that EXACTF is not
-             * trie-able, EXACTFU is.
+             * trie-able, EXACTFU is, and EXACTFU requires fewer operations at
+             * runtime.
               *
               * Similarly, we can convert EXACTFL nodes to EXACTFLU8 if they
               * contain only above-Latin1 characters (hence must be in UTF8),
               * which don't participate in folds with Latin1-range characters,
               * as the latter's folds aren't known until runtime. */
-            bool maybe_exactfu = FOLD;
+            bool maybe_exactfu = FOLD && (DEPENDS_SEMANTICS || LOC);
+
+            /* Single-character EXACTish nodes are almost always SIMPLE.  This
+             * allows us to override this as encountered */
+            U8 maybe_SIMPLE = SIMPLE;
  
              /* Does this node contain something that can't match unless the
               * target string is (also) in UTF-8 */
              bool requires_utf8_target = FALSE;
  
+            /* The sequence 'ss' is problematic in non-UTF-8 patterns. */
+            bool has_ss = FALSE;
+
+            /* So is the MICRO SIGN */
              bool has_micro_sign = FALSE;
  
              /* Allocate an EXACT node.  The node_type may change below to
@@ -13794,7 +13947,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                     || UTF8_IS_START(UCHARAT(RExC_parse)));
  
-
              /* Here, we have a literal character.  Find the maximal string of
               * them in the input that we can fit into a single EXACTish node.
               * We quit at the first non-literal or when the node gets full, or
@@ -13902,7 +14054,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              if (! maybe_exactfu) {
                                  len = 0;
                                  s = s0;
-                                maybe_exactfu = FOLD;   /* Prob. unnecessary */
                                  goto reparse;
                              }
                          }
@@ -14163,13 +14314,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       * existing node, so can start a new node with this one */
                      if (! len) {
                          node_type = EXACTFL;
+                        RExC_contains_locale = 1;
                      }
                      else if (node_type == EXACT) {
                          p = oldp;
                          goto loopdone;
                      }
  
-                    /* This code point means we can't simplify things */
+                    /* This problematic code point means we can't simplify
+                     * things */
                      maybe_exactfu = FALSE;
  
                      /* Here, we are adding a problematic fold character.
@@ -14185,7 +14338,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  else /* regular fold; see if actually is in a fold */
                       if (   (ender < 256 && ! IS_IN_SOME_FOLD_L1(ender))
                           || (ender > 255
-                            && ! _invlist_contains_cp(PL_utf8_foldable, ender)))
+                            && ! _invlist_contains_cp(PL_in_some_fold, ender)))
                  {
                      /* Here, folding, but the character isn't in a fold.
                       *
@@ -14214,7 +14367,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          goto loopdone;
                      }
  
-                    if (UTF) {  /* For UTF-8, we add the folded value */
+                    if (UTF) {  /* Use the folded value */
                          if (UVCHR_IS_INVARIANT(ender)) {
                              *(s)++ = (U8) toFOLD(ender);
                          }
@@ -14228,19 +14381,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                                      : 0));
                              s += added_len;
  
-                            if (ender > 255)  {
+                            if (   ender > 255
+                                && LIKELY(ender != GREEK_SMALL_LETTER_MU))
+                            {
+                                /* U+B5 folds to the MU, so its possible for a
+                                 * non-UTF-8 target to match it */
                                  requires_utf8_target = TRUE;
-                                if (UNLIKELY(ender == GREEK_SMALL_LETTER_MU)) {
-                                    has_micro_sign = TRUE;
-                                }
                              }
                          }
                      }
                      else {
  
-                        /* Here is non-UTF8; we don't normally store the folded
-                         * value.  First, see if the character's fold differs
-                         * between /d and /u. */
+                        /* Here is non-UTF8.  First, see if the character's
+                         * fold differs between /d and /u. */
                          if (PL_fold[ender] != PL_fold_latin1[ender]) {
                              maybe_exactfu = FALSE;
                          }
@@ -14252,16 +14405,35 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          /* On non-ancient Unicode versions, this includes the
                           * multi-char fold SHARP S to 'ss' */
  
-                        else if (UNLIKELY(   ender == LATIN_SMALL_LETTER_SHARP_S
-                                          || (   len
-                                              && isALPHA_FOLD_EQ(ender, 's')
-                                              && isALPHA_FOLD_EQ(*(s-1), 's'))))
+                        if (   UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)
+                                 || (   isALPHA_FOLD_EQ(ender, 's')
+                                     && len > 0
+                                     && isALPHA_FOLD_EQ(*(s-1), 's')))
                          {
+                            /* Here, we have one of the following:
+                             *  a)  a SHARP S.  This folds to 'ss' only under
+                             *      /u rules.  If we are in that situation,
+                             *      fold the SHARP S to 'ss'.  See the comments
+                             *      for join_exact() as to why we fold this
+                             *      non-UTF at compile time, and no others.
+                             *  b)  'ss'.  When under /u, there's nothing
+                             *      special needed to be done here.  The
+                             *      previous iteration handled the first 's',
+                             *      and this iteration will handle the second.
+                             *      If, on the otherhand it's not /u, we have
+                             *      to exclude the possibility of moving to /u,
+                             *      so that we won't generate an unwanted
+                             *      match, unless, at runtime, the target
+                             *      string is in UTF-8.
+                             * */
  
-                            if (node_type == EXACTFU) {
-                                /* See comments for join_exact() as to why we
-                                 * fold this non-UTF at compile time */
-                                if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
+                            has_ss = TRUE;
+                            maybe_exactfu = FALSE;  /* Can't generate an
+                                                       EXACTFU node (unless we
+                                                       already are in one) */
+                            if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
+                                maybe_SIMPLE = 0;
+                                if (node_type == EXACTFU) {
                                      *(s++) = 's';
  
                                      /* Let the code below add in the extra 's' */
@@ -14269,9 +14441,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                      added_len = 2;
                                  }
                              }
-                            else {
-                                maybe_exactfu = FALSE;
-                            }
                          }
  #endif
  
@@ -14279,10 +14448,18 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              has_micro_sign = TRUE;
                          }
  
-                        /* Even when folding, we store just the input
-                         * character, as we have an array that finds its fold
-                         * quickly */
-                        *(s++) = (char) ender;
+                        *(s++) = (char) (DEPENDS_SEMANTICS)
+                                        ? toFOLD(ender)
+
+                                          /* Under /u, the fold of any
+                                           * character in the 0-255 range
+                                           * happens to be its lowercase
+                                           * equivalent, except for LATIN SMALL
+                                           * LETTER SHARP S, which was handled
+                                           * above, and the MICRO SIGN, whose
+                                           * fold requires UTF-8 to represent.
+                                           * */
+                                        : toLOWER_L1(ender);
                      }
                 } /* End of adding current character to the node */
  
@@ -14320,14 +14497,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
               * identifies, so when it is set to less than the full node, we can
               * skip the rest of this */
              if (FOLD && p < RExC_end && upper_parse == MAX_NODE_STRING_SIZE) {
+                PERL_UINT_FAST8_T backup_count = 0;
  
                  const STRLEN full_len = len;
  
                 assert(len >= MAX_NODE_STRING_SIZE);
  
-                /* Here, <s> points to the final byte of the final character.
-                 * Look backwards through the string until find a non-
-                 * problematic character */
+                /* Here, <s> points to just beyond where we have output the
+                 * final character of the node.  Look backwards through the
+                 * string until find a non- problematic character */
  
                 if (! UTF) {
  
@@ -14336,7 +14514,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          goto loopdone;
                      }
  
-                    while (--s >= s0 && IS_NON_FINAL_FOLD(*s)) { }
+                    while (--s >= s0 && IS_NON_FINAL_FOLD(*s)) {
+                        backup_count++;
+                    }
                      len = s - s0 + 1;
                 }
                  else {
@@ -14366,7 +14546,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              }
                          }
                          else if (! _invlist_contains_cp(
-                                        PL_NonL1NonFinalFold,
+                                        PL_NonFinalFold,
                                          valid_utf8_to_uvchr((U8 *) s, NULL)))
                          {
                              break;
@@ -14378,6 +14558,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                           * special case the very first byte in the string, so
                           * we don't read outside the string */
                          s = (s == s0) ? s -1 : (char *) utf8_hop((U8 *) s, -1);
+                        backup_count++;
                      } /* End of loop backwards through the string */
  
                      /* If there were only problematic characters in the string,
@@ -14398,23 +14579,16 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  if (len == 0) {
                      len = full_len;
  
-                    /* If the node ends in an 's' we make sure it stays EXACTF,
-                     * as if it turns into an EXACTFU, it could later get
-                     * joined with another 's' that would then wrongly match
-                     * the sharp s */
-                    if (maybe_exactfu && isALPHA_FOLD_EQ(ender, 's'))
-                    {
-                        maybe_exactfu = FALSE;
-                    }
                  } else {
  
                      /* Here, the node does contain some characters that aren't
-                     * problematic.  If one such is the final character in the
-                     * node, we are done */
-                    if (len == full_len) {
+                     * problematic.  If we didn't have to backup any, then the
+                     * final character in the node is non-problematic, and we
+                     * can take the node as-is */
+                    if (backup_count == 0) {
                          goto loopdone;
                      }
-                    else if (len + ((UTF) ? UTF8SKIP(s) : 1) == full_len) {
+                    else if (backup_count == 1) {
  
                          /* If the final character is problematic, but the
                           * penultimate is not, back-off that last character to
@@ -14468,58 +14642,83 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  OP(REGNODE_p(ret)) = NOTHING;
              }
              else {
-                OP(REGNODE_p(ret)) = node_type;
  
                  /* If the node type is EXACT here, check to see if it
                   * should be EXACTL, or EXACT_ONLY8. */
                  if (node_type == EXACT) {
                      if (LOC) {
-                        OP(REGNODE_p(ret)) = EXACTL;
+                        node_type = EXACTL;
                      }
                      else if (requires_utf8_target) {
-                        OP(REGNODE_p(ret)) = EXACT_ONLY8;
+                        node_type = EXACT_ONLY8;
                      }
-                }
+                } else if (FOLD) {
+                    if (    UNLIKELY(has_micro_sign || has_ss)
+                        && (node_type == EXACTFU || (   node_type == EXACTF
+                                                     && maybe_exactfu)))
+                    {   /* These two conditions are problematic in non-UTF-8
+                           EXACTFU nodes. */
+                        assert(! UTF);
+                        node_type = EXACTFUP;
+                    }
+                    else if (node_type == EXACTFL) {
  
-                if (FOLD) {
-                    /* If 'maybe_exactfu' is set, then there are no code points
-                     * that match differently depending on UTF8ness of the
-                     * target string (for /u), or depending on locale for /l */
-                    if (maybe_exactfu) {
-                        if (node_type == EXACTF) {
-                            OP(REGNODE_p(ret)) = EXACTFU;
-                        }
-                        else if (node_type == EXACTFL) {
-                            OP(REGNODE_p(ret)) = EXACTFLU8;
+                        /* 'maybe_exactfu' is deliberately set above to
+                         * indicate this node type, where all code points in it
+                         * are above 255 */
+                        if (maybe_exactfu) {
+                            node_type = EXACTFLU8;
                          }
                      }
-                    else if (node_type == EXACTF) {
-                        RExC_seen_d_op = TRUE;
+                    else if (node_type == EXACTF) {  /* Means is /di */
+
+                        /* If 'maybe_exactfu' is clear, then we need to stay
+                         * /di.  If it is set, it means there are no code
+                         * points that match differently depending on UTF8ness
+                         * of the target string, so it can become an EXACTFU
+                         * node */
+                        if (! maybe_exactfu) {
+                            RExC_seen_d_op = TRUE;
+                        }
+                        else if (   isALPHA_FOLD_EQ(* STRING(REGNODE_p(ret)), 's')
+                                 || isALPHA_FOLD_EQ(ender, 's'))
+                        {
+                            /* But, if the node begins or ends in an 's' we
+                             * have to defer changing it into an EXACTFU, as
+                             * the node could later get joined with another one
+                             * that ends or begins with 's' creating an 'ss'
+                             * sequence which would then wrongly match the
+                             * sharp s without the target being UTF-8.  We
+                             * create a special node that we resolve later when
+                             * we join nodes together */
+
+                            node_type = EXACTFU_S_EDGE;
+                        }
+                        else {
+                            node_type = EXACTFU;
+                        }
                      }
  
-                    /* The micro sign is the only below 256 character that
-                     * folds to above 255 */
-                    if (   OP(REGNODE_p(ret)) == EXACTFU
-                        && requires_utf8_target
-                        && LIKELY(! has_micro_sign))
-                    {
-                        OP(REGNODE_p(ret)) = EXACTFU_ONLY8;
+                    if (requires_utf8_target && node_type == EXACTFU) {
+                        node_type = EXACTFU_ONLY8;
                      }
+                }
  
+                OP(REGNODE_p(ret)) = node_type;
+                STR_LEN(REGNODE_p(ret)) = len;
+                RExC_emit += STR_SZ(len);
+
+                /* If the node isn't a single character, it can't be SIMPLE */
+                if (len > ((UTF) ? UVCHR_SKIP(ender) : 1)) {
+                    maybe_SIMPLE = 0;
                  }
  
-                alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len,
-                                           UV_MAX,  /* unused here */
-                                           FALSE /* Don't look to see if could
-                                                    be turned into an EXACT
-                                                    node, as we have already
-                                                    computed that */
-                                          );
+                *flagp |= HASWIDTH | maybe_SIMPLE;
              }
  
-           RExC_parse = p - 1;
-            Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
-           RExC_parse = p;
+            Set_Node_Length(REGNODE_p(ret), p - parse_start - 1);
+            RExC_parse = p;
+
             {
                 /* len is STRLEN which is unsigned, need to copy to signed */
                 IV iv = len;
@@ -14556,9 +14755,16 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
       * sets up the bitmap and any flags, removing those code points from the
       * inversion list, setting it to NULL should it become completely empty */
  
+    dVAR;
+
      PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST;
      assert(PL_regkind[OP(node)] == ANYOF);
  
+    /* There is no bitmap for this node type */
+    if (OP(node) == ANYOFH) {
+        return;
+    }
+
      ANYOF_BITMAP_ZERO(node);
      if (*invlist_ptr) {
  
@@ -15624,10 +15830,11 @@ redo_curchar:
  
              case '(':
  
-                if (   RExC_parse < RExC_end - 1
-                    && (UCHARAT(RExC_parse + 1) == '?'))
+                if (   RExC_parse < RExC_end - 2
+                    && UCHARAT(RExC_parse + 1) == '?'
+                    && UCHARAT(RExC_parse + 2) == '^')
                  {
-                    /* If is a '(?', could be an embedded '(?flags:(?[...])'.
+                    /* If is a '(?', could be an embedded '(?^flags:(?[...])'.
                       * This happens when we have some thing like
                       *
                       *   my $thai_or_lao = qr/(?[ \p{Thai} + \p{Lao} ])/;
@@ -15645,14 +15852,11 @@ redo_curchar:
                      RExC_parse += 2;        /* Skip past the '(?' */
                      save_parse = RExC_parse;
  
-                    /* Parse any flags for the '(?' */
+                    /* Parse the flags for the '(?'.  We already know the first
+                     * flag to parse is a '^' */
                      parse_lparen_question_flags(pRExC_state);
  
-                    if (RExC_parse == save_parse  /* Makes sure there was at
-                                                     least one flag (or else
-                                                     this embedding wasn't
-                                                     compiled) */
-                        || RExC_parse >= RExC_end - 4
+                    if (   RExC_parse >= RExC_end - 4
                          || UCHARAT(RExC_parse) != ':'
                          || UCHARAT(++RExC_parse) != '('
                          || UCHARAT(++RExC_parse) != '?'
@@ -15661,8 +15865,7 @@ redo_curchar:
  
                          /* In combination with the above, this moves the
                           * pointer to the point just after the first erroneous
-                         * character (or if there are no flags, to where they
-                         * should have been) */
+                         * character. */
                          if (RExC_parse >= RExC_end - 4) {
                              RExC_parse = RExC_end;
                          }
@@ -16261,9 +16464,9 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
          default:    /* Other code points are checked against the data for the
                         current Unicode version */
            {
-            Size_t folds_to_count;
-            unsigned int first_folds_to;
-            const unsigned int * remaining_folds_to_list;
+            Size_t folds_count;
+            unsigned int first_fold;
+            const unsigned int * remaining_folds;
              UV folded_cp;
  
              if (isASCII(cp)) {
@@ -16279,9 +16482,9 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
                  *invlist = add_cp_to_invlist(*invlist, folded_cp);
              }
  
-            folds_to_count = _inverse_folds(folded_cp, &first_folds_to,
-                                                    &remaining_folds_to_list);
-            if (folds_to_count == 0) {
+            folds_count = _inverse_folds(folded_cp, &first_fold,
+                                                    &remaining_folds);
+            if (folds_count == 0) {
  
                  /* Use deprecated warning to increase the chances of this being
                   * output */
@@ -16292,13 +16495,13 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
              else {
                  unsigned int i;
  
-                if (first_folds_to > 255) {
-                    *invlist = add_cp_to_invlist(*invlist, first_folds_to);
+                if (first_fold > 255) {
+                    *invlist = add_cp_to_invlist(*invlist, first_fold);
                  }
-                for (i = 0; i < folds_to_count - 1; i++) {
-                    if (remaining_folds_to_list[i] > 255) {
+                for (i = 0; i < folds_count - 1; i++) {
+                    if (remaining_folds[i] > 255) {
                          *invlist = add_cp_to_invlist(*invlist,
-                                                    remaining_folds_to_list[i]);
+                                                    remaining_folds[i]);
                      }
                  }
              }
@@ -16437,7 +16640,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       *
       * ANYOF nodes contain a bit map for the first NUM_ANYOF_CODE_POINTS
       * characters, with the corresponding bit set if that character is in the
-     * list.  For characters above this, a range list or swash is used.  There
+     * list.  For characters above this, an inversion list is used.  There
       * are extra bits for \w, etc. in locale ANYOFs, as what these match is not
       * determinable at compile time
       *
@@ -16449,15 +16652,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * UTF-8
       */
  
+    dVAR;
      UV prevvalue = OOB_UNICODE, save_prevvalue = OOB_UNICODE;
      IV range = 0;
      UV value = OOB_UNICODE, save_value = OOB_UNICODE;
-    regnode_offset ret;
+    regnode_offset ret = -1;    /* Initialized to an illegal value */
      STRLEN numlen;
      int namedclass = OOB_NAMEDCLASS;
      char *rangebegin = NULL;
-    bool need_class = 0;
-    SV *listsv = NULL;
+    SV *listsv = NULL;      /* List of \p{user-defined} whose definitions
+                               aren't available at the time this was called */
      STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
                                       than just initialized.  */
      SV* properties = NULL;    /* Code points that match \p{} \P{} */
@@ -16486,22 +16690,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      const bool skip_white = cBOOL(   ret_invlist
                                    || (RExC_flags & RXf_PMf_EXTENDED_MORE));
  
-    /* Unicode properties are stored in a swash; this holds the current one
-     * being parsed.  If this swash is the only above-latin1 component of the
-     * character class, an optimization is to pass it directly on to the
-     * execution engine.  Otherwise, it is set to NULL to indicate that there
-     * are other things in the class that have to be dealt with at execution
-     * time */
-    SV* swash = NULL;          /* Code points that match \p{} \P{} */
-
-    /* Set if a component of this character class is user-defined; just passed
-     * on to the engine */
-    bool has_user_defined_property = FALSE;
-
      /* inversion list of code points this node matches only when the target
       * string is in UTF-8.  These are all non-ASCII, < 256.  (Because is under
       * /d) */
-    SV* has_upper_latin1_only_utf8_matches = NULL;
+    SV* upper_latin1_only_utf8_matches = NULL;
  
      /* Inversion list of code points this node matches regardless of things
       * like locale, folding, utf8ness of the target string */
@@ -16531,7 +16723,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      bool warn_super = ALWAYS_WARN_SUPER;
  
      const char * orig_parse = RExC_parse;
-    bool posixl_matches_all = FALSE; /* Does /l class have both e.g. \W,\w ? */
  
      /* This variable is used to mark where the end in the input is of something
       * that looks like a POSIX construct but isn't.  During the parse, when
@@ -16546,7 +16737,17 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         one.  */
      U8 anyof_flags = 0;   /* flag bits if the node is an ANYOF-type */
      U32 posixl = 0;       /* bit field of posix classes matched under /l */
-    bool use_anyofd = FALSE; /* ? Is this to be an ANYOFD node */
+
+
+/* Flags as to what things aren't knowable until runtime.  (Note that these are
+ * mutually exclusive.) */
+#define HAS_USER_DEFINED_PROPERTY 0x01   /* /u any user-defined properties that
+                                            haven't been defined as of yet */
+#define HAS_D_RUNTIME_DEPENDENCY  0x02   /* /d if the target being matched is
+                                            UTF-8 or not */
+#define HAS_L_RUNTIME_DEPENDENCY   0x04 /* /l what the posix classes match and
+                                            what gets folded */
+    U32 has_runtime_dependency = 0;     /* OR of the above flags */
  
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -16570,7 +16771,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      allow_multi_folds = FALSE;
  #endif
  
-    listsv = newSVpvs_flags("# comment\n", SVs_TEMP);
+    /* We include the /i status at the beginning of this so that we can
+     * know it at runtime */
+    listsv = sv_2mortal(Perl_newSVpvf(aTHX_ "#%d\n", cBOOL(FOLD)));
      initial_listsv_len = SvCUR(listsv);
      SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated.  */
  
@@ -16771,6 +16974,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                "Ignoring zero length \\N{} in character class");
                          }
                          else { /* cp_count > 1 */
+                            assert(cp_count > 1);
                              if (! RExC_in_multi_char_class) {
                                  if (invert || range || *RExC_parse == '-') {
                                      if (strict) {
@@ -16809,17 +17013,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             case 'P':
                 {
                 char *e;
-                char *i;
-
-                /* We will handle any undefined properties ourselves */
-                U8 swash_init_flags = _CORE_SWASH_INIT_RETURN_IF_UNDEF
-                                       /* And we actually would prefer to get
-                                        * the straight inversion list of the
-                                        * swash, since we will be accessing it
-                                        * anyway, to save a little time */
-                                      |_CORE_SWASH_INIT_ACCEPT_INVLIST;
-
-                SvREFCNT_dec(swash); /* Free any left-overs */
  
                 /* \p means they want Unicode semantics */
                 REQUIRE_UNI_RULES(flagp, 0);
@@ -16875,169 +17068,82 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 }
                 {
                      char* name = RExC_parse;
-                    char* base_name;    /* name after any packages are stripped */
-                    char* lookup_name = NULL;
-                    const char * const colon_colon = "::";
-                    bool invert;
-
-                    SV* invlist;
-
-                    /* Temporary workaround for [perl #133136].  For this
-                    * precise input that is in the .t that is failing, load
-                    * utf8.pm, which is what the test wants, so that that
-                    * .t passes */
-                    if (     memEQs(RExC_start, e + 1 - RExC_start,
-                                    "foo\\p{Alnum}")
-                        && ! hv_common(GvHVn(PL_incgv),
-                                       NULL,
-                                       "utf8.pm", sizeof("utf8.pm") - 1,
-                                       0, HV_FETCH_ISEXISTS, NULL, 0))
-                    {
-                        require_pv("utf8.pm");
-                    }
-                    invlist = parse_uniprop_string(name, n, FOLD, &invert);
-                    if (invlist) {
-                        if (invert) {
-                            value ^= 'P' ^ 'p';
-                        }
-                    }
-                    else {
  
-                    /* Try to get the definition of the property into
-                     * <invlist>.  If /i is in effect, the effective property
-                     * will have its name be <__NAME_i>.  The design is
-                     * discussed in commit
-                     * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
-                    name = savepv(Perl_form(aTHX_ "%.*s", (int)n, RExC_parse));
-                    SAVEFREEPV(name);
-
-                    for (i = RExC_parse; i < RExC_parse + n; i++) {
-                        if (isCNTRL(*i) && *i != '\t') {
-                            RExC_parse = e + 1;
-                            vFAIL2("Can't find Unicode property definition \"%s\"", name);
+                    /* Any message returned about expanding the definition */
+                    SV* msg = newSVpvs_flags("", SVs_TEMP);
+
+                    /* If set TRUE, the property is user-defined as opposed to
+                     * official Unicode */
+                    bool user_defined = FALSE;
+
+                    SV * prop_definition = parse_uniprop_string(
+                                            name, n, UTF, FOLD,
+                                            FALSE, /* This is compile-time */
+                                            &user_defined,
+                                            msg,
+                                            0 /* Base level */
+                                           );
+                    if (SvCUR(msg)) {   /* Assumes any error causes a msg */
+                        assert(prop_definition == NULL);
+                        RExC_parse = e + 1;
+                        if (SvUTF8(msg)) {  /* msg being UTF-8 makes the whole
+                                               thing so, or else the display is
+                                               mojibake */
+                            RExC_utf8 = TRUE;
                          }
+                       /* diag_listed_as: Can't find Unicode property definition "%s" in regex; marked by <-- HERE in m/%s/ */
+                        vFAIL2utf8f("%" UTF8f, UTF8fARG(SvUTF8(msg),
+                                    SvCUR(msg), SvPVX(msg)));
                      }
  
-                    if (FOLD) {
-                        lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
-
-                        /* The function call just below that uses this can fail
-                         * to return, leaking memory if we don't do this */
-                        SAVEFREEPV(lookup_name);
-                    }
-
-                    /* Look up the property name, and get its swash and
-                     * inversion list, if the property is found  */
-                    swash = _core_swash_init("utf8",
-                                             (lookup_name)
-                                              ? lookup_name
-                                              : name,
-                                             &PL_sv_undef,
-                                             1, /* binary */
-                                             0, /* not tr/// */
-                                             NULL, /* No inversion list */
-                                             &swash_init_flags
-                                            );
-                    if (! swash || ! (invlist = _get_swash_invlist(swash))) {
-                        HV* curpkg = (IN_PERL_COMPILETIME)
-                                      ? PL_curstash
-                                      : CopSTASH(PL_curcop);
-                        UV final_n = n;
-                        bool has_pkg;
-
-                        if (swash) {    /* Got a swash but no inversion list.
-                                           Something is likely wrong that will
-                                           be sorted-out later */
-                            SvREFCNT_dec_NN(swash);
-                            swash = NULL;
-                        }
+                    if (! is_invlist(prop_definition)) {
  
-                        /* Here didn't find it.  It could be a an error (like a
-                         * typo) in specifying a Unicode property, or it could
-                         * be a user-defined property that will be available at
-                         * run-time.  The names of these must begin with 'In'
-                         * or 'Is' (after any packages are stripped off).  So
-                         * if not one of those, or if we accept only
-                         * compile-time properties, is an error; otherwise add
-                         * it to the list for run-time look up. */
-                        if ((base_name = rninstr(name, name + n,
-                                                 colon_colon, colon_colon + 2)))
-                        { /* Has ::.  We know this must be a user-defined
-                             property */
-                            base_name += 2;
-                            final_n -= base_name - name;
-                            has_pkg = TRUE;
+                        /* Here, the definition isn't known, so we have gotten
+                         * returned a string that will be evaluated if and when
+                         * encountered at runtime.  We add it to the list of
+                         * such properties, along with whether it should be
+                         * complemented or not */
+                        if (value == 'P') {
+                            sv_catpvs(listsv, "!");
                          }
                          else {
-                            base_name = name;
-                            has_pkg = FALSE;
+                            sv_catpvs(listsv, "+");
                          }
+                        sv_catsv(listsv, prop_definition);
  
-                        if (   final_n < 3
-                            || base_name[0] != 'I'
-                            || (base_name[1] != 's' && base_name[1] != 'n')
-                            || ret_invlist)
-                        {
-                            const char * const msg
-                                = (has_pkg)
-                                  ? "Illegal user-defined property name"
-                                  : "Can't find Unicode property definition";
-                            RExC_parse = e + 1;
-
-                            /* diag_listed_as: Can't find Unicode property definition "%s" */
-                            vFAIL3utf8f("%s \"%" UTF8f "\"",
-                                msg, UTF8fARG(UTF, n, name));
-                        }
-
-                        /* If the property name doesn't already have a package
-                         * name, add the current one to it so that it can be
-                         * referred to outside it. [perl #121777] */
-                        if (! has_pkg && curpkg) {
-                            char* pkgname = HvNAME(curpkg);
-                            if (memNEs(pkgname, HvNAMELEN(curpkg), "main")) {
-                                char* full_name = Perl_form(aTHX_
-                                                            "%s::%s",
-                                                            pkgname,
-                                                            name);
-                                n = strlen(full_name);
-                                name = savepvn(full_name, n);
-                                SAVEFREEPV(name);
-                            }
-                        }
-                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%" UTF8f "%s\n",
-                                        (value == 'p' ? '+' : '!'),
-                                        (FOLD) ? "__" : "",
-                                        UTF8fARG(UTF, n, name),
-                                        (FOLD) ? "_i" : "");
-                        has_user_defined_property = TRUE;
-                        optimizable = FALSE;    /* Will have to leave this an
-                                                   ANYOF node */
+                        has_runtime_dependency |= HAS_USER_DEFINED_PROPERTY;
  
                          /* We don't know yet what this matches, so have to flag
                           * it */
                          anyof_flags |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
                      }
                      else {
+                        assert (prop_definition && is_invlist(prop_definition));
  
-                        /* Here, did get the swash and its inversion list.  If
-                         * the swash is from a user-defined property, then this
-                         * whole character class should be regarded as such */
-                        if (swash_init_flags
-                            & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY)
+                        /* Here we do have the complete property definition
+                         *
+                         * Temporary workaround for [perl #133136].  For this
+                         * precise input that is in the .t that is failing,
+                         * load utf8.pm, which is what the test wants, so that
+                         * that .t passes */
+                        if (     memEQs(RExC_start, e + 1 - RExC_start,
+                                        "foo\\p{Alnum}")
+                            && ! hv_common(GvHVn(PL_incgv),
+                                           NULL,
+                                           "utf8.pm", sizeof("utf8.pm") - 1,
+                                           0, HV_FETCH_ISEXISTS, NULL, 0))
                          {
-                            has_user_defined_property = TRUE;
+                            require_pv("utf8.pm");
                          }
-                    }
-                    }
-                    if (invlist) {
-                        if (! has_user_defined_property &&
+
+                        if (! user_defined &&
                              /* We warn on matching an above-Unicode code point
                               * if the match would return true, except don't
                               * warn for \p{All}, which has exactly one element
                               * = 0 */
-                            (_invlist_contains_cp(invlist, 0x110000)
-                                && (! (_invlist_len(invlist) == 1
-                                       && *invlist_array(invlist) == 0))))
+                            (_invlist_contains_cp(prop_definition, 0x110000)
+                                && (! (_invlist_len(prop_definition) == 1
+                                       && *invlist_array(prop_definition) == 0))))
                          {
                              warn_super = TRUE;
                          }
@@ -17045,23 +17151,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          /* Invert if asking for the complement */
                          if (value == 'P') {
                             _invlist_union_complement_2nd(properties,
-                                                          invlist,
+                                                          prop_definition,
                                                            &properties);
-
-                            /* The swash can't be used as-is, because we've
-                            * inverted things; delay removing it to here after
-                            * have copied its invlist above */
-                            if (! swash) {
-                                SvREFCNT_dec_NN(invlist);
-                            }
-                            SvREFCNT_dec(swash);
-                            swash = NULL;
                          }
                          else {
-                            _invlist_union(properties, invlist, &properties);
-                            if (! swash) {
-                                SvREFCNT_dec_NN(invlist);
-                            }
+                            _invlist_union(properties, prop_definition, &properties);
                         }
                      }
                  }
@@ -17205,43 +17299,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  SV* scratch_list = NULL;
  
                  /* What the Posix classes (like \w, [:space:]) match in locale
-                 * isn't knowable under locale until actual match time.  Room
-                 * must be reserved (one time per outer bracketed class) to
-                 * store such classes.  The space will contain a bit for each
-                 * named class that is to be matched against.  This isn't
-                 * needed for \p{} and pseudo-classes, as they are not affected
-                 * by locale, and hence are dealt with separately */
-                if (! need_class) {
-                    need_class = 1;
-                    anyof_flags |= ANYOF_MATCHES_POSIXL;
-
-                    /* We can't change this into some other type of node
-                     * (unless this is the only element, in which case there
-                     * are nodes that mean exactly this) as has runtime
-                     * dependencies */
-                    optimizable = FALSE;
-                }
-
-                /* Coverity thinks it is possible for this to be negative; both
-                 * jhi and khw think it's not, but be safer */
-                assert(! (anyof_flags & ANYOF_MATCHES_POSIXL)
-                       || (namedclass + ((namedclass % 2) ? -1 : 1)) >= 0);
-
-                /* See if it already matches the complement of this POSIX
-                 * class */
-                if (  (anyof_flags & ANYOF_MATCHES_POSIXL)
-                    && POSIXL_TEST(posixl, namedclass + ((namedclass % 2)
-                                                         ? -1
-                                                         : 1)))
-                {
-                    posixl_matches_all = TRUE;
-                    break;  /* No need to continue.  Since it matches both
-                               e.g., \w and \W, it matches everything, and the
-                               bracketed class can be optimized into qr/./s */
-                }
-
-                /* Add this class to those that should be checked at runtime */
+                 * isn't knowable under locale until actual match time.  A
+                 * special node is used for these which has extra space for a
+                 * bitmap, with a bit reserved for each named class that is to
+                 * be matched against.  This isn't needed for \p{} and
+                 * pseudo-classes, as they are not affected by locale, and
+                 * hence are dealt with separately */
                  POSIXL_SET(posixl, namedclass);
+                has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
+                anyof_flags |= ANYOF_MATCHES_POSIXL;
  
                  /* The above-Latin1 characters are not subject to locale rules.
                   * Just add them to the unconditionally-matched list */
@@ -17295,11 +17361,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                  &cp_list);
                      }
                  }
-                else if (  UNI_SEMANTICS
-                        || AT_LEAST_ASCII_RESTRICTED
-                        || classnum == _CC_ASCII
-                        || (DEPENDS_SEMANTICS && (   classnum == _CC_DIGIT
-                                                  || classnum == _CC_XDIGIT)))
+                else if (   AT_LEAST_UNI_SEMANTICS
+                         || classnum == _CC_ASCII
+                         || (DEPENDS_SEMANTICS && (   classnum == _CC_DIGIT
+                                                   || classnum == _CC_XDIGIT)))
                  {
                      /* We usually have to worry about /d affecting what POSIX
                       * classes match, with special code needed because we won't
@@ -17429,7 +17494,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
           *
           * See [perl #89750] */
          if (FOLD && allow_multi_folds && value == prevvalue) {
-            if (value == LATIN_SMALL_LETTER_SHARP_S
+            if (    value == LATIN_SMALL_LETTER_SHARP_S
                  || (value > 255 && _invlist_contains_cp(PL_HasMultiCharFold,
                                                          value)))
              {
@@ -17771,8 +17836,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
              /* Our calculated list will be for Unicode rules.  For locale
               * matching, we have to keep a separate list that is consulted at
-             * runtime only when the locale indicates Unicode rules.  For
-             * non-locale, we just use the general list */
+             * runtime only when the locale indicates Unicode rules (and we
+             * don't include potential matches in the ASCII/Latin1 range, as
+             * any code point could fold to any other, based on the run-time
+             * locale).   For non-locale, we just use the general list */
              if (LOC) {
                  use_list = &only_utf8_locale_list;
              }
@@ -17784,7 +17851,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
               * be checked.  Get the intersection of this class and all the
               * possible characters that are foldable.  This can quickly narrow
               * down a large class */
-            _invlist_intersection(PL_utf8_foldable, cp_foldable_list,
+            _invlist_intersection(PL_in_some_fold, cp_foldable_list,
                                    &fold_intersection);
  
              /* Now look at the foldable characters in this class individually */
@@ -17798,13 +17865,22 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      U8 foldbuf[UTF8_MAXBYTES_CASE+1];
                      STRLEN foldlen;
                      unsigned int k;
-                    Size_t folds_to_count;
-                    unsigned int first_folds_to;
-                    const unsigned int * remaining_folds_to_list;
+                    Size_t folds_count;
+                    unsigned int first_fold;
+                    const unsigned int * remaining_folds;
  
                      if (j < 256) {
  
-                        if (IS_IN_SOME_FOLD_L1(j)) {
+                        /* Under /l, we don't know what code points below 256
+                         * fold to, except we do know the MICRO SIGN folds to
+                         * an above-255 character if the locale is UTF-8, so we
+                         * add it to the special list (in *use_list)  Otherwise
+                         * we know now what things can match, though some folds
+                         * are valid under /d only if the target is UTF-8.
+                         * Those go in a separate list */
+                        if (      IS_IN_SOME_FOLD_L1(j)
+                            && ! (LOC && j != MICRO_SIGN))
+                        {
  
                              /* ASCII is always matched; non-ASCII is matched
                               * only under Unicode rules (which could happen
@@ -17813,11 +17889,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                  *use_list = add_cp_to_invlist(*use_list,
                                                              PL_fold_latin1[j]);
                              }
-                            else {
-                                has_upper_latin1_only_utf8_matches
-                                    = add_cp_to_invlist(
-                                            has_upper_latin1_only_utf8_matches,
-                                            PL_fold_latin1[j]);
+                            else if (j != PL_fold_latin1[j]) {
+                                upper_latin1_only_utf8_matches
+                                        = add_cp_to_invlist(
+                                                upper_latin1_only_utf8_matches,
+                                                PL_fold_latin1[j]);
                              }
                          }
  
@@ -17843,16 +17919,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      /* Single character fold of above Latin1.  Add everything
                       * in its fold closure to the list that this node should
                       * match. */
-                    folds_to_count = _inverse_folds(folded, &first_folds_to,
-                                                    &remaining_folds_to_list);
-                    for (k = 0; k <= folds_to_count; k++) {
+                    folds_count = _inverse_folds(folded, &first_fold,
+                                                    &remaining_folds);
+                    for (k = 0; k <= folds_count; k++) {
                          UV c = (k == 0)     /* First time through use itself */
                                  ? folded
                                  : (k == 1)  /* 2nd time use, the first fold */
-                                   ? first_folds_to
+                                   ? first_fold
  
                                       /* Then the remaining ones */
-                                   : remaining_folds_to_list[k-2];
+                                   : remaining_folds[k-2];
  
                          /* /aa doesn't allow folds between ASCII and non- */
                          if ((   ASCII_FOLD_RESTRICTED
@@ -17876,10 +17952,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          else {
                              /* Similarly folds involving non-ascii Latin1
                               * characters under /d are added to their list */
-                            has_upper_latin1_only_utf8_matches
-                                = add_cp_to_invlist(
-                                            has_upper_latin1_only_utf8_matches,
-                                            c);
+                            upper_latin1_only_utf8_matches
+                                    = add_cp_to_invlist(
+                                                upper_latin1_only_utf8_matches,
+                                                c);
                          }
                      }
                  }
@@ -17895,8 +17971,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      /* And combine the result (if any) with any inversion lists from posix
       * classes.  The lists are kept separate up to now because we don't want to
-     * fold the classes (folding of those is automatically handled by the swash
-     * fetching code) */
+     * fold the classes */
      if (simple_posixes) {   /* These are the classes known to be unaffected by
                                 /a, /aa, and /d */
          if (cp_list) {
@@ -17936,7 +18011,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
               * the target string is in UTF-8.  But things like \W match all the
               * upper Latin1 characters if the target string is not in UTF-8.
               *
-             * Handle the case where there something like \W separately */
+             * Handle the case with something like \W separately */
              if (nposixes) {
                  SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1, NULL);
  
@@ -17959,12 +18034,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
                  /* Likewise for anything else in the range that matched only
                   * under UTF-8 */
-                if (has_upper_latin1_only_utf8_matches) {
+                if (upper_latin1_only_utf8_matches) {
                      _invlist_union(cp_list,
-                                   has_upper_latin1_only_utf8_matches,
+                                   upper_latin1_only_utf8_matches,
                                     &cp_list);
-                    SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
-                    has_upper_latin1_only_utf8_matches = NULL;
+                    SvREFCNT_dec_NN(upper_latin1_only_utf8_matches);
+                    upper_latin1_only_utf8_matches = NULL;
                  }
  
                  /* If we don't match all the upper Latin1 characters regardless
@@ -17990,9 +18065,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                        &nonascii_but_latin1_properties);
  
                  /* And add them to the final list of such characters. */
-                _invlist_union(has_upper_latin1_only_utf8_matches,
+                _invlist_union(upper_latin1_only_utf8_matches,
                                 nonascii_but_latin1_properties,
-                               &has_upper_latin1_only_utf8_matches);
+                               &upper_latin1_only_utf8_matches);
  
                  /* Remove them from what now becomes the unconditional list */
                  _invlist_subtract(posixes, nonascii_but_latin1_properties,
@@ -18010,15 +18085,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
                  SvREFCNT_dec(nonascii_but_latin1_properties);
  
-                /* Get rid of any characters that we now know are matched
-                 * unconditionally from the conditional list, which may make
-                 * that list empty */
-                _invlist_subtract(has_upper_latin1_only_utf8_matches,
+                /* Get rid of any characters from the conditional list that we
+                 * now know are matched unconditionally, which may make that
+                 * list empty */
+                _invlist_subtract(upper_latin1_only_utf8_matches,
                                    cp_list,
-                                  &has_upper_latin1_only_utf8_matches);
-                if (_invlist_len(has_upper_latin1_only_utf8_matches) == 0) {
-                    SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
-                    has_upper_latin1_only_utf8_matches = NULL;
+                                  &upper_latin1_only_utf8_matches);
+                if (_invlist_len(upper_latin1_only_utf8_matches) == 0) {
+                    SvREFCNT_dec_NN(upper_latin1_only_utf8_matches);
+                    upper_latin1_only_utf8_matches = NULL;
                  }
              }
          }
@@ -18033,7 +18108,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * class that isn't a Unicode property, and which matches above Unicode, \W
       * or [\x{110000}] for example.
       * (Note that in this case, unlike the Posix one above, there is no
-     * <has_upper_latin1_only_utf8_matches>, because having a Unicode property
+     * <upper_latin1_only_utf8_matches>, because having a Unicode property
       * forces Unicode semantics */
      if (properties) {
          if (cp_list) {
@@ -18077,10 +18152,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * folded until runtime */
  
      /* If we didn't do folding, it's because some information isn't available
-     * until runtime; set the run-time fold flag for these.  (We don't have to
-     * worry about properties folding, as that is taken care of by the swash
-     * fetching).  We know to set the flag if we have a non-NULL list for UTF-8
-     * locales, or the class matches at least one 0-255 range code point */
+     * until runtime; set the run-time fold flag for these  We know to set the
+     * flag if we have a non-NULL list for UTF-8 locales, or the class matches
+     * at least one 0-255 range code point */
      if (LOC && FOLD) {
  
          /* Some things on the list might be unconditionally included because of
@@ -18095,7 +18169,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  only_utf8_locale_list = NULL;
              }
          }
-        if (only_utf8_locale_list) {
+        if (    only_utf8_locale_list
+            || (cp_list && (   _invlist_contains_cp(cp_list, LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)
+                            || _invlist_contains_cp(cp_list, LATIN_SMALL_LETTER_DOTLESS_I))))
+        {
+            has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
              anyof_flags
                   |= ANYOFL_FOLD
                   |  ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
@@ -18105,359 +18183,804 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              invlist_iterinit(cp_list);
              if (invlist_iternext(cp_list, &start, &end) && start < 256) {
                  anyof_flags |= ANYOFL_FOLD;
+                has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
              }
              invlist_iterfinish(cp_list);
          }
      }
      else if (   DEPENDS_SEMANTICS
-             && (    has_upper_latin1_only_utf8_matches
+             && (    upper_latin1_only_utf8_matches
                   || (anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)))
      {
-        use_anyofd = TRUE;
          RExC_seen_d_op = TRUE;
-        optimizable = FALSE;
+        has_runtime_dependency |= HAS_D_RUNTIME_DEPENDENCY;
      }
  
-    /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
-     * at compile time.  Besides not inverting folded locale now, we can't
-     * invert if there are things such as \w, which aren't known until runtime
-     * */
+    /* Optimize inverted patterns (e.g. [^a-z]) when everything is known at
+     * compile time. */
      if (     cp_list
          &&   invert
-        && ! use_anyofd
-        && ! (anyof_flags & (ANYOF_LOCALE_FLAGS))
-       && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+        && ! has_runtime_dependency)
      {
          _invlist_invert(cp_list);
  
-        /* Any swash can't be used as-is, because we've inverted things */
-        if (swash) {
-            SvREFCNT_dec_NN(swash);
-            swash = NULL;
-        }
-
         /* Clear the invert flag since have just done it here */
         invert = FALSE;
      }
  
      if (ret_invlist) {
          *ret_invlist = cp_list;
-        SvREFCNT_dec(swash);
  
          return RExC_emit;
      }
  
+    /* All possible optimizations below still have these characteristics.
+     * (Multi-char folds aren't SIMPLE, but they don't get this far in this
+     * routine) */
+    *flagp |= HASWIDTH|SIMPLE;
+
+    if (anyof_flags & ANYOF_LOCALE_FLAGS) {
+        RExC_contains_locale = 1;
+    }
+
      /* Some character classes are equivalent to other nodes.  Such nodes take
-     * up less room and generally fewer operations to execute than ANYOF nodes.
-     * */
+     * up less room, and some nodes require fewer operations to execute, than
+     * ANYOF nodes.  EXACTish nodes may be joinable with adjacent nodes to
+     * improve efficiency. */
  
      if (optimizable) {
-        int posix_class = -1;   /* Illegal value */
-        U8 ANYOFM_mask = 0xFF;
-        U32 anode_arg = 0;
-        UV start, end;
+        PERL_UINT_FAST8_T i;
+        Size_t partial_cp_count = 0;
+        UV start[MAX_FOLD_FROMS+1] = { 0 }; /* +1 for the folded-to char */
+        UV   end[MAX_FOLD_FROMS+1] = { 0 };
  
-        if (UNLIKELY(posixl_matches_all)) {
-            op = SANY;
-        }
-        else if (cp_list && ! invert) {
+        if (cp_list) { /* Count the code points in enough ranges that we would
+                          see all the ones possible in any fold in this version
+                          of Unicode */
  
              invlist_iterinit(cp_list);
-            if (! invlist_iternext(cp_list, &start, &end)) {
-
-                /* Here, the list is empty.  This happens, for example, when a
-                 * Unicode property that doesn't match anything is the only
-                 * element in the character class (perluniprops.pod notes such
-                 * properties).  */
-                op = OPFAIL;
-                *flagp |= HASWIDTH|SIMPLE;
+            for (i = 0; i <= MAX_FOLD_FROMS; i++) {
+                if (! invlist_iternext(cp_list, &start[i], &end[i])) {
+                    break;
+                }
+                partial_cp_count += end[i] - start[i] + 1;
              }
-            else if (start == end) {    /* The range is a single code point */
-                if (! invlist_iternext(cp_list, &start, &end)
  
-                        /* Don't do this optimization if it would require
-                         * changing the pattern to UTF-8 */
-                    && (start < 256 || UTF))
-                {
-                    /* Here, the list contains a single code point.  Can
-                     * optimize into an EXACTish node */
-
-                    value = start;
+            invlist_iterfinish(cp_list);
+        }
  
-                    if (! FOLD) {
-                        op = (LOC)
-                             ? EXACTL
-                             : EXACT;
-                    }
-                    else if (LOC) {
+        /* If we know at compile time that this matches every possible code
+         * point, any run-time dependencies don't matter */
+        if (start[0] == 0 && end[0] == UV_MAX) {
+            if (invert) {
+                ret = reganode(pRExC_state, OPFAIL, 0);
+            }
+            else {
+                ret = reg_node(pRExC_state, SANY);
+                MARK_NAUGHTY(1);
+            }
+            goto not_anyof;
+        }
  
-                        /* A locale node under folding with one code point can
-                         * be an EXACTFL, as its fold won't be calculated until
-                         * runtime */
-                        op = EXACTFL;
+        /* Similarly, for /l posix classes, if both a class and its
+         * complement match, any run-time dependencies don't matter */
+        if (posixl) {
+            for (namedclass = 0; namedclass < ANYOF_POSIXL_MAX;
+                                                        namedclass += 2)
+            {
+                if (   POSIXL_TEST(posixl, namedclass)      /* class */
+                    && POSIXL_TEST(posixl, namedclass + 1)) /* its complement */
+                {
+                    if (invert) {
+                        ret = reganode(pRExC_state, OPFAIL, 0);
                      }
                      else {
-
-                        /* Here, we are generally folding, but there is only
-                         * one code point to match.  If we have to, we use an
-                         * EXACT node, but it would be better for joining with
-                         * adjacent nodes in the optimization phase if we used
-                         * the same EXACTFish node that any such are likely to
-                         * be.  We can do this iff the code point doesn't
-                         * participate in any folds.  For example, an EXACTF of
-                         * a colon is the same as an EXACT one, since nothing
-                         * folds to or from a colon. */
-                        if (value < 256) {
-                            if (IS_IN_SOME_FOLD_L1(value)) {
-                                op = EXACT;
-                            }
-                        }
-                        else {
-                            if (_invlist_contains_cp(PL_utf8_foldable, value)) {
-                                op = EXACT;
-                            }
-                        }
-
-                        /* If we haven't found the node type, above, it means
-                         * we can use the prevailing one */
-                        if (op == END) {
-                            op = compute_EXACTish(pRExC_state);
-                        }
+                        ret = reg_node(pRExC_state, SANY);
+                        MARK_NAUGHTY(1);
                      }
+                    goto not_anyof;
                  }
-            }   /* End of first range contains just a single code point */
-            else if (start == 0) {
-                if (end == UV_MAX) {
-                    op = SANY;
-                    *flagp |= HASWIDTH|SIMPLE;
-                    MARK_NAUGHTY(1);
-                }
-                else if (end == '\n' - 1
-                        && invlist_iternext(cp_list, &start, &end)
-                        && start == '\n' + 1 && end == UV_MAX)
-                {
-                    op = REG_ANY;
-                    *flagp |= HASWIDTH|SIMPLE;
-                    MARK_NAUGHTY(1);
+            }
+            /* For well-behaved locales, some classes are subsets of others,
+             * so complementing the subset and including the non-complemented
+             * superset should match everything, like [\D[:alnum:]], and
+             * [[:^alpha:][:alnum:]], but some implementations of locales are
+             * buggy, and khw thinks its a bad idea to have optimization change
+             * behavior, even if it avoids an OS bug in a given case */
+
+#define isSINGLE_BIT_SET(n) isPOWER_OF_2(n)
+
+            /* If is a single posix /l class, can optimize to just that op.
+             * Such a node will not match anything in the Latin1 range, as that
+             * is not determinable until runtime, but will match whatever the
+             * class does outside that range.  (Note that some classes won't
+             * match anything outside the range, like [:ascii:]) */
+            if (    isSINGLE_BIT_SET(posixl)
+                && (partial_cp_count == 0 || start[0] > 255))
+            {
+                U8 classnum;
+                SV * class_above_latin1 = NULL;
+                bool already_inverted;
+                bool are_equivalent;
+
+                /* Compute which bit is set, which is the same thing as, e.g.,
+                 * ANYOF_CNTRL.  From
+                 * https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
+                 * */
+                static const int MultiplyDeBruijnBitPosition2[32] =
+                    {
+                    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+                    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+                    };
+
+                namedclass = MultiplyDeBruijnBitPosition2[(posixl
+                                                          * 0x077CB531U) >> 27];
+                classnum = namedclass_to_classnum(namedclass);
+
+                /* The named classes are such that the inverted number is one
+                 * larger than the non-inverted one */
+                already_inverted = namedclass
+                                 - classnum_to_namedclass(classnum);
+
+                /* Create an inversion list of the official property, inverted
+                 * if the constructed node list is inverted, and restricted to
+                 * only the above latin1 code points, which are the only ones
+                 * known at compile time */
+                _invlist_intersection_maybe_complement_2nd(
+                                                    PL_AboveLatin1,
+                                                    PL_XPosix_ptrs[classnum],
+                                                    already_inverted,
+                                                    &class_above_latin1);
+                are_equivalent = _invlistEQ(class_above_latin1, cp_list,
+                                                                        FALSE);
+                SvREFCNT_dec_NN(class_above_latin1);
+
+                if (are_equivalent) {
+
+                    /* Resolve the run-time inversion flag with this possibly
+                     * inverted class */
+                    invert = invert ^ already_inverted;
+
+                    ret = reg_node(pRExC_state,
+                                   POSIXL + invert * (NPOSIXL - POSIXL));
+                    FLAGS(REGNODE_p(ret)) = classnum;
+                    goto not_anyof;
                  }
              }
-            invlist_iterfinish(cp_list);
+        }
  
-            if (op == END) {
+        /* khw can't think of any other possible transformation involving
+         * these. */
+        if (has_runtime_dependency & HAS_USER_DEFINED_PROPERTY) {
+            goto is_anyof;
+        }
  
-                /* Here, didn't find an optimization.  See if this matches any
-                 * of the POSIX classes.  First try ASCII */
+        if (! has_runtime_dependency) {
  
-                if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 0)) {
-                    op = ASCII;
-                    *flagp |= HASWIDTH|SIMPLE;
-                }
-                else if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 1)) {
-                    op = NASCII;
-                    *flagp |= HASWIDTH|SIMPLE;
+            /* If the list is empty, nothing matches.  This happens, for
+             * example, when a Unicode property that doesn't match anything is
+             * the only element in the character class (perluniprops.pod notes
+             * such properties). */
+            if (partial_cp_count == 0) {
+                if (invert) {
+                    ret = reg_node(pRExC_state, SANY);
                  }
                  else {
+                    ret = reganode(pRExC_state, OPFAIL, 0);
+                }
+
+                goto not_anyof;
+            }
  
-                    /* Then try the other POSIX classes.  The POSIXA ones are
-                     * about the same speed as ANYOF ops, but take less room;
-                     * the ones that have above-Latin1 code point matches are
-                     * somewhat faster than ANYOF. */
+            /* If matches everything but \n */
+            if (   start[0] == 0 && end[0] == '\n' - 1
+                && start[1] == '\n' + 1 && end[1] == UV_MAX)
+            {
+                assert (! invert);
+                ret = reg_node(pRExC_state, REG_ANY);
+                MARK_NAUGHTY(1);
+                goto not_anyof;
+            }
+        }
  
-                    for (posix_class = 0;
-                         posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
-                         posix_class++)
+        /* Next see if can optimize classes that contain just a few code points
+         * into an EXACTish node.  The reason to do this is to let the
+         * optimizer join this node with adjacent EXACTish ones.
+         *
+         * An EXACTFish node can be generated even if not under /i, and vice
+         * versa.  But care must be taken.  An EXACTFish node has to be such
+         * that it only matches precisely the code points in the class, but we
+         * want to generate the least restrictive one that does that, to
+         * increase the odds of being able to join with an adjacent node.  For
+         * example, if the class contains [kK], we have to make it an EXACTFAA
+         * node to prevent the KELVIN SIGN from matching.  Whether we are under
+         * /i or not is irrelevant in this case.  Less obvious is the pattern
+         * qr/[\x{02BC}]n/i.  U+02BC is MODIFIER LETTER APOSTROPHE. That is
+         * supposed to match the single character U+0149 LATIN SMALL LETTER N
+         * PRECEDED BY APOSTROPHE.  And so even though there is no simple fold
+         * that includes \X{02BC}, there is a multi-char fold that does, and so
+         * the node generated for it must be an EXACTFish one.  On the other
+         * hand qr/:/i should generate a plain EXACT node since the colon
+         * participates in no fold whatsoever, and having it EXACT tells the
+         * optimizer the target string cannot match unless it has a colon in
+         * it.
+         *
+         * We don't typically generate an EXACTish node if doing so would
+         * require changing the pattern to UTF-8, as that affects /d and
+         * otherwise is slower.  However, under /i, not changing to UTF-8 can
+         * miss some potential multi-character folds.  We calculate the
+         * EXACTish node, and then decide if something would be missed if we
+         * don't upgrade */
+        if (   ! posixl
+            && ! invert
+
+                /* Only try if there are no more code points in the class than
+                 * in the max possible fold */
+            &&   partial_cp_count > 0 && partial_cp_count <= MAX_FOLD_FROMS + 1
+
+            && (start[0] < 256 || UTF || FOLD))
+        {
+            if (partial_cp_count == 1 && ! upper_latin1_only_utf8_matches)
+            {
+                /* We can always make a single code point class into an
+                 * EXACTish node. */
+
+                if (LOC) {
+
+                    /* Here is /l:  Use EXACTL, except /li indicates EXACTFL,
+                     * as that means there is a fold not known until runtime so
+                     * shows as only a single code point here. */
+                    op = (FOLD) ? EXACTFL : EXACTL;
+                }
+                else if (! FOLD) { /* Not /l and not /i */
+                    op = (start[0] < 256) ? EXACT : EXACT_ONLY8;
+                }
+                else if (start[0] < 256) { /* /i, not /l, and the code point is
+                                              small */
+
+                    /* Under /i, it gets a little tricky.  A code point that
+                     * doesn't participate in a fold should be an EXACT node.
+                     * We know this one isn't the result of a simple fold, or
+                     * there'd be more than one code point in the list, but it
+                     * could be part of a multi- character fold.  In that case
+                     * we better not create an EXACT node, as we would wrongly
+                     * be telling the optimizer that this code point must be in
+                     * the target string, and that is wrong.  This is because
+                     * if the sequence around this code point forms a
+                     * multi-char fold, what needs to be in the string could be
+                     * the code point that folds to the sequence.
+                     *
+                     * This handles the case of below-255 code points, as we
+                     * have an easy look up for those.  The next clause handles
+                     * the above-256 one */
+                    op = IS_IN_SOME_FOLD_L1(start[0])
+                         ? EXACTFU
+                         : EXACT;
+                }
+                else {  /* /i, larger code point.  Since we are under /i, and
+                           have just this code point, we know that it can't
+                           fold to something else, so PL_InMultiCharFold
+                           applies to it */
+                    op = _invlist_contains_cp(PL_InMultiCharFold,
+                                              start[0])
+                         ? EXACTFU_ONLY8
+                         : EXACT_ONLY8;
+                }
+
+                value = start[0];
+            }
+            else if (  ! (has_runtime_dependency & ~HAS_D_RUNTIME_DEPENDENCY)
+                     && _invlist_contains_cp(PL_in_some_fold, start[0]))
+            {
+                /* Here, the only runtime dependency, if any, is from /d, and
+                 * the class matches more than one code point, and the lowest
+                 * code point participates in some fold.  It might be that the
+                 * other code points are /i equivalent to this one, and hence
+                 * they would representable by an EXACTFish node.  Above, we
+                 * eliminated classes that contain too many code points to be
+                 * EXACTFish, with the test for MAX_FOLD_FROMS
+                 *
+                 * First, special case the ASCII fold pairs, like 'B' and 'b'.
+                 * We do this because we have EXACTFAA at our disposal for the
+                 * ASCII range */
+                if (partial_cp_count == 2 && isASCII(start[0])) {
+
+                    /* The only ASCII characters that participate in folds are
+                     * alphabetics */
+                    assert(isALPHA(start[0]));
+                    if (   end[0] == start[0]   /* First range is a single
+                                                   character, so 2nd exists */
+                        && isALPHA_FOLD_EQ(start[0], start[1]))
                      {
-                        int try_inverted;
  
-                        for (try_inverted = 0; try_inverted < 2; try_inverted++)
+                        /* Here, is part of an ASCII fold pair */
+
+                        if (   ASCII_FOLD_RESTRICTED
+                            || HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(start[0]))
                          {
+                            /* If the second clause just above was true, it
+                             * means we can't be under /i, or else the list
+                             * would have included more than this fold pair.
+                             * Therefore we have to exclude the possibility of
+                             * whatever else it is that folds to these, by
+                             * using EXACTFAA */
+                            op = EXACTFAA;
+                        }
+                        else if (HAS_NONLATIN1_FOLD_CLOSURE(start[0])) {
  
-                            /* Check if matches POSIXA, normal or inverted */
-                            if (PL_Posix_ptrs[posix_class]) {
-                                if (_invlistEQ(cp_list,
-                                               PL_Posix_ptrs[posix_class],
-                                               try_inverted))
-                                {
-                                    op = (try_inverted)
-                                        ? NPOSIXA
-                                        : POSIXA;
-                                    *flagp |= HASWIDTH|SIMPLE;
-                                    goto found_posix;
-                                }
-                            }
+                            /* Here, there's no simple fold that start[0] is part
+                             * of, but there is a multi-character one.  If we
+                             * are not under /i, we want to exclude that
+                             * possibility; if under /i, we want to include it
+                             * */
+                            op = (FOLD) ? EXACTFU : EXACTFAA;
+                        }
+                        else {
  
-                            /* Check if matches POSIXU, normal or inverted */
-                            if (_invlistEQ(cp_list,
-                                           PL_XPosix_ptrs[posix_class],
-                                           try_inverted))
-                            {
-                                op = (try_inverted)
-                                     ? NPOSIXU
-                                     : POSIXU;
-                                *flagp |= HASWIDTH|SIMPLE;
-                                goto found_posix;
-                            }
+                            /* Here, the only possible fold start[0] particpates in
+                             * is with start[1].  /i or not isn't relevant */
+                            op = EXACTFU;
                          }
+
+                        value = toFOLD(start[0]);
                      }
-                  found_posix: ;
-                }
-
-                /* If it didn't match a POSIX class, it might be able to be
-                 * turned into an ANYOFM node.  Compare two different bytes,
-                 * bit-by-bit.  In some positions, the bits in each will be 1;
-                 * and in other positions both will be 0; and in some positions
-                 * the bit will be 1 in one byte, and 0 in the other.  Let 'n'
-                 * be the number of positions where the bits differ.  We create
-                 * a mask which has exactly 'n' 0 bits, each in a position
-                 * where the two bytes differ.  Now take the set of all bytes
-                 * that when ANDed with the mask yield the same result.  That
-                 * set has 2**n elements, and is representable by just two 8
-                 * bit numbers: the result and the mask.  Importantly, matching
-                 * the set can be vectorized by creating a word full of the
-                 * result bytes, and a word full of the mask bytes, yielding a
-                 * significant speed up.  Here, see if this node matches such a
-                 * set.  As a concrete example consider [01], and the byte
-                 * representing '0' which is 0x30 on ASCII machines.  It has
-                 * the bits 0011 0000.  Take the mask 1111 1110.  If we AND
-                 * 0x31 and 0x30 with that mask we get 0x30.  Any other bytes
-                 * ANDed yield something else.  So [01], which is a common
-                 * usage, is optimizable into ANYOFM, and can benefit from the
-                 * speed up.  We can only do this on UTF-8 invariant bytes,
-                 * because the variance would throw this off.  */
-                if (op == END) {
-                    PERL_UINT_FAST8_T inverted = 0;
-#ifdef EBCDIC
-                    const PERL_UINT_FAST8_T max_permissible = 0xFF;
-#else
-                    const PERL_UINT_FAST8_T max_permissible = 0x7F;
-#endif
-                    if (invlist_highest(cp_list) > max_permissible) {
-                        _invlist_invert(cp_list);
-                        inverted = 1;
+                }
+                else if (  ! upper_latin1_only_utf8_matches
+                         || (   _invlist_len(upper_latin1_only_utf8_matches)
+                                                                          == 2
+                             && PL_fold_latin1[
+                               invlist_highest(upper_latin1_only_utf8_matches)]
+                             == start[0]))
+                {
+                    /* Here, the smallest character is non-ascii or there are
+                     * more than 2 code points matched by this node.  Also, we
+                     * either don't have /d UTF-8 dependent matches, or if we
+                     * do, they look like they could be a single character that
+                     * is the fold of the lowest one in the always-match list.
+                     * This test quickly excludes most of the false positives
+                     * when there are /d UTF-8 depdendent matches.  These are
+                     * like LATIN CAPITAL LETTER A WITH GRAVE matching LATIN
+                     * SMALL LETTER A WITH GRAVE iff the target string is
+                     * UTF-8.  (We don't have to worry above about exceeding
+                     * the array bounds of PL_fold_latin1[] because any code
+                     * point in 'upper_latin1_only_utf8_matches' is below 256.)
+                     *
+                     * EXACTFAA would apply only to pairs (hence exactly 2 code
+                     * points) in the ASCII range, so we can't use it here to
+                     * artificially restrict the fold domain, so we check if
+                     * the class does or does not match some EXACTFish node.
+                     * Further, if we aren't under /i, and and the folded-to
+                     * character is part of a multi-character fold, we can't do
+                     * this optimization, as the sequence around it could be
+                     * that multi-character fold, and we don't here know the
+                     * context, so we have to assume it is that multi-char
+                     * fold, to prevent potential bugs.
+                     *
+                     * To do the general case, we first find the fold of the
+                     * lowest code point (which may be higher than the lowest
+                     * one), then find everything that folds to it.  (The data
+                     * structure we have only maps from the folded code points,
+                     * so we have to do the earlier step.) */
+
+                    Size_t foldlen;
+                    U8 foldbuf[UTF8_MAXBYTES_CASE];
+                    UV folded = _to_uni_fold_flags(start[0],
+                                                        foldbuf, &foldlen, 0);
+                    unsigned int first_fold;
+                    const unsigned int * remaining_folds;
+                    Size_t folds_to_this_cp_count = _inverse_folds(
+                                                            folded,
+                                                            &first_fold,
+                                                            &remaining_folds);
+                    Size_t folds_count = folds_to_this_cp_count + 1;
+                    SV * fold_list = _new_invlist(folds_count);
+                    unsigned int i;
+
+                    /* If there are UTF-8 dependent matches, create a temporary
+                     * list of what this node matches, including them. */
+                    SV * all_cp_list = NULL;
+                    SV ** use_this_list = &cp_list;
+
+                    if (upper_latin1_only_utf8_matches) {
+                        all_cp_list = _new_invlist(0);
+                        use_this_list = &all_cp_list;
+                        _invlist_union(cp_list,
+                                       upper_latin1_only_utf8_matches,
+                                       use_this_list);
                      }
  
-                    if (invlist_highest(cp_list) <= max_permissible) {
-                    Size_t cp_count = 0;
-                    bool first_time = TRUE;
-                    unsigned int lowest_cp = 0xFF;
-                    U8 bits_differing = 0;
-
-                    /* Only needed on EBCDIC, as there, variants and non- are mixed
-                     * together.  Could #ifdef it out on ASCII, but probably the
-                     * compiler will optimize it out */
-                    bool has_variant = FALSE;
-
-                    /* Go through the bytes and find the bit positions that differ */
-                    invlist_iterinit(cp_list);
-                    while (invlist_iternext(cp_list, &start, &end)) {
-                        unsigned int i = start;
+                    /* Having gotten everything that participates in the fold
+                     * containing the lowest code point, we turn that into an
+                     * inversion list, making sure everything is included. */
+                    fold_list = add_cp_to_invlist(fold_list, start[0]);
+                    fold_list = add_cp_to_invlist(fold_list, folded);
+                    if (folds_to_this_cp_count > 0) {
+                        fold_list = add_cp_to_invlist(fold_list, first_fold);
+                        for (i = 0; i + 1 < folds_to_this_cp_count; i++) {
+                            fold_list = add_cp_to_invlist(fold_list,
+                                                        remaining_folds[i]);
+                        }
+                    }
  
-                        cp_count += end - start + 1;
+                    /* If the fold list is identical to what's in this ANYOF
+                     * node, the node can be represented by an EXACTFish one
+                     * instead */
+                    if (_invlistEQ(*use_this_list, fold_list,
+                                   0 /* Don't complement */ )
+                    ) {
  
-                        if (first_time) {
-                            if (! UVCHR_IS_INVARIANT(i)) {
-                                has_variant = TRUE;
-                                continue;
+                        /* But, we have to be careful, as mentioned above.
+                         * Just the right sequence of characters could match
+                         * this if it is part of a multi-character fold.  That
+                         * IS what we want if we are under /i.  But it ISN'T
+                         * what we want if not under /i, as it could match when
+                         * it shouldn't.  So, when we aren't under /i and this
+                         * character participates in a multi-char fold, we
+                         * don't optimize into an EXACTFish node.  So, for each
+                         * case below we have to check if we are folding
+                         * and if not, if it is not part of a multi-char fold.
+                         * */
+                        if (start[0] > 255) {    /* Highish code point */
+                            if (FOLD || ! _invlist_contains_cp(
+                                            PL_InMultiCharFold, folded))
+                            {
+                                op = (LOC)
+                                     ? EXACTFLU8
+                                     : (ASCII_FOLD_RESTRICTED)
+                                       ? EXACTFAA
+                                       : EXACTFU_ONLY8;
+                                value = folded;
                              }
-
-                            first_time = FALSE;
-                            lowest_cp = start;
-
-                            i++;
+                        }   /* Below, the lowest code point < 256 */
+                        else if (    FOLD
+                                 &&  folded == 's'
+                                 &&  DEPENDS_SEMANTICS)
+                        {   /* An EXACTF node containing a single character
+                                's', can be an EXACTFU if it doesn't get
+                                joined with an adjacent 's' */
+                            op = EXACTFU_S_EDGE;
+                            value = folded;
                          }
+                        else if (    FOLD
+                                || ! HAS_NONLATIN1_FOLD_CLOSURE(start[0]))
+                        {
+                            if (upper_latin1_only_utf8_matches) {
+                                op = EXACTF;
  
-                        /* Find the bit positions that differ from the lowest
-                         * code point in the node.  Keep track of all such
-                         * positions by OR'ing */
-                        for (; i <= end; i++) {
-                            if (! UVCHR_IS_INVARIANT(i)) {
-                                has_variant = TRUE;
-                                continue;
+                                /* We can't use the fold, as that only matches
+                                 * under UTF-8 */
+                                value = start[0];
+                            }
+                            else if (     UNLIKELY(start[0] == MICRO_SIGN)
+                                     && ! UTF)
+                            {   /* EXACTFUP is a special node for this
+                                   character */
+                                op = (ASCII_FOLD_RESTRICTED)
+                                     ? EXACTFAA
+                                     : EXACTFUP;
+                                value = MICRO_SIGN;
+                            }
+                            else if (     ASCII_FOLD_RESTRICTED
+                                     && ! isASCII(start[0]))
+                            {   /* For ASCII under /iaa, we can use EXACTFU
+                                   below */
+                                op = EXACTFAA;
+                                value = folded;
+                            }
+                            else {
+                                op = EXACTFU;
+                                value = folded;
                              }
-
-                            bits_differing  |= i ^ lowest_cp;
                          }
                      }
-                    invlist_iterfinish(cp_list);
-
-                    /* At the end of the loop, we count how many bits differ
-                     * from the bits in lowest code point, call the count 'd'.
-                     * If the set we found contains 2**d elements, it is the
-                     * closure of all code points that differ only in those bit
-                     * positions.  To convince yourself of that, first note
-                     * that the number in the closure must be a power of 2,
-                     * which we test for.  The only way we could have that
-                     * count and it be some differing set, is if we got some
-                     * code points that don't differ from the lowest code point
-                     * in any position, but do differ from each other in some
-                     * other position.  That means one code point has a 1 in
-                     * that position, and another has a 0.  But that would mean
-                     * that one of them differs from the lowest code point in
-                     * that position, which possibility we've already excluded.
-                     * */
-                    if ( ! has_variant
-                        && cp_count == 1U << PL_bitcount[bits_differing])
-                    {
-                        assert(inverted || cp_count > 1);
-                        op = ANYOFM + inverted;;
  
-                        /* We need to make the bits that differ be 0's */
-                        ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS
-                                                         */
+                    SvREFCNT_dec_NN(fold_list);
+                    SvREFCNT_dec(all_cp_list);
+                }
+            }
+
+            if (op != END) {
+
+                /* Here, we have calculated what EXACTish node we would use.
+                 * But we don't use it if it would require converting the
+                 * pattern to UTF-8, unless not using it could cause us to miss
+                 * some folds (hence be buggy) */
  
-                        /* The argument is the lowest code point */
-                        anode_arg = lowest_cp;
-                        *flagp |= HASWIDTH|SIMPLE;
+                if (! UTF && value > 255) {
+                    SV * in_multis = NULL;
+
+                    assert(FOLD);
+
+                    /* If there is no code point that is part of a multi-char
+                     * fold, then there aren't any matches, so we don't do this
+                     * optimization.  Otherwise, it could match depending on
+                     * the context around us, so we do upgrade */
+                    _invlist_intersection(PL_InMultiCharFold, cp_list, &in_multis);
+                    if (UNLIKELY(_invlist_len(in_multis) != 0)) {
+                        REQUIRE_UTF8(flagp);
+                    }
+                    else {
+                        op = END;
                      }
                  }
-                if (inverted) {
-                    _invlist_invert(cp_list);
+
+                if (op != END) {
+                    U8 len = (UTF) ? UVCHR_SKIP(value) : 1;
+
+                    ret = regnode_guts(pRExC_state, op, len, "exact");
+                    FILL_NODE(ret, op);
+                    RExC_emit += 1 + STR_SZ(len);
+                    STR_LEN(REGNODE_p(ret)) = len;
+                    if (len == 1) {
+                        *STRING(REGNODE_p(ret)) = value;
+                    }
+                    else {
+                        uvchr_to_utf8((U8 *) STRING(REGNODE_p(ret)), value);
+                    }
+                    goto not_anyof;
                  }
              }
-            }
          }
  
-        if (op != END) {
-            if (regarglen[op]) {
-                ret = reganode(pRExC_state, op, anode_arg);
-            } else {
-                ret = reg_node(pRExC_state, op);
+        if (! has_runtime_dependency) {
+
+            /* See if this can be turned into an ANYOFM node.  Think about the
+             * bit patterns in two different bytes.  In some positions, the
+             * bits in each will be 1; and in other positions both will be 0;
+             * and in some positions the bit will be 1 in one byte, and 0 in
+             * the other.  Let 'n' be the number of positions where the bits
+             * differ.  We create a mask which has exactly 'n' 0 bits, each in
+             * a position where the two bytes differ.  Now take the set of all
+             * bytes that when ANDed with the mask yield the same result.  That
+             * set has 2**n elements, and is representable by just two 8 bit
+             * numbers: the result and the mask.  Importantly, matching the set
+             * can be vectorized by creating a word full of the result bytes,
+             * and a word full of the mask bytes, yielding a significant speed
+             * up.  Here, see if this node matches such a set.  As a concrete
+             * example consider [01], and the byte representing '0' which is
+             * 0x30 on ASCII machines.  It has the bits 0011 0000.  Take the
+             * mask 1111 1110.  If we AND 0x31 and 0x30 with that mask we get
+             * 0x30.  Any other bytes ANDed yield something else.  So [01],
+             * which is a common usage, is optimizable into ANYOFM, and can
+             * benefit from the speed up.  We can only do this on UTF-8
+             * invariant bytes, because they have the same bit patterns under
+             * UTF-8 as not. */
+            PERL_UINT_FAST8_T inverted = 0;
+#ifdef EBCDIC
+            const PERL_UINT_FAST8_T max_permissible = 0xFF;
+#else
+            const PERL_UINT_FAST8_T max_permissible = 0x7F;
+#endif
+            /* If doesn't fit the criteria for ANYOFM, invert and try again.
+             * If that works we will instead later generate an NANYOFM, and
+             * invert back when through */
+            if (invlist_highest(cp_list) > max_permissible) {
+                _invlist_invert(cp_list);
+                inverted = 1;
              }
-            Set_Node_Offset_Length(REGNODE_p(ret), orig_parse - RExC_start,
-                                                   RExC_parse - orig_parse);;
  
-            if (PL_regkind[op] == EXACT) {
-                alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value,
-                                           TRUE /* downgradable to EXACT */
-                                          );
-            }
-            else if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
-                FLAGS(REGNODE_p(ret)) = posix_class;
-            }
-            else if (PL_regkind[op] == ANYOFM) {
-                FLAGS(REGNODE_p(ret)) = ANYOFM_mask;
-            }
+            if (invlist_highest(cp_list) <= max_permissible) {
+                UV this_start, this_end;
+                UV lowest_cp = UV_MAX;  /* inited to suppress compiler warn */
+                U8 bits_differing = 0;
+                Size_t full_cp_count = 0;
+                bool first_time = TRUE;
  
-            SvREFCNT_dec_NN(cp_list);
-            return ret;
-        }
-    }   /* End of seeing if can optimize it into a different node */
+                /* Go through the bytes and find the bit positions that differ
+                 * */
+                invlist_iterinit(cp_list);
+                while (invlist_iternext(cp_list, &this_start, &this_end)) {
+                    unsigned int i = this_start;
  
-    /* It's going to be an ANYOF node. */
-    op = (use_anyofd)
-         ? ANYOFD
-         : ((posixl)
-            ? ANYOFPOSIXL
-            : ((LOC)
-               ? ANYOFL
-               : ANYOF));
-    ret = regnode_guts(pRExC_state, op, regarglen[op], "anyof");
-    FILL_NODE(ret, op);        /* We set the argument later */
-    RExC_emit += 1 + regarglen[op];
-    ANYOF_FLAGS(REGNODE_p(ret)) = anyof_flags;
+                    if (first_time) {
+                        if (! UVCHR_IS_INVARIANT(i)) {
+                            goto done_anyofm;
+                        }
  
-    /* Here, <cp_list> contains all the code points we can determine at
-     * compile time that match under all conditions.  Go through it, and
+                        first_time = FALSE;
+                        lowest_cp = this_start;
+
+                        /* We have set up the code point to compare with.
+                         * Don't compare it with itself */
+                        i++;
+                    }
+
+                    /* Find the bit positions that differ from the lowest code
+                     * point in the node.  Keep track of all such positions by
+                     * OR'ing */
+                    for (; i <= this_end; i++) {
+                        if (! UVCHR_IS_INVARIANT(i)) {
+                            goto done_anyofm;
+                        }
+
+                        bits_differing  |= i ^ lowest_cp;
+                    }
+
+                    full_cp_count += this_end - this_start + 1;
+                }
+                invlist_iterfinish(cp_list);
+
+                /* At the end of the loop, we count how many bits differ from
+                 * the bits in lowest code point, call the count 'd'.  If the
+                 * set we found contains 2**d elements, it is the closure of
+                 * all code points that differ only in those bit positions.  To
+                 * convince yourself of that, first note that the number in the
+                 * closure must be a power of 2, which we test for.  The only
+                 * way we could have that count and it be some differing set,
+                 * is if we got some code points that don't differ from the
+                 * lowest code point in any position, but do differ from each
+                 * other in some other position.  That means one code point has
+                 * a 1 in that position, and another has a 0.  But that would
+                 * mean that one of them differs from the lowest code point in
+                 * that position, which possibility we've already excluded.  */
+                if (  (inverted || full_cp_count > 1)
+                    && full_cp_count == 1U << PL_bitcount[bits_differing])
+                {
+                    U8 ANYOFM_mask;
+
+                    op = ANYOFM + inverted;;
+
+                    /* We need to make the bits that differ be 0's */
+                    ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS */
+
+                    /* The argument is the lowest code point */
+                    ret = reganode(pRExC_state, op, lowest_cp);
+                    FLAGS(REGNODE_p(ret)) = ANYOFM_mask;
+                }
+            }
+          done_anyofm:
+
+            if (inverted) {
+                _invlist_invert(cp_list);
+            }
+
+            if (op != END) {
+                goto not_anyof;
+            }
+        }
+
+        if (! (anyof_flags & ANYOF_LOCALE_FLAGS)) {
+            PERL_UINT_FAST8_T type;
+            SV * intersection = NULL;
+            SV* d_invlist = NULL;
+
+            /* See if this matches any of the POSIX classes.  The POSIXA and
+             * POSIXD ones are about the same speed as ANYOF ops, but take less
+             * room; the ones that have above-Latin1 code point matches are
+             * somewhat faster than ANYOF.  */
+
+            for (type = POSIXA; type >= POSIXD; type--) {
+                int posix_class;
+
+                if (type == POSIXL) {   /* But not /l posix classes */
+                    continue;
+                }
+
+                for (posix_class = 0;
+                     posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
+                     posix_class++)
+                {
+                    SV** our_code_points = &cp_list;
+                    SV** official_code_points;
+                    int try_inverted;
+
+                    if (type == POSIXA) {
+                        official_code_points = &PL_Posix_ptrs[posix_class];
+                    }
+                    else {
+                        official_code_points = &PL_XPosix_ptrs[posix_class];
+                    }
+
+                    /* Skip non-existent classes of this type.  e.g. \v only
+                     * has an entry in PL_XPosix_ptrs */
+                    if (! *official_code_points) {
+                        continue;
+                    }
+
+                    /* Try both the regular class, and its inversion */
+                    for (try_inverted = 0; try_inverted < 2; try_inverted++) {
+                        bool this_inverted = invert ^ try_inverted;
+
+                        if (type != POSIXD) {
+
+                            /* This class that isn't /d can't match if we have
+                             * /d dependencies */
+                            if (has_runtime_dependency
+                                                    & HAS_D_RUNTIME_DEPENDENCY)
+                            {
+                                continue;
+                            }
+                        }
+                        else /* is /d */ if (! this_inverted) {
+
+                            /* /d classes don't match anything non-ASCII below
+                             * 256 unconditionally (which cp_list contains) */
+                            _invlist_intersection(cp_list, PL_UpperLatin1,
+                                                           &intersection);
+                            if (_invlist_len(intersection) != 0) {
+                                continue;
+                            }
+
+                            SvREFCNT_dec(d_invlist);
+                            d_invlist = invlist_clone(cp_list, NULL);
+
+                            /* But under UTF-8 it turns into using /u rules.
+                             * Add the things it matches under these conditions
+                             * so that we check below that these are identical
+                             * to what the tested class should match */
+                            if (upper_latin1_only_utf8_matches) {
+                                _invlist_union(
+                                            d_invlist,
+                                            upper_latin1_only_utf8_matches,
+                                            &d_invlist);
+                            }
+                            our_code_points = &d_invlist;
+                        }
+                        else {  /* POSIXD, inverted.  If this doesn't have this
+                                   flag set, it isn't /d. */
+                            if (! (anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
+                            {
+                                continue;
+                            }
+                            our_code_points = &cp_list;
+                        }
+
+                        /* Here, have weeded out some things.  We want to see
+                         * if the list of characters this node contains
+                         * ('*our_code_points') precisely matches those of the
+                         * class we are currently checking against
+                         * ('*official_code_points'). */
+                        if (_invlistEQ(*our_code_points,
+                                       *official_code_points,
+                                       try_inverted))
+                        {
+                            /* Here, they precisely match.  Optimize this ANYOF
+                             * node into its equivalent POSIX one of the
+                             * correct type, possibly inverted */
+                            ret = reg_node(pRExC_state, (try_inverted)
+                                                        ? type + NPOSIXA
+                                                                - POSIXA
+                                                        : type);
+                            FLAGS(REGNODE_p(ret)) = posix_class;
+                            SvREFCNT_dec(d_invlist);
+                            SvREFCNT_dec(intersection);
+                            goto not_anyof;
+                        }
+                    }
+                }
+            }
+            SvREFCNT_dec(d_invlist);
+            SvREFCNT_dec(intersection);
+        }
+
+        /* If didn't find an optimization and there is no need for a
+        * bitmap, optimize to indicate that */
+        if (     start[0] >= NUM_ANYOF_CODE_POINTS
+            && ! LOC
+            && ! upper_latin1_only_utf8_matches)
+        {
+            op = ANYOFH;
+        }
+    }   /* End of seeing if can optimize it into a different node */
+
+  is_anyof: /* It's going to be an ANYOF node. */
+    if (op != ANYOFH) {
+        op = (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY)
+             ? ANYOFD
+             : ((posixl)
+                ? ANYOFPOSIXL
+                : ((LOC)
+                   ? ANYOFL
+                   : ANYOF));
+    }
+
+    ret = regnode_guts(pRExC_state, op, regarglen[op], "anyof");
+    FILL_NODE(ret, op);        /* We set the argument later */
+    RExC_emit += 1 + regarglen[op];
+    ANYOF_FLAGS(REGNODE_p(ret)) = anyof_flags;
+
+    /* Here, <cp_list> contains all the code points we can determine at
+     * compile time that match under all conditions.  Go through it, and
       * for things that belong in the bitmap, put them there, and delete from
       * <cp_list>.  While we are at it, see if everything above 255 is in the
       * list, and if so, set a flag to speed up execution */
@@ -18474,44 +18997,35 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      /* Here, the bitmap has been populated with all the Latin1 code points that
       * always match.  Can now add to the overall list those that match only
-     * when the target string is UTF-8 (<has_upper_latin1_only_utf8_matches>).
+     * when the target string is UTF-8 (<upper_latin1_only_utf8_matches>).
       * */
-    if (has_upper_latin1_only_utf8_matches) {
+    if (upper_latin1_only_utf8_matches) {
         if (cp_list) {
             _invlist_union(cp_list,
-                           has_upper_latin1_only_utf8_matches,
+                           upper_latin1_only_utf8_matches,
                             &cp_list);
-           SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
+           SvREFCNT_dec_NN(upper_latin1_only_utf8_matches);
         }
         else {
-           cp_list = has_upper_latin1_only_utf8_matches;
+           cp_list = upper_latin1_only_utf8_matches;
         }
          ANYOF_FLAGS(REGNODE_p(ret)) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
      }
  
-    /* If there is a swash and more than one element, we can't use the swash in
-     * the optimization below. */
-    if (swash && element_count > 1) {
-       SvREFCNT_dec_NN(swash);
-       swash = NULL;
-    }
-
-    /* Note that the optimization of using 'swash' if it is the only thing in
-     * the class doesn't have us change swash at all, so it can include things
-     * that are also in the bitmap; otherwise we have purposely deleted that
-     * duplicate information */
      set_ANYOF_arg(pRExC_state, REGNODE_p(ret), cp_list,
                    (HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
                     ? listsv : NULL,
-                  only_utf8_locale_list,
-                  swash, has_user_defined_property);
+                  only_utf8_locale_list);
+    return ret;
  
-    *flagp |= HASWIDTH|SIMPLE;
+  not_anyof:
  
-    if (ANYOF_FLAGS(REGNODE_p(ret)) & ANYOF_LOCALE_FLAGS) {
-        RExC_contains_locale = 1;
-    }
+    /* Here, the node is getting optimized into something that's not an ANYOF
+     * one.  Finish up. */
  
+    Set_Node_Offset_Length(REGNODE_p(ret), orig_parse - RExC_start,
+                                           RExC_parse - orig_parse);;
+    SvREFCNT_dec(cp_list);;
      return ret;
  }
  
@@ -18522,31 +19036,21 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
                  regnode* const node,
                  SV* const cp_list,
                  SV* const runtime_defns,
-                SV* const only_utf8_locale_list,
-                SV* const swash,
-                const bool has_user_defined_property)
+                SV* const only_utf8_locale_list)
  {
      /* Sets the arg field of an ANYOF-type node 'node', using information about
       * the node passed-in.  If there is nothing outside the node's bitmap, the
       * arg is set to ANYOF_ONLY_HAS_BITMAP.  Otherwise, it sets the argument to
       * the count returned by add_data(), having allocated and stored an array,
-     * av, that that count references, as follows:
-     *  av[0] stores the character class description in its textual form.
-     *        This is used later (regexec.c:Perl_regclass_swash()) to
-     *        initialize the appropriate swash, and is also useful for dumping
-     *        the regnode.  This is set to &PL_sv_undef if the textual
-     *        description is not needed at run-time (as happens if the other
-     *        elements completely define the class)
-     *  av[1] if &PL_sv_undef, is a placeholder to later contain the swash
-     *        computed from av[0].  But if no further computation need be done,
-     *        the swash is stored here now (and av[0] is &PL_sv_undef).
-     *  av[2] stores the inversion list of code points that match only if the
-     *        current locale is UTF-8
-     *  av[3] stores the cp_list inversion list for use in addition or instead
-     *        of av[0]; used only if cp_list exists and av[1] is &PL_sv_undef.
-     *        (Otherwise everything needed is already in av[0] and av[1])
-     *  av[4] is set if any component of the class is from a user-defined
-     *        property; used only if av[3] exists */
+     * av, as follows:
+     *
+     *  av[0] stores the inversion list defining this class as far as known at
+     *        this time, or PL_sv_undef if nothing definite is now known.
+     *  av[1] stores the inversion list of code points that match only if the
+     *        current locale is UTF-8, or if none, PL_sv_undef if there is an
+     *        av[2], or no entry otherwise.
+     *  av[2] stores the list of user-defined properties whose subroutine
+     *        definitions aren't known at this time, or no entry if none. */
  
      UV n;
  
@@ -18561,26 +19065,16 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
         AV * const av = newAV();
         SV *rv;
  
-       av_store(av, 0, (runtime_defns)
-                       ? SvREFCNT_inc(runtime_defns) : &PL_sv_undef);
-       if (swash) {
-           assert(cp_list);
-           av_store(av, 1, swash);
-           SvREFCNT_dec_NN(cp_list);
-       }
-       else {
-           av_store(av, 1, &PL_sv_undef);
-           if (cp_list) {
-               av_store(av, 3, cp_list);
-               av_store(av, 4, newSVuv(has_user_defined_property));
-           }
-       }
+        if (cp_list) {
+            av_store(av, INVLIST_INDEX, cp_list);
+        }
  
          if (only_utf8_locale_list) {
-           av_store(av, 2, only_utf8_locale_list);
+            av_store(av, ONLY_LOCALE_MATCHES_INDEX, only_utf8_locale_list);
          }
-        else {
-           av_store(av, 2, &PL_sv_undef);
+
+        if (runtime_defns) {
+            av_store(av, DEFERRED_USER_DEFINED_INDEX, SvREFCNT_inc(runtime_defns));
          }
  
         rv = newRV_noinc(MUTABLE_SV(av));
@@ -18601,14 +19095,15 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
  
  {
      /* For internal core use only.
-     * Returns the swash for the input 'node' in the regex 'prog'.
-     * If <doinit> is 'true', will attempt to create the swash if not already
-     *   done.
+     * Returns the inversion list for the input 'node' in the regex 'prog'.
+     * If <doinit> is 'true', will attempt to create the inversion list if not
+     *    already done.
       * If <listsvp> is non-null, will return the printable contents of the
-     *    swash.  This can be used to get debugging information even before the
-     *    swash exists, by calling this function with 'doinit' set to false, in
-     *    which case the components that will be used to eventually create the
-     *    swash are returned  (in a printable form).
+     *    property definition.  This can be used to get debugging information
+     *    even before the inversion list exists, by calling this function with
+     *    'doinit' set to false, in which case the components that will be used
+     *    to eventually create the inversion list are returned  (in a printable
+     *    form).
       * If <only_utf8_locale_ptr> is not NULL, it is where this routine is to
       *    store an inversion list of code points that should match only if the
       *    execution-time locale is a UTF-8 one.
@@ -18616,18 +19111,17 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
       *    inversion list of the code points that would be instead returned in
       *    <listsvp> if this were NULL.  Thus, what gets output in <listsvp>
       *    when this parameter is used, is just the non-code point data that
-     *    will go into creating the swash.  This currently should be just
+     *    will go into creating the inversion list.  This currently should be just
       *    user-defined properties whose definitions were not known at compile
       *    time.  Using this parameter allows for easier manipulation of the
-     *    swash's data by the caller.  It is illegal to call this function with
-     *    this parameter set, but not <listsvp>
+     *    inversion list's data by the caller.  It is illegal to call this
+     *    function with this parameter set, but not <listsvp>
       *
       * Tied intimately to how S_set_ANYOF_arg sets up the data structure.  Note
-     * that, in spite of this function's name, the swash it returns may include
-     * the bitmap data as well */
+     * that, in spite of this function's name, the inversion list it returns
+     * may include the bitmap data as well */
  
-    SV *sw  = NULL;
-    SV *si  = NULL;         /* Input swash initialization string */
+    SV *si  = NULL;         /* Input initialization string */
      SV* invlist = NULL;
  
      RXi_GET_DECL(prog, progi);
@@ -18643,69 +19137,72 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
             SV * const rv = MUTABLE_SV(data->data[n]);
             AV * const av = MUTABLE_AV(SvRV(rv));
             SV **const ary = AvARRAY(av);
-           U8 swash_init_flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
  
-           si = *ary;  /* ary[0] = the string to initialize the swash with */
+            invlist = ary[INVLIST_INDEX];
  
-            if (av_tindex_skip_len_mg(av) >= 2) {
-                if (only_utf8_locale_ptr
-                    && ary[2]
-                    && ary[2] != &PL_sv_undef)
-                {
-                    *only_utf8_locale_ptr = ary[2];
-                }
-                else {
-                    assert(only_utf8_locale_ptr);
-                    *only_utf8_locale_ptr = NULL;
-                }
-
-                /* Elements 3 and 4 are either both present or both absent. [3]
-                 * is any inversion list generated at compile time; [4]
-                 * indicates if that inversion list has any user-defined
-                 * properties in it. */
-                if (av_tindex_skip_len_mg(av) >= 3) {
-                    invlist = ary[3];
-                    if (SvUV(ary[4])) {
-                        swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
+            if (av_tindex_skip_len_mg(av) >= ONLY_LOCALE_MATCHES_INDEX) {
+                *only_utf8_locale_ptr = ary[ONLY_LOCALE_MATCHES_INDEX];
+            }
+
+            if (av_tindex_skip_len_mg(av) >= DEFERRED_USER_DEFINED_INDEX) {
+                si = ary[DEFERRED_USER_DEFINED_INDEX];
+            }
+
+           if (doinit && (si || invlist)) {
+                if (si) {
+                    bool user_defined;
+                    SV * msg = newSVpvs_flags("", SVs_TEMP);
+
+                    SV * prop_definition = handle_user_defined_property(
+                            "", 0, FALSE,   /* There is no \p{}, \P{} */
+                            SvPVX_const(si)[1] - '0',   /* /i or not has been
+                                                           stored here for just
+                                                           this occasion */
+                            TRUE,           /* run time */
+                            si,             /* The property definition  */
+                            &user_defined,
+                            msg,
+                            0               /* base level call */
+                           );
+
+                    if (SvCUR(msg)) {
+                        assert(prop_definition == NULL);
+
+                        Perl_croak(aTHX_ "%" UTF8f,
+                                UTF8fARG(SvUTF8(msg), SvCUR(msg), SvPVX(msg)));
                      }
-                }
-                else {
-                    invlist = NULL;
-                }
-           }
  
-           /* Element [1] is reserved for the set-up swash.  If already there,
-            * return it; if not, create it and store it there */
-           if (ary[1] && SvROK(ary[1])) {
-               sw = ary[1];
-           }
-           else if (doinit && ((si && si != &PL_sv_undef)
-                                 || (invlist && invlist != &PL_sv_undef))) {
-               assert(si);
-               sw = _core_swash_init("utf8", /* the utf8 package */
-                                     "", /* nameless */
-                                     si,
-                                     1, /* binary */
-                                     0, /* not from tr/// */
-                                     invlist,
-                                     &swash_init_flags);
-               (void)av_store(av, 1, sw);
+                    if (invlist) {
+                        _invlist_union(invlist, prop_definition, &invlist);
+                        SvREFCNT_dec_NN(prop_definition);
+                    }
+                    else {
+                        invlist = prop_definition;
+                    }
+
+                    STATIC_ASSERT_STMT(ONLY_LOCALE_MATCHES_INDEX == 1 + INVLIST_INDEX);
+                    STATIC_ASSERT_STMT(DEFERRED_USER_DEFINED_INDEX == 1 + ONLY_LOCALE_MATCHES_INDEX);
+
+                    av_store(av, INVLIST_INDEX, invlist);
+                    av_fill(av, (ary[ONLY_LOCALE_MATCHES_INDEX])
+                                 ? ONLY_LOCALE_MATCHES_INDEX:
+                                 INVLIST_INDEX);
+                    si = NULL;
+                }
             }
         }
      }
  
-    /* If requested, return a printable version of what this swash matches */
+    /* If requested, return a printable version of what this ANYOF node matches
+     * */
      if (listsvp) {
         SV* matches_string = NULL;
  
-        /* The swash should be used, if possible, to get the data, as it
-         * contains the resolved data.  But this function can be called at
-         * compile-time, before everything gets resolved, in which case we
-         * return the currently best available information, which is the string
-         * that will eventually be used to do that resolving, 'si' */
-       if ((! sw || (invlist = _get_swash_invlist(sw)) == NULL)
-            && (si && si != &PL_sv_undef))
-        {
+        /* This function can be called at compile-time, before everything gets
+         * resolved, in which case we return the currently best available
+         * information, which is the string that will eventually be used to do
+         * that resolving, 'si' */
+       if (si) {
              /* Here, we only have 'si' (and possibly some passed-in data in
               * 'invlist', which is handled below)  If the caller only wants
               * 'si', use that.  */
@@ -18798,12 +19295,10 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                  if (SvCUR(matches_string)) {  /* Get rid of trailing blank */
                      SvCUR_set(matches_string, SvCUR(matches_string) - 1);
                  }
-            } /* end of has an 'si' but no swash */
+            } /* end of has an 'si' */
         }
  
-        /* If we have a swash in place, its equivalent inversion list was above
-         * placed into 'invlist'.  If not, this variable may contain a stored
-         * inversion list which is information beyond what is in 'si' */
+        /* Add the stuff that's already known */
          if (invlist) {
  
              /* Again, if the caller doesn't want the output inversion list, put
@@ -18827,7 +19322,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
         *listsvp = matches_string;
      }
  
-    return sw;
+    return invlist;
  }
  #endif /* !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION) */
  
@@ -18950,6 +19445,9 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
  STATIC void
  S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size)
  {
+    /* 'size' is the delta to add or subtract from the current memory allocated
+     * to the regex engine being constructed */
+
      PERL_ARGS_ASSERT_CHANGE_ENGINE_SIZE;
  
      RExC_size += size;
@@ -19107,7 +19605,11 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
      src = REGNODE_p(RExC_emit);
      RExC_emit += size;
      dst = REGNODE_p(RExC_emit);
-    if (RExC_open_parens) {
+
+    /* If we are in a "count the parentheses" pass, the numbers are unreliable,
+     * and [perl #133871] shows this can lead to problems, so skip this
+     * realignment of parens until a later pass when they are reliable */
+    if (! IN_PARENS_PASS && RExC_open_parens) {
          int paren;
          /*DEBUG_PARSE_FMT("inst"," - %" IVdf, (IV)RExC_npar);*/
          /* remember that RExC_npar is rex->nparens + 1,
@@ -19180,10 +19682,13 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
  }
  
  /*
-- regtail - set the next-pointer at the end of a node chain of p to val.
+- regtail - set the next-pointer at the end of a node chain of p to val.  If
+            that value won't fit in the space available, instead returns FALSE.
+            (Except asserts if we can't fit in the largest space the regex
+            engine is designed for.)
  - SEE ALSO: regtail_study
  */
-STATIC void
+STATIC bool
  S_regtail(pTHX_ RExC_state_t * pRExC_state,
                  const regnode_offset p,
                  const regnode_offset val,
@@ -19205,7 +19710,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
              DEBUG_PARSE_MSG((scan==p ? "tail" : ""));
              regprop(RExC_rx, RExC_mysv, REGNODE_p(scan), NULL, pRExC_state);
              Perl_re_printf( aTHX_  "~ %s (%d) %s %s\n",
-                SvPV_nolen_const(RExC_mysv), REG_NODE_NUM(REGNODE_p(scan)),
+                SvPV_nolen_const(RExC_mysv), scan,
                      (temp == NULL ? "->" : ""),
                      (temp == NULL ? PL_reg_name[OP(REGNODE_p(val))] : "")
              );
@@ -19216,11 +19721,21 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
      }
  
      if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
+        assert(val - scan <= U32_MAX);
          ARG_SET(REGNODE_p(scan), val - scan);
      }
      else {
+        if (val - scan > U16_MAX) {
+            /* Since not all callers check the return value, populate this with
+             * something that won't loop and will likely lead to a crash if
+             * execution continues */
+            NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
+            return FALSE;
+        }
          NEXT_OFF(REGNODE_p(scan)) = val - scan;
      }
+
+    return TRUE;
  }
  
  #ifdef DEBUGGING
@@ -19237,10 +19752,14 @@ that it is purely analytical.
  Currently only used when in DEBUG mode. The macro REGTAIL_STUDY() is used
  to control which is which.
  
+This used to return a value that was ignored.  It was a problem that it is
+#ifdef'd to be another function that didn't return a value.  khw has changed it
+so both currently return a pass/fail return.
+
  */
  /* TODO: All four parms should be const */
  
-STATIC U8
+STATIC bool
  S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
                        const regnode_offset val, U32 depth)
  {
@@ -19264,7 +19783,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
             bool unfolded_multi_char;   /* Unexamined in this routine */
              if (join_exact(pRExC_state, scan, &min,
                             &unfolded_multi_char, 1, REGNODE_p(val), depth+1))
-                return EXACT;
+                return TRUE; /* Was return EXACT */
         }
  #endif
          if ( exact ) {
@@ -19273,12 +19792,13 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
                  case EXACT_ONLY8:
                  case EXACTL:
                  case EXACTF:
+                case EXACTFU_S_EDGE:
                  case EXACTFAA_NO_TRIE:
                  case EXACTFAA:
                  case EXACTFU:
                  case EXACTFU_ONLY8:
                  case EXACTFLU8:
-                case EXACTFU_SS:
+                case EXACTFUP:
                  case EXACTFL:
                          if( exact == PSEUDO )
                              exact= OP(REGNODE_p(scan));
@@ -19295,7 +19815,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
              regprop(RExC_rx, RExC_mysv, REGNODE_p(scan), NULL, pRExC_state);
              Perl_re_printf( aTHX_  "~ %s (%d) -> %s\n",
                  SvPV_nolen_const(RExC_mysv),
-                REG_NODE_NUM(REGNODE_p(scan)),
+                scan,
                  PL_reg_name[exact]);
          });
         if (temp == NULL)
@@ -19308,18 +19828,23 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
          Perl_re_printf( aTHX_
                        "~ attach to %s (%" IVdf ") offset to %" IVdf "\n",
                       SvPV_nolen_const(RExC_mysv),
-                     (IV)REG_NODE_NUM(REGNODE_p(val)),
+                     (IV)val,
                       (IV)(val - scan)
          );
      });
      if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
+        assert(val - scan <= U32_MAX);
         ARG_SET(REGNODE_p(scan), val - scan);
      }
      else {
+        if (val - scan > U16_MAX) {
+            NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
+            return FALSE;
+        }
         NEXT_OFF(REGNODE_p(scan)) = val - scan;
      }
  
-    return exact;
+    return TRUE; /* Was 'return exact' */
  }
  #endif
  
@@ -19581,6 +20106,7 @@ void
  Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
  {
  #ifdef DEBUGGING
+    dVAR;
      int k;
      RXi_GET_DECL(prog, progi);
      GET_RE_DEBUG_FLAGS_DECL;
@@ -19786,42 +20312,46 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          /* Ready to start outputting.  First, the initial left bracket */
         Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
  
-        /* Then all the things that could fit in the bitmap */
-        do_sep = put_charclass_bitmap_innards(sv,
-                                              ANYOF_BITMAP(o),
-                                              bitmap_range_not_in_bitmap,
-                                              only_utf8_locale_invlist,
-                                              o,
-
-                                              /* Can't try inverting for a
-                                               * better display if there are
-                                               * things that haven't been
-                                               * resolved */
-                                              unresolved != NULL);
-        SvREFCNT_dec(bitmap_range_not_in_bitmap);
-
-        /* If there are user-defined properties which haven't been defined yet,
-         * output them.  If the result is not to be inverted, it is clearest to
-         * output them in a separate [] from the bitmap range stuff.  If the
-         * result is to be complemented, we have to show everything in one [],
-         * as the inversion applies to the whole thing.  Use {braces} to
-         * separate them from anything in the bitmap and anything above the
-         * bitmap. */
-        if (unresolved) {
-            if (inverted) {
-                if (! do_sep) { /* If didn't output anything in the bitmap */
-                    sv_catpvs(sv, "^");
+        if (OP(o) != ANYOFH) {
+            /* Then all the things that could fit in the bitmap */
+            do_sep = put_charclass_bitmap_innards(sv,
+                                                  ANYOF_BITMAP(o),
+                                                  bitmap_range_not_in_bitmap,
+                                                  only_utf8_locale_invlist,
+                                                  o,
+
+                                                  /* Can't try inverting for a
+                                                   * better display if there
+                                                   * are things that haven't
+                                                   * been resolved */
+                                                  unresolved != NULL);
+            SvREFCNT_dec(bitmap_range_not_in_bitmap);
+
+            /* If there are user-defined properties which haven't been defined
+             * yet, output them.  If the result is not to be inverted, it is
+             * clearest to output them in a separate [] from the bitmap range
+             * stuff.  If the result is to be complemented, we have to show
+             * everything in one [], as the inversion applies to the whole
+             * thing.  Use {braces} to separate them from anything in the
+             * bitmap and anything above the bitmap. */
+            if (unresolved) {
+                if (inverted) {
+                    if (! do_sep) { /* If didn't output anything in the bitmap
+                                     */
+                        sv_catpvs(sv, "^");
+                    }
+                    sv_catpvs(sv, "{");
                  }
-                sv_catpvs(sv, "{");
-            }
-            else if (do_sep) {
-                Perl_sv_catpvf(aTHX_ sv,"%s][%s", PL_colors[1], PL_colors[0]);
-            }
-            sv_catsv(sv, unresolved);
-            if (inverted) {
-                sv_catpvs(sv, "}");
+                else if (do_sep) {
+                    Perl_sv_catpvf(aTHX_ sv,"%s][%s", PL_colors[1],
+                                                      PL_colors[0]);
+                }
+                sv_catsv(sv, unresolved);
+                if (inverted) {
+                    sv_catpvs(sv, "}");
+                }
+                do_sep = ! inverted;
              }
-            do_sep = ! inverted;
          }
  
          /* And, finally, add the above-the-bitmap stuff */
@@ -19922,8 +20452,13 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          assert(FLAGS(o) < C_ARRAY_LENGTH(bounds));
          sv_catpv(sv, bounds[FLAGS(o)]);
      }
-    else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
-       Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
+    else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) {
+       Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags));
+        if (o->next_off) {
+            Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off);
+        }
+       Perl_sv_catpvf(aTHX_ sv, "]");
+    }
      else if (OP(o) == SBOL)
          Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^");
  
@@ -20869,6 +21404,7 @@ S_put_charclass_bitmap_innards_common(pTHX_
       * output would have been only the inversion indicator '^', NULL is instead
       * returned. */
  
+    dVAR;
      SV * output;
  
      PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON;
@@ -20972,6 +21508,8 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
       * whether the class itself is to be inverted.  However,  there are some
       * cases where it can't try inverting, as what actually matches isn't known
       * until runtime, and hence the inversion isn't either. */
+
+    dVAR;
      bool inverting_allowed = ! force_as_is_display;
  
      int i;
@@ -21366,6 +21904,17 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
  void
  Perl_init_uniprops(pTHX)
  {
+    dVAR;
+
+    PL_user_def_props = newHV();
+
+#ifdef USE_ITHREADS
+
+    HvSHAREKEYS_off(PL_user_def_props);
+    PL_user_def_props_aTHX = aTHX;
+
+#endif
+
      /* Set up the inversion list global variables */
  
      PL_XPosix_ptrs[_CC_ASCII] = _new_invlist_C_array(uni_prop_ptrs[UNI_ASCII]);
@@ -21420,11 +21969,13 @@ Perl_init_uniprops(pTHX)
      PL_utf8_charname_begin = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_CHARNAME_BEGIN]);
      PL_utf8_charname_continue = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_CHARNAME_CONTINUE]);
  
-    PL_utf8_foldable = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_ANY_FOLDS]);
+    PL_in_some_fold = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_ANY_FOLDS]);
      PL_HasMultiCharFold = _new_invlist_C_array(uni_prop_ptrs[
                                              UNI__PERL_FOLDS_TO_MULTI_CHAR]);
-    PL_NonL1NonFinalFold = _new_invlist_C_array(
-                                            NonL1_Perl_Non_Final_Folds_invlist);
+    PL_InMultiCharFold = _new_invlist_C_array(uni_prop_ptrs[
+                                            UNI__PERL_IS_IN_MULTI_CHAR_FOLD]);
+    PL_NonFinalFold = _new_invlist_C_array(uni_prop_ptrs[
+                                            UNI__PERL_NON_FINAL_FOLDS]);
  
      PL_utf8_toupper = _new_invlist_C_array(Uppercase_Mapping_invlist);
      PL_utf8_tolower = _new_invlist_C_array(Lowercase_Mapping_invlist);
@@ -21433,46 +21984,457 @@ Perl_init_uniprops(pTHX)
      PL_utf8_tosimplefold = _new_invlist_C_array(Simple_Case_Folding_invlist);
      PL_utf8_foldclosures = _new_invlist_C_array(_Perl_IVCF_invlist);
      PL_utf8_mark = _new_invlist_C_array(uni_prop_ptrs[UNI_M]);
+    PL_CCC_non0_non230 = _new_invlist_C_array(_Perl_CCC_non0_non230_invlist);
+    PL_Private_Use = _new_invlist_C_array(uni_prop_ptrs[UNI_CO]);
  
+#ifdef UNI_XIDC
      /* The below are used only by deprecated functions.  They could be removed */
      PL_utf8_xidcont  = _new_invlist_C_array(uni_prop_ptrs[UNI_XIDC]);
      PL_utf8_idcont   = _new_invlist_C_array(uni_prop_ptrs[UNI_IDC]);
      PL_utf8_xidstart = _new_invlist_C_array(uni_prop_ptrs[UNI_XIDS]);
+#endif
  }
  
-SV *
-Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
-                                const bool to_fold, bool * invert)
+#if 0
+
+This code was mainly added for backcompat to give a warning for non-portable
+code points in user-defined properties.  But experiments showed that the
+warning in earlier perls were only omitted on overflow, which should be an
+error, so there really isnt a backcompat issue, and actually adding the
+warning when none was present before might cause breakage, for little gain.  So
+khw left this code in, but not enabled.  Tests were never added.
+
+embed.fnc entry:
+Ei     |const char *|get_extended_utf8_msg|const UV cp
+
+PERL_STATIC_INLINE const char *
+S_get_extended_utf8_msg(pTHX_ const UV cp)
  {
-    /* Parse the interior meat of \p{} passed to this in 'name' with length
-     * 'name_len', and return an inversion list if a property with 'name' is
-     * found, or NULL if not.  'name' point to the input with leading and
-     * trailing space trimmed.  'to_fold' indicates if /i is in effect.
+    U8 dummy[UTF8_MAXBYTES + 1];
+    HV *msgs;
+    SV **msg;
+
+    uvchr_to_utf8_flags_msgs(dummy, cp, UNICODE_WARN_PERL_EXTENDED,
+                             &msgs);
+
+    msg = hv_fetchs(msgs, "text", 0);
+    assert(msg);
+
+    (void) sv_2mortal((SV *) msgs);
+
+    return SvPVX(*msg);
+}
+
+#endif
+
+SV *
+Perl_handle_user_defined_property(pTHX_
+
+    /* Parses the contents of a user-defined property definition; returning the
+     * expanded definition if possible.  If so, the return is an inversion
+     * list.
       *
-     * When the return is an inversion list, '*invert' will be set to a boolean
-     * indicating if it should be inverted or not
+     * If there are subroutines that are part of the expansion and which aren't
+     * known at the time of the call to this function, this returns what
+     * parse_uniprop_string() returned for the first one encountered.
       *
-     * This currently doesn't handle all cases.  A NULL return indicates the
-     * caller should try a different approach
-     */
+     * If an error was found, NULL is returned, and 'msg' gets a suitable
+     * message appended to it.  (Appending allows the back trace of how we got
+     * to the faulty definition to be displayed through nested calls of
+     * user-defined subs.)
+     *
+     * The caller IS responsible for freeing any returned SV.
+     *
+     * The syntax of the contents is pretty much described in perlunicode.pod,
+     * but we also allow comments on each line */
+
+    const char * name,          /* Name of property */
+    const STRLEN name_len,      /* The name's length in bytes */
+    const bool is_utf8,         /* ? Is 'name' encoded in UTF-8 */
+    const bool to_fold,         /* ? Is this under /i */
+    const bool runtime,         /* ? Are we in compile- or run-time */
+    SV* contents,               /* The property's definition */
+    bool *user_defined_ptr,     /* This will be set TRUE as we wouldn't be
+                                   getting called unless this is thought to be
+                                   a user-defined property */
+    SV * msg,                   /* Any error or warning msg(s) are appended to
+                                   this */
+    const STRLEN level)         /* Recursion level of this call */
+{
+    STRLEN len;
+    const char * string         = SvPV_const(contents, len);
+    const char * const e        = string + len;
+    const bool is_contents_utf8 = cBOOL(SvUTF8(contents));
+    const STRLEN msgs_length_on_entry = SvCUR(msg);
+
+    const char * s0 = string;   /* Points to first byte in the current line
+                                   being parsed in 'string' */
+    const char overflow_msg[] = "Code point too large in \"";
+    SV* running_definition = NULL;
+
+    PERL_ARGS_ASSERT_HANDLE_USER_DEFINED_PROPERTY;
+
+    *user_defined_ptr = TRUE;
+
+    /* Look at each line */
+    while (s0 < e) {
+        const char * s;     /* Current byte */
+        char op = '+';      /* Default operation is 'union' */
+        IV   min = 0;       /* range begin code point */
+        IV   max = -1;      /* and range end */
+        SV* this_definition;
+
+        /* Skip comment lines */
+        if (*s0 == '#') {
+            s0 = strchr(s0, '\n');
+            if (s0 == NULL) {
+                break;
+            }
+            s0++;
+            continue;
+        }
  
-    char* lookup_name;
-    bool stricter = FALSE;
-    bool is_nv_type = FALSE;         /* nv= or numeric_value=, or possibly one
-                                        of the cjk numeric properties (though
-                                        it requires extra effort to compile
-                                        them) */
-    unsigned int i;
-    unsigned int j = 0, lookup_len;
-    int equals_pos = -1;        /* Where the '=' is found, or negative if none */
-    int slash_pos = -1;        /* Where the '/' is found, or negative if none */
-    int table_index = 0;
-    bool starts_with_In_or_Is = FALSE;
-    Size_t lookup_offset = 0;
+        /* For backcompat, allow an empty first line */
+        if (*s0 == '\n') {
+            s0++;
+            continue;
+        }
+
+        /* First character in the line may optionally be the operation */
+        if (   *s0 == '+'
+            || *s0 == '!'
+            || *s0 == '-'
+            || *s0 == '&')
+        {
+            op = *s0++;
+        }
+
+        /* If the line is one or two hex digits separated by blank space, its
+         * a range; otherwise it is either another user-defined property or an
+         * error */
+
+        s = s0;
+
+        if (! isXDIGIT(*s)) {
+            goto check_if_property;
+        }
+
+        do { /* Each new hex digit will add 4 bits. */
+            if (min > ( (IV) MAX_LEGAL_CP >> 4)) {
+                s = strchr(s, '\n');
+                if (s == NULL) {
+                    s = e;
+                }
+                if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+                sv_catpv(msg, overflow_msg);
+                Perl_sv_catpvf(aTHX_ msg, "%" UTF8f,
+                                     UTF8fARG(is_contents_utf8, s - s0, s0));
+                sv_catpvs(msg, "\"");
+                goto return_msg;
+            }
+
+            /* Accumulate this digit into the value */
+            min = (min << 4) + READ_XDIGIT(s);
+        } while (isXDIGIT(*s));
+
+        while (isBLANK(*s)) { s++; }
+
+        /* We allow comments at the end of the line */
+        if (*s == '#') {
+            s = strchr(s, '\n');
+            if (s == NULL) {
+                s = e;
+            }
+            s++;
+        }
+        else if (s < e && *s != '\n') {
+            if (! isXDIGIT(*s)) {
+                goto check_if_property;
+            }
+
+            /* Look for the high point of the range */
+            max = 0;
+            do {
+                if (max > ( (IV) MAX_LEGAL_CP >> 4)) {
+                    s = strchr(s, '\n');
+                    if (s == NULL) {
+                        s = e;
+                    }
+                    if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+                    sv_catpv(msg, overflow_msg);
+                    Perl_sv_catpvf(aTHX_ msg, "%" UTF8f,
+                                      UTF8fARG(is_contents_utf8, s - s0, s0));
+                    sv_catpvs(msg, "\"");
+                    goto return_msg;
+                }
+
+                max = (max << 4) + READ_XDIGIT(s);
+            } while (isXDIGIT(*s));
+
+            while (isBLANK(*s)) { s++; }
+
+            if (*s == '#') {
+                s = strchr(s, '\n');
+                if (s == NULL) {
+                    s = e;
+                }
+            }
+            else if (s < e && *s != '\n') {
+                goto check_if_property;
+            }
+        }
+
+        if (max == -1) {    /* The line only had one entry */
+            max = min;
+        }
+        else if (max < min) {
+            if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+            sv_catpvs(msg, "Illegal range in \"");
+            Perl_sv_catpvf(aTHX_ msg, "%" UTF8f,
+                                UTF8fARG(is_contents_utf8, s - s0, s0));
+            sv_catpvs(msg, "\"");
+            goto return_msg;
+        }
+
+#if 0   /* See explanation at definition above of get_extended_utf8_msg() */
+
+        if (   UNICODE_IS_PERL_EXTENDED(min)
+            || UNICODE_IS_PERL_EXTENDED(max))
+        {
+            if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+
+            /* If both code points are non-portable, warn only on the lower
+             * one. */
+            sv_catpv(msg, get_extended_utf8_msg(
+                                            (UNICODE_IS_PERL_EXTENDED(min))
+                                            ? min : max));
+            sv_catpvs(msg, " in \"");
+            Perl_sv_catpvf(aTHX_ msg, "%" UTF8f,
+                                 UTF8fARG(is_contents_utf8, s - s0, s0));
+            sv_catpvs(msg, "\"");
+        }
+
+#endif
+
+        /* Here, this line contains a legal range */
+        this_definition = sv_2mortal(_new_invlist(2));
+        this_definition = _add_range_to_invlist(this_definition, min, max);
+        goto calculate;
+
+      check_if_property:
+
+        /* Here it isn't a legal range line.  See if it is a legal property
+         * line.  First find the end of the meat of the line */
+        s = strpbrk(s, "#\n");
+        if (s == NULL) {
+            s = e;
+        }
+
+        /* Ignore trailing blanks in keeping with the requirements of
+         * parse_uniprop_string() */
+        s--;
+        while (s > s0 && isBLANK_A(*s)) {
+            s--;
+        }
+        s++;
+
+        this_definition = parse_uniprop_string(s0, s - s0,
+                                               is_utf8, to_fold, runtime,
+                                               user_defined_ptr, msg,
+                                               (name_len == 0)
+                                                ? level /* Don't increase level
+                                                           if input is empty */
+                                                : level + 1
+                                              );
+        if (this_definition == NULL) {
+            goto return_msg;    /* 'msg' should have had the reason appended to
+                                   it by the above call */
+        }
+
+        if (! is_invlist(this_definition)) {    /* Unknown at this time */
+            return newSVsv(this_definition);
+        }
+
+        if (*s != '\n') {
+            s = strchr(s, '\n');
+            if (s == NULL) {
+                s = e;
+            }
+        }
+
+      calculate:
+
+        switch (op) {
+            case '+':
+                _invlist_union(running_definition, this_definition,
+                                                        &running_definition);
+                break;
+            case '-':
+                _invlist_subtract(running_definition, this_definition,
+                                                        &running_definition);
+                break;
+            case '&':
+                _invlist_intersection(running_definition, this_definition,
+                                                        &running_definition);
+                break;
+            case '!':
+                _invlist_union_complement_2nd(running_definition,
+                                        this_definition, &running_definition);
+                break;
+            default:
+                Perl_croak(aTHX_ "panic: %s: %d: Unexpected operation %d",
+                                 __FILE__, __LINE__, op);
+                break;
+        }
+
+        /* Position past the '\n' */
+        s0 = s + 1;
+    }   /* End of loop through the lines of 'contents' */
+
+    /* Here, we processed all the lines in 'contents' without error.  If we
+     * didn't add any warnings, simply return success */
+    if (msgs_length_on_entry == SvCUR(msg)) {
+
+        /* If the expansion was empty, the answer isn't nothing: its an empty
+         * inversion list */
+        if (running_definition == NULL) {
+            running_definition = _new_invlist(1);
+        }
+
+        return running_definition;
+    }
+
+    /* Otherwise, add some explanatory text, but we will return success */
+
+  return_msg:
+
+    if (name_len > 0) {
+        sv_catpvs(msg, " in expansion of ");
+        Perl_sv_catpvf(aTHX_ msg, "%" UTF8f, UTF8fARG(is_utf8, name_len, name));
+    }
+
+    return running_definition;
+}
+
+/* As explained below, certain operations need to take place in the first
+ * thread created.  These macros switch contexts */
+#ifdef USE_ITHREADS
+#  define DECLARATION_FOR_GLOBAL_CONTEXT                                    \
+                                        PerlInterpreter * save_aTHX = aTHX;
+#  define SWITCH_TO_GLOBAL_CONTEXT                                          \
+                           PERL_SET_CONTEXT((aTHX = PL_user_def_props_aTHX))
+#  define RESTORE_CONTEXT  PERL_SET_CONTEXT((aTHX = save_aTHX));
+#  define CUR_CONTEXT      aTHX
+#  define ORIGINAL_CONTEXT save_aTHX
+#else
+#  define DECLARATION_FOR_GLOBAL_CONTEXT
+#  define SWITCH_TO_GLOBAL_CONTEXT          NOOP
+#  define RESTORE_CONTEXT                   NOOP
+#  define CUR_CONTEXT                       NULL
+#  define ORIGINAL_CONTEXT                  NULL
+#endif
+
+STATIC void
+S_delete_recursion_entry(pTHX_ void *key)
+{
+    /* Deletes the entry used to detect recursion when expanding user-defined
+     * properties.  This is a function so it can be set up to be called even if
+     * the program unexpectedly quits */
+
+    dVAR;
+    SV ** current_entry;
+    const STRLEN key_len = strlen((const char *) key);
+    DECLARATION_FOR_GLOBAL_CONTEXT;
+
+    SWITCH_TO_GLOBAL_CONTEXT;
+
+    /* If the entry is one of these types, it is a permanent entry, and not the
+     * one used to detect recursions.  This function should delete only the
+     * recursion entry */
+    current_entry = hv_fetch(PL_user_def_props, (const char *) key, key_len, 0);
+    if (     current_entry
+        && ! is_invlist(*current_entry)
+        && ! SvPOK(*current_entry))
+    {
+        (void) hv_delete(PL_user_def_props, (const char *) key, key_len,
+                                                                    G_DISCARD);
+    }
+
+    RESTORE_CONTEXT;
+}
+
+SV *
+Perl_parse_uniprop_string(pTHX_
+
+    /* Parse the interior of a \p{}, \P{}.  Returns its definition if knowable
+     * now.  If so, the return is an inversion list.
+     *
+     * If the property is user-defined, it is a subroutine, which in turn
+     * may call other subroutines.  This function will call the whole nest of
+     * them to get the definition they return; if some aren't known at the time
+     * of the call to this function, the fully qualified name of the highest
+     * level sub is returned.  It is an error to call this function at runtime
+     * without every sub defined.
+     *
+     * If an error was found, NULL is returned, and 'msg' gets a suitable
+     * message appended to it.  (Appending allows the back trace of how we got
+     * to the faulty definition to be displayed through nested calls of
+     * user-defined subs.)
+     *
+     * The caller should NOT try to free any returned inversion list.
+     *
+     * Other parameters will be set on return as described below */
+
+    const char * const name,    /* The first non-blank in the \p{}, \P{} */
+    const Size_t name_len,      /* Its length in bytes, not including any
+                                   trailing space */
+    const bool is_utf8,         /* ? Is 'name' encoded in UTF-8 */
+    const bool to_fold,         /* ? Is this under /i */
+    const bool runtime,         /* TRUE if this is being called at run time */
+    bool *user_defined_ptr,     /* Upon return from this function it will be
+                                   set to TRUE if any component is a
+                                   user-defined property */
+    SV * msg,                   /* Any error or warning msg(s) are appended to
+                                   this */
+   const STRLEN level)          /* Recursion level of this call */
+{
+    dVAR;
+    char* lookup_name;          /* normalized name for lookup in our tables */
+    unsigned lookup_len;        /* Its length */
+    bool stricter = FALSE;      /* Some properties have stricter name
+                                   normalization rules, which we decide upon
+                                   based on parsing */
+
+    /* nv= or numeric_value=, or possibly one of the cjk numeric properties
+     * (though it requires extra effort to download them from Unicode and
+     * compile perl to know about them) */
+    bool is_nv_type = FALSE;
+
+    unsigned int i, j = 0;
+    int equals_pos = -1;    /* Where the '=' is found, or negative if none */
+    int slash_pos  = -1;    /* Where the '/' is found, or negative if none */
+    int table_index = 0;    /* The entry number for this property in the table
+                               of all Unicode property names */
+    bool starts_with_In_or_Is = FALSE;  /* ? Does the name start with 'In' or
+                                             'Is' */
+    Size_t lookup_offset = 0;   /* Used to ignore the first few characters of
+                                   the normalized name in certain situations */
+    Size_t non_pkg_begin = 0;   /* Offset of first byte in 'name' that isn't
+                                   part of a package name */
+    bool could_be_user_defined = TRUE;  /* ? Could this be a user-defined
+                                             property rather than a Unicode
+                                             one. */
+    SV * prop_definition = NULL;  /* The returned definition of 'name' or NULL
+                                     if an error.  If it is an inversion list,
+                                     it is the definition.  Otherwise it is a
+                                     string containing the fully qualified sub
+                                     name of 'name' */
+    bool invert_return = FALSE; /* ? Do we need to complement the result before
+                                     returning it */
  
      PERL_ARGS_ASSERT_PARSE_UNIPROP_STRING;
  
-    /* The input will be modified into 'lookup_name' */
+    /* The input will be normalized into 'lookup_name' */
      Newx(lookup_name, name_len, char);
      SAVEFREEPV(lookup_name);
  
@@ -21480,40 +22442,86 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
      for (i = 0; i < name_len; i++) {
          char cur = name[i];
  
-        /* These characters can be freely ignored in most situations.  Later it
-         * may turn out we shouldn't have ignored them, and we have to reparse,
-         * but we don't have enough information yet to make that decision */
-        if (cur == '-' || cur == '_' || isSPACE_A(cur)) {
+        /* Most of the characters in the input will be of this ilk, being parts
+         * of a name */
+        if (isIDCONT_A(cur)) {
+
+            /* Case differences are ignored.  Our lookup routine assumes
+             * everything is lowercase, so normalize to that */
+            if (isUPPER_A(cur)) {
+                lookup_name[j++] = toLOWER_A(cur);
+                continue;
+            }
+
+            if (cur == '_') { /* Don't include these in the normalized name */
+                continue;
+            }
+
+            lookup_name[j++] = cur;
+
+            /* The first character in a user-defined name must be of this type.
+             * */
+            if (i - non_pkg_begin == 0 && ! isIDFIRST_A(cur)) {
+                could_be_user_defined = FALSE;
+            }
+
              continue;
          }
  
-        /* Case differences are also ignored.  Our lookup routine assumes
-         * everything is lowercase */
-        if (isUPPER_A(cur)) {
-            lookup_name[j++] = toLOWER(cur);
+        /* Here, the character is not something typically in a name,  But these
+         * two types of characters (and the '_' above) can be freely ignored in
+         * most situations.  Later it may turn out we shouldn't have ignored
+         * them, and we have to reparse, but we don't have enough information
+         * yet to make that decision */
+        if (cur == '-' || isSPACE_A(cur)) {
+            could_be_user_defined = FALSE;
              continue;
          }
  
-        /* A double colon is either an error, or a package qualifier to a
-         * subroutine user-defined property; neither of which do we currently
-         * handle
-         *
-         * But a single colon is a synonym for '=' */
-        if (cur == ':') {
-            if (i < name_len - 1 && name[i+1] == ':') {
-                return NULL;
-            }
-            cur = '=';
+        /* An equals sign or single colon mark the end of the first part of
+         * the property name */
+        if (    cur == '='
+            || (cur == ':' && (i >= name_len - 1 || name[i+1] != ':')))
+        {
+            lookup_name[j++] = '='; /* Treat the colon as an '=' */
+            equals_pos = j; /* Note where it occurred in the input */
+            could_be_user_defined = FALSE;
+            break;
          }
  
          /* Otherwise, this character is part of the name. */
          lookup_name[j++] = cur;
  
-        /* Only the equals sign needs further processing */
-        if (cur == '=') {
-            equals_pos = j; /* Note where it occurred in the input */
-            break;
+        /* Here it isn't a single colon, so if it is a colon, it must be a
+         * double colon */
+        if (cur == ':') {
+
+            /* A double colon should be a package qualifier.  We note its
+             * position and continue.  Note that one could have
+             *      pkg1::pkg2::...::foo
+             * so that the position at the end of the loop will be just after
+             * the final qualifier */
+
+            i++;
+            non_pkg_begin = i + 1;
+            lookup_name[j++] = ':';
          }
+        else { /* Only word chars (and '::') can be in a user-defined name */
+            could_be_user_defined = FALSE;
+        }
+    } /* End of parsing through the lhs of the property name (or all of it if
+         no rhs) */
+
+#define STRLENs(s)  (sizeof("" s "") - 1)
+
+    /* If there is a single package name 'utf8::', it is ambiguous.  It could
+     * be for a user-defined property, or it could be a Unicode property, as
+     * all of them are considered to be for that package.  For the purposes of
+     * parsing the rest of the property, strip it off */
+    if (non_pkg_begin == STRLENs("utf8::") && memBEGINPs(name, name_len, "utf8::")) {
+        lookup_name +=  STRLENs("utf8::");
+        j -=  STRLENs("utf8::");
+        equals_pos -=  STRLENs("utf8::");
      }
  
      /* Here, we are either done with the whole property name, if it was simple;
@@ -21530,17 +22538,183 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
              }
          }
  
-        /* Certain properties need special handling.  They may optionally be
-         * prefixed by 'is'.  Ignore that prefix for the purposes of checking
-         * if this is one of those properties */
+        /* Most punctuation after the equals indicates a subpattern, like
+         * \p{foo=/bar/} */
+        if (   isPUNCT_A(name[i])
+            && name[i] != '-'
+            && name[i] != '+'
+            && name[i] != '_'
+            && name[i] != '{')
+        {
+            /* Find the property.  The table includes the equals sign, so we
+             * use 'j' as-is */
+            table_index = match_uniprop((U8 *) lookup_name, j);
+            if (table_index) {
+                const char * const * prop_values
+                                            = UNI_prop_value_ptrs[table_index];
+                SV * subpattern;
+                Size_t subpattern_len;
+                REGEXP * subpattern_re;
+                char open = name[i++];
+                char close;
+                const char * pos_in_brackets;
+                bool escaped = 0;
+
+                /* A backslash means the real delimitter is the next character.
+                 * */
+                if (open == '\\') {
+                    open = name[i++];
+                    escaped = 1;
+                }
+
+                /* This data structure is constructed so that the matching
+                 * closing bracket is 3 past its matching opening.  The second
+                 * set of closing is so that if the opening is something like
+                 * ']', the closing will be that as well.  Something similar is
+                 * done in toke.c */
+                pos_in_brackets = strchr("([<)]>)]>", open);
+                close = (pos_in_brackets) ? pos_in_brackets[3] : open;
+
+                if (   name[name_len-1] != close
+                    || (escaped && name[name_len-2] != '\\'))
+                {
+                    sv_catpvs(msg, "Unicode property wildcard not terminated");
+                    goto append_name_to_msg;
+                }
+
+                Perl_ck_warner_d(aTHX_
+                    packWARN(WARN_EXPERIMENTAL__UNIPROP_WILDCARDS),
+                    "The Unicode property wildcards feature is experimental");
+
+                /* Now create and compile the wildcard subpattern.  Use /iaa
+                 * because nothing outside of ASCII will match, and it the
+                 * property values should all match /i.  Note that when the
+                 * pattern fails to compile, our added text to the user's
+                 * pattern will be displayed to the user, which is not so
+                 * desirable. */
+                subpattern_len = name_len - i - 1 - escaped;
+                subpattern = Perl_newSVpvf(aTHX_ "(?iaa:%.*s)",
+                                              (unsigned) subpattern_len,
+                                              name + i);
+                subpattern = sv_2mortal(subpattern);
+                subpattern_re = re_compile(subpattern, 0);
+                assert(subpattern_re);  /* Should have died if didn't compile
+                                         successfully */
+
+                /* For each legal property value, see if the supplied pattern
+                 * matches it. */
+                while (*prop_values) {
+                    const char * const entry = *prop_values;
+                    const Size_t len = strlen(entry);
+                    SV* entry_sv = newSVpvn_flags(entry, len, SVs_TEMP);
+
+                    if (pregexec(subpattern_re,
+                                 (char *) entry,
+                                 (char *) entry + len,
+                                 (char *) entry, 0,
+                                 entry_sv,
+                                 0))
+                    { /* Here, matched.  Add to the returned list */
+                        Size_t total_len = j + len;
+                        SV * sub_invlist = NULL;
+                        char * this_string;
+
+                        /* We know this is a legal \p{property=value}.  Call
+                         * the function to return the list of code points that
+                         * match it */
+                        Newxz(this_string, total_len + 1, char);
+                        Copy(lookup_name, this_string, j, char);
+                        my_strlcat(this_string, entry, total_len + 1);
+                        SAVEFREEPV(this_string);
+                        sub_invlist = parse_uniprop_string(this_string,
+                                                           total_len,
+                                                           is_utf8,
+                                                           to_fold,
+                                                           runtime,
+                                                           user_defined_ptr,
+                                                           msg,
+                                                           level + 1);
+                        _invlist_union(prop_definition, sub_invlist,
+                                       &prop_definition);
+                    }
+
+                    prop_values++;  /* Next iteration, look at next propvalue */
+                } /* End of looking through property values; (the data
+                     structure is terminated by a NULL ptr) */
+
+                SvREFCNT_dec_NN(subpattern_re);
+
+                if (prop_definition) {
+                    return prop_definition;
+                }
+
+                sv_catpvs(msg, "No Unicode property value wildcard matches:");
+                goto append_name_to_msg;
+            }
+
+            /* Here's how khw thinks we should proceed to handle the properties
+             * not yet done:    Bidi Mirroring Glyph
+                                Bidi Paired Bracket
+                                Case Folding  (both full and simple)
+                                Decomposition Mapping
+                                Equivalent Unified Ideograph
+                                Name
+                                Name Alias
+                                Lowercase Mapping  (both full and simple)
+                                NFKC Case Fold
+                                Titlecase Mapping  (both full and simple)
+                                Uppercase Mapping  (both full and simple)
+             * Move the part that looks at the property values into a perl
+             * script, like utf8_heavy.pl is done.  This makes things somewhat
+             * easier, but most importantly, it avoids always adding all these
+             * strings to the memory usage when the feature is little-used.
+             *
+             * The property values would all be concatenated into a single
+             * string per property with each value on a separate line, and the
+             * code point it's for on alternating lines.  Then we match the
+             * user's input pattern m//mg, without having to worry about their
+             * uses of '^' and '$'.  Only the values that aren't the default
+             * would be in the strings.  Code points would be in UTF-8.  The
+             * search pattern that we would construct would look like
+             * (?: \n (code-point_re) \n (?aam: user-re ) \n )
+             * And so $1 would contain the code point that matched the user-re.
+             * For properties where the default is the code point itself, such
+             * as any of the case changing mappings, the string would otherwise
+             * consist of all Unicode code points in UTF-8 strung together.
+             * This would be impractical.  So instead, examine their compiled
+             * pattern, looking at the ssc.  If none, reject the pattern as an
+             * error.  Otherwise run the pattern against every code point in
+             * the ssc.  The ssc is kind of like tr18's 3.9 Possible Match Sets
+             * And it might be good to create an API to return the ssc.
+             *
+             * For the name properties, a new function could be created in
+             * charnames which essentially does the same thing as above,
+             * sharing Name.pl with the other charname functions.  Don't know
+             * about loose name matching, or algorithmically determined names.
+             * Decomposition.pl similarly.
+             *
+             * It might be that a new pattern modifier would have to be
+             * created, like /t for resTricTed, which changed the behavior of
+             * some constructs in their subpattern, like \A. */
+        } /* End of is a wildcard subppattern */
+
+
+        /* Certain properties whose values are numeric need special handling.
+         * They may optionally be prefixed by 'is'.  Ignore that prefix for the
+         * purposes of checking if this is one of those properties */
          if (memBEGINPs(lookup_name, name_len, "is")) {
              lookup_offset = 2;
          }
  
-        /* Then check if it is one of these properties.  This is hard-coded
-         * because easier this way, and the list is unlikely to change.  There
-         * are several properties like this in the Unihan DB, which is unlikely
-         * to be compiled, and they all end with 'numeric'.  The interiors
+        /* Then check if it is one of these specially-handled properties.  The
+         * possibilities are hard-coded because easier this way, and the list
+         * is unlikely to change.
+         *
+         * All numeric value type properties are of this ilk, and are also
+         * special in a different way later on.  So find those first.  There
+         * are several numeric value type properties in the Unihan DB (which is
+         * unlikely to be compiled with perl, but we handle it here in case it
+         * does get compiled).  They all end with 'numeric'.  The interiors
           * aren't checked for the precise property.  This would stop working if
           * a cjk property were to be created that ended with 'numeric' and
           * wasn't a numeric type */
@@ -21568,15 +22742,14 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
          {
              unsigned int k;
  
-            /* What makes these properties special is that the stuff after the
-             * '=' is a number.  Therefore, we can't throw away '-'
-             * willy-nilly, as those could be a minus sign.  Other stricter
+            /* Since the stuff after the '=' is a number, we can't throw away
+             * '-' willy-nilly, as those could be a minus sign.  Other stricter
               * rules also apply.  However, these properties all can have the
               * rhs not be a number, in which case they contain at least one
               * alphabetic.  In those cases, the stricter rules don't apply.
               * But the numeric type properties can have the alphas [Ee] to
               * signify an exponent, and it is still a number with stricter
-             * rules.  So look for an alpha that signifys not-strict */
+             * rules.  So look for an alpha that signifies not-strict */
              stricter = TRUE;
              for (k = i; k < name_len; k++) {
                  if (   isALPHA_A(name[k])
@@ -21604,7 +22777,7 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
               * zeros, or between the final leading zero and the first other
               * digit */
              for (; i < name_len - 1; i++) {
-                if (   name[i] != '0'
+                if (    name[i] != '0'
                      && (name[i] != '_' || ! isDIGIT_A(name[i+1])))
                  {
                      break;
@@ -21614,9 +22787,8 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
      }
      else {  /* No '=' */
  
-       /* We are now in a position to determine if this property should have
-        * been parsed using stricter rules.  Only a few are like that, and
-        * unlikely to change. */
+       /* Only a few properties without an '=' should be parsed with stricter
+        * rules.  The list is unlikely to change. */
          if (   memBEGINPs(lookup_name, j, "perl")
              && memNEs(lookup_name + 4, j - 4, "space")
              && memNEs(lookup_name + 4, j - 4, "word"))
@@ -21711,33 +22883,308 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
      {
          lookup_name[j++] = '&';
      }
-    else if (name_len > 2 && name[0] == 'I' && (   name[1] == 'n'
-                                                || name[1] == 's'))
-    {
-
-        /* Also, if the original input began with 'In' or 'Is', it could be a
-         * subroutine call instead of a property names, which currently isn't
-         * handled by this function.  Subroutine calls can't happen if there is
-         * an '=' in the name */
-        if (equals_pos < 0 && get_cvn_flags(name, name_len, GV_NOTQUAL) != NULL)
-        {
-            return NULL;
-        }
  
+    /* If the original input began with 'In' or 'Is', it could be a subroutine
+     * call to a user-defined property instead of a Unicode property name. */
+    if (    non_pkg_begin + name_len > 2
+        &&  name[non_pkg_begin+0] == 'I'
+        && (name[non_pkg_begin+1] == 'n' || name[non_pkg_begin+1] == 's'))
+    {
          starts_with_In_or_Is = TRUE;
      }
+    else {
+        could_be_user_defined = FALSE;
+    }
+
+    if (could_be_user_defined) {
+        CV* user_sub;
+
+        /* Here, the name could be for a user defined property, which are
+         * implemented as subs. */
+        user_sub = get_cvn_flags(name, name_len, 0);
+        if (user_sub) {
+
+            /* Here, there is a sub by the correct name.  Normally we call it
+             * to get the property definition */
+            dSP;
+            SV * user_sub_sv = MUTABLE_SV(user_sub);
+            SV * error;     /* Any error returned by calling 'user_sub' */
+            SV * fq_name;   /* Fully qualified property name */
+            SV * placeholder;
+            char to_fold_string[] = "0:";   /* The 0 gets overwritten with the
+                                               actual value */
+            SV ** saved_user_prop_ptr;      /* Hash entry for this property */
+
+            /* How many times to retry when another thread is in the middle of
+             * expanding the same definition we want */
+            PERL_INT_FAST8_T retry_countdown = 10;
+
+            DECLARATION_FOR_GLOBAL_CONTEXT;
+
+            /* If we get here, we know this property is user-defined */
+            *user_defined_ptr = TRUE;
+
+            /* We refuse to call a tainted subroutine; returning an error
+             * instead */
+            if (TAINT_get) {
+                if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+                sv_catpvs(msg, "Insecure user-defined property");
+                goto append_name_to_msg;
+            }
+
+            /* In principal, we only call each subroutine property definition
+             * once during the life of the program.  This guarantees that the
+             * property definition never changes.  The results of the single
+             * sub call are stored in a hash, which is used instead for future
+             * references to this property.  The property definition is thus
+             * immutable.  But, to allow the user to have a /i-dependent
+             * definition, we call the sub once for non-/i, and once for /i,
+             * should the need arise, passing the /i status as a parameter.
+             *
+             * We start by constructing the hash key name, consisting of the
+             * fully qualified subroutine name */
+            fq_name = sv_2mortal(newSV(10));    /* 10 is just a guess */
+            (void) cv_name(user_sub, fq_name, 0);
+
+            /* But precede the sub name in the key with the /i status, so that
+             * there is a key for /i and a different key for non-/i */
+            to_fold_string[0] = to_fold + '0';
+            sv_insert(fq_name, 0, 0, to_fold_string, 2);
+
+            /* We only call the sub once throughout the life of the program
+             * (with the /i, non-/i exception noted above).  That means the
+             * hash must be global and accessible to all threads.  It is
+             * created at program start-up, before any threads are created, so
+             * is accessible to all children.  But this creates some
+             * complications.
+             *
+             * 1) The keys can't be shared, or else problems arise; sharing is
+             *    turned off at hash creation time
+             * 2) All SVs in it are there for the remainder of the life of the
+             *    program, and must be created in the same interpreter context
+             *    as the hash, or else they will be freed from the wrong pool
+             *    at global destruction time.  This is handled by switching to
+             *    the hash's context to create each SV going into it, and then
+             *    immediately switching back
+             * 3) All accesses to the hash must be controlled by a mutex, to
+             *    prevent two threads from getting an unstable state should
+             *    they simultaneously be accessing it.  The code below is
+             *    crafted so that the mutex is locked whenever there is an
+             *    access and unlocked only when the next stable state is
+             *    achieved.
+             *
+             * The hash stores either the definition of the property if it was
+             * valid, or, if invalid, the error message that was raised.  We
+             * use the type of SV to distinguish.
+             *
+             * There's also the need to guard against the definition expansion
+             * from infinitely recursing.  This is handled by storing the aTHX
+             * of the expanding thread during the expansion.  Again the SV type
+             * is used to distinguish this from the other two cases.  If we
+             * come to here and the hash entry for this property is our aTHX,
+             * it means we have recursed, and the code assumes that we would
+             * infinitely recurse, so instead stops and raises an error.
+             * (Any recursion has always been treated as infinite recursion in
+             * this feature.)
+             *
+             * If instead, the entry is for a different aTHX, it means that
+             * that thread has gotten here first, and hasn't finished expanding
+             * the definition yet.  We just have to wait until it is done.  We
+             * sleep and retry a few times, returning an error if the other
+             * thread doesn't complete. */
+
+          re_fetch:
+            USER_PROP_MUTEX_LOCK;
+
+            /* If we have an entry for this key, the subroutine has already
+             * been called once with this /i status. */
+            saved_user_prop_ptr = hv_fetch(PL_user_def_props,
+                                           SvPVX(fq_name), SvCUR(fq_name), 0);
+            if (saved_user_prop_ptr) {
+
+                /* If the saved result is an inversion list, it is the valid
+                 * definition of this property */
+                if (is_invlist(*saved_user_prop_ptr)) {
+                    prop_definition = *saved_user_prop_ptr;
+
+                    /* The SV in the hash won't be removed until global
+                     * destruction, so it is stable and we can unlock */
+                    USER_PROP_MUTEX_UNLOCK;
+
+                    /* The caller shouldn't try to free this SV */
+                    return prop_definition;
+                }
+
+                /* Otherwise, if it is a string, it is the error message
+                 * that was returned when we first tried to evaluate this
+                 * property.  Fail, and append the message */
+                if (SvPOK(*saved_user_prop_ptr)) {
+                    if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+                    sv_catsv(msg, *saved_user_prop_ptr);
+
+                    /* The SV in the hash won't be removed until global
+                     * destruction, so it is stable and we can unlock */
+                    USER_PROP_MUTEX_UNLOCK;
+
+                    return NULL;
+                }
+
+                assert(SvIOK(*saved_user_prop_ptr));
+
+                /* Here, we have an unstable entry in the hash.  Either another
+                 * thread is in the middle of expanding the property's
+                 * definition, or we are ourselves recursing.  We use the aTHX
+                 * in it to distinguish */
+                if (SvIV(*saved_user_prop_ptr) != PTR2IV(CUR_CONTEXT)) {
+
+                    /* Here, it's another thread doing the expanding.  We've
+                     * looked as much as we are going to at the contents of the
+                     * hash entry.  It's safe to unlock. */
+                    USER_PROP_MUTEX_UNLOCK;
+
+                    /* Retry a few times */
+                    if (retry_countdown-- > 0) {
+                        PerlProc_sleep(1);
+                        goto re_fetch;
+                    }
+
+                    if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+                    sv_catpvs(msg, "Timeout waiting for another thread to "
+                                   "define");
+                    goto append_name_to_msg;
+                }
+
+                /* Here, we are recursing; don't dig any deeper */
+                USER_PROP_MUTEX_UNLOCK;
+
+                if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+                sv_catpvs(msg,
+                          "Infinite recursion in user-defined property");
+                goto append_name_to_msg;
+            }
+
+            /* Here, this thread has exclusive control, and there is no entry
+             * for this property in the hash.  So we have the go ahead to
+             * expand the definition ourselves. */
+
+            ENTER;
+
+            /* Create a temporary placeholder in the hash to detect recursion
+             * */
+            SWITCH_TO_GLOBAL_CONTEXT;
+            placeholder= newSVuv(PTR2IV(ORIGINAL_CONTEXT));
+            (void) hv_store_ent(PL_user_def_props, fq_name, placeholder, 0);
+            RESTORE_CONTEXT;
+
+            /* Now that we have a placeholder, we can let other threads
+             * continue */
+            USER_PROP_MUTEX_UNLOCK;
+
+            /* Make sure the placeholder always gets destroyed */
+            SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(fq_name));
+
+            PUSHMARK(SP);
+            SAVETMPS;
+
+            /* Call the user's function, with the /i status as a parameter.
+             * Note that we have gone to a lot of trouble to keep this call
+             * from being within the locked mutex region. */
+            XPUSHs(boolSV(to_fold));
+            PUTBACK;
+
+            (void) call_sv(user_sub_sv, G_EVAL|G_SCALAR);
+
+            SPAGAIN;
+
+            error = ERRSV;
+            if (SvTRUE(error)) {
+                if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+                sv_catpvs(msg, "Error \"");
+                sv_catsv(msg, error);
+                sv_catpvs(msg, "\"");
+                if (name_len > 0) {
+                    sv_catpvs(msg, " in expansion of ");
+                    Perl_sv_catpvf(aTHX_ msg, "%" UTF8f, UTF8fARG(is_utf8,
+                                                                  name_len,
+                                                                  name));
+                }
+
+                (void) POPs;
+                prop_definition = NULL;
+            }
+            else {  /* G_SCALAR guarantees a single return value */
+
+                /* The contents is supposed to be the expansion of the property
+                 * definition.  Call a function to check for valid syntax and
+                 * handle it */
+                prop_definition = handle_user_defined_property(name, name_len,
+                                                    is_utf8, to_fold, runtime,
+                                                    POPs, user_defined_ptr,
+                                                    msg,
+                                                    level);
+            }
+
+            /* Here, we have the results of the expansion.  Replace the
+             * placeholder with them.  We need exclusive access to the hash,
+             * and we can't let anyone else in, between when we delete the
+             * placeholder and add the permanent entry */
+            USER_PROP_MUTEX_LOCK;
+
+            S_delete_recursion_entry(aTHX_ SvPVX(fq_name));
+
+            if (! prop_definition || is_invlist(prop_definition)) {
+
+                /* If we got success we use the inversion list defining the
+                 * property; otherwise use the error message */
+                SWITCH_TO_GLOBAL_CONTEXT;
+                (void) hv_store_ent(PL_user_def_props,
+                                    fq_name,
+                                    ((prop_definition)
+                                     ? newSVsv(prop_definition)
+                                     : newSVsv(msg)),
+                                    0);
+                RESTORE_CONTEXT;
+            }
+
+            /* All done, and the hash now has a permanent entry for this
+             * property.  Give up exclusive control */
+            USER_PROP_MUTEX_UNLOCK;
  
-    lookup_len = j;     /* Use a more mnemonic name starting here */
+            FREETMPS;
+            LEAVE;
+
+            if (prop_definition) {
+
+                /* If the definition is for something not known at this time,
+                 * we toss it, and go return the main property name, as that's
+                 * the one the user will be aware of */
+                if (! is_invlist(prop_definition)) {
+                    SvREFCNT_dec_NN(prop_definition);
+                    goto definition_deferred;
+                }
+
+                sv_2mortal(prop_definition);
+            }
+
+            /* And return */
+            return prop_definition;
+
+        }   /* End of calling the subroutine for the user-defined property */
+    }       /* End of it could be a user-defined property */
+
+    /* Here it wasn't a user-defined property that is known at this time.  See
+     * if it is a Unicode property */
+
+    lookup_len = j;     /* This is a more mnemonic name than 'j' */
  
      /* Get the index into our pointer table of the inversion list corresponding
       * to the property */
      table_index = match_uniprop((U8 *) lookup_name, lookup_len);
  
-    /* If it didn't find the property */
+    /* If it didn't find the property ... */
      if (table_index == 0) {
  
-        /* If didn't find the property, we try again stripping off any initial
-         * 'In' or 'Is' */
+        /* Try again stripping off any initial 'In' or 'Is' */
          if (starts_with_In_or_Is) {
              lookup_name += 2;
              lookup_len -= 2;
@@ -21750,14 +23197,28 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
          if (table_index == 0) {
              char * canonical;
  
-            /* If not found, and not a numeric type property, isn't a legal
-             * property */
+            /* Here, we didn't find it.  If not a numeric type property, and
+             * can't be a user-defined one, it isn't a legal property */
              if (! is_nv_type) {
-                return NULL;
-            }
+                if (! could_be_user_defined) {
+                    goto failed;
+                }
+
+                /* Here, the property name is legal as a user-defined one.   At
+                 * compile time, it might just be that the subroutine for that
+                 * property hasn't been encountered yet, but at runtime, it's
+                 * an error to try to use an undefined one */
+                if (runtime) {
+                    if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+                    sv_catpvs(msg, "Unknown user-defined property name");
+                    goto append_name_to_msg;
+                }
  
-            /* But the numeric type properties need more work to decide.  What
-             * we do is make sure we have the number in canonical form and look
+                goto definition_deferred;
+            } /* End of isn't a numeric type property */
+
+            /* The numeric type properties need more work to decide.  What we
+             * do is make sure we have the number in canonical form and look
               * that up. */
  
              if (slash_pos < 0) {    /* No slash */
@@ -21773,13 +23234,14 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
                               lookup_len - equals_pos)
                            != lookup_name + lookup_len)
                  {
-                    return NULL;
+                    goto failed;
                  }
  
-                /* If the value is an integer, the canonical value is integral */
+                /* If the value is an integer, the canonical value is integral
+                 * */
                  if (Perl_ceil(value) == value) {
                      canonical = Perl_form(aTHX_ "%.*s%.0" NVff,
-                                                equals_pos, lookup_name, value);
+                                            equals_pos, lookup_name, value);
                  }
                  else {  /* Otherwise, it is %e with a known precision */
                      char * exp_ptr;
@@ -21841,12 +23303,12 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
                  /* Convert the numerator to numeric */
                  end_ptr = this_lookup_name + slash_pos;
                  if (! grok_atoUV(this_lookup_name, &numerator, &end_ptr)) {
-                    return NULL;
+                    goto failed;
                  }
  
                  /* It better have included all characters before the slash */
                  if (*end_ptr != '/') {
-                    return NULL;
+                    goto failed;
                  }
  
                  /* Set to look at just the denominator */
@@ -21856,7 +23318,7 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
  
                  /* Convert the denominator to numeric */
                  if (! grok_atoUV(this_lookup_name, &denominator, &end_ptr)) {
-                    return NULL;
+                    goto failed;
                  }
  
                  /* It better be the rest of the characters, and don't divide by
@@ -21864,7 +23326,7 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
                  if (   end_ptr != this_lookup_name + lookup_len
                      || denominator == 0)
                  {
-                    return NULL;
+                    goto failed;
                  }
  
                  /* Get the greatest common denominator using
@@ -21880,11 +23342,11 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
                  /* If already in lowest possible terms, we have already tried
                   * looking this up */
                  if (gcd == 1) {
-                    return NULL;
+                    goto failed;
                  }
  
-                /* Reduce the rational, which should put it in canonical form.
-                 * Then look it up */
+                /* Reduce the rational, which should put it in canonical form
+                 * */
                  numerator /= gcd;
                  denominator /= gcd;
  
@@ -21895,26 +23357,23 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
              /* Here, we have the number in canonical form.  Try that */
              table_index = match_uniprop((U8 *) canonical, strlen(canonical));
              if (table_index == 0) {
-                return NULL;
+                goto failed;
              }
-        }
-    }
+        }   /* End of still didn't find the property in our table */
+    }       /* End of       didn't find the property in our table */
  
-    /* The return is an index into a table of ptrs.  A negative return
-     * signifies that the real index is the absolute value, but the result
-     * needs to be inverted */
+    /* Here, we have a non-zero return, which is an index into a table of ptrs.
+     * A negative return signifies that the real index is the absolute value,
+     * but the result needs to be inverted */
      if (table_index < 0) {
-        *invert = TRUE;
+        invert_return = TRUE;
          table_index = -table_index;
      }
-    else {
-        *invert = FALSE;
-    }
  
      /* Out-of band indices indicate a deprecated property.  The proper index is
       * modulo it with the table size.  And dividing by the table size yields
-     * an offset into a table constructed to contain the corresponding warning
-     * message */
+     * an offset into a table constructed by regen/mk_invlists.pl to contain
+     * the corresponding warning message */
      if (table_index > MAX_UNI_KEYWORD_INDEX) {
          Size_t warning_offset = table_index / MAX_UNI_KEYWORD_INDEX;
          table_index %= MAX_UNI_KEYWORD_INDEX;
@@ -21948,7 +23407,117 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
      }
  
      /* Create and return the inversion list */
-    return _new_invlist_C_array(uni_prop_ptrs[table_index]);
+    prop_definition =_new_invlist_C_array(uni_prop_ptrs[table_index]);
+    sv_2mortal(prop_definition);
+
+
+    /* See if there is a private use override to add to this definition */
+    {
+        COPHH * hinthash = (IN_PERL_COMPILETIME)
+                           ? CopHINTHASH_get(&PL_compiling)
+                           : CopHINTHASH_get(PL_curcop);
+       SV * pu_overrides = cophh_fetch_pv(hinthash, "private_use", 0, 0);
+
+        if (UNLIKELY(pu_overrides && SvPOK(pu_overrides))) {
+
+            /* See if there is an element in the hints hash for this table */
+            SV * pu_lookup = Perl_newSVpvf(aTHX_ "%d=", table_index);
+            const char * pos = strstr(SvPVX(pu_overrides), SvPVX(pu_lookup));
+
+            if (pos) {
+                bool dummy;
+                SV * pu_definition;
+                SV * pu_invlist;
+                SV * expanded_prop_definition =
+                            sv_2mortal(invlist_clone(prop_definition, NULL));
+
+                /* If so, it's definition is the string from here to the next
+                 * \a character.  And its format is the same as a user-defined
+                 * property */
+                pos += SvCUR(pu_lookup);
+                pu_definition = newSVpvn(pos, strchr(pos, '\a') - pos);
+                pu_invlist = handle_user_defined_property(lookup_name,
+                                                          lookup_len,
+                                                          0, /* Not UTF-8 */
+                                                          0, /* Not folded */
+                                                          runtime,
+                                                          pu_definition,
+                                                          &dummy,
+                                                          msg,
+                                                          level);
+                if (TAINT_get) {
+                    if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+                    sv_catpvs(msg, "Insecure private-use override");
+                    goto append_name_to_msg;
+                }
+
+                /* For now, as a safety measure, make sure that it doesn't
+                 * override non-private use code points */
+                _invlist_intersection(pu_invlist, PL_Private_Use, &pu_invlist);
+
+                /* Add it to the list to be returned */
+                _invlist_union(prop_definition, pu_invlist,
+                               &expanded_prop_definition);
+                prop_definition = expanded_prop_definition;
+                Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__PRIVATE_USE), "The private_use feature is experimental");
+            }
+        }
+    }
+
+    if (invert_return) {
+        _invlist_invert(prop_definition);
+    }
+    return prop_definition;
+
+
+  failed:
+    if (non_pkg_begin != 0) {
+        if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+        sv_catpvs(msg, "Illegal user-defined property name");
+    }
+    else {
+        if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+        sv_catpvs(msg, "Can't find Unicode property definition");
+    }
+    /* FALLTHROUGH */
+
+  append_name_to_msg:
+    {
+        const char * prefix = (runtime && level == 0) ?  " \\p{" : " \"";
+        const char * suffix = (runtime && level == 0) ?  "}" : "\"";
+
+        sv_catpv(msg, prefix);
+        Perl_sv_catpvf(aTHX_ msg, "%" UTF8f, UTF8fARG(is_utf8, name_len, name));
+        sv_catpv(msg, suffix);
+    }
+
+    return NULL;
+
+  definition_deferred:
+
+    /* Here it could yet to be defined, so defer evaluation of this
+     * until its needed at runtime. */
+    prop_definition = newSVpvs_flags("", SVs_TEMP);
+
+    /* To avoid any ambiguity, the package is always specified.
+     * Use the current one if it wasn't included in our input */
+    if (non_pkg_begin == 0) {
+        const HV * pkg = (IN_PERL_COMPILETIME)
+                         ? PL_curstash
+                         : CopSTASH(PL_curcop);
+        const char* pkgname = HvNAME(pkg);
+
+        Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
+                      UTF8fARG(is_utf8, strlen(pkgname), pkgname));
+        sv_catpvs(prop_definition, "::");
+    }
+
+    Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
+                         UTF8fARG(is_utf8, name_len, name));
+    sv_catpvs(prop_definition, "\n");
+
+    *user_defined_ptr = TRUE;
+    return prop_definition;
  }
  
  #endif