This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Replace code with fcn that does the same thing
[perl5.git] / regcomp.c
index fcedc36..ca0c7af 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -149,7 +149,7 @@ struct RExC_state_t {
     I32                sawback;                /* Did we see \1, ...? */
     U32                seen;
     SSize_t    size;                   /* Code size. */
-    I32                npar;            /* Capture buffer count, (OPEN) plus
+    I32         npar;                   /* Capture buffer count, (OPEN) plus
                                            one. ("par" 0 is the whole
                                            pattern)*/
     I32                nestroot;               /* root parens we are in - used by
@@ -212,6 +212,7 @@ struct RExC_state_t {
     bool        seen_unfolded_sharp_s;
     bool        strict;
     bool        study_started;
+    bool        in_script_run;
 };
 
 #define RExC_flags     (pRExC_state->flags)
@@ -278,6 +279,7 @@ struct RExC_state_t {
 #define RExC_strict (pRExC_state->strict)
 #define RExC_study_started      (pRExC_state->study_started)
 #define RExC_warn_text (pRExC_state->warn_text)
+#define RExC_in_script_run      (pRExC_state->in_script_run)
 
 /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
  * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -343,7 +345,7 @@ struct RExC_state_t {
 /* Change from /d into /u rules, and restart the parse if we've already seen
  * something whose size would increase as a result, by setting *flagp and
  * returning 'restart_retval'.  RExC_uni_semantics is a flag that indicates
- * we've change to /u during the parse.  */
+ * we've changed to /u during the parse.  */
 #define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
     STMT_START {                                                            \
             if (DEPENDS_SEMANTICS) {                                        \
@@ -357,6 +359,34 @@ struct RExC_state_t {
             }                                                               \
     } STMT_END
 
+/* Executes 'return X;' from the enclosing function if 'flags' has any of the
+ * bits 'RESTART_PASS1', 'NEED_UTF8', or 'extra' set, after first storing just
+ * those bits into *flagp.  ('flags' and 'extra' are each evaluated twice.) */
+#define RETURN_X_ON_RESTART_OR_FLAGS(X, flags, flagp, extra)                \
+    STMT_START {                                                            \
+            if ((flags) & (RESTART_PASS1|NEED_UTF8|(extra))) {              \
+                *(flagp) = (flags) & (RESTART_PASS1|NEED_UTF8|(extra));     \
+                return X;                                                   \
+            }                                                               \
+    } STMT_END
+
+#define RETURN_NULL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
+                    RETURN_X_ON_RESTART_OR_FLAGS(NULL,flags,flagp,extra)
+
+#define RETURN_X_ON_RESTART(X, flags,flagp)                                 \
+                        RETURN_X_ON_RESTART_OR_FLAGS( X, flags, flagp, 0)
+
+
+#define RETURN_NULL_ON_RESTART_FLAGP_OR_FLAGS(flagp,extra)                  \
+            if (*(flagp) & (RESTART_PASS1|(extra))) return NULL
+
+#define MUST_RESTART(flags) ((flags) & (RESTART_PASS1)) /* redo pass 1? */
+
+#define RETURN_NULL_ON_RESTART(flags,flagp)                                 \
+                                    RETURN_X_ON_RESTART(NULL, flags,flagp)
+#define RETURN_NULL_ON_RESTART_FLAGP(flagp)                                 \
+                            RETURN_NULL_ON_RESTART_FLAGP_OR_FLAGS(flagp,0)
+
 /* This converts the named class defined in regcomp.h to its equivalent class
  * number defined in handy.h. */
 #define namedclass_to_classnum(class)  ((int) ((class) / 2))
@@ -627,7 +657,13 @@ static const scan_data_t zero_scan_data = {
     UTF8fARG(UTF,                                                           \
              (xI(xC) > eC) /* Don't run off end */                          \
               ? eC - sC   /* Length before the <--HERE */                   \
-              : ( __ASSERT_(xI_offset(xC) >= 0) xI_offset(xC) ),            \
+              : ((xI_offset(xC) >= 0)                                       \
+                 ? xI_offset(xC)                                            \
+                 : (Perl_croak(aTHX_ "panic: %s: %d: negative offset: %"    \
+                                    IVdf " trying to output message for "   \
+                                    " pattern %.*s",                        \
+                                    __FILE__, __LINE__, xI_offset(xC),      \
+                                    ((int) (eC - sC)), sC), 0)),            \
              sC),         /* The input pattern printed up to the <--HERE */ \
     UTF8fARG(UTF,                                                           \
              (xI(xC) > eC) ? 0 : eC - xI(xC), /* Length after <--HERE */    \
@@ -2544,7 +2580,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
 
     switch (flags) {
         case EXACT: case EXACTL: break;
-       case EXACTFA:
+       case EXACTFAA:
         case EXACTFU_SS:
        case EXACTFU:
        case EXACTFLU8: folder = PL_fold_latin1; break;
@@ -3693,10 +3729,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  * XXX khw thinks this should be enhanced to fill EXACT (at least) nodes as full
  * as possible, even if that means splitting an existing node so that its first
  * part is moved to the preceeding node.  This would maximise the efficiency of
- * memEQ during matching.  Elsewhere in this file, khw proposes splitting
- * EXACTFish nodes into portions that don't change under folding vs those that
- * do.  Those portions that don't change may be the only things in the pattern that
- * could be used to find fixed and floating strings.
+ * memEQ during matching.
  *
  * If a node is to match under /i (folded), the number of characters it matches
  * can be different than its character length if it contains a multi-character
@@ -3704,14 +3737,16 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  * input nodes.
  *
  * And *unfolded_multi_char is set to indicate whether or not the node contains
- * an unfolded multi-char fold.  This happens when whether the fold is valid or
- * not won't be known until runtime; namely for EXACTF nodes that contain LATIN
- * SMALL LETTER SHARP S, as only if the target string being matched against
- * turns out to be UTF-8 is that fold valid; and also for EXACTFL nodes whose
- * folding rules depend on the locale in force at runtime.  (Multi-char folds
- * whose components are all above the Latin1 range are not run-time locale
- * dependent, and have already been folded by the time this function is
- * called.)
+ * an unfolded multi-char fold.  This happens when it won't be known until
+ * runtime whether the fold is valid or not; namely
+ *  1) for EXACTF nodes that contain LATIN SMALL LETTER SHARP S, as only if the
+ *      target string being matched against turns out to be UTF-8 is that fold
+ *      valid; or
+ *  2) for EXACTFL nodes whose folding rules depend on the locale in force at
+ *      runtime.
+ * (Multi-char folds whose components are all above the Latin1 range are not
+ * run-time locale dependent, and have already been folded by the time this
+ * function is called.)
  *
  * This is as good a place as any to discuss the design of handling these
  * multi-character fold sequences.  It's been wrong in Perl for a very long
@@ -3761,7 +3796,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  *      described in the next item.
  * 3)   A problem remains for unfolded multi-char folds. (These occur when the
  *      validity of the fold won't be known until runtime, and so must remain
- *      unfolded for now.  This happens for the sharp s in EXACTF and EXACTFA
+ *      unfolded for now.  This happens for the sharp s in EXACTF and EXACTFAA
  *      nodes when the pattern isn't in UTF-8.  (Note, BTW, that there cannot
  *      be an EXACTF node with a UTF-8 pattern.)  They also occur for various
  *      folds in EXACTFL nodes, regardless of the UTF-ness of the pattern.)
@@ -3771,28 +3806,28 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  *      character in the target string.  (And I do mean character, and not byte
  *      here, unlike other parts of the documentation that have never been
  *      updated to account for multibyte Unicode.)  sharp s in EXACTF and
- *      EXACTFL nodes can match the two character string 'ss'; in EXACTFA nodes
- *      it can match "\x{17F}\x{17F}".  These, along with other ones in EXACTFL
- *      nodes, violate the assumption, and they are the only instances where it
- *      is violated.  I'm reluctant to try to change the assumption, as the
- *      code involved is impenetrable to me (khw), so instead the code here
- *      punts.  This routine examines EXACTFL nodes, and (when the pattern
- *      isn't UTF-8) EXACTF and EXACTFA for such unfolded folds, and returns a
+ *      EXACTFL nodes can match the two character string 'ss'; in EXACTFAA
+ *      nodes it can match "\x{17F}\x{17F}".  These, along with other ones in
+ *      EXACTFL nodes, violate the assumption, and they are the only instances
+ *      where it is violated.  I'm reluctant to try to change the assumption,
+ *      as the code involved is impenetrable to me (khw), so instead the code
+ *      here punts.  This routine examines EXACTFL nodes, and (when the pattern
+ *      isn't UTF-8) EXACTF and EXACTFAA for such unfolded folds, and returns a
  *      boolean indicating whether or not the node contains such a fold.  When
  *      it is true, the caller sets a flag that later causes the optimizer in
  *      this file to not set values for the floating and fixed string lengths,
  *      and thus avoids the optimizer code in regexec.c that makes the invalid
  *      assumption.  Thus, there is no optimization based on string lengths for
  *      EXACTFL nodes that contain these few folds, nor for non-UTF8-pattern
- *      EXACTF and EXACTFA nodes that contain the sharp s.  (The reason the
+ *      EXACTF and EXACTFAA nodes that contain the sharp s.  (The reason the
  *      assumption is wrong only in these cases is that all other non-UTF-8
  *      folds are 1-1; and, for UTF-8 patterns, we pre-fold all other folds to
  *      their expanded versions.  (Again, we can't prefold sharp s to 'ss' in
  *      EXACTF nodes because we don't know at compile time if it actually
  *      matches 'ss' or not.  For EXACTF nodes it will match iff the target
  *      string is in UTF-8.  This is in contrast to EXACTFU nodes, where it
- *      always matches; and EXACTFA where it never does.  In an EXACTFA node in
- *      a UTF-8 pattern, sharp s is folded to "\x{17F}\x{17F}, avoiding the
+ *      always matches; and EXACTFAA where it never does.  In an EXACTFAA node
+ *      in a UTF-8 pattern, sharp s is folded to "\x{17F}\x{17F}", avoiding the
  *      problem; but in a non-UTF8 pattern, folding it to that above-Latin1
  *      string would require the pattern to be forced into UTF-8, the overhead
  *      of which we want to avoid.  Similarly the unfolded multi-char folds in
@@ -3801,9 +3836,9 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  *
  *      Similarly, the code that generates tries doesn't currently handle
  *      not-already-folded multi-char folds, and it looks like a pain to change
- *      that.  Therefore, trie generation of EXACTFA nodes with the sharp s
- *      doesn't work.  Instead, such an EXACTFA is turned into a new regnode,
- *      EXACTFA_NO_TRIE, which the trie code knows not to handle.  Most people
+ *      that.  Therefore, trie generation of EXACTFAA nodes with the sharp s
+ *      doesn't work.  Instead, such an EXACTFAA is turned into a new regnode,
+ *      EXACTFAA_NO_TRIE, which the trie code knows not to handle.  Most people
  *      using /iaa matching will be doing so almost entirely with ASCII
  *      strings, so this should rarely be encountered in practice */
 
@@ -3983,10 +4018,10 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                 }
 
                 /* Nodes with 'ss' require special handling, except for
-                 * EXACTFA-ish for which there is no multi-char fold to this */
+                 * EXACTFAA-ish for which there is no multi-char fold to this */
                 if (len == 2 && *s == 's' && *(s+1) == 's'
-                    && OP(scan) != EXACTFA
-                    && OP(scan) != EXACTFA_NO_TRIE)
+                    && OP(scan) != EXACTFAA
+                    && OP(scan) != EXACTFAA_NO_TRIE)
                 {
                     count = 2;
                     if (OP(scan) != EXACTFL) {
@@ -4000,7 +4035,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                     /* Count how many characters are in it.  In the case of
                      * /aa, no folds which contain ASCII code points are
                      * allowed, so check for those, and skip if found. */
-                    if (OP(scan) != EXACTFA && OP(scan) != EXACTFA_NO_TRIE) {
+                    if (OP(scan) != EXACTFAA && OP(scan) != EXACTFAA_NO_TRIE) {
                         count = utf8_length(s, multi_end);
                         s = multi_end;
                     }
@@ -4038,9 +4073,9 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
             *min_subtract += total_count_delta;
             Safefree(folded);
        }
-       else if (OP(scan) == EXACTFA) {
+       else if (OP(scan) == EXACTFAA) {
 
-            /* Non-UTF-8 pattern, EXACTFA node.  There can't be a multi-char
+            /* Non-UTF-8 pattern, EXACTFAA node.  There can't be a multi-char
              * fold to the ASCII range (and there are no existing ones in the
              * upper latin1 range).  But, as outlined in the comments preceding
              * this function, we need to flag any occurrences of the sharp s.
@@ -4051,7 +4086,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                                       || UNICODE_DOT_DOT_VERSION > 0)
            while (s < s_end) {
                 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
-                    OP(scan) = EXACTFA_NO_TRIE;
+                    OP(scan) = EXACTFAA_NO_TRIE;
                     *unfolded_multi_char = TRUE;
                     break;
                 }
@@ -4060,7 +4095,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
         }
        else {
 
-            /* Non-UTF-8 pattern, not EXACTFA node.  Look for the multi-char
+            /* Non-UTF-8 pattern, not EXACTFAA node.  Look for the multi-char
              * folds that are all Latin1.  As explained in the comments
              * preceding this function, we look also for the sharp s in EXACTF
              * and EXACTFL nodes; it can be in the final position.  Otherwise
@@ -4143,7 +4178,7 @@ S_unwind_scan_frames(pTHX_ const void *p)
     } while (f);
 }
 
-
+/* the return from this sub is the minimum length that could possibly match */
 STATIC SSize_t
 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         SSize_t *minlenp, SSize_t *deltap,
@@ -4560,7 +4595,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                 EXACT           | EXACT
                                 EXACTFU         | EXACTFU
                                 EXACTFU_SS      | EXACTFU
-                                EXACTFA         | EXACTFA
+                                EXACTFAA        | EXACTFAA
                                 EXACTL          | EXACTL
                                 EXACTFLU8       | EXACTFLU8
 
@@ -4572,8 +4607,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          ? EXACT                                            \
                          : ( EXACTFU == (X) || EXACTFU_SS == (X) )          \
                            ? EXACTFU                                        \
-                           : ( EXACTFA == (X) )                             \
-                             ? EXACTFA                                      \
+                           : ( EXACTFAA == (X) )                             \
+                             ? EXACTFAA                                      \
                              : ( EXACTL == (X) )                            \
                                ? EXACTL                                     \
                                : ( EXACTFLU8 == (X) )                        \
@@ -5447,7 +5482,9 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 /* Cannot expect anything... */
                 scan_commit(pRExC_state, data, minlenp, is_inf);
                data->pos_min += 1;
-               data->pos_delta += 1;
+                if (data->pos_delta != SSize_t_MAX) {
+                    data->pos_delta += 1;
+                }
                data->cur_is_floating = 1; /* float */
            }
        }
@@ -5514,6 +5551,27 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                                           (regnode_charclass *) scan);
                    break;
 
+                case ANYOFM:
+                  {
+                    SV* cp_list = get_ANYOFM_contents(scan);
+
+                    if (flags & SCF_DO_STCLASS_OR) {
+                        ssc_union(data->start_class,
+                                  cp_list,
+                                  FALSE /* don't invert */
+                                  );
+                    }
+                    else if (flags & SCF_DO_STCLASS_AND) {
+                        ssc_intersection(data->start_class,
+                                         cp_list,
+                                         FALSE /* don't invert */
+                                         );
+                    }
+
+                    SvREFCNT_dec_NN(cp_list);
+                    break;
+                  }
+
                case NPOSIXL:
                     invert = 1;
                     /* FALLTHROUGH */
@@ -5554,20 +5612,23 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     }
                     break;
 
+                case NASCII:
+                    invert = 1;
+                    /* FALLTHROUGH */
+               case ASCII:
+                    my_invlist = invlist_clone(PL_Posix_ptrs[_CC_ASCII]);
+
+                    /* This can be handled as a Posix class */
+                    goto join_posix_and_ascii;
+
                 case NPOSIXA:   /* For these, we always know the exact set of
                                    what's matched */
                     invert = 1;
                     /* FALLTHROUGH */
                case POSIXA:
-                    if (FLAGS(scan) == _CC_ASCII) {
-                        my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
-                    }
-                    else {
-                        _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
-                                              PL_XPosix_ptrs[_CC_ASCII],
-                                              &my_invlist);
-                    }
-                    goto join_posix;
+                    assert(FLAGS(scan) != _CC_ASCII);
+                    my_invlist = invlist_clone(PL_Posix_ptrs[FLAGS(scan)]);
+                    goto join_posix_and_ascii;
 
                case NPOSIXD:
                case NPOSIXU:
@@ -5587,7 +5648,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                           &my_invlist);
                     }
 
-                  join_posix:
+                  join_posix_and_ascii:
 
                     if (flags & SCF_DO_STCLASS_AND) {
                         ssc_intersection(data->start_class, my_invlist, invert);
@@ -5941,8 +6002,12 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     data->cur_is_floating = 1; /* float */
             }
             min += min1;
-            if (delta != SSize_t_MAX)
-                delta += max1 - min1;
+            if (delta != SSize_t_MAX) {
+                if (SSize_t_MAX - (max1 - min1) >= delta)
+                    delta += max1 - min1;
+                else
+                    delta = SSize_t_MAX;
+            }
             if (flags & SCF_DO_STCLASS_OR) {
                 ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
                 if (min1) {
@@ -6451,13 +6516,13 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                  * it is properly null terminated or we will fail asserts
                  * later. In theory we probably shouldn't get such SV's,
                  * but if we do we should handle it gracefully. */
-                if ( SvTYPE(msv) != SVt_PV || (SvLEN(msv) > SvCUR(msv) && *(SvEND(msv)) == 0) ) {
+                if ( SvTYPE(msv) != SVt_PV || (SvLEN(msv) > SvCUR(msv) && *(SvEND(msv)) == 0) || SvIsCOW_shared_hash(msv) ) {
                     /* not a string, or a string with a trailing null */
                     pat = msv;
                 } else {
                     /* a string with no trailing null, we need to copy it
-                     * so it we have a trailing null */
-                    pat = newSVsv(msv);
+                     * so it has a trailing null */
+                    pat = sv_2mortal(newSVsv(msv));
                 }
             }
 
@@ -6893,18 +6958,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     /* Initialize these here instead of as-needed, as is quick and avoids
      * having to test them each time otherwise */
-    if (! PL_AboveLatin1) {
+    if (! PL_InBitmap) {
 #ifdef DEBUGGING
         char * dump_len_string;
 #endif
 
-       PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
-       PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
-       PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
-        PL_utf8_foldable = _new_invlist_C_array(_Perl_Any_Folds_invlist);
-        PL_HasMultiCharFold =
-                       _new_invlist_C_array(_Perl_Folds_To_Multi_Char_invlist);
-
         /* This is calculated here, because the Perl program that generates the
          * static global ones doesn't currently have access to
          * NUM_ANYOF_CODE_POINTS */
@@ -7033,6 +7091,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_seen_unfolded_sharp_s = 0;
     RExC_contains_locale = 0;
     RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
+    RExC_in_script_run = 0;
     RExC_study_started = 0;
     pRExC_state->runtime_code_qr = NULL;
     RExC_frame_head= NULL;
@@ -7171,14 +7230,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         at least some part of the pattern, and therefore must convert the whole
         thing.
         -- dmq */
-        if (flags & RESTART_PASS1) {
+        if (MUST_RESTART(flags)) {
             if (flags & NEED_UTF8) {
                 S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
                 pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
+                DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo pass 1 after upgrade\n"));
             }
             else {
-                DEBUG_PARSE_r(Perl_re_printf( aTHX_
-                "Need to redo pass 1\n"));
+                DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo pass 1\n"));
             }
 
             goto redo_first_pass;
@@ -8542,7 +8601,7 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
     PERL_UNUSED_CONTEXT;
     PERL_ARGS_ASSERT_INVLIST_SET_LEN;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     SvCUR_set(invlist,
               (len == 0)
@@ -8570,8 +8629,8 @@ S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
 
     PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST_DESTROYS_SRC;
 
-    assert(SvTYPE(src) == SVt_INVLIST);
-    assert(SvTYPE(dest) == SVt_INVLIST);
+    assert(is_invlist(src));
+    assert(is_invlist(dest));
     assert(! invlist_is_iterating(src));
     assert(SvCUR(src) == 0 || SvCUR(src) < SvLEN(src));
 
@@ -8606,7 +8665,7 @@ S_get_invlist_previous_index_addr(SV* invlist)
      * */
     PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     return &(((XINVLIST*) SvANY(invlist))->prev_index);
 }
@@ -8644,7 +8703,7 @@ S_invlist_trim(SV* invlist)
 
     PERL_ARGS_ASSERT_INVLIST_TRIM;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     SvPV_renew(invlist, MAX(min_size, SvCUR(invlist) + 1));
 }
@@ -8654,7 +8713,7 @@ S_invlist_clear(pTHX_ SV* invlist)    /* Empty the inversion list */
 {
     PERL_ARGS_ASSERT_INVLIST_CLEAR;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     invlist_set_len(invlist, 0, 0);
     invlist_trim(invlist);
@@ -8680,7 +8739,7 @@ S_invlist_max(SV* const invlist)
 
     PERL_ARGS_ASSERT_INVLIST_MAX;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     /* Assumes worst case, in which the 0 element is not counted in the
      * inversion list, so subtracts 1 for that */
@@ -8711,7 +8770,7 @@ Perl__new_invlist(pTHX_ IV initial_size)
     invlist_set_len(new_list, 0, 0);
 
     /* Force iterinit() to be used to get iteration to work */
-    *get_invlist_iter_addr(new_list) = (STRLEN) UV_MAX;
+    invlist_iterfinish(new_list);
 
     *get_invlist_previous_index_addr(new_list) = 0;
 
@@ -8781,7 +8840,7 @@ S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
 
     PERL_ARGS_ASSERT_INVLIST_EXTEND;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     /* Add one to account for the zero element at the beginning which may not
      * be counted by the calling parameters */
@@ -9105,7 +9164,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
 
     PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
     assert(a != b);
-    assert(*output == NULL || SvTYPE(*output) == SVt_INVLIST);
+    assert(*output == NULL || is_invlist(*output));
 
     len_b = _invlist_len(b);
     if (len_b == 0) {
@@ -9383,7 +9442,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
 
     PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
     assert(a != b);
-    assert(*i == NULL || SvTYPE(*i) == SVt_INVLIST);
+    assert(*i == NULL || is_invlist(*i));
 
     /* Special case if either one is empty */
     len_a = (a == NULL) ? 0 : _invlist_len(a);
@@ -9926,7 +9985,7 @@ S_get_invlist_iter_addr(SV* invlist)
 
     PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     return &(((XINVLIST*) SvANY(invlist))->iterator);
 }
@@ -10122,23 +10181,6 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
     }
 }
 
-void
-Perl__load_PL_utf8_foldclosures (pTHX)
-{
-    assert(! PL_utf8_foldclosures);
-
-    /* If the folds haven't been read in, call a fold function
-     * to force that */
-    if (! PL_utf8_tofold) {
-        U8 dummy[UTF8_MAXBYTES_CASE+1];
-        const U8 hyphen[] = HYPHEN_UTF8;
-
-        /* This string is just a short named one above \xff */
-        toFOLD_utf8_safe(hyphen, hyphen + sizeof(hyphen) - 1, dummy, NULL);
-        assert(PL_utf8_tofold); /* Verify that worked */
-    }
-    PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
-}
 #endif
 
 #if defined(PERL_ARGS_ASSERT__INVLISTEQ) && !defined(PERL_IN_XSUB_RE)
@@ -10239,8 +10281,8 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
             /* Some characters match above-Latin1 ones under /i.  This
              * is true of EXACTFL ones when the locale is UTF-8 */
             if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc)
-                && (! isASCII(uc) || (OP(node) != EXACTFA
-                                    && OP(node) != EXACTFA_NO_TRIE)))
+                && (! isASCII(uc) || (OP(node) != EXACTFAA
+                                    && OP(node) != EXACTFAA_NO_TRIE)))
             {
                 add_above_Latin1_folds(pRExC_state, (U8) uc, &invlist);
             }
@@ -10248,11 +10290,10 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
     }
     else {  /* Pattern is UTF-8 */
         U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
-        STRLEN foldlen = UTF8SKIP(s);
         const U8* e = s + bytelen;
-        SV** listp;
+        IV fc;
 
-        uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
+        fc = uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
 
         /* The only code points that aren't folded in a UTF EXACTFish
          * node are are the problematic ones in EXACTFL nodes */
@@ -10264,14 +10305,21 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
             U8 *d = folded;
             int i;
 
+            fc = -1;
             for (i = 0; i < UTF8_MAX_FOLD_CHAR_EXPAND && s < e; i++) {
                 if (isASCII(*s)) {
                     *(d++) = (U8) toFOLD(*s);
+                    if (fc < 0) {       /* Save the first fold */
+                        fc = *(d-1);
+                    }
                     s++;
                 }
                 else {
                     STRLEN len;
-                    toFOLD_utf8_safe(s, e, d, &len);
+                    UV fold = toFOLD_utf8_safe(s, e, d, &len);
+                    if (fc < 0) {       /* Save the first fold */
+                        fc = fold;
+                    }
                     d += len;
                     s += UTF8SKIP(s);
                 }
@@ -10280,15 +10328,13 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
             /* And set up so the code below that looks in this folded
              * buffer instead of the node's string */
             e = d;
-            foldlen = UTF8SKIP(folded);
             s = folded;
         }
 
         /* When we reach here 's' points to the fold of the first
          * character(s) of the node; and 'e' points to far enough along
          * the folded string to be just past any possible multi-char
-         * fold. 'foldlen' is the length in bytes of the first
-         * character in 's'
+         * fold.
          *
          * Unlike the non-UTF-8 case, the macro for determining if a
          * string is a multi-char fold requires all the characters to
@@ -10301,33 +10347,29 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
             invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
         }
         else {  /* Single char fold */
-
-            /* It matches all the things that fold to it, which are
-             * found in PL_utf8_foldclosures (including itself) */
-            invlist = add_cp_to_invlist(invlist, uc);
-            if (! PL_utf8_foldclosures)
-                _load_PL_utf8_foldclosures();
-            if ((listp = hv_fetch(PL_utf8_foldclosures,
-                                (char *) s, foldlen, FALSE)))
-            {
-                AV* list = (AV*) *listp;
-                IV k;
-                for (k = 0; k <= av_tindex_skip_len_mg(list); k++) {
-                    SV** c_p = av_fetch(list, k, FALSE);
-                    UV c;
-                    assert(c_p);
-
-                    c = SvUV(*c_p);
-
-                    /* /aa doesn't allow folds between ASCII and non- */
-                    if ((OP(node) == EXACTFA || OP(node) == EXACTFA_NO_TRIE)
-                        && isASCII(c) != isASCII(uc))
-                    {
-                        continue;
-                    }
-
-                    invlist = add_cp_to_invlist(invlist, c);
+            unsigned int k;
+            unsigned int first_folds_to;
+            const unsigned int * remaining_folds_to_list;
+            Size_t folds_to_count;
+
+            /* It matches itself */
+            invlist = add_cp_to_invlist(invlist, fc);
+
+            /* ... plus all the things that fold to it, which are returned by
+             * _inverse_folds() */
+            folds_to_count = _inverse_folds(fc, &first_folds_to,
+                                                &remaining_folds_to_list);
+            for (k = 0; k < folds_to_count; k++) {
+                UV c = (k == 0) ? first_folds_to : remaining_folds_to_list[k-1];
+
+                /* /aa doesn't allow folds between ASCII and non- */
+                if (   (OP(node) == EXACTFAA || OP(node) == EXACTFAA_NO_TRIE)
+                    && isASCII(c) != isASCII(fc))
+                {
+                    continue;
                 }
+
+                invlist = add_cp_to_invlist(invlist, c);
             }
         }
     }
@@ -10629,7 +10671,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
      * RExC_parse beyond the '('.  Things like '(?' are indivisible tokens, and
      * this flag alerts us to the need to check for that */
 {
-    regnode *ret;              /* Will be the head of the group. */
+    regnode *ret = NULL;    /* Will be the head of the group. */
     regnode *br;
     regnode *lastbr;
     regnode *ender = NULL;
@@ -10667,47 +10709,81 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
          * here (if paren ==2).  The forms '(*VERB' and '(?...' disallow such
          * intervening space, as the sequence is a token, and a token should be
          * indivisible */
-        bool has_intervening_patws = paren == 2 && *(RExC_parse - 1) != '(';
+        bool has_intervening_patws = (paren == 2)
+                                  && *(RExC_parse - 1) != '(';
 
         if (RExC_parse >= RExC_end) {
            vFAIL("Unmatched (");
         }
 
-        if ( *RExC_parse == '*') { /* (*VERB:ARG) */
+        if (paren == 'r') {     /* Atomic script run */
+            paren = '>';
+            goto parse_rest;
+        }
+        else if ( *RExC_parse == '*') { /* (*VERB:ARG), (*construct:...) */
            char *start_verb = RExC_parse + 1;
            STRLEN verb_len;
            char *start_arg = NULL;
            unsigned char op = 0;
             int arg_required = 0;
             int internal_argval = -1; /* if >-1 we are not allowed an argument*/
+            bool has_upper = FALSE;
 
             if (has_intervening_patws) {
                 RExC_parse++;   /* past the '*' */
-                vFAIL("In '(*VERB...)', the '(' and '*' must be adjacent");
+
+                /* For strict backwards compatibility, don't change the message
+                 * now that we also have lowercase operands */
+                if (isUPPER(*RExC_parse)) {
+                    vFAIL("In '(*VERB...)', the '(' and '*' must be adjacent");
+                }
+                else {
+                    vFAIL("In '(*...)', the '(' and '*' must be adjacent");
+                }
             }
            while (RExC_parse < RExC_end && *RExC_parse != ')' ) {
                if ( *RExC_parse == ':' ) {
                    start_arg = RExC_parse + 1;
                    break;
                }
-               RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                else if (! UTF) {
+                    if (isUPPER(*RExC_parse)) {
+                        has_upper = TRUE;
+                    }
+                    RExC_parse++;
+                }
+                else {
+                    RExC_parse += UTF8SKIP(RExC_parse);
+                }
            }
            verb_len = RExC_parse - start_verb;
            if ( start_arg ) {
                 if (RExC_parse >= RExC_end) {
                     goto unterminated_verb_pattern;
                 }
+
                RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-               while ( RExC_parse < RExC_end && *RExC_parse != ')' )
+               while ( RExC_parse < RExC_end && *RExC_parse != ')' ) {
                     RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-               if ( RExC_parse >= RExC_end || *RExC_parse != ')' )
+                }
+               if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
                   unterminated_verb_pattern:
-                   vFAIL("Unterminated verb pattern argument");
-               if ( RExC_parse == start_arg )
-                   start_arg = NULL;
+                    if (has_upper) {
+                        vFAIL("Unterminated verb pattern argument");
+                    }
+                    else {
+                        vFAIL("Unterminated '(*...' argument");
+                    }
+                }
            } else {
-               if ( RExC_parse >= RExC_end || *RExC_parse != ')' )
-                   vFAIL("Unterminated verb pattern");
+               if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
+                    if (has_upper) {
+                        vFAIL("Unterminated verb pattern");
+                    }
+                    else {
+                        vFAIL("Unterminated '(*...' construct");
+                    }
+                }
            }
 
             /* Here, we know that RExC_parse < RExC_end */
@@ -10750,13 +10826,183 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     RExC_seen |= REG_CUTGROUP_SEEN;
                 }
                 break;
-           }
+            case 'a':
+                if (   memEQs(start_verb, verb_len, "asr")
+                    || memEQs(start_verb, verb_len, "atomic_script_run"))
+                {
+                    paren = 'r';        /* Mnemonic: recursed run */
+                    goto script_run;
+                }
+                else if (memEQs(start_verb, verb_len, "atomic")) {
+                    paren = 't';    /* AtOMIC */
+                    goto alpha_assertions;
+                }
+                break;
+            case 'p':
+                if (   memEQs(start_verb, verb_len, "plb")
+                    || memEQs(start_verb, verb_len, "positive_lookbehind"))
+                {
+                    paren = 'b';
+                    goto lookbehind_alpha_assertions;
+                }
+                else if (   memEQs(start_verb, verb_len, "pla")
+                         || memEQs(start_verb, verb_len, "positive_lookahead"))
+                {
+                    paren = 'a';
+                    goto alpha_assertions;
+                }
+                break;
+            case 'n':
+                if (   memEQs(start_verb, verb_len, "nlb")
+                    || memEQs(start_verb, verb_len, "negative_lookbehind"))
+                {
+                    paren = 'B';
+                    goto lookbehind_alpha_assertions;
+                }
+                else if (   memEQs(start_verb, verb_len, "nla")
+                         || memEQs(start_verb, verb_len, "negative_lookahead"))
+                {
+                    paren = 'A';
+                    goto alpha_assertions;
+                }
+                break;
+            case 's':
+                if (   memEQs(start_verb, verb_len, "sr")
+                    || memEQs(start_verb, verb_len, "script_run"))
+                {
+                    regnode * atomic;
+
+                    paren = 's';
+
+                   script_run:
+
+                    /* This indicates Unicode rules. */
+                    REQUIRE_UNI_RULES(flagp, NULL);
+
+                    if (! start_arg) {
+                        goto no_colon;
+                    }
+
+                    RExC_parse = start_arg;
+
+                    if (RExC_in_script_run) {
+
+                        /*  Nested script runs are treated as no-ops, because
+                         *  if the nested one fails, the outer one must as
+                         *  well.  It could fail sooner, and avoid (??{}) with
+                         *  side effects, but that is explicitly documented as
+                         *  undefined behavior. */
+
+                        ret = NULL;
+
+                        if (paren == 's') {
+                            paren = ':';
+                            goto parse_rest;
+                        }
+
+                        /* But, the atomic part of a nested atomic script run
+                         * isn't a no-op, but can be treated just like a '(?>'
+                         * */
+                        paren = '>';
+                        goto parse_rest;
+                    }
+
+                    /* By doing this here, we avoid extra warnings for nested
+                     * script runs */
+                    if (PASS2) {
+                        Perl_ck_warner_d(aTHX_
+                            packWARN(WARN_EXPERIMENTAL__SCRIPT_RUN),
+                            "The script_run feature is experimental"
+                            REPORT_LOCATION, REPORT_LOCATION_ARGS(RExC_parse));
+
+                    }
+
+                    if (paren == 's') {
+                        /* Here, we're starting a new regular script run */
+                        ret = reg_node(pRExC_state, SROPEN);
+                        RExC_in_script_run = 1;
+                        is_open = 1;
+                        goto parse_rest;
+                    }
+
+                    /* Here, we are starting an atomic script run.  This is
+                     * handled by recursing to deal with the atomic portion
+                     * separately, enclosed in SROPEN ... SRCLOSE nodes */
+
+                    ret = reg_node(pRExC_state, SROPEN);
+
+                    RExC_in_script_run = 1;
+
+                    atomic = reg(pRExC_state, 'r', &flags, depth);
+                    if (flags & (RESTART_PASS1|NEED_UTF8)) {
+                        *flagp = flags & (RESTART_PASS1|NEED_UTF8);
+                        return NULL;
+                    }
+
+                    REGTAIL(pRExC_state, ret, atomic);
+
+                    REGTAIL(pRExC_state, atomic,
+                           reg_node(pRExC_state, SRCLOSE));
+
+                    RExC_in_script_run = 0;
+                    return ret;
+                }
+
+                break;
+
+            lookbehind_alpha_assertions:
+                RExC_seen |= REG_LOOKBEHIND_SEEN;
+                RExC_in_lookbehind++;
+                /*FALLTHROUGH*/
+
+            alpha_assertions:
+
+                if (PASS2) {
+                    Perl_ck_warner_d(aTHX_
+                        packWARN(WARN_EXPERIMENTAL__ALPHA_ASSERTIONS),
+                        "The alpha_assertions feature is experimental"
+                        REPORT_LOCATION, REPORT_LOCATION_ARGS(RExC_parse));
+                }
+
+                RExC_seen_zerolen++;
+
+                if (! start_arg) {
+                    goto no_colon;
+                }
+
+                /* An empty negative lookahead assertion simply is failure */
+                if (paren == 'A' && RExC_parse == start_arg) {
+                    ret=reganode(pRExC_state, OPFAIL, 0);
+                    nextchar(pRExC_state);
+                    return ret;
+               }
+
+                RExC_parse = start_arg;
+                goto parse_rest;
+
+              no_colon:
+                vFAIL2utf8f(
+                "'(*%" UTF8f "' requires a terminating ':'",
+                UTF8fARG(UTF, verb_len, start_verb));
+               NOT_REACHED; /*NOTREACHED*/
+
+           } /* End of switch */
            if ( ! op ) {
                RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-                vFAIL2utf8f(
+                if (has_upper || verb_len == 0) {
+                    vFAIL2utf8f(
                     "Unknown verb pattern '%" UTF8f "'",
                     UTF8fARG(UTF, verb_len, start_verb));
+                }
+                else {
+                    vFAIL2utf8f(
+                    "Unknown '(*...)' construct '%" UTF8f "'",
+                    UTF8fARG(UTF, verb_len, start_verb));
+                }
            }
+            if ( RExC_parse == start_arg ) {
+                start_arg = NULL;
+            }
             if ( arg_required && !start_arg ) {
                 vFAIL3("Verb pattern '%.*s' has a mandatory argument",
                     verb_len, start_verb);
@@ -10916,6 +11162,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                    paren = 1;
                    goto capturing_parens;
                }
+
                 RExC_seen |= REG_LOOKBEHIND_SEEN;
                RExC_in_lookbehind++;
                RExC_parse++;
@@ -10998,6 +11245,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         RExC_parse++;
                         is_neg = TRUE;
                     }
+                    endptr = RExC_end;
                     if (grok_atoUV(RExC_parse, &unum, &endptr)
                         && unum <= I32_MAX
                     ) {
@@ -11146,32 +11394,51 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            {
                int is_define= 0;
                 const int DEFINE_len = sizeof("DEFINE") - 1;
-               if (RExC_parse[0] == '?') {        /* (?(?...)) */
-                    if (   RExC_parse < RExC_end - 1
-                        && (   RExC_parse[1] == '='
-                            || RExC_parse[1] == '!'
-                            || RExC_parse[1] == '<'
-                            || RExC_parse[1] == '{')
-                    ) { /* Lookahead or eval. */
-                       I32 flag;
-                        regnode *tail;
-
-                       ret = reg_node(pRExC_state, LOGICAL);
-                       if (!SIZE_ONLY)
-                           ret->flags = 1;
-
-                        tail = reg(pRExC_state, 1, &flag, depth+1);
-                        if (flag & (RESTART_PASS1|NEED_UTF8)) {
-                            *flagp = flag & (RESTART_PASS1|NEED_UTF8);
-                            return NULL;
-                        }
-                        REGTAIL(pRExC_state, ret, tail);
-                       goto insert_if;
-                   }
-                   /* Fall through to ‘Unknown switch condition’ at the
-                      end of the if/else chain. */
-               }
-               else if ( RExC_parse[0] == '<'     /* (?(<NAME>)...) */
+               if (    RExC_parse < RExC_end - 1
+                    && (   (       RExC_parse[0] == '?'        /* (?(?...)) */
+                            && (   RExC_parse[1] == '='
+                                || RExC_parse[1] == '!'
+                                || RExC_parse[1] == '<'
+                                || RExC_parse[1] == '{'))
+                       || (       RExC_parse[0] == '*'        /* (?(*...)) */
+                            && (   memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "pla:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "plb:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "nla:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "nlb:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "positive_lookahead:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "positive_lookbehind:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "negative_lookahead:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "negative_lookbehind:"))))
+                ) { /* Lookahead or eval. */
+                    I32 flag;
+                    regnode *tail;
+
+                    ret = reg_node(pRExC_state, LOGICAL);
+                    if (!SIZE_ONLY)
+                        ret->flags = 1;
+
+                    tail = reg(pRExC_state, 1, &flag, depth+1);
+                    RETURN_NULL_ON_RESTART(flag,flagp);
+                    REGTAIL(pRExC_state, ret, tail);
+                    goto insert_if;
+                }
+               else if (   RExC_parse[0] == '<'     /* (?(<NAME>)...) */
                         || RExC_parse[0] == '\'' ) /* (?('NAME')...) */
                {
                    char ch = RExC_parse[0] == '<' ? '>' : '\'';
@@ -11217,6 +11484,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     }
                     else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
                         UV uv;
+                        endptr = RExC_end;
                         if (grok_atoUV(RExC_parse, &uv, &endptr)
                             && uv <= I32_MAX
                         ) {
@@ -11252,6 +11520,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     /* (?(1)...) */
                    char c;
                     UV uv;
+                    endptr = RExC_end;
                     if (grok_atoUV(RExC_parse, &uv, &endptr)
                         && uv <= I32_MAX
                     ) {
@@ -11273,10 +11542,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
                     br = regbranch(pRExC_state, &flags, 1,depth+1);
                    if (br == NULL) {
-                        if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                            *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                            return NULL;
-                        }
+                        RETURN_NULL_ON_RESTART(flags,flagp);
                         FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
                               (UV) flags);
                     } else
@@ -11294,10 +11560,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         lastbr = reganode(pRExC_state, IFTHEN, 0);
 
                         if (!regbranch(pRExC_state, &flags, 1,depth+1)) {
-                            if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                                *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                                return NULL;
-                            }
+                            RETURN_NULL_ON_RESTART(flags,flagp);
                             FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
                                   (UV) flags);
                         }
@@ -11332,7 +11595,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 vFAIL("Unknown switch condition (?(...))");
            }
            case '[':           /* (?[ ... ]) */
-                return handle_regex_sets(pRExC_state, NULL, flagp, depth,
+                return handle_regex_sets(pRExC_state, NULL, flagp, depth+1,
                                          oregcomp_parse);
             case 0: /* A NUL */
                RExC_parse--; /* for vFAIL to print correctly */
@@ -11354,7 +11617,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 goto parse_rest;
             } /* end switch */
        }
-       else if (!(RExC_flags & RXf_PMf_NOCAPTURE)) {   /* (...) */
+       else {
+            if (*RExC_parse == '{' && PASS2) {
+                ckWARNregdep(RExC_parse + 1,
+                            "Unescaped left brace in regex is "
+                            "deprecated here (and will be fatal "
+                            "in Perl 5.32), passed through");
+            }
+            /* Not bothering to indent here, as the above 'else' is temporary
+             * */
+        if (!(RExC_flags & RXf_PMf_NOCAPTURE)) {   /* (...) */
          capturing_parens:
            parno = RExC_npar;
            RExC_npar++;
@@ -11380,6 +11652,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             paren = ':';
            ret = NULL;
        }
+        }
     }
     else                        /* ! paren */
        ret = NULL;
@@ -11392,10 +11665,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     /*     branch_len = (paren != 0); */
 
     if (br == NULL) {
-        if (flags & (RESTART_PASS1|NEED_UTF8)) {
-            *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-            return NULL;
-        }
+        RETURN_NULL_ON_RESTART(flags,flagp);
         FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
     }
     if (*RExC_parse == '|') {
@@ -11439,10 +11709,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
         br = regbranch(pRExC_state, &flags, 0, depth+1);
 
        if (br == NULL) {
-            if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                return NULL;
-            }
+            RETURN_NULL_ON_RESTART(flags,flagp);
             FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
         }
         REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
@@ -11469,12 +11736,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             Set_Node_Offset(ender,RExC_parse+1); /* MJD */
             Set_Node_Length(ender,1); /* MJD */
            break;
+       case 's':
+           ender = reg_node(pRExC_state, SRCLOSE);
+            RExC_in_script_run = 0;
+           break;
        case '<':
+        case 'a':
+        case 'A':
+        case 'b':
+        case 'B':
        case ',':
        case '=':
        case '!':
            *flagp &= ~HASWIDTH;
            /* FALLTHROUGH */
+        case 't':   /* aTomic */
        case '>':
            ender = reg_node(pRExC_state, SUCCEED);
            break;
@@ -11560,14 +11836,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 
     {
         const char *p;
-        static const char parens[] = "=!<,>";
+         /* Even/odd or x=don't care: 010101x10x */
+        static const char parens[] = "=!aA<,>Bbt";
+         /* flag below is set to 0 up through 'A'; 1 for larger */
 
        if (paren && (p = strchr(parens, paren))) {
            U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
-           int flag = (p - parens) > 1;
+           int flag = (p - parens) > 3;
 
-           if (paren == '>')
+           if (paren == '>' || paren == 't') {
                node = SUSPEND, flag = 0;
+            }
+
            reginsert(pRExC_state, node,ret, depth+1);
             Set_Node_Cur_Length(ret, parse_start);
            Set_Node_Offset(ret, parse_start + 1);
@@ -11653,10 +11933,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
        if (latest == NULL) {
            if (flags & TRYAGAIN)
                continue;
-            if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                return NULL;
-            }
+            RETURN_NULL_ON_RESTART(flags,flagp);
             FAIL2("panic: regpiece returned NULL, flags=%#" UVxf, (UV) flags);
        }
        else if (ret == NULL)
@@ -11726,11 +12003,8 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
     ret = regatom(pRExC_state, &flags,depth+1);
     if (ret == NULL) {
-       if (flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8))
-           *flagp |= flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8);
-        else
-            FAIL2("panic: regatom returned NULL, flags=%#" UVxf, (UV) flags);
-       return(NULL);
+        RETURN_NULL_ON_RESTART_OR_FLAGS(flags,flagp,TRYAGAIN);
+        FAIL2("panic: regatom returned NULL, flags=%#" UVxf, (UV) flags);
     }
 
     op = *RExC_parse;
@@ -11756,6 +12030,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                maxpos = next;
            RExC_parse++;
             if (isDIGIT(*RExC_parse)) {
+                endptr = RExC_end;
                 if (!grok_atoUV(RExC_parse, &uv, &endptr))
                     vFAIL("Invalid quantifier in {,}");
                 if (uv >= REG_INFTY)
@@ -11769,6 +12044,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            else
                maxpos = RExC_parse;
             if (isDIGIT(*maxpos)) {
+                endptr = RExC_end;
                 if (!grok_atoUV(maxpos, &uv, &endptr))
                     vFAIL("Invalid quantifier in {,}");
                 if (uv >= REG_INFTY)
@@ -11969,8 +12245,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   * *node_p, nor *code_point_p, nor *flagp.
   *
   * If <cp_count> is not NULL, the caller wants to know the length (in code
-  * points) that this \N sequence matches.  This is set even if the function
-  * returns FALSE, as detailed below.
+  * points) that this \N sequence matches.  This is set, and the input is
+  * parsed for errors, even if the function returns FALSE, as detailed below.
   *
   * There are 5 possibilities here, as detailed in the next 5 paragraphs.
   *
@@ -12018,10 +12294,14 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   */
 
     char * endbrace;    /* points to '}' following the name */
-    char *endchar;     /* Points to '.' or '}' ending cur char in the input
-                           stream */
     char* p = RExC_parse; /* Temporary */
 
+    SV * substitute_parse = NULL;
+    char *orig_end;
+    char *save_start;
+    I32 flags;
+    Size_t count = 0;   /* code point count kept internally by this function */
+
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_GROK_BSLASH_N;
@@ -12045,48 +12325,38 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
      * [^\n].  The latter is assumed when the {...} following the \N is a legal
      * quantifier, or there is no '{' at all */
     if (*p != '{' || regcurly(p)) {
-       RExC_parse = p;
+        RExC_parse = p;
         if (cp_count) {
             *cp_count = -1;
         }
 
-       if (! node_p) {
+        if (! node_p) {
             return FALSE;
         }
 
-       *node_p = reg_node(pRExC_state, REG_ANY);
-       *flagp |= HASWIDTH|SIMPLE;
-       MARK_NAUGHTY(1);
+        *node_p = reg_node(pRExC_state, REG_ANY);
+        *flagp |= HASWIDTH|SIMPLE;
+        MARK_NAUGHTY(1);
         Set_Node_Length(*node_p, 1); /* MJD */
-       return TRUE;
+        return TRUE;
     }
 
-    /* Here, we have decided it should be a named character or sequence */
-
     /* The test above made sure that the next real character is a '{', but
      * under the /x modifier, it could be separated by space (or a comment and
      * \n) and this is not allowed (for consistency with \x{...} and the
      * tokenizer handling of \N{NAME}). */
     if (*RExC_parse != '{') {
-       vFAIL("Missing braces on \\N{}");
+        vFAIL("Missing braces on \\N{}");
     }
 
-    RExC_parse++;      /* Skip past the '{' */
+    RExC_parse++;       /* Skip past the '{' */
 
     endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
     if (! endbrace) { /* no trailing brace */
         vFAIL2("Missing right brace on \\%c{}", 'N');
     }
-    else if (!(   endbrace == RExC_parse       /* nothing between the {} */
-               || memBEGINs(RExC_parse,   /* U+ (bad hex is checked below
-                                                   for a  better error msg) */
-                                  (STRLEN) (RExC_end - RExC_parse),
-                                 "U+")))
-    {
-       RExC_parse = endbrace;  /* position msg's '<--HERE' */
-       vFAIL("\\N{NAME} must be resolved by the lexer");
-    }
 
+    /* Here, we have decided it should be a named character or sequence */
     REQUIRE_UNI_RULES(flagp, FALSE); /* Unicode named chars imply Unicode
                                         semantics */
 
@@ -12099,7 +12369,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
             *cp_count = 0;
         }
         nextchar(pRExC_state);
-       if (! node_p) {
+        if (! node_p) {
             return FALSE;
         }
 
@@ -12107,159 +12377,183 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         return TRUE;
     }
 
-    RExC_parse += 2;   /* Skip past the 'U+' */
+    /* If we haven't got something that begins with 'U+', then it didn't get lexed. */
+    if (   endbrace - RExC_parse < 2
+        || strnNE(RExC_parse, "U+", 2))
+    {
+        RExC_parse = endbrace;  /* position msg's '<--HERE' */
+        vFAIL("\\N{NAME} must be resolved by the lexer");
+    }
 
-    /* Because toke.c has generated a special construct for us guaranteed not
-     * to have NULs, we can use a str function */
-    endchar = RExC_parse + strcspn(RExC_parse, ".}");
+        /* This code purposely indented below because of future changes coming */
 
-    /* Code points are separated by dots.  If none, there is only one code
-     * point, and is terminated by the brace */
+        /* We can get to here when the input is \N{U+...} or when toke.c has
+         * converted a name to the \N{U+...} form.  This includes changing a
+         * name that evaluates to multiple code points to \N{U+c1.c2.c3 ...} */
 
-    if (endchar >= endbrace) {
-       STRLEN length_of_hex;
-       I32 grok_hex_flags;
+        RExC_parse += 2;    /* Skip past the 'U+' */
 
-        /* Here, exactly one code point.  If that isn't what is wanted, fail */
-        if (! code_point_p) {
-            RExC_parse = p;
-            return FALSE;
-        }
+        /* Code points are separated by dots.  The '}' terminates the whole
+         * thing. */
 
-        /* Convert code point from hex */
-       length_of_hex = (STRLEN)(endchar - RExC_parse);
-       grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
-                       | PERL_SCAN_DISALLOW_PREFIX
-
-                           /* No errors in the first pass (See [perl
-                            * #122671].)  We let the code below find the
-                            * errors when there are multiple chars. */
-                       | ((SIZE_ONLY)
-                          ? PERL_SCAN_SILENT_ILLDIGIT
-                          : 0);
-
-        /* This routine is the one place where both single- and double-quotish
-         * \N{U+xxxx} are evaluated.  The value is a Unicode code point which
-         * must be converted to native. */
-       *code_point_p = UNI_TO_NATIVE(grok_hex(RExC_parse,
-                                               &length_of_hex,
-                                               &grok_hex_flags,
-                                               NULL));
-
-       /* The tokenizer should have guaranteed validity, but it's possible to
-         * bypass it by using single quoting, so check.  Don't do the check
-         * here when there are multiple chars; we do it below anyway. */
-        if (length_of_hex == 0
-            || length_of_hex != (STRLEN)(endchar - RExC_parse) )
-        {
-            RExC_parse += length_of_hex;       /* Includes all the valid */
-            RExC_parse += (RExC_orig_utf8)     /* point to after 1st invalid */
-                            ? UTF8SKIP(RExC_parse)
-                            : 1;
-            /* Guard against malformed utf8 */
-            if (RExC_parse >= endchar) {
-                RExC_parse = endchar;
+        do {    /* Loop until the ending brace */
+            UV cp = 0;
+            char * start_digit;     /* The first of the current code point */
+            if (! isXDIGIT(*RExC_parse)) {
+                RExC_parse++;
+                vFAIL("Invalid hexadecimal number in \\N{U+...}");
             }
-            vFAIL("Invalid hexadecimal number in \\N{U+...}");
-        }
 
-        RExC_parse = endbrace + 1;
-        return TRUE;
-    }
-    else {  /* Is a multiple character sequence */
-       SV * substitute_parse;
-       STRLEN len;
-       char *orig_end = RExC_end;
-       char *save_start = RExC_start;
-        I32 flags;
+            start_digit = RExC_parse;
+            count++;
 
-        /* Count the code points, if desired, in the sequence */
-        if (cp_count) {
-            *cp_count = 0;
-            while (RExC_parse < endbrace) {
-                /* Point to the beginning of the next character in the sequence. */
-                RExC_parse = endchar + 1;
-                endchar = RExC_parse + strcspn(RExC_parse, ".}");
-                (*cp_count)++;
+            /* Loop through the hex digits of the current code point */
+            do {
+                /* Adding this digit will shift the result 4 bits.  If that
+                 * result would be above the legal max, it's overflow */
+                if (cp > MAX_LEGAL_CP >> 4) {
+
+                    /* Find the end of the code point */
+                    do {
+                        RExC_parse ++;
+                    } while (isXDIGIT(*RExC_parse) || *RExC_parse == '_');
+
+                    /* Be sure to synchronize this message with the similar one
+                     * in utf8.c */
+                    vFAIL4("Use of code point 0x%.*s is not allowed; the"
+                        " permissible max is 0x%" UVxf,
+                        (int) (RExC_parse - start_digit), start_digit,
+                        MAX_LEGAL_CP);
+                }
+
+                /* Accumulate this (valid) digit into the running total */
+                cp  = (cp << 4) + READ_XDIGIT(RExC_parse);
+
+                /* READ_XDIGIT advanced the input pointer.  Ignore a single
+                 * underscore separator */
+                if (*RExC_parse == '_' && isXDIGIT(RExC_parse[1])) {
+                    RExC_parse++;
+                }
+            } while (isXDIGIT(*RExC_parse));
+
+            /* Here, have accumulated the next code point */
+            if (RExC_parse >= endbrace) {   /* If done ... */
+                if (count != 1) {
+                    goto do_concat;
+                }
+
+                /* Here, is a single code point; fail if doesn't want that */
+                if (! code_point_p) {
+                    RExC_parse = p;
+                    return FALSE;
+                }
+
+                /* A single code point is easy to handle; just return it */
+                *code_point_p = UNI_TO_NATIVE(cp);
+                RExC_parse = endbrace;
+                nextchar(pRExC_state);
+                return TRUE;
             }
-        }
 
-        /* Fail if caller doesn't want to handle a multi-code-point sequence.
-         * But don't backup up the pointer if the caller wants to know how many
-         * code points there are (they can then handle things) */
-        if (! node_p) {
-            if (! cp_count) {
-                RExC_parse = p;
+            /* Here, the only legal thing would be a multiple character
+             * sequence (of the form "\N{U+c1.c2. ... }".   So the next
+             * character must be a dot (and the one after that can't be the
+             * endbrace, or we'd have something like \N{U+100.} ) */
+            if (*RExC_parse != '.' || RExC_parse + 1 >= endbrace) {
+                RExC_parse += (RExC_orig_utf8)  /* point to after 1st invalid */
+                                ? UTF8SKIP(RExC_parse)
+                                : 1;
+                if (RExC_parse >= endbrace) { /* Guard against malformed utf8 */
+                    RExC_parse = endbrace;
+                }
+                vFAIL("Invalid hexadecimal number in \\N{U+...}");
             }
-            return FALSE;
-        }
 
-       /* What is done here is to convert this to a sub-pattern of the form
-         * \x{char1}\x{char2}...  and then call reg recursively to parse it
-         * (enclosing in "(?: ... )" ).  That way, it retains its atomicness,
-         * while not having to worry about special handling that some code
-         * points may have. */
+            /* Here, looks like it's really a multiple character sequence.  Fail
+             * if that's not what the caller wants.  But continue with counting
+             * and error checking if they still want a count */
+            if (! node_p && ! cp_count) {
+                return FALSE;
+            }
 
-       substitute_parse = newSVpvs("?:");
+            /* What is done here is to convert this to a sub-pattern of the
+             * form \x{char1}\x{char2}...  and then call reg recursively to
+             * parse it (enclosing in "(?: ... )" ).  That way, it retains its
+             * atomicness, while not having to worry about special handling
+             * that some code points may have.  We don't create a subpattern,
+             * but go through the motions of code point counting and error
+             * checking, if the caller doesn't want a node returned. */
 
-       while (RExC_parse < endbrace) {
+            if (node_p && count == 1) {
+                substitute_parse = newSVpvs("?:");
+            }
 
-           /* Convert to notation the rest of the code understands */
-           sv_catpv(substitute_parse, "\\x{");
-           sv_catpvn(substitute_parse, RExC_parse, endchar - RExC_parse);
-           sv_catpv(substitute_parse, "}");
+          do_concat:
 
-           /* Point to the beginning of the next character in the sequence. */
-           RExC_parse = endchar + 1;
-           endchar = RExC_parse + strcspn(RExC_parse, ".}");
+            if (node_p) {
+                /* Convert to notation the rest of the code understands */
+                sv_catpvs(substitute_parse, "\\x{");
+                sv_catpvn(substitute_parse, start_digit,
+                                            RExC_parse - start_digit);
+                sv_catpvs(substitute_parse, "}");
+            }
 
-       }
-        sv_catpv(substitute_parse, ")");
+            /* Move to after the dot (or ending brace the final time through.)
+             * */
+            RExC_parse++;
+            count++;
 
-        len = SvCUR(substitute_parse);
+        } while (RExC_parse < endbrace);
 
-       /* Don't allow empty number */
-       if (len < (STRLEN) 8) {
-            RExC_parse = endbrace;
-           vFAIL("Invalid hexadecimal number in \\N{U+...}");
-       }
+        if (! node_p) { /* Doesn't want the node */
+            assert (cp_count);
 
-        RExC_parse = RExC_start = RExC_adjusted_start
-                                              = SvPV_nolen(substitute_parse);
-       RExC_end = RExC_parse + len;
+            *cp_count = count;
+            return FALSE;
+        }
+
+        sv_catpvs(substitute_parse, ")");
 
-        /* The values are Unicode, and therefore not subject to recoding, but
-         * have to be converted to native on a non-Unicode (meaning non-ASCII)
-         * platform. */
 #ifdef EBCDIC
+        /* The values are Unicode, and therefore have to be converted to native
+         * on a non-Unicode (meaning non-ASCII) platform. */
         RExC_recode_x_to_native = 1;
 #endif
 
-        *node_p = reg(pRExC_state, 1, &flags, depth+1);
+    /* Here, we have the string the name evaluates to, ready to be parsed,
+     * stored in 'substitute_parse' as a series of valid "\x{...}\x{...}"
+     * constructs.  This can be called from within a substitute parse already.
+     * The error reporting mechanism doesn't work for 2 levels of this, but the
+     * code above has validated this new construct, so there should be no
+     * errors generated by the below.*/
+    save_start = RExC_start;
+    orig_end = RExC_end;
 
-        /* Restore the saved values */
-       RExC_start = RExC_adjusted_start = save_start;
-       RExC_parse = endbrace;
-       RExC_end = orig_end;
+    RExC_parse = RExC_start = SvPVX(substitute_parse);
+    RExC_end = RExC_parse + SvCUR(substitute_parse);
+
+    *node_p = reg(pRExC_state, 1, &flags, depth+1);
+
+    /* Restore the saved values */
+    RExC_start = save_start;
+    RExC_parse = endbrace;
+    RExC_end = orig_end;
 #ifdef EBCDIC
-        RExC_recode_x_to_native = 0;
+    RExC_recode_x_to_native = 0;
 #endif
-        SvREFCNT_dec_NN(substitute_parse);
-
-        if (! *node_p) {
-            if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                return FALSE;
-            }
-            FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
-                (UV) flags);
-        }
-        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
 
-        nextchar(pRExC_state);
+    SvREFCNT_dec_NN(substitute_parse);
 
-        return TRUE;
+    if (! *node_p) {
+        RETURN_X_ON_RESTART(FALSE, flags,flagp);
+        FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
+            (UV) flags);
     }
+    *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+
+    nextchar(pRExC_state);
+
+    return TRUE;
 }
 
 
@@ -12509,9 +12803,9 @@ S_new_regcurly(const char *s, const char *e)
  * in which case return I32_MAX (rather than possibly 32-bit wrapping) */
 
 static I32
-S_backref_value(char *p)
+S_backref_value(char *p, char *e)
 {
-    const char* endptr;
+    const char* endptr = e;
     UV val;
     if (grok_atoUV(p, &val, &endptr) && val <= I32_MAX)
         return (I32)val;
@@ -12650,8 +12944,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                        NULL,
                        NULL);
         if (ret == NULL) {
-            if (*flagp & (RESTART_PASS1|NEED_UTF8))
-                return NULL;
+            RETURN_NULL_ON_RESTART_FLAGP_OR_FLAGS(flagp,NEED_UTF8);
             FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf,
                   (UV) *flagp);
         }
@@ -12675,10 +12968,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    }
                    goto tryagain;
                }
-                if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                    *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                    return NULL;
-                }
+                RETURN_NULL_ON_RESTART(flags,flagp);
                 FAIL2("panic: reg returned NULL to regatom, flags=%#" UVxf,
                                                                  (UV) flags);
        }
@@ -12963,8 +13253,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                            TRUE, /* Allow an optimized regnode result */
                            NULL,
                            NULL);
-            if (*flagp & RESTART_PASS1)
-                return NULL;
+            RETURN_NULL_ON_RESTART_FLAGP(flagp);
             /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if
              * multi-char folds are allowed.  */
             if (!ret)
@@ -13003,8 +13292,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 break;
             }
 
-            if (*flagp & RESTART_PASS1)
-                return NULL;
+            RETURN_NULL_ON_RESTART_FLAGP(flagp);
 
             /* Here, evaluates to a single code point.  Go get that */
             RExC_parse = parse_start;
@@ -13063,7 +13351,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     if (RExC_parse >= RExC_end) {
                         goto unterminated_g;
                     }
-                    num = S_backref_value(RExC_parse);
+                    num = S_backref_value(RExC_parse, RExC_end);
                     if (num == 0)
                         vFAIL("Reference to invalid group 0");
                     else if (num == I32_MAX) {
@@ -13081,7 +13369,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     }
                 }
                 else {
-                    num = S_backref_value(RExC_parse);
+                    num = S_backref_value(RExC_parse, RExC_end);
                     /* bare \NNN might be backref or octal - if it is larger
                      * than or equal RExC_npar then it is assumed to be an
                      * octal escape. Note RExC_npar is +1 from the actual
@@ -13185,11 +13473,26 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            UV ender = 0;
            char *p;
            char *s;
-#define MAX_NODE_STRING_SIZE 127
-           char foldbuf[MAX_NODE_STRING_SIZE+UTF8_MAXBYTES_CASE];
+
+/* This allows us to fill a node with just enough spare so that if the final
+ * character folds, its expansion is guaranteed to fit */
+#define MAX_NODE_STRING_SIZE (255-UTF8_MAXBYTES_CASE)
+           char foldbuf[MAX_NODE_STRING_SIZE+UTF8_MAXBYTES_CASE+1];
+
            char *s0;
            U8 upper_parse = MAX_NODE_STRING_SIZE;
-            U8 node_type = compute_EXACTish(pRExC_state);
+
+            /* We start out as an EXACT node, even if under /i, until we find a
+             * character which is in a fold.  The algorithm now segregates into
+             * separate nodes, characters that fold from those that don't under
+             * /i.  (This hopefully will create nodes that are fixed strings
+             * even under /i, giving the optimizer something to grab onto.)
+             * So, if a node has something in it and the next character is in
+             * the opposite category, that node is closed up, and the function
+             * returns.  Then regatom is called again, and a new node is
+             * created for the new category. */
+            U8 node_type = EXACT;
+
             bool next_is_quantifier;
             char * oldp = NULL;
 
@@ -13203,14 +13506,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              * which don't participate in folds with Latin1-range characters,
              * as the latter's folds aren't known until runtime.  (We don't
              * need to figure this out until pass 2) */
-            bool maybe_exactfu = PASS2
-                               && (node_type == EXACTF || node_type == EXACTFL);
+            bool maybe_exactfu = PASS2;
 
-            /* If a folding node contains only code points that don't
-             * participate in folds, it can be changed into an EXACT node,
-             * which allows the optimizer more things to look for */
-            bool maybe_exact;
+            /* To see if RExC_uni_semantics changes during parsing of the node.
+             * */
+            bool uni_semantics_at_node_start;
 
+            /* The node_type may change below, but since the size of the node
+             * doesn't change, it works */
            ret = reg_node(pRExC_state, node_type);
 
             /* In pass1, folded, we use a temporary buffer instead of the
@@ -13221,45 +13524,33 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
          reparse:
 
-            /* We look for the EXACTFish to EXACT node optimizaton only if
-             * folding.  (And we don't need to figure this out until pass 2).
-             * XXX It might actually make sense to split the node into portions
-             * that are exact and ones that aren't, so that we could later use
-             * the exact ones to find the longest fixed and floating strings.
-             * One would want to join them back into a larger node.  One could
-             * use a pseudo regnode like 'EXACT_ORIG_FOLD' */
-            maybe_exact = FOLD && PASS2;
-
-           /* XXX The node can hold up to 255 bytes, yet this only goes to
-             * 127.  I (khw) do not know why.  Keeping it somewhat less than
-             * 255 allows us to not have to worry about overflow due to
-             * converting to utf8 and fold expansion, but that value is
-             * 255-UTF8_MAXBYTES_CASE.  join_exact() may join adjacent nodes
-             * split up by this limit into a single one using the real max of
-             * 255.  Even at 127, this breaks under rare circumstances.  If
-             * folding, we do not want to split a node at a character that is a
-             * non-final in a multi-char fold, as an input string could just
-             * happen to want to match across the node boundary.  The join
-             * would solve that problem if the join actually happens.  But a
-             * series of more than two nodes in a row each of 127 would cause
-             * the first join to succeed to get to 254, but then there wouldn't
-             * be room for the next one, which could at be one of those split
-             * multi-char folds.  I don't know of any fool-proof solution.  One
-             * could back off to end with only a code point that isn't such a
-             * non-final, but it is possible for there not to be any in the
-             * entire node. */
-
-            assert(   ! UTF     /* Is at the beginning of a character */
+            /* This breaks under rare circumstances.  If folding, we do not
+             * want to split a node at a character that is a non-final in a
+             * multi-char fold, as an input string could just happen to want to
+             * match across the node boundary.  The code at the end of the loop
+             * looks for this, and backs off until it finds no such
+             * character, but it is possible (though extremely, extremely
+             * unlikely) for all characters in the node to be non-final fold
+             * ones, in which case we just leave the node fully filled, and
+             * hope that it doesn't match the string in just the wrong place */
+
+            assert( ! UTF     /* Is at the beginning of a character */
                    || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                    || UTF8_IS_START(UCHARAT(RExC_parse)));
 
+            uni_semantics_at_node_start = RExC_uni_semantics;
+
             /* Here, we have a literal character.  Find the maximal string of
              * them in the input that we can fit into a single EXACTish node.
-             * We quit at the first non-literal or when the node gets full */
-           for (p = RExC_parse;
-                len < upper_parse && p < RExC_end;
-                len++)
-           {
+             * We quit at the first non-literal or when the node gets full, or
+             * under /i the categorization of folding/non-folding character
+             * changes */
+           for (p = RExC_parse; len < upper_parse && p < RExC_end; ) {
+
+                /* In most cases each iteration adds one byte to the output.
+                 * The exceptions override this */
+                Size_t added_len = 1;
+
                oldp = p;
 
                 /* White space has already been ignored */
@@ -13332,8 +13623,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         ) {
                             if (*flagp & NEED_UTF8)
                                 FAIL("panic: grok_bslash_N set NEED_UTF8");
-                            if (*flagp & RESTART_PASS1)
-                                return NULL;
+                            RETURN_NULL_ON_RESTART_FLAGP(flagp);
 
                             /* Here, it wasn't a single code point.  Go close
                              * up this EXACTish node.  The switch() prior to
@@ -13456,7 +13746,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         /* NOTE, RExC_npar is 1 more than the actual number of
                          * parens we have seen so far, hence the < RExC_npar below. */
 
-                        if ( !isDIGIT(p[1]) || S_backref_value(p) < RExC_npar)
+                        if ( !isDIGIT(p[1]) || S_backref_value(p, RExC_end) < RExC_npar)
                         {  /* Not to be treated as an octal constant, go
                                    find backref */
                             --p;
@@ -13498,20 +13788,22 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    } /* End of switch on '\' */
                    break;
                case '{':
-                    /* Currently we allow an lbrace at the start of a construct
-                     * without raising a warning.  This is because we think we
-                     * will never want such a brace to be meant to be other
-                     * than taken literally. */
+                    /* Trying to gain new uses for '{' without breaking too
+                     * much existing code is hard.  The solution currently
+                     * adopted is:
+                     *  1)  If there is no ambiguity that a '{' should always
+                     *      be taken literally, at the start of a construct, we
+                     *      just do so.
+                     *  2)  If the literal '{' conflicts with our desired use
+                     *      of it as a metacharacter, we die.  The deprecation
+                     *      cycles for this have come and gone.
+                     *  3)  If there is ambiguity, we raise a simple warning.
+                     *      This could happen, for example, if the user
+                     *      intended it to introduce a quantifier, but slightly
+                     *      misspelled the quantifier.  Without this warning,
+                     *      the quantifier would silently be taken as a literal
+                     *      string of characters instead of a meta construct */
                    if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
-
-                        /* But, we raise a fatal warning otherwise, as the
-                         * deprecation cycle has come and gone.  Except that it
-                         * turns out that some heavily-relied on upstream
-                         * software, notably GNU Autoconf, have failed to fix
-                         * their uses.  For these, don't make it fatal unless
-                         * we anticipate using the '{' for something else.
-                         * This happens after any alpha, and for a looser {m,n}
-                         * quantifier specification */
                         if (      RExC_strict
                             || (  p > parse_start + 1
                                 && isALPHA_A(*(p - 1))
@@ -13523,10 +13815,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                   "illegal here");
                         }
                         if (PASS2) {
-                            ckWARNregdep(p + 1,
-                                        "Unescaped left brace in regex is "
-                                        "deprecated here (and will be fatal "
-                                        "in Perl 5.30), passed through");
+                            ckWARNreg(p + 1, "Unescaped left brace in regex is"
+                                             " passed through");
                         }
                    }
                    goto normal_default;
@@ -13549,8 +13839,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    break;
                } /* End of switch on the literal */
 
-               /* Here, have looked at the literal character and <ender>
-                 * contains its ordinal, <p> points to the character after it.
+               /* Here, have looked at the literal character, and <ender>
+                 * contains its ordinal; <p> points to the character after it.
                  * We need to check if the next non-ignored thing is a
                  * quantifier.  Move <p> to after anything that should be
                  * ignored, which, as a side effect, positions <p> for the next
@@ -13585,15 +13875,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                             const STRLEN unilen = UVCHR_SKIP(ender);
                             s += unilen;
-
-                            /* We have to subtract 1 just below (and again in
-                             * the corresponding PASS2 code) because the loop
-                             * increments <len> each time, as all but this path
-                             * (and one other) through it add a single byte to
-                             * the EXACTish node.  But these paths would change
-                             * len to be the correct final value, so cancel out
-                             * the increment that follows */
-                            len += unilen - 1;
+                            added_len = unilen;
                         }
                         else {
                             s++;
@@ -13602,7 +13884,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       not_fold_common:
                         if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                             U8 * new_s = uvchr_to_utf8((U8*)s, ender);
-                            len += (char *) new_s - s - 1;
+                            added_len = (char *) new_s - s;
                             s = (char *) new_s;
                         }
                         else {
@@ -13613,12 +13895,23 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 else if (LOC && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)) {
 
                     /* Here are folding under /l, and the code point is
-                     * problematic.  First, we know we can't simplify things */
-                    maybe_exact = FALSE;
-                    maybe_exactfu = FALSE;
-
-                    /* A problematic code point in this context means that its
-                     * fold isn't known until runtime, so we can't fold it now.
+                     * problematic.  If this is the first character in the
+                     * node, change the node type to folding.   Otherwise, if
+                     * this is the first problematic character, close up the
+                     * existing node, so can start a new node with this one */
+                    if (! len) {
+                        node_type = EXACTFL;
+                    }
+                    else if (node_type == EXACT) {
+                        p = oldp;
+                        goto loopdone;
+                    }
+
+                    /* This code point means we can't simplify things */
+                    maybe_exactfu = FALSE;
+
+                    /* A problematic code point in this context means that its
+                     * fold isn't known until runtime, so we can't fold it now.
                      * (The non-problematic code points are the above-Latin1
                      * ones that fold to also all above-Latin1.  Their folds
                      * don't vary no matter what the locale is.) But here we
@@ -13632,108 +13925,184 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      * do for both passes is the PASS2 code for non-folding */
                     goto not_fold_common;
                 }
-                else /* A regular FOLD code point */
-                    if (! (   UTF
-#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
-   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
-                                      || UNICODE_DOT_DOT_VERSION > 0)
-                            /* See comments for join_exact() as to why we fold
-                             * this non-UTF at compile time */
-                            || (   node_type == EXACTFU
-                                && ender == LATIN_SMALL_LETTER_SHARP_S)
-#endif
-                )) {
+                else                /* A regular FOLD code point */
+                     if (! UTF)
+                {
                     /* Here, are folding and are not UTF-8 encoded; therefore
-                     * the character must be in the range 0-255, and is not /l
+                     * the character must be in the range 0-255, and is not /l.
                      * (Not /l because we already handled these under /l in
                      * is_PROBLEMATIC_LOCALE_FOLD_cp) */
-                    if (IS_IN_SOME_FOLD_L1(ender)) {
-                        maybe_exact = FALSE;
+                    if (! IS_IN_SOME_FOLD_L1(ender)) {
 
-                        /* See if the character's fold differs between /d and
-                         * /u.  This includes the multi-char fold SHARP S to
-                         * 'ss' */
-                        if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
-                            RExC_seen_unfolded_sharp_s = 1;
-                            maybe_exactfu = FALSE;
+                        /* Start a new node for this non-folding character if
+                         * previous ones in the node were folded */
+                        if (len && node_type != EXACT) {
+                            p = oldp;
+                            goto loopdone;
+                        }
+
+                        *(s++) = (char) ender;
+                    }
+                    else {  /* Here, does participate in some fold */
+
+                        /* if this is the first character in the node, change
+                         * its type to folding.  Otherwise, if this is the
+                         * first folding character in the node, close up the
+                         * existing node, so can start a new node with this
+                         * one.  */
+                        if (! len) {
+                            node_type = compute_EXACTish(pRExC_state);
+                        }
+                        else if (node_type == EXACT) {
+                            p = oldp;
+                            goto loopdone;
                         }
-                        else if (maybe_exactfu
-                            && (PL_fold[ender] != PL_fold_latin1[ender]
+
+                        /* See if the character's fold differs between /d and
+                         * /u.  On non-ancient Unicode versions, this includes
+                         * the multi-char fold SHARP S to 'ss' */
+
 #if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
    || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
                                       || UNICODE_DOT_DOT_VERSION > 0)
-                                || (   len > 0
-                                    && isALPHA_FOLD_EQ(ender, 's')
-                                    && isALPHA_FOLD_EQ(*(s-1), 's'))
+
+                        if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
+
+                            /* See comments for join_exact() as to why we fold
+                             * this non-UTF at compile time */
+                            if (node_type == EXACTFU) {
+                                *(s++) = 's';
+
+                                /* Let the code below add in the extra 's' */
+                                ender = 's';
+                                added_len = 2;
+                            }
+                            else if (   uni_semantics_at_node_start
+                                     != RExC_uni_semantics)
+                            {
+                                /* Here, we are supposed to be using Unicode
+                                 * rules, but this folding node is not.  This
+                                 * happens during pass 1 when the node started
+                                 * out not under Unicode rules, but a \N{} was
+                                 * encountered during the processing of it,
+                                 * causing Unicode rules to be switched into.
+                                 * Pass 1 continues uninterrupted, as by the
+                                 * time we get to pass 2, we will know enough
+                                 * to generate the correct folds.  Except in
+                                 * this one case, we need to restart the node,
+                                 * because the fold of the sharp s requires 2
+                                 * characters, and the sizing needs to account
+                                 * for that. */
+                                p = oldp;
+                                goto loopdone;
+                            }
+                            else {
+                                RExC_seen_unfolded_sharp_s = 1;
+                                maybe_exactfu = FALSE;
+                            }
+                        }
+                        else if (   len
+                                 && isALPHA_FOLD_EQ(ender, 's')
+                                 && isALPHA_FOLD_EQ(*(s-1), 's'))
+                        {
+                            maybe_exactfu = FALSE;
+                        }
+                        else
 #endif
-                        )) {
+
+                        if (PL_fold[ender] != PL_fold_latin1[ender]) {
                             maybe_exactfu = FALSE;
                         }
-                    }
 
-                    /* Even when folding, we store just the input character, as
-                     * we have an array that finds its fold quickly */
-                    *(s++) = (char) ender;
+                        /* Even when folding, we store just the input
+                         * character, as we have an array that finds its fold
+                         * quickly */
+                        *(s++) = (char) ender;
+                    }
                 }
-                else {  /* FOLD, and UTF (or sharp s) */
+                else {  /* FOLD, and UTF */
                     /* Unlike the non-fold case, we do actually have to
-                     * calculate the results here in pass 1.  This is for two
-                     * reasons, the folded length may be longer than the
-                     * unfolded, and we have to calculate how many EXACTish
-                     * nodes it will take; and we may run out of room in a node
-                     * in the middle of a potential multi-char fold, and have
-                     * to back off accordingly.  */
-
-                    UV folded;
+                     * calculate the fold in pass 1.  This is for two reasons,
+                     * the folded length may be longer than the unfolded, and
+                     * we have to calculate how many EXACTish nodes it will
+                     * take; and we may run out of room in a node in the middle
+                     * of a potential multi-char fold, and have to back off
+                     * accordingly.  */
+
                     if (isASCII_uni(ender)) {
-                        folded = toFOLD(ender);
-                        *(s)++ = (U8) folded;
+
+                        /* As above, we close up and start a new node if the
+                         * previous characters don't match the fold/non-fold
+                         * state of this one.  And if this is the first
+                         * character in the node, and it folds, we change the
+                         * node away from being EXACT */
+                        if (! IS_IN_SOME_FOLD_L1(ender)) {
+                            if (len && node_type != EXACT) {
+                                p = oldp;
+                                goto loopdone;
+                            }
+
+                            *(s)++ = (U8) ender;
+                        }
+                        else {  /* Is in a fold */
+
+                            if (! len) {
+                                node_type = compute_EXACTish(pRExC_state);
+                            }
+                            else if (node_type == EXACT) {
+                                p = oldp;
+                                goto loopdone;
+                            }
+
+                            *(s)++ = (U8) toFOLD(ender);
+                        }
                     }
-                    else {
+                    else {  /* Not ASCII */
                         STRLEN foldlen;
 
-                        folded = _to_uni_fold_flags(
+                        /* As above, we close up and start a new node if the
+                         * previous characters don't match the fold/non-fold
+                         * state of this one.  And if this is the first
+                         * character in the node, and it folds, we change the
+                         * node away from being EXACT */
+                        if (! _invlist_contains_cp(PL_utf8_foldable, ender)) {
+                            if (len && node_type != EXACT) {
+                                p = oldp;
+                                goto loopdone;
+                            }
+
+                            s = (char *) uvchr_to_utf8((U8 *) s, ender);
+                            added_len = UVCHR_SKIP(ender);
+                        }
+                        else {
+
+                            if (! len) {
+                                node_type = compute_EXACTish(pRExC_state);
+                            }
+                            else if (node_type == EXACT) {
+                                p = oldp;
+                                goto loopdone;
+                            }
+
+                            ender = _to_uni_fold_flags(
                                      ender,
                                      (U8 *) s,
                                      &foldlen,
                                      FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
                                                         ? FOLD_FLAGS_NOMIX_ASCII
                                                         : 0));
-                        s += foldlen;
-
-                        /* The loop increments <len> each time, as all but this
-                         * path (and one other) through it add a single byte to
-                         * the EXACTish node.  But this one has changed len to
-                         * be the correct final value, so subtract one to
-                         * cancel out the increment that follows */
-                        len += foldlen - 1;
-                    }
-                    /* If this node only contains non-folding code points so
-                     * far, see if this new one is also non-folding */
-                    if (maybe_exact) {
-                        if (folded != ender) {
-                            maybe_exact = FALSE;
-                        }
-                        else {
-                            /* Here the fold is the original; we have to check
-                             * further to see if anything folds to it */
-                            if (_invlist_contains_cp(PL_utf8_foldable,
-                                                        ender))
-                            {
-                                maybe_exact = FALSE;
-                            }
+                            s += foldlen;
+                            added_len = foldlen;
                         }
                     }
-                    ender = folded;
                }
 
+                len += added_len;
+
                if (next_is_quantifier) {
 
                     /* Here, the next input is a quantifier, and to get here,
-                     * the current character is the only one in the node.
-                     * Also, here <len> doesn't include the final byte for this
-                     * character */
-                    len++;
+                     * the current character is the only one in the node. */
                     goto loopdone;
                }
 
@@ -13782,16 +14151,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     len = s - s0 + 1;
                }
                 else {
-                    if (!  PL_NonL1NonFinalFold) {
-                        PL_NonL1NonFinalFold = _new_invlist_C_array(
-                                        NonL1_Perl_Non_Final_Folds_invlist);
-                    }
 
                     /* Point to the first byte of the final character */
                     s = (char *) utf8_hop((U8 *) s, -1);
 
                     while (s >= s0) {   /* Search backwards until find
-                                           non-problematic char */
+                                           non-problematic char */
                         if (UTF8_IS_INVARIANT(*s)) {
 
                             /* There are no ascii characters that participate
@@ -13911,23 +14276,30 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 OP(ret) = NOTHING;
             }
             else {
-                if (FOLD) {
-                    /* If 'maybe_exact' is still set here, means there are no
-                     * code points in the node that participate in folds;
-                     * similarly for 'maybe_exactfu' and code points that match
-                     * differently depending on UTF8ness of the target string
-                     * (for /u), or depending on locale for /l */
-                    if (maybe_exact) {
-                        OP(ret) = (LOC)
-                                  ? EXACTL
-                                  : EXACT;
+                OP(ret) = node_type;
+
+                /* If the node type is EXACT here, check to see if it
+                 * should be EXACTL. */
+                if (node_type == EXACT) {
+                    if (LOC) {
+                        OP(ret) = EXACTL;
                     }
-                    else if (maybe_exactfu) {
-                        OP(ret) = (LOC)
-                                  ? EXACTFLU8
-                                  : EXACTFU;
+                }
+
+                if (FOLD) {
+                    /* If 'maybe_exactfu' is set, then there are no code points
+                     * that match differently depending on UTF8ness of the
+                     * target string (for /u), or depending on locale for /l */
+                    if (maybe_exactfu) {
+                        if (node_type == EXACTF) {
+                            OP(ret) = EXACTFU;
+                        }
+                        else if (node_type == EXACTFL) {
+                            OP(ret) = EXACTFLU8;
+                        }
                     }
                 }
+
                 alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender,
                                            FALSE /* Don't look to see if could
                                                     be turned into an EXACT
@@ -13953,8 +14325,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     /* Position parse to next real character */
     skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                             FALSE /* Don't force to /x */ );
-    if (PASS2 && *RExC_parse == '{' && OP(ret) != SBOL && ! regcurly(RExC_parse)) {
-        ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through");
+    if (   PASS2 && *RExC_parse == '{'
+        && OP(ret) != SBOL && ! regcurly(RExC_parse))
+    {
+        if (RExC_strict || new_regcurly(RExC_parse, RExC_end)) {
+            RExC_parse++;
+            vFAIL("Unescaped left brace in regex is illegal here");
+        }
+        ckWARNreg(RExC_parse + 1, "Unescaped left brace in regex is"
+                                  " passed through");
     }
 
     return(ret);
@@ -14915,6 +15294,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
 
     PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
 
+    DEBUG_PARSE("xcls");
+
     if (in_locale) {
         set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
     }
@@ -14932,7 +15313,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      * these things, we need to realize that something preceded by a backslash
      * is escaped, so we have to keep track of backslashes */
     if (SIZE_ONLY) {
-        UV depth = 0; /* how many nested (?[...]) constructs */
+        UV nest_depth = 0; /* how many nested (?[...]) constructs */
 
         while (RExC_parse < RExC_end) {
             SV* current = NULL;
@@ -14941,8 +15322,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                     TRUE /* Force /x */ );
 
             switch (*RExC_parse) {
-                case '?':
-                    if (RExC_parse[1] == '[') depth++, RExC_parse++;
+                case '(':
+                    if (RExC_parse[1] == '?' && RExC_parse[2] == '[')
+                        nest_depth++, RExC_parse+=2;
                     /* FALLTHROUGH */
                 default:
                     break;
@@ -14999,9 +15381,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                 }
 
                 case ']':
-                    if (depth--) break;
-                    RExC_parse++;
-                    if (*RExC_parse == ')') {
+                    if (RExC_parse[1] == ')') {
+                        RExC_parse++;
+                        if (nest_depth--) break;
                         node = reganode(pRExC_state, ANYOF, 0);
                         RExC_size += ANYOF_SKIP;
                         nextchar(pRExC_state);
@@ -15013,20 +15395,25 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
 
                         return node;
                     }
-                    goto no_close;
+                    /* We output the messages even if warnings are off, because we'll fail
+                     * the very next thing, and these give a likely diagnosis for that */
+                    if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
+                        output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
+                    }
+                    RExC_parse++;
+                    vFAIL("Unexpected ']' with no following ')' in (?[...");
             }
 
             RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
         }
 
-      no_close:
         /* We output the messages even if warnings are off, because we'll fail
          * the very next thing, and these give a likely diagnosis for that */
         if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
             output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
         }
 
-        FAIL("Syntax error in (?[...])");
+        vFAIL("Syntax error in (?[...])");
     }
 
     /* Pass 2 only after this. */
@@ -15206,12 +15593,14 @@ redo_curchar:
                      * inversion list, and RExC_parse points to the trailing
                      * ']'; the next character should be the ')' */
                     RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for nested extended charclass");
 
                     /* Then the ')' matching the original '(' handled by this
                      * case: statement */
                     RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for wrapper for nested extended charclass");
 
                     RExC_parse++;
                     RExC_flags = save_flags;
@@ -15335,7 +15724,7 @@ redo_curchar:
                  * fence.  Get rid of it */
                 fence_ptr = av_pop(fence_stack);
                 assert(fence_ptr);
-                fence = SvIV(fence_ptr) - 1;
+                fence = SvIV(fence_ptr);
                 SvREFCNT_dec_NN(fence_ptr);
                 fence_ptr = NULL;
 
@@ -15569,7 +15958,7 @@ redo_curchar:
     if (av_tindex_skip_len_mg(stack) < 0   /* Was empty */
         || ((final = av_pop(stack)) == NULL)
         || ! IS_OPERAND(final)
-        || SvTYPE(final) != SVt_INVLIST
+        || ! is_invlist(final)
         || av_tindex_skip_len_mg(stack) >= 0)  /* More left on stack */
     {
       bad_syntax:
@@ -15726,25 +16115,19 @@ S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state,
 STATIC void
 S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist)
 {
-    /* This hard-codes the Latin1/above-Latin1 folding rules, so that an
-     * innocent-looking character class, like /[ks]/i won't have to go out to
-     * disk to find the possible matches.
+    /* This adds the Latin1/above-Latin1 folding rules.
      *
      * This should be called only for a Latin1-range code point, cp, which is
      * known to be involved in a simple fold with other code points above
      * Latin1.  It would give false results if /aa has been specified.
      * Multi-char folds are outside the scope of this, and must be handled
-     * specially.
-     *
-     * XXX It would be better to generate these via regen, in case a new
-     * version of the Unicode standard adds new mappings, though that is not
-     * really likely, and may be caught by the default: case of the switch
-     * below. */
+     * specially. */
 
     PERL_ARGS_ASSERT_ADD_ABOVE_LATIN1_FOLDS;
 
     assert(HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(cp));
 
+    /* The rules that are valid for all Unicode versions are hard-coded in */
     switch (cp) {
         case 'k':
         case 'K':
@@ -15768,36 +16151,54 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
                                         LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
             break;
 
-#ifdef LATIN_CAPITAL_LETTER_SHARP_S /* not defined in early Unicode releases */
+        default:    /* Other code points are checked against the data for the
+                       current Unicode version */
+          {
+            Size_t folds_to_count;
+            unsigned int first_folds_to;
+            const unsigned int * remaining_folds_to_list;
+            UV folded_cp;
 
-        case LATIN_SMALL_LETTER_SHARP_S:
-          *invlist = add_cp_to_invlist(*invlist, LATIN_CAPITAL_LETTER_SHARP_S);
-            break;
+            if (isASCII(cp)) {
+                folded_cp = toFOLD(cp);
+            }
+            else {
+                U8 dummy_fold[UTF8_MAXBYTES_CASE+1];
+                Size_t dummy_len;
+                folded_cp = _to_fold_latin1(cp, dummy_fold, &dummy_len, 0);
+            }
 
-#endif
+            if (folded_cp > 255) {
+                *invlist = add_cp_to_invlist(*invlist, folded_cp);
+            }
 
-#if    UNICODE_MAJOR_VERSION < 3                                        \
-   || (UNICODE_MAJOR_VERSION == 3 && UNICODE_DOT_VERSION == 0)
+            folds_to_count = _inverse_folds(folded_cp, &first_folds_to,
+                                                    &remaining_folds_to_list);
+            if (folds_to_count == 0) {
 
-        /* In 3.0 and earlier, U+0130 folded simply to 'i'; and in 3.0.1 so did
-         * U+0131.  */
-        case 'i':
-        case 'I':
-          *invlist =
-             add_cp_to_invlist(*invlist, LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
-#   if UNICODE_DOT_DOT_VERSION == 1
-          *invlist = add_cp_to_invlist(*invlist, LATIN_SMALL_LETTER_DOTLESS_I);
-#   endif
-            break;
-#endif
+                /* Use deprecated warning to increase the chances of this being
+                 * output */
+                if (PASS2) {
+                    ckWARN2reg_d(RExC_parse,
+                        "Perl folding rules are not up-to-date for 0x%02X;"
+                        " please use the perlbug utility to report;", cp);
+                }
+            }
+            else {
+                unsigned int i;
 
-        default:
-            /* Use deprecated warning to increase the chances of this being
-             * output */
-            if (PASS2) {
-                ckWARN2reg_d(RExC_parse, "Perl folding rules are not up-to-date for 0x%02X; please use the perlbug utility to report;", cp);
+                if (first_folds_to > 255) {
+                    *invlist = add_cp_to_invlist(*invlist, first_folds_to);
+                }
+                for (i = 0; i < folds_to_count - 1; i++) {
+                    if (remaining_folds_to_list[i] > 255) {
+                        *invlist = add_cp_to_invlist(*invlist,
+                                                    remaining_folds_to_list[i]);
+                    }
+                }
             }
             break;
+         }
     }
 }
 
@@ -16211,6 +16612,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         do_posix_warnings ? &posix_warnings : NULL,
                         TRUE /* checking only */);
         }
+        else if (  strict && ! skip_white
+                 && (   _generic_isCC(value, _CC_VERTSPACE)
+                     || is_VERTWS_cp_high(value)))
+        {
+            vFAIL("Literal vertical space in [] is illegal except under /x");
+        }
         else if (value == '\\') {
             /* Is a backslash; get the code point of the char after it */
 
@@ -16265,8 +16672,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
                         if (*flagp & NEED_UTF8)
                             FAIL("panic: grok_bslash_N set NEED_UTF8");
-                        if (*flagp & RESTART_PASS1)
-                            return NULL;
+
+                        RETURN_NULL_ON_RESTART_FLAGP(flagp);
 
                         if (cp_count < 0) {
                             vFAIL("\\N in a character class must be a named character: \\N{...}");
@@ -16318,6 +16725,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
            case 'P':
                {
                char *e;
+                char *i;
+
 
                 /* We will handle any undefined properties ourselves */
                 U8 swash_init_flags = _CORE_SWASH_INIT_RETURN_IF_UNDEF
@@ -16327,6 +16736,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                         * anyway, to save a little time */
                                       |_CORE_SWASH_INIT_ACCEPT_INVLIST;
 
+                SvREFCNT_dec(swash); /* Free any left-overs */
                if (RExC_parse >= RExC_end)
                    vFAIL2("Empty \\%c", (U8)value);
                if (*RExC_parse == '{') {
@@ -16338,6 +16748,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     }
 
                     RExC_parse++;
+
+                    /* White space is allowed adjacent to the braces and after
+                     * any '^', even when not under /x */
                     while (isSPACE(*RExC_parse)) {
                          RExC_parse++;
                    }
@@ -16361,6 +16774,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                    n = e - RExC_parse;
                    while (isSPACE(*(RExC_parse + n - 1)))
                        n--;
+
                }   /* The \p isn't immediately followed by a '{' */
                else if (! isALPHA(*RExC_parse)) {
                     RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
@@ -16373,11 +16787,34 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                    n = 1;
                }
                if (!SIZE_ONLY) {
-                    SV* invlist;
-                    char* name;
+                    char* name = RExC_parse;
                     char* base_name;    /* name after any packages are stripped */
                     char* lookup_name = NULL;
                     const char * const colon_colon = "::";
+                    bool invert;
+
+                    SV* invlist;
+
+                    /* Temporary workaround for [perl #133136].  For this
+                    * precise input that is in the .t that is failing, load
+                    * utf8.pm, which is what the test wants, so that that
+                    * .t passes */
+                    if (     memEQs(RExC_start, e + 1 - RExC_start,
+                                    "foo\\p{Alnum}")
+                        && ! hv_common(GvHVn(PL_incgv),
+                                       NULL,
+                                       "utf8.pm", sizeof("utf8.pm") - 1,
+                                       0, HV_FETCH_ISEXISTS, NULL, 0))
+                    {
+                        require_pv("utf8.pm");
+                    }
+                    invlist = parse_uniprop_string(name, n, FOLD, &invert);
+                    if (invlist) {
+                        if (invert) {
+                            value ^= 'P' ^ 'p';
+                        }
+                    }
+                    else {
 
                     /* Try to get the definition of the property into
                      * <invlist>.  If /i is in effect, the effective property
@@ -16386,6 +16823,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
                     name = savepv(Perl_form(aTHX_ "%.*s", (int)n, RExC_parse));
                     SAVEFREEPV(name);
+
+                    for (i = RExC_parse; i < RExC_parse + n; i++) {
+                        if (isCNTRL(*i) && *i != '\t') {
+                            RExC_parse = e + 1;
+                            vFAIL2("Can't find Unicode property definition \"%s\"", name);
+                        }
+                    }
+
                     if (FOLD) {
                         lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
 
@@ -16396,7 +16841,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
                     /* Look up the property name, and get its swash and
                      * inversion list, if the property is found  */
-                    SvREFCNT_dec(swash); /* Free any left-overs */
                     swash = _core_swash_init("utf8",
                                              (lookup_name)
                                               ? lookup_name
@@ -16496,19 +16940,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         {
                             has_user_defined_property = TRUE;
                         }
-                        else if
+                    }
+                    }
+                    if (invlist) {
+                        if (! has_user_defined_property &&
                             /* We warn on matching an above-Unicode code point
                              * if the match would return true, except don't
                              * warn for \p{All}, which has exactly one element
                              * = 0 */
                             (_invlist_contains_cp(invlist, 0x110000)
                                 && (! (_invlist_len(invlist) == 1
-                                       && *invlist_array(invlist) == 0)))
+                                       && *invlist_array(invlist) == 0))))
                         {
                             warn_super = TRUE;
                         }
 
-
                         /* Invert if asking for the complement */
                         if (value == 'P') {
                            _invlist_union_complement_2nd(properties,
@@ -16518,14 +16964,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             /* The swash can't be used as-is, because we've
                             * inverted things; delay removing it to here after
                             * have copied its invlist above */
-                            SvREFCNT_dec_NN(swash);
+                            if (! swash) {
+                                SvREFCNT_dec_NN(invlist);
+                            }
+                            SvREFCNT_dec(swash);
                             swash = NULL;
                         }
                         else {
                             _invlist_union(properties, invlist, &properties);
+                            if (! swash) {
+                                SvREFCNT_dec_NN(invlist);
+                            }
                        }
-                   }
-               }
+                    }
+                } /* End of actually getting the values in pass 2 */
+
                RExC_parse = e + 1;
                 namedclass = ANYOF_UNIPROP;  /* no official name, but it's
                                                 named */
@@ -16777,21 +17230,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     }
                 }
                 else if (  UNI_SEMANTICS
+                        || AT_LEAST_ASCII_RESTRICTED
                         || classnum == _CC_ASCII
                         || (DEPENDS_SEMANTICS && (   classnum == _CC_DIGIT
                                                   || classnum == _CC_XDIGIT)))
                 {
-                    /* We usually have to worry about /d and /a affecting what
-                     * POSIX classes match, with special code needed for /d
-                     * because we won't know until runtime what all matches.
-                     * But there is no extra work needed under /u, and
-                     * [:ascii:] is unaffected by /a and /d; and :digit: and
-                     * :xdigit: don't have runtime differences under /d.  So we
-                     * can special case these, and avoid some extra work below,
-                     * and at runtime. */
+                    /* We usually have to worry about /d affecting what POSIX
+                     * classes match, with special code needed because we won't
+                     * know until runtime what all matches.  But there is no
+                     * extra work needed under /u and /a; and [:ascii:] is
+                     * unaffected by /d; and :digit: and :xdigit: don't have
+                     * runtime differences under /d.  So we can special case
+                     * these, and avoid some extra work below, and at runtime.
+                     * */
                     _invlist_union_maybe_complement_2nd(
                                                      simple_posixes,
-                                                     PL_XPosix_ptrs[classnum],
+                                                      ((AT_LEAST_ASCII_RESTRICTED)
+                                                       ? PL_Posix_ptrs[classnum]
+                                                       : PL_XPosix_ptrs[classnum]),
                                                      namedclass % 2 != 0,
                                                      &simple_posixes);
                 }
@@ -16916,7 +17372,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             {
                 /* Here <value> is indeed a multi-char fold.  Get what it is */
 
-                U8 foldbuf[UTF8_MAXBYTES_CASE];
+                U8 foldbuf[UTF8_MAXBYTES_CASE+1];
                 STRLEN foldlen;
 
                 UV folded = _to_uni_fold_flags(
@@ -16991,7 +17447,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                           " be some subset of \"0-9\","
                                           " \"A-Z\", or \"a-z\"");
                     }
-                    else if (prevvalue >= 0x660) { /* ARABIC_INDIC_DIGIT_ZERO */
+                    else if (prevvalue >= FIRST_NON_ASCII_DECIMAL_DIGIT) {
                         SSize_t index_start;
                         SSize_t index_final;
 
@@ -16999,8 +17455,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          * can't do the same checks for above-ASCII ranges,
                          * except in the case of digit ones.  These should
                          * contain only digits from the same group of 10.  The
-                         * ASCII case is handled just above.  0x660 is the
-                         * first digit character beyond ASCII.  Hence here, the
+                         * ASCII case is handled just above.  Hence here, the
                          * range could be a range of digits.  First some
                          * unlikely special cases.  Grandfather in that a range
                          * ending in 19DA (NEW TAI LUE THAM DIGIT ONE) is bad
@@ -17171,7 +17626,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 #if 0   /* Have decided not to deal with multi-char folds in inverted classes,
            because too confusing */
         if (invert) {
-            sv_catpv(substitute_parse, "(?:");
+            sv_catpvs(substitute_parse, "(?:");
         }
 #endif
 
@@ -17191,7 +17646,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                                 &PL_sv_undef)
                 {
                     if (! first_time) {
-                        sv_catpv(substitute_parse, "|");
+                        sv_catpvs(substitute_parse, "|");
                     }
                     first_time = FALSE;
 
@@ -17203,24 +17658,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         /* If the character class contains anything else besides these
          * multi-character folds, have to include it in recursive parsing */
         if (element_count) {
-            sv_catpv(substitute_parse, "|[");
+            sv_catpvs(substitute_parse, "|[");
             prefix_end = SvCUR(substitute_parse);
             sv_catpvn(substitute_parse, orig_parse, RExC_parse - orig_parse);
 
             /* Put in a closing ']' only if not going off the end, as otherwise
              * we are adding something that really isn't there */
             if (RExC_parse < RExC_end) {
-                sv_catpv(substitute_parse, "]");
+                sv_catpvs(substitute_parse, "]");
             }
         }
 
-        sv_catpv(substitute_parse, ")");
+        sv_catpvs(substitute_parse, ")");
 #if 0
         if (invert) {
             /* This is a way to get the parse to skip forward a whole named
              * sequence instead of matching the 2nd character when it fails the
              * first */
-            sv_catpv(substitute_parse, "(*THEN)(*SKIP)(*FAIL)|.)");
+            sv_catpvs(substitute_parse, "(*THEN)(*SKIP)(*FAIL)|.)");
         }
 #endif
 
@@ -17236,7 +17691,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
        ret = reg(pRExC_state, 1, &reg_flags, depth+1);
 
-       *flagp |= reg_flags&(HASWIDTH|SIMPLE|SPSTART|POSTPONED|RESTART_PASS1|NEED_UTF8);
+        *flagp |= reg_flags & (HASWIDTH|SIMPLE|SPSTART|POSTPONED|RESTART_PASS1|NEED_UTF8);
 
         /* And restore so can parse the rest of the pattern */
         RExC_parse = save_parse;
@@ -17294,14 +17749,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 /* The actual POSIXish node for all the rest depends on the
                  * charset modifier.  The ones in the first set depend only on
                  * ASCII or, if available on this platform, also locale */
+
                 case ANYOF_ASCII:
                 case ANYOF_NASCII:
+
 #ifdef HAS_ISASCII
-                    op = (LOC) ? POSIXL : POSIXA;
-#else
-                    op = POSIXA;
+                    if (LOC) {
+                        op = POSIXL;
+                        goto join_posix;
+                    }
 #endif
-                    goto join_posix;
+                    /* (namedclass - ANYOF_ASCII) is 0 or 1. xor'ing with
+                     * invert converts that to 1 or 0 */
+                    op = ASCII + ((namedclass - ANYOF_ASCII) ^ invert);
+                    break;
 
                 /* The following don't have any matches in the upper Latin1
                  * range, hence /d is equivalent to /u for them.  Making it /u
@@ -17443,6 +17904,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                            TRUE /* downgradable to EXACT */
                                            );
             }
+            else {
+                *flagp |= HASWIDTH|SIMPLE;
+            }
 
             RExC_parse = (char *) cur_parse;
 
@@ -17486,27 +17950,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             _invlist_intersection(PL_utf8_foldable, cp_foldable_list,
                                   &fold_intersection);
 
-            /* The folds for all the Latin1 characters are hard-coded into this
-             * program, but we have to go out to disk to get the others. */
-            if (invlist_highest(cp_foldable_list) >= 256) {
-
-                /* This is a hash that for a particular fold gives all
-                 * characters that are involved in it */
-                if (! PL_utf8_foldclosures) {
-                    _load_PL_utf8_foldclosures();
-                }
-            }
-
             /* Now look at the foldable characters in this class individually */
             invlist_iterinit(fold_intersection);
             while (invlist_iternext(fold_intersection, &start, &end)) {
                 UV j;
+                UV folded;
 
                 /* Look at every character in the range */
                 for (j = start; j <= end; j++) {
                     U8 foldbuf[UTF8_MAXBYTES_CASE+1];
                     STRLEN foldlen;
-                    SV** listp;
+                    unsigned int k;
+                    Size_t folds_to_count;
+                    unsigned int first_folds_to;
+                    const unsigned int * remaining_folds_to_list;
 
                     if (j < 256) {
 
@@ -17541,57 +17998,51 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      * rules hard-coded for it.  First, get its fold.  This is
                      * the simple fold, as the multi-character folds have been
                      * handled earlier and separated out */
-                    _to_uni_fold_flags(j, foldbuf, &foldlen,
+                    folded = _to_uni_fold_flags(j, foldbuf, &foldlen,
                                                         (ASCII_FOLD_RESTRICTED)
                                                         ? FOLD_FLAGS_NOMIX_ASCII
                                                         : 0);
 
-                    /* Single character fold of above Latin1.  Add everything in
-                    * its fold closure to the list that this node should match.
-                    * The fold closures data structure is a hash with the keys
-                    * being the UTF-8 of every character that is folded to, like
-                    * 'k', and the values each an array of all code points that
-                    * fold to its key.  e.g. [ 'k', 'K', KELVIN_SIGN ].
-                    * Multi-character folds are not included */
-                    if ((listp = hv_fetch(PL_utf8_foldclosures,
-                                        (char *) foldbuf, foldlen, FALSE)))
-                    {
-                        AV* list = (AV*) *listp;
-                        IV k;
-                        for (k = 0; k <= av_tindex_skip_len_mg(list); k++) {
-                            SV** c_p = av_fetch(list, k, FALSE);
-                            UV c;
-                            assert(c_p);
-
-                            c = SvUV(*c_p);
-
-                            /* /aa doesn't allow folds between ASCII and non- */
-                            if ((ASCII_FOLD_RESTRICTED
-                                && (isASCII(c) != isASCII(j))))
-                            {
-                                continue;
-                            }
+                    /* Single character fold of above Latin1.  Add everything
+                     * in its fold closure to the list that this node should
+                     * match. */
+                    folds_to_count = _inverse_folds(folded, &first_folds_to,
+                                                    &remaining_folds_to_list);
+                    for (k = 0; k <= folds_to_count; k++) {
+                        UV c = (k == 0)     /* First time through use itself */
+                                ? folded
+                                : (k == 1)  /* 2nd time use, the first fold */
+                                   ? first_folds_to
+
+                                     /* Then the remaining ones */
+                                   : remaining_folds_to_list[k-2];
+
+                        /* /aa doesn't allow folds between ASCII and non- */
+                        if ((   ASCII_FOLD_RESTRICTED
+                            && (isASCII(c) != isASCII(j))))
+                        {
+                            continue;
+                        }
 
-                            /* Folds under /l which cross the 255/256 boundary
-                             * are added to a separate list.  (These are valid
-                             * only when the locale is UTF-8.) */
-                            if (c < 256 && LOC) {
-                                *use_list = add_cp_to_invlist(*use_list, c);
-                                continue;
-                            }
+                        /* Folds under /l which cross the 255/256 boundary are
+                         * added to a separate list.  (These are valid only
+                         * when the locale is UTF-8.) */
+                        if (c < 256 && LOC) {
+                            *use_list = add_cp_to_invlist(*use_list, c);
+                            continue;
+                        }
 
-                            if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
-                            {
-                                cp_list = add_cp_to_invlist(cp_list, c);
-                            }
-                            else {
-                                /* Similarly folds involving non-ascii Latin1
-                                * characters under /d are added to their list */
-                                has_upper_latin1_only_utf8_matches
-                                        = add_cp_to_invlist(
-                                           has_upper_latin1_only_utf8_matches,
-                                           c);
-                            }
+                        if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
+                        {
+                            cp_list = add_cp_to_invlist(cp_list, c);
+                        }
+                        else {
+                            /* Similarly folds involving non-ascii Latin1
+                             * characters under /d are added to their list */
+                            has_upper_latin1_only_utf8_matches
+                                = add_cp_to_invlist(
+                                            has_upper_latin1_only_utf8_matches,
+                                            c);
                         }
                     }
                 }
@@ -17620,26 +18071,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         }
     }
     if (posixes || nposixes) {
-
-        /* We have to adjust /a and /aa */
-        if (AT_LEAST_ASCII_RESTRICTED) {
-
-            /* Under /a and /aa, nothing above ASCII matches these */
-            if (posixes) {
-                _invlist_intersection(posixes,
-                                    PL_XPosix_ptrs[_CC_ASCII],
-                                    &posixes);
-            }
-
-            /* Under /a and /aa, everything above ASCII matches these
-             * complements */
-            if (nposixes) {
-                _invlist_union_complement_2nd(nposixes,
-                                              PL_XPosix_ptrs[_CC_ASCII],
-                                              &nposixes);
-            }
-        }
-
         if (! DEPENDS_SEMANTICS) {
 
             /* For everything but /d, we can just add the current 'posixes' and
@@ -17707,6 +18138,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 if (_invlist_len(only_non_utf8_list) != 0) {
                     ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
                 }
+                SvREFCNT_dec_NN(only_non_utf8_list);
             }
             else {
                 /* Here there were no complemented posix classes.  That means
@@ -17893,25 +18325,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      * certain common classes that are easy to test.  Getting to this point in
      * the code means that the class didn't get optimized there.  Since this
      * code is only executed in Pass 2, it is too late to save space--it has
-     * been allocated in Pass 1, and currently isn't given back.  But turning
-     * things into an EXACTish node can allow the optimizer to join it to any
-     * adjacent such nodes.  And if the class is equivalent to things like /./,
-     * expensive run-time swashes can be avoided.  Now that we have more
-     * complete information, we can find things necessarily missed by the
-     * earlier code.  Another possible "optimization" that isn't done is that
-     * something like [Ee] could be changed into an EXACTFU.  khw tried this
-     * and found that the ANYOF is faster, including for code points not in the
-     * bitmap.  This still might make sense to do, provided it got joined with
-     * an adjacent node(s) to create a longer EXACTFU one.  This could be
-     * accomplished by creating a pseudo ANYOF_EXACTFU node type that the join
-     * routine would know is joinable.  If that didn't happen, the node type
-     * could then be made a straight ANYOF */
+     * been allocated in Pass 1, and currently isn't given back.  XXX Why not?
+     * But turning things into an EXACTish node can allow the optimizer to join
+     * it to any adjacent such nodes.  And if the class is equivalent to things
+     * like /./, expensive run-time swashes can be avoided.  Now that we have
+     * more complete information, we can find things necessarily missed by the
+     * earlier code. */
 
     if (optimizable && cp_list && ! invert) {
         UV start, end;
         U8 op = END;  /* The optimzation node-type */
         int posix_class = -1;   /* Illegal value */
         const char * cur_parse= RExC_parse;
+        U8 ANYOFM_mask = 0xFF;
+        U32 anode_arg = 0;
 
         invlist_iterinit(cp_list);
         if (! invlist_iternext(cp_list, &start, &end)) {
@@ -17994,29 +18421,45 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         invlist_iterfinish(cp_list);
 
         if (op == END) {
-            const UV cp_list_len = _invlist_len(cp_list);
-            const UV* cp_list_array = invlist_array(cp_list);
 
             /* Here, didn't find an optimization.  See if this matches any of
-             * the POSIX classes.  These run slightly faster for above-Unicode
-             * code points, so don't bother with POSIXA ones nor the 2 that
-             * have no above-Unicode matches.  We can avoid these checks unless
-             * the ANYOF matches at least as high as the lowest POSIX one
-             * (which was manually found to be \v.  The actual code point may
-             * increase in later Unicode releases, if a higher code point is
-             * assigned to be \v, but this code will never break.  It would
-             * just mean we could execute the checks for posix optimizations
-             * unnecessarily) */
-
-            if (cp_list_array[cp_list_len-1] > 0x2029) {
+             * the POSIX classes.  First try ASCII */
+
+            if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 0)) {
+                op = ASCII;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 1)) {
+                op = NASCII;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (invlist_highest(cp_list) >= 0x2029) {
+
+                /* Then try the other POSIX classes.  The POSIXA ones are about
+                 * the same speed as ANYOF ops, but the ones that have
+                 * above-Latin1 code point matches are somewhat faster than
+                 * ANYOF.  So optimize those, but don't bother with the POSIXA
+                 * ones nor [:cntrl:] which has no above-Latin1 matches.  If
+                 * this ANYOF node has a lower highest possible matching code
+                 * point than any of the XPosix ones, we know that it can't
+                 * possibly be the same as any of them, so we can avoid
+                 * executing this code.  The 0x2029 above for the lowest max
+                 * was determined by manual inspection of the classes, and
+                 * comes from \v.  Suppose Unicode in a later version adds a
+                 * higher code point to \v.  All that means is that this code
+                 * can be executed unnecessarily.  It will still give the
+                 * correct answer. */
+
                 for (posix_class = 0;
                      posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
                      posix_class++)
                 {
                     int try_inverted;
-                    if (posix_class == _CC_ASCII || posix_class == _CC_CNTRL) {
+
+                    if (posix_class == _CC_CNTRL) {
                         continue;
                     }
+
                     for (try_inverted = 0; try_inverted < 2; try_inverted++) {
 
                         /* Check if matches normal or inverted */
@@ -18034,6 +18477,106 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 }
               found_posix: ;
             }
+
+            /* If it didn't match a POSIX class, it might be able to be turned
+             * into an ANYOFM node.  Compare two different bytes, bit-by-bit.
+             * In some positions, the bits in each will be 1; and in other
+             * positions both will be 0; and in some positions the bit will be
+             * 1 in one byte, and 0 in the other.  Let 'n' be the number of
+             * positions where the bits differ.  We create a mask which has
+             * exactly 'n' 0 bits, each in a position where the two bytes
+             * differ.  Now take the set of all bytes that when ANDed with the
+             * mask yield the same result.  That set has 2**n elements, and is
+             * representable by just two 8 bit numbers: the result and the
+             * mask.  Importantly, matching the set can be vectorized by
+             * creating a word full of the result bytes, and a word full of the
+             * mask bytes, yielding a significant speed up.  Here, see if this
+             * node matches such a set.  As a concrete example consider [01],
+             * and the byte representing '0' which is 0x30 on ASCII machines.
+             * It has the bits 0011 0000.  Take the mask 1111 1110.  If we AND
+             * 0x31 and 0x30 with that mask we get 0x30.  Any other bytes ANDed
+             * yield something else.  So [01], which is a common usage, is
+             * optimizable into ANYOFM, and can benefit from the speed up.  We
+             * can only do this on UTF-8 invariant bytes, because the variance
+             * would throw this off.  */
+            if (   op == END
+                && invlist_highest(cp_list) <=
+#ifdef EBCDIC
+                                               0xFF
+#else
+                                               0x7F
+#endif
+            ) {
+                Size_t cp_count = 0;
+                bool first_time = TRUE;
+                unsigned int lowest_cp = 0xFF;
+                U8 bits_differing = 0;
+
+                /* Only needed on EBCDIC, as there, variants and non- are mixed
+                 * together.  Could #ifdef it out on ASCII, but probably the
+                 * compiler will optimize it out */
+                bool has_variant = FALSE;
+
+                /* Go through the bytes and find the bit positions that differ */
+                invlist_iterinit(cp_list);
+                while (invlist_iternext(cp_list, &start, &end)) {
+                    unsigned int i = start;
+
+                    cp_count += end - start + 1;
+
+                    if (first_time) {
+                        if (! UVCHR_IS_INVARIANT(i)) {
+                            has_variant = TRUE;
+                            continue;
+                        }
+
+                        first_time = FALSE;
+                        lowest_cp = start;
+
+                        i++;
+                    }
+
+                    /* Find the bit positions that differ from the lowest code
+                     * point in the node.  Keep track of all such positions by
+                     * OR'ing */
+                    for (; i <= end; i++) {
+                        if (! UVCHR_IS_INVARIANT(i)) {
+                            has_variant = TRUE;
+                            continue;
+                        }
+
+                        bits_differing  |= i ^ lowest_cp;
+                    }
+                }
+                invlist_iterfinish(cp_list);
+
+                /* At the end of the loop, we count how many bits differ from
+                 * the bits in lowest code point, call the count 'd'.  If the
+                 * set we found contains 2**d elements, it is the closure of
+                 * all code points that differ only in those bit positions.  To
+                 * convince yourself of that, first note that the number in the
+                 * closure must be a power of 2, which we test for.  The only
+                 * way we could have that count and it be some differing set,
+                 * is if we got some code points that don't differ from the
+                 * lowest code point in any position, but do differ from each
+                 * other in some other position.  That means one code point has
+                 * a 1 in that position, and another has a 0.  But that would
+                 * mean that one of them differs from the lowest code point in
+                 * that position, which possibility we've already excluded. */
+                if ( ! has_variant
+                    && cp_count == 1U << PL_bitcount[bits_differing])
+                {
+                    assert(cp_count > 1);
+                    op = ANYOFM;
+
+                    /* We need to make the bits that differ be 0's */
+                    ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS */
+
+                    /* The argument is the lowest code point */
+                    anode_arg = lowest_cp;
+                    *flagp |= HASWIDTH|SIMPLE;
+                }
+            }
         }
 
         if (op != END) {
@@ -18041,7 +18584,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             RExC_emit = (regnode *)orig_emit;
 
             if (regarglen[op]) {
-                ret = reganode(pRExC_state, op, 0);
+                ret = reganode(pRExC_state, op, anode_arg);
             } else {
                 ret = reg_node(pRExC_state, op);
             }
@@ -18056,6 +18599,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             else if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
                 FLAGS(ret) = posix_class;
             }
+            else if (PL_regkind[op] == ANYOFM) {
+                FLAGS(ret) = ANYOFM_mask;
+            }
 
             SvREFCNT_dec_NN(cp_list);
             return ret;
@@ -18859,8 +19405,8 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
                 case EXACT:
                 case EXACTL:
                 case EXACTF:
-                case EXACTFA_NO_TRIE:
-                case EXACTFA:
+                case EXACTFAA_NO_TRIE:
+                case EXACTFAA:
                 case EXACTFU:
                 case EXACTFLU8:
                 case EXACTFU_SS:
@@ -18908,6 +19454,36 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
 }
 #endif
 
+STATIC SV*
+S_get_ANYOFM_contents(pTHX_ const regnode * n) {
+
+    /* Returns a new inversion list of every code point matched by the ANYOFM
+     * node 'n': each byte that, ANDed with the mask in FLAGS(n), is ARG(n) */
+
+    SV * cp_list = _new_invlist(-1);
+    const U8 lowest = (U8) ARG(n);
+    unsigned int i;
+    U8 count = 0;   /* How many matching code points found so far */
+    U8 needed = 1U << PL_bitcount[ (U8) ~ FLAGS(n)]; /* 2**(# of 0 mask bits) */
+
+    PERL_ARGS_ASSERT_GET_ANYOFM_CONTENTS;
+
+    /* No code point below ARG(n) can match, so start the scan there; a code
+     * point is in the set iff ANDing it with the mask yields ARG(n) */
+    for (i = lowest; i <= 0xFF; i++) {
+        if ((i & FLAGS(n)) == ARG(n)) {
+            cp_list = add_cp_to_invlist(cp_list, i);
+            count++;
+
+            /* The set has exactly 'needed' members (a power of two), so
+             * there is no use looking further once that many are found */
+            if (count >= needed) break;
+        }
+    }
+
+    return cp_list;
+}
+
 /*
  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
  */
@@ -19250,7 +19826,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         if ( k == REF && reginfo) {
             U32 n = ARG(o);  /* which paren pair */
             I32 ln = prog->offs[n].start;
-            if (prog->lastparen < n || ln == -1)
+            if (prog->lastparen < n || ln == -1 || prog->offs[n].end == -1)
                 Perl_sv_catpvf(aTHX_ sv, ": FAIL");
             else if (ln == prog->offs[n].end)
                 Perl_sv_catpvf(aTHX_ sv, ": ACCEPT - EMPTY STRING");
@@ -19434,15 +20010,24 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 
         SvREFCNT_dec(unresolved);
     }
+    else if (k == ANYOFM) {
+        SV * cp_list = get_ANYOFM_contents(o);
+
+       Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+        put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, TRUE);
+       Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
+
+        SvREFCNT_dec(cp_list);
+    }
     else if (k == POSIXD || k == NPOSIXD) {
         U8 index = FLAGS(o) * 2;
         if (index < C_ARRAY_LENGTH(anyofs)) {
             if (*anyofs[index] != '[')  {
-                sv_catpv(sv, "[");
+                sv_catpvs(sv, "[");
             }
             sv_catpv(sv, anyofs[index]);
             if (*anyofs[index] != '[')  {
-                sv_catpv(sv, "]");
+                sv_catpvs(sv, "]");
             }
         }
         else {
@@ -20322,9 +20907,9 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
 #else
         format = "\\x%02" UVXf "-\\x%02" UVXf;
 #endif
-        GCC_DIAG_IGNORE(-Wformat-nonliteral);
+        GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
         Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
-        GCC_DIAG_RESTORE;
+        GCC_DIAG_RESTORE_STMT;
         break;
     }
 }
@@ -20473,7 +21058,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
 {
     /* Appends to 'sv' a displayable version of the innards of the bracketed
      * character class defined by the other arguments:
-     *  'bitmap' points to the bitmap.
+     *  'bitmap' points to the bitmap, or is NULL if there is none.
      *  'nonbitmap_invlist' is an inversion list of the code points that are in
      *      the bitmap range, but for some reason aren't in the bitmap; NULL if
      *      none.  The reasons for this could be that they require some
@@ -20482,9 +21067,9 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
      *      was not resolved at the time of the regex compilation (under /u)
      *  'only_utf8_locale_invlist' is an inversion list of the code points that
      *      are valid only if the runtime locale is a UTF-8 one; NULL if none
-     *  'node' is the regex pattern node.  It is needed only when the above two
-     *      parameters are not null, and is passed so that this routine can
-     *      tease apart the various reasons for them.
+     *  'node' is the regex pattern ANYOF node.  It is needed only when the
+     *      above two parameters are not null, and is passed so that this
+     *      routine can tease apart the various reasons for them.
      *  'force_as_is_display' is TRUE if this routine should definitely NOT try
      *      to invert things to see if that leads to a cleaner display.  If
      *      FALSE, this routine is free to use its judgment about doing this.
@@ -20584,13 +21169,16 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
     }
 
     /* Accumulate the bit map into the unconditional match list */
-    for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
-        if (BITMAP_TEST(bitmap, i)) {
-            int start = i++;
-            for (; i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i); i++) {
-                /* empty */
+    if (bitmap) {
+        for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
+            if (BITMAP_TEST(bitmap, i)) {
+                int start = i++;
+                for (;
+                     i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i);
+                     i++)
+                { /* empty */ }
+                invlist = _add_range_to_invlist(invlist, start, i-1);
             }
-            invlist = _add_range_to_invlist(invlist, start, i-1);
         }
     }
 
@@ -20757,7 +21345,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
        /* While that wasn't END last time... */
        NODE_ALIGN(node);
        op = OP(node);
-       if (op == CLOSE || op == WHILEM)
+       if (op == CLOSE || op == SRCLOSE || op == WHILEM)
            indent--;
        next = regnext((regnode *)node);
 
@@ -20881,7 +21469,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
            node = NEXTOPER(node);
            node += regarglen[(U8)op];
        }
-       if (op == CURLYX || op == OPEN)
+       if (op == CURLYX || op == OPEN || op == SROPEN)
            indent++;
     }
     CLEAR_OPTSTART;
@@ -20893,6 +21481,600 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
 
 #endif /* DEBUGGING */
 
+#ifndef PERL_IN_XSUB_RE
+
+#include "uni_keywords.h"
+
+void
+Perl_init_uniprops(pTHX)
+{
+    /* Set up the inversion list global variables.  Each one is assigned the
+     * result of calling _new_invlist_C_array() on a table */
+    PL_XPosix_ptrs[_CC_ASCII] = _new_invlist_C_array(uni_prop_ptrs[UNI_ASCII]);
+    PL_XPosix_ptrs[_CC_ALPHANUMERIC] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXALNUM]);
+    PL_XPosix_ptrs[_CC_ALPHA] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXALPHA]);
+    PL_XPosix_ptrs[_CC_BLANK] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXBLANK]);
+    PL_XPosix_ptrs[_CC_CASED] =  _new_invlist_C_array(uni_prop_ptrs[UNI_CASED]);
+    PL_XPosix_ptrs[_CC_CNTRL] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXCNTRL]);
+    PL_XPosix_ptrs[_CC_DIGIT] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXDIGIT]);
+    PL_XPosix_ptrs[_CC_GRAPH] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXGRAPH]);
+    PL_XPosix_ptrs[_CC_LOWER] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXLOWER]);
+    PL_XPosix_ptrs[_CC_PRINT] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXPRINT]);
+    PL_XPosix_ptrs[_CC_PUNCT] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXPUNCT]);
+    PL_XPosix_ptrs[_CC_SPACE] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXSPACE]);
+    PL_XPosix_ptrs[_CC_UPPER] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXUPPER]);
+    PL_XPosix_ptrs[_CC_VERTSPACE] = _new_invlist_C_array(uni_prop_ptrs[UNI_VERTSPACE]);
+    PL_XPosix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXWORD]);
+    PL_XPosix_ptrs[_CC_XDIGIT] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXXDIGIT]);
+    /* Likewise, from the POSIX* tables (ASCII, CASED, VERTSPACE as above) */
+    PL_Posix_ptrs[_CC_ASCII] = _new_invlist_C_array(uni_prop_ptrs[UNI_ASCII]);
+    PL_Posix_ptrs[_CC_ALPHANUMERIC] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXALNUM]);
+    PL_Posix_ptrs[_CC_ALPHA] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXALPHA]);
+    PL_Posix_ptrs[_CC_BLANK] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXBLANK]);
+    PL_Posix_ptrs[_CC_CASED] =  _new_invlist_C_array(uni_prop_ptrs[UNI_CASED]);
+    PL_Posix_ptrs[_CC_CNTRL] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXCNTRL]);
+    PL_Posix_ptrs[_CC_DIGIT] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXDIGIT]);
+    PL_Posix_ptrs[_CC_GRAPH] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXGRAPH]);
+    PL_Posix_ptrs[_CC_LOWER] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXLOWER]);
+    PL_Posix_ptrs[_CC_PRINT] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXPRINT]);
+    PL_Posix_ptrs[_CC_PUNCT] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXPUNCT]);
+    PL_Posix_ptrs[_CC_SPACE] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXSPACE]);
+    PL_Posix_ptrs[_CC_UPPER] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXUPPER]);
+    PL_Posix_ptrs[_CC_VERTSPACE] = _new_invlist_C_array(uni_prop_ptrs[UNI_VERTSPACE]);
+    PL_Posix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXWORD]);
+    PL_Posix_ptrs[_CC_XDIGIT] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXXDIGIT]);
+
+    PL_GCB_invlist = _new_invlist_C_array(_Perl_GCB_invlist);
+    PL_SB_invlist = _new_invlist_C_array(_Perl_SB_invlist);
+    PL_WB_invlist = _new_invlist_C_array(_Perl_WB_invlist);
+    PL_LB_invlist = _new_invlist_C_array(_Perl_LB_invlist);
+    PL_SCX_invlist = _new_invlist_C_array(_Perl_SCX_invlist);
+
+    PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
+    PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
+    PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
+
+    PL_Assigned_invlist = _new_invlist_C_array(uni_prop_ptrs[UNI_ASSIGNED]);
+
+    PL_utf8_perl_idstart = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_IDSTART]);
+    PL_utf8_perl_idcont = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_IDCONT]);
+
+    PL_utf8_charname_begin = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_CHARNAME_BEGIN]);
+    PL_utf8_charname_continue = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_CHARNAME_CONTINUE]);
+
+    PL_utf8_foldable = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_ANY_FOLDS]);
+    PL_HasMultiCharFold = _new_invlist_C_array(uni_prop_ptrs[
+                                            UNI__PERL_FOLDS_TO_MULTI_CHAR]);
+    PL_NonL1NonFinalFold = _new_invlist_C_array(
+                                            NonL1_Perl_Non_Final_Folds_invlist);
+
+    PL_utf8_toupper = _new_invlist_C_array(Uppercase_Mapping_invlist);
+    PL_utf8_tolower = _new_invlist_C_array(Lowercase_Mapping_invlist);
+    PL_utf8_totitle = _new_invlist_C_array(Titlecase_Mapping_invlist);
+    PL_utf8_tofold = _new_invlist_C_array(Case_Folding_invlist);
+    PL_utf8_tosimplefold = _new_invlist_C_array(Simple_Case_Folding_invlist);
+    PL_utf8_foldclosures = _new_invlist_C_array(_Perl_IVCF_invlist);
+    PL_utf8_mark = _new_invlist_C_array(uni_prop_ptrs[UNI_M]);
+
+    /* The below are used only by deprecated functions.  They could be removed */
+    PL_utf8_xidcont  = _new_invlist_C_array(uni_prop_ptrs[UNI_XIDC]);
+    PL_utf8_idcont   = _new_invlist_C_array(uni_prop_ptrs[UNI_IDC]);
+    PL_utf8_xidstart = _new_invlist_C_array(uni_prop_ptrs[UNI_XIDS]);
+}
+
+SV *
+Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
+                                const bool to_fold, bool * invert)
+{
+    /* Parse the interior meat of \p{} passed to this in 'name' with length
+     * 'name_len', and return an inversion list if a property with 'name' is
+     * found, or NULL if not.  'name' points to the input with leading and
+     * trailing space trimmed.  'to_fold' indicates if /i is in effect.
+     *
+     * When the return is an inversion list, '*invert' will be set to a boolean
+     * indicating if it should be inverted or not
+     *
+     * This currently doesn't handle all cases.  A NULL return indicates the
+     * caller should try a different approach
+     */
+
+    char* lookup_name;
+    bool stricter = FALSE;
+    bool is_nv_type = FALSE;         /* nv= or numeric_value=, or possibly one
+                                        of the cjk numeric properties (though
+                                        it requires extra effort to compile
+                                        them) */
+    unsigned int i;
+    unsigned int j = 0, lookup_len;
+    int equals_pos = -1;        /* Where the '=' is found, or negative if none */
+    int slash_pos = -1;        /* Where the '/' is found, or negative if none */
+    int table_index = 0;
+    bool starts_with_In_or_Is = FALSE;
+    Size_t lookup_offset = 0;
+
+    PERL_ARGS_ASSERT_PARSE_UNIPROP_STRING;
+
+    /* The input will be modified into 'lookup_name' */
+    Newx(lookup_name, name_len, char);
+    SAVEFREEPV(lookup_name);
+
+    /* Parse the input. */
+    for (i = 0; i < name_len; i++) {
+        char cur = name[i];
+
+        /* These characters can be freely ignored in most situations.  Later it
+         * may turn out we shouldn't have ignored them, and we have to reparse,
+         * but we don't have enough information yet to make that decision */
+        if (cur == '-' || cur == '_' || isSPACE_A(cur)) {
+            continue;
+        }
+
+        /* Case differences are also ignored.  Our lookup routine assumes
+         * everything is lowercase */
+        if (isUPPER_A(cur)) {
+            lookup_name[j++] = toLOWER(cur);
+            continue;
+        }
+
+        /* A double colon is either an error, or a package qualifier to a
+         * subroutine user-defined property; neither of which do we currently
+         * handle
+         *
+         * But a single colon is a synonym for '=' */
+        if (cur == ':') {
+            if (i < name_len - 1 && name[i+1] == ':') {
+                return NULL;
+            }
+            cur = '=';
+        }
+
+        /* Otherwise, this character is part of the name. */
+        lookup_name[j++] = cur;
+
+        /* Only the equals sign needs further processing */
+        if (cur == '=') {
+            equals_pos = j; /* Note where it occurred in the input */
+            break;
+        }
+    }
+
+    /* Here, we are either done with the whole property name, if it was simple;
+     * or are positioned just after the '=' if it is compound. */
+
+    if (equals_pos >= 0) {
+        assert(! stricter); /* We shouldn't have set this yet */
+
+        /* Space immediately after the '=' is ignored */
+        i++;
+        for (; i < name_len; i++) {
+            if (! isSPACE_A(name[i])) {
+                break;
+            }
+        }
+
+        /* Certain properties need special handling.  They may optionally be
+         * prefixed by 'is'.  Ignore that prefix for the purposes of checking
+         * if this is one of those.  (Only 'j' bytes of lookup_name are set) */
+        if (memBEGINPs(lookup_name, j, "is")) {
+            lookup_offset = 2;
+        }
+
+        /* Then check if it is one of these properties.  This is hard-coded
+         * because easier this way, and the list is unlikely to change.  There
+         * are several properties like this in the Unihan DB, which is unlikely
+         * to be compiled, and they all end with 'numeric'.  The interiors
+         * aren't checked for the precise property.  This would stop working if
+         * a cjk property were to be created that ended with 'numeric' and
+         * wasn't a numeric type */
+        is_nv_type = memEQs(lookup_name + lookup_offset,
+                       j - 1 - lookup_offset, "numericvalue")
+                  || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "nv")
+                  || (   memENDPs(lookup_name + lookup_offset,
+                            j - 1 - lookup_offset, "numeric")
+                      && (   memBEGINPs(lookup_name + lookup_offset,
+                                      j - 1 - lookup_offset, "cjk")
+                          || memBEGINPs(lookup_name + lookup_offset,
+                                      j - 1 - lookup_offset, "k")));
+        if (   is_nv_type
+            || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "canonicalcombiningclass")
+            || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "ccc")
+            || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "age")
+            || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "in")
+            || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "presentin"))
+        {
+            unsigned int k;
+
+            /* What makes these properties special is that the stuff after the
+             * '=' is a number.  Therefore, we can't throw away '-'
+             * willy-nilly, as those could be a minus sign.  Other stricter
+             * rules also apply.  However, these properties all can have the
+             * rhs not be a number, in which case they contain at least one
+             * alphabetic.  In those cases, the stricter rules don't apply.
+             * But the numeric type properties can have the alphas [Ee] to
+             * signify an exponent, and it is still a number with stricter
+             * rules.  So look for an alpha that signifies not-strict */
+            stricter = TRUE;
+            for (k = i; k < name_len; k++) {
+                if (   isALPHA_A(name[k])
+                    && (! is_nv_type || ! isALPHA_FOLD_EQ(name[k], 'E')))
+                {
+                    stricter = FALSE;
+                    break;
+                }
+            }
+        }
+
+        if (stricter) {
+
+            /* A number may have a leading '+' or '-'.  The latter is retained.
+             * Nothing may follow the '=', so don't read past the input */
+            if (i < name_len && name[i] == '+') {
+                i++;
+            }
+            else if (i < name_len && name[i] == '-') {
+                lookup_name[j++] = '-';
+                i++;
+            }
+
+            /* Skip leading zeros including single underscores separating the
+             * zeros, or between the final leading zero and the first other
+             * digit */
+            for (; i < name_len - 1; i++) {
+                if (   name[i] != '0'
+                    && (name[i] != '_' || ! isDIGIT_A(name[i+1])))
+                {
+                    break;
+                }
+            }
+        }
+    }
+    else {  /* No '=' */
+
+       /* We are now in a position to determine if this property should have
+        * been parsed using stricter rules.  Only a few are like that, and
+        * unlikely to change. */
+        if (   memBEGINPs(lookup_name, j, "perl")
+            && memNEs(lookup_name + 4, j - 4, "space")
+            && memNEs(lookup_name + 4, j - 4, "word"))
+        {
+            stricter = TRUE;
+
+            /* We set the inputs back to 0 and the code below will reparse,
+             * using strict */
+            i = j = 0;
+        }
+    }
+
+    /* Here, we have either finished the property, or are positioned to parse
+     * the remainder, and we know if stricter rules apply.  Finish out, if not
+     * already done */
+    for (; i < name_len; i++) {
+        char cur = name[i];
+
+        /* In all instances, case differences are ignored, and we normalize to
+         * lowercase */
+        if (isUPPER_A(cur)) {
+            lookup_name[j++] = toLOWER(cur);
+            continue;
+        }
+
+        /* An underscore is skipped, but not under strict rules unless it
+         * separates two digits */
+        if (cur == '_') {
+            if (    stricter
+                && (     i == 0 || (int) i == equals_pos || i == name_len- 1
+                    || ! isDIGIT_A(name[i-1]) || ! isDIGIT_A(name[i+1])))
+            {
+                lookup_name[j++] = '_';
+            }
+            continue;
+        }
+
+        /* Hyphens are skipped except under strict */
+        if (cur == '-' && ! stricter) {
+            continue;
+        }
+
+        /* XXX Bug in documentation.  It says white space skipped adjacent to
+         * non-word char.  Maybe we should, but shouldn't skip it next to a dot
+         * in a number */
+        if (isSPACE_A(cur) && ! stricter) {
+            continue;
+        }
+
+        lookup_name[j++] = cur;
+
+        /* Unless this is a non-trailing slash, we are done with it */
+        if (i >= name_len - 1 || cur != '/') {
+            continue;
+        }
+
+        slash_pos = j;
+
+        /* A slash in the 'numeric value' property indicates that what follows
+         * is a denominator.  It can have a leading '+' and '0's that should be
+         * skipped.  But we have never allowed a negative denominator, so treat
+         * a minus like every other character.  (No need to rule out a second
+         * '/', as that won't match anything anyway */
+        if (is_nv_type) {
+            i++;
+            if (i < name_len && name[i] == '+') {
+                i++;
+            }
+
+            /* Skip leading zeros including underscores separating digits */
+            for (; i < name_len - 1; i++) {
+                if (   name[i] != '0'
+                    && (name[i] != '_' || ! isDIGIT_A(name[i+1])))
+                {
+                    break;
+                }
+            }
+
+            /* Store the first real character in the denominator, if any */
+            if (i < name_len) lookup_name[j++] = name[i];
+        }
+    }
+
+    /* Here, we are completely done parsing the input 'name', and 'lookup_name'
+     * contains a copy, normalized.
+     *
+     * This special case is grandfathered in: 'L_' and 'GC=L_' are accepted and
+     * different from without the underscores.  */
+    if (  (   UNLIKELY(memEQs(lookup_name, j, "l"))
+           || UNLIKELY(memEQs(lookup_name, j, "gc=l")))
+        && UNLIKELY(name[name_len-1] == '_'))
+    {
+        lookup_name[j++] = '&';
+    }
+    else if (name_len > 2 && name[0] == 'I' && (   name[1] == 'n'
+                                                || name[1] == 's'))
+    {
+
+        /* Also, if the original input began with 'In' or 'Is', it could be a
+         * subroutine call instead of a property name, which currently isn't
+         * handled by this function.  Subroutine calls can't happen if there is
+         * an '=' in the name */
+        if (equals_pos < 0 && get_cvn_flags(name, name_len, GV_NOTQUAL) != NULL)
+        {
+            return NULL;
+        }
+
+        starts_with_In_or_Is = TRUE;
+    }
+
+    lookup_len = j;     /* Use a more mnemonic name starting here */
+
+    /* Get the index into our pointer table of the inversion list corresponding
+     * to the property */
+    table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+
+    /* If it didn't find the property */
+    if (table_index == 0) {
+
+        /* If didn't find the property, we try again stripping off any initial
+         * 'In' or 'Is' */
+        if (starts_with_In_or_Is) {
+            lookup_name += 2;
+            lookup_len -= 2;
+            equals_pos -= 2;
+            slash_pos -= 2;
+
+            table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+        }
+
+        if (table_index == 0) {
+            char * canonical;
+
+            /* If not found, and not a numeric type property, isn't a legal
+             * property */
+            if (! is_nv_type) {
+                return NULL;
+            }
+
+            /* But the numeric type properties need more work to decide.  What
+             * we do is make sure we have the number in canonical form and look
+             * that up. */
+
+            if (slash_pos < 0) {    /* No slash */
+
+                /* When it isn't a rational, take the input, convert it to a
+                 * NV, then create a canonical string representation of that
+                 * NV. */
+
+                NV value;
+
+                /* Get the value */
+                if (my_atof3(lookup_name + equals_pos, &value,
+                             lookup_len - equals_pos)
+                          != lookup_name + lookup_len)
+                {
+                    return NULL;
+                }
+
+                /* If the value is an integer, the canonical value is integral */
+                if (Perl_ceil(value) == value) {
+                    canonical = Perl_form(aTHX_ "%.*s%.0" NVff,
+                                                equals_pos, lookup_name, value);
+                }
+                else {  /* Otherwise, it is %e with a known precision */
+                    char * exp_ptr;
+
+                    canonical = Perl_form(aTHX_ "%.*s%.*" NVef,
+                                                equals_pos, lookup_name,
+                                                PL_E_FORMAT_PRECISION, value);
+
+                    /* The exponent generated is expecting two digits, whereas
+                     * %e on some systems will generate three.  Remove leading
+                     * zeros in excess of 2 from the exponent.  We start
+                     * looking for them after the '=' */
+                    exp_ptr = strchr(canonical + equals_pos, 'e');
+                    if (exp_ptr) {
+                        char * cur_ptr = exp_ptr + 2; /* past the 'e[+-]' */
+                        SSize_t excess_exponent_len = strlen(cur_ptr) - 2;
+
+                        assert(*(cur_ptr - 1) == '-' || *(cur_ptr - 1) == '+');
+
+                        if (excess_exponent_len > 0) {
+                            SSize_t leading_zeros = strspn(cur_ptr, "0");
+                            SSize_t excess_leading_zeros
+                                    = MIN(leading_zeros, excess_exponent_len);
+                            if (excess_leading_zeros > 0) {
+                                Move(cur_ptr + excess_leading_zeros,
+                                     cur_ptr,
+                                     strlen(cur_ptr) - excess_leading_zeros
+                                       + 1,  /* Copy the NUL as well */
+                                     char);
+                            }
+                        }
+                    }
+                }
+            }
+            else {  /* Has a slash.  Create a rational in canonical form  */
+                UV numerator, denominator, gcd, trial;
+                const char * end_ptr;
+                const char * sign = "";
+
+                /* We can't just find the numerator, denominator, and do the
+                 * division, then use the method above, because that is
+                 * inexact.  And the input could be a rational that is within
+                 * epsilon (given our precision) of a valid rational, and would
+                 * then incorrectly compare valid.
+                 *
+                 * We're only interested in the part after the '=' */
+                const char * this_lookup_name = lookup_name + equals_pos;
+                lookup_len -= equals_pos;
+                slash_pos -= equals_pos;
+
+                /* Handle any leading minus */
+                if (this_lookup_name[0] == '-') {
+                    sign = "-";
+                    this_lookup_name++;
+                    lookup_len--;
+                    slash_pos--;
+                }
+
+                /* Convert the numerator to numeric */
+                end_ptr = this_lookup_name + slash_pos;
+                if (! grok_atoUV(this_lookup_name, &numerator, &end_ptr)) {
+                    return NULL;
+                }
+
+                /* It better have included all characters before the slash */
+                if (*end_ptr != '/') {
+                    return NULL;
+                }
+
+                /* Set to look at just the denominator */
+                this_lookup_name += slash_pos;
+                lookup_len -= slash_pos;
+                end_ptr = this_lookup_name + lookup_len;
+
+                /* Convert the denominator to numeric */
+                if (! grok_atoUV(this_lookup_name, &denominator, &end_ptr)) {
+                    return NULL;
+                }
+
+                /* It better be the rest of the characters, and don't divide by
+                 * 0 */
+                if (   end_ptr != this_lookup_name + lookup_len
+                    || denominator == 0)
+                {
+                    return NULL;
+                }
+
+                /* Get the greatest common divisor using
+                   http://en.wikipedia.org/wiki/Euclidean_algorithm */
+                gcd = numerator;
+                trial = denominator;
+                while (trial != 0) {
+                    UV temp = trial;
+                    trial = gcd % trial;
+                    gcd = temp;
+                }
+
+                /* If already in lowest possible terms, we have already tried
+                 * looking this up */
+                if (gcd == 1) {
+                    return NULL;
+                }
+
+                /* Reduce the rational, which should put it in canonical form.
+                 * Then look it up */
+                numerator /= gcd;
+                denominator /= gcd;
+
+                canonical = Perl_form(aTHX_ "%.*s%s%" UVuf "/%" UVuf,
+                        equals_pos, lookup_name, sign, numerator, denominator);
+            }
+
+            /* Here, we have the number in canonical form.  Try that */
+            table_index = match_uniprop((U8 *) canonical, strlen(canonical));
+            if (table_index == 0) {
+                return NULL;
+            }
+        }
+    }
+
+    /* The return is an index into a table of ptrs.  A negative return
+     * signifies that the real index is the absolute value, but the result
+     * needs to be inverted */
+    if (table_index < 0) {
+        *invert = TRUE;
+        table_index = -table_index;
+    }
+    else {
+        *invert = FALSE;
+    }
+
+    /* Out-of band indices indicate a deprecated property.  The proper index is
+     * modulo it with the table size.  And dividing by the table size yields
+     * an offset into a table constructed to contain the corresponding warning
+     * message */
+    if (table_index > MAX_UNI_KEYWORD_INDEX) {
+        Size_t warning_offset = table_index / MAX_UNI_KEYWORD_INDEX;
+        table_index %= MAX_UNI_KEYWORD_INDEX;
+        Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
+                "Use of '%.*s' in \\p{} or \\P{} is deprecated because: %s",
+                (int) name_len, name, deprecated_property_msgs[warning_offset]);
+    }
+
+    /* In a few properties, a different property is used under /i.  These are
+     * unlikely to change, so are hard-coded here. */
+    if (to_fold) {
+        if (   table_index == UNI_XPOSIXUPPER
+            || table_index == UNI_XPOSIXLOWER
+            || table_index == UNI_TITLE)
+        {
+            table_index = UNI_CASED;
+        }
+        else if (   table_index == UNI_UPPERCASELETTER
+                 || table_index == UNI_LOWERCASELETTER
+#  ifdef UNI_TITLECASELETTER   /* Missing from early Unicodes */
+                 || table_index == UNI_TITLECASELETTER
+#  endif
+        ) {
+            table_index = UNI_CASEDLETTER;
+        }
+        else if (  table_index == UNI_POSIXUPPER
+                || table_index == UNI_POSIXLOWER)
+        {
+            table_index = UNI_POSIXALPHA;
+        }
+    }
+
+    /* Create and return the inversion list */
+    return _new_invlist_C_array(uni_prop_ptrs[table_index]);
+}
+
+#endif
+
 /*
  * ex: set ts=8 sts=4 sw=4 et:
  */