This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Generate new regnode for /[[:posix:]]/l
[perl5.git] / regcomp.c
index 90e9332..b0c8db6 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -133,8 +133,10 @@ struct RExC_state_t {
     char       *start;                 /* Start of input for compile */
     char       *end;                   /* End of input for compile */
     char       *parse;                 /* Input-scan pointer. */
-    char        *adjusted_start;        /* 'start', adjusted.  See code use */
-    STRLEN      precomp_adj;            /* an offset beyond precomp.  See code use */
+    char        *copy_start;            /* start of copy of input within
+                                           constructed parse string */
+    char        *copy_start_in_input;   /* Position in input string
+                                           corresponding to copy_start */
     SSize_t    whilem_seen;            /* number of WHILEM in this expr */
     regnode    *emit_start;            /* Start of emitted-code area */
     regnode    *emit_bound;            /* First regnode outside of the
@@ -148,7 +150,8 @@ struct RExC_state_t {
     I32                naughty;                /* How bad is this pattern? */
     I32                sawback;                /* Did we see \1, ...? */
     U32                seen;
-    SSize_t    size;                   /* Code size. */
+    SSize_t    size;                   /* Number of regnode equivalents in
+                                           pattern */
     I32         npar;                   /* Capture buffer count, (OPEN) plus
                                            one. ("par" 0 is the whole
                                            pattern)*/
@@ -169,7 +172,7 @@ struct RExC_state_t {
     HV         *paren_names;           /* Paren names */
 
     regnode    **recurse;              /* Recurse regops */
-    I32                recurse_count;                /* Number of recurse regops we have generated */
+    I32         recurse_count;          /* Number of recurse regops we have generated */
     U8          *study_chunk_recursed;  /* bitmap of which subs we have moved
                                            through */
     U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
@@ -200,6 +203,7 @@ struct RExC_state_t {
     U32         study_chunk_recursed_count;
     SV          *mysv1;
     SV          *mysv2;
+
 #define RExC_lastparse (pRExC_state->lastparse)
 #define RExC_lastnum   (pRExC_state->lastnum)
 #define RExC_paren_name_list    (pRExC_state->paren_name_list)
@@ -218,8 +222,8 @@ struct RExC_state_t {
 #define RExC_flags     (pRExC_state->flags)
 #define RExC_pm_flags  (pRExC_state->pm_flags)
 #define RExC_precomp   (pRExC_state->precomp)
-#define RExC_precomp_adj (pRExC_state->precomp_adj)
-#define RExC_adjusted_start  (pRExC_state->adjusted_start)
+#define RExC_copy_start_in_input (pRExC_state->copy_start_in_input)
+#define RExC_copy_start_in_constructed  (pRExC_state->copy_start)
 #define RExC_precomp_end (pRExC_state->precomp_end)
 #define RExC_rx_sv     (pRExC_state->rx_sv)
 #define RExC_rx                (pRExC_state->rx)
@@ -240,7 +244,7 @@ struct RExC_state_t {
 #define RExC_seen_unfolded_sharp_s (pRExC_state->seen_unfolded_sharp_s)
 
 #ifdef RE_TRACK_PATTERN_OFFSETS
-#define RExC_offsets   (pRExC_state->rxi->u.offsets) /* I am not like the
+#  define RExC_offsets  (pRExC_state->rxi->u.offsets) /* I am not like the
                                                          others */
 #endif
 #define RExC_emit      (pRExC_state->emit)
@@ -315,7 +319,7 @@ struct RExC_state_t {
 #define TRYAGAIN       0x10    /* Weeded out a declaration. */
 #define RESTART_PASS1   0x20    /* Need to restart sizing pass */
 #define NEED_UTF8       0x40    /* In conjunction with RESTART_PASS1, need to
-                                   calcuate sizes as UTF-8 */
+                                   calculate sizes as UTF-8 */
 
 #define REG_NODE_NUM(x) ((x) ? (int)((x)-RExC_emit_start) : -1)
 
@@ -370,22 +374,22 @@ struct RExC_state_t {
             }                                                               \
     } STMT_END
 
-#define RETURN_NULL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
+#define RETURN_FAIL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
                     RETURN_X_ON_RESTART_OR_FLAGS(NULL,flags,flagp,extra)
 
 #define RETURN_X_ON_RESTART(X, flags,flagp)                                 \
                         RETURN_X_ON_RESTART_OR_FLAGS( X, flags, flagp, 0)
 
 
-#define RETURN_NULL_ON_RESTART_FLAGP_OR_FLAGS(flagp,extra)                  \
+#define RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp,extra)                  \
             if (*(flagp) & (RESTART_PASS1|(extra))) return NULL
 
 #define MUST_RESTART(flags) ((flags) & (RESTART_PASS1))
 
-#define RETURN_NULL_ON_RESTART(flags,flagp)                                 \
+#define RETURN_FAIL_ON_RESTART(flags,flagp)                                 \
                                     RETURN_X_ON_RESTART(NULL, flags,flagp)
-#define RETURN_NULL_ON_RESTART_FLAGP(flagp)                                 \
-                            RETURN_NULL_ON_RESTART_FLAGP_OR_FLAGS(flagp,0)
+#define RETURN_FAIL_ON_RESTART_FLAGP(flagp)                                 \
+                            RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp, 0)
 
 /* This converts the named class defined in regcomp.h to its equivalent class
  * number defined in handy.h. */
@@ -606,11 +610,11 @@ static const scan_data_t zero_scan_data = {
  * the form of something that is completely different from the input, or
  * something that uses the input as part of the alternate.  In the first case,
  * there should be no possibility of an error, as we are in complete control of
- * the alternate string.  But in the second case we don't control the input
- * portion, so there may be errors in that.  Here's an example:
+ * the alternate string.  But in the second case we don't completely control
+ * the input portion, so there may be errors in that.  Here's an example:
  *      /[abc\x{DF}def]/ui
  * is handled specially because \x{df} folds to a sequence of more than one
- * character, 'ss'.  What is done is to create and parse an alternate string,
+ * character: 'ss'.  What is done is to create and parse an alternate string,
  * which looks like this:
  *      /(?:\x{DF}|[abc\x{DF}def])/ui
  * where it uses the input unchanged in the middle of something it constructs,
@@ -619,49 +623,65 @@ static const scan_data_t zero_scan_data = {
  * class while in this substitute parse.) 'abc' and 'def' may have errors that
  * need to be reported.  The general situation looks like this:
  *
+ *                                       |<------- identical ------>|
  *              sI                       tI               xI       eI
- * Input:       ----------------------------------------------------
+ * Input:       ---------------------------------------------------------------
  * Constructed:         ---------------------------------------------------
  *                      sC               tC               xC       eC     EC
+ *                                       |<------- identical ------>|
  *
- * The input string sI..eI is the input pattern.  The string sC..EC is the
- * constructed substitute parse string.  The portions sC..tC and eC..EC are
- * constructed by us.  The portion tC..eC is an exact duplicate of the input
- * pattern tI..eI.  In the diagram, these are vertically aligned.  Suppose that
- * while parsing, we find an error at xC.  We want to display a message showing
- * the real input string.  Thus we need to find the point xI in it which
- * corresponds to xC.  xC >= tC, since the portion of the string sC..tC has
- * been constructed by us, and so shouldn't have errors.  We get:
- *
- *      xI = sI + (tI - sI) + (xC - tC)
+ * sI..eI   is the portion of the input pattern we are concerned with here.
+ * sC..EC   is the constructed substitute parse string.
+ *  sC..tC  is constructed by us
+ *  tC..eC  is an exact duplicate of the portion of the input pattern tI..eI.
+ *          In the diagram, these are vertically aligned.
+ *  eC..EC  is also constructed by us.
+ * xC       is the position in the substitute parse string where we found a
+ *          problem.
+ * xI       is the position in the original pattern corresponding to xC.
  *
- * and, the offset into sI is:
+ * We want to display a message showing the real input string.  Thus we need to
+ * translate from xC to xI.  We know that xC >= tC, since the portion of the
+ * string sC..tC has been constructed by us, and so shouldn't have errors.  We
+ * get:
+ *      xI = tI + (xC - tC)
  *
- *      (xI - sI) = (tI - sI) + (xC - tC)
+ * When the substitute parse is constructed, the code needs to set:
+ *      RExC_start (sC)
+ *      RExC_end (eC)
+ *      RExC_copy_start_in_input  (tI)
+ *      RExC_copy_start_in_constructed (tC)
+ * and restore them when done.
  *
- * When the substitute is constructed, we save (tI -sI) as RExC_precomp_adj,
- * and we save tC as RExC_adjusted_start.
- *
- * During normal processing of the input pattern, everything points to that,
- * with RExC_precomp_adj set to 0, and RExC_adjusted_start set to sI.
+ * During normal processing of the input pattern, both
+ * 'RExC_copy_start_in_input' and 'RExC_copy_start_in_constructed' are set to
+ * sI, so that xC equals xI.
  */
 
-#define tI_sI           RExC_precomp_adj
-#define tC              RExC_adjusted_start
-#define sC              RExC_precomp
-#define xI_offset(xC)   ((IV) (tI_sI + (xC - tC)))
-#define xI(xC)          (sC + xI_offset(xC))
-#define eC              RExC_precomp_end
+#define sI              RExC_precomp
+#define eI              RExC_precomp_end
+#define sC              RExC_start
+#define eC              RExC_end
+#define tI              RExC_copy_start_in_input
+#define tC              RExC_copy_start_in_constructed
+#define xI(xC)          (tI + (xC - tC))
+#define xI_offset(xC)   (xI(xC) - sI)
 
 #define REPORT_LOCATION_ARGS(xC)                                            \
     UTF8fARG(UTF,                                                           \
-             (xI(xC) > eC) /* Don't run off end */                          \
+             (xI(xC) > eI) /* Don't run off end */                          \
               ? eC - sC   /* Length before the <--HERE */                   \
-              : ( __ASSERT_(xI_offset(xC) >= 0) xI_offset(xC) ),            \
-             sC),         /* The input pattern printed up to the <--HERE */ \
+              : ((xI_offset(xC) >= 0)                                       \
+                 ? xI_offset(xC)                                            \
+                 : (Perl_croak(aTHX_ "panic: %s: %d: negative offset: %"    \
+                                    IVdf " trying to output message for "   \
+                                    " pattern %.*s",                        \
+                                    __FILE__, __LINE__, (IV) xI_offset(xC), \
+                                    ((int) (eC - sC)), sC), 0)),            \
+             sI),         /* The input pattern printed up to the <--HERE */ \
     UTF8fARG(UTF,                                                           \
-             (xI(xC) > eC) ? 0 : eC - xI(xC), /* Length after <--HERE */    \
-             (xI(xC) > eC) ? eC : xI(xC))     /* pattern after <--HERE */
+             (xI(xC) > eI) ? 0 : eI - xI(xC), /* Length after <--HERE */    \
+             (xI(xC) > eI) ? eI : xI(xC))     /* pattern after <--HERE */
 
 /* Used to point after bad bytes for an error message, but avoid skipping
  * past a nul byte. */
@@ -674,7 +694,7 @@ static const scan_data_t zero_scan_data = {
  */
 #define _FAIL(code) STMT_START {                                       \
     const char *ellipses = "";                                         \
-    IV len = RExC_precomp_end - RExC_precomp;                                  \
+    IV len = RExC_precomp_end - RExC_precomp;                          \
                                                                        \
     if (!SIZE_ONLY)                                                    \
        SAVEFREESV(RExC_rx_sv);                                         \
@@ -865,6 +885,11 @@ static const scan_data_t zero_scan_data = {
                                        REPORT_LOCATION_ARGS(loc));      \
 } STMT_END
 
+/* Convert between a pointer to a node and its offset from the beginning of the
+ * program */
+#define REGNODE_p(offset)    (RExC_emit_start + (offset))
+#define REGNODE_OFFSET(node) ((node) - RExC_emit_start)
+
 /* Macros for recording node offsets.   20001227 mjd@plover.com
  * Nodes are numbered 1, 2, 3, 4.  Node #n's position is recorded in
  * element 2*n-1 of the array.  Element #2n holds the byte length node #n.
@@ -872,7 +897,7 @@ static const scan_data_t zero_scan_data = {
  * Position is 1 indexed.
  */
 #ifndef RE_TRACK_PATTERN_OFFSETS
-#define Set_Node_Offset_To_R(node,byte)
+#define Set_Node_Offset_To_R(offset,byte)
 #define Set_Node_Offset(node,byte)
 #define Set_Cur_Node_Offset
 #define Set_Node_Length_To_R(node,len)
@@ -886,21 +911,21 @@ static const scan_data_t zero_scan_data = {
 #else
 #define ProgLen(ri) ri->u.offsets[0]
 #define SetProgLen(ri,x) ri->u.offsets[0] = x
-#define Set_Node_Offset_To_R(node,byte) STMT_START {                   \
+#define Set_Node_Offset_To_R(offset,byte) STMT_START {                 \
     if (! SIZE_ONLY) {                                                 \
        MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n",         \
-                   __LINE__, (int)(node), (int)(byte)));               \
-       if((node) < 0) {                                                \
+                   __LINE__, (int)(offset), (int)(byte)));             \
+       if((offset) < 0) {                                              \
            Perl_croak(aTHX_ "value of node is %d in Offset macro",     \
-                                         (int)(node));                  \
+                                         (int)(offset));                \
        } else {                                                        \
-           RExC_offsets[2*(node)-1] = (byte);                          \
+            RExC_offsets[2*(offset)-1] = (byte);                       \
        }                                                               \
     }                                                                  \
 } STMT_END
 
-#define Set_Node_Offset(node,byte) \
-    Set_Node_Offset_To_R((node)-RExC_emit_start, (byte)-RExC_start)
+#define Set_Node_Offset(node,byte)                                      \
+    Set_Node_Offset_To_R(REGNODE_OFFSET(node), (byte)-RExC_start)
 #define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
 
 #define Set_Node_Length_To_R(node,len) STMT_START {                    \
@@ -917,17 +942,17 @@ static const scan_data_t zero_scan_data = {
 } STMT_END
 
 #define Set_Node_Length(node,len) \
-    Set_Node_Length_To_R((node)-RExC_emit_start, len)
+    Set_Node_Length_To_R(REGNODE_OFFSET(node), len)
 #define Set_Node_Cur_Length(node, start)                \
     Set_Node_Length(node, RExC_parse - start)
 
 /* Get offsets and lengths */
-#define Node_Offset(n) (RExC_offsets[2*((n)-RExC_emit_start)-1])
-#define Node_Length(n) (RExC_offsets[2*((n)-RExC_emit_start)])
+#define Node_Offset(n) (RExC_offsets[2*(REGNODE_OFFSET(n))-1])
+#define Node_Length(n) (RExC_offsets[2*(REGNODE_OFFSET(n))])
 
 #define Set_Node_Offset_Length(node,offset,len) STMT_START {   \
-    Set_Node_Offset_To_R((node)-RExC_emit_start, (offset));    \
-    Set_Node_Length_To_R((node)-RExC_emit_start, (len));       \
+    Set_Node_Offset_To_R(REGNODE_OFFSET(node), (offset));      \
+    Set_Node_Length_To_R(REGNODE_OFFSET(node), (len)); \
 } STMT_END
 #endif
 
@@ -966,39 +991,39 @@ Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
 
 #define DEBUG_RExC_seen()                                                   \
         DEBUG_OPTIMISE_MORE_r({                                             \
-            Perl_re_printf( aTHX_ "RExC_seen: ");                                       \
+            Perl_re_printf( aTHX_ "RExC_seen: ");                           \
                                                                             \
             if (RExC_seen & REG_ZERO_LEN_SEEN)                              \
-                Perl_re_printf( aTHX_ "REG_ZERO_LEN_SEEN ");                            \
+                Perl_re_printf( aTHX_ "REG_ZERO_LEN_SEEN ");                \
                                                                             \
             if (RExC_seen & REG_LOOKBEHIND_SEEN)                            \
-                Perl_re_printf( aTHX_ "REG_LOOKBEHIND_SEEN ");                          \
+                Perl_re_printf( aTHX_ "REG_LOOKBEHIND_SEEN ");              \
                                                                             \
             if (RExC_seen & REG_GPOS_SEEN)                                  \
-                Perl_re_printf( aTHX_ "REG_GPOS_SEEN ");                                \
+                Perl_re_printf( aTHX_ "REG_GPOS_SEEN ");                    \
                                                                             \
             if (RExC_seen & REG_RECURSE_SEEN)                               \
-                Perl_re_printf( aTHX_ "REG_RECURSE_SEEN ");                             \
+                Perl_re_printf( aTHX_ "REG_RECURSE_SEEN ");                 \
                                                                             \
             if (RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)                    \
-                Perl_re_printf( aTHX_ "REG_TOP_LEVEL_BRANCHES_SEEN ");                  \
+                Perl_re_printf( aTHX_ "REG_TOP_LEVEL_BRANCHES_SEEN ");      \
                                                                             \
             if (RExC_seen & REG_VERBARG_SEEN)                               \
-                Perl_re_printf( aTHX_ "REG_VERBARG_SEEN ");                             \
+                Perl_re_printf( aTHX_ "REG_VERBARG_SEEN ");                 \
                                                                             \
             if (RExC_seen & REG_CUTGROUP_SEEN)                              \
-                Perl_re_printf( aTHX_ "REG_CUTGROUP_SEEN ");                            \
+                Perl_re_printf( aTHX_ "REG_CUTGROUP_SEEN ");                \
                                                                             \
             if (RExC_seen & REG_RUN_ON_COMMENT_SEEN)                        \
-                Perl_re_printf( aTHX_ "REG_RUN_ON_COMMENT_SEEN ");                      \
+                Perl_re_printf( aTHX_ "REG_RUN_ON_COMMENT_SEEN ");          \
                                                                             \
             if (RExC_seen & REG_UNFOLDED_MULTI_SEEN)                        \
-                Perl_re_printf( aTHX_ "REG_UNFOLDED_MULTI_SEEN ");                      \
+                Perl_re_printf( aTHX_ "REG_UNFOLDED_MULTI_SEEN ");          \
                                                                             \
             if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)                  \
-                Perl_re_printf( aTHX_ "REG_UNBOUNDED_QUANTIFIER_SEEN ");                \
+                Perl_re_printf( aTHX_ "REG_UNBOUNDED_QUANTIFIER_SEEN ");    \
                                                                             \
-            Perl_re_printf( aTHX_ "\n");                                                \
+            Perl_re_printf( aTHX_ "\n");                                    \
         });
 
 #define DEBUG_SHOW_STUDY_FLAG(flags,flag) \
@@ -1144,7 +1169,7 @@ typedef struct dictionary item;
 
 
 PERL_STATIC_INLINE item*
-push(UV key,item* curr)
+push(UV key, item* curr)
 {
     item* head;
     Newx(head, 1, item);
@@ -1170,7 +1195,7 @@ find(item* head, UV key)
 }
 
 PERL_STATIC_INLINE item*
-uniquePush(item* head,UV key)
+uniquePush(item* head, UV key)
 {
     item* iterator = head;
 
@@ -1181,7 +1206,7 @@ uniquePush(item* head,UV key)
         iterator = iterator->next;
     }
 
-    return push(key,head);
+    return push(key, head);
 }
 
 PERL_STATIC_INLINE void
@@ -1210,7 +1235,7 @@ S_edit_distance(const UV* src,
 )
 {
     item *head = NULL;
-    UV swapCount,swapScore,targetCharCount,i,j;
+    UV swapCount, swapScore, targetCharCount, i, j;
     UV *scores;
     UV score_ceil = x + y;
 
@@ -1222,14 +1247,14 @@ S_edit_distance(const UV* src,
     scores[1 * (y + 2) + 0] = score_ceil;
     scores[0 * (y + 2) + 1] = score_ceil;
     scores[1 * (y + 2) + 1] = 0;
-    head = uniquePush(uniquePush(head,src[0]),tgt[0]);
+    head = uniquePush(uniquePush(head, src[0]), tgt[0]);
 
     /* work loops    */
     /* i = src index */
     /* j = tgt index */
     for (i=1;i<=x;i++) {
         if (i < x)
-            head = uniquePush(head,src[i]);
+            head = uniquePush(head, src[i]);
         scores[(i+1) * (y + 2) + 1] = i;
         scores[(i+1) * (y + 2) + 0] = score_ceil;
         swapCount = 0;
@@ -1237,12 +1262,12 @@ S_edit_distance(const UV* src,
         for (j=1;j<=y;j++) {
             if (i == 1) {
                 if(j < y)
-                head = uniquePush(head,tgt[j]);
+                head = uniquePush(head, tgt[j]);
                 scores[1 * (y + 2) + (j + 1)] = j;
                 scores[0 * (y + 2) + (j + 1)] = score_ceil;
             }
 
-            targetCharCount = find(head,tgt[j-1])->value;
+            targetCharCount = find(head, tgt[j-1])->value;
             swapScore = scores[targetCharCount * (y + 2) + swapCount] + i - targetCharCount - 1 + j - swapCount;
 
             if (src[i-1] != tgt[j-1]){
@@ -1254,7 +1279,7 @@ S_edit_distance(const UV* src,
             }
         }
 
-        find(head,src[i-1])->value = i;
+        find(head, src[i-1])->value = i;
     }
 
     {
@@ -1494,7 +1519,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
         assert(RExC_rxi->data->what[n] == 's');
 
         if (ary[1] && ary[1] != &PL_sv_undef) { /* Has compile-time swash */
-            invlist = sv_2mortal(invlist_clone(_get_swash_invlist(ary[1])));
+            invlist = sv_2mortal(invlist_clone(_get_swash_invlist(ary[1]), NULL));
         }
         else if (ary[0] && ary[0] != &PL_sv_undef) {
 
@@ -1507,7 +1532,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
 
             /* Here no compile-time swash, and no run-time only data.  Use the
              * node's inversion list */
-            invlist = sv_2mortal(invlist_clone(ary[3]));
+            invlist = sv_2mortal(invlist_clone(ary[3], NULL));
         }
 
         /* Get the code points valid only under UTF-8 locales */
@@ -1996,7 +2021,7 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
      * ANYOF node, with the first NUM_ANYOF_CODE_POINTS code points in a bit
      * map */
 
-    SV* invlist = invlist_clone(ssc->invlist);
+    SV* invlist = invlist_clone(ssc->invlist, NULL);
 
     PERL_ARGS_ASSERT_SSC_FINALIZE;
 
@@ -2020,9 +2045,9 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
 
     if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
         ANYOF_FLAGS(ssc) |= ANYOF_MATCHES_POSIXL;
+        OP(ssc) = ANYOFPOSIXL;
     }
-
-    if (RExC_contains_locale) {
+    else if (RExC_contains_locale) {
         OP(ssc) = ANYOFL;
     }
 
@@ -2128,7 +2153,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
                     (UV)trie->trans[ base + ofs - trie->uniquecharcount ].next
                    );
                 } else {
-                    Perl_re_printf( aTHX_  "%*s",colwidth,"   ." );
+                    Perl_re_printf( aTHX_  "%*s", colwidth,"   ." );
                 }
             }
 
@@ -2184,7 +2209,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
         }
         for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
            SV ** const tmp = av_fetch( revcharmap,
-                                        TRIE_LIST_ITEM(state,charid).forid, 0);
+                                        TRIE_LIST_ITEM(state, charid).forid, 0);
            if ( tmp ) {
                 Perl_re_printf( aTHX_  "%*s:%3X=%4" UVXf " | ",
                     colwidth,
@@ -2194,8 +2219,8 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
                               (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
                               | PERL_PV_ESCAPE_FIRSTCHAR
                     ) ,
-                    TRIE_LIST_ITEM(state,charid).forid,
-                    (UV)TRIE_LIST_ITEM(state,charid).newstate
+                    TRIE_LIST_ITEM(state, charid).forid,
+                    (UV)TRIE_LIST_ITEM(state, charid).newstate
                 );
                 if (!(charid % 10))
                     Perl_re_printf( aTHX_  "\n%*s| ",
@@ -2606,7 +2631,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
         Perl_re_indentf( aTHX_
           "make_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n",
           depth+1,
-          REG_NODE_NUM(startbranch),REG_NODE_NUM(first),
+          REG_NODE_NUM(startbranch), REG_NODE_NUM(first),
           REG_NODE_NUM(last), REG_NODE_NUM(tail), (int)depth);
     });
 
@@ -3381,14 +3406,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                                     SV ** const tmp = av_fetch( revcharmap, first_ofs, 0);
                                    const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
 
-                                    TRIE_BITMAP_SET_FOLDED(trie,*ch,folder);
+                                    TRIE_BITMAP_SET_FOLDED(trie,*ch, folder);
                                     DEBUG_OPTIMISE_r(
                                         Perl_re_printf( aTHX_  "%s", (char*)ch)
                                     );
                                }
                            }
                             /* store the current firstchar in the bitmap */
-                            TRIE_BITMAP_SET_FOLDED(trie,*ch,folder);
+                            TRIE_BITMAP_SET_FOLDED(trie,*ch, folder);
                             DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "%s", ch));
                        }
                         first_ofs = ofs;
@@ -3449,7 +3474,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                    ) {
                    regnode *fix = convert;
                    U32 word = trie->wordcount;
+#ifdef RE_TRACK_PATTERN_OFFSETS
                    mjd_nodelen++;
+#endif
                    Set_Node_Offset_Length(convert, mjd_offset, state - 1);
                    while( ++fix < n ) {
                        Set_Node_Offset_Length(fix, 0, 0);
@@ -3516,19 +3543,21 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
             regnode *opt = convert;
 
             while ( ++opt < optimize) {
-                Set_Node_Offset_Length(opt,0,0);
+                Set_Node_Offset_Length(opt, 0, 0);
             }
             /*
                 Try to clean up some of the debris left after the
                 optimisation.
              */
             while( optimize < jumper ) {
+#ifdef RE_TRACK_PATTERN_OFFSETS
                 mjd_nodelen += Node_Length((optimize));
+#endif
                 OP( optimize ) = OPTIMIZED;
-                Set_Node_Offset_Length(optimize,0,0);
+                Set_Node_Offset_Length(optimize, 0, 0);
                 optimize++;
             }
-            Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen);
+            Set_Node_Offset_Length(convert, mjd_offset, mjd_nodelen);
         });
     } /* end node insert */
 
@@ -3632,12 +3661,12 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
     if ( OP(source) == TRIE ) {
         struct regnode_1 *op = (struct regnode_1 *)
             PerlMemShared_calloc(1, sizeof(struct regnode_1));
-        StructCopy(source,op,struct regnode_1);
+        StructCopy(source, op, struct regnode_1);
         stclass = (regnode *)op;
     } else {
         struct regnode_charclass *op = (struct regnode_charclass *)
             PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
-        StructCopy(source,op,struct regnode_charclass);
+        StructCopy(source, op, struct regnode_charclass);
         stclass = (regnode *)op;
     }
     OP(stclass)+=2; /* convert the TRIE type to its AHO-CORASICK equivalent */
@@ -3838,12 +3867,12 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
 
 #define JOIN_EXACT(scan,min_subtract,unfolded_multi_char, flags) \
     if (PL_regkind[OP(scan)] == EXACT) \
-        join_exact(pRExC_state,(scan),(min_subtract),unfolded_multi_char, (flags),NULL,depth+1)
+        join_exact(pRExC_state,(scan),(min_subtract),unfolded_multi_char, (flags), NULL, depth+1)
 
 STATIC U32
 S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                    UV *min_subtract, bool *unfolded_multi_char,
-                   U32 flags,regnode *val, U32 depth)
+                   U32 flags, regnode *val, U32 depth)
 {
     /* Merge several consecutive EXACTish nodes into one. */
     regnode *n = regnext(scan);
@@ -4157,7 +4186,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
 
 #define INIT_AND_WITHP \
     assert(!and_withp); \
-    Newx(and_withp,1, regnode_ssc); \
+    Newx(and_withp, 1, regnode_ssc); \
     SAVEFREEPV(and_withp)
 
 
@@ -4387,7 +4416,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     /* recurse study_chunk() for each BRANCH in an alternation */
                    minnext = study_chunk(pRExC_state, &scan, minlenp,
                                       &deltanext, next, &data_fake, stopparen,
-                                      recursed_depth, NULL, f,depth+1);
+                                      recursed_depth, NULL, f, depth+1);
 
                    if (min1 > minnext)
                        min1 = minnext;
@@ -4533,7 +4562,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                             Perl_re_indentf( aTHX_  "%s %" UVuf ":%s\n",
                               depth+1,
                               "Looking for TRIE'able sequences. Tail node is ",
-                              (UV)(tail - RExC_emit_start),
+                              (UV) REGNODE_OFFSET(tail),
                               SvPV_nolen_const( RExC_mysv )
                             );
                         });
@@ -4732,7 +4761,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         DEBUG_TRIE_COMPILE_r({
                             regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
                             Perl_re_indentf( aTHX_  "- %s (%d) <SCAN FINISHED> ",
-                              depth+1, SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur));
+                              depth+1, SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
                             Perl_re_printf( aTHX_  "(First==%d, Last==%d, Cur==%d, tt==%s)\n",
                                REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur),
                                PL_reg_name[trietype]
@@ -4775,7 +4804,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                         regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
                                         Perl_re_indentf( aTHX_  "- %s (%d) <NOTHING BRANCH SEQUENCE>\n",
                                           depth+1,
-                                          SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur));
+                                          SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
 
                                     });
                                     OP(startbranch)= NOTHING;
@@ -4893,7 +4922,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     RExC_frame_head= newframe;
                     RExC_frame_count++;
                 } else if (!RExC_frame_last->next_frame) {
-                    Newxz(newframe,1,scan_frame);
+                    Newxz(newframe, 1, scan_frame);
                     RExC_frame_last->next_frame= newframe;
                     newframe->prev_frame= RExC_frame_last;
                     RExC_frame_count++;
@@ -5286,7 +5315,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        /* Optimize again: */
                         /* recurse study_chunk() on optimised CURLYX => CURLYM */
                        study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
-                                    NULL, stopparen, recursed_depth, NULL, 0,depth+1);
+                                    NULL, stopparen, recursed_depth, NULL, 0,
+                                    depth+1);
                    }
                    else
                        oscan->flags = 0;
@@ -5476,7 +5506,9 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 /* Cannot expect anything... */
                 scan_commit(pRExC_state, data, minlenp, is_inf);
                data->pos_min += 1;
-               data->pos_delta += 1;
+                if (data->pos_delta != SSize_t_MAX) {
+                    data->pos_delta += 1;
+                }
                data->cur_is_floating = 1; /* float */
            }
        }
@@ -5534,6 +5566,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
 
                 case ANYOFD:
                 case ANYOFL:
+                case ANYOFPOSIXL:
                 case ANYOF:
                    if (flags & SCF_DO_STCLASS_AND)
                        ssc_and(pRExC_state, data->start_class,
@@ -5608,7 +5641,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     invert = 1;
                     /* FALLTHROUGH */
                case ASCII:
-                    my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
+                    my_invlist = invlist_clone(PL_Posix_ptrs[_CC_ASCII], NULL);
 
                     /* This can be handled as a Posix class */
                     goto join_posix_and_ascii;
@@ -5619,9 +5652,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     /* FALLTHROUGH */
                case POSIXA:
                     assert(FLAGS(scan) != _CC_ASCII);
-                    _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
-                                          PL_XPosix_ptrs[_CC_ASCII],
-                                          &my_invlist);
+                    my_invlist = invlist_clone(PL_Posix_ptrs[FLAGS(scan)], NULL);
                     goto join_posix_and_ascii;
 
                case NPOSIXD:
@@ -5630,7 +5661,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     /* FALLTHROUGH */
                case POSIXD:
                case POSIXU:
-                    my_invlist = invlist_clone(PL_XPosix_ptrs[FLAGS(scan)]);
+                    my_invlist = invlist_clone(PL_XPosix_ptrs[FLAGS(scan)], NULL);
 
                     /* NPOSIXD matches all upper Latin1 code points unless the
                      * target string being matched is UTF-8, which is
@@ -5797,7 +5828,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
                                         &deltanext, last, &data_fake,
                                         stopparen, recursed_depth, NULL,
-                                        f,depth+1);
+                                        f, depth+1);
                 if (scan->flags) {
                     if (deltanext) {
                        FAIL("Variable length lookbehind not implemented");
@@ -5958,7 +5989,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                         /* optimise study_chunk() for TRIE */
                         minnext = study_chunk(pRExC_state, &scan, minlenp,
                             &deltanext, (regnode *)nextbranch, &data_fake,
-                            stopparen, recursed_depth, NULL, f,depth+1);
+                            stopparen, recursed_depth, NULL, f, depth+1);
                     }
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                         nextbranch= regnext((regnode*)nextbranch);
@@ -6198,7 +6229,7 @@ Perl_current_re_engine(pTHX)
        ptr = hv_fetchs(table, "regcomp", FALSE);
        if ( !(ptr && SvIOK(*ptr) && SvIV(*ptr)))
            return &PL_core_reg_engine;
-       return INT2PTR(regexp_engine*,SvIV(*ptr));
+       return INT2PTR(regexp_engine*, SvIV(*ptr));
     }
     else {
        SV *ptr;
@@ -6207,7 +6238,7 @@ Perl_current_re_engine(pTHX)
        ptr = cop_hints_fetch_pvs(PL_curcop, "regcomp", 0);
        if ( !(ptr && SvIOK(ptr) && SvIV(ptr)))
            return &PL_core_reg_engine;
-       return INT2PTR(regexp_engine*,SvIV(ptr));
+       return INT2PTR(regexp_engine*, SvIV(ptr));
     }
 }
 
@@ -6510,13 +6541,13 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                  * it is properly null terminated or we will fail asserts
                  * later. In theory we probably shouldn't get such SV's,
                  * but if we do we should handle it gracefully. */
-                if ( SvTYPE(msv) != SVt_PV || (SvLEN(msv) > SvCUR(msv) && *(SvEND(msv)) == 0) ) {
+                if ( SvTYPE(msv) != SVt_PV || (SvLEN(msv) > SvCUR(msv) && *(SvEND(msv)) == 0) || SvIsCOW_shared_hash(msv) ) {
                     /* not a string, or a string with a trailing null */
                     pat = msv;
                 } else {
                     /* a string with no trailing null, we need to copy it
-                     * so it we have a trailing null */
-                    pat = newSVsv(msv);
+                     * so it has a trailing null */
+                    pat = sv_2mortal(newSVsv(msv));
                 }
             }
 
@@ -6585,7 +6616,7 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 {
     int n = 0;
     STRLEN s;
-    
+
     PERL_UNUSED_CONTEXT;
 
     for (s = 0; s < plen; s++) {
@@ -6697,7 +6728,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
        DEBUG_COMPILE_r({
             Perl_re_printf( aTHX_
                "%sre-parsing pattern for runtime code:%s %s\n",
-               PL_colors[4],PL_colors[5],newpat);
+               PL_colors[4], PL_colors[5], newpat);
        });
 
        sv = newSVpvn_flags(newpat, p-newpat-1, RExC_utf8 ? SVf_UTF8 : 0);
@@ -6913,9 +6944,9 @@ S_setup_longest(pTHX_ RExC_state_t *pRExC_state,
 REGEXP *
 Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                    OP *expr, const regexp_engine* eng, REGEXP *old_re,
-                    bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags)
+                    bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags)
 {
-    REGEXP *rx;
+    REGEXP *Rx;         /* Capital 'R' means points to a REGEXP */
     struct regexp *r;
     regexp_internal *ri;
     STRLEN plen;
@@ -6952,18 +6983,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     /* Initialize these here instead of as-needed, as is quick and avoids
      * having to test them each time otherwise */
-    if (! PL_AboveLatin1) {
+    if (! PL_InBitmap) {
 #ifdef DEBUGGING
         char * dump_len_string;
 #endif
 
-       PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
-       PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
-       PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
-        PL_utf8_foldable = _new_invlist_C_array(_Perl_Any_Folds_invlist);
-        PL_HasMultiCharFold =
-                       _new_invlist_C_array(_Perl_Folds_To_Multi_Char_invlist);
-
         /* This is calculated here, because the Perl program that generates the
          * static global ones doesn't currently have access to
          * NUM_ANYOF_CODE_POINTS */
@@ -7103,11 +7127,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         RExC_mysv1= sv_newmortal();
         RExC_mysv2= sv_newmortal();
     });
+
     DEBUG_COMPILE_r({
             SV *dsv= sv_newmortal();
             RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, PL_dump_re_max_len);
             Perl_re_printf( aTHX_  "%sCompiling REx%s %s\n",
-                          PL_colors[4],PL_colors[5],s);
+                          PL_colors[4], PL_colors[5], s);
         });
 
   redo_first_pass:
@@ -7151,8 +7176,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
     }
 
-    RExC_precomp = exp;
-    RExC_precomp_adj = 0;
+    RExC_copy_start_in_constructed = RExC_copy_start_in_input = RExC_precomp = exp;
     RExC_flags = rx_flags;
     RExC_pm_flags = pm_flags;
 
@@ -7185,7 +7209,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     /* First pass: determine size, legality. */
     RExC_parse = exp;
-    RExC_start = RExC_adjusted_start = exp;
+    RExC_start = RExC_copy_start_in_constructed = exp;
     RExC_end = exp + plen;
     RExC_precomp_end = RExC_end;
     RExC_naughty = 0;
@@ -7222,7 +7246,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         RExC_lastparse=NULL;
     );
 
-    if (reg(pRExC_state, 0, &flags,1) == NULL) {
+    if (reg(pRExC_state, 0, &flags, 1) == NULL) {
         /* It's possible to write a regexp in ascii that represents Unicode
         codepoints outside of the byte range, such as via \x{100}. If we
         detect such a sequence we have to convert the entire pattern to utf8
@@ -7243,7 +7267,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
             goto redo_first_pass;
         }
-        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#" UVxf, (UV) flags);
+        Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile for sizing pass, flags=%#" UVxf, (UV) flags);
     }
 
     DEBUG_PARSE_r({
@@ -7274,8 +7298,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /* Allocate space and zero-initialize. Note, the two step process
        of zeroing when in debug mode, thus anything assigned has to
        happen after that */
-    rx = (REGEXP*) newSV_type(SVt_REGEXP);
-    r = ReANY(rx);
+    Rx = (REGEXP*) newSV_type(SVt_REGEXP);
+    r = ReANY(Rx);
     Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
         char, regexp_internal);
     if ( r == NULL || ri == NULL )
@@ -7332,10 +7356,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         /* make sure PL_bitcount bounds not exceeded */
         assert(sizeof(STD_PAT_MODS) <= 8);
 
-        p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
-        SvPOK_on(rx);
+        p = sv_grow(MUTABLE_SV(Rx), wraplen + 1); /* +1 for the ending NUL */
+        SvPOK_on(Rx);
        if (RExC_utf8)
-           SvFLAGS(rx) |= SVf_UTF8;
+           SvFLAGS(Rx) |= SVf_UTF8;
         *p++='('; *p++='?';
 
         /* If a default, cover it using the caret */
@@ -7361,14 +7385,18 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
         *p++ = ':';
         Copy(RExC_precomp, p, plen, char);
-       assert ((RX_WRAPPED(rx) - p) < 16);
-       r->pre_prefix = p - RX_WRAPPED(rx);
+       assert ((RX_WRAPPED(Rx) - p) < 16);
+       r->pre_prefix = p - RX_WRAPPED(Rx);
         p += plen;
+
+        /* Adding a trailing \n causes this to compile properly:
+                my $R = qr / A B C # D E/x; /($R)/
+           Otherwise the parens are considered part of the comment */
         if (has_runon)
             *p++ = '\n';
         *p++ = ')';
         *p = 0;
-       SvCUR_set(rx, p - RX_WRAPPED(rx));
+       SvCUR_set(Rx, p - RX_WRAPPED(Rx));
     }
 
     r->intflags = 0;
@@ -7383,7 +7411,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                           (UV)((2*RExC_size+1) * sizeof(U32))));
 #endif
     SetProgLen(ri,RExC_size);
-    RExC_rx_sv = rx;
+    RExC_rx_sv = Rx;
     RExC_rx = r;
     RExC_rxi = ri;
 
@@ -7409,14 +7437,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         /* setup RExC_open_parens, which holds the address of each
          * OPEN tag, and to make things simpler for the 0 index
          * the start of the program - this is used later for offsets */
-        Newxz(RExC_open_parens, RExC_npar,regnode *);
+        Newxz(RExC_open_parens, RExC_npar, regnode *);
         SAVEFREEPV(RExC_open_parens);
         RExC_open_parens[0] = RExC_emit;
 
         /* setup RExC_close_parens, which holds the address of each
          * CLOSE tag, and to make things simpler for the 0 index
          * the end of the program - this is used later for offsets */
-        Newxz(RExC_close_parens, RExC_npar,regnode *);
+        Newxz(RExC_close_parens, RExC_npar, regnode *);
         SAVEFREEPV(RExC_close_parens);
         /* we dont know where end op starts yet, so we dont
          * need to set RExC_close_parens[0] like we do RExC_open_parens[0] above */
@@ -7431,8 +7459,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     }
     RExC_npar = 1;
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
-       ReREFCNT_dec(rx);
-        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#" UVxf, (UV) flags);
+       ReREFCNT_dec(Rx);
+        Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile for generation pass, flags=%#" UVxf, (UV) flags);
     }
     DEBUG_OPTIMISE_r(
         Perl_re_printf( aTHX_  "Starting post parse optimization\n");
@@ -7442,7 +7470,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        3-units-long substrs field. */
     Newx(r->substrs, 1, struct reg_substr_data);
     if (RExC_recurse_count) {
-        Newx(RExC_recurse,RExC_recurse_count,regnode *);
+        Newx(RExC_recurse, RExC_recurse_count, regnode *);
         SAVEFREEPV(RExC_recurse);
     }
 
@@ -7482,7 +7510,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
 
     if (UTF)
-       SvUTF8_on(rx);  /* Unicode in it? */
+       SvUTF8_on(Rx);  /* Unicode in it? */
     ri->regstclass = NULL;
     if (RExC_naughty >= TOO_NAUGHTY)   /* Probably an expensive pattern. */
        r->intflags |= PREGf_NAUGHTY;
@@ -7664,7 +7692,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         ){
            r->extflags |= RXf_CHECK_ALL;
         }
-       scan_commit(pRExC_state, &data,&minlen,0);
+       scan_commit(pRExC_state, &data,&minlen, 0);
 
 
         /* XXX this is done in reverse order because that's the way the
@@ -7958,9 +7986,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      * by setting the regexp SV to readonly-only instead. If the
      * pattern's been recompiled, the USEDness should remain. */
     if (old_re && SvREADONLY(old_re))
-        SvREADONLY_on(rx);
+        SvREADONLY_on(Rx);
 #endif
-    return rx;
+    return Rx;
 }
 
 
@@ -8031,7 +8059,7 @@ Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
                     && rx->offs[nums[i]].end != -1)
                 {
                     ret = newSVpvs("");
-                    CALLREG_NUMBUF_FETCH(r,nums[i],ret);
+                    CALLREG_NUMBUF_FETCH(r, nums[i], ret);
                     if (!retarray)
                         return ret;
                 } else {
@@ -8100,7 +8128,7 @@ Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
     if (rx && RXp_PAREN_NAMES(rx)) {
         HV *hv = RXp_PAREN_NAMES(rx);
         HE *temphe;
-        while ( (temphe = hv_iternext_flags(hv,0)) ) {
+        while ( (temphe = hv_iternext_flags(hv, 0)) ) {
             IV i;
             IV parno = 0;
             SV* sv_dat = HeVAL(temphe);
@@ -8162,7 +8190,7 @@ Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
         HV *hv= RXp_PAREN_NAMES(rx);
         HE *temphe;
         (void)hv_iterinit(hv);
-        while ( (temphe = hv_iternext_flags(hv,0)) ) {
+        while ( (temphe = hv_iternext_flags(hv, 0)) ) {
             IV i;
             IV parno = 0;
             SV* sv_dat = HeVAL(temphe);
@@ -8449,7 +8477,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
                 he_str = hv_fetch_ent( RExC_paren_names, sv_name, 0, 0 );
             if ( he_str )
                 sv_dat = HeVAL(he_str);
-            if ( ! sv_dat )
+            if ( ! sv_dat )     /* Didn't find group */
                 vFAIL("Reference to nonexistent named group");
             return sv_dat;
         }
@@ -8465,7 +8493,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
 #define DEBUG_PARSE_MSG(funcname)     DEBUG_PARSE_r({           \
     int num;                                                    \
     if (RExC_lastparse!=RExC_parse) {                           \
-        Perl_re_printf( aTHX_  "%s",                                        \
+        Perl_re_printf( aTHX_  "%s",                            \
             Perl_pv_pretty(aTHX_ RExC_mysv1, RExC_parse,        \
                 RExC_end - RExC_parse, 16,                      \
                 "", "",                                         \
@@ -8477,17 +8505,17 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
             )                                                   \
         );                                                      \
     } else                                                      \
-        Perl_re_printf( aTHX_ "%16s","");                                   \
+        Perl_re_printf( aTHX_ "%16s","");                       \
                                                                 \
     if (SIZE_ONLY)                                              \
        num = RExC_size + 1;                                     \
     else                                                        \
        num=REG_NODE_NUM(RExC_emit);                             \
     if (RExC_lastnum!=num)                                      \
-       Perl_re_printf( aTHX_ "|%4d",num);                                   \
+       Perl_re_printf( aTHX_ "|%4d", num);                      \
     else                                                        \
-       Perl_re_printf( aTHX_ "|%4s","");                                    \
-    Perl_re_printf( aTHX_ "|%*s%-4s",                                       \
+       Perl_re_printf( aTHX_ "|%4s","");                        \
+    Perl_re_printf( aTHX_ "|%*s%-4s",                           \
         (int)((depth*2)), "",                                   \
         (funcname)                                              \
     );                                                          \
@@ -8592,8 +8620,6 @@ S__invlist_array_init(SV* const invlist, const bool will_have_0)
     return zero_addr + *offset;
 }
 
-#endif
-
 PERL_STATIC_INLINE void
 S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
 {
@@ -8602,7 +8628,7 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
     PERL_UNUSED_CONTEXT;
     PERL_ARGS_ASSERT_INVLIST_SET_LEN;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     SvCUR_set(invlist,
               (len == 0)
@@ -8611,8 +8637,6 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
     assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist));
 }
 
-#ifndef PERL_IN_XSUB_RE
-
 STATIC void
 S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
 {
@@ -8630,8 +8654,8 @@ S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
 
     PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST_DESTROYS_SRC;
 
-    assert(SvTYPE(src) == SVt_INVLIST);
-    assert(SvTYPE(dest) == SVt_INVLIST);
+    assert(is_invlist(src));
+    assert(is_invlist(dest));
     assert(! invlist_is_iterating(src));
     assert(SvCUR(src) == 0 || SvCUR(src) < SvLEN(src));
 
@@ -8666,7 +8690,7 @@ S_get_invlist_previous_index_addr(SV* invlist)
      * */
     PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     return &(((XINVLIST*) SvANY(invlist))->prev_index);
 }
@@ -8704,7 +8728,7 @@ S_invlist_trim(SV* invlist)
 
     PERL_ARGS_ASSERT_INVLIST_TRIM;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     SvPV_renew(invlist, MAX(min_size, SvCUR(invlist) + 1));
 }
@@ -8714,7 +8738,7 @@ S_invlist_clear(pTHX_ SV* invlist)    /* Empty the inversion list */
 {
     PERL_ARGS_ASSERT_INVLIST_CLEAR;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     invlist_set_len(invlist, 0, 0);
     invlist_trim(invlist);
@@ -8740,7 +8764,7 @@ S_invlist_max(SV* const invlist)
 
     PERL_ARGS_ASSERT_INVLIST_MAX;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     /* Assumes worst case, in which the 0 element is not counted in the
      * inversion list, so subtracts 1 for that */
@@ -8748,6 +8772,23 @@ S_invlist_max(SV* const invlist)
            ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
            : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
 }
+
+STATIC void
+S_initialize_invlist_guts(pTHX_ SV* invlist, const Size_t initial_size)
+{
+    PERL_ARGS_ASSERT_INITIALIZE_INVLIST_GUTS;
+
+    /* First 1 is in case the zero element isn't in the list; second 1 is for
+     * trailing NUL */
+    SvGROW(invlist, TO_INTERNAL_SIZE(initial_size + 1) + 1);
+    invlist_set_len(invlist, 0, 0);
+
+    /* Force iterinit() to be used to get iteration to work */
+    invlist_iterfinish(invlist);
+
+    *get_invlist_previous_index_addr(invlist) = 0;
+}
+
 SV*
 Perl__new_invlist(pTHX_ IV initial_size)
 {
@@ -8765,15 +8806,7 @@ Perl__new_invlist(pTHX_ IV initial_size)
     /* Allocate the initial space */
     new_list = newSV_type(SVt_INVLIST);
 
-    /* First 1 is in case the zero element isn't in the list; second 1 is for
-     * trailing NUL */
-    SvGROW(new_list, TO_INTERNAL_SIZE(initial_size + 1) + 1);
-    invlist_set_len(new_list, 0, 0);
-
-    /* Force iterinit() to be used to get iteration to work */
-    *get_invlist_iter_addr(new_list) = (STRLEN) UV_MAX;
-
-    *get_invlist_previous_index_addr(new_list) = 0;
+    initialize_invlist_guts(new_list, initial_size);
 
     return new_list;
 }
@@ -8841,7 +8874,7 @@ S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
 
     PERL_ARGS_ASSERT_INVLIST_EXTEND;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     /* Add one to account for the zero element at the beginning which may not
      * be counted by the calling parameters */
@@ -9165,7 +9198,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
 
     PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
     assert(a != b);
-    assert(*output == NULL || SvTYPE(*output) == SVt_INVLIST);
+    assert(*output == NULL || is_invlist(*output));
 
     len_b = _invlist_len(b);
     if (len_b == 0) {
@@ -9207,7 +9240,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
          * union.  We can just return a copy of 'a' if '*output' doesn't point
          * to an existing list */
         if (*output == NULL) {
-            *output = invlist_clone(a);
+            *output = invlist_clone(a, NULL);
             return;
         }
 
@@ -9218,7 +9251,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         }
 
         /* Here, '*output' is to be overwritten by 'a' */
-        u = invlist_clone(a);
+        u = invlist_clone(a, NULL);
         invlist_replace_list_destroys_src(*output, u);
         SvREFCNT_dec_NN(u);
 
@@ -9235,7 +9268,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
          * the clone */
 
         SV ** dest = (*output == NULL) ? output : &u;
-        *dest = invlist_clone(b);
+        *dest = invlist_clone(b, NULL);
         if (complement_b) {
             _invlist_invert(*dest);
         }
@@ -9443,7 +9476,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
 
     PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
     assert(a != b);
-    assert(*i == NULL || SvTYPE(*i) == SVt_INVLIST);
+    assert(*i == NULL || is_invlist(*i));
 
     /* Special case if either one is empty */
     len_a = (a == NULL) ? 0 : _invlist_len(a);
@@ -9460,11 +9493,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
             }
 
             if (*i == NULL) {
-                *i = invlist_clone(a);
+                *i = invlist_clone(a, NULL);
                 return;
             }
 
-            r = invlist_clone(a);
+            r = invlist_clone(a, NULL);
             invlist_replace_list_destroys_src(*i, r);
             SvREFCNT_dec_NN(r);
             return;
@@ -9954,30 +9987,38 @@ Perl__invlist_invert(pTHX_ SV* const invlist)
     *get_invlist_offset_addr(invlist) = ! *get_invlist_offset_addr(invlist);
 }
 
-#endif
-
-PERL_STATIC_INLINE SV*
-S_invlist_clone(pTHX_ SV* const invlist)
+SV*
+Perl_invlist_clone(pTHX_ SV* const invlist, SV* new_invlist)
 {
 
     /* Return a new inversion list that is a copy of the input one, which is
      * unchanged.  The new list will not be mortal even if the old one was. */
 
-    /* Need to allocate extra space to accommodate Perl's addition of a
-     * trailing NUL to SvPV's, since it thinks they are always strings */
-    SV* new_invlist = _new_invlist(_invlist_len(invlist) + 1);
-    STRLEN physical_length = SvCUR(invlist);
-    bool offset = *(get_invlist_offset_addr(invlist));
+    const STRLEN nominal_length = _invlist_len(invlist);    /* Why not +1 XXX */
+    const STRLEN physical_length = SvCUR(invlist);
+    const bool offset = *(get_invlist_offset_addr(invlist));
 
     PERL_ARGS_ASSERT_INVLIST_CLONE;
 
+    /* Need to allocate extra space to accommodate Perl's addition of a
+     * trailing NUL to SvPV's, since it thinks they are always strings */
+    if (new_invlist == NULL) {
+        new_invlist = _new_invlist(nominal_length);
+    }
+    else {
+        sv_upgrade(new_invlist, SVt_INVLIST);
+        initialize_invlist_guts(new_invlist, nominal_length);
+    }
+
     *(get_invlist_offset_addr(new_invlist)) = offset;
-    invlist_set_len(new_invlist, _invlist_len(invlist), offset);
+    invlist_set_len(new_invlist, nominal_length, offset);
     Copy(SvPVX(invlist), SvPVX(new_invlist), physical_length, char);
 
     return new_invlist;
 }
 
+#endif
+
 PERL_STATIC_INLINE STRLEN*
 S_get_invlist_iter_addr(SV* invlist)
 {
@@ -9986,7 +10027,7 @@ S_get_invlist_iter_addr(SV* invlist)
 
     PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR;
 
-    assert(SvTYPE(invlist) == SVt_INVLIST);
+    assert(is_invlist(invlist));
 
     return &(((XINVLIST*) SvANY(invlist))->iterator);
 }
@@ -10182,23 +10223,6 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
     }
 }
 
-void
-Perl__load_PL_utf8_foldclosures (pTHX)
-{
-    assert(! PL_utf8_foldclosures);
-
-    /* If the folds haven't been read in, call a fold function
-     * to force that */
-    if (! PL_utf8_tofold) {
-        U8 dummy[UTF8_MAXBYTES_CASE+1];
-        const U8 hyphen[] = HYPHEN_UTF8;
-
-        /* This string is just a short named one above \xff */
-        toFOLD_utf8_safe(hyphen, hyphen + sizeof(hyphen) - 1, dummy, NULL);
-        assert(PL_utf8_tofold); /* Verify that worked */
-    }
-    PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
-}
 #endif
 
 #if defined(PERL_ARGS_ASSERT__INVLISTEQ) && !defined(PERL_IN_XSUB_RE)
@@ -10308,11 +10332,10 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
     }
     else {  /* Pattern is UTF-8 */
         U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
-        STRLEN foldlen = UTF8SKIP(s);
         const U8* e = s + bytelen;
-        SV** listp;
+        IV fc;
 
-        uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
+        fc = uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
 
         /* The only code points that aren't folded in a UTF EXACTFish
          * node are are the problematic ones in EXACTFL nodes */
@@ -10324,14 +10347,21 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
             U8 *d = folded;
             int i;
 
+            fc = -1;
             for (i = 0; i < UTF8_MAX_FOLD_CHAR_EXPAND && s < e; i++) {
                 if (isASCII(*s)) {
                     *(d++) = (U8) toFOLD(*s);
+                    if (fc < 0) {       /* Save the first fold */
+                        fc = *(d-1);
+                    }
                     s++;
                 }
                 else {
                     STRLEN len;
-                    toFOLD_utf8_safe(s, e, d, &len);
+                    UV fold = toFOLD_utf8_safe(s, e, d, &len);
+                    if (fc < 0) {       /* Save the first fold */
+                        fc = fold;
+                    }
                     d += len;
                     s += UTF8SKIP(s);
                 }
@@ -10340,15 +10370,13 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
             /* And set up so the code below that looks in this folded
              * buffer instead of the node's string */
             e = d;
-            foldlen = UTF8SKIP(folded);
             s = folded;
         }
 
         /* When we reach here 's' points to the fold of the first
          * character(s) of the node; and 'e' points to far enough along
          * the folded string to be just past any possible multi-char
-         * fold. 'foldlen' is the length in bytes of the first
-         * character in 's'
+         * fold.
          *
          * Unlike the non-UTF-8 case, the macro for determining if a
          * string is a multi-char fold requires all the characters to
@@ -10361,33 +10389,29 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
             invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
         }
         else {  /* Single char fold */
-
-            /* It matches all the things that fold to it, which are
-             * found in PL_utf8_foldclosures (including itself) */
-            invlist = add_cp_to_invlist(invlist, uc);
-            if (! PL_utf8_foldclosures)
-                _load_PL_utf8_foldclosures();
-            if ((listp = hv_fetch(PL_utf8_foldclosures,
-                                (char *) s, foldlen, FALSE)))
-            {
-                AV* list = (AV*) *listp;
-                IV k;
-                for (k = 0; k <= av_tindex_skip_len_mg(list); k++) {
-                    SV** c_p = av_fetch(list, k, FALSE);
-                    UV c;
-                    assert(c_p);
-
-                    c = SvUV(*c_p);
-
-                    /* /aa doesn't allow folds between ASCII and non- */
-                    if ((OP(node) == EXACTFAA || OP(node) == EXACTFAA_NO_TRIE)
-                        && isASCII(c) != isASCII(uc))
-                    {
-                        continue;
-                    }
-
-                    invlist = add_cp_to_invlist(invlist, c);
+            unsigned int k;
+            unsigned int first_folds_to;
+            const unsigned int * remaining_folds_to_list;
+            Size_t folds_to_count;
+
+            /* It matches itself */
+            invlist = add_cp_to_invlist(invlist, fc);
+
+            /* ... plus all the things that fold to it, which are found in
+             * PL_utf8_foldclosures */
+            folds_to_count = _inverse_folds(fc, &first_folds_to,
+                                                &remaining_folds_to_list);
+            for (k = 0; k < folds_to_count; k++) {
+                UV c = (k == 0) ? first_folds_to : remaining_folds_to_list[k-1];
+
+                /* /aa doesn't allow folds between ASCII and non- */
+                if (   (OP(node) == EXACTFAA || OP(node) == EXACTFAA_NO_TRIE)
+                    && isASCII(c) != isASCII(fc))
+                {
+                    continue;
                 }
+
+                invlist = add_cp_to_invlist(invlist, c);
             }
         }
     }
@@ -10648,7 +10672,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
 
     if (RExC_parse == name_start || *RExC_parse != ch) {
         /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
-        vFAIL2("Sequence %.3s... not terminated",parse_start);
+        vFAIL2("Sequence %.3s... not terminated", parse_start);
     }
 
     if (!SIZE_ONLY) {
@@ -10683,13 +10707,13 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
    upgraded to UTF-8.  Otherwise would only return NULL if regbranch() returns
    NULL, which cannot happen.  */
 STATIC regnode *
-S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
+S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     /* paren: Parenthesized? 0=top; 1,2=inside '(': changed to letter.
      * 2 is like 1, but indicates that nextchar() has been called to advance
      * RExC_parse beyond the '('.  Things like '(?' are indivisible tokens, and
      * this flag alerts us to the need to check for that */
 {
-    regnode *ret;              /* Will be the head of the group. */
+    regnode *ret = NULL;    /* Will be the head of the group. */
     regnode *br;
     regnode *lastbr;
     regnode *ender = NULL;
@@ -10734,7 +10758,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            vFAIL("Unmatched (");
         }
 
-        if ( *RExC_parse == '*') { /* (*VERB:ARG), (*construct:...) */
+        if (paren == 'r') {     /* Atomic script run */
+            paren = '>';
+            goto parse_rest;
+        }
+        else if ( *RExC_parse == '*') { /* (*VERB:ARG), (*construct:...) */
            char *start_verb = RExC_parse + 1;
            STRLEN verb_len;
            char *start_arg = NULL;
@@ -10804,44 +10832,50 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 
            switch ( *start_verb ) {
             case 'A':  /* (*ACCEPT) */
-                if ( memEQs(start_verb,verb_len,"ACCEPT") ) {
+                if ( memEQs(start_verb, verb_len,"ACCEPT") ) {
                    op = ACCEPT;
                    internal_argval = RExC_nestroot;
                }
                break;
             case 'C':  /* (*COMMIT) */
-                if ( memEQs(start_verb,verb_len,"COMMIT") )
+                if ( memEQs(start_verb, verb_len,"COMMIT") )
                     op = COMMIT;
                 break;
             case 'F':  /* (*FAIL) */
-                if ( verb_len==1 || memEQs(start_verb,verb_len,"FAIL") ) {
+                if ( verb_len==1 || memEQs(start_verb, verb_len,"FAIL") ) {
                    op = OPFAIL;
                }
                break;
             case ':':  /* (*:NAME) */
            case 'M':  /* (*MARK:NAME) */
-               if ( verb_len==0 || memEQs(start_verb,verb_len,"MARK") ) {
+               if ( verb_len==0 || memEQs(start_verb, verb_len,"MARK") ) {
                     op = MARKPOINT;
                     arg_required = 1;
                 }
                 break;
             case 'P':  /* (*PRUNE) */
-                if ( memEQs(start_verb,verb_len,"PRUNE") )
+                if ( memEQs(start_verb, verb_len,"PRUNE") )
                     op = PRUNE;
                 break;
             case 'S':   /* (*SKIP) */
-                if ( memEQs(start_verb,verb_len,"SKIP") )
+                if ( memEQs(start_verb, verb_len,"SKIP") )
                     op = SKIP;
                 break;
             case 'T':  /* (*THEN) */
                 /* [19:06] <TimToady> :: is then */
-                if ( memEQs(start_verb,verb_len,"THEN") ) {
+                if ( memEQs(start_verb, verb_len,"THEN") ) {
                     op = CUTGROUP;
                     RExC_seen |= REG_CUTGROUP_SEEN;
                 }
                 break;
             case 'a':
-                if (memEQs(start_verb, verb_len, "atomic")) {
+                if (   memEQs(start_verb, verb_len, "asr")
+                    || memEQs(start_verb, verb_len, "atomic_script_run"))
+                {
+                    paren = 'r';        /* Mnemonic: recursed run */
+                    goto script_run;
+                }
+                else if (memEQs(start_verb, verb_len, "atomic")) {
                     paren = 't';    /* AtOMIC */
                     goto alpha_assertions;
                 }
@@ -10878,8 +10912,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 if (   memEQs(start_verb, verb_len, "sr")
                     || memEQs(start_verb, verb_len, "script_run"))
                 {
+                    regnode * atomic;
+
                     paren = 's';
 
+                   script_run:
+
                     /* This indicates Unicode rules. */
                     REQUIRE_UNI_RULES(flagp, NULL);
 
@@ -10889,6 +10927,30 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 
                     RExC_parse = start_arg;
 
+                    if (RExC_in_script_run) {
+
+                        /*  Nested script runs are treated as no-ops, because
+                         *  if the nested one fails, the outer one must as
+                         *  well.  It could fail sooner, and avoid (??{} with
+                         *  side effects, but that is explicitly documented as
+                         *  undefined behavior. */
+
+                        ret = NULL;
+
+                        if (paren == 's') {
+                            paren = ':';
+                            goto parse_rest;
+                        }
+
+                        /* But, the atomic part of a nested atomic script run
+                         * isn't a no-op, but can be treated just like a '(?>'
+                         * */
+                        paren = '>';
+                        goto parse_rest;
+                    }
+
+                    /* By doing this here, we avoid extra warnings for nested
+                     * script runs */
                     if (PASS2) {
                         Perl_ck_warner_d(aTHX_
                             packWARN(WARN_EXPERIMENTAL__SCRIPT_RUN),
@@ -10897,18 +10959,35 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 
                     }
 
-                    if (RExC_in_script_run) {
-                        paren = ':';
-                        nextchar(pRExC_state);
-                        ret = NULL;
+                    if (paren == 's') {
+                        /* Here, we're starting a new regular script run */
+                        ret = reg_node(pRExC_state, SROPEN);
+                        RExC_in_script_run = 1;
+                        is_open = 1;
                         goto parse_rest;
                     }
-                    RExC_in_script_run = 1;
+
+                    /* Here, we are starting an atomic script run.  This is
+                     * handled by recursing to deal with the atomic portion
+                     * separately, enclosed in SROPEN ... SRCLOSE nodes */
 
                     ret = reg_node(pRExC_state, SROPEN);
 
-                    is_open = 1;
-                    goto parse_rest;
+                    RExC_in_script_run = 1;
+
+                    atomic = reg(pRExC_state, 'r', &flags, depth);
+                    if (flags & (RESTART_PASS1|NEED_UTF8)) {
+                        *flagp = flags & (RESTART_PASS1|NEED_UTF8);
+                        return NULL;
+                    }
+
+                    REGTAIL(pRExC_state, ret, atomic);
+
+                    REGTAIL(pRExC_state, atomic,
+                           reg_node(pRExC_state, SRCLOSE));
+
+                    RExC_in_script_run = 0;
+                    return ret;
                 }
 
                 break;
@@ -10983,9 +11062,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     ARG(ret) = add_data( pRExC_state,
                                          STR_WITH_LEN("S"));
                     RExC_rxi->data->data[ARG(ret)]=(void*)sv;
-                    ret->flags = 1;
+                    FLAGS(ret) = 1;
                 } else {
-                    ret->flags = 0;
+                    FLAGS(ret) = 0;
                 }
                 if ( internal_argval != -1 )
                     ARG2L_SET(ret, internal_argval);
@@ -11105,7 +11184,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                 SvIV_set(sv_dat, SvIVX(sv_dat) + 1);
                             }
                         } else {
-                            (void)SvUPGRADE(sv_dat,SVt_PVNV);
+                            (void)SvUPGRADE(sv_dat, SVt_PVNV);
                             sv_setpvn(sv_dat, (char *)&(RExC_npar),
                                                                 sizeof(I32));
                             SvIOK_on(sv_dat);
@@ -11208,6 +11287,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         RExC_parse++;
                         is_neg = TRUE;
                     }
+                    endptr = RExC_end;
                     if (grok_atoUV(RExC_parse, &unum, &endptr)
                         && unum <= I32_MAX
                     ) {
@@ -11341,7 +11421,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                        RExC_flags & RXf_PMf_COMPILETIME
                                       );
                    if (!SIZE_ONLY) {
-                       ret->flags = 2;
+                       FLAGS(ret) = 2;
                     }
                     REGTAIL(pRExC_state, ret, eval);
                     /* deal with the length of this later - MJD */
@@ -11363,43 +11443,43 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                 || RExC_parse[1] == '<'
                                 || RExC_parse[1] == '{'))
                        || (       RExC_parse[0] == '*'        /* (?(*...)) */
-                            && (   memBEGINs(RExC_parse +1,
-                                             (Size_t) (RExC_end - (RExC_parse + 1)),
-                                             "pla:")
-                                || memBEGINs(RExC_parse +1,
-                                             (Size_t) (RExC_end - (RExC_parse + 1)),
-                                             "plb")
-                                || memBEGINs(RExC_parse +1,
-                                             (Size_t) (RExC_end - (RExC_parse + 1)),
-                                             "nla")
-                                || memBEGINs(RExC_parse +1,
-                                             (Size_t) (RExC_end - (RExC_parse + 1)),
-                                             "nlb")
-                                || memBEGINs(RExC_parse +1,
-                                             (Size_t) (RExC_end - (RExC_parse + 1)),
-                                             "positive_lookahead")
-                                || memBEGINs(RExC_parse +1,
-                                             (Size_t) (RExC_end - (RExC_parse + 1)),
-                                             "positive_lookbehind")
-                                || memBEGINs(RExC_parse +1,
-                                             (Size_t) (RExC_end - (RExC_parse + 1)),
-                                             "negative_lookahead")
-                                || memBEGINs(RExC_parse +1,
-                                             (Size_t) (RExC_end - (RExC_parse + 1)),
-                                             "negative_lookbehind"))))
-                    ) { /* Lookahead or eval. */
-                       I32 flag;
-                        regnode *tail;
-
-                       ret = reg_node(pRExC_state, LOGICAL);
-                       if (!SIZE_ONLY)
-                           ret->flags = 1;
-
-                        tail = reg(pRExC_state, 1, &flag, depth+1);
-                        RETURN_NULL_ON_RESTART(flag,flagp);
-                        REGTAIL(pRExC_state, ret, tail);
-                       goto insert_if;
-                   }
+                            && (   memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "pla:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "plb:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "nla:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "nlb:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "positive_lookahead:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "positive_lookbehind:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "negative_lookahead:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "negative_lookbehind:"))))
+                ) { /* Lookahead or eval. */
+                    I32 flag;
+                    regnode *tail;
+
+                    ret = reg_node(pRExC_state, LOGICAL);
+                    if (!SIZE_ONLY)
+                        FLAGS(ret) = 1;
+
+                    tail = reg(pRExC_state, 1, &flag, depth+1);
+                    RETURN_FAIL_ON_RESTART(flag, flagp);
+                    REGTAIL(pRExC_state, ret, tail);
+                    goto insert_if;
+                }
                else if (   RExC_parse[0] == '<'     /* (?(<NAME>)...) */
                         || RExC_parse[0] == '\'' ) /* (?('NAME')...) */
                {
@@ -11421,14 +11501,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         RExC_rxi->data->data[num]=(void*)sv_dat;
                         SvREFCNT_inc_simple_void(sv_dat);
                     }
-                    ret = reganode(pRExC_state,NGROUPP,num);
+                    ret = reganode(pRExC_state, NGROUPP, num);
                     goto insert_if_check_paren;
                }
                else if (memBEGINs(RExC_parse,
                                    (STRLEN) (RExC_end - RExC_parse),
                                    "DEFINE"))
                 {
-                   ret = reganode(pRExC_state,DEFINEP,0);
+                   ret = reganode(pRExC_state, DEFINEP, 0);
                    RExC_parse += DEFINE_len;
                    is_define = 1;
                    goto insert_if_check_paren;
@@ -11446,6 +11526,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     }
                     else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
                         UV uv;
+                        endptr = RExC_end;
                         if (grok_atoUV(RExC_parse, &uv, &endptr)
                             && uv <= I32_MAX
                         ) {
@@ -11474,13 +11555,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         if (sv_dat)
                             parno = 1 + *((I32 *)SvPVX(sv_dat));
                    }
-                   ret = reganode(pRExC_state,INSUBP,parno);
+                   ret = reganode(pRExC_state, INSUBP, parno);
                    goto insert_if_check_paren;
                }
                else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
                     /* (?(1)...) */
                    char c;
                     UV uv;
+                    endptr = RExC_end;
                     if (grok_atoUV(RExC_parse, &uv, &endptr)
                         && uv <= I32_MAX
                     ) {
@@ -11500,10 +11582,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                    nextchar(pRExC_state);
                  insert_if:
                     REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
-                    br = regbranch(pRExC_state, &flags, 1,depth+1);
+                    br = regbranch(pRExC_state, &flags, 1, depth+1);
                    if (br == NULL) {
-                        RETURN_NULL_ON_RESTART(flags,flagp);
-                        FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
+                        RETURN_FAIL_ON_RESTART(flags, flagp);
+                        FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
                               (UV) flags);
                     } else
                         REGTAIL(pRExC_state, br, reganode(pRExC_state,
@@ -11519,9 +11601,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         /* Fake one for optimizer.  */
                         lastbr = reganode(pRExC_state, IFTHEN, 0);
 
-                        if (!regbranch(pRExC_state, &flags, 1,depth+1)) {
-                            RETURN_NULL_ON_RESTART(flags,flagp);
-                            FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
+                        if (!regbranch(pRExC_state, &flags, 1, depth+1)) {
+                            RETURN_FAIL_ON_RESTART(flags, flagp);
+                            FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
                                   (UV) flags);
                         }
                         REGTAIL(pRExC_state, ret, lastbr);
@@ -11604,6 +11686,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     RExC_open_parens[parno]= ret;
                }
            }
+
             Set_Node_Length(ret, 1); /* MJD */
             Set_Node_Offset(ret, RExC_parse); /* MJD */
            is_open = 1;
@@ -11620,13 +11703,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
    parse_rest:
     /* Pick up the branches, linking them together. */
     parse_start = RExC_parse;   /* MJD */
-    br = regbranch(pRExC_state, &flags, 1,depth+1);
+    br = regbranch(pRExC_state, &flags, 1, depth+1);
 
     /*     branch_len = (paren != 0); */
 
     if (br == NULL) {
-        RETURN_NULL_ON_RESTART(flags,flagp);
-        FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
+        RETURN_FAIL_ON_RESTART(flags, flagp);
+        FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags);
     }
     if (*RExC_parse == '|') {
        if (!SIZE_ONLY && RExC_extralen) {
@@ -11653,7 +11736,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     lastbr = br;
     while (*RExC_parse == '|') {
        if (!SIZE_ONLY && RExC_extralen) {
-           ender = reganode(pRExC_state, LONGJMP,0);
+           ender = reganode(pRExC_state, LONGJMP, 0);
 
             /* Append to the previous. */
             REGTAIL(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender);
@@ -11669,8 +11752,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
         br = regbranch(pRExC_state, &flags, 0, depth+1);
 
        if (br == NULL) {
-            RETURN_NULL_ON_RESTART(flags,flagp);
-            FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
+            RETURN_FAIL_ON_RESTART(flags, flagp);
+            FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags);
         }
         REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
        lastbr = br;
@@ -11693,8 +11776,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                if (RExC_nestroot == parno)
                    RExC_nestroot = 0;
            }
-            Set_Node_Offset(ender,RExC_parse+1); /* MJD */
-            Set_Node_Length(ender,1); /* MJD */
+            Set_Node_Offset(ender, RExC_parse+1); /* MJD */
+            Set_Node_Length(ender, 1); /* MJD */
            break;
        case 's':
            ender = reg_node(pRExC_state, SRCLOSE);
@@ -11807,10 +11890,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            if (paren == '>' || paren == 't') {
                node = SUSPEND, flag = 0;
             }
-           reginsert(pRExC_state, node,ret, depth+1);
+
+           reginsert(pRExC_state, node, ret, depth+1);
             Set_Node_Cur_Length(ret, parse_start);
            Set_Node_Offset(ret, parse_start + 1);
-           ret->flags = flag;
+           FLAGS(ret) = flag;
             REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL));
        }
     }
@@ -11872,7 +11956,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
        ret = NULL;
     else {
        if (!SIZE_ONLY && RExC_extralen)
-           ret = reganode(pRExC_state, BRANCHJ,0);
+           ret = reganode(pRExC_state, BRANCHJ, 0);
        else {
            ret = reg_node(pRExC_state, BRANCH);
             Set_Node_Length(ret, 1);
@@ -11888,12 +11972,12 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
                             FALSE /* Don't force to /x */ );
     while (RExC_parse < RExC_end && *RExC_parse != '|' && *RExC_parse != ')') {
        flags &= ~TRYAGAIN;
-        latest = regpiece(pRExC_state, &flags,depth+1);
+        latest = regpiece(pRExC_state, &flags, depth+1);
        if (latest == NULL) {
            if (flags & TRYAGAIN)
                continue;
-            RETURN_NULL_ON_RESTART(flags,flagp);
-            FAIL2("panic: regpiece returned NULL, flags=%#" UVxf, (UV) flags);
+            RETURN_FAIL_ON_RESTART(flags, flagp);
+            FAIL2("panic: regpiece returned failure, flags=%#" UVxf, (UV) flags);
        }
        else if (ret == NULL)
             ret = latest;
@@ -11960,10 +12044,10 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
     DEBUG_PARSE("piec");
 
-    ret = regatom(pRExC_state, &flags,depth+1);
+    ret = regatom(pRExC_state, &flags, depth+1);
     if (ret == NULL) {
-        RETURN_NULL_ON_RESTART_OR_FLAGS(flags,flagp,TRYAGAIN);
-        FAIL2("panic: regatom returned NULL, flags=%#" UVxf, (UV) flags);
+        RETURN_FAIL_ON_RESTART_OR_FLAGS(flags, flagp, TRYAGAIN);
+        FAIL2("panic: regatom returned failure, flags=%#" UVxf, (UV) flags);
     }
 
     op = *RExC_parse;
@@ -11989,6 +12073,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                maxpos = next;
            RExC_parse++;
             if (isDIGIT(*RExC_parse)) {
+                endptr = RExC_end;
                 if (!grok_atoUV(RExC_parse, &uv, &endptr))
                     vFAIL("Invalid quantifier in {,}");
                 if (uv >= REG_INFTY)
@@ -12002,6 +12087,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            else
                maxpos = RExC_parse;
             if (isDIGIT(*maxpos)) {
+                endptr = RExC_end;
                 if (!grok_atoUV(maxpos, &uv, &endptr))
                     vFAIL("Invalid quantifier in {,}");
                 if (uv >= REG_INFTY)
@@ -12052,14 +12138,14 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            else {
                regnode * const w = reg_node(pRExC_state, WHILEM);
 
-               w->flags = 0;
+               FLAGS(w) = 0;
                 REGTAIL(pRExC_state, ret, w);
                if (!SIZE_ONLY && RExC_extralen) {
-                   reginsert(pRExC_state, LONGJMP,ret, depth+1);
-                   reginsert(pRExC_state, NOTHING,ret, depth+1);
+                   reginsert(pRExC_state, LONGJMP, ret, depth+1);
+                   reginsert(pRExC_state, NOTHING, ret, depth+1);
                    NEXT_OFF(ret) = 3;  /* Go over LONGJMP. */
                }
-               reginsert(pRExC_state, CURLYX,ret, depth+1);
+               reginsert(pRExC_state, CURLYX, ret, depth+1);
                                 /* MJD hk */
                 Set_Node_Offset(ret, parse_start+1);
                 Set_Node_Length(ret,
@@ -12072,7 +12158,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    RExC_whilem_seen++, RExC_extralen += 3;
                 MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
            }
-           ret->flags = 0;
+           FLAGS(ret) = 0;
 
            if (min > 0)
                *flagp = WORST;
@@ -12202,8 +12288,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   * *node_p, nor *code_point_p, nor *flagp.
   *
   * If <cp_count> is not NULL, the caller wants to know the length (in code
-  * points) that this \N sequence matches.  This is set even if the function
-  * returns FALSE, as detailed below.
+  * points) that this \N sequence matches.  This is set, and the input is
+  * parsed for errors, even if the function returns FALSE, as detailed below.
   *
   * There are 5 possibilities here, as detailed in the next 5 paragraphs.
   *
@@ -12220,7 +12306,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   *
   * The fourth possibility is that \N resolves to a sequence of more than one
   * code points.  *cp_count will be set to the number of code points in the
-  * sequence. *node_p will be set to a generated node returned by this
+  * sequence. *node_p will be set to a generated node returned by this
   * function calling S_reg().
   *
   * The final possibility is that it is premature to be calling this function;
@@ -12251,10 +12337,14 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   */
 
     char * endbrace;    /* points to '}' following the name */
-    char *endchar;     /* Points to '.' or '}' ending cur char in the input
-                           stream */
     char* p = RExC_parse; /* Temporary */
 
+    SV * substitute_parse = NULL;
+    char *orig_end;
+    char *save_start;
+    I32 flags;
+    Size_t count = 0;   /* code point count kept internally by this function */
+
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_GROK_BSLASH_N;
@@ -12278,48 +12368,38 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
      * [^\n].  The latter is assumed when the {...} following the \N is a legal
      * quantifier, or there is no '{' at all */
     if (*p != '{' || regcurly(p)) {
-       RExC_parse = p;
+        RExC_parse = p;
         if (cp_count) {
             *cp_count = -1;
         }
 
-       if (! node_p) {
+        if (! node_p) {
             return FALSE;
         }
 
-       *node_p = reg_node(pRExC_state, REG_ANY);
-       *flagp |= HASWIDTH|SIMPLE;
-       MARK_NAUGHTY(1);
+        *node_p = reg_node(pRExC_state, REG_ANY);
+        *flagp |= HASWIDTH|SIMPLE;
+        MARK_NAUGHTY(1);
         Set_Node_Length(*node_p, 1); /* MJD */
-       return TRUE;
+        return TRUE;
     }
 
-    /* Here, we have decided it should be a named character or sequence */
-
     /* The test above made sure that the next real character is a '{', but
      * under the /x modifier, it could be separated by space (or a comment and
      * \n) and this is not allowed (for consistency with \x{...} and the
      * tokenizer handling of \N{NAME}). */
     if (*RExC_parse != '{') {
-       vFAIL("Missing braces on \\N{}");
+        vFAIL("Missing braces on \\N{}");
     }
 
-    RExC_parse++;      /* Skip past the '{' */
+    RExC_parse++;       /* Skip past the '{' */
 
     endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
     if (! endbrace) { /* no trailing brace */
         vFAIL2("Missing right brace on \\%c{}", 'N');
     }
-    else if (!(   endbrace == RExC_parse       /* nothing between the {} */
-               || memBEGINs(RExC_parse,   /* U+ (bad hex is checked below
-                                                   for a  better error msg) */
-                                  (STRLEN) (RExC_end - RExC_parse),
-                                 "U+")))
-    {
-       RExC_parse = endbrace;  /* position msg's '<--HERE' */
-       vFAIL("\\N{NAME} must be resolved by the lexer");
-    }
 
+    /* Here, we have decided it should be a named character or sequence */
     REQUIRE_UNI_RULES(flagp, FALSE); /* Unicode named chars imply Unicode
                                         semantics */
 
@@ -12332,164 +12412,193 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
             *cp_count = 0;
         }
         nextchar(pRExC_state);
-       if (! node_p) {
+        if (! node_p) {
             return FALSE;
         }
 
-        *node_p = reg_node(pRExC_state,NOTHING);
+        *node_p = reg_node(pRExC_state, NOTHING);
         return TRUE;
     }
 
-    RExC_parse += 2;   /* Skip past the 'U+' */
+    /* If we haven't got something that begins with 'U+', then it didn't get lexed. */
+    if (   endbrace - RExC_parse < 2
+        || strnNE(RExC_parse, "U+", 2))
+    {
+        RExC_parse = endbrace;  /* position msg's '<--HERE' */
+        vFAIL("\\N{NAME} must be resolved by the lexer");
+    }
 
-    /* Because toke.c has generated a special construct for us guaranteed not
-     * to have NULs, we can use a str function */
-    endchar = RExC_parse + strcspn(RExC_parse, ".}");
+        /* This code purposely indented below because of future changes coming */
 
-    /* Code points are separated by dots.  If none, there is only one code
-     * point, and is terminated by the brace */
+        /* We can get to here when the input is \N{U+...} or when toke.c has
+         * converted a name to the \N{U+...} form.  This include changing a
+         * name that evaluates to multiple code points to \N{U+c1.c2.c3 ...} */
 
-    if (endchar >= endbrace) {
-       STRLEN length_of_hex;
-       I32 grok_hex_flags;
+        RExC_parse += 2;    /* Skip past the 'U+' */
 
-        /* Here, exactly one code point.  If that isn't what is wanted, fail */
-        if (! code_point_p) {
-            RExC_parse = p;
-            return FALSE;
-        }
+        /* Code points are separated by dots.  The '}' terminates the whole
+         * thing. */
 
-        /* Convert code point from hex */
-       length_of_hex = (STRLEN)(endchar - RExC_parse);
-       grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
-                       | PERL_SCAN_DISALLOW_PREFIX
-
-                           /* No errors in the first pass (See [perl
-                            * #122671].)  We let the code below find the
-                            * errors when there are multiple chars. */
-                       | ((SIZE_ONLY)
-                          ? PERL_SCAN_SILENT_ILLDIGIT
-                          : 0);
-
-        /* This routine is the one place where both single- and double-quotish
-         * \N{U+xxxx} are evaluated.  The value is a Unicode code point which
-         * must be converted to native. */
-       *code_point_p = UNI_TO_NATIVE(grok_hex(RExC_parse,
-                                               &length_of_hex,
-                                               &grok_hex_flags,
-                                               NULL));
-
-       /* The tokenizer should have guaranteed validity, but it's possible to
-         * bypass it by using single quoting, so check.  Don't do the check
-         * here when there are multiple chars; we do it below anyway. */
-        if (length_of_hex == 0
-            || length_of_hex != (STRLEN)(endchar - RExC_parse) )
-        {
-            RExC_parse += length_of_hex;       /* Includes all the valid */
-            RExC_parse += (RExC_orig_utf8)     /* point to after 1st invalid */
-                            ? UTF8SKIP(RExC_parse)
-                            : 1;
-            /* Guard against malformed utf8 */
-            if (RExC_parse >= endchar) {
-                RExC_parse = endchar;
+        do {    /* Loop until the ending brace */
+            UV cp = 0;
+            char * start_digit;     /* The first of the current code point */
+            if (! isXDIGIT(*RExC_parse)) {
+                RExC_parse++;
+                vFAIL("Invalid hexadecimal number in \\N{U+...}");
             }
-            vFAIL("Invalid hexadecimal number in \\N{U+...}");
-        }
 
-        RExC_parse = endbrace + 1;
-        return TRUE;
-    }
-    else {  /* Is a multiple character sequence */
-       SV * substitute_parse;
-       STRLEN len;
-       char *orig_end = RExC_end;
-       char *save_start = RExC_start;
-        I32 flags;
+            start_digit = RExC_parse;
+            count++;
 
-        /* Count the code points, if desired, in the sequence */
-        if (cp_count) {
-            *cp_count = 0;
-            while (RExC_parse < endbrace) {
-                /* Point to the beginning of the next character in the sequence. */
-                RExC_parse = endchar + 1;
-                endchar = RExC_parse + strcspn(RExC_parse, ".}");
-                (*cp_count)++;
+            /* Loop through the hex digits of the current code point */
+            do {
+                /* Adding this digit will shift the result 4 bits.  If that
+                 * result would be above the legal max, it's overflow */
+                if (cp > MAX_LEGAL_CP >> 4) {
+
+                    /* Find the end of the code point */
+                    do {
+                        RExC_parse ++;
+                    } while (isXDIGIT(*RExC_parse) || *RExC_parse == '_');
+
+                    /* Be sure to synchronize this message with the similar one
+                     * in utf8.c */
+                    vFAIL4("Use of code point 0x%.*s is not allowed; the"
+                        " permissible max is 0x%" UVxf,
+                        (int) (RExC_parse - start_digit), start_digit,
+                        MAX_LEGAL_CP);
+                }
+
+                /* Accumulate this (valid) digit into the running total */
+                cp  = (cp << 4) + READ_XDIGIT(RExC_parse);
+
+                /* READ_XDIGIT advanced the input pointer.  Ignore a single
+                 * underscore separator */
+                if (*RExC_parse == '_' && isXDIGIT(RExC_parse[1])) {
+                    RExC_parse++;
+                }
+            } while (isXDIGIT(*RExC_parse));
+
+            /* Here, have accumulated the next code point */
+            if (RExC_parse >= endbrace) {   /* If done ... */
+                if (count != 1) {
+                    goto do_concat;
+                }
+
+                /* Here, is a single code point; fail if doesn't want that */
+                if (! code_point_p) {
+                    RExC_parse = p;
+                    return FALSE;
+                }
+
+                /* A single code point is easy to handle; just return it */
+                *code_point_p = UNI_TO_NATIVE(cp);
+                RExC_parse = endbrace;
+                nextchar(pRExC_state);
+                return TRUE;
             }
-        }
 
-        /* Fail if caller doesn't want to handle a multi-code-point sequence.
-         * But don't backup up the pointer if the caller wants to know how many
-         * code points there are (they can then handle things) */
-        if (! node_p) {
-            if (! cp_count) {
-                RExC_parse = p;
+            /* Here, the only legal thing would be a multiple character
+             * sequence (of the form "\N{U+c1.c2. ... }".   So the next
+             * character must be a dot (and the one after that can't be the
+             * endbrace, or we'd have something like \N{U+100.} ) */
+            if (*RExC_parse != '.' || RExC_parse + 1 >= endbrace) {
+                RExC_parse += (RExC_orig_utf8)  /* point to after 1st invalid */
+                                ? UTF8SKIP(RExC_parse)
+                                : 1;
+                if (RExC_parse >= endbrace) { /* Guard against malformed utf8 */
+                    RExC_parse = endbrace;
+                }
+                vFAIL("Invalid hexadecimal number in \\N{U+...}");
             }
-            return FALSE;
-        }
 
-       /* What is done here is to convert this to a sub-pattern of the form
-         * \x{char1}\x{char2}...  and then call reg recursively to parse it
-         * (enclosing in "(?: ... )" ).  That way, it retains its atomicness,
-         * while not having to worry about special handling that some code
-         * points may have. */
+            /* Here, looks like its really a multiple character sequence.  Fail
+             * if that's not what the caller wants.  But continue with counting
+             * and error checking if they still want a count */
+            if (! node_p && ! cp_count) {
+                return FALSE;
+            }
 
-       substitute_parse = newSVpvs("?:");
+            /* What is done here is to convert this to a sub-pattern of the
+             * form \x{char1}\x{char2}...  and then call reg recursively to
+             * parse it (enclosing in "(?: ... )" ).  That way, it retains its
+             * atomicness, while not having to worry about special handling
+             * that some code points may have.  We don't create a subpattern,
+             * but go through the motions of code point counting and error
+             * checking, if the caller doesn't want a node returned. */
 
-       while (RExC_parse < endbrace) {
+            if (node_p && count == 1) {
+                substitute_parse = newSVpvs("?:");
+            }
 
-           /* Convert to notation the rest of the code understands */
-           sv_catpv(substitute_parse, "\\x{");
-           sv_catpvn(substitute_parse, RExC_parse, endchar - RExC_parse);
-           sv_catpv(substitute_parse, "}");
+          do_concat:
 
-           /* Point to the beginning of the next character in the sequence. */
-           RExC_parse = endchar + 1;
-           endchar = RExC_parse + strcspn(RExC_parse, ".}");
+            if (node_p) {
+                /* Convert to notation the rest of the code understands */
+                sv_catpvs(substitute_parse, "\\x{");
+                sv_catpvn(substitute_parse, start_digit,
+                                            RExC_parse - start_digit);
+                sv_catpvs(substitute_parse, "}");
+            }
 
-       }
-        sv_catpv(substitute_parse, ")");
+            /* Move to after the dot (or ending brace the final time through.)
+             * */
+            RExC_parse++;
+            count++;
 
-        len = SvCUR(substitute_parse);
+        } while (RExC_parse < endbrace);
 
-       /* Don't allow empty number */
-       if (len < (STRLEN) 8) {
-            RExC_parse = endbrace;
-           vFAIL("Invalid hexadecimal number in \\N{U+...}");
-       }
+        if (! node_p) { /* Doesn't want the node */
+            assert (cp_count);
 
-        RExC_parse = RExC_start = RExC_adjusted_start
-                                              = SvPV_nolen(substitute_parse);
-       RExC_end = RExC_parse + len;
+            *cp_count = count;
+            return FALSE;
+        }
+
+        sv_catpvs(substitute_parse, ")");
 
-        /* The values are Unicode, and therefore not subject to recoding, but
-         * have to be converted to native on a non-Unicode (meaning non-ASCII)
-         * platform. */
 #ifdef EBCDIC
+        /* The values are Unicode, and therefore have to be converted to native
+         * on a non-Unicode (meaning non-ASCII) platform. */
         RExC_recode_x_to_native = 1;
 #endif
 
-        *node_p = reg(pRExC_state, 1, &flags, depth+1);
-
-        /* Restore the saved values */
-       RExC_start = RExC_adjusted_start = save_start;
-       RExC_parse = endbrace;
-       RExC_end = orig_end;
+    /* Here, we have the string the name evaluates to, ready to be parsed,
+     * stored in 'substitute_parse' as a series of valid "\x{...}\x{...}"
+     * constructs.  This can be called from within a substitute parse already.
+     * The error reporting mechanism doesn't work for 2 levels of this, but the
+     * code above has validated this new construct, so there should be no
+     * errors generated by the below.  And this isn' an exact copy, so the
+     * mechanism to seamlessly deal with this won't work.  XXX Maybe should
+     * turn off all warnings for safety? */
+    save_start = RExC_start;
+    orig_end = RExC_end;
+
+    RExC_parse = RExC_start = SvPVX(substitute_parse);
+    RExC_end = RExC_parse + SvCUR(substitute_parse);
+
+    *node_p = reg(pRExC_state, 1, &flags, depth+1);
+
+    /* Restore the saved values */
+    RExC_start = save_start;
+    RExC_parse = endbrace;
+    RExC_end = orig_end;
 #ifdef EBCDIC
-        RExC_recode_x_to_native = 0;
+    RExC_recode_x_to_native = 0;
 #endif
-        SvREFCNT_dec_NN(substitute_parse);
 
-        if (! *node_p) {
-            RETURN_X_ON_RESTART(FALSE, flags,flagp);
-            FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
-                (UV) flags);
-        }
-        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
-
-        nextchar(pRExC_state);
+    SvREFCNT_dec_NN(substitute_parse);
 
-        return TRUE;
+    if (! *node_p) {
+        RETURN_X_ON_RESTART(FALSE, flags, flagp);
+        FAIL2("panic: reg returned failure to grok_bslash_N, flags=%#" UVxf,
+            (UV) flags);
     }
+    *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+
+    nextchar(pRExC_state);
+
+    return TRUE;
 }
 
 
@@ -12739,9 +12848,9 @@ S_new_regcurly(const char *s, const char *e)
  * in which case return I32_MAX (rather than possibly 32-bit wrapping) */
 
 static I32
-S_backref_value(char *p)
+S_backref_value(char *p, char *e)
 {
-    const char* endptr;
+    const char* endptr = e;
     UV val;
     if (grok_atoUV(p, &val, &endptr) && val <= I32_MAX)
         return (I32)val;
@@ -12871,7 +12980,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     case '[':
     {
        char * const oregcomp_parse = ++RExC_parse;
-        ret = regclass(pRExC_state, flagp,depth+1,
+        ret = regclass(pRExC_state, flagp, depth+1,
                        FALSE, /* means parse the whole char class */
                        TRUE, /* allow multi-char folds */
                        FALSE, /* don't silence non-portable warnings. */
@@ -12880,8 +12989,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                        NULL,
                        NULL);
         if (ret == NULL) {
-            RETURN_NULL_ON_RESTART_FLAGP_OR_FLAGS(flagp,NEED_UTF8);
-            FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf,
+            RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp, NEED_UTF8);
+            FAIL2("panic: regclass returned failure to regatom, flags=%#" UVxf,
                   (UV) *flagp);
         }
        if (*RExC_parse != ']') {
@@ -12894,7 +13003,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     }
     case '(':
        nextchar(pRExC_state);
-        ret = reg(pRExC_state, 2, &flags,depth+1);
+        ret = reg(pRExC_state, 2, &flags, depth+1);
        if (ret == NULL) {
                if (flags & TRYAGAIN) {
                    if (RExC_parse >= RExC_end) {
@@ -12904,8 +13013,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    }
                    goto tryagain;
                }
-                RETURN_NULL_ON_RESTART(flags,flagp);
-                FAIL2("panic: reg returned NULL to regatom, flags=%#" UVxf,
+                RETURN_FAIL_ON_RESTART(flags, flagp);
+                FAIL2("panic: reg returned failure to regatom, flags=%#" UVxf,
                                                                  (UV) flags);
        }
        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
@@ -12948,7 +13057,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              * /\A/ from /^/ in split. We check ret because first pass we
              * have no regop struct to set the flags on. */
             if (PASS2)
-                ret->flags = 1;
+                FLAGS(ret) = 1;
            *flagp |= SIMPLE;
            goto finish_meta_pat;
        case 'G':
@@ -13179,7 +13288,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        case 'P':
             RExC_parse--;
 
-            ret = regclass(pRExC_state, flagp,depth+1,
+            ret = regclass(pRExC_state, flagp, depth+1,
                            TRUE, /* means just parse this element */
                            FALSE, /* don't allow multi-char folds */
                            FALSE, /* don't silence non-portable warnings.  It
@@ -13189,11 +13298,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                            TRUE, /* Allow an optimized regnode result */
                            NULL,
                            NULL);
-            RETURN_NULL_ON_RESTART_FLAGP(flagp);
+            RETURN_FAIL_ON_RESTART_FLAGP(flagp);
             /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if
              * multi-char folds are allowed.  */
             if (!ret)
-                FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf,
+                FAIL2("panic: regclass returned failure to regatom, flags=%#" UVxf,
                       (UV) *flagp);
 
             RExC_parse--;
@@ -13228,7 +13337,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 break;
             }
 
-            RETURN_NULL_ON_RESTART_FLAGP(flagp);
+            RETURN_FAIL_ON_RESTART_FLAGP(flagp);
 
             /* Here, evaluates to a single code point.  Go get that */
             RExC_parse = parse_start;
@@ -13245,7 +13354,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             {
                RExC_parse++;
                /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
-               vFAIL2("Sequence %.2s... not terminated",parse_start);
+               vFAIL2("Sequence %.2s... not terminated", parse_start);
            } else {
                RExC_parse += 2;
                 ret = handle_named_backref(pRExC_state,
@@ -13287,7 +13396,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     if (RExC_parse >= RExC_end) {
                         goto unterminated_g;
                     }
-                    num = S_backref_value(RExC_parse);
+                    num = S_backref_value(RExC_parse, RExC_end);
                     if (num == 0)
                         vFAIL("Reference to invalid group 0");
                     else if (num == I32_MAX) {
@@ -13305,7 +13414,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     }
                 }
                 else {
-                    num = S_backref_value(RExC_parse);
+                    num = S_backref_value(RExC_parse, RExC_end);
                     /* bare \NNN might be backref or octal - if it is larger
                      * than or equal RExC_npar then it is assumed to be an
                      * octal escape. Note RExC_npar is +1 from the actual
@@ -13322,10 +13431,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         && *RExC_parse != '8'
                         /* cannot be an octal escape it it starts with 9 */
                         && *RExC_parse != '9'
-                    )
-                    {
-                        /* Probably not a backref, instead likely to be an
-                         * octal character escape, e.g. \35 or \777.
+                    ) {
+                        /* Probably not meant to be a backref, instead likely
+                         * to be an octal character escape, e.g. \35 or \777.
                          * The above logic should make it obvious why using
                          * octal escapes in patterns is problematic. - Yves */
                         RExC_parse = parse_start;
@@ -13444,6 +13552,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              * need to figure this out until pass 2) */
             bool maybe_exactfu = PASS2;
 
+            /* To see if RExC_uni_semantics changes during parsing of the node.
+             * */
+            bool uni_semantics_at_node_start;
+
             /* The node_type may change below, but since the size of the node
              * doesn't change, it works */
            ret = reg_node(pRExC_state, node_type);
@@ -13470,6 +13582,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                    || UTF8_IS_START(UCHARAT(RExC_parse)));
 
+            uni_semantics_at_node_start = cBOOL(RExC_uni_semantics);
+
             /* Here, we have a literal character.  Find the maximal string of
              * them in the input that we can fit into a single EXACTish node.
              * We quit at the first non-literal or when the node gets full, or
@@ -13511,6 +13625,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    */
 
                    switch ((U8)*++p) {
+
                    /* These are all the special escapes. */
                    case 'A':             /* Start assertion */
                    case 'b': case 'B':   /* Word-boundary assertion*/
@@ -13553,7 +13668,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         ) {
                             if (*flagp & NEED_UTF8)
                                 FAIL("panic: grok_bslash_N set NEED_UTF8");
-                            RETURN_NULL_ON_RESTART_FLAGP(flagp);
+                            RETURN_FAIL_ON_RESTART_FLAGP(flagp);
 
                             /* Here, it wasn't a single code point.  Go close
                              * up this EXACTish node.  The switch() prior to
@@ -13674,9 +13789,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          * pattern.  */
 
                         /* NOTE, RExC_npar is 1 more than the actual number of
-                         * parens we have seen so far, hence the < RExC_npar below. */
-
-                        if ( !isDIGIT(p[1]) || S_backref_value(p) < RExC_npar)
+                         * parens we have seen so far, hence the "<" as opposed
+                         * to "<=" */
+                        if ( !isDIGIT(p[1]) || S_backref_value(p, RExC_end) < RExC_npar)
                         {  /* Not to be treated as an octal constant, go
                                    find backref */
                             --p;
@@ -13718,20 +13833,22 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    } /* End of switch on '\' */
                    break;
                case '{':
-                    /* Currently we allow an lbrace at the start of a construct
-                     * without raising a warning.  This is because we think we
-                     * will never want such a brace to be meant to be other
-                     * than taken literally. */
+                    /* Trying to gain new uses for '{' without breaking too
+                     * much existing code is hard.  The solution currently
+                     * adopted is:
+                     *  1)  If there is no ambiguity that a '{' should always
+                     *      be taken literally, at the start of a construct, we
+                     *      just do so.
+                     *  2)  If the literal '{' conflicts with our desired use
+                     *      of it as a metacharacter, we die.  The deprecation
+                     *      cycles for this have come and gone.
+                     *  3)  If there is ambiguity, we raise a simple warning.
+                     *      This could happen, for example, if the user
+                     *      intended it to introduce a quantifier, but slightly
+                     *      misspelled the quantifier.  Without this warning,
+                     *      the quantifier would silently be taken as a literal
+                     *      string of characters instead of a meta construct */
                    if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
-
-                        /* But, we raise a fatal warning otherwise, as the
-                         * deprecation cycle has come and gone.  Except that it
-                         * turns out that some heavily-relied on upstream
-                         * software, notably GNU Autoconf, have failed to fix
-                         * their uses.  For these, don't make it fatal unless
-                         * we anticipate using the '{' for something else.
-                         * This happens after any alpha, and for a looser {m,n}
-                         * quantifier specification */
                         if (      RExC_strict
                             || (  p > parse_start + 1
                                 && isALPHA_A(*(p - 1))
@@ -13743,10 +13860,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                   "illegal here");
                         }
                         if (PASS2) {
-                            ckWARNregdep(p + 1,
-                                        "Unescaped left brace in regex is "
-                                        "deprecated here (and will be fatal "
-                                        "in Perl 5.30), passed through");
+                            ckWARNreg(p + 1, "Unescaped left brace in regex is"
+                                             " passed through");
                         }
                    }
                    goto normal_default;
@@ -13907,6 +14022,25 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                 ender = 's';
                                 added_len = 2;
                             }
+                            else if (   uni_semantics_at_node_start
+                                     != RExC_uni_semantics)
+                            {
+                                /* Here, we are supossed to be using Unicode
+                                 * rules, but this folding node is not.  This
+                                 * happens during pass 1 when the node started
+                                 * out not under Unicode rules, but a \N{} was
+                                 * encountered during the processing of it,
+                                 * causing Unicode rules to be switched into.
+                                 * Pass 1 continues uninterrupted, as by the
+                                 * time we get to pass 2, we will know enough
+                                 * to generate the correct folds.  Except in
+                                 * this one case, we need to restart the node,
+                                 * because the fold of the sharp s requires 2
+                                 * characters, and the sizing needs to account
+                                 * for that. */
+                                p = oldp;
+                                goto loopdone;
+                            }
                             else {
                                 RExC_seen_unfolded_sharp_s = 1;
                                 maybe_exactfu = FALSE;
@@ -14062,10 +14196,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     len = s - s0 + 1;
                }
                 else {
-                    if (!  PL_NonL1NonFinalFold) {
-                        PL_NonL1NonFinalFold = _new_invlist_C_array(
-                                        NonL1_Perl_Non_Final_Folds_invlist);
-                    }
 
                     /* Point to the first byte of the final character */
                     s = (char *) utf8_hop((U8 *) s, -1);
@@ -14240,8 +14370,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     /* Position parse to next real character */
     skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                             FALSE /* Don't force to /x */ );
-    if (PASS2 && *RExC_parse == '{' && OP(ret) != SBOL && ! regcurly(RExC_parse)) {
-        ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through");
+    if (   PASS2 && *RExC_parse == '{'
+        && OP(ret) != SBOL && ! regcurly(RExC_parse))
+    {
+        if (RExC_strict || new_regcurly(RExC_parse, RExC_end)) {
+            RExC_parse++;
+            vFAIL("Unescaped left brace in regex is illegal here");
+        }
+        ckWARNreg(RExC_parse + 1, "Unescaped left brace in regex is"
+                                  " passed through");
     }
 
     return(ret);
@@ -14334,8 +14471,9 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
  * routine. q.v. */
 #define ADD_POSIX_WARNING(p, text)  STMT_START {                            \
         if (posix_warnings) {                                               \
-            if (! RExC_warn_text ) RExC_warn_text = (AV *) sv_2mortal((SV *) newAV()); \
-            av_push(RExC_warn_text, Perl_newSVpvf(aTHX_                          \
+            if (! RExC_warn_text ) RExC_warn_text =                         \
+                                         (AV *) sv_2mortal((SV *) newAV()); \
+            av_push(RExC_warn_text, Perl_newSVpvf(aTHX_                     \
                                              WARNING_PREFIX                 \
                                              text                           \
                                              REPORT_LOCATION,               \
@@ -15112,7 +15250,10 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                 ADD_POSIX_WARNING(p, "there is no terminating ']'");
             }
 
-            if (posix_warnings && RExC_warn_text && av_top_index(RExC_warn_text) > -1) {
+            if (   posix_warnings
+                && RExC_warn_text
+                && av_top_index(RExC_warn_text) > -1)
+            {
                 *posix_warnings = RExC_warn_text;
             }
         }
@@ -15208,10 +15349,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
         set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
     }
 
-    REQUIRE_UNI_RULES(flagp, NULL);   /* The use of this operator implies /u.
-                                         This is required so that the compile
-                                         time values are valid in all runtime
-                                         cases */
+    /* The use of this operator implies /u.  This is required so that the
+     * compile time values are valid in all runtime cases */
+    REQUIRE_UNI_RULES(flagp, NULL);
 
     /* This will return only an ANYOF regnode, or (unlikely) something smaller
      * (such as EXACT).  Thus we can skip most everything if just sizing.  We
@@ -15264,7 +15404,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
 
                     /* regclass() can only return RESTART_PASS1 and NEED_UTF8
                      * if multi-char folds are allowed.  */
-                    if (!regclass(pRExC_state, flagp,depth+1,
+                    if (!regclass(pRExC_state, flagp, depth+1,
                                   is_posix_class, /* parse the whole char
                                                      class only if not a
                                                      posix class */
@@ -15275,7 +15415,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                   &current,
                                   &posix_warnings
                                  ))
-                        FAIL2("panic: regclass returned NULL to handle_sets, "
+                        FAIL2("panic: regclass returned failure to handle_sets, "
                               "flags=%#" UVxf, (UV) *flagp);
 
                     /* function call leaves parse pointing to the ']', except
@@ -15542,7 +15682,7 @@ redo_curchar:
             case '\\':
                 /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if
                  * multi-char folds are allowed.  */
-                if (!regclass(pRExC_state, flagp,depth+1,
+                if (!regclass(pRExC_state, flagp, depth+1,
                               TRUE, /* means parse just the next thing */
                               FALSE, /* don't allow multi-char folds */
                               FALSE, /* don't silence non-portable warnings.  */
@@ -15551,7 +15691,7 @@ redo_curchar:
                               &current,
                               NULL))
                 {
-                    FAIL2("panic: regclass returned NULL to handle_sets, "
+                    FAIL2("panic: regclass returned failure to handle_sets, "
                           "flags=%#" UVxf, (UV) *flagp);
                 }
 
@@ -15578,7 +15718,7 @@ redo_curchar:
 
                 /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if
                  * multi-char folds are allowed.  */
-                if (!regclass(pRExC_state, flagp,depth+1,
+                if (!regclass(pRExC_state, flagp, depth+1,
                                 is_posix_class, /* parse the whole char
                                                     class only if not a
                                                     posix class */
@@ -15590,7 +15730,7 @@ redo_curchar:
                                 NULL
                                 ))
                 {
-                    FAIL2("panic: regclass returned NULL to handle_sets, "
+                    FAIL2("panic: regclass returned failure to handle_sets, "
                           "flags=%#" UVxf, (UV) *flagp);
                 }
 
@@ -15632,7 +15772,7 @@ redo_curchar:
                  * fence.  Get rid of it */
                 fence_ptr = av_pop(fence_stack);
                 assert(fence_ptr);
-                fence = SvIV(fence_ptr) - 1;
+                fence = SvIV(fence_ptr);
                 SvREFCNT_dec_NN(fence_ptr);
                 fence_ptr = NULL;
 
@@ -15866,7 +16006,7 @@ redo_curchar:
     if (av_tindex_skip_len_mg(stack) < 0   /* Was empty */
         || ((final = av_pop(stack)) == NULL)
         || ! IS_OPERAND(final)
-        || SvTYPE(final) != SVt_INVLIST
+        || ! is_invlist(final)
         || av_tindex_skip_len_mg(stack) >= 0)  /* More left on stack */
     {
       bad_syntax:
@@ -15908,7 +16048,7 @@ redo_curchar:
     RExC_flags &= ~RXf_PMf_FOLD;
     /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if multi-char
      * folds are allowed.  */
-    node = regclass(pRExC_state, flagp,depth+1,
+    node = regclass(pRExC_state, flagp, depth+1,
                     FALSE, /* means parse the whole char class */
                     FALSE, /* don't allow multi-char folds */
                     TRUE, /* silence non-portable warnings.  The above may very
@@ -15920,7 +16060,7 @@ redo_curchar:
                     NULL
                 );
     if (!node)
-        FAIL2("panic: regclass returned NULL to handle_sets, flags=%#" UVxf,
+        FAIL2("panic: regclass returned failure to handle_sets, flags=%#" UVxf,
                     PTR2UV(flagp));
 
     /* Fix up the node type if we are in locale.  (We have pretended we are
@@ -16023,25 +16163,19 @@ S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state,
 STATIC void
 S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist)
 {
-    /* This hard-codes the Latin1/above-Latin1 folding rules, so that an
-     * innocent-looking character class, like /[ks]/i won't have to go out to
-     * disk to find the possible matches.
+    /* This adds the Latin1/above-Latin1 folding rules.
      *
      * This should be called only for a Latin1-range code points, cp, which is
      * known to be involved in a simple fold with other code points above
      * Latin1.  It would give false results if /aa has been specified.
      * Multi-char folds are outside the scope of this, and must be handled
-     * specially.
-     *
-     * XXX It would be better to generate these via regen, in case a new
-     * version of the Unicode standard adds new mappings, though that is not
-     * really likely, and may be caught by the default: case of the switch
-     * below. */
+     * specially. */
 
     PERL_ARGS_ASSERT_ADD_ABOVE_LATIN1_FOLDS;
 
     assert(HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(cp));
 
+    /* The rules that are valid for all Unicode versions are hard-coded in */
     switch (cp) {
         case 'k':
         case 'K':
@@ -16065,36 +16199,54 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
                                         LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
             break;
 
-#ifdef LATIN_CAPITAL_LETTER_SHARP_S /* not defined in early Unicode releases */
+        default:    /* Other code points are checked against the data for the
+                       current Unicode version */
+          {
+            Size_t folds_to_count;
+            unsigned int first_folds_to;
+            const unsigned int * remaining_folds_to_list;
+            UV folded_cp;
 
-        case LATIN_SMALL_LETTER_SHARP_S:
-          *invlist = add_cp_to_invlist(*invlist, LATIN_CAPITAL_LETTER_SHARP_S);
-            break;
+            if (isASCII(cp)) {
+                folded_cp = toFOLD(cp);
+            }
+            else {
+                U8 dummy_fold[UTF8_MAXBYTES_CASE+1];
+                Size_t dummy_len;
+                folded_cp = _to_fold_latin1(cp, dummy_fold, &dummy_len, 0);
+            }
 
-#endif
+            if (folded_cp > 255) {
+                *invlist = add_cp_to_invlist(*invlist, folded_cp);
+            }
 
-#if    UNICODE_MAJOR_VERSION < 3                                        \
-   || (UNICODE_MAJOR_VERSION == 3 && UNICODE_DOT_VERSION == 0)
+            folds_to_count = _inverse_folds(folded_cp, &first_folds_to,
+                                                    &remaining_folds_to_list);
+            if (folds_to_count == 0) {
 
-        /* In 3.0 and earlier, U+0130 folded simply to 'i'; and in 3.0.1 so did
-         * U+0131.  */
-        case 'i':
-        case 'I':
-          *invlist =
-             add_cp_to_invlist(*invlist, LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
-#   if UNICODE_DOT_DOT_VERSION == 1
-          *invlist = add_cp_to_invlist(*invlist, LATIN_SMALL_LETTER_DOTLESS_I);
-#   endif
-            break;
-#endif
+                /* Use deprecated warning to increase the chances of this being
+                 * output */
+                if (PASS2) {
+                    ckWARN2reg_d(RExC_parse,
+                        "Perl folding rules are not up-to-date for 0x%02X;"
+                        " please use the perlbug utility to report;", cp);
+                }
+            }
+            else {
+                unsigned int i;
 
-        default:
-            /* Use deprecated warning to increase the chances of this being
-             * output */
-            if (PASS2) {
-                ckWARN2reg_d(RExC_parse, "Perl folding rules are not up-to-date for 0x%02X; please use the perlbug utility to report;", cp);
+                if (first_folds_to > 255) {
+                    *invlist = add_cp_to_invlist(*invlist, first_folds_to);
+                }
+                for (i = 0; i < folds_to_count - 1; i++) {
+                    if (remaining_folds_to_list[i] > 255) {
+                        *invlist = add_cp_to_invlist(*invlist,
+                                                    remaining_folds_to_list[i]);
+                    }
+                }
             }
             break;
+         }
     }
 }
 
@@ -16342,6 +16494,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     AV* posix_warnings = NULL;
     const bool do_posix_warnings =     return_posix_warnings
                                    || (PASS2 && ckWARN(WARN_REGEXP));
+    U8 op = END;    /* The returned node-type, initialized to an impossible
+                       one.  */
+    U8 anyof_flags = 0;   /* flag bits if the node is an ANYOF-type */
+    U32 posixl = 0;       /* bit field of posix classes matched under /l */
 
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -16358,21 +16514,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     allow_multi_folds = FALSE;
 #endif
 
-    /* Assume we are going to generate an ANYOF node. */
-    ret = reganode(pRExC_state,
-                   (LOC)
-                    ? ANYOFL
-                    : ANYOF,
-                   0);
-
     if (SIZE_ONLY) {
-       RExC_size += ANYOF_SKIP;
        listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
     }
     else {
-        ANYOF_FLAGS(ret) = 0;
-
-       RExC_emit += ANYOF_SKIP;
        listsv = newSVpvs_flags("# comment\n", SVs_TEMP);
        initial_listsv_len = SvCUR(listsv);
         SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated.  */
@@ -16569,7 +16714,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         if (*flagp & NEED_UTF8)
                             FAIL("panic: grok_bslash_N set NEED_UTF8");
 
-                        RETURN_NULL_ON_RESTART_FLAGP(flagp);
+                        RETURN_FAIL_ON_RESTART_FLAGP(flagp);
 
                         if (cp_count < 0) {
                             vFAIL("\\N in a character class must be a named character: \\N{...}");
@@ -16621,6 +16766,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
            case 'P':
                {
                char *e;
+                char *i;
 
                 /* We will handle any undefined properties ourselves */
                 U8 swash_init_flags = _CORE_SWASH_INIT_RETURN_IF_UNDEF
@@ -16630,6 +16776,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                         * anyway, to save a little time */
                                       |_CORE_SWASH_INIT_ACCEPT_INVLIST;
 
+                SvREFCNT_dec(swash); /* Free any left-overs */
                if (RExC_parse >= RExC_end)
                    vFAIL2("Empty \\%c", (U8)value);
                if (*RExC_parse == '{') {
@@ -16641,6 +16788,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     }
 
                     RExC_parse++;
+
+                    /* White space is allowed adjacent to the braces and after
+                     * any '^', even when not under /x */
                     while (isSPACE(*RExC_parse)) {
                          RExC_parse++;
                    }
@@ -16664,6 +16814,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                    n = e - RExC_parse;
                    while (isSPACE(*(RExC_parse + n - 1)))
                        n--;
+
                }   /* The \p isn't immediately followed by a '{' */
                else if (! isALPHA(*RExC_parse)) {
                     RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
@@ -16676,11 +16827,34 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                    n = 1;
                }
                if (!SIZE_ONLY) {
-                    SV* invlist;
-                    char* name;
+                    char* name = RExC_parse;
                     char* base_name;    /* name after any packages are stripped */
                     char* lookup_name = NULL;
                     const char * const colon_colon = "::";
+                    bool invert;
+
+                    SV* invlist;
+
+                    /* Temporary workaround for [perl #133136].  For this
+                    * precise input that is in the .t that is failing, load
+                    * utf8.pm, which is what the test wants, so that that
+                    * .t passes */
+                    if (     memEQs(RExC_start, e + 1 - RExC_start,
+                                    "foo\\p{Alnum}")
+                        && ! hv_common(GvHVn(PL_incgv),
+                                       NULL,
+                                       "utf8.pm", sizeof("utf8.pm") - 1,
+                                       0, HV_FETCH_ISEXISTS, NULL, 0))
+                    {
+                        require_pv("utf8.pm");
+                    }
+                    invlist = parse_uniprop_string(name, n, FOLD, &invert);
+                    if (invlist) {
+                        if (invert) {
+                            value ^= 'P' ^ 'p';
+                        }
+                    }
+                    else {
 
                     /* Try to get the definition of the property into
                      * <invlist>.  If /i is in effect, the effective property
@@ -16689,6 +16863,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
                     name = savepv(Perl_form(aTHX_ "%.*s", (int)n, RExC_parse));
                     SAVEFREEPV(name);
+
+                    for (i = RExC_parse; i < RExC_parse + n; i++) {
+                        if (isCNTRL(*i) && *i != '\t') {
+                            RExC_parse = e + 1;
+                            vFAIL2("Can't find Unicode property definition \"%s\"", name);
+                        }
+                    }
+
                     if (FOLD) {
                         lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
 
@@ -16699,7 +16881,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
                     /* Look up the property name, and get its swash and
                      * inversion list, if the property is found  */
-                    SvREFCNT_dec(swash); /* Free any left-overs */
                     swash = _core_swash_init("utf8",
                                              (lookup_name)
                                               ? lookup_name
@@ -16787,7 +16968,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
                         /* We don't know yet what this matches, so have to flag
                          * it */
-                        ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
+                        anyof_flags |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
                     }
                     else {
 
@@ -16799,19 +16980,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         {
                             has_user_defined_property = TRUE;
                         }
-                        else if
+                    }
+                    }
+                    if (invlist) {
+                        if (! has_user_defined_property &&
                             /* We warn on matching an above-Unicode code point
                              * if the match would return true, except don't
                              * warn for \p{All}, which has exactly one element
                              * = 0 */
                             (_invlist_contains_cp(invlist, 0x110000)
                                 && (! (_invlist_len(invlist) == 1
-                                       && *invlist_array(invlist) == 0)))
+                                       && *invlist_array(invlist) == 0))))
                         {
                             warn_super = TRUE;
                         }
 
-
                         /* Invert if asking for the complement */
                         if (value == 'P') {
                            _invlist_union_complement_2nd(properties,
@@ -16821,14 +17004,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             /* The swash can't be used as-is, because we've
                             * inverted things; delay removing it to here after
                             * have copied its invlist above */
-                            SvREFCNT_dec_NN(swash);
+                            if (! swash) {
+                                SvREFCNT_dec_NN(invlist);
+                            }
+                            SvREFCNT_dec(swash);
                             swash = NULL;
                         }
                         else {
                             _invlist_union(properties, invlist, &properties);
+                            if (! swash) {
+                                SvREFCNT_dec_NN(invlist);
+                            }
                        }
-                   }
-               }
+                    }
+                } /* End of actually getting the values in pass 2 */
+
                RExC_parse = e + 1;
                 namedclass = ANYOF_UNIPROP;  /* no official name, but it's
                                                 named */
@@ -16983,14 +17173,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  * by locale, and hence are dealt with separately */
                 if (! need_class) {
                     need_class = 1;
-                    if (SIZE_ONLY) {
-                        RExC_size += ANYOF_POSIXL_SKIP - ANYOF_SKIP;
-                    }
-                    else {
-                        RExC_emit += ANYOF_POSIXL_SKIP - ANYOF_SKIP;
-                    }
-                    ANYOF_FLAGS(ret) |= ANYOF_MATCHES_POSIXL;
-                    ANYOF_POSIXL_ZERO(ret);
+                    anyof_flags |= ANYOF_MATCHES_POSIXL;
 
                     /* We can't change this into some other type of node
                      * (unless this is the only element, in which case there
@@ -17001,15 +17184,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
                 /* Coverity thinks it is possible for this to be negative; both
                  * jhi and khw think it's not, but be safer */
-                assert(! (ANYOF_FLAGS(ret) & ANYOF_MATCHES_POSIXL)
+                assert(! (anyof_flags & ANYOF_MATCHES_POSIXL)
                        || (namedclass + ((namedclass % 2) ? -1 : 1)) >= 0);
 
                 /* See if it already matches the complement of this POSIX
                  * class */
-                if ((ANYOF_FLAGS(ret) & ANYOF_MATCHES_POSIXL)
-                    && ANYOF_POSIXL_TEST(ret, namedclass + ((namedclass % 2)
-                                                            ? -1
-                                                            : 1)))
+                if (  (anyof_flags & ANYOF_MATCHES_POSIXL)
+                    && POSIXL_TEST(posixl, namedclass + ((namedclass % 2)
+                                                         ? -1
+                                                         : 1)))
                 {
                     posixl_matches_all = TRUE;
                     break;  /* No need to continue.  Since it matches both
@@ -17018,7 +17201,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 }
 
                 /* Add this class to those that should be checked at runtime */
-                ANYOF_POSIXL_SET(ret, namedclass);
+                POSIXL_SET(posixl, namedclass);
 
                 /* The above-Latin1 characters are not subject to locale rules.
                  * Just add them, in the second pass, to the
@@ -17080,21 +17263,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     }
                 }
                 else if (  UNI_SEMANTICS
+                        || AT_LEAST_ASCII_RESTRICTED
                         || classnum == _CC_ASCII
                         || (DEPENDS_SEMANTICS && (   classnum == _CC_DIGIT
                                                   || classnum == _CC_XDIGIT)))
                 {
-                    /* We usually have to worry about /d and /a affecting what
-                     * POSIX classes match, with special code needed for /d
-                     * because we won't know until runtime what all matches.
-                     * But there is no extra work needed under /u, and
-                     * [:ascii:] is unaffected by /a and /d; and :digit: and
-                     * :xdigit: don't have runtime differences under /d.  So we
-                     * can special case these, and avoid some extra work below,
-                     * and at runtime. */
+                    /* We usually have to worry about /d affecting what POSIX
+                     * classes match, with special code needed because we won't
+                     * know until runtime what all matches.  But there is no
+                     * extra work needed under /u and /a; and [:ascii:] is
+                     * unaffected by /d; and :digit: and :xdigit: don't have
+                     * runtime differences under /d.  So we can special case
+                     * these, and avoid some extra work below, and at runtime.
+                     * */
                     _invlist_union_maybe_complement_2nd(
                                                      simple_posixes,
-                                                     PL_XPosix_ptrs[classnum],
+                                                      ((AT_LEAST_ASCII_RESTRICTED)
+                                                       ? PL_Posix_ptrs[classnum]
+                                                       : PL_XPosix_ptrs[classnum]),
                                                      namedclass % 2 != 0,
                                                      &simple_posixes);
                 }
@@ -17460,20 +17646,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
        char *save_end = RExC_end;
        char *save_parse = RExC_parse;
        char *save_start = RExC_start;
-        STRLEN prefix_end = 0;      /* We copy the character class after a
-                                       prefix supplied here.  This is the size
-                                       + 1 of that prefix */
+        Size_t constructed_prefix_len = 0; /* This gives the length of the
+                                              constructed portion of the
+                                              substitute parse. */
         bool first_time = TRUE;     /* First multi-char occurrence doesn't get
                                        a "|" */
         I32 reg_flags;
 
         assert(! invert);
-        assert(RExC_precomp_adj == 0); /* Only one level of recursion allowed */
+        /* Only one level of recursion allowed */
+        assert(RExC_copy_start_in_constructed == RExC_precomp);
 
 #if 0   /* Have decided not to deal with multi-char folds in inverted classes,
            because too confusing */
         if (invert) {
-            sv_catpv(substitute_parse, "(?:");
+            sv_catpvs(substitute_parse, "(?:");
         }
 #endif
 
@@ -17493,7 +17680,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                                 &PL_sv_undef)
                 {
                     if (! first_time) {
-                        sv_catpv(substitute_parse, "|");
+                        sv_catpvs(substitute_parse, "|");
                     }
                     first_time = FALSE;
 
@@ -17505,36 +17692,35 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         /* If the character class contains anything else besides these
          * multi-character folds, have to include it in recursive parsing */
         if (element_count) {
-            sv_catpv(substitute_parse, "|[");
-            prefix_end = SvCUR(substitute_parse);
+            sv_catpvs(substitute_parse, "|[");
+            constructed_prefix_len = SvCUR(substitute_parse);
             sv_catpvn(substitute_parse, orig_parse, RExC_parse - orig_parse);
 
             /* Put in a closing ']' only if not going off the end, as otherwise
              * we are adding something that really isn't there */
             if (RExC_parse < RExC_end) {
-                sv_catpv(substitute_parse, "]");
+                sv_catpvs(substitute_parse, "]");
             }
         }
 
-        sv_catpv(substitute_parse, ")");
+        sv_catpvs(substitute_parse, ")");
 #if 0
         if (invert) {
             /* This is a way to get the parse to skip forward a whole named
              * sequence instead of matching the 2nd character when it fails the
              * first */
-            sv_catpv(substitute_parse, "(*THEN)(*SKIP)(*FAIL)|.)");
+            sv_catpvs(substitute_parse, "(*THEN)(*SKIP)(*FAIL)|.)");
         }
 #endif
 
         /* Set up the data structure so that any errors will be properly
          * reported.  See the comments at the definition of
          * REPORT_LOCATION_ARGS for details */
-        RExC_precomp_adj = orig_parse - RExC_precomp;
-       RExC_start =  RExC_parse = SvPV(substitute_parse, len);
-        RExC_adjusted_start = RExC_start + prefix_end;
+        RExC_copy_start_in_input = (char *) orig_parse;
+       RExC_start = RExC_parse = SvPV(substitute_parse, len);
+        RExC_copy_start_in_constructed = RExC_start + constructed_prefix_len;
        RExC_end = RExC_parse + len;
         RExC_in_multi_char_class = 1;
-        RExC_emit = (regnode *)orig_emit;
 
        ret = reg(pRExC_state, 1, &reg_flags, depth+1);
 
@@ -17542,8 +17728,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
         /* And restore so can parse the rest of the pattern */
         RExC_parse = save_parse;
-       RExC_start = RExC_adjusted_start = save_start;
-        RExC_precomp_adj = 0;
+       RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = save_start;
        RExC_end = save_end;
        RExC_in_multi_char_class = 0;
         SvREFCNT_dec_NN(multi_char_matches);
@@ -17711,23 +17896,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          * an optimization */
         if (op != END) {
 
-            /* Throw away this ANYOF regnode, and emit the calculated one,
+            /* Emit the calculated regnode,
              * which should correspond to the beginning, not current, state of
              * the parse */
             const char * cur_parse = RExC_parse;
             RExC_parse = (char *)orig_parse;
-            if ( SIZE_ONLY) {
-                if (! LOC) {
-
-                    /* To get locale nodes to not use the full ANYOF size would
-                     * require moving the code above that writes the portions
-                     * of it that aren't in other nodes to after this point.
-                     * e.g.  ANYOF_POSIXL_SET */
-                    RExC_size = orig_size;
-                }
-            }
-            else {
-                RExC_emit = (regnode *)orig_emit;
                 if (PL_regkind[op] == POSIXD) {
                     if (op == POSIXL) {
                         RExC_contains_locale = 1;
@@ -17736,7 +17909,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         op += NPOSIXD - POSIXD;
                     }
                 }
-            }
 
             ret = reg_node(pRExC_state, op);
 
@@ -17766,9 +17938,26 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         }
     }
 
-    if (SIZE_ONLY)
+    /* Assume we are going to generate an ANYOF-type node. */
+    op = (posixl)
+          ? ANYOFPOSIXL
+          : (LOC)
+             ? ANYOFL
+             : ANYOF;
+    ret = reganode(pRExC_state, op, 0);
+
+    if (SIZE_ONLY) {
+        RExC_size += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP + 1;
         return ret;
+    }
+
     /****** !SIZE_ONLY (Pass 2) AFTER HERE *********/
+    RExC_emit += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP;
+
+    ANYOF_FLAGS(ret) = anyof_flags;
+    if (posixl) {
+        ANYOF_POSIXL_SET_TO_BITMAP(ret, posixl);
+    }
 
     /* If folding, we calculate all characters that could fold to or from the
      * ones already on the list */
@@ -17797,27 +17986,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             _invlist_intersection(PL_utf8_foldable, cp_foldable_list,
                                   &fold_intersection);
 
-            /* The folds for all the Latin1 characters are hard-coded into this
-             * program, but we have to go out to disk to get the others. */
-            if (invlist_highest(cp_foldable_list) >= 256) {
-
-                /* This is a hash that for a particular fold gives all
-                 * characters that are involved in it */
-                if (! PL_utf8_foldclosures) {
-                    _load_PL_utf8_foldclosures();
-                }
-            }
-
             /* Now look at the foldable characters in this class individually */
             invlist_iterinit(fold_intersection);
             while (invlist_iternext(fold_intersection, &start, &end)) {
                 UV j;
+                UV folded;
 
                 /* Look at every character in the range */
                 for (j = start; j <= end; j++) {
                     U8 foldbuf[UTF8_MAXBYTES_CASE+1];
                     STRLEN foldlen;
-                    SV** listp;
+                    unsigned int k;
+                    Size_t folds_to_count;
+                    unsigned int first_folds_to;
+                    const unsigned int * remaining_folds_to_list;
 
                     if (j < 256) {
 
@@ -17852,57 +18034,51 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      * rules hard-coded for it.  First, get its fold.  This is
                      * the simple fold, as the multi-character folds have been
                      * handled earlier and separated out */
-                    _to_uni_fold_flags(j, foldbuf, &foldlen,
+                    folded = _to_uni_fold_flags(j, foldbuf, &foldlen,
                                                         (ASCII_FOLD_RESTRICTED)
                                                         ? FOLD_FLAGS_NOMIX_ASCII
                                                         : 0);
 
-                    /* Single character fold of above Latin1.  Add everything in
-                    * its fold closure to the list that this node should match.
-                    * The fold closures data structure is a hash with the keys
-                    * being the UTF-8 of every character that is folded to, like
-                    * 'k', and the values each an array of all code points that
-                    * fold to its key.  e.g. [ 'k', 'K', KELVIN_SIGN ].
-                    * Multi-character folds are not included */
-                    if ((listp = hv_fetch(PL_utf8_foldclosures,
-                                        (char *) foldbuf, foldlen, FALSE)))
-                    {
-                        AV* list = (AV*) *listp;
-                        IV k;
-                        for (k = 0; k <= av_tindex_skip_len_mg(list); k++) {
-                            SV** c_p = av_fetch(list, k, FALSE);
-                            UV c;
-                            assert(c_p);
-
-                            c = SvUV(*c_p);
-
-                            /* /aa doesn't allow folds between ASCII and non- */
-                            if ((ASCII_FOLD_RESTRICTED
-                                && (isASCII(c) != isASCII(j))))
-                            {
-                                continue;
-                            }
+                    /* Single character fold of above Latin1.  Add everything
+                     * in its fold closure to the list that this node should
+                     * match. */
+                    folds_to_count = _inverse_folds(folded, &first_folds_to,
+                                                    &remaining_folds_to_list);
+                    for (k = 0; k <= folds_to_count; k++) {
+                        UV c = (k == 0)     /* First time through use itself */
+                                ? folded
+                                : (k == 1)  /* 2nd time use, the first fold */
+                                   ? first_folds_to
+
+                                     /* Then the remaining ones */
+                                   : remaining_folds_to_list[k-2];
+
+                        /* /aa doesn't allow folds between ASCII and non- */
+                        if ((   ASCII_FOLD_RESTRICTED
+                            && (isASCII(c) != isASCII(j))))
+                        {
+                            continue;
+                        }
 
-                            /* Folds under /l which cross the 255/256 boundary
-                             * are added to a separate list.  (These are valid
-                             * only when the locale is UTF-8.) */
-                            if (c < 256 && LOC) {
-                                *use_list = add_cp_to_invlist(*use_list, c);
-                                continue;
-                            }
+                        /* Folds under /l which cross the 255/256 boundary are
+                         * added to a separate list.  (These are valid only
+                         * when the locale is UTF-8.) */
+                        if (c < 256 && LOC) {
+                            *use_list = add_cp_to_invlist(*use_list, c);
+                            continue;
+                        }
 
-                            if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
-                            {
-                                cp_list = add_cp_to_invlist(cp_list, c);
-                            }
-                            else {
-                                /* Similarly folds involving non-ascii Latin1
-                                * characters under /d are added to their list */
-                                has_upper_latin1_only_utf8_matches
-                                        = add_cp_to_invlist(
-                                           has_upper_latin1_only_utf8_matches,
-                                           c);
-                            }
+                        if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
+                        {
+                            cp_list = add_cp_to_invlist(cp_list, c);
+                        }
+                        else {
+                            /* Similarly folds involving non-ascii Latin1
+                             * characters under /d are added to their list */
+                            has_upper_latin1_only_utf8_matches
+                                = add_cp_to_invlist(
+                                            has_upper_latin1_only_utf8_matches,
+                                            c);
                         }
                     }
                 }
@@ -17931,26 +18107,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         }
     }
     if (posixes || nposixes) {
-
-        /* We have to adjust /a and /aa */
-        if (AT_LEAST_ASCII_RESTRICTED) {
-
-            /* Under /a and /aa, nothing above ASCII matches these */
-            if (posixes) {
-                _invlist_intersection(posixes,
-                                    PL_XPosix_ptrs[_CC_ASCII],
-                                    &posixes);
-            }
-
-            /* Under /a and /aa, everything above ASCII matches these
-             * complements */
-            if (nposixes) {
-                _invlist_union_complement_2nd(nposixes,
-                                              PL_XPosix_ptrs[_CC_ASCII],
-                                              &nposixes);
-            }
-        }
-
         if (! DEPENDS_SEMANTICS) {
 
             /* For everything but /d, we can just add the current 'posixes' and
@@ -17981,7 +18137,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              *
              * Handle the case where there something like \W separately */
             if (nposixes) {
-                SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1);
+                SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1, NULL);
 
                 /* A complemented posix class matches all upper Latin1
                  * characters if not in UTF-8.  And it matches just certain
@@ -18165,8 +18321,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      * at compile time.  Besides not inverting folded locale now, we can't
      * invert if there are things such as \w, which aren't known until runtime
      * */
-    if (cp_list
-        && invert
+    if (     cp_list
+        &&   invert
         && OP(ret) != ANYOFD
         && ! (ANYOF_FLAGS(ret) & (ANYOF_LOCALE_FLAGS))
        && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
@@ -18658,7 +18814,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
     SV *si  = NULL;         /* Input swash initialization string */
     SV* invlist = NULL;
 
-    RXi_GET_DECL(prog,progi);
+    RXi_GET_DECL(prog, progi);
     const struct reg_data * const data = prog ? progi->data : NULL;
 
     PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
@@ -18845,7 +19001,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                                                   ));
             }
             else if (! *output_invlist) {
-                *output_invlist = invlist_clone(invlist);
+                *output_invlist = invlist_clone(invlist, NULL);
             }
             else {
                 _invlist_union(*output_invlist, invlist, output_invlist);
@@ -18983,6 +19139,7 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
      * RExC_emit */
 
     regnode * const ret = RExC_emit;
+
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_REGNODE_GUTS;
@@ -19080,7 +19237,8 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
 /*
 - reginsert - insert an operator in front of already-emitted operand
 *
-* Means relocating the operand.
+* That means that on exit 'operand' is the offset of the newly inserted
+* operator, and the original operand has been relocated.
 *
 * IMPORTANT NOTE - it is the *callers* responsibility to correctly
 * set up NEXT_OFF() of the inserted node if needed. Something like this:
@@ -19089,7 +19247,7 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
 * if (PASS2)
 *     NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
 *
-* ALSO NOTE - operand->flags will be set to 0 as well.
+* ALSO NOTE - FLAGS(newly-inserted-operator) will be set to 0 as well.
 */
 STATIC void
 S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
@@ -19105,7 +19263,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
     PERL_UNUSED_CONTEXT;
     PERL_UNUSED_ARG(depth);
 /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
-    DEBUG_PARSE_FMT("inst"," - %s",PL_reg_name[op]);
+    DEBUG_PARSE_FMT("inst"," - %s", PL_reg_name[op]);
     if (SIZE_ONLY) {
        RExC_size += size;
        return;
@@ -19127,13 +19285,13 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
              * regex, it can't move. RExC_close_parens[0] is the end
              * of the regex, it *can* move. */
             if ( paren && RExC_open_parens[paren] >= operand ) {
-                /*DEBUG_PARSE_FMT("open"," - %d",size);*/
+                /*DEBUG_PARSE_FMT("open"," - %d", size);*/
                 RExC_open_parens[paren] += size;
             } else {
                 /*DEBUG_PARSE_FMT("open"," - %s","ok");*/
             }
             if ( RExC_close_parens[paren] >= operand ) {
-                /*DEBUG_PARSE_FMT("close"," - %d",size);*/
+                /*DEBUG_PARSE_FMT("close"," - %d", size);*/
                 RExC_close_parens[paren] += size;
             } else {
                 /*DEBUG_PARSE_FMT("close"," - %s","ok");*/
@@ -19149,16 +19307,16 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
         if (RExC_offsets) {     /* MJD 20010112 */
            MJD_OFFSET_DEBUG(
                  ("%s(%d): (op %s) %s copy %" UVuf " -> %" UVuf " (max %" UVuf ").\n",
-                  "reg_insert",
+                  "reginsert",
                  __LINE__,
                  PL_reg_name[op],
-                  (UV)(dst - RExC_emit_start) > RExC_offsets[0]
+                  (UV)(REGNODE_OFFSET(dst)) > RExC_offsets[0]
                    ? "Overwriting end of array!\n" : "OK",
-                  (UV)(src - RExC_emit_start),
-                  (UV)(dst - RExC_emit_start),
+                  (UV)REGNODE_OFFSET(src),
+                  (UV)REGNODE_OFFSET(dst),
                   (UV)RExC_offsets[0]));
-           Set_Node_Offset_To_R(dst-RExC_emit_start, Node_Offset(src));
-           Set_Node_Length_To_R(dst-RExC_emit_start, Node_Length(src));
+           Set_Node_Offset_To_R(REGNODE_OFFSET(dst), Node_Offset(src));
+           Set_Node_Length_To_R(REGNODE_OFFSET(dst), Node_Length(src));
         }
 #endif
     }
@@ -19171,9 +19329,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
               "reginsert",
              __LINE__,
              PL_reg_name[op],
-              (UV)(place - RExC_emit_start) > RExC_offsets[0]
+              (UV)REGNODE_OFFSET(place) > RExC_offsets[0]
               ? "Overwriting end of array!\n" : "OK",
-              (UV)(place - RExC_emit_start),
+              (UV)REGNODE_OFFSET(place),
               (UV)(RExC_parse - RExC_start),
               (UV)RExC_offsets[0]));
        Set_Node_Offset(place, RExC_parse);
@@ -19181,8 +19339,10 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
     }
 #endif
     src = NEXTOPER(place);
-    place->flags = 0;
-    FILL_ADVANCE_NODE(place, op);
+    FLAGS(place) = 0;
+    FILL_NODE(place, op);
+
+    /* Zero out any arguments in the new node */
     Zero(src, offset, regnode);
 }
 
@@ -19252,7 +19412,7 @@ to control which is which.
 
 STATIC U8
 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
-                      const regnode *val,U32 depth)
+                      const regnode *val, U32 depth)
 {
     regnode *scan;
     U8 exact = PSEUDO;
@@ -19380,15 +19540,15 @@ S_regdump_intflags(pTHX_ const char *lead, const U32 flags)
     for (bit=0; bit<REG_INTFLAGS_NAME_SIZE; bit++) {
         if (flags & (1<<bit)) {
             if (!set++ && lead)
-                Perl_re_printf( aTHX_  "%s",lead);
-            Perl_re_printf( aTHX_  "%s ",PL_reg_intflags_name[bit]);
+                Perl_re_printf( aTHX_  "%s", lead);
+            Perl_re_printf( aTHX_  "%s ", PL_reg_intflags_name[bit]);
         }
     }
     if (lead)  {
         if (set)
             Perl_re_printf( aTHX_  "\n");
         else
-            Perl_re_printf( aTHX_  "%s[none-set]\n",lead);
+            Perl_re_printf( aTHX_  "%s[none-set]\n", lead);
     }
 }
 
@@ -19407,13 +19567,13 @@ S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
                continue;
            }
             if (!set++ && lead)
-                Perl_re_printf( aTHX_  "%s",lead);
-            Perl_re_printf( aTHX_  "%s ",PL_reg_extflags_name[bit]);
+                Perl_re_printf( aTHX_  "%s", lead);
+            Perl_re_printf( aTHX_  "%s ", PL_reg_extflags_name[bit]);
         }
     }
     if ((cs = get_regex_charset(flags)) != REGEX_DEPENDS_CHARSET) {
             if (!set++ && lead) {
-                Perl_re_printf( aTHX_  "%s",lead);
+                Perl_re_printf( aTHX_  "%s", lead);
             }
             switch (cs) {
                 case REGEX_UNICODE_CHARSET:
@@ -19437,7 +19597,7 @@ S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
         if (set)
             Perl_re_printf( aTHX_  "\n");
         else
-            Perl_re_printf( aTHX_  "%s[none-set]\n",lead);
+            Perl_re_printf( aTHX_  "%s[none-set]\n", lead);
     }
 }
 #endif
@@ -19449,7 +19609,7 @@ Perl_regdump(pTHX_ const regexp *r)
     int i;
     SV * const sv = sv_newmortal();
     SV *dsv= sv_newmortal();
-    RXi_GET_DECL(r,ri);
+    RXi_GET_DECL(r, ri);
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_REGDUMP;
@@ -19524,8 +19684,8 @@ Perl_regdump(pTHX_ const regexp *r)
         Perl_re_printf( aTHX_  "with eval ");
     Perl_re_printf( aTHX_  "\n");
     DEBUG_FLAGS_r({
-        regdump_extflags("r->extflags: ",r->extflags);
-        regdump_intflags("r->intflags: ",r->intflags);
+        regdump_extflags("r->extflags: ", r->extflags);
+        regdump_intflags("r->intflags: ", r->intflags);
     });
 #else
     PERL_ARGS_ASSERT_REGDUMP;
@@ -19590,7 +19750,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 {
 #ifdef DEBUGGING
     int k;
-    RXi_GET_DECL(prog,progi);
+    RXi_GET_DECL(prog, progi);
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_REGPROP;
@@ -19631,7 +19791,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         const reg_trie_data * const trie
            = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
 
-        Perl_sv_catpvf(aTHX_ sv, "-%s",PL_reg_name[o->flags]);
+        Perl_sv_catpvf(aTHX_ sv, "-%s", PL_reg_name[o->flags]);
         DEBUG_TRIE_COMPILE_r({
           if (trie->jump)
             sv_catpvs(sv, "(JUMP)");
@@ -19755,7 +19915,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 
         const bool inverted = flags & ANYOF_INVERT;
 
-       if (OP(o) == ANYOFL) {
+       if (OP(o) == ANYOFL || OP(o) == ANYOFPOSIXL) {
             if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
                 sv_catpvs(sv, "{utf8-locale-reqd}");
             }
@@ -19823,7 +19983,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                 sv_catpvs(sv, "{");
             }
             else if (do_sep) {
-                Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
+                Perl_sv_catpvf(aTHX_ sv,"%s][%s", PL_colors[1], PL_colors[0]);
             }
             sv_catsv(sv, unresolved);
             if (inverted) {
@@ -19843,7 +20003,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 
             /* This is output in a separate [] */
             if (do_sep) {
-                Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
+                Perl_sv_catpvf(aTHX_ sv,"%s][%s", PL_colors[1], PL_colors[0]);
             }
 
             /* And, for easy of understanding, it is shown in the
@@ -19903,11 +20063,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         U8 index = FLAGS(o) * 2;
         if (index < C_ARRAY_LENGTH(anyofs)) {
             if (*anyofs[index] != '[')  {
-                sv_catpv(sv, "[");
+                sv_catpvs(sv, "[");
             }
             sv_catpv(sv, anyofs[index]);
             if (*anyofs[index] != '[')  {
-                sv_catpv(sv, "]");
+                sv_catpvs(sv, "]");
             }
         }
         else {
@@ -19970,7 +20130,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
                      "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
                      PL_colors[4],
                      RX_UTF8(r) ? "utf8 " : "",
-                     PL_colors[5],PL_colors[0],
+                     PL_colors[5], PL_colors[0],
                      s,
                      PL_colors[1],
                      (strlen(s) > PL_dump_re_max_len ? "..." : ""));
@@ -20124,7 +20284,7 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
     drx->mother_re = ReREFCNT_inc(srx->mother_re ? srx->mother_re : ssv);
     SvREFCNT_inc_void(drx->qr_anoncv);
     if (srx->recurse_locinput)
-        Newx(drx->recurse_locinput,srx->nparens + 1,char *);
+        Newx(drx->recurse_locinput, srx->nparens + 1, char *);
 
     return dsv;
 }
@@ -20147,7 +20307,7 @@ void
 Perl_regfree_internal(pTHX_ REGEXP * const rx)
 {
     struct regexp *const r = ReANY(rx);
-    RXi_GET_DECL(r,ri);
+    RXi_GET_DECL(r, ri);
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_REGFREE_INTERNAL;
@@ -20160,9 +20320,10 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
             RE_PV_QUOTED_DECL(s, RX_UTF8(rx),
                 dsv, RX_PRECOMP(rx), RX_PRELEN(rx), PL_dump_re_max_len);
             Perl_re_printf( aTHX_ "%sFreeing REx:%s %s\n",
-                PL_colors[4],PL_colors[5],s);
+                PL_colors[4], PL_colors[5], s);
         }
     });
+
 #ifdef RE_TRACK_PATTERN_OFFSETS
     if (ri->u.offsets)
         Safefree(ri->u.offsets);             /* 20010421 MJD */
@@ -20255,9 +20416,9 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
     Safefree(ri);
 }
 
-#define av_dup_inc(s,t)        MUTABLE_AV(sv_dup_inc((const SV *)s,t))
-#define hv_dup_inc(s,t)        MUTABLE_HV(sv_dup_inc((const SV *)s,t))
-#define SAVEPVN(p,n)   ((p) ? savepvn(p,n) : NULL)
+#define av_dup_inc(s, t)       MUTABLE_AV(sv_dup_inc((const SV *)s, t))
+#define hv_dup_inc(s, t)       MUTABLE_HV(sv_dup_inc((const SV *)s, t))
+#define SAVEPVN(p, n)  ((p) ? savepvn(p, n) : NULL)
 
 /*
    re_dup_guts - duplicate a regexp.
@@ -20334,10 +20495,10 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
     RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
     ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param));
     if (r->recurse_locinput)
-        Newx(ret->recurse_locinput,r->nparens + 1,char *);
+        Newx(ret->recurse_locinput, r->nparens + 1, char *);
 
     if (ret->pprivate)
-       RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
+       RXi_SET(ret, CALLREGDUPE_PVT(dstr, param));
 
     if (RX_MATCH_COPIED(dstr))
        ret->subbeg  = SAVEPVN(ret->subbeg, ret->sublen);
@@ -20379,7 +20540,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
     struct regexp *const r = ReANY(rx);
     regexp_internal *reti;
     int len;
-    RXi_GET_DECL(r,ri);
+    RXi_GET_DECL(r, ri);
 
     PERL_ARGS_ASSERT_REGDUPE_INTERNAL;
 
@@ -20484,7 +20645,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
         Copy(ri->u.offsets, reti->u.offsets, 2*len+1, U32);
     }
 #else
-    SetProgLen(reti,len);
+    SetProgLen(reti, len);
 #endif
 
     return (void*)reti;
@@ -20519,7 +20680,7 @@ Perl_regnext(pTHX_ regnode *p)
 #endif
 
 STATIC void
-S_re_croak2(pTHX_ bool utf8, const char* pat1,const char* pat2,...)
+S_re_croak2(pTHX_ bool utf8, const char* pat1, const char* pat2,...)
 {
     va_list args;
     STRLEN l1 = strlen(pat1);
@@ -20541,7 +20702,7 @@ S_re_croak2(pTHX_ bool utf8, const char* pat1,const char* pat2,...)
     va_start(args, pat2);
     msv = vmess(buf, &args);
     va_end(args);
-    message = SvPV_const(msv,l1);
+    message = SvPV_const(msv, l1);
     if (l1 > 512)
        l1 = 512;
     Copy(message, buf, l1 , char);
@@ -21000,7 +21161,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
      * don't change the caller's list) */
     if (nonbitmap_invlist) {
         assert(invlist_highest(nonbitmap_invlist) < NUM_ANYOF_CODE_POINTS);
-        invlist = invlist_clone(nonbitmap_invlist);
+        invlist = invlist_clone(nonbitmap_invlist, NULL);
     }
     else {  /* Worst case size is every other code point is matched */
         invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
@@ -21021,10 +21182,10 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
             /* And this flag for matching all non-ASCII 0xFF and below */
             if (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
             {
-                not_utf8 = invlist_clone(PL_UpperLatin1);
+                not_utf8 = invlist_clone(PL_UpperLatin1, NULL);
             }
         }
-        else if (OP(node) == ANYOFL) {
+        else if (OP(node) == ANYOFL || OP(node) == ANYOFPOSIXL) {
 
             /* If either of these flags are set, what matches isn't
              * determinable except during execution, so don't know enough here
@@ -21040,7 +21201,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
 
                 posixes = newSVpvs("");
                 for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
-                    if (ANYOF_POSIXL_TEST(node,i)) {
+                    if (ANYOF_POSIXL_TEST(node, i)) {
                         sv_catpv(posixes, anyofs[i]);
                     }
                 }
@@ -21077,7 +21238,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
 
         /* Since this list is passed in, we have to make a copy before
          * modifying it */
-        only_utf8_locale = invlist_clone(only_utf8_locale_invlist);
+        only_utf8_locale = invlist_clone(only_utf8_locale_invlist, NULL);
 
         _invlist_subtract(only_utf8_locale, invlist, &only_utf8_locale);
 
@@ -21207,14 +21368,14 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
     const regnode *next;
     const regnode *optstart= NULL;
 
-    RXi_GET_DECL(r,ri);
+    RXi_GET_DECL(r, ri);
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_DUMPUNTIL;
 
 #ifdef DEBUG_DUMPUNTIL
-    Perl_re_printf( aTHX_  "--- %d : %d - %d - %d\n",indent,node-start,
-        last ? last-start : 0,plast ? plast-start : 0);
+    Perl_re_printf( aTHX_  "--- %d : %d - %d - %d\n", indent, node-start,
+        last ? last-start : 0, plast ? plast-start : 0);
 #endif
 
     if (plast && plast < last)
@@ -21286,7 +21447,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
            I32 word_idx;
             SvPVCLEAR(sv);
            for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
-               SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);
+               SV ** const elem_ptr = av_fetch(trie_words, word_idx, 0);
 
                 Perl_re_indentf( aTHX_  "%s ",
                     indent+3,
@@ -21361,6 +21522,600 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
 
 #endif /* DEBUGGING */
 
+#ifndef PERL_IN_XSUB_RE
+
+#include "uni_keywords.h"
+
+void
+Perl_init_uniprops(pTHX)
+{
+    /* Set up the inversion list global variables */
+
+    PL_XPosix_ptrs[_CC_ASCII] = _new_invlist_C_array(uni_prop_ptrs[UNI_ASCII]);
+    PL_XPosix_ptrs[_CC_ALPHANUMERIC] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXALNUM]);
+    PL_XPosix_ptrs[_CC_ALPHA] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXALPHA]);
+    PL_XPosix_ptrs[_CC_BLANK] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXBLANK]);
+    PL_XPosix_ptrs[_CC_CASED] =  _new_invlist_C_array(uni_prop_ptrs[UNI_CASED]);
+    PL_XPosix_ptrs[_CC_CNTRL] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXCNTRL]);
+    PL_XPosix_ptrs[_CC_DIGIT] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXDIGIT]);
+    PL_XPosix_ptrs[_CC_GRAPH] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXGRAPH]);
+    PL_XPosix_ptrs[_CC_LOWER] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXLOWER]);
+    PL_XPosix_ptrs[_CC_PRINT] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXPRINT]);
+    PL_XPosix_ptrs[_CC_PUNCT] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXPUNCT]);
+    PL_XPosix_ptrs[_CC_SPACE] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXSPACE]);
+    PL_XPosix_ptrs[_CC_UPPER] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXUPPER]);
+    PL_XPosix_ptrs[_CC_VERTSPACE] = _new_invlist_C_array(uni_prop_ptrs[UNI_VERTSPACE]);
+    PL_XPosix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXWORD]);
+    PL_XPosix_ptrs[_CC_XDIGIT] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXXDIGIT]);
+
+    PL_Posix_ptrs[_CC_ASCII] = _new_invlist_C_array(uni_prop_ptrs[UNI_ASCII]);
+    PL_Posix_ptrs[_CC_ALPHANUMERIC] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXALNUM]);
+    PL_Posix_ptrs[_CC_ALPHA] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXALPHA]);
+    PL_Posix_ptrs[_CC_BLANK] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXBLANK]);
+    PL_Posix_ptrs[_CC_CASED] = PL_Posix_ptrs[_CC_ALPHA];
+    PL_Posix_ptrs[_CC_CNTRL] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXCNTRL]);
+    PL_Posix_ptrs[_CC_DIGIT] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXDIGIT]);
+    PL_Posix_ptrs[_CC_GRAPH] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXGRAPH]);
+    PL_Posix_ptrs[_CC_LOWER] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXLOWER]);
+    PL_Posix_ptrs[_CC_PRINT] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXPRINT]);
+    PL_Posix_ptrs[_CC_PUNCT] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXPUNCT]);
+    PL_Posix_ptrs[_CC_SPACE] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXSPACE]);
+    PL_Posix_ptrs[_CC_UPPER] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXUPPER]);
+    PL_Posix_ptrs[_CC_VERTSPACE] = NULL;
+    PL_Posix_ptrs[_CC_WORDCHAR] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXWORD]);
+    PL_Posix_ptrs[_CC_XDIGIT] = _new_invlist_C_array(uni_prop_ptrs[UNI_POSIXXDIGIT]);
+
+    PL_GCB_invlist = _new_invlist_C_array(_Perl_GCB_invlist);
+    PL_SB_invlist = _new_invlist_C_array(_Perl_SB_invlist);
+    PL_WB_invlist = _new_invlist_C_array(_Perl_WB_invlist);
+    PL_LB_invlist = _new_invlist_C_array(_Perl_LB_invlist);
+    PL_SCX_invlist = _new_invlist_C_array(_Perl_SCX_invlist);
+
+    PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
+    PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
+    PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
+
+    PL_Assigned_invlist = _new_invlist_C_array(uni_prop_ptrs[UNI_ASSIGNED]);
+
+    PL_utf8_perl_idstart = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_IDSTART]);
+    PL_utf8_perl_idcont = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_IDCONT]);
+
+    PL_utf8_charname_begin = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_CHARNAME_BEGIN]);
+    PL_utf8_charname_continue = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_CHARNAME_CONTINUE]);
+
+    PL_utf8_foldable = _new_invlist_C_array(uni_prop_ptrs[UNI__PERL_ANY_FOLDS]);
+    PL_HasMultiCharFold = _new_invlist_C_array(uni_prop_ptrs[
+                                            UNI__PERL_FOLDS_TO_MULTI_CHAR]);
+    PL_NonL1NonFinalFold = _new_invlist_C_array(
+                                            NonL1_Perl_Non_Final_Folds_invlist);
+
+    PL_utf8_toupper = _new_invlist_C_array(Uppercase_Mapping_invlist);
+    PL_utf8_tolower = _new_invlist_C_array(Lowercase_Mapping_invlist);
+    PL_utf8_totitle = _new_invlist_C_array(Titlecase_Mapping_invlist);
+    PL_utf8_tofold = _new_invlist_C_array(Case_Folding_invlist);
+    PL_utf8_tosimplefold = _new_invlist_C_array(Simple_Case_Folding_invlist);
+    PL_utf8_foldclosures = _new_invlist_C_array(_Perl_IVCF_invlist);
+    PL_utf8_mark = _new_invlist_C_array(uni_prop_ptrs[UNI_M]);
+
+    /* The below are used only by deprecated functions.  They could be removed */
+    PL_utf8_xidcont  = _new_invlist_C_array(uni_prop_ptrs[UNI_XIDC]);
+    PL_utf8_idcont   = _new_invlist_C_array(uni_prop_ptrs[UNI_IDC]);
+    PL_utf8_xidstart = _new_invlist_C_array(uni_prop_ptrs[UNI_XIDS]);
+}
+
+SV *
+Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len,
+                                const bool to_fold, bool * invert)
+{
+    /* Parse the interior meat of \p{} passed to this in 'name' with length
+     * 'name_len', and return an inversion list if a property with 'name' is
+     * found, or NULL if not.  'name' point to the input with leading and
+     * trailing space trimmed.  'to_fold' indicates if /i is in effect.
+     *
+     * When the return is an inversion list, '*invert' will be set to a boolean
+     * indicating if it should be inverted or not
+     *
+     * This currently doesn't handle all cases.  A NULL return indicates the
+     * caller should try a different approach
+     */
+
+    char* lookup_name;
+    bool stricter = FALSE;
+    bool is_nv_type = FALSE;         /* nv= or numeric_value=, or possibly one
+                                        of the cjk numeric properties (though
+                                        it requires extra effort to compile
+                                        them) */
+    unsigned int i;
+    unsigned int j = 0, lookup_len;
+    int equals_pos = -1;        /* Where the '=' is found, or negative if none */
+    int slash_pos = -1;        /* Where the '/' is found, or negative if none */
+    int table_index = 0;
+    bool starts_with_In_or_Is = FALSE;
+    Size_t lookup_offset = 0;
+
+    PERL_ARGS_ASSERT_PARSE_UNIPROP_STRING;
+
+    /* The input will be modified into 'lookup_name' */
+    Newx(lookup_name, name_len, char);
+    SAVEFREEPV(lookup_name);
+
+    /* Parse the input. */
+    for (i = 0; i < name_len; i++) {
+        char cur = name[i];
+
+        /* These characters can be freely ignored in most situations.  Later it
+         * may turn out we shouldn't have ignored them, and we have to reparse,
+         * but we don't have enough information yet to make that decision */
+        if (cur == '-' || cur == '_' || isSPACE_A(cur)) {
+            continue;
+        }
+
+        /* Case differences are also ignored.  Our lookup routine assumes
+         * everything is lowercase */
+        if (isUPPER_A(cur)) {
+            lookup_name[j++] = toLOWER(cur);
+            continue;
+        }
+
+        /* A double colon is either an error, or a package qualifier to a
+         * subroutine user-defined property; neither of which do we currently
+         * handle
+         *
+         * But a single colon is a synonym for '=' */
+        if (cur == ':') {
+            if (i < name_len - 1 && name[i+1] == ':') {
+                return NULL;
+            }
+            cur = '=';
+        }
+
+        /* Otherwise, this character is part of the name. */
+        lookup_name[j++] = cur;
+
+        /* Only the equals sign needs further processing */
+        if (cur == '=') {
+            equals_pos = j; /* Note where it occurred in the input */
+            break;
+        }
+    }
+
+    /* Here, we are either done with the whole property name, if it was simple;
+     * or are positioned just after the '=' if it is compound. */
+
+    if (equals_pos >= 0) {
+        assert(! stricter); /* We shouldn't have set this yet */
+
+        /* Space immediately after the '=' is ignored */
+        i++;
+        for (; i < name_len; i++) {
+            if (! isSPACE_A(name[i])) {
+                break;
+            }
+        }
+
+        /* Certain properties need special handling.  They may optionally be
+         * prefixed by 'is'.  Ignore that prefix for the purposes of checking
+         * if this is one of those properties */
+        if (memBEGINPs(lookup_name, name_len, "is")) {
+            lookup_offset = 2;
+        }
+
+        /* Then check if it is one of these properties.  This is hard-coded
+         * because easier this way, and the list is unlikely to change.  There
+         * are several properties like this in the Unihan DB, which is unlikely
+         * to be compiled, and they all end with 'numeric'.  The interiors
+         * aren't checked for the precise property.  This would stop working if
+         * a cjk property were to be created that ended with 'numeric' and
+         * wasn't a numeric type */
+        is_nv_type = memEQs(lookup_name + lookup_offset,
+                       j - 1 - lookup_offset, "numericvalue")
+                  || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "nv")
+                  || (   memENDPs(lookup_name + lookup_offset,
+                            j - 1 - lookup_offset, "numeric")
+                      && (   memBEGINPs(lookup_name + lookup_offset,
+                                      j - 1 - lookup_offset, "cjk")
+                          || memBEGINPs(lookup_name + lookup_offset,
+                                      j - 1 - lookup_offset, "k")));
+        if (   is_nv_type
+            || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "canonicalcombiningclass")
+            || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "ccc")
+            || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "age")
+            || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "in")
+            || memEQs(lookup_name + lookup_offset,
+                      j - 1 - lookup_offset, "presentin"))
+        {
+            unsigned int k;
+
+            /* What makes these properties special is that the stuff after the
+             * '=' is a number.  Therefore, we can't throw away '-'
+             * willy-nilly, as those could be a minus sign.  Other stricter
+             * rules also apply.  However, these properties all can have the
+             * rhs not be a number, in which case they contain at least one
+             * alphabetic.  In those cases, the stricter rules don't apply.
+             * But the numeric type properties can have the alphas [Ee] to
+             * signify an exponent, and it is still a number with stricter
+             * rules.  So look for an alpha that signifys not-strict */
+            stricter = TRUE;
+            for (k = i; k < name_len; k++) {
+                if (   isALPHA_A(name[k])
+                    && (! is_nv_type || ! isALPHA_FOLD_EQ(name[k], 'E')))
+                {
+                    stricter = FALSE;
+                    break;
+                }
+            }
+        }
+
+        if (stricter) {
+
+            /* A number may have a leading '+' or '-'.  The latter is retained
+             * */
+            if (name[i] == '+') {
+                i++;
+            }
+            else if (name[i] == '-') {
+                lookup_name[j++] = '-';
+                i++;
+            }
+
+            /* Skip leading zeros including single underscores separating the
+             * zeros, or between the final leading zero and the first other
+             * digit */
+            for (; i < name_len - 1; i++) {
+                if (   name[i] != '0'
+                    && (name[i] != '_' || ! isDIGIT_A(name[i+1])))
+                {
+                    break;
+                }
+            }
+        }
+    }
+    else {  /* No '=' */
+
+       /* We are now in a position to determine if this property should have
+        * been parsed using stricter rules.  Only a few are like that, and
+        * unlikely to change. */
+        if (   memBEGINPs(lookup_name, j, "perl")
+            && memNEs(lookup_name + 4, j - 4, "space")
+            && memNEs(lookup_name + 4, j - 4, "word"))
+        {
+            stricter = TRUE;
+
+            /* We set the inputs back to 0 and the code below will reparse,
+             * using strict */
+            i = j = 0;
+        }
+    }
+
+    /* Here, we have either finished the property, or are positioned to parse
+     * the remainder, and we know if stricter rules apply.  Finish out, if not
+     * already done */
+    for (; i < name_len; i++) {
+        char cur = name[i];
+
+        /* In all instances, case differences are ignored, and we normalize to
+         * lowercase */
+        if (isUPPER_A(cur)) {
+            lookup_name[j++] = toLOWER(cur);
+            continue;
+        }
+
+        /* An underscore is skipped, but not under strict rules unless it
+         * separates two digits */
+        if (cur == '_') {
+            if (    stricter
+                && (     i == 0 || (int) i == equals_pos || i == name_len- 1
+                    || ! isDIGIT_A(name[i-1]) || ! isDIGIT_A(name[i+1])))
+            {
+                lookup_name[j++] = '_';
+            }
+            continue;
+        }
+
+        /* Hyphens are skipped except under strict */
+        if (cur == '-' && ! stricter) {
+            continue;
+        }
+
+        /* XXX Bug in documentation.  It says white space skipped adjacent to
+         * non-word char.  Maybe we should, but shouldn't skip it next to a dot
+         * in a number */
+        if (isSPACE_A(cur) && ! stricter) {
+            continue;
+        }
+
+        lookup_name[j++] = cur;
+
+        /* Unless this is a non-trailing slash, we are done with it */
+        if (i >= name_len - 1 || cur != '/') {
+            continue;
+        }
+
+        slash_pos = j;
+
+        /* A slash in the 'numeric value' property indicates that what follows
+         * is a denominator.  It can have a leading '+' and '0's that should be
+         * skipped.  But we have never allowed a negative denominator, so treat
+         * a minus like every other character.  (No need to rule out a second
+         * '/', as that won't match anything anyway */
+        if (is_nv_type) {
+            i++;
+            if (i < name_len && name[i] == '+') {
+                i++;
+            }
+
+            /* Skip leading zeros including underscores separating digits */
+            for (; i < name_len - 1; i++) {
+                if (   name[i] != '0'
+                    && (name[i] != '_' || ! isDIGIT_A(name[i+1])))
+                {
+                    break;
+                }
+            }
+
+            /* Store the first real character in the denominator */
+            lookup_name[j++] = name[i];
+        }
+    }
+
+    /* Here are completely done parsing the input 'name', and 'lookup_name'
+     * contains a copy, normalized.
+     *
+     * This special case is grandfathered in: 'L_' and 'GC=L_' are accepted and
+     * different from without the underscores.  */
+    if (  (   UNLIKELY(memEQs(lookup_name, j, "l"))
+           || UNLIKELY(memEQs(lookup_name, j, "gc=l")))
+        && UNLIKELY(name[name_len-1] == '_'))
+    {
+        lookup_name[j++] = '&';
+    }
+    else if (name_len > 2 && name[0] == 'I' && (   name[1] == 'n'
+                                                || name[1] == 's'))
+    {
+
+        /* Also, if the original input began with 'In' or 'Is', it could be a
+         * subroutine call instead of a property names, which currently isn't
+         * handled by this function.  Subroutine calls can't happen if there is
+         * an '=' in the name */
+        if (equals_pos < 0 && get_cvn_flags(name, name_len, GV_NOTQUAL) != NULL)
+        {
+            return NULL;
+        }
+
+        starts_with_In_or_Is = TRUE;
+    }
+
+    lookup_len = j;     /* Use a more mnemonic name starting here */
+
+    /* Get the index into our pointer table of the inversion list corresponding
+     * to the property */
+    table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+
+    /* If it didn't find the property */
+    if (table_index == 0) {
+
+        /* If didn't find the property, we try again stripping off any initial
+         * 'In' or 'Is' */
+        if (starts_with_In_or_Is) {
+            lookup_name += 2;
+            lookup_len -= 2;
+            equals_pos -= 2;
+            slash_pos -= 2;
+
+            table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+        }
+
+        if (table_index == 0) {
+            char * canonical;
+
+            /* If not found, and not a numeric type property, isn't a legal
+             * property */
+            if (! is_nv_type) {
+                return NULL;
+            }
+
+            /* But the numeric type properties need more work to decide.  What
+             * we do is make sure we have the number in canonical form and look
+             * that up. */
+
+            if (slash_pos < 0) {    /* No slash */
+
+                /* When it isn't a rational, take the input, convert it to a
+                 * NV, then create a canonical string representation of that
+                 * NV. */
+
+                NV value;
+
+                /* Get the value */
+                if (my_atof3(lookup_name + equals_pos, &value,
+                             lookup_len - equals_pos)
+                          != lookup_name + lookup_len)
+                {
+                    return NULL;
+                }
+
+                /* If the value is an integer, the canonical value is integral */
+                if (Perl_ceil(value) == value) {
+                    canonical = Perl_form(aTHX_ "%.*s%.0" NVff,
+                                                equals_pos, lookup_name, value);
+                }
+                else {  /* Otherwise, it is %e with a known precision */
+                    char * exp_ptr;
+
+                    canonical = Perl_form(aTHX_ "%.*s%.*" NVef,
+                                                equals_pos, lookup_name,
+                                                PL_E_FORMAT_PRECISION, value);
+
+                    /* The exponent generated is expecting two digits, whereas
+                     * %e on some systems will generate three.  Remove leading
+                     * zeros in excess of 2 from the exponent.  We start
+                     * looking for them after the '=' */
+                    exp_ptr = strchr(canonical + equals_pos, 'e');
+                    if (exp_ptr) {
+                        char * cur_ptr = exp_ptr + 2; /* past the 'e[+-]' */
+                        SSize_t excess_exponent_len = strlen(cur_ptr) - 2;
+
+                        assert(*(cur_ptr - 1) == '-' || *(cur_ptr - 1) == '+');
+
+                        if (excess_exponent_len > 0) {
+                            SSize_t leading_zeros = strspn(cur_ptr, "0");
+                            SSize_t excess_leading_zeros
+                                    = MIN(leading_zeros, excess_exponent_len);
+                            if (excess_leading_zeros > 0) {
+                                Move(cur_ptr + excess_leading_zeros,
+                                     cur_ptr,
+                                     strlen(cur_ptr) - excess_leading_zeros
+                                       + 1,  /* Copy the NUL as well */
+                                     char);
+                            }
+                        }
+                    }
+                }
+            }
+            else {  /* Has a slash.  Create a rational in canonical form  */
+                UV numerator, denominator, gcd, trial;
+                const char * end_ptr;
+                const char * sign = "";
+
+                /* We can't just find the numerator, denominator, and do the
+                 * division, then use the method above, because that is
+                 * inexact.  And the input could be a rational that is within
+                 * epsilon (given our precision) of a valid rational, and would
+                 * then incorrectly compare valid.
+                 *
+                 * We're only interested in the part after the '=' */
+                const char * this_lookup_name = lookup_name + equals_pos;
+                lookup_len -= equals_pos;
+                slash_pos -= equals_pos;
+
+                /* Handle any leading minus */
+                if (this_lookup_name[0] == '-') {
+                    sign = "-";
+                    this_lookup_name++;
+                    lookup_len--;
+                    slash_pos--;
+                }
+
+                /* Convert the numerator to numeric */
+                end_ptr = this_lookup_name + slash_pos;
+                if (! grok_atoUV(this_lookup_name, &numerator, &end_ptr)) {
+                    return NULL;
+                }
+
+                /* It better have included all characters before the slash */
+                if (*end_ptr != '/') {
+                    return NULL;
+                }
+
+                /* Set to look at just the denominator */
+                this_lookup_name += slash_pos;
+                lookup_len -= slash_pos;
+                end_ptr = this_lookup_name + lookup_len;
+
+                /* Convert the denominator to numeric */
+                if (! grok_atoUV(this_lookup_name, &denominator, &end_ptr)) {
+                    return NULL;
+                }
+
+                /* It better be the rest of the characters, and don't divide by
+                 * 0 */
+                if (   end_ptr != this_lookup_name + lookup_len
+                    || denominator == 0)
+                {
+                    return NULL;
+                }
+
+                /* Get the greatest common denominator using
+                   http://en.wikipedia.org/wiki/Euclidean_algorithm */
+                gcd = numerator;
+                trial = denominator;
+                while (trial != 0) {
+                    UV temp = trial;
+                    trial = gcd % trial;
+                    gcd = temp;
+                }
+
+                /* If already in lowest possible terms, we have already tried
+                 * looking this up */
+                if (gcd == 1) {
+                    return NULL;
+                }
+
+                /* Reduce the rational, which should put it in canonical form.
+                 * Then look it up */
+                numerator /= gcd;
+                denominator /= gcd;
+
+                canonical = Perl_form(aTHX_ "%.*s%s%" UVuf "/%" UVuf,
+                        equals_pos, lookup_name, sign, numerator, denominator);
+            }
+
+            /* Here, we have the number in canonical form.  Try that */
+            table_index = match_uniprop((U8 *) canonical, strlen(canonical));
+            if (table_index == 0) {
+                return NULL;
+            }
+        }
+    }
+
+    /* The return is an index into a table of ptrs.  A negative return
+     * signifies that the real index is the absolute value, but the result
+     * needs to be inverted */
+    if (table_index < 0) {
+        *invert = TRUE;
+        table_index = -table_index;
+    }
+    else {
+        *invert = FALSE;
+    }
+
+    /* Out-of band indices indicate a deprecated property.  The proper index is
+     * modulo it with the table size.  And dividing by the table size yields
+     * an offset into a table constructed to contain the corresponding warning
+     * message */
+    if (table_index > MAX_UNI_KEYWORD_INDEX) {
+        Size_t warning_offset = table_index / MAX_UNI_KEYWORD_INDEX;
+        table_index %= MAX_UNI_KEYWORD_INDEX;
+        Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
+                "Use of '%.*s' in \\p{} or \\P{} is deprecated because: %s",
+                (int) name_len, name, deprecated_property_msgs[warning_offset]);
+    }
+
+    /* In a few properties, a different property is used under /i.  These are
+     * unlikely to change, so are hard-coded here. */
+    if (to_fold) {
+        if (   table_index == UNI_XPOSIXUPPER
+            || table_index == UNI_XPOSIXLOWER
+            || table_index == UNI_TITLE)
+        {
+            table_index = UNI_CASED;
+        }
+        else if (   table_index == UNI_UPPERCASELETTER
+                 || table_index == UNI_LOWERCASELETTER
+#  ifdef UNI_TITLECASELETTER   /* Missing from early Unicodes */
+                 || table_index == UNI_TITLECASELETTER
+#  endif
+        ) {
+            table_index = UNI_CASEDLETTER;
+        }
+        else if (  table_index == UNI_POSIXUPPER
+                || table_index == UNI_POSIXLOWER)
+        {
+            table_index = UNI_POSIXALPHA;
+        }
+    }
+
+    /* Create and return the inversion list */
+    return _new_invlist_C_array(uni_prop_ptrs[table_index]);
+}
+
+#endif
+
 /*
  * ex: set ts=8 sts=4 sw=4 et:
  */