regcomp.c: Fix comment

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 54d641d..fdc3c31 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -110,7 +110,6 @@ typedef struct scan_frame {
      regnode *next_regnode;      /* next node to process when last is reached */
      U32 prev_recursed_depth;
      I32 stopparen;              /* what stopparen do we use */
-    U32 is_top_frame;           /* what flags do we use? */
  
      struct scan_frame *this_prev_frame; /* this previous frame */
      struct scan_frame *prev_frame;      /* previous frame */
@@ -119,8 +118,7 @@ typedef struct scan_frame {
  
  /* Certain characters are output as a sequence with the first being a
   * backslash. */
-#define isBACKSLASHED_PUNCT(c)                                              \
-                    ((c) == '-' || (c) == ']' || (c) == '\\' || (c) == '^')
+#define isBACKSLASHED_PUNCT(c)  strchr("-[]\\^", c)
  
  
  struct RExC_state_t {
@@ -151,7 +149,7 @@ struct RExC_state_t {
      I32                sawback;                /* Did we see \1, ...? */
      U32                seen;
      SSize_t    size;                   /* Code size. */
-    I32                npar;            /* Capture buffer count, (OPEN) plus
+    I32         npar;                   /* Capture buffer count, (OPEN) plus
                                             one. ("par" 0 is the whole
                                             pattern)*/
      I32                nestroot;               /* root parens we are in - used by
@@ -214,6 +212,7 @@ struct RExC_state_t {
      bool        seen_unfolded_sharp_s;
      bool        strict;
      bool        study_started;
+    bool        in_script_run;
  };
  
  #define RExC_flags     (pRExC_state->flags)
@@ -280,6 +279,7 @@ struct RExC_state_t {
  #define RExC_strict (pRExC_state->strict)
  #define RExC_study_started      (pRExC_state->study_started)
  #define RExC_warn_text (pRExC_state->warn_text)
+#define RExC_in_script_run      (pRExC_state->in_script_run)
  
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
   * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -345,7 +345,7 @@ struct RExC_state_t {
  /* Change from /d into /u rules, and restart the parse if we've already seen
   * something whose size would increase as a result, by setting *flagp and
   * returning 'restart_retval'.  RExC_uni_semantics is a flag that indicates
- * we've change to /u during the parse.  */
+ * we've changed to /u during the parse.  */
  #define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
      STMT_START {                                                            \
              if (DEPENDS_SEMANTICS) {                                        \
@@ -393,7 +393,7 @@ struct RExC_state_t {
  
    For each string some basic information is maintained:
  
-  - offset or min_offset
+  - min_offset
      This is the position the string must appear at, or not before.
      It also implicitly (when combined with minlenp) tells us how many
      characters must match before the string we are searching for.
@@ -405,6 +405,7 @@ struct RExC_state_t {
      Only used for floating strings. This is the rightmost point that
      the string can appear at. If set to SSize_t_MAX it indicates that the
      string can occur infinitely far to the right.
+    For fixed strings, it is equal to min_offset.
  
    - minlenp
      A pointer to the minimum number of characters of the pattern that the
@@ -444,6 +445,15 @@ struct RExC_state_t {
  
  */
  
+struct scan_data_substrs {
+    SV      *str;       /* longest substring found in pattern */
+    SSize_t min_offset; /* earliest point in string it can appear */
+    SSize_t max_offset; /* latest point in string it can appear */
+    SSize_t *minlenp;   /* pointer to the minlen relevant to the string */
+    SSize_t lookbehind; /* is the pos of the string modified by lookbehind */
+    I32 flags;          /* per substring SF_* and SCF_* flags */
+};
+
  typedef struct scan_data_t {
      /*I32 len_min;      unused */
      /*I32 len_delta;    unused */
@@ -453,17 +463,14 @@ typedef struct scan_data_t {
      SSize_t last_end;      /* min value, <0 unless valid. */
      SSize_t last_start_min;
      SSize_t last_start_max;
-    SV **longest;          /* Either &l_fixed, or &l_float. */
-    SV *longest_fixed;      /* longest fixed string found in pattern */
-    SSize_t offset_fixed;   /* offset where it starts */
-    SSize_t *minlen_fixed;  /* pointer to the minlen relevant to the string */
-    I32 lookbehind_fixed;   /* is the position of the string modfied by LB */
-    SV *longest_float;      /* longest floating string found in pattern */
-    SSize_t offset_float_min; /* earliest point in string it can appear */
-    SSize_t offset_float_max; /* latest point in string it can appear */
-    SSize_t *minlen_float;  /* pointer to the minlen relevant to the string */
-    SSize_t lookbehind_float; /* is the pos of the string modified by LB */
-    I32 flags;
+    U8      cur_is_floating; /* whether the last_* values should be set as
+                              * the next fixed (0) or floating (1)
+                              * substring */
+
+    /* [0] is longest fixed substring so far, [1] is longest float so far */
+    struct scan_data_substrs  substrs[2];
+
+    I32 flags;             /* common SF_* and SCF_* flags */
      I32 whilem_c;
      SSize_t *last_closep;
      regnode_ssc *start_class;
@@ -473,23 +480,21 @@ typedef struct scan_data_t {
   * Forward declarations for pregcomp()'s friends.
   */
  
-static const scan_data_t zero_scan_data =
-  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0};
+static const scan_data_t zero_scan_data = {
+    0, 0, NULL, 0, 0, 0, 0,
+    {
+        { NULL, 0, 0, 0, 0, 0 },
+        { NULL, 0, 0, 0, 0, 0 },
+    },
+    0, 0, NULL, NULL
+};
+
+/* study flags */
  
-#define SF_BEFORE_EOL          (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
  #define SF_BEFORE_SEOL         0x0001
  #define SF_BEFORE_MEOL         0x0002
-#define SF_FIX_BEFORE_EOL      (SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL)
-#define SF_FL_BEFORE_EOL       (SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL)
-
-#define SF_FIX_SHIFT_EOL       (+2)
-#define SF_FL_SHIFT_EOL                (+4)
-
-#define SF_FIX_BEFORE_SEOL     (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
-#define SF_FIX_BEFORE_MEOL     (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
+#define SF_BEFORE_EOL          (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
  
-#define SF_FL_BEFORE_SEOL      (SF_BEFORE_SEOL << SF_FL_SHIFT_EOL)
-#define SF_FL_BEFORE_MEOL      (SF_BEFORE_MEOL << SF_FL_SHIFT_EOL) /* 0x20 */
  #define SF_IS_INF              0x0040
  #define SF_HAS_PAR             0x0080
  #define SF_IN_PAR              0x0100
@@ -624,7 +629,7 @@ static const scan_data_t zero_scan_data =
      UTF8fARG(UTF,                                                           \
               (xI(xC) > eC) /* Don't run off end */                          \
                ? eC - sC   /* Length before the <--HERE */                   \
-              : xI_offset(xC),                                              \
+              : ( __ASSERT_(xI_offset(xC) >= 0) xI_offset(xC) ),            \
               sC),         /* The input pattern printed up to the <--HERE */ \
      UTF8fARG(UTF,                                                           \
               (xI(xC) > eC) ? 0 : eC - xI(xC), /* Length after <--HERE */    \
@@ -971,65 +976,123 @@ Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
  #define DEBUG_SHOW_STUDY_FLAG(flags,flag) \
    if ((flags) & flag) Perl_re_printf( aTHX_  "%s ", #flag)
  
-#define DEBUG_SHOW_STUDY_FLAGS(flags,open_str,close_str)                    \
-    if ( ( flags ) ) {                                                      \
-        Perl_re_printf( aTHX_  "%s", open_str);                                         \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_FL_BEFORE_SEOL);                     \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_FL_BEFORE_MEOL);                     \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_IS_INF);                             \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_HAS_PAR);                            \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_IN_PAR);                             \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_HAS_EVAL);                           \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_SUBSTR);                         \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_STCLASS_AND);                    \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_STCLASS_OR);                     \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_STCLASS);                        \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_WHILEM_VISITED_POS);                \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_TRIE_RESTUDY);                      \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_SEEN_ACCEPT);                       \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_TRIE_DOING_RESTUDY);                \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_IN_DEFINE);                         \
-        Perl_re_printf( aTHX_  "%s", close_str);                                        \
-    }
-
-
-#define DEBUG_STUDYDATA(str,data,depth)                              \
-DEBUG_OPTIMISE_MORE_r(if(data){                                      \
-    Perl_re_indentf( aTHX_  "" str "Pos:%" IVdf "/%" IVdf            \
-        " Flags: 0x%" UVXf,                                          \
-        depth,                                                       \
-        (IV)((data)->pos_min),                                       \
-        (IV)((data)->pos_delta),                                     \
-        (UV)((data)->flags)                                          \
-    );                                                               \
-    DEBUG_SHOW_STUDY_FLAGS((data)->flags," [ ","]");                 \
-    Perl_re_printf( aTHX_                                            \
-        " Whilem_c: %" IVdf " Lcp: %" IVdf " %s",                    \
-        (IV)((data)->whilem_c),                                      \
-        (IV)((data)->last_closep ? *((data)->last_closep) : -1),     \
-        is_inf ? "INF " : ""                                         \
-    );                                                               \
-    if ((data)->last_found)                                          \
-        Perl_re_printf( aTHX_                                        \
-            "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf                   \
-            " %sFixed:'%s' @ %" IVdf                                 \
-            " %sFloat: '%s' @ %" IVdf "/%" IVdf,                     \
-            SvPVX_const((data)->last_found),                         \
-            (IV)((data)->last_end),                                  \
-            (IV)((data)->last_start_min),                            \
-            (IV)((data)->last_start_max),                            \
-            ((data)->longest &&                                      \
-             (data)->longest==&((data)->longest_fixed)) ? "*" : "",  \
-            SvPVX_const((data)->longest_fixed),                      \
-            (IV)((data)->offset_fixed),                              \
-            ((data)->longest &&                                      \
-             (data)->longest==&((data)->longest_float)) ? "*" : "",  \
-            SvPVX_const((data)->longest_float),                      \
-            (IV)((data)->offset_float_min),                          \
-            (IV)((data)->offset_float_max)                           \
-        );                                                           \
-    Perl_re_printf( aTHX_ "\n");                                                 \
-});
+
+#ifdef DEBUGGING
+static void
+S_debug_show_study_flags(pTHX_ U32 flags, const char *open_str,
+                                    const char *close_str)
+{
+    if (!flags)
+        return;
+
+    Perl_re_printf( aTHX_  "%s", open_str);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_SEOL);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_MEOL);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_IS_INF);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_PAR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_IN_PAR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_EVAL);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_SUBSTR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_AND);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_OR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_WHILEM_VISITED_POS);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_RESTUDY);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_SEEN_ACCEPT);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_DOING_RESTUDY);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_IN_DEFINE);
+    Perl_re_printf( aTHX_  "%s", close_str);
+}
+
+
+static void
+S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
+                    U32 depth, int is_inf)
+{
+    GET_RE_DEBUG_FLAGS_DECL;
+
+    DEBUG_OPTIMISE_MORE_r({
+        if (!data)
+            return;
+        Perl_re_indentf(aTHX_  "%s: Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
+            depth,
+            where,
+            (IV)data->pos_min,
+            (IV)data->pos_delta,
+            (UV)data->flags
+        );
+
+        S_debug_show_study_flags(aTHX_ data->flags," [","]");
+
+        Perl_re_printf( aTHX_
+            " Whilem_c: %" IVdf " Lcp: %" IVdf " %s",
+            (IV)data->whilem_c,
+            (IV)(data->last_closep ? *((data)->last_closep) : -1),
+            is_inf ? "INF " : ""
+        );
+
+        if (data->last_found) {
+            int i;
+            Perl_re_printf(aTHX_
+                "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf,
+                    SvPVX_const(data->last_found),
+                    (IV)data->last_end,
+                    (IV)data->last_start_min,
+                    (IV)data->last_start_max
+            );
+
+            for (i = 0; i < 2; i++) {
+                Perl_re_printf(aTHX_
+                    " %s%s: '%s' @ %" IVdf "/%" IVdf,
+                    data->cur_is_floating == i ? "*" : "",
+                    i ? "Float" : "Fixed",
+                    SvPVX_const(data->substrs[i].str),
+                    (IV)data->substrs[i].min_offset,
+                    (IV)data->substrs[i].max_offset
+                );
+                S_debug_show_study_flags(aTHX_ data->substrs[i].flags," [","]");
+            }
+        }
+
+        Perl_re_printf( aTHX_ "\n");
+    });
+}
+
+
+static void
+S_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
+                regnode *scan, U32 depth, U32 flags)
+{
+    GET_RE_DEBUG_FLAGS_DECL;
+
+    DEBUG_OPTIMISE_r({
+        regnode *Next;
+
+        if (!scan)
+            return;
+        Next = regnext(scan);
+        regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);
+        Perl_re_indentf( aTHX_   "%s>%3d: %s (%d)",
+            depth,
+            str,
+            REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),
+            Next ? (REG_NODE_NUM(Next)) : 0 );
+        S_debug_show_study_flags(aTHX_ flags," [ ","]");
+        Perl_re_printf( aTHX_  "\n");
+   });
+}
+
+
+#  define DEBUG_STUDYDATA(where, data, depth, is_inf) \
+                    S_debug_studydata(aTHX_ where, data, depth, is_inf)
+
+#  define DEBUG_PEEP(str, scan, depth, flags)   \
+                    S_debug_peep(aTHX_ str, pRExC_state, scan, depth, flags)
+
+#else
+#  define DEBUG_STUDYDATA(where, data, depth, is_inf) NOOP
+#  define DEBUG_PEEP(str, scan, depth, flags)         NOOP
+#endif
  
  
  /* =========================================================
@@ -1056,7 +1119,7 @@ PERL_STATIC_INLINE item*
  push(UV key,item* curr)
  {
      item* head;
-    Newxz(head, 1, item);
+    Newx(head, 1, item);
      head->key = key;
      head->value = 0;
      head->next = curr;
@@ -1126,7 +1189,7 @@ S_edit_distance(const UV* src,
      PERL_ARGS_ASSERT_EDIT_DISTANCE;
  
      /* intialize matrix start values */
-    Newxz(scores, ( (x + 2) * (y + 2)), UV);
+    Newx(scores, ( (x + 2) * (y + 2)), UV);
      scores[0] = score_ceil;
      scores[1 * (y + 2) + 0] = score_ceil;
      scores[0 * (y + 2) + 1] = score_ceil;
@@ -1201,7 +1264,7 @@ S_cntrl_to_mnemonic(const U8 c)
  }
  
  /* Mark that we cannot extend a found fixed substring at this point.
-   Update the longest found anchored substring and the longest found
+   Update the longest found anchored substring or the longest found
     floating substrings if needed. */
  
  STATIC void
@@ -1209,42 +1272,38 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
                      SSize_t *minlenp, int is_inf)
  {
      const STRLEN l = CHR_SVLEN(data->last_found);
-    const STRLEN old_l = CHR_SVLEN(*data->longest);
+    SV * const longest_sv = data->substrs[data->cur_is_floating].str;
+    const STRLEN old_l = CHR_SVLEN(longest_sv);
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_SCAN_COMMIT;
  
      if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
-       SvSetMagicSV(*data->longest, data->last_found);
-       if (*data->longest == data->longest_fixed) {
-           data->offset_fixed = l ? data->last_start_min : data->pos_min;
-           if (data->flags & SF_BEFORE_EOL)
-               data->flags
-                   |= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
-           else
-               data->flags &= ~SF_FIX_BEFORE_EOL;
-           data->minlen_fixed=minlenp;
-           data->lookbehind_fixed=0;
-       }
-       else { /* *data->longest == data->longest_float */
-           data->offset_float_min = l ? data->last_start_min : data->pos_min;
-           data->offset_float_max = (l
+        const U8 i = data->cur_is_floating;
+       SvSetMagicSV(longest_sv, data->last_found);
+        data->substrs[i].min_offset = l ? data->last_start_min : data->pos_min;
+
+       if (!i) /* fixed */
+           data->substrs[0].max_offset = data->substrs[0].min_offset;
+       else { /* float */
+           data->substrs[1].max_offset = (l
                            ? data->last_start_max
                            : (data->pos_delta > SSize_t_MAX - data->pos_min
                                          ? SSize_t_MAX
                                          : data->pos_min + data->pos_delta));
             if (is_inf
-                || (STRLEN)data->offset_float_max > (STRLEN)SSize_t_MAX)
-               data->offset_float_max = SSize_t_MAX;
-           if (data->flags & SF_BEFORE_EOL)
-               data->flags
-                   |= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
-           else
-               data->flags &= ~SF_FL_BEFORE_EOL;
-            data->minlen_float=minlenp;
-            data->lookbehind_float=0;
-       }
+                || (STRLEN)data->substrs[1].max_offset > (STRLEN)SSize_t_MAX)
+               data->substrs[1].max_offset = SSize_t_MAX;
+        }
+
+        if (data->flags & SF_BEFORE_EOL)
+            data->substrs[i].flags |= (data->flags & SF_BEFORE_EOL);
+        else
+            data->substrs[i].flags &= ~SF_BEFORE_EOL;
+        data->substrs[i].minlenp = minlenp;
+        data->substrs[i].lookbehind = 0;
      }
+
      SvCUR_set(data->last_found, 0);
      {
         SV * const sv = data->last_found;
@@ -1256,7 +1315,7 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
      }
      data->last_end = -1;
      data->flags &= ~SF_BEFORE_EOL;
-    DEBUG_STUDYDATA("commit: ",data,0);
+    DEBUG_STUDYDATA("commit", data, 0, is_inf);
  }
  
  /* An SSC is just a regnode_charclass_posix with an extra field: the inversion
@@ -1644,6 +1703,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
              regnode_charclass_posixl temp;
              int add = 1;    /* To calculate the index of the complement */
  
+            Zero(&temp, 1, regnode_charclass_posixl);
              ANYOF_POSIXL_ZERO(&temp);
              for (i = 0; i < ANYOF_MAX; i++) {
                  assert(i % 2 != 0
@@ -2365,7 +2425,7 @@ is the recommended Unicode-aware way of saying
  } STMT_END
  
  #define TRIE_LIST_NEW(state) STMT_START {                       \
-    Newxz( trie->states[ state ].trans.list,               \
+    Newx( trie->states[ state ].trans.list,                     \
         4, reg_trie_trans_le );                                 \
       TRIE_LIST_CUR( state ) = 1;                                \
       TRIE_LIST_LEN( state ) = 4;                                \
@@ -2463,8 +2523,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
      /* we just use folder as a flag in utf8 */
      const U8 * folder = NULL;
  
+    /* In the add_data() call below we store either 'tu' or 'tuaa', which
+     * stand for one trie structure and one hash, optionally followed by
+     * two arrays. */
  #ifdef DEBUGGING
-    const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("tuuu"));
+    const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("tuaa"));
      AV *trie_words = NULL;
      /* along with revcharmap, this only used during construction but both are
       * useful during debugging so we store them in the struct when debugging.
@@ -3557,7 +3620,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
      aho->trie=trie_offset;
      aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) );
      Copy( trie->states, aho->states, numstates, reg_trie_state );
-    Newxz( q, numstates, U32);
+    Newx( q, numstates, U32);
      aho->fail = (U32 *) PerlMemShared_calloc( numstates, sizeof(U32) );
      aho->refcount = 1;
      fail = aho->fail;
@@ -3619,17 +3682,6 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  }
  
  
-#define DEBUG_PEEP(str,scan,depth)         \
-    DEBUG_OPTIMISE_r({if (scan){           \
-       regnode *Next = regnext(scan);      \
-       regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);\
-       Perl_re_indentf( aTHX_  "" str ">%3d: %s (%d)", \
-           depth, REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),\
-           Next ? (REG_NODE_NUM(Next)) : 0 );\
-       DEBUG_SHOW_STUDY_FLAGS(flags," [ ","]");\
-       Perl_re_printf( aTHX_  "\n");                   \
-   }});
-
  /* The below joins as many adjacent EXACTish nodes as possible into a single
   * one.  The regop may be changed if the node(s) contain certain sequences that
   * require special handling.  The joining is only done if:
@@ -3654,14 +3706,16 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   * input nodes.
   *
   * And *unfolded_multi_char is set to indicate whether or not the node contains
- * an unfolded multi-char fold.  This happens when whether the fold is valid or
- * not won't be known until runtime; namely for EXACTF nodes that contain LATIN
- * SMALL LETTER SHARP S, as only if the target string being matched against
- * turns out to be UTF-8 is that fold valid; and also for EXACTFL nodes whose
- * folding rules depend on the locale in force at runtime.  (Multi-char folds
- * whose components are all above the Latin1 range are not run-time locale
- * dependent, and have already been folded by the time this function is
- * called.)
+ * an unfolded multi-char fold.  This happens when it won't be known until
+ * runtime whether the fold is valid or not; namely
+ *  1) for EXACTF nodes that contain LATIN SMALL LETTER SHARP S, as only if the
+ *      target string being matched against turns out to be UTF-8 is that fold
+ *      valid; or
+ *  2) for EXACTFL nodes whose folding rules depend on the locale in force at
+ *      runtime.
+ * (Multi-char folds whose components are all above the Latin1 range are not
+ * run-time locale dependent, and have already been folded by the time this
+ * function is called.)
   *
   * This is as good a place as any to discuss the design of handling these
   * multi-character fold sequences.  It's been wrong in Perl for a very long
@@ -3784,7 +3838,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
      PERL_UNUSED_ARG(flags);
      PERL_UNUSED_ARG(val);
  #endif
-    DEBUG_PEEP("join",scan,depth);
+    DEBUG_PEEP("join", scan, depth, 0);
  
      /* Look through the subsequent nodes in the chain.  Skip NOTHING, merge
       * EXACT ones that are mergeable to the current one. */
@@ -3798,7 +3852,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
          if (OP(n) == TAIL || n > next)
              stringok = 0;
          if (PL_regkind[OP(n)] == NOTHING) {
-            DEBUG_PEEP("skip:",n,depth);
+            DEBUG_PEEP("skip:", n, depth, 0);
              NEXT_OFF(scan) += NEXT_OFF(n);
              next = n + NODE_STEP_REGNODE;
  #ifdef DEBUGGING
@@ -3818,7 +3872,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
              if (oldl + STR_LEN(n) > U8_MAX)
                  break;
  
-            DEBUG_PEEP("merg",n,depth);
+            DEBUG_PEEP("merg", n, depth, 0);
              merged++;
  
              NEXT_OFF(scan) += NEXT_OFF(n);
@@ -3835,7 +3889,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
  
  #ifdef EXPERIMENTAL_INPLACESCAN
         if (flags && !NEXT_OFF(n)) {
-           DEBUG_PEEP("atch", val, depth);
+           DEBUG_PEEP("atch", val, depth, 0);
             if (reg_off_by_arg[OP(n)]) {
                 ARG_SET(n, val - n);
             }
@@ -4066,7 +4120,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
          n++;
      }
  #endif
-    DEBUG_OPTIMISE_r(if (merged){DEBUG_PEEP("finl",scan,depth)});
+    DEBUG_OPTIMISE_r(if (merged){DEBUG_PEEP("finl", scan, depth, 0);});
      return stopnow;
  }
  
@@ -4093,7 +4147,7 @@ S_unwind_scan_frames(pTHX_ const void *p)
      } while (f);
  }
  
-
+/* Returns the minimum length that could possibly match. */
  STATIC SSize_t
  S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          SSize_t *minlenp, SSize_t *deltap,
@@ -4129,6 +4183,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
      PERL_ARGS_ASSERT_STUDY_CHUNK;
      RExC_study_started= 1;
  
+    Zero(&data_fake, 1, scan_data_t);
  
      if ( depth == 0 ) {
          while (first_non_open && OP(first_non_open) == OPEN)
@@ -4180,8 +4235,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                     the folded version may be shorter) */
         bool unfolded_multi_char = FALSE;
         /* Peephole optimizer: */
-        DEBUG_STUDYDATA("Peep:", data, depth);
-        DEBUG_PEEP("Peep", scan, depth);
+        DEBUG_STUDYDATA("Peep", data, depth, is_inf);
+        DEBUG_PEEP("Peep", scan, depth, flags);
  
  
          /* The reason we do this here is that we need to deal with things like
@@ -4225,17 +4280,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
              StructCopy(&zero_scan_data, &data_fake, scan_data_t);
              scan = regnext(scan);
              assert( OP(scan) == IFTHEN );
-            DEBUG_PEEP("expect IFTHEN", scan, depth);
+            DEBUG_PEEP("expect IFTHEN", scan, depth, flags);
  
              data_fake.last_closep= &fake_last_close;
              minlen = *minlenp;
              next = regnext(scan);
              scan = NEXTOPER(NEXTOPER(scan));
-            DEBUG_PEEP("scan", scan, depth);
-            DEBUG_PEEP("next", next, depth);
+            DEBUG_PEEP("scan", scan, depth, flags);
+            DEBUG_PEEP("next", next, depth, flags);
  
              /* we suppose the run is continuous, last=next...
               * NOTE we dont use the return here! */
+            /* DEFINEP study_chunk() recursion */
              (void)study_chunk(pRExC_state, &scan, &minlen,
                                &deltanext, next, &data_fake, stopparen,
                                recursed_depth, NULL, f, depth+1);
@@ -4276,7 +4332,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     I32 f = 0;
                     regnode_ssc this_class;
  
-                    DEBUG_PEEP("Branch", scan, depth);
+                    DEBUG_PEEP("Branch", scan, depth, flags);
  
                     num++;
                      StructCopy(&zero_scan_data, &data_fake, scan_data_t);
@@ -4303,6 +4359,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         f |= SCF_WHILEM_VISITED_POS;
  
                     /* we suppose the run is continuous, last=next...*/
+                    /* recurse study_chunk() for each BRANCH in an alternation */
                     minnext = study_chunk(pRExC_state, &scan, minlenp,
                                        &deltanext, next, &data_fake, stopparen,
                                        recursed_depth, NULL, f,depth+1);
@@ -4341,7 +4398,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     else
                         data->pos_delta += max1 - min1;
                     if (max1 != min1 || is_inf)
-                       data->longest = &(data->longest_float);
+                       data->cur_is_floating = 1;
                 }
                 min += min1;
                 if (delta == SSize_t_MAX
@@ -4779,16 +4836,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                               RExC_study_chunk_recursed_bytes, U8);
                      }
                      /* we havent recursed into this paren yet, so recurse into it */
-                    DEBUG_STUDYDATA("gosub-set:", data,depth);
+                    DEBUG_STUDYDATA("gosub-set", data, depth, is_inf);
                      PAREN_SET(RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes), paren);
                      my_recursed_depth= recursed_depth + 1;
                  } else {
-                    DEBUG_STUDYDATA("gosub-inf:", data,depth);
+                    DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf);
                      /* some form of infinite recursion, assume infinite length
                       * */
                      if (flags & SCF_DO_SUBSTR) {
                          scan_commit(pRExC_state, data, minlenp, is_inf);
-                        data->longest = &(data->longest_float);
+                        data->cur_is_floating = 1;
                      }
                      is_inf = is_inf_internal = 1;
                      if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
@@ -4826,8 +4883,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                  newframe->prev_recursed_depth = recursed_depth;
                  newframe->this_prev_frame= frame;
  
-                DEBUG_STUDYDATA("frame-new:",data,depth);
-                DEBUG_PEEP("fnew", scan, depth);
+                DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
+                DEBUG_PEEP("fnew", scan, depth, flags);
  
                 frame = newframe;
                 scan =  start;
@@ -4842,6 +4899,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
         else if (OP(scan) == EXACT || OP(scan) == EXACTL) {
             SSize_t l = STR_LEN(scan);
             UV uc;
+            assert(l);
             if (UTF) {
                 const U8 * const s = (U8*)STRING(scan);
                 uc = utf8_to_uvchr_buf(s, s + l, NULL);
@@ -4916,7 +4974,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                  }
                  data->pos_delta += min_subtract;
                 if (min_subtract) {
-                   data->longest = &(data->longest_float);
+                   data->cur_is_floating = 1; /* float */
                 }
             }
  
@@ -4983,7 +5041,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 if (flags & SCF_DO_SUBSTR) {
                      scan_commit(pRExC_state, data, minlenp, is_inf);
                      /* Cannot extend fixed substrings */
-                   data->longest = &(data->longest_float);
+                   data->cur_is_floating = 1; /* float */
                 }
                  is_inf = is_inf_internal = 1;
                  scan = regnext(scan);
@@ -5038,6 +5096,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     f &= ~SCF_WHILEM_VISITED_POS;
  
                 /* This will finish on WHILEM, setting scan, or on NULL: */
+                /* recurse study_chunk() on loop bodies */
                 minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
                                    last, data, stopparen, recursed_depth, NULL,
                                    (mincount == 0
@@ -5073,7 +5132,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 }
                 if (!scan)              /* It was not CURLYX, but CURLY. */
                     scan = next;
-               if (!(flags & SCF_TRIE_DOING_RESTUDY)
+               if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
                     /* ? quantifier ok, except for (?{ ... }) */
                     && (next_is_eval || !(mincount == 0 && maxcount == 1))
                     && (minnext == 0) && (deltanext == 0)
@@ -5200,6 +5259,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         }
  #endif
                         /* Optimize again: */
+                        /* recurse study_chunk() on optimised CURLYX => CURLYM */
                         study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
                                      NULL, stopparen, recursed_depth, NULL, 0,depth+1);
                     }
@@ -5321,7 +5381,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                 ? SSize_t_MAX
                                 : data->pos_min + data->pos_delta - last_chrs;
                         }
-                       data->longest = &(data->longest_float);
+                       data->cur_is_floating = 1; /* float */
                     }
                     SvREFCNT_dec(last_str);
                 }
@@ -5345,7 +5405,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 if (flags & SCF_DO_SUBSTR) {
                      /* Cannot expect anything... */
                      scan_commit(pRExC_state, data, minlenp, is_inf);
-                   data->longest = &(data->longest_float);
+                   data->cur_is_floating = 1; /* float */
                 }
                 is_inf = is_inf_internal = 1;
                 if (flags & SCF_DO_STCLASS_OR) {
@@ -5392,7 +5452,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  scan_commit(pRExC_state, data, minlenp, is_inf);
                 data->pos_min += 1;
                 data->pos_delta += 1;
-               data->longest = &(data->longest_float);
+               data->cur_is_floating = 1; /* float */
             }
         }
         else if (REGNODE_SIMPLE(OP(scan))) {
@@ -5458,6 +5518,27 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                                            (regnode_charclass *) scan);
                     break;
  
+                case ANYOFM:
+                  {
+                    SV* cp_list = get_ANYOFM_contents(scan);
+
+                    if (flags & SCF_DO_STCLASS_OR) {
+                        ssc_union(data->start_class,
+                                  cp_list,
+                                  FALSE /* don't invert */
+                                  );
+                    }
+                    else if (flags & SCF_DO_STCLASS_AND) {
+                        ssc_intersection(data->start_class,
+                                         cp_list,
+                                         FALSE /* don't invert */
+                                         );
+                    }
+
+                    SvREFCNT_dec_NN(cp_list);
+                    break;
+                  }
+
                 case NPOSIXL:
                      invert = 1;
                      /* FALLTHROUGH */
@@ -5498,20 +5579,25 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      }
                      break;
  
+                case NASCII:
+                    invert = 1;
+                    /* FALLTHROUGH */
+               case ASCII:
+                    my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
+
+                    /* This can be handled as a Posix class */
+                    goto join_posix_and_ascii;
+
                  case NPOSIXA:   /* For these, we always know the exact set of
                                     what's matched */
                      invert = 1;
                      /* FALLTHROUGH */
                 case POSIXA:
-                    if (FLAGS(scan) == _CC_ASCII) {
-                        my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
-                    }
-                    else {
-                        _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
-                                              PL_XPosix_ptrs[_CC_ASCII],
-                                              &my_invlist);
-                    }
-                    goto join_posix;
+                    assert(FLAGS(scan) != _CC_ASCII);
+                    _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
+                                          PL_XPosix_ptrs[_CC_ASCII],
+                                          &my_invlist);
+                    goto join_posix_and_ascii;
  
                 case NPOSIXD:
                 case NPOSIXU:
@@ -5531,7 +5617,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                            &my_invlist);
                      }
  
-                  join_posix:
+                  join_posix_and_ascii:
  
                      if (flags & SCF_DO_STCLASS_AND) {
                          ssc_intersection(data->start_class, my_invlist, invert);
@@ -5590,6 +5676,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      f |= SCF_WHILEM_VISITED_POS;
                  next = regnext(scan);
                  nscan = NEXTOPER(NEXTOPER(scan));
+
+                /* recurse study_chunk() for lookahead body */
                  minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
                                        last, &data_fake, stopparen,
                                        recursed_depth, NULL, f, depth+1);
@@ -5664,6 +5752,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  else
                      data_fake.last_closep = &fake;
                  data_fake.flags = 0;
+                data_fake.substrs[0].flags = 0;
+                data_fake.substrs[1].flags = 0;
                 data_fake.pos_delta = delta;
                  if (is_inf)
                     data_fake.flags |= SF_IS_INF;
@@ -5678,6 +5768,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  next = regnext(scan);
                  nscan = NEXTOPER(NEXTOPER(scan));
  
+                /* positive lookahead study_chunk() recursion */
                  *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
                                          &deltanext, last, &data_fake,
                                          stopparen, recursed_depth, NULL,
@@ -5706,29 +5797,29 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                          data->flags |= SF_HAS_EVAL;
                      data->whilem_c = data_fake.whilem_c;
                      if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
+                        int i;
                          if (RExC_rx->minlen<*minnextp)
                              RExC_rx->minlen=*minnextp;
                          scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
                          SvREFCNT_dec_NN(data_fake.last_found);
  
-                        if ( data_fake.minlen_fixed != minlenp )
-                        {
-                            data->offset_fixed= data_fake.offset_fixed;
-                            data->minlen_fixed= data_fake.minlen_fixed;
-                            data->lookbehind_fixed+= scan->flags;
-                        }
-                        if ( data_fake.minlen_float != minlenp )
-                        {
-                            data->minlen_float= data_fake.minlen_float;
-                            data->offset_float_min=data_fake.offset_float_min;
-                            data->offset_float_max=data_fake.offset_float_max;
-                            data->lookbehind_float+= scan->flags;
+                        for (i = 0; i < 2; i++) {
+                            if (data_fake.substrs[i].minlenp != minlenp) {
+                                data->substrs[i].min_offset =
+                                            data_fake.substrs[i].min_offset;
+                                data->substrs[i].max_offset =
+                                            data_fake.substrs[i].max_offset;
+                                data->substrs[i].minlenp =
+                                            data_fake.substrs[i].minlenp;
+                                data->substrs[i].lookbehind += scan->flags;
+                            }
                          }
                      }
                  }
             }
  #endif
         }
+
         else if (OP(scan) == OPEN) {
             if (stopparen != (I32)ARG(scan))
                 pars++;
@@ -5765,7 +5856,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
         {
                 if (flags & SCF_DO_SUBSTR) {
                      scan_commit(pRExC_state, data, minlenp, is_inf);
-                   data->longest = &(data->longest_float);
+                   data->cur_is_floating = 1; /* float */
                 }
                 is_inf = is_inf_internal = 1;
                 if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
@@ -5839,6 +5930,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                          /* We go from the jump point to the branch that follows
                             it. Note this means we need the vestigial unused
                             branches even though they aren't otherwise used. */
+                        /* optimise study_chunk() for TRIE */
                          minnext = study_chunk(pRExC_state, &scan, minlenp,
                              &deltanext, (regnode *)nextbranch, &data_fake,
                              stopparen, recursed_depth, NULL, f,depth+1);
@@ -5876,11 +5968,15 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  data->pos_min += min1;
                  data->pos_delta += max1 - min1;
                  if (max1 != min1 || is_inf)
-                    data->longest = &(data->longest_float);
+                    data->cur_is_floating = 1; /* float */
              }
              min += min1;
-            if (delta != SSize_t_MAX)
-                delta += max1 - min1;
+            if (delta != SSize_t_MAX) {
+                if (SSize_t_MAX - (max1 - min1) >= delta)
+                    delta += max1 - min1;
+                else
+                    delta = SSize_t_MAX;
+            }
              if (flags & SCF_DO_STCLASS_OR) {
                  ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
                  if (min1) {
@@ -5920,7 +6016,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 data->pos_min += trie->minlen;
                 data->pos_delta += (trie->maxlen - trie->minlen);
                 if (trie->maxlen != trie->minlen)
-                   data->longest = &(data->longest_float);
+                   data->cur_is_floating = 1; /* float */
             }
             if (trie->jump) /* no more substrings -- for now /grr*/
                 flags &= ~SCF_DO_SUBSTR;
@@ -5937,8 +6033,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
          /* we need to unwind recursion. */
          depth = depth - 1;
  
-        DEBUG_STUDYDATA("frame-end:",data,depth);
-        DEBUG_PEEP("fend", scan, depth);
+        DEBUG_STUDYDATA("frame-end", data, depth, is_inf);
+        DEBUG_PEEP("fend", scan, depth, flags);
  
          /* restore previous context */
          last = frame->last_regnode;
@@ -5952,7 +6048,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
      }
  
      assert(!frame);
-    DEBUG_STUDYDATA("pre-fin:",data,depth);
+    DEBUG_STUDYDATA("pre-fin", data, depth, is_inf);
  
      *scanp = scan;
      *deltap = is_inf_internal ? SSize_t_MAX : delta;
@@ -5974,7 +6070,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
      if (flags & SCF_TRIE_RESTUDY)
          data->flags |=         SCF_TRIE_RESTUDY;
  
-    DEBUG_STUDYDATA("post-fin:",data,depth);
+    DEBUG_STUDYDATA("post-fin", data, depth, is_inf);
  
      {
          SSize_t final_minlen= min < stopmin ? min : stopmin;
@@ -6692,10 +6788,10 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
  
  
  STATIC bool
-S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest,
-                      SV** rx_utf8, SV** rx_substr, SSize_t* rx_end_shift,
-                     SSize_t lookbehind, SSize_t offset, SSize_t *minlen,
-                      STRLEN longest_length, bool eol, bool meol)
+S_setup_longest(pTHX_ RExC_state_t *pRExC_state,
+                      struct reg_substr_datum  *rsd,
+                      struct scan_data_substrs *sub,
+                      STRLEN longest_length)
  {
      /* This is the common code for setting up the floating and fixed length
       * string data extracted from Perl_re_op_compile() below.  Returns a boolean
@@ -6703,6 +6799,8 @@ S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest,
  
      I32 t;
      SSize_t ml;
+    bool eol  = cBOOL(sub->flags & SF_BEFORE_EOL);
+    bool meol = cBOOL(sub->flags & SF_BEFORE_MEOL);
  
      if (! (longest_length
             || (eol /* Can't have SEOL and MULTI */
@@ -6716,29 +6814,29 @@ S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest,
  
      /* copy the information about the longest from the reg_scan_data
          over to the program. */
-    if (SvUTF8(sv_longest)) {
-        *rx_utf8 = sv_longest;
-        *rx_substr = NULL;
+    if (SvUTF8(sub->str)) {
+        rsd->substr      = NULL;
+        rsd->utf8_substr = sub->str;
      } else {
-        *rx_substr = sv_longest;
-        *rx_utf8 = NULL;
+        rsd->substr      = sub->str;
+        rsd->utf8_substr = NULL;
      }
      /* end_shift is how many chars that must be matched that
          follow this item. We calculate it ahead of time as once the
          lookbehind offset is added in we lose the ability to correctly
          calculate it.*/
-    ml = minlen ? *(minlen) : (SSize_t)longest_length;
-    *rx_end_shift = ml - offset
+    ml = sub->minlenp ? *(sub->minlenp) : (SSize_t)longest_length;
+    rsd->end_shift = ml - sub->min_offset
          - longest_length
              /* XXX SvTAIL is always false here - did you mean FBMcf_TAIL
               * instead? - DAPM
-            + (SvTAIL(sv_longest) != 0)
+            + (SvTAIL(sub->str) != 0)
              */
-        + lookbehind;
+        + sub->lookbehind;
  
      t = (eol/* Can't have SEOL and MULTI */
           && (! meol || (RExC_flags & RXf_PMf_MULTILINE)));
-    fbm_compile(sv_longest, t ? FBMcf_TAIL : 0);
+    fbm_compile(sub->str, t ? FBMcf_TAIL : 0);
  
      return TRUE;
  }
@@ -6852,7 +6950,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          if (   ! dump_len_string
              || ! grok_atoUV(dump_len_string, (UV *)&PL_dump_re_max_len, NULL))
          {
-            PL_dump_re_max_len = 0;
+            PL_dump_re_max_len = 60;    /* A reasonable default */
          }
  #endif
      }
@@ -6969,6 +7067,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_seen_unfolded_sharp_s = 0;
      RExC_contains_locale = 0;
      RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
+    RExC_in_script_run = 0;
      RExC_study_started = 0;
      pRExC_state->runtime_code_qr = NULL;
      RExC_frame_head= NULL;
@@ -6981,7 +7080,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      });
      DEBUG_COMPILE_r({
              SV *dsv= sv_newmortal();
-            RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, 60);
+            RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, PL_dump_re_max_len);
              Perl_re_printf( aTHX_  "%sCompiling REx%s %s\n",
                            PL_colors[4],PL_colors[5],s);
          });
@@ -7208,8 +7307,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          /* make sure PL_bitcount bounds not exceeded */
          assert(sizeof(STD_PAT_MODS) <= 8);
  
-        Newx(p, wraplen + 1, char); /* +1 for the ending NUL */
-       r->xpv_len_u.xpvlenu_pv = p;
+        p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
+        SvPOK_on(rx);
         if (RExC_utf8)
             SvFLAGS(rx) |= SVf_UTF8;
          *p++='('; *p++='?';
@@ -7318,7 +7417,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         3-units-long substrs field. */
      Newx(r->substrs, 1, struct reg_substr_data);
      if (RExC_recurse_count) {
-        Newxz(RExC_recurse,RExC_recurse_count,regnode *);
+        Newx(RExC_recurse,RExC_recurse_count,regnode *);
          SAVEFREEPV(RExC_recurse);
      }
  
@@ -7369,12 +7468,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)) { /*  Only one top-level choice.
                                                    */
         SSize_t fake;
-       STRLEN longest_float_length, longest_fixed_length;
+       STRLEN longest_length[2];
         regnode_ssc ch_class; /* pointed to by data */
         int stclass_flag;
         SSize_t last_close = 0; /* pointed to by data */
          regnode *first= scan;
          regnode *first_next= regnext(first);
+        int i;
+
         /*
          * Skip introductions and multiplicators >= 1
          * so that we can extract the 'meat' of the pattern that must
@@ -7416,7 +7517,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
         /* Starting-point info. */
        again:
-        DEBUG_PEEP("first:",first,0);
+        DEBUG_PEEP("first:", first, 0, 0);
          /* Ignore EXACT as we deal with it later. */
         if (PL_regkind[OP(first)] == EXACT) {
             if (OP(first) == EXACT || OP(first) == EXACTL)
@@ -7497,13 +7598,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         * earlier string may buy us something the later one won't.]
         */
  
-       data.longest_fixed = newSVpvs("");
-       data.longest_float = newSVpvs("");
+       data.substrs[0].str = newSVpvs("");
+       data.substrs[1].str = newSVpvs("");
         data.last_found = newSVpvs("");
-       data.longest = &(data.longest_fixed);
+       data.cur_is_floating = 0; /* initially any found substring is fixed */
         ENTER_with_name("study_chunk");
-       SAVEFREESV(data.longest_fixed);
-       SAVEFREESV(data.longest_float);
+       SAVEFREESV(data.substrs[0].str);
+       SAVEFREESV(data.substrs[1].str);
         SAVEFREESV(data.last_found);
         first = scan;
         if (!ri->regstclass) {
@@ -7515,6 +7616,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         data.last_closep = &last_close;
  
          DEBUG_RExC_seen();
+        /*
+         * MAIN ENTRY FOR study_chunk() FOR m/PATTERN/
+         * (NO top level branches)
+         */
         minlen = study_chunk(pRExC_state, &first, &minlen, &fake,
                               scan + RExC_size, /* Up to end */
              &data, -1, 0, NULL,
@@ -7526,7 +7631,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
  
  
-       if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
+       if ( RExC_npar == 1 && !data.cur_is_floating
              && data.last_start_min == 0 && data.last_end > 0
              && !RExC_seen_zerolen
               && !(RExC_seen & REG_VERBARG_SEEN)
@@ -7536,62 +7641,49 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          }
         scan_commit(pRExC_state, &data,&minlen,0);
  
-       longest_float_length = CHR_SVLEN(data.longest_float);
-
-        if (! ((SvCUR(data.longest_fixed)  /* ok to leave SvCUR */
-                   && data.offset_fixed == data.offset_float_min
-                   && SvCUR(data.longest_fixed) == SvCUR(data.longest_float)))
-            && S_setup_longest (aTHX_ pRExC_state,
-                                    data.longest_float,
-                                    &(r->float_utf8),
-                                    &(r->float_substr),
-                                    &(r->float_end_shift),
-                                    data.lookbehind_float,
-                                    data.offset_float_min,
-                                    data.minlen_float,
-                                    longest_float_length,
-                                    cBOOL(data.flags & SF_FL_BEFORE_EOL),
-                                    cBOOL(data.flags & SF_FL_BEFORE_MEOL)))
-        {
-           r->float_min_offset = data.offset_float_min - data.lookbehind_float;
-           r->float_max_offset = data.offset_float_max;
-           if (data.offset_float_max < SSize_t_MAX) /* Don't offset infinity */
-               r->float_max_offset -= data.lookbehind_float;
-           SvREFCNT_inc_simple_void_NN(data.longest_float);
-       }
-       else {
-           r->float_substr = r->float_utf8 = NULL;
-           longest_float_length = 0;
-       }
  
-       longest_fixed_length = CHR_SVLEN(data.longest_fixed);
-
-        if (S_setup_longest (aTHX_ pRExC_state,
-                                data.longest_fixed,
-                                &(r->anchored_utf8),
-                                &(r->anchored_substr),
-                                &(r->anchored_end_shift),
-                                data.lookbehind_fixed,
-                                data.offset_fixed,
-                                data.minlen_fixed,
-                                longest_fixed_length,
-                                cBOOL(data.flags & SF_FIX_BEFORE_EOL),
-                                cBOOL(data.flags & SF_FIX_BEFORE_MEOL)))
-        {
-           r->anchored_offset = data.offset_fixed - data.lookbehind_fixed;
-           SvREFCNT_inc_simple_void_NN(data.longest_fixed);
-       }
-       else {
-           r->anchored_substr = r->anchored_utf8 = NULL;
-           longest_fixed_length = 0;
-       }
+        /* XXX this is done in reverse order because that's the way the
+         * code was before it was parameterised. Don't know whether it
+         * actually needs doing in reverse order. DAPM */
+        for (i = 1; i >= 0; i--) {
+            longest_length[i] = CHR_SVLEN(data.substrs[i].str);
+
+            if (   !(   i
+                     && SvCUR(data.substrs[0].str)  /* ok to leave SvCUR */
+                     &&    data.substrs[0].min_offset
+                        == data.substrs[1].min_offset
+                     &&    SvCUR(data.substrs[0].str)
+                        == SvCUR(data.substrs[1].str)
+                    )
+                && S_setup_longest (aTHX_ pRExC_state,
+                                        &(r->substrs->data[i]),
+                                        &(data.substrs[i]),
+                                        longest_length[i]))
+            {
+                r->substrs->data[i].min_offset =
+                        data.substrs[i].min_offset - data.substrs[i].lookbehind;
+
+                r->substrs->data[i].max_offset = data.substrs[i].max_offset;
+                /* Don't offset infinity */
+                if (data.substrs[i].max_offset < SSize_t_MAX)
+                    r->substrs->data[i].max_offset -= data.substrs[i].lookbehind;
+                SvREFCNT_inc_simple_void_NN(data.substrs[i].str);
+            }
+            else {
+                r->substrs->data[i].substr      = NULL;
+                r->substrs->data[i].utf8_substr = NULL;
+                longest_length[i] = 0;
+            }
+        }
+
         LEAVE_with_name("study_chunk");
  
         if (ri->regstclass
             && (OP(ri->regstclass) == REG_ANY || OP(ri->regstclass) == SANY))
             ri->regstclass = NULL;
  
-       if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
+       if ((!(r->substrs->data[0].substr || r->substrs->data[0].utf8_substr)
+              || r->substrs->data[0].min_offset)
             && stclass_flag
              && ! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
             && is_ssc_worth_it(pRExC_state, data.start_class))
@@ -7614,37 +7706,29 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
              data.start_class = NULL;
         }
  
-        /* A temporary algorithm prefers floated substr to fixed one to dig
-         * more info. */
-       if (longest_fixed_length > longest_float_length) {
-           r->substrs->check_ix = 0;
-           r->check_end_shift = r->anchored_end_shift;
-           r->check_substr = r->anchored_substr;
-           r->check_utf8 = r->anchored_utf8;
-           r->check_offset_min = r->check_offset_max = r->anchored_offset;
-            if (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS))
-                r->intflags |= PREGf_NOSCAN;
-       }
-       else {
-           r->substrs->check_ix = 1;
-           r->check_end_shift = r->float_end_shift;
-           r->check_substr = r->float_substr;
-           r->check_utf8 = r->float_utf8;
-           r->check_offset_min = r->float_min_offset;
-           r->check_offset_max = r->float_max_offset;
-       }
+        /* A temporary algorithm prefers the floating substr to the fixed
+         * one of the same length to dig more info. */
+       i = (longest_length[0] <= longest_length[1]);
+        r->substrs->check_ix = i;
+        r->check_end_shift  = r->substrs->data[i].end_shift;
+        r->check_substr     = r->substrs->data[i].substr;
+        r->check_utf8       = r->substrs->data[i].utf8_substr;
+        r->check_offset_min = r->substrs->data[i].min_offset;
+        r->check_offset_max = r->substrs->data[i].max_offset;
+        if (!i && (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS)))
+            r->intflags |= PREGf_NOSCAN;
+
         if ((r->check_substr || r->check_utf8) ) {
             r->extflags |= RXf_USE_INTUIT;
             if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
                 r->extflags |= RXf_INTUIT_TAIL;
         }
-        r->substrs->data[0].max_offset = r->substrs->data[0].min_offset;
  
         /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
-       if ( (STRLEN)minlen < longest_float_length )
-            minlen= longest_float_length;
-        if ( (STRLEN)minlen < longest_fixed_length )
-            minlen= longest_fixed_length;
+       if ( (STRLEN)minlen < longest_length[1] )
+            minlen= longest_length[1];
+        if ( (STRLEN)minlen < longest_length[0] )
+            minlen= longest_length[0];
          */
      }
      else {
@@ -7661,6 +7745,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         data.last_closep = &last_close;
  
          DEBUG_RExC_seen();
+        /*
+         * MAIN ENTRY FOR study_chunk() FOR m/P1|P2|.../
+         * (patterns WITH top level branches)
+         */
         minlen = study_chunk(pRExC_state,
              &scan, &minlen, &fake, scan + RExC_size, &data, -1, 0, NULL,
              SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
@@ -7670,8 +7758,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
          CHECK_RESTUDY_GOTO_butfirst(NOOP);
  
-       r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
-               = r->float_substr = r->float_utf8 = NULL;
+       r->check_substr = NULL;
+        r->check_utf8 = NULL;
+        r->substrs->data[0].substr      = NULL;
+        r->substrs->data[0].utf8_substr = NULL;
+        r->substrs->data[1].substr      = NULL;
+        r->substrs->data[1].utf8_substr = NULL;
  
          if (! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
             && is_ssc_worth_it(pRExC_state, data.start_class))
@@ -7715,7 +7807,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      if (RExC_seen & REG_RECURSE_SEEN ) {
          r->intflags |= PREGf_RECURSE_SEEN;
-        Newxz(r->recurse_locinput, r->nparens + 1, char *);
+        Newx(r->recurse_locinput, r->nparens + 1, char *);
      }
      if (RExC_seen & REG_GPOS_SEEN)
          r->intflags |= PREGf_GPOS_SEEN;
@@ -10610,13 +10702,28 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
           * here (if paren ==2).  The forms '(*VERB' and '(?...' disallow such
           * intervening space, as the sequence is a token, and a token should be
           * indivisible */
-        bool has_intervening_patws = paren == 2 && *(RExC_parse - 1) != '(';
+        bool has_intervening_patws = (paren == 2 || paren == 's')
+                                  && *(RExC_parse - 1) != '(';
  
          if (RExC_parse >= RExC_end) {
             vFAIL("Unmatched (");
          }
  
-        if ( *RExC_parse == '*') { /* (*VERB:ARG) */
+        if (paren == 's') {
+
+            /* A nested script run is a no-op besides clustering */
+            if (RExC_in_script_run) {
+                paren = ':';
+                nextchar(pRExC_state);
+                ret = NULL;
+                goto parse_rest;
+            }
+            RExC_in_script_run = 1;
+
+           ret = reg_node(pRExC_state, SROPEN);
+            is_open = 1;
+        }
+        else if ( *RExC_parse == '*') { /* (*VERB:ARG) */
             char *start_verb = RExC_parse + 1;
             STRLEN verb_len;
             char *start_arg = NULL;
@@ -10640,6 +10747,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  if (RExC_parse >= RExC_end) {
                      goto unterminated_verb_pattern;
                  }
+
                 RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
                 while ( RExC_parse < RExC_end && *RExC_parse != ')' )
                      RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
@@ -10727,6 +10835,45 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             nextchar(pRExC_state);
             return ret;
          }
+        else if (*RExC_parse == '+') { /* (+...) */
+            RExC_parse++;
+
+            if (has_intervening_patws) {
+                /* XXX Note that a potential gotcha is that outside of /x '( +
+                 * ...)' means to match a space at least once ...   This is a
+                 * problem elsewhere too */
+                vFAIL("In '(+...)', the '(' and '+' must be adjacent");
+            }
+
+            if (! memBEGINPs(RExC_parse, (STRLEN) (RExC_end - RExC_parse),
+                             "script_run:"))
+            {
+                RExC_parse += strcspn(RExC_parse, ":)");
+                vFAIL("Unknown (+ pattern");
+            }
+            else {
+
+                /* This indicates Unicode rules. */
+                REQUIRE_UNI_RULES(flagp, NULL);
+
+                RExC_parse += sizeof("script_run:") - 1;
+
+                if (PASS2) {
+                    Perl_ck_warner_d(aTHX_
+                        packWARN(WARN_EXPERIMENTAL__SCRIPT_RUN),
+                        "The script_run feature is experimental"
+                        REPORT_LOCATION, REPORT_LOCATION_ARGS(RExC_parse));
+                }
+
+                ret = reg(pRExC_state, 's', &flags, depth+1);
+                if (flags & (RESTART_PASS1|NEED_UTF8)) {
+                    *flagp = flags & (RESTART_PASS1|NEED_UTF8);
+                    return NULL;
+                }
+
+                return ret;
+            }
+        }
          else if (*RExC_parse == '?') { /* (?...) */
             bool is_logical = 0;
             const char * const seqstart = RExC_parse;
@@ -11138,8 +11285,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      ret = reganode(pRExC_state,NGROUPP,num);
                      goto insert_if_check_paren;
                 }
-               else if (RExC_end - RExC_parse >= DEFINE_len
-                        && strnEQ(RExC_parse, "DEFINE", DEFINE_len))
+               else if (memBEGINs(RExC_parse,
+                                   (STRLEN) (RExC_end - RExC_parse),
+                                   "DEFINE"))
                  {
                     ret = reganode(pRExC_state,DEFINEP,0);
                     RExC_parse += DEFINE_len;
@@ -11274,7 +11422,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  vFAIL("Unknown switch condition (?(...))");
             }
             case '[':           /* (?[ ... ]) */
-                return handle_regex_sets(pRExC_state, NULL, flagp, depth,
+                return handle_regex_sets(pRExC_state, NULL, flagp, depth+1,
                                           oregcomp_parse);
              case 0: /* A NUL */
                 RExC_parse--; /* for vFAIL to print correctly */
@@ -11296,7 +11444,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  goto parse_rest;
              } /* end switch */
         }
-       else if (!(RExC_flags & RXf_PMf_NOCAPTURE)) {   /* (...) */
+       else {
+            if (*RExC_parse == '{' && PASS2) {
+                ckWARNregdep(RExC_parse + 1,
+                            "Unescaped left brace in regex is "
+                            "deprecated here (and will be fatal "
+                            "in Perl 5.32), passed through");
+            }
+            /* Not bothering to indent here, as the above 'else' is temporary
+             * */
+        if (!(RExC_flags & RXf_PMf_NOCAPTURE)) {   /* (...) */
           capturing_parens:
             parno = RExC_npar;
             RExC_npar++;
@@ -11322,6 +11479,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              paren = ':';
             ret = NULL;
         }
+       }
      }
      else                        /* ! paren */
         ret = NULL;
@@ -11411,6 +11569,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              Set_Node_Offset(ender,RExC_parse+1); /* MJD */
              Set_Node_Length(ender,1); /* MJD */
             break;
+       case 's':
+           ender = reg_node(pRExC_state, SRCLOSE);
+            RExC_in_script_run = 0;
+           break;
         case '<':
         case ',':
         case '=':
@@ -11743,14 +11905,12 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             if ((flags&SIMPLE)) {
                  if (min == 0 && max == REG_INFTY) {
                      reginsert(pRExC_state, STAR, ret, depth+1);
-                    ret->flags = 0;
                      MARK_NAUGHTY(4);
                      RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
                      goto nest_check;
                  }
                  if (min == 1 && max == REG_INFTY) {
                      reginsert(pRExC_state, PLUS, ret, depth+1);
-                    ret->flags = 0;
                      MARK_NAUGHTY(3);
                      RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
                      goto nest_check;
@@ -11863,7 +12023,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          ender = reg_node(pRExC_state, SUCCEED);
          REGTAIL(pRExC_state, ret, ender);
          reginsert(pRExC_state, SUSPEND, ret, depth+1);
-        ret->flags = 0;
          ender = reg_node(pRExC_state, TAIL);
          REGTAIL(pRExC_state, ret, ender);
      }
@@ -12018,13 +12177,15 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
      RExC_parse++;      /* Skip past the '{' */
  
-    if (! (endbrace = strchr(RExC_parse, '}'))) { /* no trailing brace */
+    endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
+    if (! endbrace) { /* no trailing brace */
          vFAIL2("Missing right brace on \\%c{}", 'N');
      }
-    else if(!(endbrace == RExC_parse           /* nothing between the {} */
-              || (endbrace - RExC_parse >= 2   /* U+ (bad hex is checked... */
-                  && strnEQ(RExC_parse, "U+", 2)))) /* ... below for a better
-                                                       error msg) */
+    else if (!(   endbrace == RExC_parse       /* nothing between the {} */
+               || memBEGINs(RExC_parse,   /* U+ (bad hex is checked below
+                                                   for a  better error msg) */
+                                  (STRLEN) (RExC_end - RExC_parse),
+                                 "U+")))
      {
         RExC_parse = endbrace;  /* position msg's '<--HERE' */
         vFAIL("\\N{NAME} must be resolved by the lexer");
@@ -12072,22 +12233,22 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          /* Convert code point from hex */
         length_of_hex = (STRLEN)(endchar - RExC_parse);
         grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
-                           | PERL_SCAN_DISALLOW_PREFIX
+                       | PERL_SCAN_DISALLOW_PREFIX
  
-                             /* No errors in the first pass (See [perl
-                              * #122671].)  We let the code below find the
-                              * errors when there are multiple chars. */
-                           | ((SIZE_ONLY)
-                              ? PERL_SCAN_SILENT_ILLDIGIT
-                              : 0);
+                           /* No errors in the first pass (See [perl
+                            * #122671].)  We let the code below find the
+                            * errors when there are multiple chars. */
+                       | ((SIZE_ONLY)
+                          ? PERL_SCAN_SILENT_ILLDIGIT
+                          : 0);
  
          /* This routine is the one place where both single- and double-quotish
           * \N{U+xxxx} are evaluated.  The value is a Unicode code point which
           * must be converted to native. */
         *code_point_p = UNI_TO_NATIVE(grok_hex(RExC_parse,
-                                         &length_of_hex,
-                                         &grok_hex_flags,
-                                         NULL));
+                                               &length_of_hex,
+                                               &grok_hex_flags,
+                                               NULL));
  
         /* The tokenizer should have guaranteed validity, but it's possible to
           * bypass it by using single quoting, so check.  Don't do the check
@@ -12128,7 +12289,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          }
  
          /* Fail if caller doesn't want to handle a multi-code-point sequence.
-         * But don't backup up the pointer if the caller want to know how many
+         * But don't back up the pointer if the caller wants to know how many
           * code points there are (they can then handle things) */
          if (! node_p) {
              if (! cp_count) {
@@ -12159,14 +12320,16 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         }
          sv_catpv(substitute_parse, ")");
  
-        RExC_parse = RExC_start = RExC_adjusted_start = SvPV(substitute_parse,
-                                                             len);
+        len = SvCUR(substitute_parse);
  
         /* Don't allow empty number */
         if (len < (STRLEN) 8) {
              RExC_parse = endbrace;
             vFAIL("Invalid hexadecimal number in \\N{U+...}");
         }
+
+        RExC_parse = RExC_start = RExC_adjusted_start
+                                              = SvPV_nolen(substitute_parse);
         RExC_end = RExC_parse + len;
  
          /* The values are Unicode, and therefore not subject to recoding, but
@@ -12176,17 +12339,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          RExC_recode_x_to_native = 1;
  #endif
  
-        if (node_p) {
-            if (!(*node_p = reg(pRExC_state, 1, &flags, depth+1))) {
-                if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                    *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                    return FALSE;
-                }
-                FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
-                    (UV) flags);
-            }
-            *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
-        }
+        *node_p = reg(pRExC_state, 1, &flags, depth+1);
  
          /* Restore the saved values */
         RExC_start = RExC_adjusted_start = save_start;
@@ -12195,8 +12348,18 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  #ifdef EBCDIC
          RExC_recode_x_to_native = 0;
  #endif
-
          SvREFCNT_dec_NN(substitute_parse);
+
+        if (! *node_p) {
+            if (flags & (RESTART_PASS1|NEED_UTF8)) {
+                *flagp = flags & (RESTART_PASS1|NEED_UTF8);
+                return FALSE;
+            }
+            FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
+                (UV) flags);
+        }
+        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+
          nextchar(pRExC_state);
  
          return TRUE;
@@ -12399,6 +12562,52 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
      }
  }
  
+STATIC bool
+S_new_regcurly(const char *s, const char *e)
+{
+    /* This is a temporary function designed to match the most lenient form of
+     * a {m,n} quantifier we ever envision, with either number omitted, and
+     * spaces anywhere between/before/after them.  Only the bytes from 's'
+     * (the candidate '{') up to but not including 'e' are examined.
+     *
+     * If this function fails, then the string it matches is very unlikely to
+     * ever be considered a valid quantifier, so we can allow the '{' that
+     * begins it to be considered as a literal */
+    bool has_min = FALSE;
+    bool has_max = FALSE;
+
+    PERL_ARGS_ASSERT_NEW_REGCURLY;
+
+    if (s >= e || *s++ != '{')
+        return FALSE;
+
+    while (s < e && isSPACE(*s)) {
+        s++;
+    }
+    while (s < e && isDIGIT(*s)) {
+        has_min = TRUE;
+        s++;
+    }
+    while (s < e && isSPACE(*s)) {
+        s++;
+    }
+
+    if (s < e && *s == ',') {   /* 's' may already have reached 'e' */
+        s++;
+        while (s < e && isSPACE(*s)) {
+            s++;
+        }
+        while (s < e && isDIGIT(*s)) {
+            has_max = TRUE;
+            s++;
+        }
+        while (s < e && isSPACE(*s)) {
+            s++;
+        }
+    }
+
+    return s < e && *s == '}' && (has_min || has_max);
+}
  
  /* Parse backref decimal value, unless it's too big to sensibly be a backref,
   * in which case return I32_MAX (rather than possibly 32-bit wrapping) */
@@ -12685,9 +12894,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              else {
                  STRLEN length;
                  char name = *RExC_parse;
-                char * endbrace;
+                char * endbrace = NULL;
                  RExC_parse += 2;
-                endbrace = strchr(RExC_parse, '}');
+                if (RExC_parse < RExC_end) {
+                    endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
+                }
  
                  if (! endbrace) {
                      vFAIL2("Missing right brace on \\%c{}", name);
@@ -12708,8 +12919,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  }*/
                  switch (*RExC_parse) {
                      case 'g':
-                        if (length != 1
-                            && (length != 3 || strnNE(RExC_parse + 1, "cb", 2)))
+                        if (    length != 1
+                            && (memNEs(RExC_parse + 1, length - 1, "cb")))
                          {
                              goto bad_bound_type;
                          }
@@ -12833,6 +13044,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              /* FALLTHROUGH */
  
            finish_meta_pat:
+            if (   UCHARAT(RExC_parse + 1) == '{'
+                && UNLIKELY(! new_regcurly(RExC_parse + 1, RExC_end)))
+            {
+                RExC_parse += 2;
+                vFAIL("Unescaped left brace in regex is illegal here");
+            }
             nextchar(pRExC_state);
              Set_Node_Length(ret, 2); /* MJD */
             break;
@@ -13072,8 +13289,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             UV ender = 0;
             char *p;
             char *s;
-#define MAX_NODE_STRING_SIZE 127
-           char foldbuf[MAX_NODE_STRING_SIZE+UTF8_MAXBYTES_CASE];
+
+/* This allows us to fill a node with just enough spare so that if the final
+ * character folds, its expansion is guaranteed to fit */
+#define MAX_NODE_STRING_SIZE (255-UTF8_MAXBYTES_CASE)
+           char foldbuf[MAX_NODE_STRING_SIZE+UTF8_MAXBYTES_CASE+1];
+
             char *s0;
             U8 upper_parse = MAX_NODE_STRING_SIZE;
              U8 node_type = compute_EXACTish(pRExC_state);
@@ -13095,9 +13316,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
              /* If a folding node contains only code points that don't
               * participate in folds, it can be changed into an EXACT node,
-             * which allows the optimizer more things to look for */
+             * which allows the optimizer more things to look for, and is
+             * faster to match */
              bool maybe_exact;
  
+            /* The node_type may change below, but since the size of the node
+             * doesn't change, it works */
             ret = reg_node(pRExC_state, node_type);
  
              /* In pass1, folded, we use a temporary buffer instead of the
@@ -13117,36 +13341,29 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
               * use a pseudo regnode like 'EXACT_ORIG_FOLD' */
              maybe_exact = FOLD && PASS2;
  
-           /* XXX The node can hold up to 255 bytes, yet this only goes to
-             * 127.  I (khw) do not know why.  Keeping it somewhat less than
-             * 255 allows us to not have to worry about overflow due to
-             * converting to utf8 and fold expansion, but that value is
-             * 255-UTF8_MAXBYTES_CASE.  join_exact() may join adjacent nodes
-             * split up by this limit into a single one using the real max of
-             * 255.  Even at 127, this breaks under rare circumstances.  If
-             * folding, we do not want to split a node at a character that is a
-             * non-final in a multi-char fold, as an input string could just
-             * happen to want to match across the node boundary.  The join
-             * would solve that problem if the join actually happens.  But a
-             * series of more than two nodes in a row each of 127 would cause
-             * the first join to succeed to get to 254, but then there wouldn't
-             * be room for the next one, which could at be one of those split
-             * multi-char folds.  I don't know of any fool-proof solution.  One
-             * could back off to end with only a code point that isn't such a
-             * non-final, but it is possible for there not to be any in the
-             * entire node. */
-
-            assert(   ! UTF     /* Is at the beginning of a character */
+            /* This breaks under rare circumstances.  If folding, we do not
+             * want to split a node at a character that is a non-final in a
+             * multi-char fold, as an input string could just happen to want to
+             * match across the node boundary.  The code at the end of the loop
+             * looks for this, and backs off until it finds a character that
+             * isn't one, but it is possible (though extremely, extremely
+             * unlikely) for all characters in the node to be non-final fold
+             * ones, in which case we just leave the node fully filled, and
+             * hope that it doesn't match the string in just the wrong place */
+
+            assert( ! UTF     /* Is at the beginning of a character */
                     || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                     || UTF8_IS_START(UCHARAT(RExC_parse)));
  
              /* Here, we have a literal character.  Find the maximal string of
               * them in the input that we can fit into a single EXACTish node.
               * We quit at the first non-literal or when the node gets full */
-           for (p = RExC_parse;
-                len < upper_parse && p < RExC_end;
-                len++)
-           {
+           for (p = RExC_parse; len < upper_parse && p < RExC_end; ) {
+
+                /* In most cases each iteration adds one byte to the output.
+                 * The exceptions override this */
+                Size_t added_len = 1;
+
                 oldp = p;
  
                  /* White space has already been ignored */
@@ -13229,6 +13446,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              goto loopdone;
                          }
                          p = RExC_parse;
+                        RExC_parse = parse_start;
                          if (ender > 0xff) {
                              REQUIRE_UTF8(flagp);
                          }
@@ -13259,6 +13477,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             const char* error_msg;
  
                             bool valid = grok_bslash_o(&p,
+                                                       RExC_end,
                                                        &result,
                                                        &error_msg,
                                                        PASS2, /* out warnings */
@@ -13285,6 +13504,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             const char* error_msg;
  
                             bool valid = grok_bslash_x(&p,
+                                                       RExC_end,
                                                        &result,
                                                        &error_msg,
                                                        PASS2, /* out warnings */
@@ -13382,22 +13602,25 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     } /* End of switch on '\' */
                     break;
                 case '{':
-                   /* Currently we don't care if the lbrace is at the start
-                    * of a construct.  This catches it in the middle of a
-                    * literal string, or when it's the first thing after
-                    * something like "\b" */
-                   if (len || (p > RExC_start && isALPHA_A(*(p -1)))) {
-
-                        /* GNU Autoconf is depended on by a lot of code, and
-                         * can't seem to release a new version that avoids the
-                         * deprecation now made fatal.  (A commit to do so has
-                         * been in its repository since early 2013; only one
-                         * pattern is affected.)  As a work-around, don't
-                         * fatalize this if the pattern being compiled is the
-                         * precise one that trips up Autoconf.  See [perl
-                         * #130497] for more details. */
-                        if (memNEs(RExC_start, RExC_end - RExC_start,
-                                   "\\${[^\\}]*}"))
+                    /* Currently we allow an lbrace at the start of a construct
+                     * without raising a warning.  This is because we think we
+                     * will never want such a brace to be meant to be other
+                     * than taken literally. */
+                   if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
+
+                        /* But, we raise a fatal warning otherwise, as the
+                         * deprecation cycle has come and gone.  Except that it
+                         * turns out that some heavily-relied on upstream
+                         * software, notably GNU Autoconf, have failed to fix
+                         * their uses.  For these, don't make it fatal unless
+                         * we anticipate using the '{' for something else.
+                         * This happens after any alpha, and for a looser {m,n}
+                         * quantifier specification */
+                        if (      RExC_strict
+                            || (  p > parse_start + 1
+                                && isALPHA_A(*(p - 1))
+                                && *(p - 2) == '\\')
+                            || new_regcurly(p, RExC_end))
                          {
                              RExC_parse = p + 1;
                              vFAIL("Unescaped left brace in regex is "
@@ -13430,8 +13653,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     break;
                 } /* End of switch on the literal */
  
-               /* Here, have looked at the literal character and <ender>
-                 * contains its ordinal, <p> points to the character after it.
+               /* Here, have looked at the literal character, and <ender>
+                 * contains its ordinal; <p> points to the character after it.
                   * We need to check if the next non-ignored thing is a
                   * quantifier.  Move <p> to after anything that should be
                   * ignored, which, as a side effect, positions <p> for the next
@@ -13447,10 +13670,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                   * this character again next time through, when it will be the
                   * only thing in its new node */
  
-                if ((next_is_quantifier = (   LIKELY(p < RExC_end)
-                                           && UNLIKELY(ISMULT2(p))))
-                    && LIKELY(len))
-               {
+                next_is_quantifier =    LIKELY(p < RExC_end)
+                                     && UNLIKELY(ISMULT2(p));
+
+                if (next_is_quantifier && LIKELY(len)) {
                      p = oldp;
                      goto loopdone;
                  }
@@ -13463,27 +13686,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       * character we are appending, hence we can delay getting
                       * its representation until PASS2. */
                      if (SIZE_ONLY) {
-                        if (UTF) {
+                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                              const STRLEN unilen = UVCHR_SKIP(ender);
                              s += unilen;
-
-                            /* We have to subtract 1 just below (and again in
-                             * the corresponding PASS2 code) because the loop
-                             * increments <len> each time, as all but this path
-                             * (and one other) through it add a single byte to
-                             * the EXACTish node.  But these paths would change
-                             * len to be the correct final value, so cancel out
-                             * the increment that follows */
-                            len += unilen - 1;
+                            added_len = unilen;
                          }
                          else {
                              s++;
                          }
                      } else { /* PASS2 */
                        not_fold_common:
-                        if (UTF) {
+                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                              U8 * new_s = uvchr_to_utf8((U8*)s, ender);
-                            len += (char *) new_s - s - 1;
+                            added_len = (char *) new_s - s;
                              s = (char *) new_s;
                          }
                          else {
@@ -13496,6 +13711,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      /* Here are folding under /l, and the code point is
                       * problematic.  First, we know we can't simplify things */
                      maybe_exact = FALSE;
+
+                    /* This code point means we can't simplify things */
                      maybe_exactfu = FALSE;
  
                      /* A problematic code point in this context means that its
@@ -13525,7 +13742,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  #endif
                  )) {
                      /* Here, are folding and are not UTF-8 encoded; therefore
-                     * the character must be in the range 0-255, and is not /l
+                     * the character must be in the range 0-255, and is not /l.
                       * (Not /l because we already handled these under /l in
                       * is_PROBLEMATIC_LOCALE_FOLD_cp) */
                      if (IS_IN_SOME_FOLD_L1(ender)) {
@@ -13552,25 +13769,26 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          }
                      }
  
-                    /* Even when folding, we store just the input character, as
-                     * we have an array that finds its fold quickly */
-                    *(s++) = (char) ender;
+                        /* Even when folding, we store just the input
+                         * character, as we have an array that finds its fold
+                         * quickly */
+                        *(s++) = (char) ender;
                  }
                  else {  /* FOLD, and UTF (or sharp s) */
                      /* Unlike the non-fold case, we do actually have to
-                     * calculate the results here in pass 1.  This is for two
-                     * reasons, the folded length may be longer than the
-                     * unfolded, and we have to calculate how many EXACTish
-                     * nodes it will take; and we may run out of room in a node
-                     * in the middle of a potential multi-char fold, and have
-                     * to back off accordingly.  */
+                     * calculate the fold in pass 1.  This is for two reasons,
+                     * the folded length may be longer than the unfolded, and
+                     * we have to calculate how many EXACTish nodes it will
+                     * take; and we may run out of room in a node in the middle
+                     * of a potential multi-char fold, and have to back off
+                     * accordingly.  */
  
                      UV folded;
                      if (isASCII_uni(ender)) {
                          folded = toFOLD(ender);
                          *(s)++ = (U8) folded;
                      }
-                    else {
+                    else {  /* Not ASCII */
                          STRLEN foldlen;
  
                          folded = _to_uni_fold_flags(
@@ -13581,13 +13799,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                                          ? FOLD_FLAGS_NOMIX_ASCII
                                                          : 0));
                          s += foldlen;
-
-                        /* The loop increments <len> each time, as all but this
-                         * path (and one other) through it add a single byte to
-                         * the EXACTish node.  But this one has changed len to
-                         * be the correct final value, so subtract one to
-                         * cancel out the increment that follows */
-                        len += foldlen - 1;
+                        added_len = foldlen;
                      }
                      /* If this node only contains non-folding code points so
                       * far, see if this new one is also non-folding */
@@ -13608,13 +13820,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      ender = folded;
                 }
  
+                len += added_len;
+
                 if (next_is_quantifier) {
  
                      /* Here, the next input is a quantifier, and to get here,
-                     * the current character is the only one in the node.
-                     * Also, here <len> doesn't include the final byte for this
-                     * character */
-                    len++;
+                     * the current character is the only one in the node. */
                      goto loopdone;
                 }
  
@@ -13672,7 +13883,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      s = (char *) utf8_hop((U8 *) s, -1);
  
                      while (s >= s0) {   /* Search backwards until find
-                                           non-problematic char */
+                                           a non-problematic char */
                          if (UTF8_IS_INVARIANT(*s)) {
  
                              /* There are no ascii characters that participate
@@ -13809,6 +14020,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                    : EXACTFU;
                      }
                  }
+
                  alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender,
                                             FALSE /* Don't look to see if could
                                                      be turned into an EXACT
@@ -13936,6 +14148,17 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
                                               REPORT_LOCATION_ARGS(p)));     \
          }                                                                   \
      } STMT_END
+#define CLEAR_POSIX_WARNINGS()                                              \
+    STMT_START {        /* Needs 'posix_warnings' in the caller's scope */  \
+        if (posix_warnings && RExC_warn_text)                               \
+            av_clear(RExC_warn_text);                                       \
+    } STMT_END
+/* As above, and then return 'ret' from the function that invoked the macro */
+#define CLEAR_POSIX_WARNINGS_AND_RETURN(ret)                                \
+    STMT_START {                                                            \
+        CLEAR_POSIX_WARNINGS();                                             \
+        return ret;                                                         \
+    } STMT_END
  
  STATIC int
  S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
@@ -14008,7 +14231,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
       *
       * The syntax for a legal posix class is:
       *
-     * qr/(?xa: \[ : \^? [:lower:]{4,6} : \] )/
+     * qr/(?xa: \[ : \^? [[:lower:]]{4,6} : \] )/
       *
       * What this routine considers syntactically to be an intended posix class
       * is this (the comments indicate some restrictions that the pattern
@@ -14033,7 +14256,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
       *                                      # for it to be considered to be
       *                                      # an intended posix class.
       *          \h*
-     *          [:punct:]?                  # The closing class character,
+     *          [[:punct:]]?                # The closing class character,
       *                                      # possibly omitted.  If not a colon
       *                                      # nor semi colon, the class name
       *                                      # must be even closer to a valid
@@ -14072,11 +14295,11 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
       * decide that no posix class was intended.  Should be at least
       * sizeof("alphanumeric") */
      UV input_text[15];
+    STATIC_ASSERT_DECL(C_ARRAY_LENGTH(input_text) >= sizeof "alphanumeric");
  
      PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX;
  
-    if (posix_warnings && RExC_warn_text)
-        av_clear(RExC_warn_text);
+    CLEAR_POSIX_WARNINGS();
  
      if (p >= e) {
          return NOT_MEANT_TO_BE_A_POSIX_CLASS;
@@ -14168,7 +14391,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                      *updated_parse_ptr = (char *) temp_ptr;
                  }
  
-                return OOB_NAMEDCLASS;
+                CLEAR_POSIX_WARNINGS_AND_RETURN(OOB_NAMEDCLASS);
              }
          }
  
@@ -14238,7 +14461,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
          /* We consider something like [^:^alnum:]] to not have been intended to
           * be a posix class, but XXX maybe we should */
          if (complement) {
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
          }
  
          complement = 1;
@@ -14265,7 +14488,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
           * this leaves this construct looking like [:] or [:^], which almost
           * certainly weren't intended to be posix classes */
          if (has_opening_bracket) {
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
          }
  
          /* But this function can be called when we parse the colon for
@@ -14282,7 +14505,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
              /* XXX We are currently very restrictive here, so this code doesn't
               * consider the possibility that, say, /[alpha.]]/ was intended to
               * be a posix class. */
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
          }
  
          /* Here we have something like 'foo:]'.  There was no initial colon,
@@ -14452,7 +14675,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
              }
  
              /* Otherwise, it can't have meant to have been a class */
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
          }
  
          /* If we ran off the end, and the final character was a punctuation
@@ -14502,14 +14725,14 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
           * class name.  (We can do this on the first pass, as any second pass
           * will yield an even shorter name) */
          if (name_len < 3) {
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
          }
  
          /* Find which class it is.  Initially switch on the length of the name.
           * */
          switch (name_len) {
              case 4:
-                if (memEQ(name_start, "word", 4)) {
+                if (memEQs(name_start, 4, "word")) {
                      /* this is not POSIX, this is the Perl \w */
                      class_number = ANYOF_WORDCHAR;
                  }
@@ -14520,51 +14743,51 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                   * Offset 4 gives the best switch position.  */
                  switch (name_start[4]) {
                      case 'a':
-                        if (memEQ(name_start, "alph", 4)) /* alpha */
+                        if (memBEGINs(name_start, 5, "alph")) /* alpha */
                              class_number = ANYOF_ALPHA;
                          break;
                      case 'e':
-                        if (memEQ(name_start, "spac", 4)) /* space */
+                        if (memBEGINs(name_start, 5, "spac")) /* space */
                              class_number = ANYOF_SPACE;
                          break;
                      case 'h':
-                        if (memEQ(name_start, "grap", 4)) /* graph */
+                        if (memBEGINs(name_start, 5, "grap")) /* graph */
                              class_number = ANYOF_GRAPH;
                          break;
                      case 'i':
-                        if (memEQ(name_start, "asci", 4)) /* ascii */
+                        if (memBEGINs(name_start, 5, "asci")) /* ascii */
                              class_number = ANYOF_ASCII;
                          break;
                      case 'k':
-                        if (memEQ(name_start, "blan", 4)) /* blank */
+                        if (memBEGINs(name_start, 5, "blan")) /* blank */
                              class_number = ANYOF_BLANK;
                          break;
                      case 'l':
-                        if (memEQ(name_start, "cntr", 4)) /* cntrl */
+                        if (memBEGINs(name_start, 5, "cntr")) /* cntrl */
                              class_number = ANYOF_CNTRL;
                          break;
                      case 'm':
-                        if (memEQ(name_start, "alnu", 4)) /* alnum */
+                        if (memBEGINs(name_start, 5, "alnu")) /* alnum */
                              class_number = ANYOF_ALPHANUMERIC;
                          break;
                      case 'r':
-                        if (memEQ(name_start, "lowe", 4)) /* lower */
+                        if (memBEGINs(name_start, 5, "lowe")) /* lower */
                              class_number = (FOLD) ? ANYOF_CASED : ANYOF_LOWER;
-                        else if (memEQ(name_start, "uppe", 4)) /* upper */
+                        else if (memBEGINs(name_start, 5, "uppe")) /* upper */
                              class_number = (FOLD) ? ANYOF_CASED : ANYOF_UPPER;
                          break;
                      case 't':
-                        if (memEQ(name_start, "digi", 4)) /* digit */
+                        if (memBEGINs(name_start, 5, "digi")) /* digit */
                              class_number = ANYOF_DIGIT;
-                        else if (memEQ(name_start, "prin", 4)) /* print */
+                        else if (memBEGINs(name_start, 5, "prin")) /* print */
                              class_number = ANYOF_PRINT;
-                        else if (memEQ(name_start, "punc", 4)) /* punct */
+                        else if (memBEGINs(name_start, 5, "punc")) /* punct */
                              class_number = ANYOF_PUNCT;
                          break;
                  }
                  break;
              case 6:
-                if (memEQ(name_start, "xdigit", 6))
+                if (memEQs(name_start, 6, "xdigit"))
                      class_number = ANYOF_XDIGIT;
                  break;
          }
@@ -14661,7 +14884,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
              }
  
              /* Here neither pass found a close-enough class name */
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
          }
  
      probably_meant_to_be:
@@ -14703,7 +14926,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
              /* If it is a known class, return the class.  The class number
               * #defines are structured so each complement is +1 to the normal
               * one */
-            return class_number + complement;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(class_number + complement);
          }
          else if (! check_only) {
  
@@ -14768,9 +14991,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                         'stack' of where the undealt-with left
                                         parens would be if they were actually
                                         put there */
-    /* The 'VOL' (expanding to 'volatile') is a workaround for an optimiser bug
+    /* The 'volatile' is a workaround for an optimiser bug
       * in Solaris Studio 12.3. See RT #127455 */
-    VOL IV fence = 0;               /* Position of where most recent undealt-
+    volatile IV fence = 0;          /* Position of where most recent undealt-
                                         with left paren in stack is; -1 if none.
                                       */
      STRLEN len;                     /* Temporary */
@@ -14785,6 +15008,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
  
      PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
  
+    DEBUG_PARSE("xcls");
+
      if (in_locale) {
          set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
      }
@@ -14802,7 +15027,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
       * these things, we need to realize that something preceded by a backslash
       * is escaped, so we have to keep track of backslashes */
      if (SIZE_ONLY) {
-        UV depth = 0; /* how many nested (?[...]) constructs */
+        UV nest_depth = 0; /* how many nested (?[...]) constructs */
  
          while (RExC_parse < RExC_end) {
              SV* current = NULL;
@@ -14811,8 +15036,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                      TRUE /* Force /x */ );
  
              switch (*RExC_parse) {
-                case '?':
-                    if (RExC_parse[1] == '[') depth++, RExC_parse++;
+                case '(':
+                    if (RExC_parse[1] == '?' && RExC_parse[2] == '[')
+                        nest_depth++, RExC_parse+=2;
                      /* FALLTHROUGH */
                  default:
                      break;
@@ -14869,9 +15095,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                  }
  
                  case ']':
-                    if (depth--) break;
-                    RExC_parse++;
-                    if (*RExC_parse == ')') {
+                    if (RExC_parse[1] == ')') {
+                        RExC_parse++;
+                        if (nest_depth--) break;
                          node = reganode(pRExC_state, ANYOF, 0);
                          RExC_size += ANYOF_SKIP;
                          nextchar(pRExC_state);
@@ -14883,20 +15109,25 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
  
                          return node;
                      }
-                    goto no_close;
+                    /* We output the messages even if warnings are off, because we'll fail
+                     * the very next thing, and these give a likely diagnosis for that */
+                    if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
+                        output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
+                    }
+                    RExC_parse++;
+                    vFAIL("Unexpected ']' with no following ')' in (?[...");
              }
  
              RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
          }
  
-      no_close:
          /* We output the messages even if warnings are off, because we'll fail
           * the very next thing, and these give a likely diagnosis for that */
          if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
              output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
          }
  
-        FAIL("Syntax error in (?[...])");
+        vFAIL("Syntax error in (?[...])");
      }
  
      /* Pass 2 only after this. */
@@ -15076,12 +15307,14 @@ redo_curchar:
                       * inversion list, and RExC_parse points to the trailing
                       * ']'; the next character should be the ')' */
                      RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for nested extended charclass");
  
                      /* Then the ')' matching the original '(' handled by this
                       * case: statement */
                      RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for wrapper for nested extended charclass");
  
                      RExC_parse++;
                      RExC_flags = save_flags;
@@ -16081,6 +16314,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          do_posix_warnings ? &posix_warnings : NULL,
                          TRUE /* checking only */);
          }
+        else if (  strict && ! skip_white
+                 && (   _generic_isCC(value, _CC_VERTSPACE)
+                     || is_VERTWS_cp_high(value)))
+        {
+            vFAIL("Literal vertical space in [] is illegal except under /x");
+        }
          else if (value == '\\') {
              /* Is a backslash; get the code point of the char after it */
  
@@ -16201,7 +16440,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     vFAIL2("Empty \\%c", (U8)value);
                 if (*RExC_parse == '{') {
                     const U8 c = (U8)value;
-                   e = strchr(RExC_parse, '}');
+                   e = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
                      if (!e) {
                          RExC_parse++;
                          vFAIL2("Missing right brace on \\%c{}", c);
@@ -16333,7 +16572,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                           * referred to outside it. [perl #121777] */
                          if (! has_pkg && curpkg) {
                              char* pkgname = HvNAME(curpkg);
-                            if (strNE(pkgname, "main")) {
+                            if (memNEs(pkgname, HvNAMELEN(curpkg), "main")) {
                                  char* full_name = Perl_form(aTHX_
                                                              "%s::%s",
                                                              pkgname,
@@ -16416,6 +16655,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 {
                     const char* error_msg;
                     bool valid = grok_bslash_o(&RExC_parse,
+                                               RExC_end,
                                                &value,
                                                &error_msg,
                                                 PASS2,   /* warnings only in
@@ -16434,6 +16674,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 {
                     const char* error_msg;
                     bool valid = grok_bslash_x(&RExC_parse,
+                                               RExC_end,
                                                &value,
                                                &error_msg,
                                                PASS2, /* Output warnings */
@@ -16784,7 +17025,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              {
                  /* Here <value> is indeed a multi-char fold.  Get what it is */
  
-                U8 foldbuf[UTF8_MAXBYTES_CASE];
+                U8 foldbuf[UTF8_MAXBYTES_CASE+1];
                  STRLEN foldlen;
  
                  UV folded = _to_uni_fold_flags(
@@ -16859,7 +17100,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                            " be some subset of \"0-9\","
                                            " \"A-Z\", or \"a-z\"");
                      }
-                    else if (prevvalue >= 0x660) { /* ARABIC_INDIC_DIGIT_ZERO */
+                    else if (prevvalue >= FIRST_NON_ASCII_DECIMAL_DIGIT) {
                          SSize_t index_start;
                          SSize_t index_final;
  
@@ -16867,8 +17108,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                           * can't do the same checks for above-ASCII ranges,
                           * except in the case of digit ones.  These should
                           * contain only digits from the same group of 10.  The
-                         * ASCII case is handled just above.  0x660 is the
-                         * first digit character beyond ASCII.  Hence here, the
+                         * ASCII case is handled just above.  Hence here, the
                           * range could be a range of digits.  First some
                           * unlikely special cases.  Grandfather in that a range
                           * ending in 19DA (NEW TAI LUE THAM DIGIT ONE) is bad
@@ -17162,14 +17402,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  /* The actual POSIXish node for all the rest depends on the
                   * charset modifier.  The ones in the first set depend only on
                   * ASCII or, if available on this platform, also locale */
+
                  case ANYOF_ASCII:
                  case ANYOF_NASCII:
+
  #ifdef HAS_ISASCII
-                    op = (LOC) ? POSIXL : POSIXA;
-#else
-                    op = POSIXA;
+                    if (LOC) {
+                        op = POSIXL;
+                        goto join_posix;
+                    }
  #endif
-                    goto join_posix;
+                    /* (namedclass - ANYOF_ASCII) is 0 or 1. xor'ing with
+                     * invert converts that to 1 or 0 */
+                    op = ASCII + ((namedclass - ANYOF_ASCII) ^ invert);
+                    break;
  
                  /* The following don't have any matches in the upper Latin1
                   * range, hence /d is equivalent to /u for them.  Making it /u
@@ -17311,6 +17557,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                             TRUE /* downgradable to EXACT */
                                             );
              }
+            else {
+                *flagp |= HASWIDTH|SIMPLE;
+            }
  
              RExC_parse = (char *) cur_parse;
  
@@ -17575,6 +17824,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  if (_invlist_len(only_non_utf8_list) != 0) {
                      ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
                  }
+                SvREFCNT_dec_NN(only_non_utf8_list);
              }
              else {
                  /* Here there were no complemented posix classes.  That means
@@ -17761,25 +18011,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * certain common classes that are easy to test.  Getting to this point in
       * the code means that the class didn't get optimized there.  Since this
       * code is only executed in Pass 2, it is too late to save space--it has
-     * been allocated in Pass 1, and currently isn't given back.  But turning
-     * things into an EXACTish node can allow the optimizer to join it to any
-     * adjacent such nodes.  And if the class is equivalent to things like /./,
-     * expensive run-time swashes can be avoided.  Now that we have more
-     * complete information, we can find things necessarily missed by the
-     * earlier code.  Another possible "optimization" that isn't done is that
-     * something like [Ee] could be changed into an EXACTFU.  khw tried this
-     * and found that the ANYOF is faster, including for code points not in the
-     * bitmap.  This still might make sense to do, provided it got joined with
-     * an adjacent node(s) to create a longer EXACTFU one.  This could be
-     * accomplished by creating a pseudo ANYOF_EXACTFU node type that the join
-     * routine would know is joinable.  If that didn't happen, the node type
-     * could then be made a straight ANYOF */
+     * been allocated in Pass 1, and currently isn't given back.  XXX Why not?
+     * But turning things into an EXACTish node can allow the optimizer to join
+     * it to any adjacent such nodes.  And if the class is equivalent to things
+     * like /./, expensive run-time swashes can be avoided.  Now that we have
+     * more complete information, we can find things necessarily missed by the
+     * earlier code. */
  
      if (optimizable && cp_list && ! invert) {
          UV start, end;
          U8 op = END;  /* The optimzation node-type */
          int posix_class = -1;   /* Illegal value */
          const char * cur_parse= RExC_parse;
+        U8 ANYOFM_mask = 0xFF;
+        U32 anode_arg = 0;
  
          invlist_iterinit(cp_list);
          if (! invlist_iternext(cp_list, &start, &end)) {
@@ -17862,29 +18107,45 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          invlist_iterfinish(cp_list);
  
          if (op == END) {
-            const UV cp_list_len = _invlist_len(cp_list);
-            const UV* cp_list_array = invlist_array(cp_list);
  
              /* Here, didn't find an optimization.  See if this matches any of
-             * the POSIX classes.  These run slightly faster for above-Unicode
-             * code points, so don't bother with POSIXA ones nor the 2 that
-             * have no above-Unicode matches.  We can avoid these checks unless
-             * the ANYOF matches at least as high as the lowest POSIX one
-             * (which was manually found to be \v.  The actual code point may
-             * increase in later Unicode releases, if a higher code point is
-             * assigned to be \v, but this code will never break.  It would
-             * just mean we could execute the checks for posix optimizations
-             * unnecessarily) */
-
-            if (cp_list_array[cp_list_len-1] > 0x2029) {
+             * the POSIX classes.  First try ASCII */
+
+            if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 0)) {
+                op = ASCII;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 1)) {
+                op = NASCII;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (invlist_highest(cp_list) >= 0x2029) {
+
+                /* Then try the other POSIX classes.  The POSIXA ones are about
+                 * the same speed as ANYOF ops, but the ones that have
+                 * above-Latin1 code point matches are somewhat faster than
+                 * ANYOF.  So optimize those, but don't bother with the POSIXA
+                 * ones nor [:cntrl:] which has no above-Latin1 matches.  If
+                 * this ANYOF node has a lower highest possible matching code
+                 * point than any of the XPosix ones, we know that it can't
+                 * possibly be the same as any of them, so we can avoid
+                 * executing this code.  The 0x2029 above for the lowest max
+                 * was determined by manual inspection of the classes, and
+                 * comes from \v.  Suppose Unicode in a later version adds a
+                 * higher code point to \v.  All that means is that this code
+                 * can be executed unnecessarily.  It will still give the
+                 * correct answer. */
+
                  for (posix_class = 0;
                       posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
                       posix_class++)
                  {
                      int try_inverted;
-                    if (posix_class == _CC_ASCII || posix_class == _CC_CNTRL) {
+
+                    if (posix_class == _CC_CNTRL) {
                          continue;
                      }
+
                      for (try_inverted = 0; try_inverted < 2; try_inverted++) {
  
                          /* Check if matches normal or inverted */
@@ -17902,6 +18163,106 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  }
                found_posix: ;
              }
+
+            /* If it didn't match a POSIX class, it might be able to be turned
+             * into an ANYOFM node.  Compare two different bytes, bit-by-bit.
+             * In some positions, the bits in each will be 1; and in other
+             * positions both will be 0; and in some positions the bit will be
+             * 1 in one byte, and 0 in the other.  Let 'n' be the number of
+             * positions where the bits differ.  We create a mask which has
+             * exactly 'n' 0 bits, each in a position where the two bytes
+             * differ.  Now take the set of all bytes that when ANDed with the
+             * mask yield the same result.  That set has 2**n elements, and is
+             * representable by just two 8 bit numbers: the result and the
+             * mask.  Importantly, matching the set can be vectorized by
+             * creating a word full of the result bytes, and a word full of the
+             * mask bytes, yielding a significant speed up.  Here, see if this
+             * node matches such a set.  As a concrete example consider [01],
+             * and the byte representing '0' which is 0x30 on ASCII machines.
+             * It has the bits 0011 0000.  Take the mask 1111 1110.  If we AND
+             * 0x31 and 0x30 with that mask we get 0x30.  Any other bytes ANDed
+             * yield something else.  So [01], which is a common usage, is
+             * optimizable into ANYOFM, and can benefit from the speed up.  We
+             * can only do this on UTF-8 invariant bytes, because the variance
+             * would throw this off.  */
+            if (   op == END
+                && invlist_highest(cp_list) <=
+#ifdef EBCDIC
+                                               0xFF
+#else
+                                               0x7F
+#endif
+            ) {
+                Size_t cp_count = 0;
+                bool first_time = TRUE;
+                unsigned int lowest_cp = 0xFF;
+                U8 bits_differing = 0;
+
+                /* Only needed on EBCDIC, as there, variants and non- are mixed
+                 * together.  Could #ifdef it out on ASCII, but probably the
+                 * compiler will optimize it out */
+                bool has_variant = FALSE;
+
+                /* Go through the bytes and find the bit positions that differ */
+                invlist_iterinit(cp_list);
+                while (invlist_iternext(cp_list, &start, &end)) {
+                    unsigned int i = start;
+
+                    cp_count += end - start + 1;
+
+                    if (first_time) {
+                        if (! UVCHR_IS_INVARIANT(i)) {
+                            has_variant = TRUE;
+                            continue;
+                        }
+
+                        first_time = FALSE;
+                        lowest_cp = start;
+
+                        i++;
+                    }
+
+                    /* Find the bit positions that differ from the lowest code
+                     * point in the node.  Keep track of all such positions by
+                     * OR'ing */
+                    for (; i <= end; i++) {
+                        if (! UVCHR_IS_INVARIANT(i)) {
+                            has_variant = TRUE;
+                            continue;
+                        }
+
+                        bits_differing  |= i ^ lowest_cp;
+                    }
+                }
+                invlist_iterfinish(cp_list);
+
+                /* At the end of the loop, we count how many bits differ from
+                 * the lowest code point's bits; call the count 'd'.  If the
+                 * set we found contains 2**d elements, it is the closure of
+                 * all code points that differ only in those bit positions.  To
+                 * convince yourself of that, first note that the number in the
+                 * closure must be a power of 2, which we test for.  The only
+                 * way we could have that count and it be some differing set,
+                 * is if we got some code points that don't differ from the
+                 * lowest code point in any position, but do differ from each
+                 * other in some other position.  That means one code point has
+                 * a 1 in that position, and another has a 0.  But that would
+                 * mean that one of them differs from the lowest code point in
+                 * that position, which possibility we've already excluded. */
+                if ( ! has_variant
+                    && cp_count == 1U << PL_bitcount[bits_differing])
+                {
+                    assert(cp_count > 1);
+                    op = ANYOFM;
+
+                    /* We need to make the bits that differ be 0's */
+                    ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS */
+
+                    /* The argument is the lowest code point */
+                    anode_arg = lowest_cp;
+                    *flagp |= HASWIDTH|SIMPLE;
+                }
+            }
          }
  
          if (op != END) {
@@ -17909,7 +18270,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              RExC_emit = (regnode *)orig_emit;
  
              if (regarglen[op]) {
-                ret = reganode(pRExC_state, op, 0);
+                ret = reganode(pRExC_state, op, anode_arg);
              } else {
                  ret = reg_node(pRExC_state, op);
              }
@@ -17924,6 +18285,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              else if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
                  FLAGS(ret) = posix_class;
              }
+            else if (PL_regkind[op] == ANYOFM) {
+                FLAGS(ret) = ANYOFM_mask;
+            }
  
              SvREFCNT_dec_NN(cp_list);
              return ret;
@@ -18531,6 +18895,7 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
  * if (PASS2)
  *     NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
  *
+* ALSO NOTE - operand->flags will be set to 0 as well.
  */
  STATIC void
  S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
@@ -18604,7 +18969,6 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
  #endif
      }
  
-
      place = operand;           /* Op node, where operand used to be. */
  #ifdef RE_TRACK_PATTERN_OFFSETS
      if (RExC_offsets) {         /* MJD */
@@ -18623,6 +18987,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
      }
  #endif
      src = NEXTOPER(place);
+    place->flags = 0;
      FILL_ADVANCE_NODE(place, op);
      Zero(src, offset, regnode);
  }
@@ -18775,6 +19140,36 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
  }
  #endif
  
+STATIC SV*
+S_get_ANYOFM_contents(pTHX_ const regnode * n) {
+
+    /* Returns a new inversion list of every code point matched by ANYOFM node
+     * 'n', where ARG(n) is the lowest member and FLAGS(n) is the bit mask */
+
+    SV * cp_list = _new_invlist(-1);
+    const U8 lowest = ARG(n);       /* Lowest member; the scan starts here */
+    unsigned int i;
+    U8 count = 0;       /* Members found; NOTE(review): U8 assumes < 256 */
+    U8 needed = 1U << PL_bitcount[ (U8) ~ FLAGS(n)];  /* Set's size */
+
+    PERL_ARGS_ASSERT_GET_ANYOFM_CONTENTS;
+
+    /* Starting with the lowest code point, any code point that ANDed with the
+     * mask yields the lowest code point is in the set */
+    for (i = lowest; i <= 0xFF; i++) {
+        if ((i & FLAGS(n)) == ARG(n)) {
+            cp_list = add_cp_to_invlist(cp_list, i);
+            count++;
+
+            /* The set has 2**z members, z being the number of 0 bits in the
+             * mask, so stop looking as soon as that many have been found */
+            if (count >= needed) break;
+        }
+    }
+
+    return cp_list;
+}
+
  /*
   - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
   */
@@ -18857,6 +19252,7 @@ void
  Perl_regdump(pTHX_ const regexp *r)
  {
  #ifdef DEBUGGING
+    int i;
      SV * const sv = sv_newmortal();
      SV *dsv= sv_newmortal();
      RXi_GET_DECL(r,ri);
@@ -18867,41 +19263,40 @@ Perl_regdump(pTHX_ const regexp *r)
      (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
  
      /* Header fields of interest. */
-    if (r->anchored_substr) {
-       RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->anchored_substr),
-           RE_SV_DUMPLEN(r->anchored_substr), 30);
-        Perl_re_printf( aTHX_
-                     "anchored %s%s at %" IVdf " ",
-                     s, RE_SV_TAIL(r->anchored_substr),
-                     (IV)r->anchored_offset);
-    } else if (r->anchored_utf8) {
-       RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->anchored_utf8),
-           RE_SV_DUMPLEN(r->anchored_utf8), 30);
-        Perl_re_printf( aTHX_
-                     "anchored utf8 %s%s at %" IVdf " ",
-                     s, RE_SV_TAIL(r->anchored_utf8),
-                     (IV)r->anchored_offset);
-    }
-    if (r->float_substr) {
-       RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->float_substr),
-           RE_SV_DUMPLEN(r->float_substr), 30);
-        Perl_re_printf( aTHX_
-                     "floating %s%s at %" IVdf "..%" UVuf " ",
-                     s, RE_SV_TAIL(r->float_substr),
-                     (IV)r->float_min_offset, (UV)r->float_max_offset);
-    } else if (r->float_utf8) {
-       RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->float_utf8),
-           RE_SV_DUMPLEN(r->float_utf8), 30);
-        Perl_re_printf( aTHX_
-                     "floating utf8 %s%s at %" IVdf "..%" UVuf " ",
-                     s, RE_SV_TAIL(r->float_utf8),
-                     (IV)r->float_min_offset, (UV)r->float_max_offset);
+    for (i = 0; i < 2; i++) {
+        if (r->substrs->data[i].substr) {
+            RE_PV_QUOTED_DECL(s, 0, dsv,
+                            SvPVX_const(r->substrs->data[i].substr),
+                            RE_SV_DUMPLEN(r->substrs->data[i].substr),
+                            PL_dump_re_max_len);
+            Perl_re_printf( aTHX_
+                          "%s %s%s at %" IVdf "..%" UVuf " ",
+                          i ? "floating" : "anchored",
+                          s,
+                          RE_SV_TAIL(r->substrs->data[i].substr),
+                          (IV)r->substrs->data[i].min_offset,
+                          (UV)r->substrs->data[i].max_offset);
+        }
+        else if (r->substrs->data[i].utf8_substr) {
+            RE_PV_QUOTED_DECL(s, 1, dsv,
+                            SvPVX_const(r->substrs->data[i].utf8_substr),
+                            RE_SV_DUMPLEN(r->substrs->data[i].utf8_substr),
+                            30);
+            Perl_re_printf( aTHX_
+                          "%s utf8 %s%s at %" IVdf "..%" UVuf " ",
+                          i ? "floating" : "anchored",
+                          s,
+                          RE_SV_TAIL(r->substrs->data[i].utf8_substr),
+                          (IV)r->substrs->data[i].min_offset,
+                          (UV)r->substrs->data[i].max_offset);
+        }
      }
+
      if (r->check_substr || r->check_utf8)
          Perl_re_printf( aTHX_
                       (const char *)
-                     (r->check_substr == r->float_substr
-                      && r->check_utf8 == r->float_utf8
+                     (   r->check_substr == r->substrs->data[1].substr
+                      && r->check_utf8   == r->substrs->data[1].utf8_substr
                        ? "(checking floating" : "(checking anchored"));
      if (r->intflags & PREGf_NOSCAN)
          Perl_re_printf( aTHX_  " noscan");
@@ -19023,7 +19418,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          * is a crude hack but it may be the best for now since
          * we have no flag "this EXACTish node was UTF-8"
          * --jhi */
-       pv_pretty(sv, STRING(o), STR_LEN(o), 60, PL_colors[0], PL_colors[1],
+       pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len,
+                  PL_colors[0], PL_colors[1],
                   PERL_PV_ESCAPE_UNI_DETECT |
                   PERL_PV_ESCAPE_NONASCII   |
                   PERL_PV_PRETTY_ELLIPSES   |
@@ -19116,7 +19512,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          if ( k == REF && reginfo) {
              U32 n = ARG(o);  /* which paren pair */
              I32 ln = prog->offs[n].start;
-            if (prog->lastparen < n || ln == -1)
+            if (prog->lastparen < n || ln == -1 || prog->offs[n].end == -1)
                  Perl_sv_catpvf(aTHX_ sv, ": FAIL");
              else if (ln == prog->offs[n].end)
                  Perl_sv_catpvf(aTHX_ sv, ": ACCEPT - EMPTY STRING");
@@ -19247,7 +19643,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              SV* contents;
  
              /* See if truncation size is overridden */
-            const STRLEN dump_len = (PL_dump_re_max_len)
+            const STRLEN dump_len = (PL_dump_re_max_len > 256)
                                      ? PL_dump_re_max_len
                                      : 256;
  
@@ -19300,6 +19696,15 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
          SvREFCNT_dec(unresolved);
      }
+    else if (k == ANYOFM) {
+        SV * cp_list = get_ANYOFM_contents(o);
+
+       Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+        put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, TRUE);
+       Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
+
+        SvREFCNT_dec(cp_list);
+    }
      else if (k == POSIXD || k == NPOSIXD) {
          U8 index = FLAGS(o) * 2;
          if (index < C_ARRAY_LENGTH(anyofs)) {
@@ -19334,8 +19739,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
      /* add on the verb argument if there is one */
      if ( ( k == VERB || OP(o) == ACCEPT || OP(o) == OPFAIL ) && o->flags) {
-        Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
+        if ( ARG(o) )
+            Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
                         SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
+        else
+            sv_catpvs(sv, ":NULL");
      }
  #else
      PERL_UNUSED_CONTEXT;
@@ -19371,7 +19779,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
                       PL_colors[5],PL_colors[0],
                       s,
                       PL_colors[1],
-                     (strlen(s) > 60 ? "..." : ""));
+                     (strlen(s) > PL_dump_re_max_len ? "..." : ""));
         } );
  
      /* use UTF8 check substring if regexp pattern itself is in UTF8 */
@@ -19410,13 +19818,13 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
      } else {
          CALLREGFREE_PVT(rx); /* free the private data */
          SvREFCNT_dec(RXp_PAREN_NAMES(r));
-       Safefree(r->xpv_len_u.xpvlenu_pv);
      }
      if (r->substrs) {
-        SvREFCNT_dec(r->anchored_substr);
-        SvREFCNT_dec(r->anchored_utf8);
-        SvREFCNT_dec(r->float_substr);
-        SvREFCNT_dec(r->float_utf8);
+        int i;
+        for (i = 0; i < 2; i++) {
+            SvREFCNT_dec(r->substrs->data[i].substr);
+            SvREFCNT_dec(r->substrs->data[i].utf8_substr);
+        }
         Safefree(r->substrs);
      }
      RX_MATCH_COPY_FREE(rx);
@@ -19427,12 +19835,19 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
      SvREFCNT_dec(r->qr_anoncv);
      if (r->recurse_locinput)
          Safefree(r->recurse_locinput);
-    rx->sv_u.svu_rx = 0;
  }
  
+
  /*  reg_temp_copy()
  
-    This is a hacky workaround to the structural issue of match results
+    Copy ssv to dsv, both of which should be of type SVt_REGEXP or SVt_PVLV,
+    except that dsv will be created if NULL.
+
+    This function is used in two main ways. First to implement
+        $r = qr/.../; $s = $$r;
+
+    Secondly, it is used as a hacky workaround to the structural issue of
+    match results
      being stored in the regexp structure which is in turn stored in
      PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern
      could be PL_curpm in multiple contexts, and could require multiple
@@ -19448,75 +19863,80 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
  
  
  REGEXP *
-Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
+Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
  {
-    struct regexp *ret;
-    struct regexp *const r = ReANY(rx);
-    const bool islv = ret_x && SvTYPE(ret_x) == SVt_PVLV;
+    struct regexp *drx;
+    struct regexp *const srx = ReANY(ssv);
+    const bool islv = dsv && SvTYPE(dsv) == SVt_PVLV;
  
      PERL_ARGS_ASSERT_REG_TEMP_COPY;
  
-    if (!ret_x)
-       ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
+    if (!dsv)
+       dsv = (REGEXP*) newSV_type(SVt_REGEXP);
      else {
-       SvOK_off((SV *)ret_x);
+       SvOK_off((SV *)dsv);
         if (islv) {
-           /* For PVLVs, SvANY points to the xpvlv body while sv_u points
-              to the regexp.  (For SVt_REGEXPs, sv_upgrade has already
-              made both spots point to the same regexp body.) */
+           /* For PVLVs, the head (sv_any) points to an XPVLV, while
+             * the LV's xpvlenu_rx will point to a regexp body, which
+             * we allocate here */
             REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
-           assert(!SvPVX(ret_x));
-           ret_x->sv_u.svu_rx = temp->sv_any;
+           assert(!SvPVX(dsv));
+            ((XPV*)SvANY(dsv))->xpv_len_u.xpvlenu_rx = temp->sv_any;
             temp->sv_any = NULL;
             SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
             SvREFCNT_dec_NN(temp);
             /* SvCUR still resides in the xpvlv struct, so the regexp copy-
                ing below will not set it. */
-           SvCUR_set(ret_x, SvCUR(rx));
+           SvCUR_set(dsv, SvCUR(ssv));
         }
      }
      /* This ensures that SvTHINKFIRST(sv) is true, and hence that
         sv_force_normal(sv) is called.  */
-    SvFAKE_on(ret_x);
-    ret = ReANY(ret_x);
+    SvFAKE_on(dsv);
+    drx = ReANY(dsv);
  
-    SvFLAGS(ret_x) |= SvUTF8(rx);
+    SvFLAGS(dsv) |= SvFLAGS(ssv) & (SVf_POK|SVp_POK|SVf_UTF8);
+    SvPV_set(dsv, RX_WRAPPED(ssv));
      /* We share the same string buffer as the original regexp, on which we
         hold a reference count, incremented when mother_re is set below.
         The string pointer is copied here, being part of the regexp struct.
       */
-    memcpy(&(ret->xpv_cur), &(r->xpv_cur),
+    memcpy(&(drx->xpv_cur), &(srx->xpv_cur),
            sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
-    if (r->offs) {
-        const I32 npar = r->nparens+1;
-        Newx(ret->offs, npar, regexp_paren_pair);
-        Copy(r->offs, ret->offs, npar, regexp_paren_pair);
-    }
-    if (r->substrs) {
-        Newx(ret->substrs, 1, struct reg_substr_data);
-       StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
+    if (!islv)
+        SvLEN_set(dsv, 0);
+    if (srx->offs) {
+        const I32 npar = srx->nparens+1;
+        Newx(drx->offs, npar, regexp_paren_pair);
+        Copy(srx->offs, drx->offs, npar, regexp_paren_pair);
+    }
+    if (srx->substrs) {
+        int i;
+        Newx(drx->substrs, 1, struct reg_substr_data);
+       StructCopy(srx->substrs, drx->substrs, struct reg_substr_data);
  
-       SvREFCNT_inc_void(ret->anchored_substr);
-       SvREFCNT_inc_void(ret->anchored_utf8);
-       SvREFCNT_inc_void(ret->float_substr);
-       SvREFCNT_inc_void(ret->float_utf8);
+        for (i = 0; i < 2; i++) {
+            SvREFCNT_inc_void(drx->substrs->data[i].substr);
+            SvREFCNT_inc_void(drx->substrs->data[i].utf8_substr);
+        }
  
         /* check_substr and check_utf8, if non-NULL, point to either their
            anchored or float namesakes, and don't hold a second reference.  */
      }
-    RX_MATCH_COPIED_off(ret_x);
+    RX_MATCH_COPIED_off(dsv);
  #ifdef PERL_ANY_COW
-    ret->saved_copy = NULL;
+    drx->saved_copy = NULL;
  #endif
-    ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
-    SvREFCNT_inc_void(ret->qr_anoncv);
-    if (r->recurse_locinput)
-        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
+    drx->mother_re = ReREFCNT_inc(srx->mother_re ? srx->mother_re : ssv);
+    SvREFCNT_inc_void(drx->qr_anoncv);
+    if (srx->recurse_locinput)
+        Newx(drx->recurse_locinput,srx->nparens + 1,char *);
  
-    return ret_x;
+    return dsv;
  }
  #endif
  
+
  /* regfree_internal()
  
     Free the private data in a regexp. This is overloadable by
@@ -19544,7 +19964,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
         {
             SV *dsv= sv_newmortal();
              RE_PV_QUOTED_DECL(s, RX_UTF8(rx),
-                dsv, RX_PRECOMP(rx), RX_PRELEN(rx), 60);
+                dsv, RX_PRECOMP(rx), RX_PRELEN(rx), PL_dump_re_max_len);
              Perl_re_printf( aTHX_ "%sFreeing REx:%s %s\n",
                  PL_colors[4],PL_colors[5],s);
          }
@@ -19678,36 +20098,41 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
         /* Do it this way to avoid reading from *r after the StructCopy().
            That way, if any of the sv_dup_inc()s dislodge *r from the L1
            cache, it doesn't matter.  */
+        int i;
         const bool anchored = r->check_substr
-           ? r->check_substr == r->anchored_substr
-           : r->check_utf8 == r->anchored_utf8;
+           ? r->check_substr == r->substrs->data[0].substr
+           : r->check_utf8   == r->substrs->data[0].utf8_substr;
          Newx(ret->substrs, 1, struct reg_substr_data);
         StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
  
-       ret->anchored_substr = sv_dup_inc(ret->anchored_substr, param);
-       ret->anchored_utf8 = sv_dup_inc(ret->anchored_utf8, param);
-       ret->float_substr = sv_dup_inc(ret->float_substr, param);
-       ret->float_utf8 = sv_dup_inc(ret->float_utf8, param);
+        for (i = 0; i < 2; i++) {
+            ret->substrs->data[i].substr =
+                        sv_dup_inc(ret->substrs->data[i].substr, param);
+            ret->substrs->data[i].utf8_substr =
+                        sv_dup_inc(ret->substrs->data[i].utf8_substr, param);
+        }
  
         /* check_substr and check_utf8, if non-NULL, point to either their
            anchored or float namesakes, and don't hold a second reference.  */
  
         if (ret->check_substr) {
             if (anchored) {
-               assert(r->check_utf8 == r->anchored_utf8);
-               ret->check_substr = ret->anchored_substr;
-               ret->check_utf8 = ret->anchored_utf8;
+               assert(r->check_utf8 == r->substrs->data[0].utf8_substr);
+
+               ret->check_substr = ret->substrs->data[0].substr;
+               ret->check_utf8   = ret->substrs->data[0].utf8_substr;
             } else {
-               assert(r->check_substr == r->float_substr);
-               assert(r->check_utf8 == r->float_utf8);
-               ret->check_substr = ret->float_substr;
-               ret->check_utf8 = ret->float_utf8;
+               assert(r->check_substr == r->substrs->data[1].substr);
+               assert(r->check_utf8   == r->substrs->data[1].utf8_substr);
+
+               ret->check_substr = ret->substrs->data[1].substr;
+               ret->check_utf8   = ret->substrs->data[1].utf8_substr;
             }
         } else if (ret->check_utf8) {
             if (anchored) {
-               ret->check_utf8 = ret->anchored_utf8;
+               ret->check_utf8 = ret->substrs->data[0].utf8_substr;
             } else {
-               ret->check_utf8 = ret->float_utf8;
+               ret->check_utf8 = ret->substrs->data[1].utf8_substr;
             }
         }
      }
@@ -19715,7 +20140,7 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
      RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
      ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param));
      if (r->recurse_locinput)
-        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
+        Newx(ret->recurse_locinput,r->nparens + 1,char *);
  
      if (ret->pprivate)
         RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
@@ -19734,7 +20159,7 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
                1: a buffer in a different thread
                2: something we no longer hold a reference on
                so we need to copy it locally.  */
-    RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED(sstr), SvCUR(sstr)+1);
+    RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED_const(sstr), SvCUR(sstr)+1);
      ret->mother_re   = NULL;
  }
  #endif /* PERL_IN_XSUB_RE */
@@ -19803,33 +20228,47 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
             d->what[i] = ri->data->what[i];
             switch (d->what[i]) {
                 /* see also regcomp.h and regfree_internal() */
-           case 'a': /* actually an AV, but the dup function is identical.  */
-           case 'r':
-           case 's':
-           case 'S':
-           case 'u': /* actually an HV, but the dup function is identical.  */
+            case 'a': /* actually an AV, but the dup function is identical.
+                         values seem to be "plain sv's" generally. */
+            case 'r': /* a compiled regex (but still just another SV) */
+            case 's': /* an RV (currently only used for an RV to an AV by the ANYOF code)
+                         this use case should go away, the code could have used
+                         'a' instead - see S_set_ANYOF_arg() for array contents. */
+            case 'S': /* actually an SV, but the dup function is identical.  */
+            case 'u': /* actually an HV, but the dup function is identical.
+                         values are "plain sv's" */
                 d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param);
                 break;
             case 'f':
+                /* Synthetic Start Class - "Fake" charclass we generate to optimize
+                 * patterns which could start with several different things. Pre-TRIE
+                 * this was more important than it is now, however this still helps
+                 * in some places, for instance /x?a+/ might produce a SSC equivalent
+                 * to [xa]. This is used by Perl_re_intuit_start() and S_find_byclass()
+                 * in regexec.c
+                 */
                 /* This is cheating. */
                 Newx(d->data[i], 1, regnode_ssc);
                 StructCopy(ri->data->data[i], d->data[i], regnode_ssc);
                 reti->regstclass = (regnode*)d->data[i];
                 break;
             case 'T':
-               /* Trie stclasses are readonly and can thus be shared
+                /* AHO-CORASICK fail table */
+                /* Trie stclasses are readonly and can thus be shared
                  * without duplication. We free the stclass in pregfree
                  * when the corresponding reg_ac_data struct is freed.
                  */
                 reti->regstclass= ri->regstclass;
                 /* FALLTHROUGH */
             case 't':
+                /* TRIE transition table */
                 OP_REFCNT_LOCK;
                 ((reg_trie_data*)ri->data->data[i])->refcount++;
                 OP_REFCNT_UNLOCK;
                 /* FALLTHROUGH */
-           case 'l':
-           case 'L':
+            case 'l': /* (?{...}) or (??{ ... }) code (cb->block) */
+            case 'L': /* same when RExC_pm_flags & PMf_HAS_CV and code
+                         is not from another regexp */
                 d->data[i] = ri->data->data[i];
                 break;
              default:
@@ -20154,9 +20593,9 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  #else
          format = "\\x%02" UVXf "-\\x%02" UVXf;
  #endif
-        GCC_DIAG_IGNORE(-Wformat-nonliteral);
+        GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
          Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
-        GCC_DIAG_RESTORE;
+        GCC_DIAG_RESTORE_STMT;
          break;
      }
  }
@@ -20305,7 +20744,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
  {
      /* Appends to 'sv' a displayable version of the innards of the bracketed
       * character class defined by the other arguments:
-     *  'bitmap' points to the bitmap.
+     *  'bitmap' points to the bitmap, or is NULL if it is to be ignored.
       *  'nonbitmap_invlist' is an inversion list of the code points that are in
       *      the bitmap range, but for some reason aren't in the bitmap; NULL if
       *      none.  The reasons for this could be that they require some
@@ -20314,9 +20753,9 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
       *      was not resolved at the time of the regex compilation (under /u)
       *  'only_utf8_locale_invlist' is an inversion list of the code points that
       *      are valid only if the runtime locale is a UTF-8 one; NULL if none
-     *  'node' is the regex pattern node.  It is needed only when the above two
-     *      parameters are not null, and is passed so that this routine can
-     *      tease apart the various reasons for them.
+     *  'node' is the regex pattern ANYOF node.  It is needed only when the
+     *      above two parameters are not null, and is passed so that this
+     *      routine can tease apart the various reasons for them.
       *  'force_as_is_display' is TRUE if this routine should definitely NOT try
       *      to invert things to see if that leads to a cleaner display.  If
       *      FALSE, this routine is free to use its judgment about doing this.
@@ -20416,13 +20855,16 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
      }
  
      /* Accumulate the bit map into the unconditional match list */
-    for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
-        if (BITMAP_TEST(bitmap, i)) {
-            int start = i++;
-            for (; i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i); i++) {
-                /* empty */
+    if (bitmap) {
+        for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
+            if (BITMAP_TEST(bitmap, i)) {
+                int start = i++;
+                for (;
+                     i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i);
+                     i++)
+                { /* empty */ }
+                invlist = _add_range_to_invlist(invlist, start, i-1);
              }
-            invlist = _add_range_to_invlist(invlist, start, i-1);
          }
      }
  
@@ -20589,7 +21031,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
         /* While that wasn't END last time... */
         NODE_ALIGN(node);
         op = OP(node);
-       if (op == CLOSE || op == WHILEM)
+       if (op == CLOSE || op == SRCLOSE || op == WHILEM)
             indent--;
         next = regnext((regnode *)node);
  
@@ -20656,7 +21098,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                      indent+3,
                      elem_ptr
                      ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
-                                SvCUR(*elem_ptr), 60,
+                                SvCUR(*elem_ptr), PL_dump_re_max_len,
                                  PL_colors[0], PL_colors[1],
                                  (SvUTF8(*elem_ptr)
                                   ? PERL_PV_ESCAPE_UNI
@@ -20713,7 +21155,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
             node = NEXTOPER(node);
             node += regarglen[(U8)op];
         }
-       if (op == CURLYX || op == OPEN)
+       if (op == CURLYX || op == OPEN || op == SROPEN)
             indent++;
      }
      CLEAR_OPTSTART;