Fix skipping char in (*sr:...) defn

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 06e1433..13c4154 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -110,7 +110,6 @@ typedef struct scan_frame {
      regnode *next_regnode;      /* next node to process when last is reached */
      U32 prev_recursed_depth;
      I32 stopparen;              /* what stopparen do we use */
-    U32 is_top_frame;           /* what flags do we use? */
  
      struct scan_frame *this_prev_frame; /* this previous frame */
      struct scan_frame *prev_frame;      /* previous frame */
@@ -150,7 +149,7 @@ struct RExC_state_t {
      I32                sawback;                /* Did we see \1, ...? */
      U32                seen;
      SSize_t    size;                   /* Code size. */
-    I32                npar;            /* Capture buffer count, (OPEN) plus
+    I32         npar;                   /* Capture buffer count, (OPEN) plus
                                             one. ("par" 0 is the whole
                                             pattern)*/
      I32                nestroot;               /* root parens we are in - used by
@@ -213,6 +212,7 @@ struct RExC_state_t {
      bool        seen_unfolded_sharp_s;
      bool        strict;
      bool        study_started;
+    bool        in_script_run;
  };
  
  #define RExC_flags     (pRExC_state->flags)
@@ -279,6 +279,7 @@ struct RExC_state_t {
  #define RExC_strict (pRExC_state->strict)
  #define RExC_study_started      (pRExC_state->study_started)
  #define RExC_warn_text (pRExC_state->warn_text)
+#define RExC_in_script_run      (pRExC_state->in_script_run)
  
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
   * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -344,7 +345,7 @@ struct RExC_state_t {
  /* Change from /d into /u rules, and restart the parse if we've already seen
   * something whose size would increase as a result, by setting *flagp and
   * returning 'restart_retval'.  RExC_uni_semantics is a flag that indicates
- * we've change to /u during the parse.  */
+ * we've changed to /u during the parse.  */
  #define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
      STMT_START {                                                            \
              if (DEPENDS_SEMANTICS) {                                        \
@@ -358,6 +359,34 @@ struct RExC_state_t {
              }                                                               \
      } STMT_END
  
+/* Executes a return statement with the value 'X', if 'flags' contains any of
+ * 'RESTART_PASS1', 'NEED_UTF8', or the bits in 'extra'.  Before returning,
+ * *flagp is overwritten (not OR'ed) with just those of the tested bits that
+ * are set in 'flags'.  If none are set, nothing happens and execution falls
+ * through to the next statement.
+ *
+ * Because this expands to a 'return', it can only be used inside a function
+ * whose return type is compatible with 'X'.  Note that 'flags' and 'extra'
+ * are each evaluated twice when the return is taken, so they should be free
+ * of side effects. */
+#define RETURN_X_ON_RESTART_OR_FLAGS(X, flags, flagp, extra)                \
+    STMT_START {                                                            \
+            if ((flags) & (RESTART_PASS1|NEED_UTF8|(extra))) {              \
+                *(flagp) = (flags) & (RESTART_PASS1|NEED_UTF8|(extra));     \
+                return X;                                                   \
+            }                                                               \
+    } STMT_END
+
+/* As RETURN_X_ON_RESTART_OR_FLAGS, but the value returned is NULL */
+#define RETURN_NULL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
+                    RETURN_X_ON_RESTART_OR_FLAGS(NULL,flags,flagp,extra)
+
+/* As RETURN_X_ON_RESTART_OR_FLAGS, but with no 'extra' flags to test for */
+#define RETURN_X_ON_RESTART(X, flags,flagp)                                 \
+                        RETURN_X_ON_RESTART_OR_FLAGS( X, flags, flagp, 0)
+
+
+/* Executes 'return NULL' if *flagp already contains 'RESTART_PASS1' or any of
+ * the bits in 'extra'.  Here *flagp is only read, never written.  The body is
+ * wrapped in STMT_START/STMT_END so that the expansion is a single statement
+ * and its 'if' cannot capture an 'else' that follows the macro call. */
+#define RETURN_NULL_ON_RESTART_FLAGP_OR_FLAGS(flagp,extra)                  \
+    STMT_START {                                                            \
+            if (*(flagp) & (RESTART_PASS1|(extra))) return NULL;            \
+    } STMT_END
+
+/* Non-zero if 'flags' contains 'RESTART_PASS1' */
+#define MUST_RESTART(flags) ((flags) & (RESTART_PASS1))
+
+/* NULL-returning, no-'extra' conveniences for the two forms above: the first
+ * tests 'flags' (and sets *flagp); the second tests *flagp itself */
+#define RETURN_NULL_ON_RESTART(flags,flagp)                                 \
+                                    RETURN_X_ON_RESTART(NULL, flags,flagp)
+#define RETURN_NULL_ON_RESTART_FLAGP(flagp)                                 \
+                            RETURN_NULL_ON_RESTART_FLAGP_OR_FLAGS(flagp,0)
+
  /* This converts the named class defined in regcomp.h to its equivalent class
   * number defined in handy.h. */
  #define namedclass_to_classnum(class)  ((int) ((class) / 2))
@@ -628,7 +657,7 @@ static const scan_data_t zero_scan_data = {
      UTF8fARG(UTF,                                                           \
               (xI(xC) > eC) /* Don't run off end */                          \
                ? eC - sC   /* Length before the <--HERE */                   \
-              : xI_offset(xC),                                              \
+              : ( __ASSERT_(xI_offset(xC) >= 0) xI_offset(xC) ),            \
               sC),         /* The input pattern printed up to the <--HERE */ \
      UTF8fARG(UTF,                                                           \
               (xI(xC) > eC) ? 0 : eC - xI(xC), /* Length after <--HERE */    \
@@ -1031,6 +1060,7 @@ S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
          );
  
          if (data->last_found) {
+            int i;
              Perl_re_printf(aTHX_
                  "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf,
                      SvPVX_const(data->last_found),
@@ -1039,22 +1069,17 @@ S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
                      (IV)data->last_start_max
              );
  
-            Perl_re_printf(aTHX_
-                " %sFixed:'%s' @ %" IVdf,
-                data->cur_is_floating == 0 ? "*" : "",
-                SvPVX_const(data->substrs[0].str),
-                (IV)data->substrs[0].min_offset
-            );
-            S_debug_show_study_flags(aTHX_ data->substrs[0].flags," [","]");
-
-            Perl_re_printf(aTHX_
-                " %sFloat: '%s' @ %" IVdf "/%" IVdf,
-                data->cur_is_floating == 1 ? "*" : "",
-                SvPVX_const(data->substrs[1].str),
-                (IV)data->substrs[1].min_offset,
-                (IV)data->substrs[1].max_offset
-            );
-            S_debug_show_study_flags(aTHX_ data->substrs[1].flags," [","]");
+            for (i = 0; i < 2; i++) {
+                Perl_re_printf(aTHX_
+                    " %s%s: '%s' @ %" IVdf "/%" IVdf,
+                    data->cur_is_floating == i ? "*" : "",
+                    i ? "Float" : "Fixed",
+                    SvPVX_const(data->substrs[i].str),
+                    (IV)data->substrs[i].min_offset,
+                    (IV)data->substrs[i].max_offset
+                );
+                S_debug_show_study_flags(aTHX_ data->substrs[i].flags," [","]");
+            }
          }
  
          Perl_re_printf( aTHX_ "\n");
@@ -1122,7 +1147,7 @@ PERL_STATIC_INLINE item*
  push(UV key,item* curr)
  {
      item* head;
-    Newxz(head, 1, item);
+    Newx(head, 1, item);
      head->key = key;
      head->value = 0;
      head->next = curr;
@@ -1192,7 +1217,7 @@ S_edit_distance(const UV* src,
      PERL_ARGS_ASSERT_EDIT_DISTANCE;
  
      /* intialize matrix start values */
-    Newxz(scores, ( (x + 2) * (y + 2)), UV);
+    Newx(scores, ( (x + 2) * (y + 2)), UV);
      scores[0] = score_ceil;
      scores[1 * (y + 2) + 0] = score_ceil;
      scores[0 * (y + 2) + 1] = score_ceil;
@@ -1267,7 +1292,7 @@ S_cntrl_to_mnemonic(const U8 c)
  }
  
  /* Mark that we cannot extend a found fixed substring at this point.
-   Update the longest found anchored substring and the longest found
+   Update the longest found anchored substring or the longest found
     floating substrings if needed. */
  
  STATIC void
@@ -1282,19 +1307,13 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
      PERL_ARGS_ASSERT_SCAN_COMMIT;
  
      if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
+        const U8 i = data->cur_is_floating;
         SvSetMagicSV(longest_sv, data->last_found);
-       if (data->cur_is_floating == 0) { /* fixed */
-           data->substrs[0].min_offset = l ? data->last_start_min : data->pos_min;
+        data->substrs[i].min_offset = l ? data->last_start_min : data->pos_min;
+
+       if (!i) /* fixed */
             data->substrs[0].max_offset = data->substrs[0].min_offset;
-           if (data->flags & SF_BEFORE_EOL)
-               data->substrs[0].flags |= (data->flags & SF_BEFORE_EOL);
-           else
-               data->substrs[0].flags &= ~SF_BEFORE_EOL;
-           data->substrs[0].minlenp = minlenp;
-           data->substrs[0].lookbehind = 0;
-       }
         else { /* float */
-           data->substrs[1].min_offset = l ? data->last_start_min : data->pos_min;
             data->substrs[1].max_offset = (l
                            ? data->last_start_max
                            : (data->pos_delta > SSize_t_MAX - data->pos_min
@@ -1303,15 +1322,16 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
             if (is_inf
                  || (STRLEN)data->substrs[1].max_offset > (STRLEN)SSize_t_MAX)
                 data->substrs[1].max_offset = SSize_t_MAX;
+        }
  
-           if (data->flags & SF_BEFORE_EOL)
-               data->substrs[1].flags |= (data->flags & SF_BEFORE_EOL);
-           else
-               data->substrs[1].flags &= ~SF_BEFORE_EOL;
-            data->substrs[1].minlenp = minlenp;
-            data->substrs[1].lookbehind = 0;
-       }
+        if (data->flags & SF_BEFORE_EOL)
+            data->substrs[i].flags |= (data->flags & SF_BEFORE_EOL);
+        else
+            data->substrs[i].flags &= ~SF_BEFORE_EOL;
+        data->substrs[i].minlenp = minlenp;
+        data->substrs[i].lookbehind = 0;
      }
+
      SvCUR_set(data->last_found, 0);
      {
         SV * const sv = data->last_found;
@@ -1711,6 +1731,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
              regnode_charclass_posixl temp;
              int add = 1;    /* To calculate the index of the complement */
  
+            Zero(&temp, 1, regnode_charclass_posixl);
              ANYOF_POSIXL_ZERO(&temp);
              for (i = 0; i < ANYOF_MAX; i++) {
                  assert(i % 2 != 0
@@ -2432,7 +2453,7 @@ is the recommended Unicode-aware way of saying
  } STMT_END
  
  #define TRIE_LIST_NEW(state) STMT_START {                       \
-    Newxz( trie->states[ state ].trans.list,               \
+    Newx( trie->states[ state ].trans.list,                     \
         4, reg_trie_trans_le );                                 \
       TRIE_LIST_CUR( state ) = 1;                                \
       TRIE_LIST_LEN( state ) = 4;                                \
@@ -2553,7 +2574,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
  
      switch (flags) {
          case EXACT: case EXACTL: break;
-       case EXACTFA:
+       case EXACTFAA:
          case EXACTFU_SS:
         case EXACTFU:
         case EXACTFLU8: folder = PL_fold_latin1; break;
@@ -3627,7 +3648,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
      aho->trie=trie_offset;
      aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) );
      Copy( trie->states, aho->states, numstates, reg_trie_state );
-    Newxz( q, numstates, U32);
+    Newx( q, numstates, U32);
      aho->fail = (U32 *) PerlMemShared_calloc( numstates, sizeof(U32) );
      aho->refcount = 1;
      fail = aho->fail;
@@ -3702,10 +3723,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   * XXX khw thinks this should be enhanced to fill EXACT (at least) nodes as full
   * as possible, even if that means splitting an existing node so that its first
   * part is moved to the preceeding node.  This would maximise the efficiency of
- * memEQ during matching.  Elsewhere in this file, khw proposes splitting
- * EXACTFish nodes into portions that don't change under folding vs those that
- * do.  Those portions that don't change may be the only things in the pattern that
- * could be used to find fixed and floating strings.
+ * memEQ during matching.
   *
   * If a node is to match under /i (folded), the number of characters it matches
   * can be different than its character length if it contains a multi-character
@@ -3713,14 +3731,16 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   * input nodes.
   *
   * And *unfolded_multi_char is set to indicate whether or not the node contains
- * an unfolded multi-char fold.  This happens when whether the fold is valid or
- * not won't be known until runtime; namely for EXACTF nodes that contain LATIN
- * SMALL LETTER SHARP S, as only if the target string being matched against
- * turns out to be UTF-8 is that fold valid; and also for EXACTFL nodes whose
- * folding rules depend on the locale in force at runtime.  (Multi-char folds
- * whose components are all above the Latin1 range are not run-time locale
- * dependent, and have already been folded by the time this function is
- * called.)
+ * an unfolded multi-char fold.  This happens when it won't be known until
+ * runtime whether the fold is valid or not; namely
+ *  1) for EXACTF nodes that contain LATIN SMALL LETTER SHARP S, as only if the
+ *      target string being matched against turns out to be UTF-8 is that fold
+ *      valid; or
+ *  2) for EXACTFL nodes whose folding rules depend on the locale in force at
+ *      runtime.
+ * (Multi-char folds whose components are all above the Latin1 range are not
+ * run-time locale dependent, and have already been folded by the time this
+ * function is called.)
   *
   * This is as good a place as any to discuss the design of handling these
   * multi-character fold sequences.  It's been wrong in Perl for a very long
@@ -3770,7 +3790,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   *      described in the next item.
   * 3)   A problem remains for unfolded multi-char folds. (These occur when the
   *      validity of the fold won't be known until runtime, and so must remain
- *      unfolded for now.  This happens for the sharp s in EXACTF and EXACTFA
+ *      unfolded for now.  This happens for the sharp s in EXACTF and EXACTFAA
   *      nodes when the pattern isn't in UTF-8.  (Note, BTW, that there cannot
   *      be an EXACTF node with a UTF-8 pattern.)  They also occur for various
   *      folds in EXACTFL nodes, regardless of the UTF-ness of the pattern.)
@@ -3780,28 +3800,28 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   *      character in the target string.  (And I do mean character, and not byte
   *      here, unlike other parts of the documentation that have never been
   *      updated to account for multibyte Unicode.)  sharp s in EXACTF and
- *      EXACTFL nodes can match the two character string 'ss'; in EXACTFA nodes
- *      it can match "\x{17F}\x{17F}".  These, along with other ones in EXACTFL
- *      nodes, violate the assumption, and they are the only instances where it
- *      is violated.  I'm reluctant to try to change the assumption, as the
- *      code involved is impenetrable to me (khw), so instead the code here
- *      punts.  This routine examines EXACTFL nodes, and (when the pattern
- *      isn't UTF-8) EXACTF and EXACTFA for such unfolded folds, and returns a
+ *      EXACTFL nodes can match the two character string 'ss'; in EXACTFAA
+ *      nodes it can match "\x{17F}\x{17F}".  These, along with other ones in
+ *      EXACTFL nodes, violate the assumption, and they are the only instances
+ *      where it is violated.  I'm reluctant to try to change the assumption,
+ *      as the code involved is impenetrable to me (khw), so instead the code
+ *      here punts.  This routine examines EXACTFL nodes, and (when the pattern
+ *      isn't UTF-8) EXACTF and EXACTFAA for such unfolded folds, and returns a
   *      boolean indicating whether or not the node contains such a fold.  When
   *      it is true, the caller sets a flag that later causes the optimizer in
   *      this file to not set values for the floating and fixed string lengths,
   *      and thus avoids the optimizer code in regexec.c that makes the invalid
   *      assumption.  Thus, there is no optimization based on string lengths for
   *      EXACTFL nodes that contain these few folds, nor for non-UTF8-pattern
- *      EXACTF and EXACTFA nodes that contain the sharp s.  (The reason the
+ *      EXACTF and EXACTFAA nodes that contain the sharp s.  (The reason the
   *      assumption is wrong only in these cases is that all other non-UTF-8
   *      folds are 1-1; and, for UTF-8 patterns, we pre-fold all other folds to
   *      their expanded versions.  (Again, we can't prefold sharp s to 'ss' in
   *      EXACTF nodes because we don't know at compile time if it actually
   *      matches 'ss' or not.  For EXACTF nodes it will match iff the target
   *      string is in UTF-8.  This is in contrast to EXACTFU nodes, where it
- *      always matches; and EXACTFA where it never does.  In an EXACTFA node in
- *      a UTF-8 pattern, sharp s is folded to "\x{17F}\x{17F}, avoiding the
+ *      always matches; and EXACTFAA where it never does.  In an EXACTFAA node
+ *      in a UTF-8 pattern, sharp s is folded to "\x{17F}\x{17F}", avoiding the
   *      problem; but in a non-UTF8 pattern, folding it to that above-Latin1
   *      string would require the pattern to be forced into UTF-8, the overhead
   *      of which we want to avoid.  Similarly the unfolded multi-char folds in
@@ -3810,9 +3830,9 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   *
   *      Similarly, the code that generates tries doesn't currently handle
   *      not-already-folded multi-char folds, and it looks like a pain to change
- *      that.  Therefore, trie generation of EXACTFA nodes with the sharp s
- *      doesn't work.  Instead, such an EXACTFA is turned into a new regnode,
- *      EXACTFA_NO_TRIE, which the trie code knows not to handle.  Most people
+ *      that.  Therefore, trie generation of EXACTFAA nodes with the sharp s
+ *      doesn't work.  Instead, such an EXACTFAA is turned into a new regnode,
+ *      EXACTFAA_NO_TRIE, which the trie code knows not to handle.  Most people
   *      using /iaa matching will be doing so almost entirely with ASCII
   *      strings, so this should rarely be encountered in practice */
  
@@ -3992,10 +4012,10 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                  }
  
                  /* Nodes with 'ss' require special handling, except for
-                 * EXACTFA-ish for which there is no multi-char fold to this */
+                 * EXACTFAA-ish for which there is no multi-char fold to this */
                  if (len == 2 && *s == 's' && *(s+1) == 's'
-                    && OP(scan) != EXACTFA
-                    && OP(scan) != EXACTFA_NO_TRIE)
+                    && OP(scan) != EXACTFAA
+                    && OP(scan) != EXACTFAA_NO_TRIE)
                  {
                      count = 2;
                      if (OP(scan) != EXACTFL) {
@@ -4009,7 +4029,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                      /* Count how many characters are in it.  In the case of
                       * /aa, no folds which contain ASCII code points are
                       * allowed, so check for those, and skip if found. */
-                    if (OP(scan) != EXACTFA && OP(scan) != EXACTFA_NO_TRIE) {
+                    if (OP(scan) != EXACTFAA && OP(scan) != EXACTFAA_NO_TRIE) {
                          count = utf8_length(s, multi_end);
                          s = multi_end;
                      }
@@ -4047,9 +4067,9 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
              *min_subtract += total_count_delta;
              Safefree(folded);
         }
-       else if (OP(scan) == EXACTFA) {
+       else if (OP(scan) == EXACTFAA) {
  
-            /* Non-UTF-8 pattern, EXACTFA node.  There can't be a multi-char
+            /* Non-UTF-8 pattern, EXACTFAA node.  There can't be a multi-char
               * fold to the ASCII range (and there are no existing ones in the
               * upper latin1 range).  But, as outlined in the comments preceding
               * this function, we need to flag any occurrences of the sharp s.
@@ -4060,7 +4080,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                                        || UNICODE_DOT_DOT_VERSION > 0)
             while (s < s_end) {
                  if (*s == LATIN_SMALL_LETTER_SHARP_S) {
-                    OP(scan) = EXACTFA_NO_TRIE;
+                    OP(scan) = EXACTFAA_NO_TRIE;
                      *unfolded_multi_char = TRUE;
                      break;
                  }
@@ -4069,7 +4089,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
          }
         else {
  
-            /* Non-UTF-8 pattern, not EXACTFA node.  Look for the multi-char
+            /* Non-UTF-8 pattern, not EXACTFAA node.  Look for the multi-char
               * folds that are all Latin1.  As explained in the comments
               * preceding this function, we look also for the sharp s in EXACTF
               * and EXACTFL nodes; it can be in the final position.  Otherwise
@@ -4152,7 +4172,7 @@ S_unwind_scan_frames(pTHX_ const void *p)
      } while (f);
  }
  
-
+/* the return from this sub is the minimum length that could possibly match */
  STATIC SSize_t
  S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          SSize_t *minlenp, SSize_t *deltap,
@@ -4188,6 +4208,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
      PERL_ARGS_ASSERT_STUDY_CHUNK;
      RExC_study_started= 1;
  
+    Zero(&data_fake, 1, scan_data_t);
  
      if ( depth == 0 ) {
          while (first_non_open && OP(first_non_open) == OPEN)
@@ -4295,6 +4316,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
              /* we suppose the run is continuous, last=next...
               * NOTE we dont use the return here! */
+            /* DEFINEP study_chunk() recursion */
              (void)study_chunk(pRExC_state, &scan, &minlen,
                                &deltanext, next, &data_fake, stopparen,
                                recursed_depth, NULL, f, depth+1);
@@ -4362,6 +4384,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         f |= SCF_WHILEM_VISITED_POS;
  
                     /* we suppose the run is continuous, last=next...*/
+                    /* recurse study_chunk() for each BRANCH in an alternation */
                     minnext = study_chunk(pRExC_state, &scan, minlenp,
                                        &deltanext, next, &data_fake, stopparen,
                                        recursed_depth, NULL, f,depth+1);
@@ -4566,7 +4589,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  EXACT           | EXACT
                                  EXACTFU         | EXACTFU
                                  EXACTFU_SS      | EXACTFU
-                                EXACTFA         | EXACTFA
+                                EXACTFAA        | EXACTFAA
                                  EXACTL          | EXACTL
                                  EXACTFLU8       | EXACTFLU8
  
@@ -4578,8 +4601,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                           ? EXACT                                            \
                           : ( EXACTFU == (X) || EXACTFU_SS == (X) )          \
                             ? EXACTFU                                        \
-                           : ( EXACTFA == (X) )                             \
-                             ? EXACTFA                                      \
+                           : ( EXACTFAA == (X) )                             \
+                             ? EXACTFAA                                      \
                               : ( EXACTL == (X) )                            \
                                 ? EXACTL                                     \
                                 : ( EXACTFLU8 == (X) )                        \
@@ -4901,6 +4924,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
         else if (OP(scan) == EXACT || OP(scan) == EXACTL) {
             SSize_t l = STR_LEN(scan);
             UV uc;
+            assert(l);
             if (UTF) {
                 const U8 * const s = (U8*)STRING(scan);
                 uc = utf8_to_uvchr_buf(s, s + l, NULL);
@@ -5097,6 +5121,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     f &= ~SCF_WHILEM_VISITED_POS;
  
                 /* This will finish on WHILEM, setting scan, or on NULL: */
+                /* recurse study_chunk() on loop bodies */
                 minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
                                    last, data, stopparen, recursed_depth, NULL,
                                    (mincount == 0
@@ -5132,7 +5157,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 }
                 if (!scan)              /* It was not CURLYX, but CURLY. */
                     scan = next;
-               if (!(flags & SCF_TRIE_DOING_RESTUDY)
+               if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
                     /* ? quantifier ok, except for (?{ ... }) */
                     && (next_is_eval || !(mincount == 0 && maxcount == 1))
                     && (minnext == 0) && (deltanext == 0)
@@ -5259,6 +5284,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         }
  #endif
                         /* Optimize again: */
+                        /* recurse study_chunk() on optimised CURLYX => CURLYM */
                         study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
                                      NULL, stopparen, recursed_depth, NULL, 0,depth+1);
                     }
@@ -5517,6 +5543,27 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                                            (regnode_charclass *) scan);
                     break;
  
+                case ANYOFM:
+                  {
+                    SV* cp_list = get_ANYOFM_contents(scan);
+
+                    if (flags & SCF_DO_STCLASS_OR) {
+                        ssc_union(data->start_class,
+                                  cp_list,
+                                  FALSE /* don't invert */
+                                  );
+                    }
+                    else if (flags & SCF_DO_STCLASS_AND) {
+                        ssc_intersection(data->start_class,
+                                         cp_list,
+                                         FALSE /* don't invert */
+                                         );
+                    }
+
+                    SvREFCNT_dec_NN(cp_list);
+                    break;
+                  }
+
                 case NPOSIXL:
                      invert = 1;
                      /* FALLTHROUGH */
@@ -5557,20 +5604,25 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      }
                      break;
  
+                case NASCII:
+                    invert = 1;
+                    /* FALLTHROUGH */
+               case ASCII:
+                    my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
+
+                    /* This can be handled as a Posix class */
+                    goto join_posix_and_ascii;
+
                  case NPOSIXA:   /* For these, we always know the exact set of
                                     what's matched */
                      invert = 1;
                      /* FALLTHROUGH */
                 case POSIXA:
-                    if (FLAGS(scan) == _CC_ASCII) {
-                        my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
-                    }
-                    else {
-                        _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
-                                              PL_XPosix_ptrs[_CC_ASCII],
-                                              &my_invlist);
-                    }
-                    goto join_posix;
+                    assert(FLAGS(scan) != _CC_ASCII);
+                    _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
+                                          PL_XPosix_ptrs[_CC_ASCII],
+                                          &my_invlist);
+                    goto join_posix_and_ascii;
  
                 case NPOSIXD:
                 case NPOSIXU:
@@ -5590,7 +5642,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                            &my_invlist);
                      }
  
-                  join_posix:
+                  join_posix_and_ascii:
  
                      if (flags & SCF_DO_STCLASS_AND) {
                          ssc_intersection(data->start_class, my_invlist, invert);
@@ -5649,6 +5701,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      f |= SCF_WHILEM_VISITED_POS;
                  next = regnext(scan);
                  nscan = NEXTOPER(NEXTOPER(scan));
+
+                /* recurse study_chunk() for lookahead body */
                  minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
                                        last, &data_fake, stopparen,
                                        recursed_depth, NULL, f, depth+1);
@@ -5739,6 +5793,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  next = regnext(scan);
                  nscan = NEXTOPER(NEXTOPER(scan));
  
+                /* positive lookahead study_chunk() recursion */
                  *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
                                          &deltanext, last, &data_fake,
                                          stopparen, recursed_depth, NULL,
@@ -5767,29 +5822,29 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                          data->flags |= SF_HAS_EVAL;
                      data->whilem_c = data_fake.whilem_c;
                      if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
+                        int i;
                          if (RExC_rx->minlen<*minnextp)
                              RExC_rx->minlen=*minnextp;
                          scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
                          SvREFCNT_dec_NN(data_fake.last_found);
  
-                        if (data_fake.substrs[0].minlenp != minlenp) {
-                            data->substrs[0].min_offset = data_fake.substrs[0].min_offset;
-                            data->substrs[0].max_offset = data_fake.substrs[0].max_offset;
-                            data->substrs[0].minlenp = data_fake.substrs[0].minlenp;
-                            data->substrs[0].lookbehind += scan->flags;
-                        }
-
-                        if (data_fake.substrs[1].minlenp != minlenp) {
-                            data->substrs[1].minlenp = data_fake.substrs[1].minlenp;
-                            data->substrs[1].min_offset = data_fake.substrs[1].min_offset;
-                            data->substrs[1].max_offset = data_fake.substrs[1].max_offset;
-                            data->substrs[1].lookbehind += scan->flags;
+                        for (i = 0; i < 2; i++) {
+                            if (data_fake.substrs[i].minlenp != minlenp) {
+                                data->substrs[i].min_offset =
+                                            data_fake.substrs[i].min_offset;
+                                data->substrs[i].max_offset =
+                                            data_fake.substrs[i].max_offset;
+                                data->substrs[i].minlenp =
+                                            data_fake.substrs[i].minlenp;
+                                data->substrs[i].lookbehind += scan->flags;
+                            }
                          }
                      }
                  }
             }
  #endif
         }
+
         else if (OP(scan) == OPEN) {
             if (stopparen != (I32)ARG(scan))
                 pars++;
@@ -5900,6 +5955,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                          /* We go from the jump point to the branch that follows
                             it. Note this means we need the vestigial unused
                             branches even though they aren't otherwise used. */
+                        /* optimise study_chunk() for TRIE */
                          minnext = study_chunk(pRExC_state, &scan, minlenp,
                              &deltanext, (regnode *)nextbranch, &data_fake,
                              stopparen, recursed_depth, NULL, f,depth+1);
@@ -5940,8 +5996,12 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      data->cur_is_floating = 1; /* float */
              }
              min += min1;
-            if (delta != SSize_t_MAX)
-                delta += max1 - min1;
+            if (delta != SSize_t_MAX) {
+                if (SSize_t_MAX - (max1 - min1) >= delta)
+                    delta += max1 - min1;
+                else
+                    delta = SSize_t_MAX;
+            }
              if (flags & SCF_DO_STCLASS_OR) {
                  ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
                  if (min1) {
@@ -6915,7 +6975,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          if (   ! dump_len_string
              || ! grok_atoUV(dump_len_string, (UV *)&PL_dump_re_max_len, NULL))
          {
-            PL_dump_re_max_len = 0;
+            PL_dump_re_max_len = 60;    /* A reasonable default */
          }
  #endif
      }
@@ -7032,6 +7092,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_seen_unfolded_sharp_s = 0;
      RExC_contains_locale = 0;
      RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
+    RExC_in_script_run = 0;
      RExC_study_started = 0;
      pRExC_state->runtime_code_qr = NULL;
      RExC_frame_head= NULL;
@@ -7044,7 +7105,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      });
      DEBUG_COMPILE_r({
              SV *dsv= sv_newmortal();
-            RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, 60);
+            RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, PL_dump_re_max_len);
              Perl_re_printf( aTHX_  "%sCompiling REx%s %s\n",
                            PL_colors[4],PL_colors[5],s);
          });
@@ -7170,14 +7231,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          at least some part of the pattern, and therefore must convert the whole
          thing.
          -- dmq */
-        if (flags & RESTART_PASS1) {
+        if (MUST_RESTART(flags)) {
              if (flags & NEED_UTF8) {
                  S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
                  pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
+                DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo pass 1 after upgrade\n"));
              }
              else {
-                DEBUG_PARSE_r(Perl_re_printf( aTHX_
-                "Need to redo pass 1\n"));
+                DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo pass 1\n"));
              }
  
              goto redo_first_pass;
@@ -7271,8 +7332,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          /* make sure PL_bitcount bounds not exceeded */
          assert(sizeof(STD_PAT_MODS) <= 8);
  
-        Newx(p, wraplen + 1, char); /* +1 for the ending NUL */
-       r->xpv_len_u.xpvlenu_pv = p;
+        p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
+        SvPOK_on(rx);
         if (RExC_utf8)
             SvFLAGS(rx) |= SVf_UTF8;
          *p++='('; *p++='?';
@@ -7381,7 +7442,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         3-units-long substrs field. */
      Newx(r->substrs, 1, struct reg_substr_data);
      if (RExC_recurse_count) {
-        Newxz(RExC_recurse,RExC_recurse_count,regnode *);
+        Newx(RExC_recurse,RExC_recurse_count,regnode *);
          SAVEFREEPV(RExC_recurse);
      }
  
@@ -7432,12 +7493,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)) { /*  Only one top-level choice.
                                                    */
         SSize_t fake;
-       STRLEN longest_float_length, longest_fixed_length;
+       STRLEN longest_length[2];
         regnode_ssc ch_class; /* pointed to by data */
         int stclass_flag;
         SSize_t last_close = 0; /* pointed to by data */
          regnode *first= scan;
          regnode *first_next= regnext(first);
+        int i;
+
         /*
          * Skip introductions and multiplicators >= 1
          * so that we can extract the 'meat' of the pattern that must
@@ -7578,6 +7641,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         data.last_closep = &last_close;
  
          DEBUG_RExC_seen();
+        /*
+         * MAIN ENTRY FOR study_chunk() FOR m/PATTERN/
+         * (NO top level branches)
+         */
         minlen = study_chunk(pRExC_state, &first, &minlen, &fake,
                               scan + RExC_size, /* Up to end */
              &data, -1, 0, NULL,
@@ -7599,54 +7666,41 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          }
         scan_commit(pRExC_state, &data,&minlen,0);
  
-       longest_float_length = CHR_SVLEN(data.substrs[1].str);
-
-        if (! ((SvCUR(data.substrs[0].str)  /* ok to leave SvCUR */
-                   && data.substrs[0].min_offset == data.substrs[1].min_offset
-                   && SvCUR(data.substrs[0].str) == SvCUR(data.substrs[1].str)))
-            && S_setup_longest (aTHX_ pRExC_state,
-                                    &(r->substrs->data[1]),
-                                    &(data.substrs[1]),
-                                    longest_float_length))
-        {
-           r->substrs->data[1].min_offset =
-                    data.substrs[1].min_offset - data.substrs[1].lookbehind;
  
-           r->substrs->data[1].max_offset = data.substrs[1].max_offset;
-           if (data.substrs[1].max_offset < SSize_t_MAX) /* Don't offset infinity */
-               r->substrs->data[1].max_offset -= data.substrs[1].lookbehind;
+        /* XXX this is done in reverse order because that's the way the
+         * code was before it was parameterised. Don't know whether it
+         * actually needs doing in reverse order. DAPM */
+        for (i = 1; i >= 0; i--) {
+            longest_length[i] = CHR_SVLEN(data.substrs[i].str);
  
-           SvREFCNT_inc_simple_void_NN(data.substrs[1].str);
-       }
-       else {
-           r->substrs->data[1].substr      = NULL;
-            r->substrs->data[1].utf8_substr = NULL;
-           longest_float_length = 0;
-       }
-
-       longest_fixed_length = CHR_SVLEN(data.substrs[0].str);
-
-        if (S_setup_longest (aTHX_ pRExC_state,
-                                &(r->substrs->data[0]),
-                                &(data.substrs[0]),
-                                longest_fixed_length))
-        {
-           r->substrs->data[0].min_offset =
-                    data.substrs[0].min_offset - data.substrs[0].lookbehind;
-            /* XXX this calc isn't necessary for anchored, but is done
-             * for consistency with float code path */
-           r->substrs->data[0].max_offset = data.substrs[0].max_offset;
-
-           if (data.substrs[0].max_offset < SSize_t_MAX) /* Don't offset infinity */
-               r->substrs->data[0].max_offset -= data.substrs[0].lookbehind;
+            if (   !(   i
+                     && SvCUR(data.substrs[0].str)  /* ok to leave SvCUR */
+                     &&    data.substrs[0].min_offset
+                        == data.substrs[1].min_offset
+                     &&    SvCUR(data.substrs[0].str)
+                        == SvCUR(data.substrs[1].str)
+                    )
+                && S_setup_longest (aTHX_ pRExC_state,
+                                        &(r->substrs->data[i]),
+                                        &(data.substrs[i]),
+                                        longest_length[i]))
+            {
+                r->substrs->data[i].min_offset =
+                        data.substrs[i].min_offset - data.substrs[i].lookbehind;
+
+                r->substrs->data[i].max_offset = data.substrs[i].max_offset;
+                /* Don't offset infinity */
+                if (data.substrs[i].max_offset < SSize_t_MAX)
+                    r->substrs->data[i].max_offset -= data.substrs[i].lookbehind;
+                SvREFCNT_inc_simple_void_NN(data.substrs[i].str);
+            }
+            else {
+                r->substrs->data[i].substr      = NULL;
+                r->substrs->data[i].utf8_substr = NULL;
+                longest_length[i] = 0;
+            }
+        }
  
-           SvREFCNT_inc_simple_void_NN(data.substrs[0].str);
-       }
-       else {
-            r->substrs->data[0].substr      = NULL;
-            r->substrs->data[0].utf8_substr = NULL;
-           longest_fixed_length = 0;
-       }
         LEAVE_with_name("study_chunk");
  
         if (ri->regstclass
@@ -7677,26 +7731,17 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
              data.start_class = NULL;
         }
  
-        /* A temporary algorithm prefers floated substr to fixed one to dig
-         * more info. */
-       if (longest_fixed_length > longest_float_length) {
-           r->substrs->check_ix = 0;
-           r->check_end_shift  = r->substrs->data[0].end_shift;
-           r->check_substr     = r->substrs->data[0].substr;
-           r->check_utf8       = r->substrs->data[0].utf8_substr;
-           r->check_offset_min = r->substrs->data[0].min_offset;
-           r->check_offset_max = r->substrs->data[0].max_offset;
-            if (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS))
-                r->intflags |= PREGf_NOSCAN;
-       }
-       else {
-           r->substrs->check_ix = 1;
-           r->check_end_shift  = r->substrs->data[1].end_shift;
-           r->check_substr     = r->substrs->data[1].substr;
-           r->check_utf8       = r->substrs->data[1].utf8_substr;
-           r->check_offset_min = r->substrs->data[1].min_offset;
-           r->check_offset_max = r->substrs->data[1].max_offset;
-       }
+        /* A temporary algorithm: prefer the floating substr over the fixed
+         * one when both have the same length, to dig out more info. */
+       i = (longest_length[0] <= longest_length[1]);
+        r->substrs->check_ix = i;
+        r->check_end_shift  = r->substrs->data[i].end_shift;
+        r->check_substr     = r->substrs->data[i].substr;
+        r->check_utf8       = r->substrs->data[i].utf8_substr;
+        r->check_offset_min = r->substrs->data[i].min_offset;
+        r->check_offset_max = r->substrs->data[i].max_offset;
+        if (!i && (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS)))
+            r->intflags |= PREGf_NOSCAN;
  
         if ((r->check_substr || r->check_utf8) ) {
             r->extflags |= RXf_USE_INTUIT;
@@ -7705,10 +7750,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         }
  
         /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
-       if ( (STRLEN)minlen < longest_float_length )
-            minlen= longest_float_length;
-        if ( (STRLEN)minlen < longest_fixed_length )
-            minlen= longest_fixed_length;
+       if ( (STRLEN)minlen < longest_length[1] )
+            minlen= longest_length[1];
+        if ( (STRLEN)minlen < longest_length[0] )
+            minlen= longest_length[0];
          */
      }
      else {
@@ -7725,6 +7770,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         data.last_closep = &last_close;
  
          DEBUG_RExC_seen();
+        /*
+         * MAIN ENTRY FOR study_chunk() FOR m/P1|P2|.../
+         * (patterns WITH top level branches)
+         */
         minlen = study_chunk(pRExC_state,
              &scan, &minlen, &fake, scan + RExC_size, &data, -1, 0, NULL,
              SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
@@ -7783,7 +7832,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      if (RExC_seen & REG_RECURSE_SEEN ) {
          r->intflags |= PREGf_RECURSE_SEEN;
-        Newxz(r->recurse_locinput, r->nparens + 1, char *);
+        Newx(r->recurse_locinput, r->nparens + 1, char *);
      }
      if (RExC_seen & REG_GPOS_SEEN)
          r->intflags |= PREGf_GPOS_SEEN;
@@ -10250,8 +10299,8 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
              /* Some characters match above-Latin1 ones under /i.  This
               * is true of EXACTFL ones when the locale is UTF-8 */
              if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc)
-                && (! isASCII(uc) || (OP(node) != EXACTFA
-                                    && OP(node) != EXACTFA_NO_TRIE)))
+                && (! isASCII(uc) || (OP(node) != EXACTFAA
+                                    && OP(node) != EXACTFAA_NO_TRIE)))
              {
                  add_above_Latin1_folds(pRExC_state, (U8) uc, &invlist);
              }
@@ -10331,7 +10380,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
                      c = SvUV(*c_p);
  
                      /* /aa doesn't allow folds between ASCII and non- */
-                    if ((OP(node) == EXACTFA || OP(node) == EXACTFA_NO_TRIE)
+                    if ((OP(node) == EXACTFAA || OP(node) == EXACTFAA_NO_TRIE)
                          && isASCII(c) != isASCII(uc))
                      {
                          continue;
@@ -10640,7 +10689,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
       * RExC_parse beyond the '('.  Things like '(?' are indivisible tokens, and
       * this flag alerts us to the need to check for that */
  {
-    regnode *ret;              /* Will be the head of the group. */
+    regnode *ret = NULL;    /* Will be the head of the group. */
      regnode *br;
      regnode *lastbr;
      regnode *ender = NULL;
@@ -10678,47 +10727,77 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
           * here (if paren ==2).  The forms '(*VERB' and '(?...' disallow such
           * intervening space, as the sequence is a token, and a token should be
           * indivisible */
-        bool has_intervening_patws = paren == 2 && *(RExC_parse - 1) != '(';
+        bool has_intervening_patws = (paren == 2)
+                                  && *(RExC_parse - 1) != '(';
  
          if (RExC_parse >= RExC_end) {
             vFAIL("Unmatched (");
          }
  
-        if ( *RExC_parse == '*') { /* (*VERB:ARG) */
+        if ( *RExC_parse == '*') { /* (*VERB:ARG), (*construct:...) */
             char *start_verb = RExC_parse + 1;
             STRLEN verb_len;
             char *start_arg = NULL;
             unsigned char op = 0;
              int arg_required = 0;
              int internal_argval = -1; /* if >-1 we are not allowed an argument*/
+            bool has_upper = FALSE;
  
              if (has_intervening_patws) {
                  RExC_parse++;   /* past the '*' */
-                vFAIL("In '(*VERB...)', the '(' and '*' must be adjacent");
+
+                /* For strict backwards compatibility, don't change the message
+                 * now that we also have lowercase operands */
+                if (isUPPER(*RExC_parse)) {
+                    vFAIL("In '(*VERB...)', the '(' and '*' must be adjacent");
+                }
+                else {
+                    vFAIL("In '(*...)', the '(' and '*' must be adjacent");
+                }
              }
             while (RExC_parse < RExC_end && *RExC_parse != ')' ) {
                 if ( *RExC_parse == ':' ) {
                     start_arg = RExC_parse + 1;
                     break;
                 }
-               RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                else if (! UTF) {
+                    if (isUPPER(*RExC_parse)) {
+                        has_upper = TRUE;
+                    }
+                    RExC_parse++;
+                }
+                else {
+                    RExC_parse += UTF8SKIP(RExC_parse);
+                }
             }
             verb_len = RExC_parse - start_verb;
             if ( start_arg ) {
                  if (RExC_parse >= RExC_end) {
                      goto unterminated_verb_pattern;
                  }
+
                 RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-               while ( RExC_parse < RExC_end && *RExC_parse != ')' )
+               while ( RExC_parse < RExC_end && *RExC_parse != ')' ) {
                      RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-               if ( RExC_parse >= RExC_end || *RExC_parse != ')' )
+                }
+               if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
                    unterminated_verb_pattern:
-                   vFAIL("Unterminated verb pattern argument");
-               if ( RExC_parse == start_arg )
-                   start_arg = NULL;
+                    if (has_upper) {
+                        vFAIL("Unterminated verb pattern argument");
+                    }
+                    else {
+                        vFAIL("Unterminated '(*...' argument");
+                    }
+                }
             } else {
-               if ( RExC_parse >= RExC_end || *RExC_parse != ')' )
-                   vFAIL("Unterminated verb pattern");
+               if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
+                    if (has_upper) {
+                        vFAIL("Unterminated verb pattern");
+                    }
+                    else {
+                        vFAIL("Unterminated '(*...' construct");
+                    }
+                }
             }
  
              /* Here, we know that RExC_parse < RExC_end */
@@ -10761,13 +10840,131 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      RExC_seen |= REG_CUTGROUP_SEEN;
                  }
                  break;
-           }
+            case 'a':
+                if (memEQs(start_verb, verb_len, "atomic")) {
+                    paren = 't';    /* AtOMIC */
+                    goto alpha_assertions;
+                }
+                break;
+            case 'p':
+                if (   memEQs(start_verb, verb_len, "plb")
+                    || memEQs(start_verb, verb_len, "positive_lookbehind"))
+                {
+                    paren = 'b';
+                    goto lookbehind_alpha_assertions;
+                }
+                else if (   memEQs(start_verb, verb_len, "pla")
+                         || memEQs(start_verb, verb_len, "positive_lookahead"))
+                {
+                    paren = 'a';
+                    goto alpha_assertions;
+                }
+                break;
+            case 'n':
+                if (   memEQs(start_verb, verb_len, "nlb")
+                    || memEQs(start_verb, verb_len, "negative_lookbehind"))
+                {
+                    paren = 'B';
+                    goto lookbehind_alpha_assertions;
+                }
+                else if (   memEQs(start_verb, verb_len, "nla")
+                         || memEQs(start_verb, verb_len, "negative_lookahead"))
+                {
+                    paren = 'A';
+                    goto alpha_assertions;
+                }
+                break;
+            case 's':
+                if (   memEQs(start_verb, verb_len, "sr")
+                    || memEQs(start_verb, verb_len, "script_run"))
+                {
+                    paren = 's';
+
+                    /* This indicates Unicode rules. */
+                    REQUIRE_UNI_RULES(flagp, NULL);
+
+                    if (! start_arg) {
+                        goto no_colon;
+                    }
+
+                    RExC_parse = start_arg;
+
+                    if (PASS2) {
+                        Perl_ck_warner_d(aTHX_
+                            packWARN(WARN_EXPERIMENTAL__SCRIPT_RUN),
+                            "The script_run feature is experimental"
+                            REPORT_LOCATION, REPORT_LOCATION_ARGS(RExC_parse));
+
+                    }
+
+                    if (RExC_in_script_run) {
+                        paren = ':';
+                        ret = NULL;
+                        goto parse_rest;
+                    }
+                    RExC_in_script_run = 1;
+
+                    ret = reg_node(pRExC_state, SROPEN);
+
+                    is_open = 1;
+                    goto parse_rest;
+                }
+
+                break;
+
+            lookbehind_alpha_assertions:
+                RExC_seen |= REG_LOOKBEHIND_SEEN;
+                RExC_in_lookbehind++;
+                /*FALLTHROUGH*/
+
+            alpha_assertions:
+
+                if (PASS2) {
+                    Perl_ck_warner_d(aTHX_
+                        packWARN(WARN_EXPERIMENTAL__ALPHA_ASSERTIONS),
+                        "The alpha_assertions feature is experimental"
+                        REPORT_LOCATION, REPORT_LOCATION_ARGS(RExC_parse));
+                }
+
+                RExC_seen_zerolen++;
+
+                if (! start_arg) {
+                    goto no_colon;
+                }
+
+                /* An empty negative lookahead assertion simply is failure */
+                if (paren == 'A' && RExC_parse == start_arg) {
+                    ret=reganode(pRExC_state, OPFAIL, 0);
+                    nextchar(pRExC_state);
+                    return ret;
+               }
+
+                RExC_parse = start_arg;
+                goto parse_rest;
+
+              no_colon:
+                vFAIL2utf8f(
+                "'(*%" UTF8f "' requires a terminating ':'",
+                UTF8fARG(UTF, verb_len, start_verb));
+               NOT_REACHED; /*NOTREACHED*/
+
+           } /* End of switch */
             if ( ! op ) {
                 RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-                vFAIL2utf8f(
+                if (has_upper || verb_len == 0) {
+                    vFAIL2utf8f(
                      "Unknown verb pattern '%" UTF8f "'",
                      UTF8fARG(UTF, verb_len, start_verb));
+                }
+                else {
+                    vFAIL2utf8f(
+                    "Unknown '(*...)' construct '%" UTF8f "'",
+                    UTF8fARG(UTF, verb_len, start_verb));
+                }
             }
+            if ( RExC_parse == start_arg ) {
+                start_arg = NULL;
+            }
              if ( arg_required && !start_arg ) {
                  vFAIL3("Verb pattern '%.*s' has a mandatory argument",
                      verb_len, start_verb);
@@ -10927,6 +11124,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     paren = 1;
                     goto capturing_parens;
                 }
+
                  RExC_seen |= REG_LOOKBEHIND_SEEN;
                 RExC_in_lookbehind++;
                 RExC_parse++;
@@ -11157,32 +11355,51 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             {
                 int is_define= 0;
                  const int DEFINE_len = sizeof("DEFINE") - 1;
-               if (RExC_parse[0] == '?') {        /* (?(?...)) */
-                    if (   RExC_parse < RExC_end - 1
-                        && (   RExC_parse[1] == '='
-                            || RExC_parse[1] == '!'
-                            || RExC_parse[1] == '<'
-                            || RExC_parse[1] == '{')
-                    ) { /* Lookahead or eval. */
-                       I32 flag;
-                        regnode *tail;
-
-                       ret = reg_node(pRExC_state, LOGICAL);
-                       if (!SIZE_ONLY)
-                           ret->flags = 1;
-
-                        tail = reg(pRExC_state, 1, &flag, depth+1);
-                        if (flag & (RESTART_PASS1|NEED_UTF8)) {
-                            *flagp = flag & (RESTART_PASS1|NEED_UTF8);
-                            return NULL;
-                        }
-                        REGTAIL(pRExC_state, ret, tail);
-                       goto insert_if;
-                   }
-                   /* Fall through to ‘Unknown switch condition’ at the
-                      end of the if/else chain. */
-               }
-               else if ( RExC_parse[0] == '<'     /* (?(<NAME>)...) */
+               if (    RExC_parse < RExC_end - 1
+                    && (   (       RExC_parse[0] == '?'        /* (?(?...)) */
+                            && (   RExC_parse[1] == '='
+                                || RExC_parse[1] == '!'
+                                || RExC_parse[1] == '<'
+                                || RExC_parse[1] == '{'))
+                       || (       RExC_parse[0] == '*'        /* (?(*...)) */
+                            && (   memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "pla:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "plb:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "nla:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "nlb:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "positive_lookahead:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "positive_lookbehind:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "negative_lookahead:")
+                                || memBEGINs(RExC_parse + 1,
+                                         (Size_t) (RExC_end - (RExC_parse + 1)),
+                                         "negative_lookbehind:"))))
+                ) { /* Lookahead or eval. */
+                    I32 flag;
+                    regnode *tail;
+
+                    ret = reg_node(pRExC_state, LOGICAL);
+                    if (!SIZE_ONLY)
+                        ret->flags = 1;
+
+                    tail = reg(pRExC_state, 1, &flag, depth+1);
+                    RETURN_NULL_ON_RESTART(flag,flagp);
+                    REGTAIL(pRExC_state, ret, tail);
+                    goto insert_if;
+                }
+               else if (   RExC_parse[0] == '<'     /* (?(<NAME>)...) */
                          || RExC_parse[0] == '\'' ) /* (?('NAME')...) */
                 {
                     char ch = RExC_parse[0] == '<' ? '>' : '\'';
@@ -11206,8 +11423,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      ret = reganode(pRExC_state,NGROUPP,num);
                      goto insert_if_check_paren;
                 }
-               else if (RExC_end - RExC_parse >= DEFINE_len
-                        && strnEQ(RExC_parse, "DEFINE", DEFINE_len))
+               else if (memBEGINs(RExC_parse,
+                                   (STRLEN) (RExC_end - RExC_parse),
+                                   "DEFINE"))
                  {
                     ret = reganode(pRExC_state,DEFINEP,0);
                     RExC_parse += DEFINE_len;
@@ -11283,10 +11501,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
                      br = regbranch(pRExC_state, &flags, 1,depth+1);
                     if (br == NULL) {
-                        if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                            *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                            return NULL;
-                        }
+                        RETURN_NULL_ON_RESTART(flags,flagp);
                          FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
                                (UV) flags);
                      } else
@@ -11304,10 +11519,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                          lastbr = reganode(pRExC_state, IFTHEN, 0);
  
                          if (!regbranch(pRExC_state, &flags, 1,depth+1)) {
-                            if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                                *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                                return NULL;
-                            }
+                            RETURN_NULL_ON_RESTART(flags,flagp);
                              FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
                                    (UV) flags);
                          }
@@ -11342,7 +11554,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  vFAIL("Unknown switch condition (?(...))");
             }
             case '[':           /* (?[ ... ]) */
-                return handle_regex_sets(pRExC_state, NULL, flagp, depth,
+                return handle_regex_sets(pRExC_state, NULL, flagp, depth+1,
                                           oregcomp_parse);
              case 0: /* A NUL */
                 RExC_parse--; /* for vFAIL to print correctly */
@@ -11364,7 +11576,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  goto parse_rest;
              } /* end switch */
         }
-       else if (!(RExC_flags & RXf_PMf_NOCAPTURE)) {   /* (...) */
+       else {
+            if (*RExC_parse == '{' && PASS2) {
+                ckWARNregdep(RExC_parse + 1,
+                            "Unescaped left brace in regex is "
+                            "deprecated here (and will be fatal "
+                            "in Perl 5.32), passed through");
+            }
+            /* Not bothering to indent here, as the above 'else' is temporary
+             * */
+        if (!(RExC_flags & RXf_PMf_NOCAPTURE)) {   /* (...) */
           capturing_parens:
             parno = RExC_npar;
             RExC_npar++;
@@ -11390,6 +11611,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              paren = ':';
             ret = NULL;
         }
+        }
      }
      else                        /* ! paren */
         ret = NULL;
@@ -11402,10 +11624,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
      /*     branch_len = (paren != 0); */
  
      if (br == NULL) {
-        if (flags & (RESTART_PASS1|NEED_UTF8)) {
-            *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-            return NULL;
-        }
+        RETURN_NULL_ON_RESTART(flags,flagp);
          FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
      }
      if (*RExC_parse == '|') {
@@ -11449,10 +11668,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
          br = regbranch(pRExC_state, &flags, 0, depth+1);
  
         if (br == NULL) {
-            if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                return NULL;
-            }
+            RETURN_NULL_ON_RESTART(flags,flagp);
              FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
          }
          REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
@@ -11479,12 +11695,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              Set_Node_Offset(ender,RExC_parse+1); /* MJD */
              Set_Node_Length(ender,1); /* MJD */
             break;
+       case 's':
+           ender = reg_node(pRExC_state, SRCLOSE);
+            RExC_in_script_run = 0;
+           break;
         case '<':
+        case 'a':
+        case 'A':
+        case 'b':
+        case 'B':
         case ',':
         case '=':
         case '!':
             *flagp &= ~HASWIDTH;
             /* FALLTHROUGH */
+        case 't':   /* aTomic */
         case '>':
             ender = reg_node(pRExC_state, SUCCEED);
             break;
@@ -11570,14 +11795,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
  
      {
          const char *p;
-        static const char parens[] = "=!<,>";
+         /* Even/odd or x=don't care: 010101x10x */
+        static const char parens[] = "=!aA<,>Bbt";
+         /* flag below is set to 0 up through 'A'; 1 for larger */
  
         if (paren && (p = strchr(parens, paren))) {
             U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
-           int flag = (p - parens) > 1;
+           int flag = (p - parens) > 3;
  
-           if (paren == '>')
+           if (paren == '>' || paren == 't') {
                 node = SUSPEND, flag = 0;
+            }
             reginsert(pRExC_state, node,ret, depth+1);
              Set_Node_Cur_Length(ret, parse_start);
             Set_Node_Offset(ret, parse_start + 1);
@@ -11663,10 +11891,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
         if (latest == NULL) {
             if (flags & TRYAGAIN)
                 continue;
-            if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                return NULL;
-            }
+            RETURN_NULL_ON_RESTART(flags,flagp);
              FAIL2("panic: regpiece returned NULL, flags=%#" UVxf, (UV) flags);
         }
         else if (ret == NULL)
@@ -11736,11 +11961,8 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
      ret = regatom(pRExC_state, &flags,depth+1);
      if (ret == NULL) {
-       if (flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8))
-           *flagp |= flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8);
-        else
-            FAIL2("panic: regatom returned NULL, flags=%#" UVxf, (UV) flags);
-       return(NULL);
+        RETURN_NULL_ON_RESTART_OR_FLAGS(flags,flagp,TRYAGAIN);
+        FAIL2("panic: regatom returned NULL, flags=%#" UVxf, (UV) flags);
      }
  
      op = *RExC_parse;
@@ -11811,14 +12033,12 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             if ((flags&SIMPLE)) {
                  if (min == 0 && max == REG_INFTY) {
                      reginsert(pRExC_state, STAR, ret, depth+1);
-                    ret->flags = 0;
                      MARK_NAUGHTY(4);
                      RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
                      goto nest_check;
                  }
                  if (min == 1 && max == REG_INFTY) {
                      reginsert(pRExC_state, PLUS, ret, depth+1);
-                    ret->flags = 0;
                      MARK_NAUGHTY(3);
                      RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
                      goto nest_check;
@@ -11931,7 +12151,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          ender = reg_node(pRExC_state, SUCCEED);
          REGTAIL(pRExC_state, ret, ender);
          reginsert(pRExC_state, SUSPEND, ret, depth+1);
-        ret->flags = 0;
          ender = reg_node(pRExC_state, TAIL);
          REGTAIL(pRExC_state, ret, ender);
      }
@@ -12086,14 +12305,15 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
      RExC_parse++;      /* Skip past the '{' */
  
-    endbrace = strchr(RExC_parse, '}');
+    endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
      if (! endbrace) { /* no trailing brace */
          vFAIL2("Missing right brace on \\%c{}", 'N');
      }
-    else if(!(endbrace == RExC_parse           /* nothing between the {} */
-              || (endbrace - RExC_parse >= 2   /* U+ (bad hex is checked... */
-                  && strnEQ(RExC_parse, "U+", 2)))) /* ... below for a better
-                                                       error msg) */
+    else if (!(   endbrace == RExC_parse       /* nothing between the {} */
+               || memBEGINs(RExC_parse,   /* U+ (bad hex is checked below
+                                                   for a  better error msg) */
+                                  (STRLEN) (RExC_end - RExC_parse),
+                                 "U+")))
      {
         RExC_parse = endbrace;  /* position msg's '<--HERE' */
         vFAIL("\\N{NAME} must be resolved by the lexer");
@@ -12141,22 +12361,22 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          /* Convert code point from hex */
         length_of_hex = (STRLEN)(endchar - RExC_parse);
         grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
-                           | PERL_SCAN_DISALLOW_PREFIX
+                       | PERL_SCAN_DISALLOW_PREFIX
  
-                             /* No errors in the first pass (See [perl
-                              * #122671].)  We let the code below find the
-                              * errors when there are multiple chars. */
-                           | ((SIZE_ONLY)
-                              ? PERL_SCAN_SILENT_ILLDIGIT
-                              : 0);
+                           /* No errors in the first pass (See [perl
+                            * #122671].)  We let the code below find the
+                            * errors when there are multiple chars. */
+                       | ((SIZE_ONLY)
+                          ? PERL_SCAN_SILENT_ILLDIGIT
+                          : 0);
  
          /* This routine is the one place where both single- and double-quotish
           * \N{U+xxxx} are evaluated.  The value is a Unicode code point which
           * must be converted to native. */
         *code_point_p = UNI_TO_NATIVE(grok_hex(RExC_parse,
-                                         &length_of_hex,
-                                         &grok_hex_flags,
-                                         NULL));
+                                               &length_of_hex,
+                                               &grok_hex_flags,
+                                               NULL));
  
         /* The tokenizer should have guaranteed validity, but it's possible to
           * bypass it by using single quoting, so check.  Don't do the check
@@ -12197,7 +12417,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          }
  
          /* Fail if caller doesn't want to handle a multi-code-point sequence.
-         * But don't backup up the pointer if the caller want to know how many
+         * But don't back up the pointer if the caller wants to know how many
           * code points there are (they can then handle things) */
          if (! node_p) {
              if (! cp_count) {
@@ -12228,14 +12448,16 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         }
          sv_catpv(substitute_parse, ")");
  
-        RExC_parse = RExC_start = RExC_adjusted_start = SvPV(substitute_parse,
-                                                             len);
+        len = SvCUR(substitute_parse);
  
         /* Don't allow empty number */
         if (len < (STRLEN) 8) {
              RExC_parse = endbrace;
             vFAIL("Invalid hexadecimal number in \\N{U+...}");
         }
+
+        RExC_parse = RExC_start = RExC_adjusted_start
+                                              = SvPV_nolen(substitute_parse);
         RExC_end = RExC_parse + len;
  
          /* The values are Unicode, and therefore not subject to recoding, but
@@ -12245,17 +12467,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          RExC_recode_x_to_native = 1;
  #endif
  
-        if (node_p) {
-            if (!(*node_p = reg(pRExC_state, 1, &flags, depth+1))) {
-                if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                    *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                    return FALSE;
-                }
-                FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
-                    (UV) flags);
-            }
-            *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
-        }
+        *node_p = reg(pRExC_state, 1, &flags, depth+1);
  
          /* Restore the saved values */
         RExC_start = RExC_adjusted_start = save_start;
@@ -12264,8 +12476,15 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  #ifdef EBCDIC
          RExC_recode_x_to_native = 0;
  #endif
-
          SvREFCNT_dec_NN(substitute_parse);
+
+        if (! *node_p) {
+            RETURN_X_ON_RESTART(FALSE, flags,flagp);
+            FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
+                (UV) flags);
+        }
+        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+
          nextchar(pRExC_state);
  
          return TRUE;
@@ -12660,8 +12879,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         NULL,
                         NULL);
          if (ret == NULL) {
-            if (*flagp & (RESTART_PASS1|NEED_UTF8))
-                return NULL;
+            RETURN_NULL_ON_RESTART_FLAGP_OR_FLAGS(flagp,NEED_UTF8);
              FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf,
                    (UV) *flagp);
          }
@@ -12685,10 +12903,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     }
                     goto tryagain;
                 }
-                if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                    *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                    return NULL;
-                }
+                RETURN_NULL_ON_RESTART(flags,flagp);
                  FAIL2("panic: reg returned NULL to regatom, flags=%#" UVxf,
                                                                   (UV) flags);
         }
@@ -12800,9 +13015,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              else {
                  STRLEN length;
                  char name = *RExC_parse;
-                char * endbrace;
+                char * endbrace = NULL;
                  RExC_parse += 2;
-                endbrace = strchr(RExC_parse, '}');
+                if (RExC_parse < RExC_end) {
+                    endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
+                }
  
                  if (! endbrace) {
                      vFAIL2("Missing right brace on \\%c{}", name);
@@ -12823,8 +13040,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  }*/
                  switch (*RExC_parse) {
                      case 'g':
-                        if (length != 1
-                            && (length != 3 || strnNE(RExC_parse + 1, "cb", 2)))
+                        if (    length != 1
+                            && (memNEs(RExC_parse + 1, length - 1, "cb")))
                          {
                              goto bad_bound_type;
                          }
@@ -12971,8 +13188,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             TRUE, /* Allow an optimized regnode result */
                             NULL,
                             NULL);
-            if (*flagp & RESTART_PASS1)
-                return NULL;
+            RETURN_NULL_ON_RESTART_FLAGP(flagp);
              /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if
               * multi-char folds are allowed.  */
              if (!ret)
@@ -13011,8 +13227,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  break;
              }
  
-            if (*flagp & RESTART_PASS1)
-                return NULL;
+            RETURN_NULL_ON_RESTART_FLAGP(flagp);
  
              /* Here, evaluates to a single code point.  Go get that */
              RExC_parse = parse_start;
@@ -13193,11 +13408,26 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             UV ender = 0;
             char *p;
             char *s;
-#define MAX_NODE_STRING_SIZE 127
-           char foldbuf[MAX_NODE_STRING_SIZE+UTF8_MAXBYTES_CASE];
+
+/* This allows us to fill a node with just enough spare so that if the final
+ * character folds, its expansion is guaranteed to fit */
+#define MAX_NODE_STRING_SIZE (255-UTF8_MAXBYTES_CASE)
+           char foldbuf[MAX_NODE_STRING_SIZE+UTF8_MAXBYTES_CASE+1];
+
             char *s0;
             U8 upper_parse = MAX_NODE_STRING_SIZE;
-            U8 node_type = compute_EXACTish(pRExC_state);
+
+            /* We start out as an EXACT node, even if under /i, until we find a
+             * character which is in a fold.  The algorithm now segregates into
+             * separate nodes, characters that fold from those that don't under
+             * /i.  (This hopefully will create nodes that are fixed strings
+             * even under /i, giving the optimizer something to grab on to.)
+             * So, if a node has something in it and the next character is in
+             * the opposite category, that node is closed up, and the function
+             * returns.  Then regatom is called again, and a new node is
+             * created for the new category. */
+            U8 node_type = EXACT;
+
              bool next_is_quantifier;
              char * oldp = NULL;
  
@@ -13211,14 +13441,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
               * which don't participate in folds with Latin1-range characters,
               * as the latter's folds aren't known until runtime.  (We don't
               * need to figure this out until pass 2) */
-            bool maybe_exactfu = PASS2
-                               && (node_type == EXACTF || node_type == EXACTFL);
-
-            /* If a folding node contains only code points that don't
-             * participate in folds, it can be changed into an EXACT node,
-             * which allows the optimizer more things to look for */
-            bool maybe_exact;
+            bool maybe_exactfu = PASS2;
  
+            /* The node_type may change below, but since the size of the node
+             * doesn't change, it works */
             ret = reg_node(pRExC_state, node_type);
  
              /* In pass1, folded, we use a temporary buffer instead of the
@@ -13229,45 +13455,31 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
           reparse:
  
-            /* We look for the EXACTFish to EXACT node optimizaton only if
-             * folding.  (And we don't need to figure this out until pass 2).
-             * XXX It might actually make sense to split the node into portions
-             * that are exact and ones that aren't, so that we could later use
-             * the exact ones to find the longest fixed and floating strings.
-             * One would want to join them back into a larger node.  One could
-             * use a pseudo regnode like 'EXACT_ORIG_FOLD' */
-            maybe_exact = FOLD && PASS2;
-
-           /* XXX The node can hold up to 255 bytes, yet this only goes to
-             * 127.  I (khw) do not know why.  Keeping it somewhat less than
-             * 255 allows us to not have to worry about overflow due to
-             * converting to utf8 and fold expansion, but that value is
-             * 255-UTF8_MAXBYTES_CASE.  join_exact() may join adjacent nodes
-             * split up by this limit into a single one using the real max of
-             * 255.  Even at 127, this breaks under rare circumstances.  If
-             * folding, we do not want to split a node at a character that is a
-             * non-final in a multi-char fold, as an input string could just
-             * happen to want to match across the node boundary.  The join
-             * would solve that problem if the join actually happens.  But a
-             * series of more than two nodes in a row each of 127 would cause
-             * the first join to succeed to get to 254, but then there wouldn't
-             * be room for the next one, which could at be one of those split
-             * multi-char folds.  I don't know of any fool-proof solution.  One
-             * could back off to end with only a code point that isn't such a
-             * non-final, but it is possible for there not to be any in the
-             * entire node. */
-
-            assert(   ! UTF     /* Is at the beginning of a character */
+            /* This breaks under rare circumstances.  If folding, we do not
+             * want to split a node at a character that is a non-final in a
+             * multi-char fold, as an input string could just happen to want to
+             * match across the node boundary.  The code at the end of the loop
+             * looks for this, and backs off until it finds no such
+             * character, but it is possible (though extremely, extremely
+             * unlikely) for all characters in the node to be non-final fold
+             * ones, in which case we just leave the node fully filled, and
+             * hope that it doesn't match the string in just the wrong place */
+
+            assert( ! UTF     /* Is at the beginning of a character */
                     || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                     || UTF8_IS_START(UCHARAT(RExC_parse)));
  
              /* Here, we have a literal character.  Find the maximal string of
               * them in the input that we can fit into a single EXACTish node.
-             * We quit at the first non-literal or when the node gets full */
-           for (p = RExC_parse;
-                len < upper_parse && p < RExC_end;
-                len++)
-           {
+             * We quit at the first non-literal or when the node gets full, or
+             * under /i the categorization of folding/non-folding character
+             * changes */
+           for (p = RExC_parse; len < upper_parse && p < RExC_end; ) {
+
+                /* In most cases each iteration adds one byte to the output.
+                 * The exceptions override this */
+                Size_t added_len = 1;
+
                 oldp = p;
  
                  /* White space has already been ignored */
@@ -13340,8 +13552,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          ) {
                              if (*flagp & NEED_UTF8)
                                  FAIL("panic: grok_bslash_N set NEED_UTF8");
-                            if (*flagp & RESTART_PASS1)
-                                return NULL;
+                            RETURN_NULL_ON_RESTART_FLAGP(flagp);
  
                              /* Here, it wasn't a single code point.  Go close
                               * up this EXACTish node.  The switch() prior to
@@ -13350,6 +13561,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              goto loopdone;
                          }
                          p = RExC_parse;
+                        RExC_parse = parse_start;
                          if (ender > 0xff) {
                              REQUIRE_UTF8(flagp);
                          }
@@ -13380,6 +13592,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             const char* error_msg;
  
                             bool valid = grok_bslash_o(&p,
+                                                       RExC_end,
                                                        &result,
                                                        &error_msg,
                                                        PASS2, /* out warnings */
@@ -13406,6 +13619,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             const char* error_msg;
  
                             bool valid = grok_bslash_x(&p,
+                                                       RExC_end,
                                                        &result,
                                                        &error_msg,
                                                        PASS2, /* out warnings */
@@ -13554,8 +13768,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     break;
                 } /* End of switch on the literal */
  
-               /* Here, have looked at the literal character and <ender>
-                 * contains its ordinal, <p> points to the character after it.
+               /* Here, have looked at the literal character, and <ender>
+                 * contains its ordinal; <p> points to the character after it.
                   * We need to check if the next non-ignored thing is a
                   * quantifier.  Move <p> to after anything that should be
                   * ignored, which, as a side effect, positions <p> for the next
@@ -13587,27 +13801,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       * character we are appending, hence we can delay getting
                       * its representation until PASS2. */
                      if (SIZE_ONLY) {
-                        if (UTF) {
+                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                              const STRLEN unilen = UVCHR_SKIP(ender);
                              s += unilen;
-
-                            /* We have to subtract 1 just below (and again in
-                             * the corresponding PASS2 code) because the loop
-                             * increments <len> each time, as all but this path
-                             * (and one other) through it add a single byte to
-                             * the EXACTish node.  But these paths would change
-                             * len to be the correct final value, so cancel out
-                             * the increment that follows */
-                            len += unilen - 1;
+                            added_len = unilen;
                          }
                          else {
                              s++;
                          }
                      } else { /* PASS2 */
                        not_fold_common:
-                        if (UTF) {
+                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                              U8 * new_s = uvchr_to_utf8((U8*)s, ender);
-                            len += (char *) new_s - s - 1;
+                            added_len = (char *) new_s - s;
                              s = (char *) new_s;
                          }
                          else {
@@ -13618,8 +13824,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  else if (LOC && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)) {
  
                      /* Here are folding under /l, and the code point is
-                     * problematic.  First, we know we can't simplify things */
-                    maybe_exact = FALSE;
+                     * problematic.  If this is the first character in the
+                     * node, change the node type to folding.   Otherwise, if
+                     * this is the first problematic character, close up the
+                     * existing node, so can start a new node with this one */
+                    if (! len) {
+                        node_type = EXACTFL;
+                    }
+                    else if (node_type == EXACT) {
+                        p = oldp;
+                        goto loopdone;
+                    }
+
+                    /* This code point means we can't simplify things */
                      maybe_exactfu = FALSE;
  
                      /* A problematic code point in this context means that its
@@ -13637,108 +13854,165 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       * do for both passes is the PASS2 code for non-folding */
                      goto not_fold_common;
                  }
-                else /* A regular FOLD code point */
-                    if (! (   UTF
-#if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
-   || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
-                                      || UNICODE_DOT_DOT_VERSION > 0)
-                            /* See comments for join_exact() as to why we fold
-                             * this non-UTF at compile time */
-                            || (   node_type == EXACTFU
-                                && ender == LATIN_SMALL_LETTER_SHARP_S)
-#endif
-                )) {
+                else                /* A regular FOLD code point */
+                     if (! UTF)
+                {
                      /* Here, are folding and are not UTF-8 encoded; therefore
-                     * the character must be in the range 0-255, and is not /l
+                     * the character must be in the range 0-255, and is not /l.
                       * (Not /l because we already handled these under /l in
                       * is_PROBLEMATIC_LOCALE_FOLD_cp) */
-                    if (IS_IN_SOME_FOLD_L1(ender)) {
-                        maybe_exact = FALSE;
+                    if (! IS_IN_SOME_FOLD_L1(ender)) {
  
-                        /* See if the character's fold differs between /d and
-                         * /u.  This includes the multi-char fold SHARP S to
-                         * 'ss' */
-                        if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
-                            RExC_seen_unfolded_sharp_s = 1;
-                            maybe_exactfu = FALSE;
+                        /* Start a new node for this non-folding character if
+                         * previous ones in the node were folded */
+                        if (len && node_type != EXACT) {
+                            p = oldp;
+                            goto loopdone;
+                        }
+
+                        *(s++) = (char) ender;
+                    }
+                    else {  /* Here, does participate in some fold */
+
+                        /* if this is the first character in the node, change
+                         * its type to folding.  Otherwise, if this is the
+                         * first folding character in the node, close up the
+                         * existing node, so can start a new node with this
+                         * one.  */
+                        if (! len) {
+                            node_type = compute_EXACTish(pRExC_state);
+                        }
+                        else if (node_type == EXACT) {
+                            p = oldp;
+                            goto loopdone;
                          }
-                        else if (maybe_exactfu
-                            && (PL_fold[ender] != PL_fold_latin1[ender]
+
+                        /* See if the character's fold differs between /d and
+                         * /u.  On non-ancient Unicode versions, this includes
+                         * the multi-char fold SHARP S to 'ss' */
+
  #if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
     || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
                                        || UNICODE_DOT_DOT_VERSION > 0)
-                                || (   len > 0
-                                    && isALPHA_FOLD_EQ(ender, 's')
-                                    && isALPHA_FOLD_EQ(*(s-1), 's'))
+
+                        if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
+
+                            /* See comments for join_exact() as to why we fold
+                             * this non-UTF at compile time */
+                            if (node_type == EXACTFU) {
+                                *(s++) = 's';
+
+                                /* Let the code below add in the extra 's' */
+                                ender = 's';
+                                added_len = 2;
+                            }
+                            else {
+                                RExC_seen_unfolded_sharp_s = 1;
+                                maybe_exactfu = FALSE;
+                            }
+                        }
+                        else if (   len
+                                 && isALPHA_FOLD_EQ(ender, 's')
+                                 && isALPHA_FOLD_EQ(*(s-1), 's'))
+                        {
+                            maybe_exactfu = FALSE;
+                        }
+                        else
  #endif
-                        )) {
+
+                        if (PL_fold[ender] != PL_fold_latin1[ender]) {
                              maybe_exactfu = FALSE;
                          }
-                    }
  
-                    /* Even when folding, we store just the input character, as
-                     * we have an array that finds its fold quickly */
-                    *(s++) = (char) ender;
+                        /* Even when folding, we store just the input
+                         * character, as we have an array that finds its fold
+                         * quickly */
+                        *(s++) = (char) ender;
+                    }
                  }
-                else {  /* FOLD, and UTF (or sharp s) */
+                else {  /* FOLD, and UTF */
                      /* Unlike the non-fold case, we do actually have to
-                     * calculate the results here in pass 1.  This is for two
-                     * reasons, the folded length may be longer than the
-                     * unfolded, and we have to calculate how many EXACTish
-                     * nodes it will take; and we may run out of room in a node
-                     * in the middle of a potential multi-char fold, and have
-                     * to back off accordingly.  */
-
-                    UV folded;
+                     * calculate the fold in pass 1.  This is for two reasons,
+                     * the folded length may be longer than the unfolded, and
+                     * we have to calculate how many EXACTish nodes it will
+                     * take; and we may run out of room in a node in the middle
+                     * of a potential multi-char fold, and have to back off
+                     * accordingly.  */
+
                      if (isASCII_uni(ender)) {
-                        folded = toFOLD(ender);
-                        *(s)++ = (U8) folded;
+
+                        /* As above, we close up and start a new node if the
+                         * previous characters don't match the fold/non-fold
+                         * state of this one.  And if this is the first
+                         * character in the node, and it folds, we change the
+                         * node away from being EXACT */
+                        if (! IS_IN_SOME_FOLD_L1(ender)) {
+                            if (len && node_type != EXACT) {
+                                p = oldp;
+                                goto loopdone;
+                            }
+
+                            *(s)++ = (U8) ender;
+                        }
+                        else {  /* Is in a fold */
+
+                            if (! len) {
+                                node_type = compute_EXACTish(pRExC_state);
+                            }
+                            else if (node_type == EXACT) {
+                                p = oldp;
+                                goto loopdone;
+                            }
+
+                            *(s)++ = (U8) toFOLD(ender);
+                        }
                      }
-                    else {
+                    else {  /* Not ASCII */
                          STRLEN foldlen;
  
-                        folded = _to_uni_fold_flags(
+                        /* As above, we close up and start a new node if the
+                         * previous characters don't match the fold/non-fold
+                         * state of this one.  And if this is the first
+                         * character in the node, and it folds, we change the
+                         * node away from being EXACT */
+                        if (! _invlist_contains_cp(PL_utf8_foldable, ender)) {
+                            if (len && node_type != EXACT) {
+                                p = oldp;
+                                goto loopdone;
+                            }
+
+                            s = (char *) uvchr_to_utf8((U8 *) s, ender);
+                            added_len = UVCHR_SKIP(ender);
+                        }
+                        else {
+
+                            if (! len) {
+                                node_type = compute_EXACTish(pRExC_state);
+                            }
+                            else if (node_type == EXACT) {
+                                p = oldp;
+                                goto loopdone;
+                            }
+
+                            ender = _to_uni_fold_flags(
                                       ender,
                                       (U8 *) s,
                                       &foldlen,
                                       FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
                                                          ? FOLD_FLAGS_NOMIX_ASCII
                                                          : 0));
-                        s += foldlen;
-
-                        /* The loop increments <len> each time, as all but this
-                         * path (and one other) through it add a single byte to
-                         * the EXACTish node.  But this one has changed len to
-                         * be the correct final value, so subtract one to
-                         * cancel out the increment that follows */
-                        len += foldlen - 1;
-                    }
-                    /* If this node only contains non-folding code points so
-                     * far, see if this new one is also non-folding */
-                    if (maybe_exact) {
-                        if (folded != ender) {
-                            maybe_exact = FALSE;
-                        }
-                        else {
-                            /* Here the fold is the original; we have to check
-                             * further to see if anything folds to it */
-                            if (_invlist_contains_cp(PL_utf8_foldable,
-                                                        ender))
-                            {
-                                maybe_exact = FALSE;
-                            }
+                            s += foldlen;
+                            added_len = foldlen;
                          }
                      }
-                    ender = folded;
                 }
  
+                len += added_len;
+
                 if (next_is_quantifier) {
  
                      /* Here, the next input is a quantifier, and to get here,
-                     * the current character is the only one in the node.
-                     * Also, here <len> doesn't include the final byte for this
-                     * character */
-                    len++;
+                     * the current character is the only one in the node. */
                      goto loopdone;
                 }
  
@@ -13796,7 +14070,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      s = (char *) utf8_hop((U8 *) s, -1);
  
                      while (s >= s0) {   /* Search backwards until find
-                                           non-problematic char */
+                                           a non-problematic char */
                          if (UTF8_IS_INVARIANT(*s)) {
  
                              /* There are no ascii characters that participate
@@ -13916,23 +14190,30 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  OP(ret) = NOTHING;
              }
              else {
-                if (FOLD) {
-                    /* If 'maybe_exact' is still set here, means there are no
-                     * code points in the node that participate in folds;
-                     * similarly for 'maybe_exactfu' and code points that match
-                     * differently depending on UTF8ness of the target string
-                     * (for /u), or depending on locale for /l */
-                    if (maybe_exact) {
-                        OP(ret) = (LOC)
-                                  ? EXACTL
-                                  : EXACT;
+                OP(ret) = node_type;
+
+                /* If the node type is EXACT here, check to see if it
+                 * should be EXACTL. */
+                if (node_type == EXACT) {
+                    if (LOC) {
+                        OP(ret) = EXACTL;
                      }
-                    else if (maybe_exactfu) {
-                        OP(ret) = (LOC)
-                                  ? EXACTFLU8
-                                  : EXACTFU;
+                }
+
+                if (FOLD) {
+                    /* If 'maybe_exactfu' is set, then there are no code points
+                     * that match differently depending on UTF8ness of the
+                     * target string (for /u), or depending on locale for /l */
+                    if (maybe_exactfu) {
+                        if (node_type == EXACTF) {
+                            OP(ret) = EXACTFU;
+                        }
+                        else if (node_type == EXACTFL) {
+                            OP(ret) = EXACTFLU8;
+                        }
                      }
                  }
+
                  alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender,
                                             FALSE /* Don't look to see if could
                                                      be turned into an EXACT
@@ -14644,7 +14925,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
           * */
          switch (name_len) {
              case 4:
-                if (memEQ(name_start, "word", 4)) {
+                if (memEQs(name_start, 4, "word")) {
                      /* this is not POSIX, this is the Perl \w */
                      class_number = ANYOF_WORDCHAR;
                  }
@@ -14655,51 +14936,51 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                   * Offset 4 gives the best switch position.  */
                  switch (name_start[4]) {
                      case 'a':
-                        if (memEQ(name_start, "alph", 4)) /* alpha */
+                        if (memBEGINs(name_start, 5, "alph")) /* alpha */
                              class_number = ANYOF_ALPHA;
                          break;
                      case 'e':
-                        if (memEQ(name_start, "spac", 4)) /* space */
+                        if (memBEGINs(name_start, 5, "spac")) /* space */
                              class_number = ANYOF_SPACE;
                          break;
                      case 'h':
-                        if (memEQ(name_start, "grap", 4)) /* graph */
+                        if (memBEGINs(name_start, 5, "grap")) /* graph */
                              class_number = ANYOF_GRAPH;
                          break;
                      case 'i':
-                        if (memEQ(name_start, "asci", 4)) /* ascii */
+                        if (memBEGINs(name_start, 5, "asci")) /* ascii */
                              class_number = ANYOF_ASCII;
                          break;
                      case 'k':
-                        if (memEQ(name_start, "blan", 4)) /* blank */
+                        if (memBEGINs(name_start, 5, "blan")) /* blank */
                              class_number = ANYOF_BLANK;
                          break;
                      case 'l':
-                        if (memEQ(name_start, "cntr", 4)) /* cntrl */
+                        if (memBEGINs(name_start, 5, "cntr")) /* cntrl */
                              class_number = ANYOF_CNTRL;
                          break;
                      case 'm':
-                        if (memEQ(name_start, "alnu", 4)) /* alnum */
+                        if (memBEGINs(name_start, 5, "alnu")) /* alnum */
                              class_number = ANYOF_ALPHANUMERIC;
                          break;
                      case 'r':
-                        if (memEQ(name_start, "lowe", 4)) /* lower */
+                        if (memBEGINs(name_start, 5, "lowe")) /* lower */
                              class_number = (FOLD) ? ANYOF_CASED : ANYOF_LOWER;
-                        else if (memEQ(name_start, "uppe", 4)) /* upper */
+                        else if (memBEGINs(name_start, 5, "uppe")) /* upper */
                              class_number = (FOLD) ? ANYOF_CASED : ANYOF_UPPER;
                          break;
                      case 't':
-                        if (memEQ(name_start, "digi", 4)) /* digit */
+                        if (memBEGINs(name_start, 5, "digi")) /* digit */
                              class_number = ANYOF_DIGIT;
-                        else if (memEQ(name_start, "prin", 4)) /* print */
+                        else if (memBEGINs(name_start, 5, "prin")) /* print */
                              class_number = ANYOF_PRINT;
-                        else if (memEQ(name_start, "punc", 4)) /* punct */
+                        else if (memBEGINs(name_start, 5, "punc")) /* punct */
                              class_number = ANYOF_PUNCT;
                          break;
                  }
                  break;
              case 6:
-                if (memEQ(name_start, "xdigit", 6))
+                if (memEQs(name_start, 6, "xdigit"))
                      class_number = ANYOF_XDIGIT;
                  break;
          }
@@ -14903,9 +15184,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                         'stack' of where the undealt-with left
                                         parens would be if they were actually
                                         put there */
-    /* The 'VOL' (expanding to 'volatile') is a workaround for an optimiser bug
+    /* The 'volatile' is a workaround for an optimiser bug
       * in Solaris Studio 12.3. See RT #127455 */
-    VOL IV fence = 0;               /* Position of where most recent undealt-
+    volatile IV fence = 0;          /* Position of where most recent undealt-
                                         with left paren in stack is; -1 if none.
                                       */
      STRLEN len;                     /* Temporary */
@@ -14920,6 +15201,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
  
      PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
  
+    DEBUG_PARSE("xcls");
+
      if (in_locale) {
          set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
      }
@@ -14937,7 +15220,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
       * these things, we need to realize that something preceded by a backslash
       * is escaped, so we have to keep track of backslashes */
      if (SIZE_ONLY) {
-        UV depth = 0; /* how many nested (?[...]) constructs */
+        UV nest_depth = 0; /* how many nested (?[...]) constructs */
  
          while (RExC_parse < RExC_end) {
              SV* current = NULL;
@@ -14946,8 +15229,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                      TRUE /* Force /x */ );
  
              switch (*RExC_parse) {
-                case '?':
-                    if (RExC_parse[1] == '[') depth++, RExC_parse++;
+                case '(':
+                    if (RExC_parse[1] == '?' && RExC_parse[2] == '[')
+                        nest_depth++, RExC_parse+=2;
                      /* FALLTHROUGH */
                  default:
                      break;
@@ -15004,9 +15288,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                  }
  
                  case ']':
-                    if (depth--) break;
-                    RExC_parse++;
-                    if (*RExC_parse == ')') {
+                    if (RExC_parse[1] == ')') {
+                        RExC_parse++;
+                        if (nest_depth--) break;
                          node = reganode(pRExC_state, ANYOF, 0);
                          RExC_size += ANYOF_SKIP;
                          nextchar(pRExC_state);
@@ -15018,20 +15302,25 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
  
                          return node;
                      }
-                    goto no_close;
+                    /* We output the messages even if warnings are off, because we'll fail
+                     * the very next thing, and these give a likely diagnosis for that */
+                    if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
+                        output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
+                    }
+                    RExC_parse++;
+                    vFAIL("Unexpected ']' with no following ')' in (?[...");
              }
  
              RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
          }
  
-      no_close:
          /* We output the messages even if warnings are off, because we'll fail
           * the very next thing, and these give a likely diagnosis for that */
          if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
              output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
          }
  
-        FAIL("Syntax error in (?[...])");
+        vFAIL("Syntax error in (?[...])");
      }
  
      /* Pass 2 only after this. */
@@ -15211,12 +15500,14 @@ redo_curchar:
                       * inversion list, and RExC_parse points to the trailing
                       * ']'; the next character should be the ')' */
                      RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for nested extended charclass");
  
                      /* Then the ')' matching the original '(' handled by this
                       * case: statement */
                      RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for wrapper for nested extended charclass");
  
                      RExC_parse++;
                      RExC_flags = save_flags;
@@ -16216,6 +16507,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          do_posix_warnings ? &posix_warnings : NULL,
                          TRUE /* checking only */);
          }
+        else if (  strict && ! skip_white
+                 && (   _generic_isCC(value, _CC_VERTSPACE)
+                     || is_VERTWS_cp_high(value)))
+        {
+            vFAIL("Literal vertical space in [] is illegal except under /x");
+        }
          else if (value == '\\') {
              /* Is a backslash; get the code point of the char after it */
  
@@ -16270,8 +16567,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
                          if (*flagp & NEED_UTF8)
                              FAIL("panic: grok_bslash_N set NEED_UTF8");
-                        if (*flagp & RESTART_PASS1)
-                            return NULL;
+
+                        RETURN_NULL_ON_RESTART_FLAGP(flagp);
  
                          if (cp_count < 0) {
                              vFAIL("\\N in a character class must be a named character: \\N{...}");
@@ -16336,7 +16633,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     vFAIL2("Empty \\%c", (U8)value);
                 if (*RExC_parse == '{') {
                     const U8 c = (U8)value;
-                   e = strchr(RExC_parse, '}');
+                   e = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
                      if (!e) {
                          RExC_parse++;
                          vFAIL2("Missing right brace on \\%c{}", c);
@@ -16468,7 +16765,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                           * referred to outside it. [perl #121777] */
                          if (! has_pkg && curpkg) {
                              char* pkgname = HvNAME(curpkg);
-                            if (strNE(pkgname, "main")) {
+                            if (memNEs(pkgname, HvNAMELEN(curpkg), "main")) {
                                  char* full_name = Perl_form(aTHX_
                                                              "%s::%s",
                                                              pkgname,
@@ -16551,6 +16848,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 {
                     const char* error_msg;
                     bool valid = grok_bslash_o(&RExC_parse,
+                                               RExC_end,
                                                &value,
                                                &error_msg,
                                                 PASS2,   /* warnings only in
@@ -16569,6 +16867,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 {
                     const char* error_msg;
                     bool valid = grok_bslash_x(&RExC_parse,
+                                               RExC_end,
                                                &value,
                                                &error_msg,
                                                PASS2, /* Output warnings */
@@ -16919,7 +17218,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              {
                  /* Here <value> is indeed a multi-char fold.  Get what it is */
  
-                U8 foldbuf[UTF8_MAXBYTES_CASE];
+                U8 foldbuf[UTF8_MAXBYTES_CASE+1];
                  STRLEN foldlen;
  
                  UV folded = _to_uni_fold_flags(
@@ -16994,7 +17293,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                            " be some subset of \"0-9\","
                                            " \"A-Z\", or \"a-z\"");
                      }
-                    else if (prevvalue >= 0x660) { /* ARABIC_INDIC_DIGIT_ZERO */
+                    else if (prevvalue >= FIRST_NON_ASCII_DECIMAL_DIGIT) {
                          SSize_t index_start;
                          SSize_t index_final;
  
@@ -17002,8 +17301,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                           * can't do the same checks for above-ASCII ranges,
                           * except in the case of digit ones.  These should
                           * contain only digits from the same group of 10.  The
-                         * ASCII case is handled just above.  0x660 is the
-                         * first digit character beyond ASCII.  Hence here, the
+                         * ASCII case is handled just above.  Hence here, the
                           * range could be a range of digits.  First some
                           * unlikely special cases.  Grandfather in that a range
                           * ending in 19DA (NEW TAI LUE THAM DIGIT ONE) is bad
@@ -17239,7 +17537,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
         ret = reg(pRExC_state, 1, &reg_flags, depth+1);
  
-       *flagp |= reg_flags&(HASWIDTH|SIMPLE|SPSTART|POSTPONED|RESTART_PASS1|NEED_UTF8);
+        *flagp |= reg_flags & (HASWIDTH|SIMPLE|SPSTART|POSTPONED|RESTART_PASS1|NEED_UTF8);
  
          /* And restore so can parse the rest of the pattern */
          RExC_parse = save_parse;
@@ -17297,14 +17595,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  /* The actual POSIXish node for all the rest depends on the
                   * charset modifier.  The ones in the first set depend only on
                   * ASCII or, if available on this platform, also locale */
+
                  case ANYOF_ASCII:
                  case ANYOF_NASCII:
+
  #ifdef HAS_ISASCII
-                    op = (LOC) ? POSIXL : POSIXA;
-#else
-                    op = POSIXA;
+                    if (LOC) {
+                        op = POSIXL;
+                        goto join_posix;
+                    }
  #endif
-                    goto join_posix;
+                    /* (namedclass - ANYOF_ASCII) is 0 or 1. xor'ing with
+                     * invert converts that to 1 or 0 */
+                    op = ASCII + ((namedclass - ANYOF_ASCII) ^ invert);
+                    break;
  
                  /* The following don't have any matches in the upper Latin1
                   * range, hence /d is equivalent to /u for them.  Making it /u
@@ -17446,6 +17750,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                             TRUE /* downgradable to EXACT */
                                             );
              }
+            else {
+                *flagp |= HASWIDTH|SIMPLE;
+            }
  
              RExC_parse = (char *) cur_parse;
  
@@ -17710,6 +18017,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  if (_invlist_len(only_non_utf8_list) != 0) {
                      ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
                  }
+                SvREFCNT_dec_NN(only_non_utf8_list);
              }
              else {
                  /* Here there were no complemented posix classes.  That means
@@ -17896,25 +18204,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * certain common classes that are easy to test.  Getting to this point in
       * the code means that the class didn't get optimized there.  Since this
       * code is only executed in Pass 2, it is too late to save space--it has
-     * been allocated in Pass 1, and currently isn't given back.  But turning
-     * things into an EXACTish node can allow the optimizer to join it to any
-     * adjacent such nodes.  And if the class is equivalent to things like /./,
-     * expensive run-time swashes can be avoided.  Now that we have more
-     * complete information, we can find things necessarily missed by the
-     * earlier code.  Another possible "optimization" that isn't done is that
-     * something like [Ee] could be changed into an EXACTFU.  khw tried this
-     * and found that the ANYOF is faster, including for code points not in the
-     * bitmap.  This still might make sense to do, provided it got joined with
-     * an adjacent node(s) to create a longer EXACTFU one.  This could be
-     * accomplished by creating a pseudo ANYOF_EXACTFU node type that the join
-     * routine would know is joinable.  If that didn't happen, the node type
-     * could then be made a straight ANYOF */
+     * been allocated in Pass 1, and currently isn't given back.  XXX Why not?
+     * But turning things into an EXACTish node can allow the optimizer to join
+     * it to any adjacent such nodes.  And if the class is equivalent to things
+     * like /./, expensive run-time swashes can be avoided.  Now that we have
+     * more complete information, we can find things necessarily missed by the
+     * earlier code. */
  
      if (optimizable && cp_list && ! invert) {
          UV start, end;
          U8 op = END;  /* The optimzation node-type */
          int posix_class = -1;   /* Illegal value */
          const char * cur_parse= RExC_parse;
+        U8 ANYOFM_mask = 0xFF;
+        U32 anode_arg = 0;
  
          invlist_iterinit(cp_list);
          if (! invlist_iternext(cp_list, &start, &end)) {
@@ -17997,29 +18300,45 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          invlist_iterfinish(cp_list);
  
          if (op == END) {
-            const UV cp_list_len = _invlist_len(cp_list);
-            const UV* cp_list_array = invlist_array(cp_list);
  
              /* Here, didn't find an optimization.  See if this matches any of
-             * the POSIX classes.  These run slightly faster for above-Unicode
-             * code points, so don't bother with POSIXA ones nor the 2 that
-             * have no above-Unicode matches.  We can avoid these checks unless
-             * the ANYOF matches at least as high as the lowest POSIX one
-             * (which was manually found to be \v.  The actual code point may
-             * increase in later Unicode releases, if a higher code point is
-             * assigned to be \v, but this code will never break.  It would
-             * just mean we could execute the checks for posix optimizations
-             * unnecessarily) */
-
-            if (cp_list_array[cp_list_len-1] > 0x2029) {
+             * the POSIX classes.  First try ASCII */
+
+            if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 0)) {
+                op = ASCII;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 1)) {
+                op = NASCII;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (invlist_highest(cp_list) >= 0x2029) {
+
+                /* Then try the other POSIX classes.  The POSIXA ones are about
+                 * the same speed as ANYOF ops, but the ones that have
+                 * above-Latin1 code point matches are somewhat faster than
+                 * ANYOF.  So optimize those, but don't bother with the POSIXA
+                 * ones nor [:cntrl:] which has no above-Latin1 matches.  If
+                 * this ANYOF node has a lower highest possible matching code
+                 * point than any of the XPosix ones, we know that it can't
+                 * possibly be the same as any of them, so we can avoid
+                 * executing this code.  The 0x2029 above for the lowest max
+                 * was determined by manual inspection of the classes, and
+                 * comes from \v.  Suppose Unicode in a later version adds a
+                 * higher code point to \v.  All that means is that this code
+                 * can be executed unnecessarily.  It will still give the
+                 * correct answer. */
+
                  for (posix_class = 0;
                       posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
                       posix_class++)
                  {
                      int try_inverted;
-                    if (posix_class == _CC_ASCII || posix_class == _CC_CNTRL) {
+
+                    if (posix_class == _CC_CNTRL) {
                          continue;
                      }
+
                      for (try_inverted = 0; try_inverted < 2; try_inverted++) {
  
                          /* Check if matches normal or inverted */
@@ -18037,6 +18356,106 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  }
                found_posix: ;
              }
+
+            /* If it didn't match a POSIX class, it might be able to be turned
+             * into an ANYOFM node.  Compare two different bytes, bit-by-bit.
+             * In some positions, the bits in each will be 1; and in other
+             * positions both will be 0; and in some positions the bit will be
+             * 1 in one byte, and 0 in the other.  Let 'n' be the number of
+             * positions where the bits differ.  We create a mask which has
+             * exactly 'n' 0 bits, each in a position where the two bytes
+             * differ.  Now take the set of all bytes that when ANDed with the
+             * mask yield the same result.  That set has 2**n elements, and is
+             * representable by just two 8 bit numbers: the result and the
+             * mask.  Importantly, matching the set can be vectorized by
+             * creating a word full of the result bytes, and a word full of the
+             * mask bytes, yielding a significant speed up.  Here, see if this
+             * node matches such a set.  As a concrete example consider [01],
+             * and the byte representing '0' which is 0x30 on ASCII machines.
+             * It has the bits 0011 0000.  Take the mask 1111 1110.  If we AND
+             * 0x31 and 0x30 with that mask we get 0x30.  Any other bytes ANDed
+             * yield something else.  So [01], which is a common usage, is
+             * optimizable into ANYOFM, and can benefit from the speed up.  We
+             * can only do this on UTF-8 invariant bytes, because the variance
+             * would throw this off.  */
+            if (   op == END
+                && invlist_highest(cp_list) <=
+#ifdef EBCDIC
+                                               0xFF
+#else
+                                               0x7F
+#endif
+            ) {
+                Size_t cp_count = 0;
+                bool first_time = TRUE;
+                unsigned int lowest_cp = 0xFF;
+                U8 bits_differing = 0;
+
+                /* Only needed on EBCDIC, as there, variants and non- are mixed
+                 * together.  Could #ifdef it out on ASCII, but probably the
+                 * compiler will optimize it out */
+                bool has_variant = FALSE;
+
+                /* Go through the bytes and find the bit positions that differ */
+                invlist_iterinit(cp_list);
+                while (invlist_iternext(cp_list, &start, &end)) {
+                    unsigned int i = start;
+
+                    cp_count += end - start + 1;
+
+                    if (first_time) {
+                        if (! UVCHR_IS_INVARIANT(i)) {
+                            has_variant = TRUE;
+                            continue;
+                        }
+
+                        first_time = FALSE;
+                        lowest_cp = start;
+
+                        i++;
+                    }
+
+                    /* Find the bit positions that differ from the lowest code
+                     * point in the node.  Keep track of all such positions by
+                     * OR'ing */
+                    for (; i <= end; i++) {
+                        if (! UVCHR_IS_INVARIANT(i)) {
+                            has_variant = TRUE;
+                            continue;
+                        }
+
+                        bits_differing  |= i ^ lowest_cp;
+                    }
+                }
+                invlist_iterfinish(cp_list);
+
+                /* At the end of the loop, we count how many bits differ from
+                 * the bits in lowest code point, call the count 'd'.  If the
+                 * set we found contains 2**d elements, it is the closure of
+                 * all code points that differ only in those bit positions.  To
+                 * convince yourself of that, first note that the number in the
+                 * closure must be a power of 2, which we test for.  The only
+                 * way we could have that count and it be some differing set,
+                 * is if we got some code points that don't differ from the
+                 * lowest code point in any position, but do differ from each
+                 * other in some other position.  That means one code point has
+                 * a 1 in that position, and another has a 0.  But that would
+                 * mean that one of them differs from the lowest code point in
+                 * that position, which possibility we've already excluded. */
+                if ( ! has_variant
+                    && cp_count == 1U << PL_bitcount[bits_differing])
+                {
+                    assert(cp_count > 1);
+                    op = ANYOFM;
+
+                    /* We need to make the bits that differ be 0's */
+                    ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS */
+
+                    /* The argument is the lowest code point */
+                    anode_arg = lowest_cp;
+                    *flagp |= HASWIDTH|SIMPLE;
+                }
+            }
          }
  
          if (op != END) {
@@ -18044,7 +18463,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              RExC_emit = (regnode *)orig_emit;
  
              if (regarglen[op]) {
-                ret = reganode(pRExC_state, op, 0);
+                ret = reganode(pRExC_state, op, anode_arg);
              } else {
                  ret = reg_node(pRExC_state, op);
              }
@@ -18059,6 +18478,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              else if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
                  FLAGS(ret) = posix_class;
              }
+            else if (PL_regkind[op] == ANYOFM) {
+                FLAGS(ret) = ANYOFM_mask;
+            }
  
              SvREFCNT_dec_NN(cp_list);
              return ret;
@@ -18666,6 +19088,7 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
  * if (PASS2)
  *     NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
  *
+* ALSO NOTE - operand->flags will be set to 0 as well.
  */
  STATIC void
  S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
@@ -18739,7 +19162,6 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
  #endif
      }
  
-
      place = operand;           /* Op node, where operand used to be. */
  #ifdef RE_TRACK_PATTERN_OFFSETS
      if (RExC_offsets) {         /* MJD */
@@ -18758,6 +19180,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
      }
  #endif
      src = NEXTOPER(place);
+    place->flags = 0;
      FILL_ADVANCE_NODE(place, op);
      Zero(src, offset, regnode);
  }
@@ -18861,8 +19284,8 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
                  case EXACT:
                  case EXACTL:
                  case EXACTF:
-                case EXACTFA_NO_TRIE:
-                case EXACTFA:
+                case EXACTFAA_NO_TRIE:
+                case EXACTFAA:
                  case EXACTFU:
                  case EXACTFLU8:
                  case EXACTFU_SS:
@@ -18910,6 +19333,36 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
  }
  #endif
  
+STATIC SV*
+S_get_ANYOFM_contents(pTHX_ const regnode * n) {
+
+    /* Returns an inversion list of all the code points matched by the ANYOFM
+     * node 'n': ARG(n) is the lowest code point, FLAGS(n) the bit mask */
+
+    SV * cp_list = _new_invlist(-1);
+    const U8 lowest = (U8) ARG(n);
+    unsigned int i;
+    unsigned int count = 0; /* Wider than U8: the set can hold 256 members */
+    const unsigned int needed = 1U << PL_bitcount[ (U8) ~ FLAGS(n)];
+
+    PERL_ARGS_ASSERT_GET_ANYOFM_CONTENTS;
+
+    /* Starting with the lowest code point, any code point that ANDed with the
+     * mask yields the lowest code point is in the set */
+    for (i = lowest; i <= 0xFF; i++) {
+        if ((i & FLAGS(n)) == ARG(n)) {
+            cp_list = add_cp_to_invlist(cp_list, i);
+            count++;
+
+            /* The set has exactly 'needed' members (a power of two, at most
+             * 256, which a U8 would truncate to 0); stop once all are found */
+            if (count >= needed) break;
+        }
+    }
+
+    return cp_list;
+}
+
  /*
   - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
   */
@@ -18992,6 +19445,7 @@ void
  Perl_regdump(pTHX_ const regexp *r)
  {
  #ifdef DEBUGGING
+    int i;
      SV * const sv = sv_newmortal();
      SV *dsv= sv_newmortal();
      RXi_GET_DECL(r,ri);
@@ -19002,36 +19456,35 @@ Perl_regdump(pTHX_ const regexp *r)
      (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
  
      /* Header fields of interest. */
-    if (r->substrs->data[0].substr) {
-       RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->substrs->data[0].substr),
-           RE_SV_DUMPLEN(r->substrs->data[0].substr), 30);
-        Perl_re_printf( aTHX_
-                     "anchored %s%s at %" IVdf " ",
-                     s, RE_SV_TAIL(r->substrs->data[0].substr),
-                     (IV)r->substrs->data[0].min_offset);
-    } else if (r->substrs->data[0].utf8_substr) {
-       RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->substrs->data[0].utf8_substr),
-           RE_SV_DUMPLEN(r->substrs->data[0].utf8_substr), 30);
-        Perl_re_printf( aTHX_
-                     "anchored utf8 %s%s at %" IVdf " ",
-                     s, RE_SV_TAIL(r->substrs->data[0].utf8_substr),
-                     (IV)r->substrs->data[0].min_offset);
-    }
-    if (r->substrs->data[1].substr) {
-       RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->substrs->data[1].substr),
-           RE_SV_DUMPLEN(r->substrs->data[1].substr), 30);
-        Perl_re_printf( aTHX_
-                     "floating %s%s at %" IVdf "..%" UVuf " ",
-                     s, RE_SV_TAIL(r->substrs->data[1].substr),
-                     (IV)r->substrs->data[1].min_offset, (UV)r->substrs->data[1].max_offset);
-    } else if (r->substrs->data[1].utf8_substr) {
-       RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->substrs->data[1].utf8_substr),
-           RE_SV_DUMPLEN(r->substrs->data[1].utf8_substr), 30);
-        Perl_re_printf( aTHX_
-                     "floating utf8 %s%s at %" IVdf "..%" UVuf " ",
-                     s, RE_SV_TAIL(r->substrs->data[1].utf8_substr),
-                     (IV)r->substrs->data[1].min_offset, (UV)r->substrs->data[1].max_offset);
+    for (i = 0; i < 2; i++) {
+        if (r->substrs->data[i].substr) {
+            RE_PV_QUOTED_DECL(s, 0, dsv,
+                            SvPVX_const(r->substrs->data[i].substr),
+                            RE_SV_DUMPLEN(r->substrs->data[i].substr),
+                            PL_dump_re_max_len);
+            Perl_re_printf( aTHX_
+                          "%s %s%s at %" IVdf "..%" UVuf " ",
+                          i ? "floating" : "anchored",
+                          s,
+                          RE_SV_TAIL(r->substrs->data[i].substr),
+                          (IV)r->substrs->data[i].min_offset,
+                          (UV)r->substrs->data[i].max_offset);
+        }
+        else if (r->substrs->data[i].utf8_substr) {
+            RE_PV_QUOTED_DECL(s, 1, dsv,
+                            SvPVX_const(r->substrs->data[i].utf8_substr),
+                            RE_SV_DUMPLEN(r->substrs->data[i].utf8_substr),
+                            30);
+            Perl_re_printf( aTHX_
+                          "%s utf8 %s%s at %" IVdf "..%" UVuf " ",
+                          i ? "floating" : "anchored",
+                          s,
+                          RE_SV_TAIL(r->substrs->data[i].utf8_substr),
+                          (IV)r->substrs->data[i].min_offset,
+                          (UV)r->substrs->data[i].max_offset);
+        }
      }
+
      if (r->check_substr || r->check_utf8)
          Perl_re_printf( aTHX_
                       (const char *)
@@ -19158,7 +19611,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          * is a crude hack but it may be the best for now since
          * we have no flag "this EXACTish node was UTF-8"
          * --jhi */
-       pv_pretty(sv, STRING(o), STR_LEN(o), 60, PL_colors[0], PL_colors[1],
+       pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len,
+                  PL_colors[0], PL_colors[1],
                   PERL_PV_ESCAPE_UNI_DETECT |
                   PERL_PV_ESCAPE_NONASCII   |
                   PERL_PV_PRETTY_ELLIPSES   |
@@ -19251,7 +19705,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          if ( k == REF && reginfo) {
              U32 n = ARG(o);  /* which paren pair */
              I32 ln = prog->offs[n].start;
-            if (prog->lastparen < n || ln == -1)
+            if (prog->lastparen < n || ln == -1 || prog->offs[n].end == -1)
                  Perl_sv_catpvf(aTHX_ sv, ": FAIL");
              else if (ln == prog->offs[n].end)
                  Perl_sv_catpvf(aTHX_ sv, ": ACCEPT - EMPTY STRING");
@@ -19382,7 +19836,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              SV* contents;
  
              /* See if truncation size is overridden */
-            const STRLEN dump_len = (PL_dump_re_max_len)
+            const STRLEN dump_len = (PL_dump_re_max_len > 256)
                                      ? PL_dump_re_max_len
                                      : 256;
  
@@ -19435,6 +19889,15 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
          SvREFCNT_dec(unresolved);
      }
+    else if (k == ANYOFM) {
+        SV * cp_list = get_ANYOFM_contents(o);
+
+       Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+        put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, TRUE);
+       Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
+
+        SvREFCNT_dec(cp_list);
+    }
      else if (k == POSIXD || k == NPOSIXD) {
          U8 index = FLAGS(o) * 2;
          if (index < C_ARRAY_LENGTH(anyofs)) {
@@ -19469,8 +19932,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
      /* add on the verb argument if there is one */
      if ( ( k == VERB || OP(o) == ACCEPT || OP(o) == OPFAIL ) && o->flags) {
-        Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
+        if ( ARG(o) )
+            Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
                         SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
+        else
+            sv_catpvs(sv, ":NULL");
      }
  #else
      PERL_UNUSED_CONTEXT;
@@ -19506,7 +19972,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
                       PL_colors[5],PL_colors[0],
                       s,
                       PL_colors[1],
-                     (strlen(s) > 60 ? "..." : ""));
+                     (strlen(s) > PL_dump_re_max_len ? "..." : ""));
         } );
  
      /* use UTF8 check substring if regexp pattern itself is in UTF8 */
@@ -19545,13 +20011,13 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
      } else {
          CALLREGFREE_PVT(rx); /* free the private data */
          SvREFCNT_dec(RXp_PAREN_NAMES(r));
-       Safefree(r->xpv_len_u.xpvlenu_pv);
      }
      if (r->substrs) {
-        SvREFCNT_dec(r->substrs->data[0].substr);
-        SvREFCNT_dec(r->substrs->data[0].utf8_substr);
-        SvREFCNT_dec(r->substrs->data[1].substr);
-        SvREFCNT_dec(r->substrs->data[1].utf8_substr);
+        int i;
+        for (i = 0; i < 2; i++) {
+            SvREFCNT_dec(r->substrs->data[i].substr);
+            SvREFCNT_dec(r->substrs->data[i].utf8_substr);
+        }
         Safefree(r->substrs);
      }
      RX_MATCH_COPY_FREE(rx);
@@ -19562,12 +20028,19 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
      SvREFCNT_dec(r->qr_anoncv);
      if (r->recurse_locinput)
          Safefree(r->recurse_locinput);
-    rx->sv_u.svu_rx = 0;
  }
  
+
  /*  reg_temp_copy()
  
-    This is a hacky workaround to the structural issue of match results
+    Copy ssv to dsv, both of which should be of type SVt_REGEXP or SVt_PVLV,
+    except that dsv will be created if NULL.
+
+    This function is used in two main ways. First to implement
+        $r = qr/.../; $s = $$r;
+
+    Secondly, it is used as a hacky workaround to the structural issue of
+    match results
      being stored in the regexp structure which is in turn stored in
      PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern
      could be PL_curpm in multiple contexts, and could require multiple
@@ -19583,75 +20056,80 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
  
  
  REGEXP *
-Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
+Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
  {
-    struct regexp *ret;
-    struct regexp *const r = ReANY(rx);
-    const bool islv = ret_x && SvTYPE(ret_x) == SVt_PVLV;
+    struct regexp *drx;
+    struct regexp *const srx = ReANY(ssv);
+    const bool islv = dsv && SvTYPE(dsv) == SVt_PVLV;
  
      PERL_ARGS_ASSERT_REG_TEMP_COPY;
  
-    if (!ret_x)
-       ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
+    if (!dsv)
+       dsv = (REGEXP*) newSV_type(SVt_REGEXP);
      else {
-       SvOK_off((SV *)ret_x);
+       SvOK_off((SV *)dsv);
         if (islv) {
-           /* For PVLVs, SvANY points to the xpvlv body while sv_u points
-              to the regexp.  (For SVt_REGEXPs, sv_upgrade has already
-              made both spots point to the same regexp body.) */
+           /* For PVLVs, the head (sv_any) points to an XPVLV, while
+             * the LV's xpvlenu_rx will point to a regexp body, which
+             * we allocate here */
             REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
-           assert(!SvPVX(ret_x));
-           ret_x->sv_u.svu_rx = temp->sv_any;
+           assert(!SvPVX(dsv));
+            ((XPV*)SvANY(dsv))->xpv_len_u.xpvlenu_rx = temp->sv_any;
             temp->sv_any = NULL;
             SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
             SvREFCNT_dec_NN(temp);
             /* SvCUR still resides in the xpvlv struct, so the regexp copy-
                ing below will not set it. */
-           SvCUR_set(ret_x, SvCUR(rx));
+           SvCUR_set(dsv, SvCUR(ssv));
         }
      }
      /* This ensures that SvTHINKFIRST(sv) is true, and hence that
         sv_force_normal(sv) is called.  */
-    SvFAKE_on(ret_x);
-    ret = ReANY(ret_x);
+    SvFAKE_on(dsv);
+    drx = ReANY(dsv);
  
-    SvFLAGS(ret_x) |= SvUTF8(rx);
+    SvFLAGS(dsv) |= SvFLAGS(ssv) & (SVf_POK|SVp_POK|SVf_UTF8);
+    SvPV_set(dsv, RX_WRAPPED(ssv));
      /* We share the same string buffer as the original regexp, on which we
         hold a reference count, incremented when mother_re is set below.
         The string pointer is copied here, being part of the regexp struct.
       */
-    memcpy(&(ret->xpv_cur), &(r->xpv_cur),
+    memcpy(&(drx->xpv_cur), &(srx->xpv_cur),
            sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
-    if (r->offs) {
-        const I32 npar = r->nparens+1;
-        Newx(ret->offs, npar, regexp_paren_pair);
-        Copy(r->offs, ret->offs, npar, regexp_paren_pair);
-    }
-    if (r->substrs) {
-        Newx(ret->substrs, 1, struct reg_substr_data);
-       StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
+    if (!islv)
+        SvLEN_set(dsv, 0);
+    if (srx->offs) {
+        const I32 npar = srx->nparens+1;
+        Newx(drx->offs, npar, regexp_paren_pair);
+        Copy(srx->offs, drx->offs, npar, regexp_paren_pair);
+    }
+    if (srx->substrs) {
+        int i;
+        Newx(drx->substrs, 1, struct reg_substr_data);
+       StructCopy(srx->substrs, drx->substrs, struct reg_substr_data);
  
-       SvREFCNT_inc_void(ret->substrs->data[0].substr);
-       SvREFCNT_inc_void(ret->substrs->data[0].utf8_substr);
-       SvREFCNT_inc_void(ret->substrs->data[1].substr);
-       SvREFCNT_inc_void(ret->substrs->data[1].utf8_substr);
+        for (i = 0; i < 2; i++) {
+            SvREFCNT_inc_void(drx->substrs->data[i].substr);
+            SvREFCNT_inc_void(drx->substrs->data[i].utf8_substr);
+        }
  
         /* check_substr and check_utf8, if non-NULL, point to either their
            anchored or float namesakes, and don't hold a second reference.  */
      }
-    RX_MATCH_COPIED_off(ret_x);
+    RX_MATCH_COPIED_off(dsv);
  #ifdef PERL_ANY_COW
-    ret->saved_copy = NULL;
+    drx->saved_copy = NULL;
  #endif
-    ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
-    SvREFCNT_inc_void(ret->qr_anoncv);
-    if (r->recurse_locinput)
-        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
+    drx->mother_re = ReREFCNT_inc(srx->mother_re ? srx->mother_re : ssv);
+    SvREFCNT_inc_void(drx->qr_anoncv);
+    if (srx->recurse_locinput)
+        Newx(drx->recurse_locinput,srx->nparens + 1,char *);
  
-    return ret_x;
+    return dsv;
  }
  #endif
  
+
  /* regfree_internal()
  
     Free the private data in a regexp. This is overloadable by
@@ -19679,7 +20157,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
         {
             SV *dsv= sv_newmortal();
              RE_PV_QUOTED_DECL(s, RX_UTF8(rx),
-                dsv, RX_PRECOMP(rx), RX_PRELEN(rx), 60);
+                dsv, RX_PRECOMP(rx), RX_PRELEN(rx), PL_dump_re_max_len);
              Perl_re_printf( aTHX_ "%sFreeing REx:%s %s\n",
                  PL_colors[4],PL_colors[5],s);
          }
@@ -19813,16 +20291,19 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
         /* Do it this way to avoid reading from *r after the StructCopy().
            That way, if any of the sv_dup_inc()s dislodge *r from the L1
            cache, it doesn't matter.  */
+        int i;
         const bool anchored = r->check_substr
             ? r->check_substr == r->substrs->data[0].substr
             : r->check_utf8   == r->substrs->data[0].utf8_substr;
          Newx(ret->substrs, 1, struct reg_substr_data);
         StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
  
-       ret->substrs->data[0].substr      = sv_dup_inc(ret->substrs->data[0].substr, param);
-       ret->substrs->data[0].utf8_substr = sv_dup_inc(ret->substrs->data[0].utf8_substr, param);
-       ret->substrs->data[1].substr      = sv_dup_inc(ret->substrs->data[1].substr, param);
-       ret->substrs->data[1].utf8_substr = sv_dup_inc(ret->substrs->data[1].utf8_substr, param);
+        for (i = 0; i < 2; i++) {
+            ret->substrs->data[i].substr =
+                        sv_dup_inc(ret->substrs->data[i].substr, param);
+            ret->substrs->data[i].utf8_substr =
+                        sv_dup_inc(ret->substrs->data[i].utf8_substr, param);
+        }
  
         /* check_substr and check_utf8, if non-NULL, point to either their
            anchored or float namesakes, and don't hold a second reference.  */
@@ -19852,7 +20333,7 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
      RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
      ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param));
      if (r->recurse_locinput)
-        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
+        Newx(ret->recurse_locinput,r->nparens + 1,char *);
  
      if (ret->pprivate)
         RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
@@ -19871,7 +20352,7 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
                1: a buffer in a different thread
                2: something we no longer hold a reference on
                so we need to copy it locally.  */
-    RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED(sstr), SvCUR(sstr)+1);
+    RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED_const(sstr), SvCUR(sstr)+1);
      ret->mother_re   = NULL;
  }
  #endif /* PERL_IN_XSUB_RE */
@@ -20305,9 +20786,9 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  #else
          format = "\\x%02" UVXf "-\\x%02" UVXf;
  #endif
-        GCC_DIAG_IGNORE(-Wformat-nonliteral);
+        GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
          Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
-        GCC_DIAG_RESTORE;
+        GCC_DIAG_RESTORE_STMT;
          break;
      }
  }
@@ -20456,7 +20937,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
  {
      /* Appends to 'sv' a displayable version of the innards of the bracketed
       * character class defined by the other arguments:
-     *  'bitmap' points to the bitmap.
+     *  'bitmap' points to the bitmap, or is NULL if there is none to use.
       *  'nonbitmap_invlist' is an inversion list of the code points that are in
       *      the bitmap range, but for some reason aren't in the bitmap; NULL if
       *      none.  The reasons for this could be that they require some
@@ -20465,9 +20946,9 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
       *      was not resolved at the time of the regex compilation (under /u)
       *  'only_utf8_locale_invlist' is an inversion list of the code points that
       *      are valid only if the runtime locale is a UTF-8 one; NULL if none
-     *  'node' is the regex pattern node.  It is needed only when the above two
-     *      parameters are not null, and is passed so that this routine can
-     *      tease apart the various reasons for them.
+     *  'node' is the regex pattern ANYOF node.  It is needed only when the
+     *      above two parameters are not null, and is passed so that this
+     *      routine can tease apart the various reasons for them.
       *  'force_as_is_display' is TRUE if this routine should definitely NOT try
       *      to invert things to see if that leads to a cleaner display.  If
       *      FALSE, this routine is free to use its judgment about doing this.
@@ -20567,13 +21048,16 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
      }
  
      /* Accumulate the bit map into the unconditional match list */
-    for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
-        if (BITMAP_TEST(bitmap, i)) {
-            int start = i++;
-            for (; i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i); i++) {
-                /* empty */
+    if (bitmap) {
+        for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
+            if (BITMAP_TEST(bitmap, i)) {
+                int start = i++;
+                for (;
+                     i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i);
+                     i++)
+                { /* empty */ }
+                invlist = _add_range_to_invlist(invlist, start, i-1);
              }
-            invlist = _add_range_to_invlist(invlist, start, i-1);
          }
      }
  
@@ -20740,7 +21224,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
         /* While that wasn't END last time... */
         NODE_ALIGN(node);
         op = OP(node);
-       if (op == CLOSE || op == WHILEM)
+       if (op == CLOSE || op == SRCLOSE || op == WHILEM)
             indent--;
         next = regnext((regnode *)node);
  
@@ -20807,7 +21291,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                      indent+3,
                      elem_ptr
                      ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
-                                SvCUR(*elem_ptr), 60,
+                                SvCUR(*elem_ptr), PL_dump_re_max_len,
                                  PL_colors[0], PL_colors[1],
                                  (SvUTF8(*elem_ptr)
                                   ? PERL_PV_ESCAPE_UNI
@@ -20864,7 +21348,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
             node = NEXTOPER(node);
             node += regarglen[(U8)op];
         }
-       if (op == CURLYX || op == OPEN)
+       if (op == CURLYX || op == OPEN || op == SROPEN)
             indent++;
      }
      CLEAR_OPTSTART;