regcomp.c: With ACCEPT set stopmin even if no data struct present

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 512e6a1..d8bf687 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -294,10 +294,6 @@ struct RExC_state_t {
  #define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
                                                     under /d from /u ? */
  
-#ifdef RE_TRACK_PATTERN_OFFSETS
-#  define RExC_offsets (RExC_rxi->u.offsets) /* I am not like the
-                                                         others */
-#endif
  #define RExC_emit      (pRExC_state->emit)
  #define RExC_emit_start        (pRExC_state->emit_start)
  #define RExC_sawback   (pRExC_state->sawback)
@@ -346,6 +342,146 @@ struct RExC_state_t {
  #define RExC_warned_WARN_EXPERIMENTAL__REGEX_SETS (pRExC_state->sWARN_EXPERIMENTAL__REGEX_SETS)
  #define RExC_unlexed_names (pRExC_state->unlexed_names)
  
+
+/***********************************************************************/
+/* UTILITY MACROS FOR ADVANCING OR SETTING THE PARSE "CURSOR" RExC_parse
+ *
+ * All of these macros depend on the above RExC_ accessor macros, which
+ * in turn depend on a variable pRExC_state being in scope where they
+ * are used. This is the standard regexp parser context variable which is
+ * passed into every non-trivial parse function in this file.
+ *
+ * Note that the UTF macro is itself a wrapper around RExC_utf8, so all
+ * of the macros which do not take an argument will operate on the
+ * pRExC_state structure *only*.
+ *
+ * Please do NOT modify RExC_parse without using these macros. In the
+ * future these macros will be extended for enhanced debugging and trace
+ * output during the parse process.
+ */
+
+/* RExC_parse_incf(flag)
+ *
+ * Increment RExC_parse to point at the next codepoint, while doing
+ * the right thing depending on whether we are parsing UTF-8 strings
+ * or not. The 'flag' argument determines if content is UTF-8 or not,
+ * intended for cases where this is NOT governed by the UTF macro.
+ *
+ * Use RExC_parse_inc() if UTF-8ness is controlled by the UTF macro.
+ *
+ * WARNING: Does NOT take into account RExC_end; it is the caller's
+ * responsibility to make sure there are enough octets left in
+ * RExC_parse to ensure that when processing UTF-8 we would not read
+ * past the end of the string.
+ */
+#define RExC_parse_incf(flag) STMT_START {              \
+    RExC_parse += (flag) ? UTF8SKIP(RExC_parse) : 1;    \
+} STMT_END
+
+/* RExC_parse_inc_safef(flag)
+ *
+ * Safely increment RExC_parse to point at the next codepoint,
+ * doing the right thing depending on whether we are parsing
+ * UTF-8 strings or not and NOT reading past the end of the buffer.
+ * The 'flag' argument determines if content is UTF-8 or not,
+ * intended for cases where this is NOT governed by the UTF macro.
+ *
+ * Use RExC_parse_inc_safe() if UTF-8ness is controlled by the UTF macro.
+ *
+ * NOTE: Will NOT read past RExC_end when content is UTF-8.
+ */
+#define RExC_parse_inc_safef(flag) STMT_START {                     \
+    RExC_parse += (flag) ? UTF8_SAFE_SKIP(RExC_parse,RExC_end) : 1; \
+} STMT_END
+
+/* RExC_parse_inc()
+ *
+ * Increment RExC_parse to point at the next codepoint,
+ * doing the right thing depending on whether we are parsing
+ * UTF-8 strings or not.
+ *
+ * WARNING: Does NOT take into account RExC_end; it is the caller's
+ * responsibility to make sure there are enough octets left in
+ * RExC_parse to ensure that when processing UTF-8 we would not read
+ * past the end of the string.
+ *
+ * NOTE: whether we are parsing UTF-8 or not is determined by the
+ * UTF macro which is defined as cBOOL(RExC_utf8), thus this
+ * macro operates on the pRExC_state structure only.
+ */
+#define RExC_parse_inc() RExC_parse_incf(UTF)
+
+/* RExC_parse_inc_safe()
+ *
+ * Safely increment RExC_parse to point at the next codepoint,
+ * doing the right thing depending on whether we are parsing
+ * UTF-8 strings or not and NOT reading past the end of the buffer.
+ *
+ * NOTE: whether we are parsing UTF-8 or not is determined by the
+ * UTF macro which is defined as cBOOL(RExC_utf8), thus this
+ * macro operates on the pRExC_state structure only.
+ */
+#define RExC_parse_inc_safe() RExC_parse_inc_safef(UTF)
+
+/* RExC_parse_inc_utf8()
+ *
+ * Increment RExC_parse to point at the next utf8 codepoint,
+ * assumes content is UTF-8.
+ *
+ * WARNING: Does NOT take into account RExC_end; it is the caller's
+ * responsibility to make sure there are enough octets left in RExC_parse
+ * to ensure that when processing UTF-8 we would not read past the end
+ * of the string.
+ */
+#define RExC_parse_inc_utf8() STMT_START {  \
+    RExC_parse += UTF8SKIP(RExC_parse);     \
+} STMT_END
+
+/* RExC_parse_inc_if_char()
+ *
+ * Increment RExC_parse to point at the next codepoint, if and only
+ * if the current parse point is NOT a NUL, while doing the right thing
+ * depending on whether we are parsing UTF-8 strings or not.
+ *
+ * WARNING: Does NOT take into account RExC_end; it is the caller's
+ * responsibility to make sure there are enough octets left in RExC_parse
+ * to ensure that when processing UTF-8 we would not read past the end
+ * of the string.
+ *
+ * NOTE: whether we are parsing UTF-8 or not is determined by the
+ * UTF macro which is defined as cBOOL(RExC_utf8), thus this
+ * macro operates on the pRExC_state structure only.
+ */
+#define RExC_parse_inc_if_char() STMT_START {         \
+    RExC_parse += SKIP_IF_CHAR(RExC_parse,RExC_end);  \
+} STMT_END
+
+/* RExC_parse_inc_by(n_octets)
+ *
+ * Increment the parse cursor by the number of octets specified by
+ * the 'n_octets' argument.
+ *
+ * NOTE: Does NOT check ANY constraints. It is the caller's responsibility
+ * to ensure this will not move past the end of the string, or leave the
+ * pointer in the middle of a UTF-8 sequence.
+ *
+ * Typically used to advance past previously analyzed content.
+ */
+#define RExC_parse_inc_by(n_octets) STMT_START {  \
+    RExC_parse += (n_octets);                     \
+} STMT_END
+
+/* RExC_parse_set(to_ptr)
+ *
+ * Sets the RExC_parse pointer to the pointer specified by the 'to_ptr'
+ * argument. No validation whatsoever is performed on that pointer.
+ */
+#define RExC_parse_set(to_ptr) STMT_START { \
+    RExC_parse = (to_ptr);                  \
+} STMT_END
+
+/**********************************************************************/
+
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
   * a flag to disable back-off on the fixed/floating substrings - if it's
   * a high complexity pattern we assume the benefit of avoiding a full match
@@ -365,7 +501,7 @@ struct RExC_state_t {
                              || ((*s) == '{' && regcurly(s, e, NULL)))
  
  /*
- * Flags to be passed up and down.
+ * Flags to be passed up.
   */
  #define        HASWIDTH        0x01    /* Known to not match null strings, could match
                                     non-null ones. */
@@ -386,6 +522,30 @@ struct RExC_state_t {
  #define TRIE_STCLASS
  #endif
  
+/* About the term "restudy" and the var "restudied" and the defines
+ * "SCF_TRIE_RESTUDY" and "SCF_TRIE_DOING_RESTUDY": All of these relate to
+ * doing multiple study_chunk() calls over the same set of opcodes for the
+ * purpose of enhanced TRIE optimizations.
+ *
+ * Specifically, when TRIE_STUDY_OPT is defined, and it is defined in normal
+ * builds, (see above), during compilation SCF_TRIE_RESTUDY may be enabled
+ * which then causes Perl_re_op_compile() to call the optimizer
+ * S_study_chunk() a second time to perform additional optimizations,
+ * including the aho_corasick startclass optimization.
+ * This additional pass will only happen once, which is managed by the
+ * 'restudied' variable in Perl_re_op_compile().
+ *
+ * When this second pass is under way the flags passed into study_chunk() will
+ * include SCF_TRIE_DOING_RESTUDY and this flag is and must be cascaded down
+ * to any recursive calls to S_study_chunk().
+ *
+ * IMPORTANT: Any logic in study_chunk() that emits warnings should check that
+ * the SCF_TRIE_DOING_RESTUDY flag is NOT set in 'flags', or the warning may
+ * be produced twice.
+ *
+ * See commit 07be1b83a6b2d24b492356181ddf70e1c7917ae3 and
+ * 688e03912e3bff2d2419c457d8b0e1bab3eb7112 for more details.
+ */
  
  
  #define PBYTE(u8str,paren) ((U8*)(u8str))[(paren) >> 3]
@@ -601,6 +761,10 @@ typedef struct scan_data_t {
      I32 flags;             /* common SF_* and SCF_* flags */
      I32 whilem_c;
      SSize_t *last_closep;
+    regnode **last_close_opp; /* pointer to pointer to last CLOSE regop
+                                 seen. DO NOT DEREFERENCE the regnode
+                                 pointer - the op may have been optimized
+                                 away */
      regnode_ssc *start_class;
  } scan_data_t;
  
@@ -614,7 +778,7 @@ static const scan_data_t zero_scan_data = {
          { NULL, 0, 0, 0, 0, 0 },
          { NULL, 0, 0, 0, 0, 0 },
      },
-    0, 0, NULL, NULL
+    0, 0, NULL, NULL, NULL
  };
  
  /* study flags */
@@ -646,14 +810,17 @@ static const scan_data_t zero_scan_data = {
  #define SCF_DO_STCLASS         (SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
  #define SCF_WHILEM_VISITED_POS 0x2000
  
-#define SCF_TRIE_RESTUDY        0x4000 /* Do restudy? */
+#define SCF_TRIE_RESTUDY        0x4000 /* Need to do restudy in study_chunk()?
+                                          Search for "restudy" in this file
+                                          to find a detailed explanation.*/
  #define SCF_SEEN_ACCEPT         0x8000
-#define SCF_TRIE_DOING_RESTUDY 0x10000
+#define SCF_TRIE_DOING_RESTUDY 0x10000 /* Are we in restudy right now?
+                                          Search for "restudy" in this file
+                                          to find a detailed explanation. */
  #define SCF_IN_DEFINE          0x20000
  
  
  
-
  #define UTF cBOOL(RExC_utf8)
  
  /* The enums for all these are ordered so things work out correctly */
@@ -1053,75 +1220,24 @@ static const scan_data_t zero_scan_data = {
          }                                                               \
      } STMT_END
  
+#define ckWARNexperimental_with_arg(loc, class, m, arg)                 \
+    STMT_START {                                                        \
+        if (! RExC_warned_ ## class) { /* warn once per compilation */  \
+            RExC_warned_ ## class = 1;                                  \
+            _WARN_HELPER(loc, packWARN(class),                          \
+                      Perl_ck_warner_d(aTHX_ packWARN(class),           \
+                                       m REPORT_LOCATION,               \
+                                       arg, REPORT_LOCATION_ARGS(loc)));\
+        }                                                               \
+    } STMT_END
+
  /* Convert between a pointer to a node and its offset from the beginning of the
   * program */
  #define REGNODE_p(offset)    (RExC_emit_start + (offset))
  #define REGNODE_OFFSET(node) ((node) - RExC_emit_start)
  
-/* Macros for recording node offsets.   20001227 mjd@plover.com
- * Nodes are numbered 1, 2, 3, 4.  Node #n's position is recorded in
- * element 2*n-1 of the array.  Element #2n holds the byte length node #n.
- * Element 0 holds the number n.
- * Position is 1 indexed.
- */
-#ifndef RE_TRACK_PATTERN_OFFSETS
-#define Set_Node_Offset_To_R(offset,byte)
-#define Set_Node_Offset(node,byte)
-#define Set_Cur_Node_Offset
-#define Set_Node_Length_To_R(node,len)
-#define Set_Node_Length(node,len)
-#define Set_Node_Cur_Length(node,start)
-#define Node_Offset(n)
-#define Node_Length(n)
-#define Set_Node_Offset_Length(node,offset,len)
-#define ProgLen(ri) ri->u.proglen
-#define SetProgLen(ri,x) ri->u.proglen = x
-#define Track_Code(code)
-#else
-#define ProgLen(ri) ri->u.offsets[0]
-#define SetProgLen(ri,x) ri->u.offsets[0] = x
-#define Set_Node_Offset_To_R(offset,byte) STMT_START {                 \
-        MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n",                \
-                    __LINE__, (int)(offset), (int)(byte)));            \
-        if((offset) < 0) {                                             \
-            Perl_croak(aTHX_ "value of node is %d in Offset macro",     \
-                                         (int)(offset));                \
-        } else {                                                       \
-            RExC_offsets[2*(offset)-1] = (byte);                       \
-        }                                                              \
-} STMT_END
-
-#define Set_Node_Offset(node,byte)                                      \
-    Set_Node_Offset_To_R(REGNODE_OFFSET(node), (byte)-RExC_start)
-#define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
-
-#define Set_Node_Length_To_R(node,len) STMT_START {                    \
-        MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n",          \
-                __LINE__, (int)(node), (int)(len)));                   \
-        if((node) < 0) {                                               \
-            Perl_croak(aTHX_ "value of node is %d in Length macro",     \
-                                         (int)(node));                  \
-        } else {                                                       \
-            RExC_offsets[2*(node)] = (len);                            \
-        }                                                              \
-} STMT_END
-
-#define Set_Node_Length(node,len) \
-    Set_Node_Length_To_R(REGNODE_OFFSET(node), len)
-#define Set_Node_Cur_Length(node, start)                \
-    Set_Node_Length(node, RExC_parse - start)
-
-/* Get offsets and lengths */
-#define Node_Offset(n) (RExC_offsets[2*(REGNODE_OFFSET(n))-1])
-#define Node_Length(n) (RExC_offsets[2*(REGNODE_OFFSET(n))])
-
-#define Set_Node_Offset_Length(node,offset,len) STMT_START {   \
-    Set_Node_Offset_To_R(REGNODE_OFFSET(node), (offset));      \
-    Set_Node_Length_To_R(REGNODE_OFFSET(node), (len)); \
-} STMT_END
-
-#define Track_Code(code) STMT_START { code } STMT_END
-#endif
+#define ProgLen(ri) ri->proglen
+#define SetProgLen(ri,x) ri->proglen = x
  
  #if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
  #define EXPERIMENTAL_INPLACESCAN
@@ -1227,16 +1343,18 @@ S_debug_show_study_flags(pTHX_ U32 flags, const char *open_str,
  
  static void
  S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
-                    U32 depth, int is_inf)
+                    U32 depth, int is_inf,
+                    SSize_t min, SSize_t stopmin, SSize_t delta)
  {
      DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      DEBUG_OPTIMISE_MORE_r({
          if (!data)
              return;
-        Perl_re_indentf(aTHX_  "%s: Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
+        Perl_re_indentf(aTHX_  "%s: M/S/D: %" IVdf "/%" IVdf "/%" IVdf " Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
              depth,
              where,
+            min, stopmin, delta,
              (IV)data->pos_min,
              (IV)data->pos_delta,
              (UV)data->flags
@@ -1303,14 +1421,14 @@ S_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
  }
  
  
-#  define DEBUG_STUDYDATA(where, data, depth, is_inf) \
-                    S_debug_studydata(aTHX_ where, data, depth, is_inf)
+#  define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) \
+                    S_debug_studydata(aTHX_ where, data, depth, is_inf, min, stopmin, delta)
  
  #  define DEBUG_PEEP(str, scan, depth, flags)   \
                      S_debug_peep(aTHX_ str, pRExC_state, scan, depth, flags)
  
  #else
-#  define DEBUG_STUDYDATA(where, data, depth, is_inf) NOOP
+#  define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) NOOP
  #  define DEBUG_PEEP(str, scan, depth, flags)         NOOP
  #endif
  
@@ -1512,7 +1630,7 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
      }
      data->last_end = -1;
      data->flags &= ~SF_BEFORE_EOL;
-    DEBUG_STUDYDATA("commit", data, 0, is_inf);
+    DEBUG_STUDYDATA("commit", data, 0, is_inf, -1, -1, -1);
  }
  
  /* An SSC is just a regnode_charclass_posix with an extra field: the inversion
@@ -3511,16 +3629,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                                 * sizeof(reg_trie_trans) );
  
      {   /* Modify the program and insert the new TRIE node */
-        U8 nodetype =(U8)(flags & 0xFF);
+        U8 nodetype =(U8) flags;
          char *str=NULL;
  
  #ifdef DEBUGGING
          regnode *optimize = NULL;
-#ifdef RE_TRACK_PATTERN_OFFSETS
-
-        U32 mjd_offset = 0;
-        U32 mjd_nodelen = 0;
-#endif /* RE_TRACK_PATTERN_OFFSETS */
  #endif /* DEBUGGING */
          /*
             This means we convert either the first branch or the first Exact,
@@ -3534,28 +3647,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          if ( first != startbranch || OP( last ) == BRANCH ) {
              /* branch sub-chain */
              NEXT_OFF( first ) = (U16)(last - first);
-#ifdef RE_TRACK_PATTERN_OFFSETS
-            DEBUG_r({
-                mjd_offset= Node_Offset((convert));
-                mjd_nodelen= Node_Length((convert));
-            });
-#endif
              /* whole branch chain */
          }
-#ifdef RE_TRACK_PATTERN_OFFSETS
-        else {
-            DEBUG_r({
-                const  regnode *nop = NEXTOPER( convert );
-                mjd_offset= Node_Offset((nop));
-                mjd_nodelen= Node_Length((nop));
-            });
-        }
-        DEBUG_OPTIMISE_r(
-            Perl_re_indentf( aTHX_  "MJD offset:%" UVuf " MJD length:%" UVuf "\n",
-                depth+1,
-                (UV)mjd_offset, (UV)mjd_nodelen)
-        );
-#endif
          /* But first we check to see if there is a common prefix we can
             split out as an EXACT and put in front of the TRIE node.  */
          trie->startstate= 1;
@@ -3673,15 +3766,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                     DEBUG_r_TEST
  #endif
                     ) {
-                   regnode *fix = convert;
                     U32 word = trie->wordcount;
-#ifdef RE_TRACK_PATTERN_OFFSETS
-                   mjd_nodelen++;
-#endif
-                   Set_Node_Offset_Length(convert, mjd_offset, state - 1);
-                   while( ++fix < n ) {
-                       Set_Node_Offset_Length(fix, 0, 0);
-                   }
                     while (word--) {
                         SV ** const tmp = av_fetch( trie_words, word, 0 );
                         if (tmp) {
@@ -3741,22 +3826,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          }
          /* needed for dumping*/
          DEBUG_r(if (optimize) {
-            regnode *opt = convert;
-
-            while ( ++opt < optimize) {
-                Set_Node_Offset_Length(opt, 0, 0);
-            }
              /*
                  Try to clean up some of the debris left after the
                  optimisation.
               */
              while( optimize < jumper ) {
-                Track_Code( mjd_nodelen += Node_Length((optimize)); );
                  OP( optimize ) = OPTIMIZED;
-                Set_Node_Offset_Length(optimize, 0, 0);
                  optimize++;
              }
-            Set_Node_Offset_Length(convert, mjd_offset, mjd_nodelen);
          });
      } /* end node insert */
  
@@ -3950,7 +4027,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   *
   * XXX khw thinks this should be enhanced to fill EXACT (at least) nodes as full
   * as possible, even if that means splitting an existing node so that its first
- * part is moved to the preceeding node.  This would maximise the efficiency of
+ * part is moved to the preceding node.  This would maximise the efficiency of
   * memEQ during matching.
   *
   * If a node is to match under /i (folded), the number of characters it matches
@@ -4551,36 +4628,59 @@ S_rck_elide_nothing(pTHX_ regnode *node)
  
  /* the return from this sub is the minimum length that could possibly match */
  STATIC SSize_t
-S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
-                        SSize_t *minlenp, SSize_t *deltap,
-                        regnode *last,
-                        scan_data_t *data,
-                        I32 stopparen,
-                        U32 recursed_depth,
-                        regnode_ssc *and_withp,
-                        U32 flags, U32 depth, bool was_mutate_ok)
-                        /* scanp: Start here (read-write). */
-                        /* deltap: Write maxlen-minlen here. */
-                        /* last: Stop before this one. */
-                        /* data: string data about the pattern */
-                        /* stopparen: treat close N as END */
-                        /* recursed: which subroutines have we recursed into */
-                        /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
+S_study_chunk(pTHX_
+    RExC_state_t *pRExC_state,
+    regnode **scanp,        /* Start here (read-write). */
+    SSize_t *minlenp,       /* used for the minlen of substrings? */
+    SSize_t *deltap,        /* Write maxlen-minlen here. */
+    regnode *last,          /* Stop before this one. */
+    scan_data_t *data,      /* string data about the pattern */
+    I32 stopparen,          /* treat CLOSE-N as END, see GOSUB */
+    U32 recursed_depth,     /* how deep have we recursed via GOSUB */
+    regnode_ssc *and_withp, /* Valid if flags & SCF_DO_STCLASS_OR */
+    U32 flags,              /* flags controlling this call, see SCF_ flags */
+    U32 depth,              /* how deep have we recursed period */
+    bool was_mutate_ok      /* TRUE if in-place optimizations are allowed.
+                               FALSE only if the caller (recursively) was
+                               prohibited from modifying the regops, because
+                               a higher caller is holding a ptr to them. */
+)
  {
-    SSize_t final_minlen;
-    /* There must be at least this number of characters to match */
-    SSize_t min = 0;
-    I32 pars = 0, code;
-    regnode *scan = *scanp, *next;
-    SSize_t delta = 0;
+    /* vars about the regnodes we are working with */
+    regnode *scan = *scanp; /* the current opcode we are inspecting */
+    regnode *next = NULL;   /* the next opcode beyond scan, tmp var */
+    regnode *first_non_open = scan; /* FIXME: should this init to NULL?
+                                       the first non open regop, if the init
+                                       val IS an OPEN then we will skip past
+                                       it just after the var decls section */
+    I32 code = 0;           /* temp var used to hold the optype of a regop */
+
+    /* vars about the min and max length of the pattern */
+    SSize_t min = 0;    /* min length of this part of the pattern */
+    SSize_t stopmin = OPTIMIZE_INFTY; /* min length accounting for ACCEPT
+                                         this is adjusted down if we find
+                                         an ACCEPT */
+    SSize_t delta = 0;  /* difference between min and max length
+                           (not accounting for stopmin) */
+
+    /* vars about capture buffers in the pattern */
+    I32 pars = 0;       /* count of OPEN opcodes */
+    I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0; /* is this op an OPEN? */
+
+    /* vars about whether this pattern contains something that can match
+     * infinitely long strings, eg, X* or X+ */
      int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
      int is_inf_internal = 0;           /* The studied chunk is infinite */
-    I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
-    scan_data_t data_fake;
-    SV *re_trie_maxbuff = NULL;
-    regnode *first_non_open = scan;
-    SSize_t stopmin = OPTIMIZE_INFTY;
-    scan_frame *frame = NULL;
+
+    /* scan_data_t (struct) is used to hold information about the substrings
+     * and start class we have extracted from the string */
+    scan_data_t data_fake; /* temp var used for recursing in some cases */
+
+    SV *re_trie_maxbuff = NULL; /* temp var used to hold whether we can do
+                                   trie optimizations */
+
+    scan_frame *frame = NULL;  /* used as part of fake recursion */
+
      DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_STUDY_CHUNK;
@@ -4593,7 +4693,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
              first_non_open=regnext(first_non_open);
      }
  
-
    fake_study_recurse:
      DEBUG_r(
          RExC_study_chunk_recursed_count++;
@@ -4634,7 +4733,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
           */
          bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
          /* Peephole optimizer: */
-        DEBUG_STUDYDATA("Peep", data, depth, is_inf);
+        DEBUG_STUDYDATA("Peep", data, depth, is_inf, min, stopmin, delta);
          DEBUG_PEEP("Peep", scan, depth, flags);
  
  
@@ -4663,7 +4762,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
              SSize_t minlen = 0;
              SSize_t deltanext = 0;
              SSize_t fake_last_close = 0;
-            I32 f = SCF_IN_DEFINE;
+            regnode *fake_last_close_op = NULL;
+            U32 f = SCF_IN_DEFINE | (flags & SCF_TRIE_DOING_RESTUDY);
  
              StructCopy(&zero_scan_data, &data_fake, scan_data_t);
              scan = regnext(scan);
@@ -4671,6 +4771,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
              DEBUG_PEEP("expect IFTHEN", scan, depth, flags);
  
              data_fake.last_closep= &fake_last_close;
+            data_fake.last_close_opp= &fake_last_close_op;
              minlen = *minlenp;
              next = regnext(scan);
              scan = NEXTOPER(NEXTOPER(scan));
@@ -4716,8 +4817,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      ssc_init_zero(pRExC_state, &accum);
  
                  while (OP(scan) == code) {
-                    SSize_t deltanext, minnext, fake;
-                    I32 f = 0;
+                    SSize_t deltanext, minnext, fake_last_close = 0;
+                    regnode *fake_last_close_op = NULL;
+                    U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
                      regnode_ssc this_class;
  
                      DEBUG_PEEP("Branch", scan, depth, flags);
@@ -4727,9 +4829,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      if (data) {
                          data_fake.whilem_c = data->whilem_c;
                          data_fake.last_closep = data->last_closep;
+                        data_fake.last_close_opp = data->last_close_opp;
+                    }
+                    else {
+                        data_fake.last_closep = &fake_last_close;
+                        data_fake.last_close_opp = &fake_last_close_op;
                      }
-                    else
-                        data_fake.last_closep = &fake;
  
                      data_fake.pos_delta = delta;
                      next = regnext(scan);
@@ -4741,7 +4846,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      if (flags & SCF_DO_STCLASS) {
                          ssc_init(pRExC_state, &this_class);
                          data_fake.start_class = &this_class;
-                        f = SCF_DO_STCLASS_AND;
+                        f |= SCF_DO_STCLASS_AND;
                      }
                      if (flags & SCF_WHILEM_VISITED_POS)
                          f |= SCF_WHILEM_VISITED_POS;
@@ -4777,6 +4882,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      }
                      if (flags & SCF_DO_STCLASS)
                          ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class);
+                    DEBUG_STUDYDATA("end BRANCH", data, depth, is_inf, min, stopmin, delta);
                  }
                  if (code == IFTHEN && num < 2) /* Empty ELSE branch */
                      min1 = 0;
@@ -4817,6 +4923,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          flags |= SCF_DO_STCLASS_OR;
                      }
                  }
+                DEBUG_STUDYDATA("pre TRIE", data, depth, is_inf, min, stopmin, delta);
  
                  if (PERL_ENABLE_TRIE_OPTIMISATION
                      && OP(startbranch) == BRANCH
@@ -5156,7 +5263,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          } /* end if ( prev) */
                      } /* TRIE_MAXBUF is non zero */
                  } /* do trie */
-
+                DEBUG_STUDYDATA("after TRIE", data, depth, is_inf, min, stopmin, delta);
              }
              else if ( code == BRANCHJ ) {  /* single branch is optimized. */
                  scan = NEXTOPER(NEXTOPER(scan));
@@ -5233,11 +5340,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                               RExC_study_chunk_recursed_bytes, U8);
                      }
                      /* we havent recursed into this paren yet, so recurse into it */
-                    DEBUG_STUDYDATA("gosub-set", data, depth, is_inf);
+                    DEBUG_STUDYDATA("gosub-set", data, depth, is_inf, min, stopmin, delta);
                      PAREN_SET(recursed_depth, paren);
                      my_recursed_depth= recursed_depth + 1;
                  } else {
-                    DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf);
+                    DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf, min, stopmin, delta);
                      /* some form of infinite recursion, assume infinite length
                       * */
                      if (flags & SCF_DO_SUBSTR) {
@@ -5283,7 +5390,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      (frame && frame->in_gosub) || OP(scan) == GOSUB
                  );
  
-                DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
+                DEBUG_STUDYDATA("frame-new", data, depth, is_inf, min, stopmin, delta);
                  DEBUG_PEEP("fnew", scan, depth, flags);
  
                  frame = newframe;
@@ -5349,6 +5456,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                  ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
              }
              flags &= ~SCF_DO_STCLASS;
+            DEBUG_STUDYDATA("end EXACT", data, depth, is_inf, min, stopmin, delta);
          }
          else if (PL_regkind[OP(scan)] == EXACT) {
              /* But OP != EXACT!, so is EXACTFish */
@@ -5371,7 +5479,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                  OP(scan) = ANYOFM;
                  ARG_SET(scan, *s & mask);
                  FLAGS(scan) = mask;
-                /* we're not EXACTFish any more, so restudy */
+                /* We're not EXACTFish any more, so restudy.
+                 * Search for "restudy" in this file to find
+                 * a comment with details. */
                  continue;
              }
  
@@ -5431,10 +5541,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                  flags &= ~SCF_DO_STCLASS;
                  SvREFCNT_dec(EXACTF_invlist);
              }
+            DEBUG_STUDYDATA("end EXACTish", data, depth, is_inf, min, stopmin, delta);
          }
          else if (REGNODE_VARIES(OP(scan))) {
              SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
-            I32 fl = 0, f = flags;
+            I32 fl = 0;
+            U32 f = flags;
              regnode * const oscan = scan;
              regnode_ssc this_class;
              regnode_ssc *oclass = NULL;
@@ -5629,6 +5741,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      delta += (minnext + deltanext) * maxcount
                               - minnext * mincount;
                  }
+
+                if (data && data->flags & SCF_SEEN_ACCEPT) {
+                    if (flags & SCF_DO_SUBSTR) {
+                        scan_commit(pRExC_state, data, minlenp, is_inf);
+                        flags &= ~SCF_DO_SUBSTR;
+                    }
+                    if (stopmin > min)
+                        stopmin = min;
+                    DEBUG_STUDYDATA("after-whilem accept", data, depth, is_inf, min, stopmin, delta);
+                }
                  /* Try powerful optimization CURLYX => CURLYN. */
                  if (  OP(oscan) == CURLYX && data
                        && data->flags & SF_IN_PAR
@@ -5830,16 +5952,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      /* It is counted once already... */
                      data->pos_min += minnext * (mincount - counted);
  #if 0
-Perl_re_printf( aTHX_  "counted=%" UVuf " deltanext=%" UVuf
+    Perl_re_printf( aTHX_  "counted=%" UVuf " deltanext=%" UVuf
                                " OPTIMIZE_INFTY=%" UVuf " minnext=%" UVuf
                                " maxcount=%" UVuf " mincount=%" UVuf
                                " data->pos_delta=%" UVuf "\n",
-    (UV)counted, (UV)deltanext, (UV)OPTIMIZE_INFTY, (UV)minnext, (UV)maxcount,
-    (UV)mincount, (UV)data->pos_delta);
-if (deltanext != OPTIMIZE_INFTY)
-Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
-    (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
-          - minnext * mincount), (UV)(OPTIMIZE_INFTY - data->pos_delta));
+        (UV)counted, (UV)deltanext, (UV)OPTIMIZE_INFTY, (UV)minnext,
+        (UV)maxcount, (UV)mincount, (UV)data->pos_delta);
+    if (deltanext != OPTIMIZE_INFTY)
+        Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
+            (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
+            - minnext * mincount), (UV)(OPTIMIZE_INFTY - data->pos_delta));
  #endif
                      if (deltanext == OPTIMIZE_INFTY
                          || data->pos_delta == OPTIMIZE_INFTY
@@ -6145,18 +6267,32 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     In this case we can't do fixed string optimisation.
                  */
  
-                SSize_t deltanext, minnext, fake = 0;
+                bool is_positive = OP(scan) == IFMATCH ? 1 : 0;
+                SSize_t deltanext, minnext;
+                SSize_t fake_last_close = 0;
+                regnode *fake_last_close_op = NULL;
+                regnode *cur_last_close_op;
                  regnode *nscan;
                  regnode_ssc intrnl;
-                int f = 0;
+                U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
  
                  StructCopy(&zero_scan_data, &data_fake, scan_data_t);
                  if (data) {
                      data_fake.whilem_c = data->whilem_c;
                      data_fake.last_closep = data->last_closep;
+                    data_fake.last_close_opp = data->last_close_opp;
                  }
-                else
-                    data_fake.last_closep = &fake;
+                else {
+                    data_fake.last_closep = &fake_last_close;
+                    data_fake.last_close_opp = &fake_last_close_op;
+                }
+
+                /* remember the last_close_op we saw so we can see if
+                 * we are dealing with variable length lookbehind that
+                 * contains capturing buffers, which are considered
+                 * experimental */
+                cur_last_close_op= *(data_fake.last_close_opp);
+
                  data_fake.pos_delta = delta;
                  if ( flags & SCF_DO_STCLASS && !scan->flags
                       && OP(scan) == IFMATCH ) { /* Lookahead */
@@ -6174,6 +6310,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                        last, &data_fake, stopparen,
                                        recursed_depth, NULL, f, depth+1,
                                        mutate_ok);
+
                  if (scan->flags) {
                      if (   deltanext < 0
                          || deltanext > (I32) U8_MAX
@@ -6189,11 +6326,21 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                       * one.  (This leaves it at 0 for non-variable length
                       * matches to avoid breakage for those not using this
                       * extension) */
-                    if (deltanext) {
+                    if (deltanext)  {
                          scan->next_off = deltanext;
-                        ckWARNexperimental(RExC_parse,
-                            WARN_EXPERIMENTAL__VLB,
-                            "Variable length lookbehind is experimental");
+                        if (
+                            /* See a CLOSE op inside this lookbehind? */
+                            cur_last_close_op != *(data_fake.last_close_opp)
+                            /* and not doing restudy. see: restudied */
+                            && !(flags & SCF_TRIE_DOING_RESTUDY)
+                        ) {
+                            /* this is variable length lookbehind (positive or
+                             * negative) with capture buffers inside of it */
+                            ckWARNexperimental_with_arg(RExC_parse,
+                                WARN_EXPERIMENTAL__VLB,
+                                "Variable length %s lookbehind with capturing is experimental",
+                                is_positive ? "positive" : "negative");
+                        }
                      }
                      scan->flags = (U8)minnext + deltanext;
                  }
@@ -6222,6 +6369,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                                     |= SSC_MATCHES_EMPTY_STRING;
                      }
                  }
+                DEBUG_STUDYDATA("end LOOKAROUND", data, depth, is_inf, min, stopmin, delta);
              }
  #if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
              else {
@@ -6232,10 +6380,11 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     length of the pattern, something we won't know about
                     until after the recurse.
                  */
-                SSize_t deltanext, fake = 0;
+                SSize_t deltanext, fake_last_close = 0;
+                regnode *last_close_op = NULL;
                  regnode *nscan;
                  regnode_ssc intrnl;
-                int f = 0;
+                U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
                  /* We use SAVEFREEPV so that when the full compile
                      is finished perl will clean up the allocated
                      minlens when it's all done. This way we don't
@@ -6255,8 +6404,10 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                          data_fake.last_found=newSVsv(data->last_found);
                      }
                  }
-                else
-                    data_fake.last_closep = &fake;
+                else { /* no caller data: point at the local dummies */
+                    data_fake.last_closep = &fake_last_close;
+                    data_fake.last_close_opp = &last_close_op;
+                }
                  data_fake.flags = 0;
                  data_fake.substrs[0].flags = 0;
                  data_fake.substrs[1].flags = 0;
@@ -6311,8 +6462,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      data->whilem_c = data_fake.whilem_c;
                      if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
                          int i;
-                        if (RExC_rx->minlen<*minnextp)
-                            RExC_rx->minlen=*minnextp;
+                        if (RExC_rx->minlen < *minnextp)
+                            RExC_rx->minlen = *minnextp;
                          scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
                          SvREFCNT_dec_NN(data_fake.last_found);
  
@@ -6346,8 +6497,10 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  if ( next && (OP(next) != WHILEM) && next < last)
                      is_par = 0;                /* Disable optimization */
              }
-            if (data)
+            if (data) {
                  *(data->last_closep) = ARG(scan);
+                *(data->last_close_opp) = scan;
+            }
          }
          else if (OP(scan) == EVAL) {
              if (data)
@@ -6361,11 +6514,10 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
              if (OP(scan)==ACCEPT) {
                  /* m{(*ACCEPT)x} does not have to start with 'x' */
                  flags &= ~SCF_DO_STCLASS;
-                if (data) {
+                if (data)
                      data->flags |= SCF_SEEN_ACCEPT;
-                    if (stopmin > min)
-                        stopmin = min;
-                }
+                if (stopmin > min)
+                    stopmin = min;
              }
          }
          else if (OP(scan) == COMMIT) {
@@ -6430,21 +6582,27 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
  
                  for ( word=1 ; word <= trie->wordcount ; word++)
                  {
-                    SSize_t deltanext=0, minnext=0, f = 0, fake;
+                    SSize_t deltanext = 0, minnext = 0;
+                    U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
+                    SSize_t fake_last_close = 0;
+                    regnode *fake_last_close_op = NULL;
                      regnode_ssc this_class;
  
                      StructCopy(&zero_scan_data, &data_fake, scan_data_t);
                      if (data) {
                          data_fake.whilem_c = data->whilem_c;
                          data_fake.last_closep = data->last_closep;
+                        data_fake.last_close_opp = data->last_close_opp;
+                    }
+                    else {
+                        data_fake.last_closep = &fake_last_close;
+                        data_fake.last_close_opp = &fake_last_close_op;
                      }
-                    else
-                        data_fake.last_closep = &fake;
                      data_fake.pos_delta = delta;
                      if (flags & SCF_DO_STCLASS) {
                          ssc_init(pRExC_state, &this_class);
                          data_fake.start_class = &this_class;
-                        f = SCF_DO_STCLASS_AND;
+                        f |= SCF_DO_STCLASS_AND;
                      }
                      if (flags & SCF_WHILEM_VISITED_POS)
                          f |= SCF_WHILEM_VISITED_POS;
@@ -6490,6 +6648,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      if (flags & SCF_DO_STCLASS)
                          ssc_or(pRExC_state, &accum, (regnode_charclass *) &this_class);
                  }
+                DEBUG_STUDYDATA("after JUMPTRIE", data, depth, is_inf, min, stopmin, delta);
              }
              if (flags & SCF_DO_SUBSTR) {
                  data->pos_min += min1;
@@ -6527,6 +6686,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  }
              }
              scan= tail;
+            DEBUG_STUDYDATA("after TRIE study", data, depth, is_inf, min, stopmin, delta);
              continue;
          }
  #else
@@ -6548,14 +6708,15 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
              if (trie->jump) /* no more substrings -- for now /grr*/
                 flags &= ~SCF_DO_SUBSTR;
          }
-        else if (OP(scan) == REGEX_SET) {
-            Perl_croak(aTHX_ "panic: %s regnode should be resolved"
-                             " before optimization", reg_name[REGEX_SET]);
-        }
  
  #endif /* old or new */
  #endif /* TRIE_STUDY_OPT */
  
+        else if (OP(scan) == REGEX_SET) {
+            Perl_croak(aTHX_ "panic: %s regnode should be resolved"
+                             " before optimization", PL_reg_name[REGEX_SET]);
+        }
+
          /* Else: zero-length, ignore. */
          scan = regnext(scan);
      }
@@ -6565,7 +6726,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
          /* we need to unwind recursion. */
          depth = depth - 1;
  
-        DEBUG_STUDYDATA("frame-end", data, depth, is_inf);
+        DEBUG_STUDYDATA("frame-end", data, depth, is_inf, min, stopmin, delta);
          DEBUG_PEEP("fend", scan, depth, flags);
  
          /* restore previous context */
@@ -6580,7 +6741,17 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
      }
  
      assert(!frame);
-    DEBUG_STUDYDATA("pre-fin", data, depth, is_inf);
+    DEBUG_STUDYDATA("pre-fin", data, depth, is_inf, min, stopmin, delta);
+
+    if (min > stopmin) {
+        /* stopmin might be shorter than min if we saw an (*ACCEPT). If
+        this is the case then it means this pattern is variable length
+        and we need to ensure that the delta accounts for it. delta
+        represents the difference between min length and max length for
+        this part of the pattern. */
+        delta += min - stopmin;
+        min = stopmin;
+    }
  
      *scanp = scan;
      *deltap = is_inf_internal ? OPTIMIZE_INFTY : delta;
@@ -6602,36 +6773,63 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
      if (flags & SCF_TRIE_RESTUDY)
          data->flags |=         SCF_TRIE_RESTUDY;
  
-    DEBUG_STUDYDATA("post-fin", data, depth, is_inf);
-
-    final_minlen = min < stopmin
-            ? min : stopmin;
  
      if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
-        if (final_minlen > OPTIMIZE_INFTY - delta)
+        if (min > OPTIMIZE_INFTY - delta)
              RExC_maxlen = OPTIMIZE_INFTY;
-        else if (RExC_maxlen < final_minlen + delta)
-            RExC_maxlen = final_minlen + delta;
+        else if (RExC_maxlen < min + delta)
+            RExC_maxlen = min + delta;
      }
-    return final_minlen;
+    DEBUG_STUDYDATA("post-fin", data, depth, is_inf, min, stopmin, delta);
+    return min;
  }
  
+/* Add n data members (types given by s) to the struct reg_data attached to
+ * this regex; returns the index of the first new slot, always non-zero */
  STATIC U32
  S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
  {
-    U32 count = RExC_rxi->data ? RExC_rxi->data->count : 0;
+    U32 count = RExC_rxi->data ? RExC_rxi->data->count : 1;
  
      PERL_ARGS_ASSERT_ADD_DATA;
  
+    /* in the below expression we have (count + n - 1), the minus one is there
+     * because the struct that we allocate already contains a slot for 1 data
+     * item, so we do not need to allocate it the first time. IOW, the
+     * sizeof(*RExC_rxi->data) already accounts for one of the elements we need
+     * to allocate. See struct reg_data in regcomp.h
+     */
      Renewc(RExC_rxi->data,
-           sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1),
+           sizeof(*RExC_rxi->data) + (sizeof(void*) * (count + n - 1)),
             char, struct reg_data);
-    if(count)
-        Renew(RExC_rxi->data->what, count + n, U8);
-    else
-        Newx(RExC_rxi->data->what, n, U8);
+    /* however in the data->what expression we use (count + n) and do not
+     * subtract one from the result because the data structure contains a
+     * pointer to an array, and does not allocate the first element as part of
+     * the data struct. */
+    if (count > 1)
+        Renew(RExC_rxi->data->what, (count + n), U8);
+    else {
+        /* when count == 1 it means we have not initialized anything.
+         * we always fill the 0 slot of the data array with a '%' entry, which
+         * means "zero" (all the other types are letters) which exists purely
+         * so the return from add_data is ALWAYS true, so we can tell it apart
+         * from a "no value" idx=0 in places where we would return an index
+         * into add_data.  This is particularly important with the new "single
+         * pass, usually, but not always" strategy that we use, where the code
+         * will use a 0 to represent "not able to compute this yet".
+         */
+        Newx(RExC_rxi->data->what, n+1, U8);
+        /* fill in the placeholder slot of 0 with a what of '%', we use
+         * this because it sorta looks like a zero (0/0) and it is not a letter
+         * like any of the other "whats", this type should never be created
+         * any other way but here. '%' happens to also not appear in this
+         * file for any other reason (at the time of writing this comment)*/
+        RExC_rxi->data->what[0]= '%';
+        RExC_rxi->data->data[0]= NULL;
+    }
      RExC_rxi->data->count = count + n;
      Copy(s, RExC_rxi->data->what + count, n, U8);
+    assert(count>0);
      return count;
  }
  
@@ -6666,6 +6864,7 @@ Perl_reginitcolors(pTHX)
  
  
  #ifdef TRIE_STUDY_OPT
+/* search for "restudy" in this file for a detailed explanation */
  #define CHECK_RESTUDY_GOTO_butfirst(dOsomething)            \
      STMT_START {                                            \
          if (                                                \
@@ -7562,6 +7761,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_state_t RExC_state;
      RExC_state_t * const pRExC_state = &RExC_state;
  #ifdef TRIE_STUDY_OPT
+    /* search for "restudy" in this file for a detailed explanation */
      int restudied = 0;
      RExC_state_t copyRExC_state;
  #endif
@@ -7866,13 +8066,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_rx->intflags = 0;
  
      RExC_flags = rx_flags;     /* don't let top level (?i) bleed */
-    RExC_parse = exp;
+    RExC_parse_set(exp);
  
      /* This NUL is guaranteed because the pattern comes from an SV*, and the sv
       * code makes sure the final byte is an uncounted NUL.  But should this
       * ever not be the case, lots of things could read beyond the end of the
       * buffer: loops like
-     *      while(isFOO(*RExC_parse)) RExC_parse++;
+     *      while(isFOO(*RExC_parse)) RExC_parse_inc_by(1);
       *      strchr(RExC_parse, "foo");
       * etc.  So it is worth noting. */
      assert(*RExC_end == '\0');
@@ -7981,28 +8181,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          RExC_lastparse=NULL;
      });
  
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    DEBUG_OFFSETS_r(Perl_re_printf( aTHX_
-                          "%s %" UVuf " bytes for offset annotations.\n",
-                          RExC_offsets ? "Got" : "Couldn't get",
-                          (UV)((RExC_offsets[0] * 2 + 1))));
-    DEBUG_OFFSETS_r(if (RExC_offsets) {
-        const STRLEN len = RExC_offsets[0];
-        STRLEN i;
-        DECLARE_AND_GET_RE_DEBUG_FLAGS;
-        Perl_re_printf( aTHX_
-                      "Offsets: [%" UVuf "]\n\t", (UV)RExC_offsets[0]);
-        for (i = 1; i <= len; i++) {
-            if (RExC_offsets[i*2-1] || RExC_offsets[i*2])
-                Perl_re_printf( aTHX_  "%" UVuf ":%" UVuf "[%" UVuf "] ",
-                (UV)i, (UV)RExC_offsets[i*2-1], (UV)RExC_offsets[i*2]);
-        }
-        Perl_re_printf( aTHX_  "\n");
-    });
-
-#else
      SetProgLen(RExC_rxi,RExC_size);
-#endif
  
      DEBUG_DUMP_PRE_OPTIMIZE_r({
          SV * const sv = sv_newmortal();
@@ -8049,6 +8228,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
  
  #ifdef TRIE_STUDY_OPT
+    /* search for "restudy" in this file for a detailed explanation */
      if (!restudied) {
          StructCopy(&zero_scan_data, &data, scan_data_t);
          copyRExC_state = RExC_state;
@@ -8082,13 +8262,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         data in the pattern. If there is then we can use it for optimisations */
      if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)) { /*  Only one top-level choice.
                                                    */
-        SSize_t fake;
+        SSize_t fake_deltap;
          STRLEN longest_length[2];
          regnode_ssc ch_class; /* pointed to by data */
          int stclass_flag;
          SSize_t last_close = 0; /* pointed to by data */
          regnode *first= scan;
          regnode *first_next= regnext(first);
+        regnode *last_close_op= NULL;
          int i;
  
          /*
@@ -8145,7 +8326,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          else if (PL_regkind[OP(first)] == TRIE &&
                  ((reg_trie_data *)RExC_rxi->data->data[ ARG(first) ])->minlen>0)
          {
-            /* this can happen only on restudy */
+            /* this can happen only on restudy
+             * Search for "restudy" in this file to find
+             * a comment with details. */
              RExC_rxi->regstclass = construct_ahocorasick_from_trie(pRExC_state, (regnode *)first, 0);
          }
  #endif
@@ -8189,6 +8372,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
          /* Scan is after the zeroth branch, first is atomic matcher. */
  #ifdef TRIE_STUDY_OPT
+        /* search for "restudy" in this file for a detailed explanation */
          DEBUG_PARSE_r(
              if (!restudied)
                  Perl_re_printf( aTHX_  "first at %" IVdf "\n",
@@ -8230,18 +8414,21 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          } else                         /* XXXX Check for BOUND? */
              stclass_flag = 0;
          data.last_closep = &last_close;
+        data.last_close_opp = &last_close_op;
  
          DEBUG_RExC_seen();
          /*
           * MAIN ENTRY FOR study_chunk() FOR m/PATTERN/
           * (NO top level branches)
           */
-        minlen = study_chunk(pRExC_state, &first, &minlen, &fake,
+        minlen = study_chunk(pRExC_state, &first, &minlen, &fake_deltap,
                               scan + RExC_size, /* Up to end */
              &data, -1, 0, NULL,
              SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
                            | (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
              0, TRUE);
+        /* search for "restudy" in this file for a detailed explanation
+         * of 'restudied' and SCF_TRIE_DOING_RESTUDY */
  
  
          CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
@@ -8349,9 +8536,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      }
      else {
          /* Several toplevels. Best we can is to set minlen. */
-        SSize_t fake;
+        SSize_t fake_deltap;
          regnode_ssc ch_class;
          SSize_t last_close = 0;
+        regnode *last_close_op = NULL;
  
          DEBUG_PARSE_r(Perl_re_printf( aTHX_  "\nMulti Top Level\n"));
  
@@ -8359,6 +8547,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          ssc_init(pRExC_state, &ch_class);
          data.start_class = &ch_class;
          data.last_closep = &last_close;
+        data.last_close_opp = &last_close_op;
  
          DEBUG_RExC_seen();
          /*
@@ -8366,11 +8555,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
           * (patterns WITH top level branches)
           */
          minlen = study_chunk(pRExC_state,
-            &scan, &minlen, &fake, scan + RExC_size, &data, -1, 0, NULL,
+            &scan, &minlen, &fake_deltap, scan + RExC_size, &data, -1, 0, NULL,
              SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
                                                        ? SCF_TRIE_DOING_RESTUDY
                                                        : 0),
              0, TRUE);
+        /* search for "restudy" in this file for a detailed explanation
+         * of 'restudied' and SCF_TRIE_DOING_RESTUDY */
  
          CHECK_RESTUDY_GOTO_butfirst(NOOP);
  
@@ -8625,7 +8816,7 @@ Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
                          return ret;
                  } else {
                      if (retarray)
-                        ret = newSVsv(&PL_sv_undef);
+                        ret = newSV_type(SVt_NULL);
                  }
                  if (retarray)
                      av_push(retarray, ret);
@@ -9012,15 +9203,15 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
            * using do...while */
          if (UTF)
              do {
-                RExC_parse += UTF8SKIP(RExC_parse);
+                RExC_parse_inc_utf8();
              } while (   RExC_parse < RExC_end
                       && isWORDCHAR_utf8_safe((U8*)RExC_parse, (U8*) RExC_end));
          else
              do {
-                RExC_parse++;
+                RExC_parse_inc_by(1);
              } while (RExC_parse < RExC_end && isWORDCHAR(*RExC_parse));
      } else {
-        RExC_parse++; /* so the <- from the vFAIL is after the offending
+        RExC_parse_inc_by(1); /* so the <- from the vFAIL is after the offending
                           character */
          vFAIL("Group name must start with a non-digit word character");
      }
@@ -9516,7 +9707,7 @@ Perl__invlist_search(SV* const invlist, const UV cp)
      PERL_ARGS_ASSERT__INVLIST_SEARCH;
  
      /* If list is empty, return failure. */
-    if (high == 0) {
+    if (UNLIKELY(high == 0)) {
          return -1;
      }
  
@@ -9525,7 +9716,7 @@ Perl__invlist_search(SV* const invlist, const UV cp)
  
      mid = invlist_previous_index(invlist);
      assert(mid >=0);
-    if (mid > highest_element) {
+    if (UNLIKELY(mid > highest_element)) {
          mid = highest_element;
      }
  
@@ -10842,7 +11033,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
  
      /* '^' as an initial flag sets certain defaults */
      if (UCHARAT(RExC_parse) == '^') {
-        RExC_parse++;
+        RExC_parse_inc_by(1);
          has_use_defaults = TRUE;
          STD_PMMOD_FLAGS_CLEAR(&RExC_flags);
          cs = (toUSE_UNI_CHARSET_NOT_DEPENDS)
@@ -10866,7 +11057,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
          if ((RExC_pm_flags & PMf_WILDCARD)) {
              if (flagsp == & negflags) {
                  if (*RExC_parse == 'm') {
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      /* diag_listed_as: Use of %s is not allowed in Unicode
                         property wildcard subpatterns in regex; marked by <--
                         HERE in m/%s/ */
@@ -10943,7 +11134,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  has_charset_modifier = DEPENDS_PAT_MOD;
                  break;
                excess_modifier:
-                RExC_parse++;
+                RExC_parse_inc_by(1);
                  if (has_charset_modifier == ASCII_RESTRICT_PAT_MOD) {
                      vFAIL2("Regexp modifier \"%c\" may appear a maximum of twice", ASCII_RESTRICT_PAT_MOD);
                  }
@@ -10956,7 +11147,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  }
                  NOT_REACHED; /*NOTREACHED*/
                neg_modifier:
-                RExC_parse++;
+                RExC_parse_inc_by(1);
                  vFAIL2("Regexp modifier \"%c\" may not appear after the \"-\"",
                                      *(RExC_parse - 1));
                  NOT_REACHED; /*NOTREACHED*/
@@ -11029,7 +11220,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  if (  (RExC_pm_flags & PMf_WILDCARD)
                      && cs != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
                  {
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      /* diag_listed_as: Use of %s is not allowed in Unicode
                         property wildcard subpatterns in regex; marked by <--
                         HERE in m/%s/ */
@@ -11052,20 +11243,20 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  return;
              default:
                fail_modifiers:
-                RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
+                RExC_parse_inc_if_char();
                  /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                  vFAIL2utf8f("Sequence (%" UTF8f "...) not recognized",
                        UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
                  NOT_REACHED; /*NOTREACHED*/
          }
  
-        RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+        RExC_parse_inc();
      }
  
      vFAIL("Sequence (?... not terminated");
  
    modifier_illegal_in_wildcard:
-    RExC_parse++;
+    RExC_parse_inc_by(1);
      /* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
         subpatterns in regex; marked by <-- HERE in m/%s/ */
      vFAIL2("Use of modifier '%c' is not allowed in Unicode property wildcard"
@@ -11091,7 +11282,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
  STATIC regnode_offset
  S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
                               I32 *flagp,
-                             char * parse_start,
+                             char * backref_parse_start,
                               char ch
                        )
  {
@@ -11105,12 +11296,12 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
  
      if (RExC_parse != name_start && ch == '}') {
          while (isBLANK(*RExC_parse)) {
-            RExC_parse++;
+            RExC_parse_inc_by(1);
          }
      }
      if (RExC_parse == name_start || *RExC_parse != ch) {
          /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
-        vFAIL2("Sequence %.3s... not terminated", parse_start);
+        vFAIL2("Sequence %.3s... not terminated", backref_parse_start);
      }
  
      if (sv_dat) {
@@ -11132,13 +11323,155 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
                      num);
      *flagp |= HASWIDTH;
  
-    Set_Node_Offset(REGNODE_p(ret), parse_start+1);
-    Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
-
      nextchar(pRExC_state);
      return ret;
  }
  
+/* reg_la_NOTHING()
+ *
+ * Maybe parse a parenthesized lookaround construct that is equivalent to a
+ * NOTHING regop when the construct is empty.
+ *
+ * Calls skip_to_be_ignored_text() before checking if the construct is empty.
+ *
+ * Checks for unterminated constructs and throws a "not terminated" error
+ * naming the construct passed in 'type' (e.g. "?=") if necessary.
+ *
+ * Assuming it does not throw an exception, increments RExC_seen_zerolen.
+ *
+ * If the construct is empty, generates a NOTHING op and returns its
+ * regnode_offset, which the caller should then return to its caller.
+ *
+ * If the construct is not empty, increments RExC_in_lookaround, turns on
+ * in RExC_seen any bits passed in 'flags', and then returns 0 to signify
+ * that parsing should continue.
+ *
+ * PS: I would have called this reg_parse_lookaround_NOTHING() but then
+ * any use of it would have had to be broken onto multiple lines, hence
+ * the abbreviation.
+ */
+STATIC regnode_offset
+S_reg_la_NOTHING(pTHX_ RExC_state_t *pRExC_state, U32 flags,
+    const char *type)
+{
+
+    PERL_ARGS_ASSERT_REG_LA_NOTHING;
+
+    /* FALSE below so that we do not force /x */
+    skip_to_be_ignored_text(pRExC_state, &RExC_parse, FALSE);
+
+    if (RExC_parse >= RExC_end)
+        vFAIL2("Sequence (%s... not terminated", type);
+
+    /* Always increment, even for the empty case, as NOTHING regops are
+     * zerolen */
+    RExC_seen_zerolen++;
+
+    if (*RExC_parse == ')') {
+        regnode_offset ret= reg_node(pRExC_state, NOTHING);
+        nextchar(pRExC_state);
+        return ret; /* return produced regop */
+    }
+
+    RExC_seen |= flags; /* only set the caller's flags when not empty */
+    RExC_in_lookaround++;
+    return 0; /* keep parsing! */
+}
+
+/* reg_la_OPFAIL()
+ *
+ * Maybe parse a parenthesized lookaround construct that is equivalent to an
+ * OPFAIL regop when the construct is empty.
+ *
+ * Calls skip_to_be_ignored_text() before checking if the construct is empty.
+ *
+ * Checks for unterminated constructs and throws a "not terminated" error
+ * naming the construct passed in 'type' (e.g. "?!") if necessary.
+ *
+ * If the construct is empty, generates an OPFAIL op and returns its
+ * regnode_offset, which the caller should then return to its caller.
+ *
+ * If the construct is not empty, increments RExC_in_lookaround, and also
+ * increments RExC_seen_zerolen, and turns on in RExC_seen any bits passed
+ * in 'flags', and then returns 0 to signify that parsing should continue.
+ *
+ * PS: I would have called this reg_parse_lookaround_OPFAIL() but then
+ * any use of it would have had to be broken onto multiple lines, hence
+ * the abbreviation.
+ */
+
+STATIC regnode_offset
+S_reg_la_OPFAIL(pTHX_ RExC_state_t *pRExC_state, U32 flags,
+    const char *type)
+{
+
+    PERL_ARGS_ASSERT_REG_LA_OPFAIL;
+
+    /* FALSE below so that we do not force /x */;
+    skip_to_be_ignored_text(pRExC_state, &RExC_parse, FALSE);
+
+    if (RExC_parse >= RExC_end)
+        vFAIL2("Sequence (%s... not terminated", type);
+
+    if (*RExC_parse == ')') {
+        regnode_offset ret= reganode(pRExC_state, OPFAIL, 0);
+        nextchar(pRExC_state);
+        return ret; /* return produced regop */
+    }
+
+    /* Only increment zerolen *after* we check if we produce an OPFAIL,
+     * as an OPFAIL does not match a zero length construct, because it
+     * never matches at all. */
+    RExC_seen_zerolen++;
+    RExC_seen |= flags;
+    RExC_in_lookaround++;
+    return 0; /* keep parsing! */
+}
+
+/* Below are the main parsing routines.
+ *
+ * S_reg()      parses a whole pattern or subpattern.  It itself handles things
+ *              like the 'xyz' in '(?xyz:...)', and calls S_regbranch for each
+ *              alternation '|' in the '...' pattern.
+ * S_regbranch() effectively implements the concatenation operator, handling
+ *              one alternative of '|', repeatedly calling S_regpiece on each
+ *              segment of the input.
+ * S_regpiece() calls S_regatom to handle the next atomic chunk of the input,
+ *              and then adds any quantifier for that chunk.
+ * S_regatom()  parses the next chunk of the input, returning when it
+ *              determines it has found a complete atomic chunk.  The chunk may
+ *              be a nested subpattern, in which case S_reg is called
+ *              recursively
+ *
+ * The functions generate regnodes as they go along, appending each to the
+ * pattern data structure so far.  They return the offset of the current final
+ * node into that structure, or 0 on failure.
+ *
+ * There are three parameters common to all of them:
+ *   pRExC_state    is a structure with much information about the current
+ *                  state of the parse.  It's easy to add new elements to
+ *                  convey new information, but beware that an error return may
+ *                  require clearing the element.
+ *   flagp          is a pointer to bit flags set in a lower level to pass up
+ *                  to higher levels information, such as the cause of a
+ *                  failure, or some characteristic about the generated node
+ *   depth          is roughly the recursion depth, mostly unused except for
+ *                  pretty printing debugging info.
+ *
+ * There are ancillary functions that these may farm work out to, using the
+ * same parameters.
+ *
+ * The protocol for handling flags is that each function will, before
+ * returning, add into *flagp the flags it needs to pass up.  Each function has
+ * a second flags variable, typically named 'flags', which it sets and clears
+ * at will.  Flag bits in it are used in that function, and it calls the next
+ * layer down with its 'flagp' parameter set to '&flags'.  Thus, upon return,
+ * 'flags' will contain whatever it had before the call, plus whatever that
+ * function passed up.  If it wants to pass any of these up to its caller, it
+ * has to add them to its *flagp.  This means that it takes extra steps to keep
+ * passing a flag upwards, and otherwise the flag bit is cleared for higher
+ * functions.
+ */
+
  /* On success, returns the offset at which any next node should be placed into
   * the regex engine program being compiled.
   *
@@ -11170,8 +11503,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
      SV * max_open;  /* Max number of unclosed parens */
      I32 was_in_lookaround = RExC_in_lookaround;
  
-    char * parse_start = RExC_parse; /* MJD */
-    char * const oregcomp_parse = RExC_parse;
+    /* The difference between the following variables can be seen with  *
+     * the broken pattern /(?:foo/ where segment_parse_start will point *
+     * at the 'f', and reg_parse_start will point at the '('            */
+
+    /* the following is used for unmatched '(' errors */
+    char * const reg_parse_start = RExC_parse;
+
+    /* the following is used to track where various segments of
+     * the pattern that we parse out started. */
+    char * segment_parse_start = RExC_parse;
  
      DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
@@ -11192,9 +11533,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
  
      /* Having this true makes it feasible to have a lot fewer tests for the
       * parse pointer being in scope.  For example, we can write
-     *      while(isFOO(*RExC_parse)) RExC_parse++;
+     *      while(isFOO(*RExC_parse)) RExC_parse_inc_by(1);
       * instead of
-     *      while(RExC_parse < RExC_end && isFOO(*RExC_parse)) RExC_parse++;
+     *      while(RExC_parse < RExC_end && isFOO(*RExC_parse)) RExC_parse_inc_by(1);
       */
      assert(*RExC_end == '\0');
  
@@ -11222,11 +11563,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              char *start_arg = NULL;
              unsigned char op = 0;
              int arg_required = 0;
-            int internal_argval = -1; /* if >-1 we are not allowed an argument*/
+            int internal_argval = -1; /* if > -1 no argument allowed */
              bool has_upper = FALSE;
+            U32 seen_flag_set = 0; /* RExC_seen flags we must set */
  
              if (has_intervening_patws) {
-                RExC_parse++;   /* past the '*' */
+                RExC_parse_inc_by(1);   /* past the '*' */
  
                  /* For strict backwards compatibility, don't change the message
                   * now that we also have lowercase operands */
@@ -11246,10 +11588,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      if (isUPPER(*RExC_parse)) {
                          has_upper = TRUE;
                      }
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                  }
                  else {
-                    RExC_parse += UTF8SKIP(RExC_parse);
+                    RExC_parse_inc_utf8();
                  }
              }
              verb_len = RExC_parse - start_verb;
@@ -11258,9 +11600,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      goto unterminated_verb_pattern;
                  }
  
-                RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                RExC_parse_inc();
                  while ( RExC_parse < RExC_end && *RExC_parse != ')' ) {
-                    RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                    RExC_parse_inc();
                  }
                  if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
                    unterminated_verb_pattern:
@@ -11379,7 +11721,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                          goto no_colon;
                      }
  
-                    RExC_parse = start_arg;
+                    RExC_parse_set(start_arg);
  
                      if (RExC_in_script_run) {
  
@@ -11442,55 +11784,67 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  break;
  
              lookbehind_alpha_assertions:
-                RExC_seen |= REG_LOOKBEHIND_SEEN;
+                seen_flag_set = REG_LOOKBEHIND_SEEN;
                  /*FALLTHROUGH*/
  
              alpha_assertions:
  
-                RExC_in_lookaround++;
-                RExC_seen_zerolen++;
-
-                if (! start_arg) {
+                if ( !start_arg ) {
                      goto no_colon;
                  }
  
-                /* An empty negative lookahead assertion simply is failure */
-                if (paren == 'A' && RExC_parse == start_arg) {
-                    ret=reganode(pRExC_state, OPFAIL, 0);
-                    nextchar(pRExC_state);
-                    return ret;
+                if ( RExC_parse == start_arg ) {
+                    if ( paren == 'A' || paren == 'B' ) {
+                        /* An empty negative lookaround assertion is failure.
+                         * See also: S_reg_la_OPFAIL() */
+
+                        /* Note: OPFAIL is *not* zerolen. */
+                        ret = reganode(pRExC_state, OPFAIL, 0);
+                        nextchar(pRExC_state);
+                        return ret;
+                    }
+                    else
+                    if ( paren == 'a' || paren == 'b' ) {
+                        /* An empty positive lookaround assertion is success.
+                         * See also: S_reg_la_NOTHING() */
+
+                        /* Note: NOTHING is zerolen, so increment here */
+                        RExC_seen_zerolen++;
+                        ret = reg_node(pRExC_state, NOTHING);
+                        nextchar(pRExC_state);
+                        return ret;
+                    }
                  }
  
-                RExC_parse = start_arg;
+                RExC_seen_zerolen++;
+                RExC_in_lookaround++;
+                RExC_seen |= seen_flag_set;
+
+                RExC_parse_set(start_arg);
                  goto parse_rest;
  
                no_colon:
-                vFAIL2utf8f(
-                "'(*%" UTF8f "' requires a terminating ':'",
-                UTF8fARG(UTF, verb_len, start_verb));
+                vFAIL2utf8f( "'(*%" UTF8f "' requires a terminating ':'",
+                    UTF8fARG(UTF, verb_len, start_verb));
                  NOT_REACHED; /*NOTREACHED*/
  
              } /* End of switch */
              if ( ! op ) {
-                RExC_parse += UTF
-                              ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                              : 1;
+                RExC_parse_inc_safe();
                  if (has_upper || verb_len == 0) {
-                    vFAIL2utf8f(
-                    "Unknown verb pattern '%" UTF8f "'",
-                    UTF8fARG(UTF, verb_len, start_verb));
+                    vFAIL2utf8f( "Unknown verb pattern '%" UTF8f "'",
+                        UTF8fARG(UTF, verb_len, start_verb));
                  }
                  else {
-                    vFAIL2utf8f(
-                    "Unknown '(*...)' construct '%" UTF8f "'",
-                    UTF8fARG(UTF, verb_len, start_verb));
+                    vFAIL2utf8f( "Unknown '(*...)' construct '%" UTF8f "'",
+                        UTF8fARG(UTF, verb_len, start_verb));
                  }
              }
              if ( RExC_parse == start_arg ) {
                  start_arg = NULL;
              }
              if ( arg_required && !start_arg ) {
-                vFAIL3("Verb pattern '%.*s' has a mandatory argument",
+                vFAIL3( "Verb pattern '%.*s' has a mandatory argument",
                      (int) verb_len, start_verb);
              }
              if (internal_argval == -1) {
@@ -11500,8 +11854,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              }
              RExC_seen |= REG_VERBARG_SEEN;
              if (start_arg) {
-                SV *sv = newSVpvn( start_arg,
-                                    RExC_parse - start_arg);
+                SV *sv = newSVpvn( start_arg, RExC_parse - start_arg);
                  ARG(REGNODE_p(ret)) = add_data( pRExC_state,
                                          STR_WITH_LEN("S"));
                  RExC_rxi->data->data[ARG(REGNODE_p(ret))]=(void*)sv;
@@ -11523,14 +11876,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              const char impossible_group[] = "Invalid reference to group";
  
              if (has_intervening_patws) {
-                RExC_parse++;
+                RExC_parse_inc_by(1);
                  vFAIL("In '(?...)', the '(' and '?' must be adjacent");
              }
  
-            RExC_parse++;           /* past the '?' */
+            RExC_parse_inc_by(1);   /* past the '?' */
              paren = *RExC_parse;    /* might be a trailing NUL, if not
                                         well-formed */
-            RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+            RExC_parse_inc();
              if (RExC_parse > RExC_end) {
                  paren = '\0';
              }
@@ -11540,34 +11893,47 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              case 'P':  /* (?P...) variants for those used to PCRE/Python */
                  paren = *RExC_parse;
                  if ( paren == '<') {    /* (?P<...>) named capture */
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      if (RExC_parse >= RExC_end) {
                          vFAIL("Sequence (?P<... not terminated");
                      }
                      goto named_capture;
                  }
                  else if (paren == '>') {   /* (?P>name) named recursion */
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      if (RExC_parse >= RExC_end) {
                          vFAIL("Sequence (?P>... not terminated");
                      }
                      goto named_recursion;
                  }
                  else if (paren == '=') {   /* (?P=...)  named backref */
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      return handle_named_backref(pRExC_state, flagp,
-                                                parse_start, ')');
+                                                segment_parse_start, ')');
                  }
-                RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
+                RExC_parse_inc_if_char();
                  /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                  vFAIL3("Sequence (%.*s...) not recognized",
                                  (int) (RExC_parse - seqstart), seqstart);
                  NOT_REACHED; /*NOTREACHED*/
              case '<':           /* (?<...) */
                  /* If you want to support (?<*...), first reconcile with GH #17363 */
-                if (*RExC_parse == '!')
-                    paren = ',';
-                else if (*RExC_parse != '=')
+                if (*RExC_parse == '!') {
+                    paren = ','; /* negative lookbehind (?<! ... ) */
+                    RExC_parse_inc_by(1);
+                    if ((ret= reg_la_OPFAIL(pRExC_state,REG_LB_SEEN,"?<!")))
+                        return ret;
+                    break;
+                }
+                else
+                if (*RExC_parse == '=') {
+                    /* paren = '<' - positive lookbehind (?<= ... ) */
+                    RExC_parse_inc_by(1);
+                    if ((ret= reg_la_NOTHING(pRExC_state,REG_LB_SEEN,"?<=")))
+                        return ret;
+                    break;
+                }
+                else
                named_capture:
                  {               /* (?<...>) */
                      char *name_start;
@@ -11648,30 +12014,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      paren = 1;
                      goto capturing_parens;
                  }
-
-                RExC_seen |= REG_LOOKBEHIND_SEEN;
-                RExC_in_lookaround++;
-                RExC_parse++;
-                if (RExC_parse >= RExC_end) {
-                    vFAIL("Sequence (?... not terminated");
-                }
-                RExC_seen_zerolen++;
-                break;
+                NOT_REACHED; /*NOTREACHED*/
              case '=':           /* (?=...) */
-                RExC_seen_zerolen++;
-                RExC_in_lookaround++;
+                if ((ret= reg_la_NOTHING(pRExC_state, 0, "?=")))
+                    return ret;
                  break;
              case '!':           /* (?!...) */
-                RExC_seen_zerolen++;
-                /* check if we're really just a "FAIL" assertion */
-                skip_to_be_ignored_text(pRExC_state, &RExC_parse,
-                                        FALSE /* Don't force to /x */ );
-                if (*RExC_parse == ')') {
-                    ret=reganode(pRExC_state, OPFAIL, 0);
-                    nextchar(pRExC_state);
+                if ((ret= reg_la_OPFAIL(pRExC_state, 0, "?!")))
                      return ret;
-                }
-                RExC_in_lookaround++;
                  break;
              case '|':           /* (?|...) */
                  /* branch reset, behave like a (?:...) except that
@@ -11707,7 +12057,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  /*notreached*/
              /* named and numeric backreferences */
              case '&':            /* (?&NAME) */
-                parse_start = RExC_parse - 1;
+                segment_parse_start = RExC_parse - 1;
                named_recursion:
                  {
                      SV *sv_dat = reg_scan_name(pRExC_state,
@@ -11720,7 +12070,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  /* NOTREACHED */
              case '+':
                  if (! inRANGE(RExC_parse[0], '1', '9')) {
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      vFAIL("Illegal pattern");
                  }
                  goto parse_recursion;
@@ -11733,14 +12083,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  /* FALLTHROUGH */
              case '1': case '2': case '3': case '4': /* (?1) */
              case '5': case '6': case '7': case '8': case '9':
-                RExC_parse = (char *) seqstart + 1;  /* Point to the digit */
+                RExC_parse_set((char *) seqstart + 1);  /* Point to the digit */
                parse_recursion:
                  {
                      bool is_neg = FALSE;
                      UV unum;
-                    parse_start = RExC_parse - 1; /* MJD */
+                    segment_parse_start = RExC_parse - 1;
                      if (*RExC_parse == '-') {
-                        RExC_parse++;
+                        RExC_parse_inc_by(1);
                          is_neg = TRUE;
                      }
                      endptr = RExC_end;
@@ -11748,12 +12098,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                          && unum <= I32_MAX
                      ) {
                          num = (I32)unum;
-                        RExC_parse = (char*)endptr;
+                        RExC_parse_set((char*)endptr);
                      }
                      else {  /* Overflow, or something like that.  Position
                                 beyond all digits for the message */
                          while (RExC_parse < RExC_end && isDIGIT(*RExC_parse))  {
-                            RExC_parse++;
+                            RExC_parse_inc_by(1);
                          }
                          vFAIL(impossible_group);
                      }
@@ -11771,7 +12121,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
  
                      /* Don't overflow */
                      if (UNLIKELY(I32_MAX - RExC_npar < num)) {
-                        RExC_parse++;
+                        RExC_parse_inc_by(1);
                          vFAIL(impossible_group);
                      }
  
@@ -11799,7 +12149,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      num += RExC_npar;
  
                      if (paren == '-' && num < 1) {
-                        RExC_parse++;
+                        RExC_parse_inc_by(1);
                          vFAIL(non_existent_group_msg);
                      }
                  }
@@ -11811,7 +12161,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                       * then reparsing */
                      if (ALL_PARENS_COUNTED)  {
                          if (num >= RExC_total_parens) {
-                            RExC_parse++;
+                            RExC_parse_inc_by(1);
                              vFAIL(non_existent_group_msg);
                          }
                      }
@@ -11837,10 +12187,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                              (IV)ARG2L(REGNODE_p(ret))));
                  RExC_seen |= REG_RECURSE_SEEN;
  
-                Set_Node_Length(REGNODE_p(ret),
-                                1 + regarglen[OP(REGNODE_p(ret))]); /* MJD */
-                Set_Node_Offset(REGNODE_p(ret), parse_start); /* MJD */
-
                  *flagp |= POSTPONED;
                  assert(*RExC_parse == ')');
                  nextchar(pRExC_state);
@@ -11851,7 +12197,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              case '?':           /* (??...) */
                  is_logical = 1;
                  if (*RExC_parse != '{') {
-                    RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
+                    RExC_parse_inc_if_char();
                      /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                      vFAIL2utf8f(
                          "Sequence (%" UTF8f "...) not recognized",
@@ -11860,7 +12206,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  }
                  *flagp |= POSTPONED;
                  paren = '{';
-                RExC_parse++;
+                RExC_parse_inc_by(1);
                  /* FALLTHROUGH */
              case '{':           /* (?{...}) */
              {
@@ -11883,7 +12229,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  }
                  /* this is a pre-compiled code block (?{...}) */
                  cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index];
-                RExC_parse = RExC_start + cb->end;
+                RExC_parse_set(RExC_start + cb->end);
                  o = cb->block;
                  if (cb->src_regex) {
                      n = add_data(pRExC_state, STR_WITH_LEN("rl"));
@@ -11914,12 +12260,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      if (! REGTAIL(pRExC_state, ret, eval)) {
                          REQUIRE_BRANCHJ(flagp, 0);
                      }
-                    /* deal with the length of this later - MJD */
                      return ret;
                  }
                  ret = reg2Lanode(pRExC_state, EVAL, n, 0);
-                Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1);
-                Set_Node_Offset(REGNODE_p(ret), parse_start);
                  return ret;
              }
              case '(':           /* (?(?{...})...) and (?(?=...)...) */
@@ -11975,7 +12318,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                           || RExC_parse[0] == '\'' ) /* (?('NAME')...) */
                  {
                      char ch = RExC_parse[0] == '<' ? '>' : '\'';
-                    char *name_start= RExC_parse++;
+                    char *name_start= RExC_parse;
+                    RExC_parse_inc_by(1);
                      U32 num = 0;
                      SV *sv_dat=reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
                      if (   RExC_parse == name_start
@@ -11985,7 +12329,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                          vFAIL2("Sequence (?(%c... not terminated",
                              (ch == '>' ? '<' : ch));
                      }
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      if (sv_dat) {
                          num = add_data( pRExC_state, STR_WITH_LEN("S"));
                          RExC_rxi->data->data[num]=(void*)sv_dat;
@@ -11999,12 +12343,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                     "DEFINE"))
                  {
                      ret = reganode(pRExC_state, DEFINEP, 0);
-                    RExC_parse += DEFINE_len;
+                    RExC_parse_inc_by(DEFINE_len);
                      is_define = 1;
                      goto insert_if_check_paren;
                  }
                  else if (RExC_parse[0] == 'R') {
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      /* parno == 0 => /(?(R)YES|NO)/  "in any form of recursion OR eval"
                       * parno == 1 => /(?(R0)YES|NO)/ "in GOSUB (?0) / (?R)"
                       * parno == 2 => /(?(R1)YES|NO)/ "in GOSUB (?1) (parno-1)"
@@ -12012,7 +12356,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      parno = 0;
                      if (RExC_parse[0] == '0') {
                          parno = 1;
-                        RExC_parse++;
+                        RExC_parse_inc_by(1);
                      }
                      else if (inRANGE(RExC_parse[0], '1', '9')) {
                          UV uv;
@@ -12021,12 +12365,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                              && uv <= I32_MAX
                          ) {
                              parno = (I32)uv + 1;
-                            RExC_parse = (char*)endptr;
+                            RExC_parse_set((char*)endptr);
                          }
                          /* else "Switch condition not recognized" below */
                      } else if (RExC_parse[0] == '&') {
                          SV *sv_dat;
-                        RExC_parse++;
+                        RExC_parse_inc_by(1);
                          sv_dat = reg_scan_name(pRExC_state,
                                                 REG_RSN_RETURN_DATA);
                          if (sv_dat)
@@ -12044,7 +12388,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                          && uv <= I32_MAX
                      ) {
                          parno = (I32)uv;
-                        RExC_parse = (char*)endptr;
+                        RExC_parse_set((char*)endptr);
                      }
                      else {
                          vFAIL("panic: grok_atoUV returned FALSE");
@@ -12053,9 +12397,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
  
                   insert_if_check_paren:
                      if (UCHARAT(RExC_parse) != ')') {
-                        RExC_parse += UTF
-                                      ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                                      : 1;
+                        RExC_parse_inc_safe();
                          vFAIL("Switch condition not recognized");
                      }
                      nextchar(pRExC_state);
@@ -12136,14 +12478,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
  #endif
                      return ret;
                  }
-                RExC_parse += UTF
-                              ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                              : 1;
+                RExC_parse_inc_safe();
                  vFAIL("Unknown switch condition (?(...))");
              }
              case '[':           /* (?[ ... ]) */
-                return handle_regex_sets(pRExC_state, NULL, flagp, depth+1,
-                                         oregcomp_parse);
+                return handle_regex_sets(pRExC_state, NULL, flagp, depth+1);
              case 0: /* A NUL */
                  RExC_parse--; /* for vFAIL to print correctly */
                  vFAIL("Sequence (? incomplete");
@@ -12157,7 +12496,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              case '*': /* If you want to support (?*...), first reconcile with GH #17363 */
              /* FALLTHROUGH */
              default: /* e.g., (?i) */
-                RExC_parse = (char *) seqstart + 1;
+                RExC_parse_set((char *) seqstart + 1);
                parse_flags:
                  parse_lparen_question_flags(pRExC_state);
                  if (UCHARAT(RExC_parse) != ':') {
@@ -12232,8 +12571,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  RExC_open_parens[parno]= ret;
              }
  
-            Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
-            Set_Node_Offset(REGNODE_p(ret), RExC_parse); /* MJD */
              is_open = 1;
          } else {
              /* with RXf_PMf_NOCAPTURE treat (...) as (?:...) */
@@ -12246,7 +12583,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
  
     parse_rest:
      /* Pick up the branches, linking them together. */
-    parse_start = RExC_parse;   /* MJD */
+    segment_parse_start = RExC_parse;
      br = regbranch(pRExC_state, &flags, 1, depth+1);
  
      /*     branch_len = (paren != 0); */
@@ -12259,10 +12596,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
          if (RExC_use_BRANCHJ) {
              reginsert(pRExC_state, BRANCHJ, br, depth+1);
          }
-        else {                  /* MJD */
+        else {
              reginsert(pRExC_state, BRANCH, br, depth+1);
-            Set_Node_Length(REGNODE_p(br), paren != 0);
-            Set_Node_Offset_To_R(br, parse_start-RExC_start);
          }
          have_branch = 1;
      }
@@ -12328,19 +12663,22 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  if (RExC_nestroot == parno)
                      RExC_nestroot = 0;
              }
-            Set_Node_Offset(REGNODE_p(ender), RExC_parse+1); /* MJD */
-            Set_Node_Length(REGNODE_p(ender), 1); /* MJD */
              break;
          case 's':
              ender = reg_node(pRExC_state, SRCLOSE);
              RExC_in_script_run = 0;
              break;
-        case '<':
+        /* LOOKBEHIND ops (not sure why these are duplicated - Yves) */
+        case 'b': /* (*positive_lookbehind: ... ) (*plb: ... ) */
+        case 'B': /* (*negative_lookbehind: ... ) (*nlb: ... ) */
+        case '<': /* (?<= ... ) */
+        case ',': /* (?<! ... ) */
+            *flagp &= ~HASWIDTH;
+            ender = reg_node(pRExC_state, LOOKBEHIND_END);
+            break;
+        /* LOOKAHEAD ops (not sure why these are duplicated - Yves) */
          case 'a':
          case 'A':
-        case 'b':
-        case 'B':
-        case ',':
          case '=':
          case '!':
              *flagp &= ~HASWIDTH;
@@ -12458,8 +12796,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              }
  
              reginsert(pRExC_state, node, ret, depth+1);
-            Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
-            Set_Node_Offset(REGNODE_p(ret), parse_start + 1);
              FLAGS(REGNODE_p(ret)) = flag;
              if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL)))
              {
@@ -12477,14 +12813,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
          }
          if (RExC_parse >= RExC_end || UCHARAT(RExC_parse) != ')') {
-            RExC_parse = oregcomp_parse;
+            RExC_parse_set(reg_parse_start);
              vFAIL("Unmatched (");
          }
          nextchar(pRExC_state);
      }
      else if (!paren && RExC_parse < RExC_end) {
          if (*RExC_parse == ')') {
-            RExC_parse++;
+            RExC_parse_inc_by(1);
              vFAIL("Unmatched )");
          }
          else
@@ -12532,7 +12868,6 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
              ret = reganode(pRExC_state, BRANCHJ, 0);
          else {
              ret = reg_node(pRExC_state, BRANCH);
-            Set_Node_Length(REGNODE_p(ret), 1);
          }
      }
  
@@ -12724,7 +13059,7 @@ S_get_quantifier_value(pTHX_ RExC_state_t *pRExC_state,
      }
      else if (*start == '0') { /* grok_atoUV() fails for only two reasons:
                                   leading zeros or overflow */
-        RExC_parse = (char * ) end;
+        RExC_parse_set((char * ) end);
  
          /* Perhaps too generic a msg for what is only failure from having
           * leading zeros, but this is how it's always behaved. */
@@ -12734,7 +13069,7 @@ S_get_quantifier_value(pTHX_ RExC_state_t *pRExC_state,
  
      /* Here, found a quantifier, but was too large; either it overflowed or was
       * too big a legal number */
-    RExC_parse = (char * ) end;
+    RExC_parse_set((char * ) end);
      vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
  
      NOT_REACHED; /*NOTREACHED*/
@@ -12767,9 +13102,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      const char * const origparse = RExC_parse;
      I32 min;
      I32 max = REG_INFTY;
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    char *parse_start;
-#endif
  
      /* Save the original in case we change the emitted regop to a FAIL. */
      const regnode_offset orig_emit = RExC_emit;
@@ -12786,10 +13118,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          FAIL2("panic: regatom returned failure, flags=%#" UVxf, (UV) flags);
      }
  
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    parse_start = RExC_parse;
-#endif
-
      op = *RExC_parse;
      switch (op) {
          const char * regcurly_return[5];
@@ -12836,7 +13164,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  max = get_quantifier_value(pRExC_state, max_start, max_end);
              }
  
-            RExC_parse = (char *) regcurly_return[RBRACE];
+            RExC_parse_set((char *) regcurly_return[RBRACE]);
              nextchar(pRExC_state);
  
              if (max < min) {    /* If can't match, warn and optimize to fail
@@ -12931,8 +13259,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
          MARK_NAUGHTY_EXP(2, 2);
          reginsert(pRExC_state, CURLY, ret, depth+1);
-        Set_Node_Offset(REGNODE_p(ret), parse_start+1); /* MJD */
-        Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
      }
      else {  /* not SIMPLE */
          const regnode_offset w = reg_node(pRExC_state, WHILEM);
@@ -12947,10 +13273,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              NEXT_OFF(REGNODE_p(ret)) = 3;        /* Go over LONGJMP. */
          }
          reginsert(pRExC_state, CURLYX, ret, depth+1);
-                        /* MJD hk */
-        Set_Node_Offset(REGNODE_p(ret), parse_start+1);
-        Set_Node_Length(REGNODE_p(ret),
-                        op == '{' ? (RExC_parse - parse_start) : 1);
  
          if (RExC_use_BRANCHJ)
              NEXT_OFF(REGNODE_p(ret)) = 3;   /* Go over NOTHING to
@@ -12996,7 +13318,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
      /* Forbid extra quantifiers */
      if (isQUANTIFIER(RExC_parse, RExC_end)) {
-        RExC_parse++;
+        RExC_parse_inc_by(1);
          vFAIL("Nested quantifiers");
      }
  
@@ -13009,7 +13331,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
       * reason is to make it harder to write patterns that take a long long time
       * to halt, and because the use of this construct isn't necessary in
       * matching Unicode property values */
-    RExC_parse++;
+    RExC_parse_inc_by(1);
      /* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
         subpatterns in regex; marked by <-- HERE in m/%s/
       */
@@ -13142,7 +13464,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
       * [^\n].  The latter is assumed when the {...} following the \N is a legal
       * quantifier, or if there is no '{' at all */
      if (*p != '{' || regcurly(p, RExC_end, NULL)) {
-        RExC_parse = p;
+        RExC_parse_set(p);
          if (cp_count) {
              *cp_count = -1;
          }
@@ -13154,7 +13476,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          *node_p = reg_node(pRExC_state, REG_ANY);
          *flagp |= HASWIDTH|SIMPLE;
          MARK_NAUGHTY(1);
-        Set_Node_Length(REGNODE_p(*(node_p)), 1); /* MJD */
          return TRUE;
      }
  
@@ -13166,7 +13487,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          vFAIL("Missing braces on \\N{}");
      }
  
-    RExC_parse++;       /* Skip past the '{' */
+    RExC_parse_inc_by(1);       /* Skip past the '{' */
  
      endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
      if (! endbrace) { /* no trailing brace */
@@ -13180,9 +13501,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
      /* \N{_} is what toke.c returns to us to indicate a name that evaluates to
       * nothing at all (not allowed under strict) */
      if (endbrace - RExC_parse == 1 && *RExC_parse == '_') {
-        RExC_parse = endbrace;
+        RExC_parse_set(endbrace);
          if (strict) {
-            RExC_parse++;   /* Position after the "}" */
+            RExC_parse_inc_by(1);   /* Position after the "}" */
              vFAIL("Zero length \\N{}");
          }
  
@@ -13199,7 +13520,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
      }
  
      while (isBLANK(*RExC_parse)) {
-        RExC_parse++;
+        RExC_parse_inc_by(1);
      }
  
      e = endbrace;
@@ -13242,7 +13563,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                                                        UTF,
                                                        &error_msg);
              if (error_msg) {
-                RExC_parse = endbrace;
+                RExC_parse_set(endbrace);
                  vFAIL(error_msg);
              }
  
@@ -13268,7 +13589,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
              /* Here, exactly one code point.  If that isn't what is wanted,
               * fail */
              if (! code_point_p) {
-                RExC_parse = p;
+                RExC_parse_set(p);
                  return FALSE;
              }
  
@@ -13279,7 +13600,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
              /* Have parsed this entire single code point \N{...}.  *cp_count
               * has already been set to 1, so don't do it again. */
-            RExC_parse = endbrace;
+            RExC_parse_set(endbrace);
              nextchar(pRExC_state);
              return TRUE;
          } /* End of is a single code point */
@@ -13300,7 +13621,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
           * case).  */
          if (! node_p) {
              if (! cp_count) {
-                RExC_parse = p;
+                RExC_parse_set(p);
              }
              return FALSE;
          }
@@ -13326,7 +13647,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
           * converted a name to the \N{U+...} form.  This include changing a
           * name that evaluates to multiple code points to \N{U+c1.c2.c3 ...} */
  
-        RExC_parse += 2;    /* Skip past the 'U+' */
+        RExC_parse_inc_by(2);    /* Skip past the 'U+' */
  
          /* Code points are separated by dots.  The '}' terminates the whole
           * thing. */
@@ -13343,12 +13664,12 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
              UV cp = grok_hex(RExC_parse, &len, &flags, &overflow_value);
  
              if (len == 0) {
-                RExC_parse++;
+                RExC_parse_inc_by(1);
                bad_NU:
                  vFAIL("Invalid hexadecimal number in \\N{U+...}");
              }
  
-            RExC_parse += len;
+            RExC_parse_inc_by(len);
  
              if (cp > MAX_LEGAL_CP) {
                  vFAIL(form_cp_too_large_msg(16, start_digit, len, 0));
@@ -13361,13 +13682,13 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
                  /* Here, is a single code point; fail if doesn't want that */
                  if (! code_point_p) {
-                    RExC_parse = p;
+                    RExC_parse_set(p);
                      return FALSE;
                  }
  
                  /* A single code point is easy to handle; just return it */
                  *code_point_p = UNI_TO_NATIVE(cp);
-                RExC_parse = endbrace;
+                RExC_parse_set(endbrace);
                  nextchar(pRExC_state);
                  return TRUE;
              }
@@ -13380,11 +13701,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
               * \N{U+100.} )
               * */
              if (*RExC_parse != '.' || RExC_parse + 1 >= e) {
-                RExC_parse += (RExC_orig_utf8)  /* point to after 1st invalid */
-                              ? UTF8SKIP(RExC_parse)
-                              : 1;
-                RExC_parse = MIN(e, RExC_parse);/* Guard against malformed utf8
-                                                 */
+                /* Advance to just past the first invalid character */
+                RExC_parse_incf(RExC_orig_utf8);
+                /* Clamp to 'e' in case malformed UTF-8 skipped past it */
+                RExC_parse_set(MIN(e, RExC_parse));
                  goto bad_NU;
              }
  
@@ -13419,7 +13739,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
              /* Move to after the dot (or ending brace the final time through.)
               * */
-            RExC_parse++;
+            RExC_parse_inc_by(1);
              count++;
  
          } while (RExC_parse < e);
@@ -13443,13 +13763,14 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
       * constructs.  This can be called from within a substitute parse already.
       * The error reporting mechanism doesn't work for 2 levels of this, but the
       * code above has validated this new construct, so there should be no
-     * errors generated by the below.  And this isn' an exact copy, so the
+     * errors generated by the below.  And this isn't an exact copy, so the
       * mechanism to seamlessly deal with this won't work, so turn off warnings
       * during it */
      save_start = RExC_start;
      orig_end = RExC_end;
  
-    RExC_parse = RExC_start = SvPVX(substitute_parse);
+    RExC_start = SvPVX(substitute_parse);
+    RExC_parse_set(RExC_start);
      RExC_end = RExC_parse + SvCUR(substitute_parse);
      TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE;
  
@@ -13458,7 +13779,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
      /* Restore the saved values */
      RESTORE_WARNINGS;
      RExC_start = save_start;
-    RExC_parse = endbrace;
+    RExC_parse_set(endbrace);
      RExC_end = orig_end;
      SET_recode_x_to_native(0);
  
@@ -13512,6 +13833,14 @@ S_backref_value(char *p, char *e)
      return I32_MAX;
  }
  
+#ifdef DEBUGGING /* REGNODE_GUTS(): debugging builds also forward 'op', via regnode_guts_debug() */
+#define REGNODE_GUTS(state,op,extra_size) \
+    regnode_guts_debug(state,op,extra_size)
+#else /* !DEBUGGING: 'op' is not used; only state and extra_size reach regnode_guts() */
+#define REGNODE_GUTS(state,op,extra_size) \
+    regnode_guts(state,extra_size)
+#endif /* DEBUGGING */
+
  
  /*
   - regatom - the lowest level
@@ -13587,7 +13916,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  {
      regnode_offset ret = 0;
      I32 flags = 0;
-    char *parse_start;
+    char *atom_parse_start;
      U8 op;
      int invert = 0;
  
@@ -13600,7 +13929,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      PERL_ARGS_ASSERT_REGATOM;
  
    tryagain:
-    parse_start = RExC_parse;
+    atom_parse_start = RExC_parse;
      assert(RExC_parse < RExC_end);
      switch ((U8)*RExC_parse) {
      case '^':
@@ -13610,7 +13939,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              ret = reg_node(pRExC_state, MBOL);
          else
              ret = reg_node(pRExC_state, SBOL);
-        Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
          break;
      case '$':
          nextchar(pRExC_state);
@@ -13620,7 +13948,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              ret = reg_node(pRExC_state, MEOL);
          else
              ret = reg_node(pRExC_state, SEOL);
-        Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
          break;
      case '.':
          nextchar(pRExC_state);
@@ -13630,11 +13957,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              ret = reg_node(pRExC_state, REG_ANY);
          *flagp |= HASWIDTH|SIMPLE;
          MARK_NAUGHTY(1);
-        Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
          break;
      case '[':
      {
-        char * const oregcomp_parse = ++RExC_parse;
+        char * const cc_parse_start = ++RExC_parse;
          ret = regclass(pRExC_state, flagp, depth+1,
                         FALSE, /* means parse the whole char class */
                         TRUE, /* allow multi-char folds */
@@ -13648,11 +13974,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    (UV) *flagp);
          }
          if (*RExC_parse != ']') {
-            RExC_parse = oregcomp_parse;
+            RExC_parse_set(cc_parse_start);
              vFAIL("Unmatched [");
          }
          nextchar(pRExC_state);
-        Set_Node_Length(REGNODE_p(ret), RExC_parse - oregcomp_parse + 1); /* MJD */
          break;
      }
      case '(':
@@ -13685,7 +14010,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      case '?':
      case '+':
      case '*':
-        RExC_parse++;
+        RExC_parse_inc_by(1);
          vFAIL("Quantifier follows nothing");
          break;
      case '\\':
@@ -13701,7 +14026,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             required, as the default for this switch is to jump to the
             literal text handling code.
          */
-        RExC_parse++;
+        RExC_parse_inc_by(1);
          switch ((U8)*RExC_parse) {
          /* Special Escapes */
          case 'A':
@@ -13720,7 +14045,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              goto finish_meta_pat;
          case 'G':
              if (RExC_pm_flags & PMf_WILDCARD) {
-                RExC_parse++;
+                RExC_parse_inc_by(1);
                  /* diag_listed_as: Use of %s is not allowed in Unicode property
                     wildcard subpatterns in regex; marked by <-- HERE in m/%s/
                   */
@@ -13797,14 +14122,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                                     RExC_end - RExC_parse);
                  char * e = endbrace;
  
-                RExC_parse += 2;
+                RExC_parse_inc_by(2);
  
                  if (! endbrace) {
                      vFAIL2("Missing right brace on \\%c{}", name);
                  }
  
                  while (isBLANK(*RExC_parse)) {
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                  }
  
                  while (RExC_parse < e && isBLANK(*(e - 1))) {
@@ -13812,7 +14137,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  }
  
                  if (e == RExC_parse) {
-                    RExC_parse = endbrace + 1;  /* After the '}' */
+                    RExC_parse_set(endbrace + 1);  /* After the '}' */
                      vFAIL2("Empty \\%c{}", name);
                  }
  
@@ -13847,13 +14172,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          break;
                      default:
                        bad_bound_type:
-                        RExC_parse = e;
+                        RExC_parse_set(e);
                          vFAIL2utf8f(
                              "'%" UTF8f "' is an unknown bound type",
                              UTF8fARG(UTF, length, e - length));
                          NOT_REACHED; /*NOTREACHED*/
                  }
-                RExC_parse = endbrace;
+                RExC_parse_set(endbrace);
                  REQUIRE_UNI_RULES(flagp, 0);
  
                  if (op == BOUND) {
@@ -13936,15 +14261,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     /* The escapes above that don't take a parameter can't be
                      * followed by a '{'.  But 'pX', 'p{foo}' and
                      * correspondingly 'P' can be */
-            if (   RExC_parse - parse_start == 1
+            if (   RExC_parse - atom_parse_start == 1
                  && UCHARAT(RExC_parse + 1) == '{'
                  && UNLIKELY(! regcurly(RExC_parse + 1, RExC_end, NULL)))
              {
-                RExC_parse += 2;
+                RExC_parse_inc_by(2);
                  vFAIL("Unescaped left brace in regex is illegal here");
              }
-            Set_Node_Offset(REGNODE_p(ret), parse_start);
-            Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1); /* MJD */
              nextchar(pRExC_state);
              break;
          case 'N':
@@ -13976,7 +14299,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              RETURN_FAIL_ON_RESTART_FLAGP(flagp);
  
              /* Here, evaluates to a single code point.  Go get that */
-            RExC_parse = parse_start;
+            RExC_parse_set(atom_parse_start);
              goto defchar;
  
          case 'k':    /* Handle \k<NAME> and \k'NAME' and \k{NAME} */
@@ -13988,19 +14311,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                        && ch != '\''
                                        && ch != '{'))
              {
-                RExC_parse++;
+                RExC_parse_inc_by(1);
                  /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
-                vFAIL2("Sequence %.2s... not terminated", parse_start);
+                vFAIL2("Sequence %.2s... not terminated", atom_parse_start);
              } else {
-                RExC_parse += 2;
+                RExC_parse_inc_by(2);
                  if (ch == '{') {
                      while (isBLANK(*RExC_parse)) {
-                        RExC_parse++;
+                        RExC_parse_inc_by(1);
                      }
                  }
                  ret = handle_named_backref(pRExC_state,
                                             flagp,
-                                           parse_start,
+                                           atom_parse_start,
                                             (ch == '<')
                                             ? '>'
                                             : (ch == '{')
@@ -14044,7 +14367,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                  s++;
                              } while isDIGIT(*s);
  
-                            RExC_parse = s;
+                            RExC_parse_set(s);
                              vFAIL("Unterminated \\g{...} pattern");
                          }
  
@@ -14073,7 +14396,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          goto parse_named_seq;
                      }
  
-                    RExC_parse = s;
+                    RExC_parse_set(s);
                      num = S_backref_value(RExC_parse, RExC_end);
                      if (num == 0)
                          vFAIL("Reference to invalid group 0");
@@ -14111,7 +14434,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                           * to be an octal character escape, e.g. \35 or \777.
                           * The above logic should make it obvious why using
                           * octal escapes in patterns is problematic. - Yves */
-                        RExC_parse = parse_start;
+                        RExC_parse_set(atom_parse_start);
                          goto defchar;
                      }
                  }
@@ -14125,10 +14448,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                   * We've already figured out what value the digits represent.
                   * Now, move the parse to beyond them. */
                  if (endbrace) {
-                    RExC_parse = endbrace + 1;
+                    RExC_parse_set(endbrace + 1);
                  }
                  else while (isDIGIT(*RExC_parse)) {
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                  }
  
                  if (num >= (I32)RExC_npar) {
@@ -14162,9 +14485,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  }
                  *flagp |= HASWIDTH;
  
-                /* override incorrect value set in reganode MJD */
-                Set_Node_Offset(REGNODE_p(ret), parse_start);
-                Set_Node_Cur_Length(REGNODE_p(ret), parse_start-1);
                  skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                          FALSE /* Don't force to /x */ );
              }
@@ -14176,7 +14496,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          default:
              /* Do not generate "unrecognized" warnings here, we fall
                 back into the quick-grab loop below */
-            RExC_parse = parse_start;
+            RExC_parse_set(atom_parse_start);
              goto defchar;
          } /* end of switch on a \foo sequence */
          break;
@@ -14188,7 +14508,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          assert((RExC_flags & RXf_PMf_EXTENDED) == 0);
          /*
          if (RExC_flags & RXf_PMf_EXTENDED) {
-            RExC_parse = reg_skipcomment( pRExC_state, RExC_parse );
+            RExC_parse_set( reg_skipcomment( pRExC_state, RExC_parse ) );
              if (RExC_parse < RExC_end)
                  goto tryagain;
          }
@@ -14277,8 +14597,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              /* Allocate an EXACT node.  The node_type may change below to
               * another EXACTish node, but since the size of the node doesn't
               * change, it works */
-            ret = regnode_guts(pRExC_state, node_type, current_string_nodes,
-                                                                    "exact");
+            ret = REGNODE_GUTS(pRExC_state, node_type, current_string_nodes);
              FILL_NODE(ret, node_type);
              RExC_emit++;
  
@@ -14392,7 +14711,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          p++;
                          break;
                      case 'N': /* Handle a single-code point named character. */
-                        RExC_parse = p + 1;
+                        RExC_parse_set( p + 1 );
                          if (! grok_bslash_N(pRExC_state,
                                              NULL,   /* Fail if evaluates to
                                                         anything other than a
@@ -14412,11 +14731,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              /* Here, it wasn't a single code point.  Go close
                               * up this EXACTish node.  The switch() prior to
                               * this switch handles the other cases */
-                            RExC_parse = p = oldp;
+                            p = oldp;
+                            RExC_parse_set(p);
                              goto loopdone;
                          }
                          p = RExC_parse;
-                        RExC_parse = parse_start;
+                        RExC_parse_set(atom_parse_start);
  
                          /* The \N{} means the pattern, if previously /d,
                           * becomes /u.  That means it can't be an EXACTF node,
@@ -14465,7 +14785,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                              FALSE, /* No illegal cp's */
                                              UTF))
                          {
-                            RExC_parse = p; /* going to die anyway; point to
+                            RExC_parse_set(p); /* going to die anyway; point to
                                                 exact spot of failure */
                              vFAIL(message);
                          }
@@ -14484,7 +14804,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                              FALSE, /* No illegal cp's */
                                              UTF))
                          {
-                            RExC_parse = p;    /* going to die anyway; point
+                            RExC_parse_set(p);        /* going to die anyway; point
                                                     to exact spot of failure */
                              vFAIL(message);
                          }
@@ -14508,9 +14828,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          {
                              /* going to die anyway; point to exact spot of
                               * failure */
-                            RExC_parse = p + ((UTF)
+                            char *new_p= p + ((UTF)
                                                ? UTF8_SAFE_SKIP(p, RExC_end)
                                                : 1);
+                            RExC_parse_set(new_p);
                              vFAIL(message);
                          }
  
@@ -14606,11 +14927,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       *      string of characters instead of a meta construct */
                      if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
                          if (      RExC_strict
-                            || (  p > parse_start + 1
+                            || (  p > atom_parse_start + 1
                                  && isALPHA_A(*(p - 1))
                                  && *(p - 2) == '\\'))
                          {
-                            RExC_parse = p + 1;
+                            RExC_parse_set(p + 1);
                              vFAIL("Unescaped left brace in regex is "
                                    "illegal here");
                          }
@@ -15523,8 +15844,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  *flagp |= HASWIDTH | maybe_SIMPLE;
              }
  
-            Set_Node_Length(REGNODE_p(ret), p - parse_start - 1);
-            RExC_parse = p;
+            RExC_parse_set(p);
  
              {
                  /* len is STRLEN which is unsigned, need to copy to signed */
@@ -15544,7 +15864,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          && OP(REGNODE_p(ret)) != SBOL && ! regcurly(RExC_parse, RExC_end, NULL))
      {
          if (RExC_strict) {
-            RExC_parse++;
+            RExC_parse_inc_by(1);
              vFAIL("Unescaped left brace in regex is illegal here");
          }
          ckWARNreg(RExC_parse + 1, "Unescaped left brace in regex is"
@@ -15876,7 +16196,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
              if (*temp_ptr == ']') {
                  temp_ptr++;
                  if (! found_problem && ! check_only) {
-                    RExC_parse = (char *) temp_ptr;
+                    RExC_parse_set((char *) temp_ptr);
                      vFAIL3("POSIX syntax [%c %c] is reserved for future "
                              "extensions", open_char, open_char);
                  }
@@ -16434,7 +16754,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
              const char * const complement_string = (complement)
                                                     ? "^"
                                                     : "";
-            RExC_parse = (char *) p;
+            RExC_parse_set((char *) p);
              vFAIL3utf8f("POSIX class [:%s%" UTF8f ":] unknown",
                          complement_string,
                          UTF8fARG(UTF, RExC_parse - name_start - 2, name_start));
@@ -16475,8 +16795,7 @@ S_regex_set_precedence(const U8 my_operator) {
  
  STATIC regnode_offset
  S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
-                    I32 *flagp, U32 depth,
-                    char * const oregcomp_parse)
+                    I32 *flagp, U32 depth)
  {
      /* Handle the (?[...]) construct to do set operations */
  
@@ -16505,7 +16824,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
-    PERL_UNUSED_ARG(oregcomp_parse); /* Only for Set_Node_Length */
  
      DEBUG_PARSE("xcls");
  
@@ -16517,10 +16835,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
       * compile time values are valid in all runtime cases */
      REQUIRE_UNI_RULES(flagp, 0);
  
-    ckWARNexperimental(RExC_parse,
-                       WARN_EXPERIMENTAL__REGEX_SETS,
-                       "The regex_sets feature is experimental");
-
      /* Everything in this construct is a metacharacter.  Operands begin with
       * either a '\' (for an escape sequence), or a '[' for a bracketed
       * character class.  Any other character should be an operator, or
@@ -16597,8 +16911,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
       * so that everything gets evaluated down to a single operand, which is the
       * result */
  
-    sv_2mortal((SV *)(stack = newAV()));
-    sv_2mortal((SV *)(fence_stack = newAV()));
+    stack = (AV*)newSV_type_mortal(SVt_PVAV);
+    fence_stack = (AV*)newSV_type_mortal(SVt_PVAV);
  
      while (RExC_parse < RExC_end) {
          I32 top_index;              /* Index of top-most element in 'stack' */
@@ -16636,12 +16950,12 @@ redo_curchar:
  
                  if (   RExC_parse < RExC_end - 2
                      && UCHARAT(RExC_parse + 1) == '?'
-                    && UCHARAT(RExC_parse + 2) == '^')
+                    && strchr("^" STD_PAT_MODS, *(RExC_parse + 2)))
                  {
                      const regnode_offset orig_emit = RExC_emit;
                      SV * resultant_invlist;
  
-                    /* If is a '(?^', could be an embedded '(?^flags:(?[...])'.
+                    /* Here it could be an embedded '(?flags:(?[...])'.
                       * This happens when we have some thing like
                       *
                       *   my $thai_or_lao = qr/(?[ \p{Thai} + \p{Lao} ])/;
@@ -16658,7 +16972,7 @@ redo_curchar:
                       * an error: we need to get a single inversion list back
                       * from the recursion */
  
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      RExC_sets_depth++;
  
                      node = reg(pRExC_state, 2, flagp, depth+1);
@@ -16697,7 +17011,7 @@ redo_curchar:
                                                               FALSE))
                                  || ! IS_OPERATOR(*stacked_ptr))))
                      {
-                        RExC_parse++;
+                        RExC_parse_inc_by(1);
                          vFAIL("Unexpected '(' with no preceding operator");
                      }
                  }
@@ -16742,7 +17056,7 @@ redo_curchar:
                   * to fool regclass() into thinking it is part of a
                   * '[[:posix:]]'. */
                  if (! is_posix_class) {
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                  }
  
                  /* regclass() can only return RESTART_PARSE and NEED_UTF8 if
@@ -16785,13 +17099,13 @@ redo_curchar:
                      if (UCHARAT(RExC_parse - 1) == ']')  {
                          break;
                      }
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      vFAIL("Unexpected ')'");
                  }
  
                  /* If nothing after the fence, is missing an operand */
                  if (top_index - fence < 0) {
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      goto bad_syntax;
                  }
                  /* If at least two things on the stack, treat this as an
@@ -16819,7 +17133,7 @@ redo_curchar:
                      goto handle_operand;
                  }
  
-                RExC_parse++;
+                RExC_parse_inc_by(1);
                  goto bad_syntax;
  
              case '&':
@@ -16870,7 +17184,7 @@ redo_curchar:
                      }
  
                    unexpected_binary:
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      vFAIL2("Unexpected binary operator '%c' with no "
                             "preceding operand", curchar);
                  }
@@ -16970,7 +17284,7 @@ redo_curchar:
                  break;
  
              default:
-                RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+                RExC_parse_inc();
                  if (RExC_parse >= RExC_end) {
                      break;
                  }
@@ -17030,7 +17344,7 @@ redo_curchar:
  
          } /* End of switch on next parse token */
  
-        RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+        RExC_parse_inc();
      } /* End of loop parsing through the construct */
  
      vFAIL("Syntax error in (?[...])");
@@ -17039,7 +17353,7 @@ redo_curchar:
  
      if (RExC_parse >= RExC_end || RExC_parse[1] != ')') {
          if (RExC_parse < RExC_end) {
-            RExC_parse++;
+            RExC_parse_inc_by(1);
          }
  
          vFAIL("Unexpected ']' with no following ')' in (?[...");
@@ -17069,7 +17383,7 @@ redo_curchar:
  
      if (RExC_sets_depth) {  /* If within a recursive call, return in a special
                                 regnode */
-        RExC_parse++;
+        RExC_parse_inc_by(1);
          node = regpnode(pRExC_state, REGEX_SET, final);
      }
      else {
@@ -17091,7 +17405,7 @@ redo_curchar:
          /* About to generate an ANYOF (or similar) node from the inversion list
           * we have calculated */
          save_parse = RExC_parse;
-        RExC_parse = SvPV(result_string, len);
+        RExC_parse_set(SvPV(result_string, len));
          save_end = RExC_end;
          RExC_end = RExC_parse + len;
          TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE;
@@ -17118,7 +17432,7 @@ redo_curchar:
                      );
  
          RESTORE_WARNINGS;
-        RExC_parse = save_parse + 1;
+        RExC_parse_set(save_parse + 1);
          RExC_end = save_end;
          SvREFCNT_dec_NN(final);
          SvREFCNT_dec_NN(result_string);
@@ -17160,7 +17474,6 @@ redo_curchar:
      }
  
      nextchar(pRExC_state);
-    Set_Node_Length(REGNODE_p(node), RExC_parse - oregcomp_parse + 1); /* MJD */
      return node;
  
    regclass_failed:
@@ -17552,8 +17865,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      AV* posix_warnings = NULL;
      const bool do_posix_warnings = ckWARN(WARN_REGEXP);
-    U8 op = ANYOF;    /* The returned node-type, initialized the expected type.
-                       */
+    U8 op = ANYOF;    /* The returned node-type, initialized to the expected
+                         type. */
      U8 anyof_flags = 0;   /* flag bits if the node is an ANYOF-type */
      U32 posixl = 0;       /* bit field of posix classes matched under /l */
  
@@ -17602,7 +17915,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      assert(RExC_parse <= RExC_end);
  
      if (UCHARAT(RExC_parse) == '^') {  /* Complement the class */
-        RExC_parse++;
+        RExC_parse_inc_by(1);
          invert = TRUE;
          allow_mutiple_chars = FALSE;
          MARK_NAUGHTY(1);
@@ -17678,10 +17991,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              value = utf8n_to_uvchr((U8*)RExC_parse,
                                     RExC_end - RExC_parse,
                                     &numlen, UTF8_ALLOW_DEFAULT);
-            RExC_parse += numlen;
+            RExC_parse_inc_by(numlen);
+        }
+        else {
+            value = UCHARAT(RExC_parse);
+            RExC_parse_inc_by(1);
          }
-        else
-            value = UCHARAT(RExC_parse++);
  
          if (value == '[') {
              char * posix_class_end;
@@ -17703,7 +18018,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      av_undef(posix_warnings);
                  }
  
-                RExC_parse = posix_class_end;
+                RExC_parse_set(posix_class_end);
              }
              else if (namedclass == OOB_NAMEDCLASS) {
                  not_posix_region_end = posix_class_end;
@@ -17740,10 +18055,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  value = utf8n_to_uvchr((U8*)RExC_parse,
                                     RExC_end - RExC_parse,
                                     &numlen, UTF8_ALLOW_DEFAULT);
-                RExC_parse += numlen;
+                RExC_parse_inc_by(numlen);
+            }
+            else {
+                value = UCHARAT(RExC_parse);
+                RExC_parse_inc_by(1);
              }
-            else
-                value = UCHARAT(RExC_parse++);
  
              /* Some compilers cannot handle switching on 64-bit integer
               * values, therefore value cannot be an UV.  Yes, this will
@@ -17842,7 +18159,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  char *e;
  
                  if (RExC_pm_flags & PMf_WILDCARD) {
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
                      /* diag_listed_as: Use of %s is not allowed in Unicode
                         property wildcard subpatterns in regex; marked by <--
                         HERE in m/%s/ */
@@ -17859,16 +18176,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      const U8 c = (U8)value;
                      e = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
                      if (!e) {
-                        RExC_parse++;
+                        RExC_parse_inc_by(1);
                          vFAIL2("Missing right brace on \\%c{}", c);
                      }
  
-                    RExC_parse++;
+                    RExC_parse_inc_by(1);
  
                      /* White space is allowed adjacent to the braces and after
                       * any '^', even when not under /x */
                      while (isSPACE(*RExC_parse)) {
-                         RExC_parse++;
+                         RExC_parse_inc_by(1);
                      }
  
                      if (UCHARAT(RExC_parse) == '^') {
@@ -17878,9 +18195,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                           * that bit) */
                          value ^= 'P' ^ 'p';
  
-                        RExC_parse++;
+                        RExC_parse_inc_by(1);
                          while (isSPACE(*RExC_parse)) {
-                            RExC_parse++;
+                            RExC_parse_inc_by(1);
                          }
                      }
  
@@ -17893,9 +18210,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
                  }   /* The \p isn't immediately followed by a '{' */
                  else if (! isALPHA(*RExC_parse)) {
-                    RExC_parse += (UTF)
-                                  ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                                  : 1;
+                    RExC_parse_inc_safe();
                      vFAIL2("Character following \\%c must be '{' or a "
                             "single-character Unicode property name",
                             (U8) value);
@@ -17931,7 +18246,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                             );
                      if (SvCUR(msg)) {   /* Assumes any error causes a msg */
                          assert(prop_definition == NULL);
-                        RExC_parse = e + 1;
+                        RExC_parse_set(e + 1);
                          if (SvUTF8(msg)) {  /* msg being UTF-8 makes the whole
                                                 thing so, or else the display is
                                                 mojibake */
@@ -17947,7 +18262,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      if (strings) {
                          if (ret_invlist) {
                              if (! prop_definition) {
-                                RExC_parse = e + 1;
+                                RExC_parse_set(e + 1);
                                  vFAIL("Unicode string properties are not implemented in (?[...])");
                              }
                              else {
@@ -17958,7 +18273,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          }
                          else if (! RExC_in_multi_char_class) {
                              if (invert ^ (value == 'P')) {
-                                RExC_parse = e + 1;
+                                RExC_parse_set(e + 1);
                                  vFAIL("Inverting a character class which contains"
                                      " a multi-character sequence is illegal");
                              }
@@ -18065,7 +18380,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      }
                  }
  
-                RExC_parse = e + 1;
+                RExC_parse_set(e + 1);
                  namedclass = ANYOF_UNIPROP;  /* no official name, but it's
                                                  named */
                  }
@@ -18127,14 +18442,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  {
                      /* going to die anyway; point to exact spot of
                          * failure */
-                    RExC_parse += (UTF)
-                                  ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                                  : 1;
+                    RExC_parse_inc_safe();
                      vFAIL(message);
                  }
  
                  value = grok_c_char;
-                RExC_parse++;
+                RExC_parse_inc_by(1);
                  if (message && TO_OUTPUT_WARNINGS(RExC_parse)) {
                      warn_non_literal_string(RExC_parse, packed_warn, message);
                  }
@@ -18149,12 +18462,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                | PERL_SCAN_NOTIFY_ILLDIGIT;
                      numlen = (strict) ? 4 : 3;
                      value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
-                    RExC_parse += numlen;
+                    RExC_parse_inc_by(numlen);
                      if (numlen != 3) {
                          if (strict) {
-                            RExC_parse += (UTF)
-                                          ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                                          : 1;
+                            RExC_parse_inc_safe();
                              vFAIL("Need exactly 3 octal digits");
                          }
                          else if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
@@ -18395,7 +18706,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  /* If the '-' is at the end of the class (just before the ']',
                   * it is a literal minus; otherwise it is a range */
                  if (next_char_ptr < RExC_end && *next_char_ptr != ']') {
-                    RExC_parse = next_char_ptr;
+                    RExC_parse_set(next_char_ptr);
  
                      /* a bad range like \w-, [:word:]- ? */
                      if (namedclass > OOB_NAMEDCLASS) {
@@ -18491,7 +18802,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      if (! RExC_in_multi_char_class) {
                          STRLEN cp_count = utf8_length(foldbuf,
                                                        foldbuf + foldlen);
-                        SV* multi_fold = sv_2mortal(newSVpvs(""));
+                        SV* multi_fold = newSVpvs_flags("", SVs_TEMP);
  
                          Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%" UVXf "}", value);
  
@@ -18778,7 +19089,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
           * reported.  See the comments at the definition of
           * REPORT_LOCATION_ARGS for details */
          RExC_copy_start_in_input = (char *) orig_parse;
-        RExC_start = RExC_parse = SvPV(substitute_parse, len);
+        RExC_start = SvPV(substitute_parse, len);
+        RExC_parse_set( RExC_start );
          RExC_copy_start_in_constructed = RExC_start + constructed_prefix_len;
          RExC_end = RExC_parse + len;
          RExC_in_multi_char_class = 1;
@@ -18788,7 +19100,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          *flagp |= reg_flags & (HASWIDTH|SIMPLE|POSTPONED|RESTART_PARSE|NEED_UTF8);
  
          /* And restore so can parse the rest of the pattern */
-        RExC_parse = save_parse;
+        RExC_parse_set(save_parse);
          RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = save_start;
          RExC_end = save_end;
          RExC_in_multi_char_class = 0;
@@ -19100,7 +19412,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
               * the issues involved */
              if (warn_super) {
                  warn_super = ! (invert
-                               ^ (invlist_highest(cp_list) > PERL_UNICODE_MAX));
+                               ^ (UNICODE_IS_SUPER(invlist_highest(cp_list))));
              }
  
              _invlist_union(properties, cp_list, &cp_list);
@@ -19212,19 +19524,22 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                              &anyof_flags, &invert, &ret, flagp);
          RETURN_FAIL_ON_RESTART_FLAGP(flagp);
  
-        /* If optimized to something else, finish up and return */
+        /* If optimized to something else and emitted, clean up and return */
          if (ret >= 0) {
-            Set_Node_Offset_Length(REGNODE_p(ret), orig_parse - RExC_start,
-                                                   RExC_parse - orig_parse);;
              SvREFCNT_dec(cp_list);;
              SvREFCNT_dec(only_utf8_locale_list);
              SvREFCNT_dec(upper_latin1_only_utf8_matches);
              return ret;
          }
+
+        /* If no optimization was found, an END was returned and we will now
+         * emit an ANYOF */
+        if (op == END) {
+            op = ANYOF;
+        }
      }
  
-    /* Here didn't optimize, or optimized to a specialized ANYOF node.  If the
-     * former, set the particular type */
+    /* Here we are going to emit an ANYOF; set the particular type */
      if (op == ANYOF) {
          if (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY) {
              op = ANYOFD;
@@ -19237,7 +19552,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          }
      }
  
-    ret = regnode_guts(pRExC_state, op, regarglen[op], "anyof");
+    ret = REGNODE_GUTS(pRExC_state, op, regarglen[op]);
      FILL_NODE(ret, op);        /* We set the argument later */
      RExC_emit += 1 + regarglen[op];
      ANYOF_FLAGS(REGNODE_p(ret)) = anyof_flags;
@@ -19305,84 +19620,83 @@ S_optimize_regclass(pTHX_
       * ANYOF node.  The parameter names are the same as the corresponding
       * variables in S_regclass.
       *
-     * It returns the new op (ANYOF if no optimization found) and sets *ret to
-     * any created regnode.  If the new op is sufficiently like plain ANYOF, it
-     * leaves *ret unchanged for allocation in S_regclass.
+     * It returns the new op (the impossible END one if no optimization found)
+     * and sets *ret to any created regnode.  If the new op is sufficiently
+     * like plain ANYOF, it leaves *ret unchanged for allocation in S_regclass.
       *
       * Certain of the parameters may be updated as a result of the changes
       * herein */
  
-        U8 op = ANYOF; /* The returned node-type, initialized to the unoptimized
-                        one. */
-        UV value;
-        PERL_UINT_FAST8_T i;
-        UV partial_cp_count = 0;
-        UV start[MAX_FOLD_FROMS+1] = { 0 }; /* +1 for the folded-to char */
-        UV   end[MAX_FOLD_FROMS+1] = { 0 };
-        bool single_range = FALSE;
+    U8 op = END;    /* The returned node-type, initialized to an impossible
+                      one. */
+    UV value = 0;
+    PERL_UINT_FAST8_T i;
+    UV partial_cp_count = 0;
+    UV start[MAX_FOLD_FROMS+1] = { 0 }; /* +1 for the folded-to char */
+    UV   end[MAX_FOLD_FROMS+1] = { 0 };
+    bool single_range = FALSE;
+    UV lowest_cp = 0, highest_cp = 0;
  
-        PERL_ARGS_ASSERT_OPTIMIZE_REGCLASS;
+    PERL_ARGS_ASSERT_OPTIMIZE_REGCLASS;
  
      if (cp_list) { /* Count the code points in enough ranges that we would see
                        all the ones possible in any fold in this version of
                        Unicode */
  
-            invlist_iterinit(cp_list);
-            for (i = 0; i <= MAX_FOLD_FROMS; i++) {
-                if (! invlist_iternext(cp_list, &start[i], &end[i])) {
-                    break;
-                }
-                partial_cp_count += end[i] - start[i] + 1;
+        invlist_iterinit(cp_list);
+        for (i = 0; i <= MAX_FOLD_FROMS; i++) {
+            if (! invlist_iternext(cp_list, &start[i], &end[i])) {
+                break;
              }
+            partial_cp_count += end[i] - start[i] + 1;
+        }
  
-            if (i == 1) {
-                single_range = TRUE;
-            }
-            invlist_iterfinish(cp_list);
+        if (i == 1) {
+            single_range = TRUE;
          }
+        invlist_iterfinish(cp_list);
  
-    /* If we know at compile time that this matches every possible code point,
-     * any run-time dependencies don't matter */
+        /* If we know at compile time that this matches every possible code
+         * point, any run-time dependencies don't matter */
          if (start[0] == 0 && end[0] == UV_MAX) {
              if (*invert) {
-                op = OPFAIL;
-                *ret = reganode(pRExC_state, op, 0);
+                goto return_OPFAIL;
              }
              else {
-                op = SANY;
-                *ret = reg_node(pRExC_state, op);
-                MARK_NAUGHTY(1);
+                goto return_SANY;
              }
-            return op;
          }
  
+        /* Use a clearer mnemonic for below */
+        lowest_cp = start[0];
+
+        highest_cp = invlist_highest(cp_list);
+    }
+
      /* Similarly, for /l posix classes, if both a class and its complement
       * match, any run-time dependencies don't matter */
-        if (posixl) {
-            int namedclass;
+    if (posixl) {
+        int namedclass;
          for (namedclass = 0; namedclass < ANYOF_POSIXL_MAX; namedclass += 2) {
-                if (   POSIXL_TEST(posixl, namedclass)      /* class */
-                    && POSIXL_TEST(posixl, namedclass + 1)) /* its complement */
-                {
-                    if (*invert) {
-                        op = OPFAIL;
-                        *ret = reganode(pRExC_state, op, 0);
-                    }
-                    else {
-                        op = SANY;
-                        *ret = reg_node(pRExC_state, op);
-                        MARK_NAUGHTY(1);
-                    }
-                    return op;
+            if (   POSIXL_TEST(posixl, namedclass)      /* class */
+                && POSIXL_TEST(posixl, namedclass + 1)) /* its complement */
+            {
+                if (*invert) {
+                    goto return_OPFAIL;
+                }
+                else {
+                    goto return_SANY;
                  }
+                return op;
              }
+        }
  
          /* For well-behaved locales, some classes are subsets of others, so
           * complementing the subset and including the non-complemented superset
           * should match everything, like [\D[:alnum:]], and
-             * [[:^alpha:][:alnum:]], but some implementations of locales are
-             * buggy, and khw thinks its a bad idea to have optimization change
-             * behavior, even if it avoids an OS bug in a given case */
+         * [[:^alpha:][:alnum:]], but some implementations of locales are
+         * buggy, and khw thinks it's a bad idea to have optimization change
+         * behavior, even if it avoids an OS bug in a given case */
  
  #define isSINGLE_BIT_SET(n) isPOWER_OF_2(n)
  
@@ -19391,98 +19705,85 @@ S_optimize_regclass(pTHX_
           * determinable until runtime, but will match whatever the class does
           * outside that range.  (Note that some classes won't match anything
           * outside the range, like [:ascii:]) */
-            if (    isSINGLE_BIT_SET(posixl)
-                && (partial_cp_count == 0 || start[0] > 255))
-            {
-                U8 classnum;
-                SV * class_above_latin1 = NULL;
-                bool already_inverted;
-                bool are_equivalent;
-
-                /* Compute which bit is set, which is the same thing as, e.g.,
-                 * ANYOF_CNTRL.  From
-                 * https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
-                 * */
-            static const int MultiplyDeBruijnBitPosition2[32] = {
-                    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
-                    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
-                    };
+        if (   isSINGLE_BIT_SET(posixl)
+            && (partial_cp_count == 0 || lowest_cp > 255))
+        {
+            U8 classnum;
+            SV * class_above_latin1 = NULL;
+            bool already_inverted;
+            bool are_equivalent;
+
  
-                namedclass = MultiplyDeBruijnBitPosition2[(posixl
-                                                          * 0x077CB531U) >> 27];
-                classnum = namedclass_to_classnum(namedclass);
+            namedclass = single_1bit_pos32(posixl);
+            classnum = namedclass_to_classnum(namedclass);
  
-                /* The named classes are such that the inverted number is one
-                 * larger than the non-inverted one */
+            /* The named classes are such that the inverted number is one
+             * larger than the non-inverted one */
              already_inverted = namedclass - classnum_to_namedclass(classnum);
  
              /* Create an inversion list of the official property, inverted if
               * the constructed node list is inverted, and restricted to only
               * the above latin1 code points, which are the only ones known at
               * compile time */
-                _invlist_intersection_maybe_complement_2nd(
-                                                    PL_AboveLatin1,
-                                                    PL_XPosix_ptrs[classnum],
-                                                    already_inverted,
-                                                    &class_above_latin1);
+            _invlist_intersection_maybe_complement_2nd(
+                                                PL_AboveLatin1,
+                                                PL_XPosix_ptrs[classnum],
+                                                already_inverted,
+                                                &class_above_latin1);
              are_equivalent = _invlistEQ(class_above_latin1, cp_list, FALSE);
-                SvREFCNT_dec_NN(class_above_latin1);
+            SvREFCNT_dec_NN(class_above_latin1);
  
-                if (are_equivalent) {
+            if (are_equivalent) {
  
-                    /* Resolve the run-time inversion flag with this possibly
-                     * inverted class */
-                    *invert = *invert ^ already_inverted;
+                /* Resolve the run-time inversion flag with this possibly
+                 * inverted class */
+                *invert = *invert ^ already_inverted;
  
-                    op = POSIXL + *invert * (NPOSIXL - POSIXL);
-                    *ret = reg_node(pRExC_state, op);
-                    FLAGS(REGNODE_p(*ret)) = classnum;
-                    return op;
-                }
+                op = POSIXL + *invert * (NPOSIXL - POSIXL);
+                *ret = reg_node(pRExC_state, op);
+                FLAGS(REGNODE_p(*ret)) = classnum;
+                return op;
              }
          }
+    }
  
      /* khw can't think of any other possible transformation involving these. */
-        if (has_runtime_dependency & HAS_USER_DEFINED_PROPERTY) {
-            return op;
-        }
+    if (has_runtime_dependency & HAS_USER_DEFINED_PROPERTY) {
+        return END;
+    }
  
-        if (! has_runtime_dependency) {
+    if (! has_runtime_dependency) {
  
          /* If the list is empty, nothing matches.  This happens, for example,
           * when a Unicode property that doesn't match anything is the only
           * element in the character class (perluniprops.pod notes such
           * properties). */
-            if (partial_cp_count == 0) {
-                if (*invert) {
-                    op = SANY;
-                    *ret = reg_node(pRExC_state, op);
-                }
-                else {
-                    op = OPFAIL;
-                    *ret = reganode(pRExC_state, op, 0);
-                }
-
-                return op;
+        if (partial_cp_count == 0) {
+            if (*invert) {
+                goto return_SANY;
              }
-
-            /* If matches everything but \n */
-            if (   start[0] == 0 && end[0] == '\n' - 1
-                && start[1] == '\n' + 1 && end[1] == UV_MAX)
-            {
-                assert (! *invert);
-                op = REG_ANY;
-                *ret = reg_node(pRExC_state, op);
-                MARK_NAUGHTY(1);
-                return op;
+            else {
+                goto return_OPFAIL;
              }
          }
  
-        /* Next see if can optimize classes that contain just a few code points
+        /* If matches everything but \n */
+        if (   start[0] == 0 && end[0] == '\n' - 1
+            && start[1] == '\n' + 1 && end[1] == UV_MAX)
+        {
+            assert (! *invert);
+            op = REG_ANY;
+            *ret = reg_node(pRExC_state, op);
+            MARK_NAUGHTY(1);
+            return op;
+        }
+    }
+
+    /* Next see if can optimize classes that contain just a few code points
       * into an EXACTish node.  The reason to do this is to let the optimizer
       * join this node with adjacent EXACTish ones, and ANYOF nodes require
-     * runtime conversion to code point from UTF-8.
-         *
+     * runtime conversion to code point from UTF-8, which we'd like to avoid.
+     *
       * An EXACTFish node can be generated even if not under /i, and vice versa.
       * But care must be taken.  An EXACTFish node has to be such that it only
       * matches precisely the code points in the class, but we want to generate
@@ -19496,130 +19797,129 @@ S_optimize_regclass(pTHX_
       * is no simple fold that includes \X{02BC}, there is a multi-char fold
       * that does, and so the node generated for it must be an EXACTFish one.
       * On the other hand qr/:/i should generate a plain EXACT node since the
-     * colon participates in no fold whatsoever, and having it EXACT tells the
-     * optimizer the target string cannot match unless it has a colon in it.
-         */
-        if (   ! posixl
-            && ! *invert
-
-        /* Only try if there are no more code points in the class than in
-         * the max possible fold */
-            &&   inRANGE(partial_cp_count, 1, MAX_FOLD_FROMS + 1))
-        {
+     * colon participates in no fold whatsoever, and having it be EXACT tells
+     * the optimizer the target string cannot match unless it has a colon in
+     * it. */
+    if (   ! posixl
+        && ! *invert
+
+            /* Only try if there are no more code points in the class than in
+             * the max possible fold */
+        &&   inRANGE(partial_cp_count, 1, MAX_FOLD_FROMS + 1))
+    {
          /* We can always make a single code point class into an EXACTish node.
           * */
          if (partial_cp_count == 1 && ! upper_latin1_only_utf8_matches) {
-                if (LOC) {
-
-            /* Here is /l:  Use EXACTL, except if there is a fold not known
-             * until runtime so shows as only a single code point here.
-             * For code points above 255, we know which can cause problems
-             * by having a potential fold to the Latin1 range. */
-                    if (  ! FOLD
-                        || (     start[0] > 255
-                            && ! is_PROBLEMATIC_LOCALE_FOLD_cp(start[0])))
-                    {
-                        op = EXACTL;
-                    }
-                    else {
-                        op = EXACTFL;
-                    }
+            if (LOC) {
+
+                /* Here is /l:  Use EXACTL, except if there is a fold not known
+                 * until runtime so shows as only a single code point here.
+                 * For code points above 255, we know which can cause problems
+                 * by having a potential fold to the Latin1 range. */
+                if (  ! FOLD
+                    || (     lowest_cp > 255
+                        && ! is_PROBLEMATIC_LOCALE_FOLD_cp(lowest_cp)))
+                {
+                    op = EXACTL;
                  }
-                else if (! FOLD) { /* Not /l and not /i */
-                    op = (start[0] < 256) ? EXACT : EXACT_REQ8;
+                else {
+                    op = EXACTFL;
                  }
-                else if (start[0] < 256) { /* /i, not /l, and the code point is
-                                              small */
+            }
+            else if (! FOLD) { /* Not /l and not /i */
+                op = (lowest_cp < 256) ? EXACT : EXACT_REQ8;
+            }
+            else if (lowest_cp < 256) { /* /i, not /l, and the code point is
+                                          small */
  
-                    /* Under /i, it gets a little tricky.  A code point that
+                /* Under /i, it gets a little tricky.  A code point that
                   * doesn't participate in a fold should be an EXACT node.  We
                   * know this one isn't the result of a simple fold, or there'd
                   * be more than one code point in the list, but it could be
-                 * part of a multi- character fold.  In that case we better not
+                 * part of a multi-character fold.  In that case we better not
                   * create an EXACT node, as we would wrongly be telling the
                   * optimizer that this code point must be in the target string,
                   * and that is wrong.  This is because if the sequence around
                   * this code point forms a multi-char fold, what needs to be in
                   * the string could be the code point that folds to the
                   * sequence.
-                     *
+                 *
                   * This handles the case of below-255 code points, as we have
                   * an easy look up for those.  The next clause handles the
                   * above-256 one */
-                    op = IS_IN_SOME_FOLD_L1(start[0])
-                         ? EXACTFU
-                         : EXACT;
-                }
+                op = IS_IN_SOME_FOLD_L1(lowest_cp)
+                     ? EXACTFU
+                     : EXACT;
+            }
              else {  /* /i, larger code point.  Since we are under /i, and have
                         just this code point, we know that it can't fold to
                         something else, so PL_InMultiCharFold applies to it */
-                op = (_invlist_contains_cp(PL_InMultiCharFold, start[0]))
+                op = (_invlist_contains_cp(PL_InMultiCharFold, lowest_cp))
                           ? EXACTFU_REQ8
                           : EXACT_REQ8;
                  }
  
-                value = start[0];
-            }
-            else if (  ! (has_runtime_dependency & ~HAS_D_RUNTIME_DEPENDENCY)
-                     && _invlist_contains_cp(PL_in_some_fold, start[0]))
-            {
+                value = lowest_cp;
+        }
+        else if (  ! (has_runtime_dependency & ~HAS_D_RUNTIME_DEPENDENCY)
+                 && _invlist_contains_cp(PL_in_some_fold, lowest_cp))
+        {
              /* Here, the only runtime dependency, if any, is from /d, and the
               * class matches more than one code point, and the lowest code
               * point participates in some fold.  It might be that the other
               * code points are /i equivalent to this one, and hence they would
-             * representable by an EXACTFish node.  Above, we eliminated
+             * be representable by an EXACTFish node.  Above, we eliminated
               * classes that contain too many code points to be EXACTFish, with
               * the test for MAX_FOLD_FROMS
-                 *
+             *
               * First, special case the ASCII fold pairs, like 'B' and 'b'.  We
               * do this because we have EXACTFAA at our disposal for the ASCII
               * range */
-                if (partial_cp_count == 2 && isASCII(start[0])) {
-
-                    /* The only ASCII characters that participate in folds are
-                     * alphabetics */
-                    assert(isALPHA(start[0]));
-                    if (   end[0] == start[0]   /* First range is a single
-                                                   character, so 2nd exists */
-                        && isALPHA_FOLD_EQ(start[0], start[1]))
-                    {
-
-                        /* Here, is part of an ASCII fold pair */
+            if (partial_cp_count == 2 && isASCII(lowest_cp)) {
+
+                /* The only ASCII characters that participate in folds are
+                 * alphabetics */
+                assert(isALPHA(lowest_cp));
+                if (   end[0] == start[0]   /* First range is a single
+                                               character, so 2nd exists */
+                    && isALPHA_FOLD_EQ(start[0], start[1]))
+                {
+                    /* Here, is part of an ASCII fold pair */
  
-                        if (   ASCII_FOLD_RESTRICTED
-                            || HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(start[0]))
-                        {
-                    /* If the second clause just above was true, it means
-                     * we can't be under /i, or else the list would have
-                     * included more than this fold pair.  Therefore we
-                     * have to exclude the possibility of whatever else it
-                     * is that folds to these, by using EXACTFAA */
-                            op = EXACTFAA;
-                        }
-                        else if (HAS_NONLATIN1_FOLD_CLOSURE(start[0])) {
+                    if (   ASCII_FOLD_RESTRICTED
+                        || HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(lowest_cp))
+                    {
+                        /* If the second clause just above was true, it means
+                         * we can't be under /i, or else the list would have
+                         * included more than this fold pair.  Therefore we
+                         * have to exclude the possibility of whatever else it
+                         * is that folds to these, by using EXACTFAA */
+                        op = EXACTFAA;
+                    }
+                    else if (HAS_NONLATIN1_FOLD_CLOSURE(lowest_cp)) {
  
-                            /* Here, there's no simple fold that start[0] is part
+                        /* Here, there's no simple fold that lowest_cp is part
                           * of, but there is a multi-character one.  If we are
                           * not under /i, we want to exclude that possibility;
                           * if under /i, we want to include it */
-                            op = (FOLD) ? EXACTFU : EXACTFAA;
-                        }
-                        else {
-
-                            /* Here, the only possible fold start[0] particpates in
-                             * is with start[1].  /i or not isn't relevant */
-                            op = EXACTFU;
-                        }
+                        op = (FOLD) ? EXACTFU : EXACTFAA;
+                    }
+                    else {
  
-                        value = toFOLD(start[0]);
+                        /* Here, the only possible fold lowest_cp participates in
+                         * is with start[1].  /i or not isn't relevant */
+                        op = EXACTFU;
                      }
+
+                    value = toFOLD(lowest_cp);
                  }
-                else if (  ! upper_latin1_only_utf8_matches
+            }
+            else if (  ! upper_latin1_only_utf8_matches
                       || (   _invlist_len(upper_latin1_only_utf8_matches) == 2
-                             && PL_fold_latin1[
-                               invlist_highest(upper_latin1_only_utf8_matches)]
-                             == start[0]))
-                {
+                         && PL_fold_latin1[
+                           invlist_highest(upper_latin1_only_utf8_matches)]
+                         == lowest_cp))
+            {
                  /* Here, the smallest character is non-ascii or there are more
                   * than 2 code points matched by this node.  Also, we either
                   * don't have /d UTF-8 dependent matches, or if we do, they
@@ -19632,9 +19932,9 @@ S_optimize_regclass(pTHX_
                   * above about exceeding the array bounds of PL_fold_latin1[]
                   * because any code point in 'upper_latin1_only_utf8_matches'
                   * is below 256.)
-                     *
-                     * EXACTFAA would apply only to pairs (hence exactly 2 code
-                     * points) in the ASCII range, so we can't use it here to
+                 *
+                 * EXACTFAA would apply only to pairs (hence exactly 2 code
+                 * points) in the ASCII range, so we can't use it here to
                   * artificially restrict the fold domain, so we check if the
                   * class does or does not match some EXACTFish node.  Further,
                   * if we aren't under /i, and and the folded-to character is
@@ -19643,57 +19943,57 @@ S_optimize_regclass(pTHX_
                   * multi-character fold, and we don't here know the context, so
                   * we have to assume it is that multi-char fold, to prevent
                   * potential bugs.
-                     *
+                 *
                   * To do the general case, we first find the fold of the lowest
-                 * code point (which may be higher than the lowest one), then
-                 * find everything that folds to it.  (The data structure we
-                 * have only maps from the folded code points, so we have to do
-                 * the earlier step.) */
-
-                    Size_t foldlen;
-                    U8 foldbuf[UTF8_MAXBYTES_CASE];
-                UV folded = _to_uni_fold_flags(start[0], foldbuf, &foldlen, 0);
-                    U32 first_fold;
-                    const U32 * remaining_folds;
-                    Size_t folds_to_this_cp_count = _inverse_folds(
+                 * code point (which may be higher than that lowest unfolded
+                 * one), then find everything that folds to it.  (The data
+                 * structure we have only maps from the folded code points, so
+                 * we have to do the earlier step.) */
+
+                Size_t foldlen;
+                U8 foldbuf[UTF8_MAXBYTES_CASE];
+                UV folded = _to_uni_fold_flags(lowest_cp, foldbuf, &foldlen, 0);
+                U32 first_fold;
+                const U32 * remaining_folds;
+                Size_t folds_to_this_cp_count = _inverse_folds(
                                                              folded,
                                                              &first_fold,
                                                              &remaining_folds);
-                    Size_t folds_count = folds_to_this_cp_count + 1;
-                    SV * fold_list = _new_invlist(folds_count);
-                    unsigned int i;
-
-                    /* If there are UTF-8 dependent matches, create a temporary
-                     * list of what this node matches, including them. */
-                    SV * all_cp_list = NULL;
-                    SV ** use_this_list = &cp_list;
-
-                    if (upper_latin1_only_utf8_matches) {
-                        all_cp_list = _new_invlist(0);
-                        use_this_list = &all_cp_list;
-                        _invlist_union(cp_list,
-                                       upper_latin1_only_utf8_matches,
-                                       use_this_list);
-                    }
+                Size_t folds_count = folds_to_this_cp_count + 1;
+                SV * fold_list = _new_invlist(folds_count);
+                unsigned int i;
  
-                    /* Having gotten everything that participates in the fold
-                     * containing the lowest code point, we turn that into an
-                     * inversion list, making sure everything is included. */
-                    fold_list = add_cp_to_invlist(fold_list, start[0]);
-                    fold_list = add_cp_to_invlist(fold_list, folded);
-                    if (folds_to_this_cp_count > 0) {
-                        fold_list = add_cp_to_invlist(fold_list, first_fold);
-                        for (i = 0; i + 1 < folds_to_this_cp_count; i++) {
-                            fold_list = add_cp_to_invlist(fold_list,
-                                                        remaining_folds[i]);
-                        }
+                /* If there are UTF-8 dependent matches, create a temporary
+                 * list of what this node matches, including them. */
+                SV * all_cp_list = NULL;
+                SV ** use_this_list = &cp_list;
+
+                if (upper_latin1_only_utf8_matches) {
+                    all_cp_list = _new_invlist(0);
+                    use_this_list = &all_cp_list;
+                    _invlist_union(cp_list,
+                                   upper_latin1_only_utf8_matches,
+                                   use_this_list);
+                }
+
+                /* Having gotten everything that participates in the fold
+                 * containing the lowest code point, we turn that into an
+                 * inversion list, making sure everything is included. */
+                fold_list = add_cp_to_invlist(fold_list, lowest_cp);
+                fold_list = add_cp_to_invlist(fold_list, folded);
+                if (folds_to_this_cp_count > 0) {
+                    fold_list = add_cp_to_invlist(fold_list, first_fold);
+                    for (i = 0; i + 1 < folds_to_this_cp_count; i++) {
+                        fold_list = add_cp_to_invlist(fold_list,
+                                                    remaining_folds[i]);
                      }
+                }
  
                  /* If the fold list is identical to what's in this ANYOF node,
                   * the node can be represented by an EXACTFish one instead */
-                    if (_invlistEQ(*use_this_list, fold_list,
-                                   0 /* Don't complement */ )
-                    ) {
+                if (_invlistEQ(*use_this_list, fold_list,
+                               0 /* Don't complement */ )
+                ) {
  
                      /* But, we have to be careful, as mentioned above.  Just
                       * the right sequence of characters could match this if it
@@ -19703,107 +20003,107 @@ S_optimize_regclass(pTHX_
                       * we aren't under /i and this character participates in a
                       * multi-char fold, we don't optimize into an EXACTFish
                       * node.  So, for each case below we have to check if we
-                     * are folding and if not, if it is not part of a
+                     * are folding, and if not, if it is not part of a
                       * multi-char fold.  */
-                        if (start[0] > 255) {    /* Highish code point */
-                            if (FOLD || ! _invlist_contains_cp(
-                                            PL_InMultiCharFold, folded))
-                            {
-                                op = (LOC)
-                                     ? EXACTFLU8
-                                     : (ASCII_FOLD_RESTRICTED)
-                                       ? EXACTFAA
-                                       : EXACTFU_REQ8;
-                                value = folded;
-                            }
-                        }   /* Below, the lowest code point < 256 */
-                        else if (    FOLD
-                                 &&  folded == 's'
-                                 &&  DEPENDS_SEMANTICS)
+                    if (lowest_cp > 255) {    /* Highish code point */
+                        if (FOLD || ! _invlist_contains_cp(
+                                                   PL_InMultiCharFold, folded))
+                        {
+                            op = (LOC)
+                                 ? EXACTFLU8
+                                 : (ASCII_FOLD_RESTRICTED)
+                                   ? EXACTFAA
+                                   : EXACTFU_REQ8;
+                            value = folded;
+                        }
+                    }   /* Below, the lowest code point < 256 */
+                    else if (    FOLD
+                             &&  folded == 's'
+                             &&  DEPENDS_SEMANTICS)
                      {   /* An EXACTF node containing a single character 's',
                             can be an EXACTFU if it doesn't get joined with an
                             adjacent 's' */
-                            op = EXACTFU_S_EDGE;
-                            value = folded;
-                        }
-                        else if (    FOLD
-                                || ! HAS_NONLATIN1_FOLD_CLOSURE(start[0]))
-                        {
-                            if (upper_latin1_only_utf8_matches) {
-                                op = EXACTF;
+                        op = EXACTFU_S_EDGE;
+                        value = folded;
+                    }
+                    else if (     FOLD
+                             || ! HAS_NONLATIN1_FOLD_CLOSURE(lowest_cp))
+                    {
+                        if (upper_latin1_only_utf8_matches) {
+                            op = EXACTF;
  
-                                /* We can't use the fold, as that only matches
-                                 * under UTF-8 */
-                                value = start[0];
-                            }
-                            else if (     UNLIKELY(start[0] == MICRO_SIGN)
-                                     && ! UTF)
-                    {   /* EXACTFUP is a special node for this character */
-                                op = (ASCII_FOLD_RESTRICTED)
-                                     ? EXACTFAA
-                                     : EXACTFUP;
-                                value = MICRO_SIGN;
-                            }
-                            else if (     ASCII_FOLD_RESTRICTED
-                                     && ! isASCII(start[0]))
+                            /* We can't use the fold, as that only matches
+                             * under UTF-8 */
+                            value = lowest_cp;
+                        }
+                        else if (     UNLIKELY(lowest_cp == MICRO_SIGN)
+                                 && ! UTF)
+                        {   /* EXACTFUP is a special node for this character */
+                            op = (ASCII_FOLD_RESTRICTED)
+                                 ? EXACTFAA
+                                 : EXACTFUP;
+                            value = MICRO_SIGN;
+                        }
+                        else if (     ASCII_FOLD_RESTRICTED
+                                 && ! isASCII(lowest_cp))
                          {   /* For ASCII under /iaa, we can use EXACTFU below
                               */
-                                op = EXACTFAA;
-                                value = folded;
-                            }
-                            else {
-                                op = EXACTFU;
-                                value = folded;
-                            }
+                            op = EXACTFAA;
+                            value = folded;
+                        }
+                        else {
+                            op = EXACTFU;
+                            value = folded;
                          }
                      }
-
-                    SvREFCNT_dec_NN(fold_list);
-                    SvREFCNT_dec(all_cp_list);
                  }
+
+                SvREFCNT_dec_NN(fold_list);
+                SvREFCNT_dec(all_cp_list);
              }
+        }
  
-            if (op != ANYOF) {
-                U8 len;
+        if (op != END) {
+            U8 len;
  
-                /* Here, we have calculated what EXACTish node to use.  Have to
-                 * convert to UTF-8 if not already there */
-                if (value > 255) {
-                    if (! UTF) {
-                        SvREFCNT_dec(cp_list);;
-                        REQUIRE_UTF8(flagp);
-                    }
+            /* Here, we have calculated what EXACTish node to use.  Have to
+             * convert to UTF-8 if not already there */
+            if (value > 255) {
+                if (! UTF) {
+                    SvREFCNT_dec(cp_list);;
+                    REQUIRE_UTF8(flagp);
+                }
  
-                    /* This is a kludge to the special casing issues with this
+                /* This is a kludge to the special casing issues with this
                   * ligature under /aa.  FB05 should fold to FB06, but the call
                   * above to _to_uni_fold_flags() didn't find this, as it didn't
                   * use the /aa restriction in order to not miss other folds
                   * that would be affected.  This is the only instance likely to
                   * ever be a problem in all of Unicode.  So special case it. */
-                    if (   value == LATIN_SMALL_LIGATURE_LONG_S_T
-                        && ASCII_FOLD_RESTRICTED)
-                    {
-                        value = LATIN_SMALL_LIGATURE_ST;
-                    }
+                if (   value == LATIN_SMALL_LIGATURE_LONG_S_T
+                    && ASCII_FOLD_RESTRICTED)
+                {
+                    value = LATIN_SMALL_LIGATURE_ST;
                  }
+            }
  
-                len = (UTF) ? UVCHR_SKIP(value) : 1;
+            len = (UTF) ? UVCHR_SKIP(value) : 1;
  
-                *ret = regnode_guts(pRExC_state, op, len, "exact");
-                FILL_NODE(*ret, op);
-                RExC_emit += 1 + STR_SZ(len);
-                setSTR_LEN(REGNODE_p(*ret), len);
-                if (len == 1) {
-                    *STRINGs(REGNODE_p(*ret)) = (U8) value;
-                }
-                else {
-                    uvchr_to_utf8((U8 *) STRINGs(REGNODE_p(*ret)), value);
-                }
-                return op;
+            *ret = REGNODE_GUTS(pRExC_state, op, len);
+            FILL_NODE(*ret, op);
+            RExC_emit += 1 + STR_SZ(len);
+            setSTR_LEN(REGNODE_p(*ret), len);
+            if (len == 1) {
+                *STRINGs(REGNODE_p(*ret)) = (U8) value;
              }
+            else {
+                uvchr_to_utf8((U8 *) STRINGs(REGNODE_p(*ret)), value);
+            }
+            return op;
          }
+    }
  
-        if (! has_runtime_dependency) {
+    if (! has_runtime_dependency) {
  
          /* See if this can be turned into an ANYOFM node.  Think about the bit
           * patterns in two different bytes.  In some positions, the bits in
@@ -19825,59 +20125,60 @@ S_optimize_regclass(pTHX_
           * can benefit from the speed up.  We can only do this on UTF-8
           * invariant bytes, because they have the same bit patterns under UTF-8
           * as not. */
-            PERL_UINT_FAST8_T inverted = 0;
-#ifdef EBCDIC
-            const PERL_UINT_FAST8_T max_permissible = 0xFF;
-#else
-            const PERL_UINT_FAST8_T max_permissible = 0x7F;
-#endif
+        PERL_UINT_FAST8_T inverted = 0;
+
+        /* Highest possible UTF-8 invariant is 7F on ASCII platforms; FF on
+         * EBCDIC */
+        const PERL_UINT_FAST8_T max_permissible
+                                    = nBIT_UMAX(7 + ONE_IF_EBCDIC_ZERO_IF_NOT);
+
          /* If doesn't fit the criteria for ANYOFM, invert and try again.  If
           * that works we will instead later generate an NANYOFM, and invert
           * back when through */
-            if (invlist_highest(cp_list) > max_permissible) {
-                _invlist_invert(cp_list);
-                inverted = 1;
-            }
+        if (highest_cp > max_permissible) {
+            _invlist_invert(cp_list);
+            inverted = 1;
+        }
  
-            if (invlist_highest(cp_list) <= max_permissible) {
-                UV this_start, this_end;
-                UV lowest_cp = UV_MAX;  /* init'ed to suppress compiler warn */
-                U8 bits_differing = 0;
-                Size_t full_cp_count = 0;
-                bool first_time = TRUE;
+        if (invlist_highest(cp_list) <= max_permissible) {
+            UV this_start, this_end;
+            UV lowest_cp = UV_MAX;  /* init'ed to suppress compiler warn */
+            U8 bits_differing = 0;
+            Size_t full_cp_count = 0;
+            bool first_time = TRUE;
  
              /* Go through the bytes and find the bit positions that differ */
-                invlist_iterinit(cp_list);
-                while (invlist_iternext(cp_list, &this_start, &this_end)) {
-                    unsigned int i = this_start;
+            invlist_iterinit(cp_list);
+            while (invlist_iternext(cp_list, &this_start, &this_end)) {
+                unsigned int i = this_start;
  
-                    if (first_time) {
-                        if (! UVCHR_IS_INVARIANT(i)) {
-                            goto done_anyofm;
-                        }
+                if (first_time) {
+                    if (! UVCHR_IS_INVARIANT(i)) {
+                        goto done_anyofm;
+                    }
  
-                        first_time = FALSE;
-                        lowest_cp = this_start;
+                    first_time = FALSE;
+                    lowest_cp = this_start;
  
                      /* We have set up the code point to compare with.  Don't
                       * compare it with itself */
-                        i++;
-                    }
-
-                    /* Find the bit positions that differ from the lowest code
-                     * point in the node.  Keep track of all such positions by
-                     * OR'ing */
-                    for (; i <= this_end; i++) {
-                        if (! UVCHR_IS_INVARIANT(i)) {
-                            goto done_anyofm;
-                        }
+                    i++;
+                }
  
-                        bits_differing  |= i ^ lowest_cp;
+                /* Find the bit positions that differ from the lowest code
+                 * point in the node.  Keep track of all such positions by
+                 * OR'ing */
+                for (; i <= this_end; i++) {
+                    if (! UVCHR_IS_INVARIANT(i)) {
+                        goto done_anyofm;
                      }
  
-                    full_cp_count += this_end - this_start + 1;
+                    bits_differing  |= i ^ lowest_cp;
                  }
  
+                full_cp_count += this_end - this_start + 1;
+            }
+
              /* At the end of the loop, we count how many bits differ from the
               * bits in lowest code point, call the count 'd'.  If the set we
               * found contains 2**d elements, it is the closure of all code
@@ -19891,32 +20192,32 @@ S_optimize_regclass(pTHX_
               * has a 0.  But that would mean that one of them differs from the
               * lowest code point in that position, which possibility we've
               * already excluded.  */
-                if (  (inverted || full_cp_count > 1)
-                    && full_cp_count == 1U << PL_bitcount[bits_differing])
-                {
-                    U8 ANYOFM_mask;
-
-                    op = ANYOFM + inverted;;
+            if (  (inverted || full_cp_count > 1)
+                && full_cp_count == 1U << PL_bitcount[bits_differing])
+            {
+                U8 ANYOFM_mask;
  
-                    /* We need to make the bits that differ be 0's */
-                    ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS */
+                op = ANYOFM + inverted;;
  
-                    /* The argument is the lowest code point */
-                    *ret = reganode(pRExC_state, op, lowest_cp);
-                    FLAGS(REGNODE_p(*ret)) = ANYOFM_mask;
-                }
+                /* We need to make the bits that differ be 0's */
+                ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS */
  
-              done_anyofm:
-                invlist_iterfinish(cp_list);
+                /* The argument is the lowest code point */
+                *ret = reganode(pRExC_state, op, lowest_cp);
+                FLAGS(REGNODE_p(*ret)) = ANYOFM_mask;
              }
  
-            if (inverted) {
-                _invlist_invert(cp_list);
-            }
+          done_anyofm:
+            invlist_iterfinish(cp_list);
+        }
  
-            if (op != ANYOF) {
-                return op;
-            }
+        if (inverted) {
+            _invlist_invert(cp_list);
+        }
+
+        if (op != END) {
+            return op;
+        }
  
          /* XXX We could create an ANYOFR_LOW node here if we saved above if all
           * were invariants, it wasn't inverted, and there is a single range.
@@ -19924,121 +20225,121 @@ S_optimize_regclass(pTHX_
           * like /\d/a, but would be twice the size.  Without having actually
           * measured the gain, khw doesn't think the tradeoff is really worth it
           * */
-        }
+    }
  
-        if (! (*anyof_flags & ANYOF_LOCALE_FLAGS)) {
-            PERL_UINT_FAST8_T type;
-            SV * intersection = NULL;
-            SV* d_invlist = NULL;
+    if (! (*anyof_flags & ANYOF_LOCALE_FLAGS)) {
+        PERL_UINT_FAST8_T type;
+        SV * intersection = NULL;
+        SV* d_invlist = NULL;
  
          /* See if this matches any of the POSIX classes.  The POSIXA and POSIXD
           * ones are about the same speed as ANYOF ops, but take less room; the
           * ones that have above-Latin1 code point matches are somewhat faster
-         * than ANYOF.  */
+         * than ANYOF. */
  
-            for (type = POSIXA; type >= POSIXD; type--) {
-                int posix_class;
+        for (type = POSIXA; type >= POSIXD; type--) {
+            int posix_class;
  
-                if (type == POSIXL) {   /* But not /l posix classes */
-                    continue;
-                }
+            if (type == POSIXL) {   /* But not /l posix classes */
+                continue;
+            }
  
-                for (posix_class = 0;
-                     posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
-                     posix_class++)
-                {
-                    SV** our_code_points = &cp_list;
-                    SV** official_code_points;
-                    int try_inverted;
+            for (posix_class = 0;
+                 posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
+                 posix_class++)
+            {
+                SV** our_code_points = &cp_list;
+                SV** official_code_points;
+                int try_inverted;
  
-                    if (type == POSIXA) {
-                        official_code_points = &PL_Posix_ptrs[posix_class];
-                    }
-                    else {
-                        official_code_points = &PL_XPosix_ptrs[posix_class];
-                    }
+                if (type == POSIXA) {
+                    official_code_points = &PL_Posix_ptrs[posix_class];
+                }
+                else {
+                    official_code_points = &PL_XPosix_ptrs[posix_class];
+                }
  
                  /* Skip non-existent classes of this type.  e.g. \v only has an
                   * entry in PL_XPosix_ptrs */
-                    if (! *official_code_points) {
-                        continue;
-                    }
+                if (! *official_code_points) {
+                    continue;
+                }
  
-                    /* Try both the regular class, and its inversion */
-                    for (try_inverted = 0; try_inverted < 2; try_inverted++) {
-                        bool this_inverted = *invert ^ try_inverted;
+                /* Try both the regular class, and its inversion */
+                for (try_inverted = 0; try_inverted < 2; try_inverted++) {
+                    bool this_inverted = *invert ^ try_inverted;
  
-                        if (type != POSIXD) {
+                    if (type != POSIXD) {
  
                          /* This class that isn't /d can't match if we have /d
                           * dependencies */
-                            if (has_runtime_dependency
-                                                    & HAS_D_RUNTIME_DEPENDENCY)
-                            {
-                                continue;
-                            }
+                        if (has_runtime_dependency
+                                                & HAS_D_RUNTIME_DEPENDENCY)
+                        {
+                            continue;
                          }
-                        else /* is /d */ if (! this_inverted) {
+                    }
+                    else /* is /d */ if (! this_inverted) {
  
                          /* /d classes don't match anything non-ASCII below 256
                           * unconditionally (which cp_list contains) */
-                            _invlist_intersection(cp_list, PL_UpperLatin1,
-                                                           &intersection);
-                            if (_invlist_len(intersection) != 0) {
-                                continue;
-                            }
+                        _invlist_intersection(cp_list, PL_UpperLatin1,
+                                                       &intersection);
+                        if (_invlist_len(intersection) != 0) {
+                            continue;
+                        }
  
-                            SvREFCNT_dec(d_invlist);
-                            d_invlist = invlist_clone(cp_list, NULL);
+                        SvREFCNT_dec(d_invlist);
+                        d_invlist = invlist_clone(cp_list, NULL);
  
                          /* But under UTF-8 it turns into using /u rules.  Add
                           * the things it matches under these conditions so that
                           * we check below that these are identical to what the
                           * tested class should match */
-                            if (upper_latin1_only_utf8_matches) {
-                                _invlist_union(
-                                            d_invlist,
-                                            upper_latin1_only_utf8_matches,
-                                            &d_invlist);
-                            }
-                            our_code_points = &d_invlist;
+                        if (upper_latin1_only_utf8_matches) {
+                            _invlist_union(
+                                        d_invlist,
+                                        upper_latin1_only_utf8_matches,
+                                        &d_invlist);
                          }
-                        else {  /* POSIXD, inverted.  If this doesn't have this
-                                   flag set, it isn't /d. */
-                            if (! (*anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
-                            {
-                                continue;
-                            }
-                            our_code_points = &cp_list;
+                        our_code_points = &d_invlist;
+                    }
+                    else {  /* POSIXD, inverted.  If this doesn't have this
+                               flag set, it isn't /d. */
+                        if (! (*anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
+                        {
+                            continue;
                          }
+                        our_code_points = &cp_list;
+                    }
  
                      /* Here, have weeded out some things.  We want to see if
                       * the list of characters this node contains
-                         * ('*our_code_points') precisely matches those of the
-                         * class we are currently checking against
-                         * ('*official_code_points'). */
-                        if (_invlistEQ(*our_code_points,
-                                       *official_code_points,
-                                       try_inverted))
-                        {
-                            /* Here, they precisely match.  Optimize this ANYOF
+                     * ('*our_code_points') precisely matches those of the
+                     * class we are currently checking against
+                     * ('*official_code_points'). */
+                    if (_invlistEQ(*our_code_points,
+                                   *official_code_points,
+                                   try_inverted))
+                    {
+                        /* Here, they precisely match.  Optimize this ANYOF
                           * node into its equivalent POSIX one of the correct
                           * type, possibly inverted */
-                            op = (try_inverted)
-                                ? type + NPOSIXA - POSIXA
-                                : type;
-                            *ret = reg_node(pRExC_state, op);
-                            FLAGS(REGNODE_p(*ret)) = posix_class;
-                            SvREFCNT_dec(d_invlist);
-                            SvREFCNT_dec(intersection);
-                            return op;
-                        }
+                        op = (try_inverted)
+                            ? type + NPOSIXA - POSIXA
+                            : type;
+                        *ret = reg_node(pRExC_state, op);
+                        FLAGS(REGNODE_p(*ret)) = posix_class;
+                        SvREFCNT_dec(d_invlist);
+                        SvREFCNT_dec(intersection);
+                        return op;
                      }
                  }
              }
-            SvREFCNT_dec(d_invlist);
-            SvREFCNT_dec(intersection);
          }
+        SvREFCNT_dec(d_invlist);
+        SvREFCNT_dec(intersection);
+    }
  
      /* If it is a single contiguous range, ANYOFR is an efficient regnode, both
       * in size and speed.  Currently, a 20 bit range base (smallest code point
@@ -20046,110 +20347,109 @@ S_optimize_regclass(pTHX_
       * This allows for using it on all of the Unicode code points except for
       * the highest plane, which is only for private use code points.  khw
       * doubts that a bigger delta is likely in real world applications */
-        if (     single_range
-            && ! has_runtime_dependency
-            &&   *anyof_flags == 0
-            &&   start[0] < (1 << ANYOFR_BASE_BITS)
-            &&   end[0] - start[0]
-                    < ((1U << (sizeof(((struct regnode_1 *)NULL)->arg1)
-                                   * CHARBITS - ANYOFR_BASE_BITS))))
+    if (     single_range
+        && ! has_runtime_dependency
+        &&   *anyof_flags == 0
+        &&   start[0] < (1 << ANYOFR_BASE_BITS)
+        &&   end[0] - start[0]
+                < ((1U << (sizeof(((struct regnode_1 *)NULL)->arg1)
+                               * CHARBITS - ANYOFR_BASE_BITS))))
  
-        {
-            U8 low_utf8[UTF8_MAXBYTES+1];
-            U8 high_utf8[UTF8_MAXBYTES+1];
+    {
+        U8 low_utf8[UTF8_MAXBYTES+1];
+        U8 high_utf8[UTF8_MAXBYTES+1];
  
-            op = ANYOFR;
-            *ret = reganode(pRExC_state, op,
+        op = ANYOFR;
+        *ret = reganode(pRExC_state, op,
                          (start[0] | (end[0] - start[0]) << ANYOFR_BASE_BITS));
  
          /* Place the lowest UTF-8 start byte in the flags field, so as to allow
           * efficient ruling out at run time of many possible inputs.  */
-            (void) uvchr_to_utf8(low_utf8, start[0]);
-            (void) uvchr_to_utf8(high_utf8, end[0]);
+        (void) uvchr_to_utf8(low_utf8, start[0]);
+        (void) uvchr_to_utf8(high_utf8, end[0]);
  
-            /* If all code points share the same first byte, this can be an
-             * ANYOFRb.  Otherwise store the lowest UTF-8 start byte which can
+        /* If all code points share the same first byte, this can be an
+         * ANYOFRb.  Otherwise store the lowest UTF-8 start byte which can
           * quickly rule out many inputs at run-time without having to compute
           * the code point from UTF-8.  For EBCDIC, we use I8, as not doing that
           * transformation would not rule out nearly so many things */
-            if (low_utf8[0] == high_utf8[0]) {
-                op = ANYOFRb;
-                OP(REGNODE_p(*ret)) = op;
-                ANYOF_FLAGS(REGNODE_p(*ret)) = low_utf8[0];
-            }
-            else {
+        if (low_utf8[0] == high_utf8[0]) {
+            op = ANYOFRb;
+            OP(REGNODE_p(*ret)) = op;
+            ANYOF_FLAGS(REGNODE_p(*ret)) = low_utf8[0];
+        }
+        else {
              ANYOF_FLAGS(REGNODE_p(*ret)) = NATIVE_UTF8_TO_I8(low_utf8[0]);
-            }
-
-            return op;
          }
  
-        /* If didn't find an optimization and there is no need for a bitmap,
-         * optimize to indicate that */
-        if (     start[0] >= NUM_ANYOF_CODE_POINTS
-            && ! LOC
-            && ! upper_latin1_only_utf8_matches
-            &&   *anyof_flags == 0)
-        {
-            U8 low_utf8[UTF8_MAXBYTES+1];
-            UV highest_cp = invlist_highest(cp_list);
+        return op;
+    }
+
+    /* If didn't find an optimization and there is no need for a bitmap,
+     * optimize to indicate that */
+    if (     lowest_cp >= NUM_ANYOF_CODE_POINTS
+        && ! LOC
+        && ! upper_latin1_only_utf8_matches
+        &&   *anyof_flags == 0)
+    {
+        U8 low_utf8[UTF8_MAXBYTES+1];
+        UV highest_cp = invlist_highest(cp_list);
  
          /* Currently the maximum allowed code point by the system is IV_MAX.
           * Higher ones are reserved for future internal use.  This particular
           * regnode can be used for higher ones, but we can't calculate the code
           * point of those.  IV_MAX suffices though, as it will be a large first
           * byte */
-            Size_t low_len = uvchr_to_utf8(low_utf8, MIN(start[0], IV_MAX))
-                           - low_utf8;
+        Size_t low_len = uvchr_to_utf8(low_utf8, MIN(lowest_cp, IV_MAX))
+                       - low_utf8;
  
          /* We store the lowest possible first byte of the UTF-8 representation,
           * using the flags field.  This allows for quick ruling out of some
           * inputs without having to convert from UTF-8 to code point.  For
           * EBCDIC, we use I8, as not doing that transformation would not rule
           * out nearly so many things */
-            *anyof_flags = NATIVE_UTF8_TO_I8(low_utf8[0]);
+        *anyof_flags = NATIVE_UTF8_TO_I8(low_utf8[0]);
  
-            op = ANYOFH;
+        op = ANYOFH;
  
-            /* If the first UTF-8 start byte for the highest code point in the
-             * range is suitably small, we may be able to get an upper bound as
-             * well */
-            if (highest_cp <= IV_MAX) {
-                U8 high_utf8[UTF8_MAXBYTES+1];
+        /* If the first UTF-8 start byte for the highest code point in the
+         * range is suitably small, we may be able to get an upper bound as
+         * well */
+        if (highest_cp <= IV_MAX) {
+            U8 high_utf8[UTF8_MAXBYTES+1];
              Size_t high_len = uvchr_to_utf8(high_utf8, highest_cp) - high_utf8;
  
-                /* If the lowest and highest are the same, we can get an exact
+            /* If the lowest and highest are the same, we can get an exact
               * first byte instead of a just minimum or even a sequence of exact
               * leading bytes.  We signal these with different regnodes */
-                if (low_utf8[0] == high_utf8[0]) {
-                    Size_t len = find_first_differing_byte_pos(low_utf8,
-                                                               high_utf8,
-                                                       MIN(low_len, high_len));
+            if (low_utf8[0] == high_utf8[0]) {
+                Size_t len = find_first_differing_byte_pos(low_utf8,
+                                                           high_utf8,
+                                                   MIN(low_len, high_len));
  
-                    if (len == 1) {
+                if (len == 1) {
  
                      /* No need to convert to I8 for EBCDIC as this is an exact
                       * match */
-                        *anyof_flags = low_utf8[0];
-                        op = ANYOFHb;
-                    }
-                    else {
-                        op = ANYOFHs;
-                        *ret = regnode_guts(pRExC_state, op,
-                                           regarglen[op] + STR_SZ(len),
-                                           "anyofhs");
-                        FILL_NODE(*ret, op);
-                        ((struct regnode_anyofhs *) REGNODE_p(*ret))->str_len
-                                                                        = len;
-                        Copy(low_utf8,  /* Add the common bytes */
-                        ((struct regnode_anyofhs *) REGNODE_p(*ret))->string,
-                           len, U8);
-                        RExC_emit += NODE_SZ_STR(REGNODE_p(*ret));
-                        set_ANYOF_arg(pRExC_state, REGNODE_p(*ret), cp_list,
-                                                  NULL, only_utf8_locale_list);
-                        return op;
-                    }
+                    *anyof_flags = low_utf8[0];
+                    op = ANYOFHb;
+                }
+                else {
+                    op = ANYOFHs;
+                    *ret = REGNODE_GUTS(pRExC_state, op,
+                                       regarglen[op] + STR_SZ(len));
+                    FILL_NODE(*ret, op);
+                    ((struct regnode_anyofhs *) REGNODE_p(*ret))->str_len
+                                                                    = len;
+                    Copy(low_utf8,  /* Add the common bytes */
+                    ((struct regnode_anyofhs *) REGNODE_p(*ret))->string,
+                       len, U8);
+                    RExC_emit += NODE_SZ_STR(REGNODE_p(*ret));
+                    set_ANYOF_arg(pRExC_state, REGNODE_p(*ret), cp_list,
+                                              NULL, only_utf8_locale_list);
+                    return op;
                  }
+            }
              else if (NATIVE_UTF8_TO_I8(high_utf8[0]) <= MAX_ANYOF_HRx_BYTE) {
  
                  /* Here, the high byte is not the same as the low, but is small
@@ -20159,27 +20459,38 @@ S_optimize_regclass(pTHX_
                   * platforms, I8 is used.  On ASCII platforms I8 is the same
                   * thing as UTF-8 */
  
-                    U8 bits = 0;
-                    U8 max_range_diff = MAX_ANYOF_HRx_BYTE - *anyof_flags;
-                    U8 range_diff = NATIVE_UTF8_TO_I8(high_utf8[0])
-                                - *anyof_flags;
+                U8 bits = 0;
+                U8 max_range_diff = MAX_ANYOF_HRx_BYTE - *anyof_flags;
+                U8 range_diff = NATIVE_UTF8_TO_I8(high_utf8[0])
+                            - *anyof_flags;
  
-                    if (range_diff <= max_range_diff / 8) {
-                        bits = 3;
-                    }
-                    else if (range_diff <= max_range_diff / 4) {
-                        bits = 2;
-                    }
-                    else if (range_diff <= max_range_diff / 2) {
-                        bits = 1;
-                    }
-                    *anyof_flags = (*anyof_flags - 0xC0) << 2 | bits;
-                    op = ANYOFHr;
+                if (range_diff <= max_range_diff / 8) {
+                    bits = 3;
+                }
+                else if (range_diff <= max_range_diff / 4) {
+                    bits = 2;
                  }
+                else if (range_diff <= max_range_diff / 2) {
+                    bits = 1;
+                }
+                *anyof_flags = (*anyof_flags - 0xC0) << 2 | bits;
+                op = ANYOFHr;
              }
          }
+    }
  
-        return op;
+    return op;
+
+  return_OPFAIL:
+    op = OPFAIL;
+    *ret = reganode(pRExC_state, op, 0);
+    return op;
+
+  return_SANY:
+    op = SANY;
+    *ret = reg_node(pRExC_state, op);
+    MARK_NAUGHTY(1);
+    return op;
  }
  
  #undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
@@ -20588,7 +20899,7 @@ S_skip_to_be_ignored_text(pTHX_ RExC_state_t *pRExC_state,
     those two cases, the parse position is advanced beyond all such comments and
     white space.
  
-   This is the UTF, (?#...), and /x friendly way of saying RExC_parse++.
+   This is the UTF, (?#...), and /x friendly way of saying RExC_parse_inc_by(1).
  */
  
  STATIC void
@@ -20601,9 +20912,7 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
                 || UTF8_IS_INVARIANT(*RExC_parse)
                 || UTF8_IS_START(*RExC_parse));
  
-        RExC_parse += (UTF)
-                      ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                      : 1;
+        RExC_parse_inc_safe();
  
          skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                  FALSE /* Don't force /x */ );
@@ -20634,54 +20943,39 @@ S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size)
      if (size > 0) {
          Zero(REGNODE_p(RExC_emit), size, regnode);
      }
-
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    Renew(RExC_offsets, 2*RExC_size+1, U32);
-    if (size > 0) {
-        Zero(RExC_offsets + 2*(RExC_size - size) + 1, 2 * size, U32);
-    }
-    RExC_offsets[0] = RExC_size;
-#endif
  }
  
  STATIC regnode_offset
-S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_size, const char* const name)
+S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const STRLEN extra_size)
  {
-    /* Allocate a regnode for 'op', with 'extra_size' extra (smallest) regnode
-     * equivalents space.  It aligns and increments RExC_size
+    /* Allocate space for a regnode that is (1 + extra_size) times as big as
+     * the smallest regnode, aligning and incrementing RExC_size
+     * appropriately.
        *
        * It returns the regnode's offset into the regex engine program */
  
      const regnode_offset ret = RExC_emit;
  
-    DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
      PERL_ARGS_ASSERT_REGNODE_GUTS;
  
      SIZE_ALIGN(RExC_size);
      change_engine_size(pRExC_state, (Ptrdiff_t) 1 + extra_size);
      NODE_ALIGN_FILL(REGNODE_p(ret));
-#ifndef RE_TRACK_PATTERN_OFFSETS
-    PERL_UNUSED_ARG(name);
-    PERL_UNUSED_ARG(op);
-#else
+    return(ret);
+}
+
+#ifdef DEBUGGING
+/* Debug-build wrapper for S_regnode_guts(): asserts 'extra_size' suits 'op' */
+STATIC regnode_offset
+S_regnode_guts_debug(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_size) {
+    PERL_ARGS_ASSERT_REGNODE_GUTS_DEBUG;
      assert(extra_size >= regarglen[op] || PL_regkind[op] == ANYOF);
+    return S_regnode_guts(aTHX_ pRExC_state, extra_size);
+}
  
-    if (RExC_offsets) {         /* MJD */
-        MJD_OFFSET_DEBUG(
-              ("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n",
-              name, __LINE__,
-              PL_reg_name[op],
-              (UV)(RExC_emit) > RExC_offsets[0]
-                ? "Overwriting end of array!\n" : "OK",
-              (UV)(RExC_emit),
-              (UV)(RExC_parse - RExC_start),
-              (UV)RExC_offsets[0]));
-        Set_Node_Offset(REGNODE_p(RExC_emit), RExC_parse + (op == END));
-    }
  #endif
-    return(ret);
-}
+
+
  
  /*
  - reg_node - emit a node
@@ -20689,7 +20983,7 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
  STATIC regnode_offset /* Location. */
  S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
  {
-    const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reg_node");
+    const regnode_offset ret = REGNODE_GUTS(pRExC_state, op, regarglen[op]);
      regnode_offset ptr = ret;
  
      PERL_ARGS_ASSERT_REG_NODE;
@@ -20707,7 +21001,7 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
  STATIC regnode_offset /* Location. */
  S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
  {
-    const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reganode");
+    const regnode_offset ret = REGNODE_GUTS(pRExC_state, op, regarglen[op]);
      regnode_offset ptr = ret;
  
      PERL_ARGS_ASSERT_REGANODE;
@@ -20726,7 +21020,7 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
  STATIC regnode_offset /* Location. */
  S_regpnode(pTHX_ RExC_state_t *pRExC_state, U8 op, SV * arg)
  {
-    const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "regpnode");
+    const regnode_offset ret = REGNODE_GUTS(pRExC_state, op, regarglen[op]);
      regnode_offset ptr = ret;
  
      PERL_ARGS_ASSERT_REGPNODE;
@@ -20741,7 +21035,7 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
  {
      /* emit a node with U32 and I32 arguments */
  
-    const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reg2Lanode");
+    const regnode_offset ret = REGNODE_GUTS(pRExC_state, op, regarglen[op]);
      regnode_offset ptr = ret;
  
      PERL_ARGS_ASSERT_REG2LANODE;
@@ -20823,41 +21117,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
  
      while (src > REGNODE_p(operand)) {
          StructCopy(--src, --dst, regnode);
-#ifdef RE_TRACK_PATTERN_OFFSETS
-        if (RExC_offsets) {     /* MJD 20010112 */
-            MJD_OFFSET_DEBUG(
-                 ("%s(%d): (op %s) %s copy %" UVuf " -> %" UVuf " (max %" UVuf ").\n",
-                  "reginsert",
-                  __LINE__,
-                  PL_reg_name[op],
-                  (UV)(REGNODE_OFFSET(dst)) > RExC_offsets[0]
-                    ? "Overwriting end of array!\n" : "OK",
-                  (UV)REGNODE_OFFSET(src),
-                  (UV)REGNODE_OFFSET(dst),
-                  (UV)RExC_offsets[0]));
-            Set_Node_Offset_To_R(REGNODE_OFFSET(dst), Node_Offset(src));
-            Set_Node_Length_To_R(REGNODE_OFFSET(dst), Node_Length(src));
-        }
-#endif
      }
  
      place = REGNODE_p(operand);        /* Op node, where operand used to be. */
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    if (RExC_offsets) {         /* MJD */
-        MJD_OFFSET_DEBUG(
-              ("%s(%d): (op %s) %s %" UVuf " <- %" UVuf " (max %" UVuf ").\n",
-              "reginsert",
-              __LINE__,
-              PL_reg_name[op],
-              (UV)REGNODE_OFFSET(place) > RExC_offsets[0]
-              ? "Overwriting end of array!\n" : "OK",
-              (UV)REGNODE_OFFSET(place),
-              (UV)(RExC_parse - RExC_start),
-              (UV)RExC_offsets[0]));
-        Set_Node_Offset(place, RExC_parse);
-        Set_Node_Length(place, 1);
-    }
-#endif
      src = NEXTOPER(place);
      FLAGS(place) = 0;
      FILL_NODE(operand, op);
@@ -21097,8 +21359,8 @@ S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
      ASSUME(REG_EXTFLAGS_NAME_SIZE <= sizeof(flags)*8);
  
      for (bit=0; bit<REG_EXTFLAGS_NAME_SIZE; bit++) {
-        if (flags & (1<<bit)) {
-            if ((1<<bit) & RXf_PMf_CHARSET) {  /* Output separately, below */
+        if (flags & (1U<<bit)) {
+            if ((1U<<bit) & RXf_PMf_CHARSET) { /* Output separately, below */
                  continue;
              }
              if (!set++ && lead)
@@ -21384,13 +21646,21 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          } else if ( pRExC_state ) {
              name_list= RExC_paren_name_list;
          }
-        if (name_list) {
+        if ( name_list ) {
              if ( k != REF || (OP(o) < REFN)) {
                  SV **name= av_fetch(name_list, parno, 0 );
                  if (name)
                      Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
              }
-            else {
+            else
+            if (parno > 0) {
+                /* parno must always be larger than 0 for this block,
+                 * as it represents a slot in the data array, which
+                 * has slot 0 reserved for a placeholder so that any valid
+                 * index into it is always true, i.e. non-zero.
+                 * See the '%' "what" type and the implementation of
+                 * S_add_data().
+                 */
                  SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]);
                  I32 *nums=(I32*)SvPVX(sv_dat);
                  SV **name= av_fetch(name_list, nums[0], 0 );
@@ -21742,7 +22012,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
  
      DEBUG_COMPILE_r(
          {
-            if (prog->maxlen > 0) {
+            if (prog->maxlen > 0 && (prog->check_utf8 || prog->check_substr)) {
                  const char * const s = SvPV_nolen_const(RX_UTF8(r)
                        ? prog->check_utf8 : prog->check_substr);
  
@@ -21876,6 +22146,16 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
               * we allocate here */
              REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
              assert(!SvPVX(dsv));
+            /* We "steal" the body from the newly allocated SV temp, changing
+             * the pointer in its HEAD to NULL. We then change its type to
+             * SVt_NULL so that when we immediately release its only reference,
+             * no memory deallocation happens.
+             *
+             * The body will eventually be freed (from the PVLV) either in
+             * Perl_sv_force_normal_flags() (if the PVLV is "downgraded" and
+             * the regexp body needs to be removed)
+             * or in Perl_sv_clear() (if the PVLV still holds the pointer until
+             * the PVLV itself is deallocated). */
              ((XPV*)SvANY(dsv))->xpv_len_u.xpvlenu_rx = temp->sv_any;
              temp->sv_any = NULL;
              SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
@@ -21969,10 +22249,6 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
          }
      });
  
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    if (ri->u.offsets)
-        Safefree(ri->u.offsets);             /* 20010421 MJD */
-#endif
      if (ri->code_blocks)
          S_free_codeblocks(aTHX_ ri->code_blocks);
  
@@ -22043,6 +22319,12 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                      }
                  }
                  break;
+            case '%':
+                /* NO-OP: a '%' data entry contains a null pointer; it exists
+                 * so that add_data always returns non-zero. This should only
+                 * ever happen at index 0. */
+                assert(n==0);
+                break;
              default:
                  Perl_croak(aTHX_ "panic: regfree data code '%c'",
                                                      ri->data->what[n]);
@@ -22271,6 +22553,13 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
                           is not from another regexp */
                  d->data[i] = ri->data->data[i];
                  break;
+            case '%':
+                /* This is a placeholder type; it exists purely so that
+                 * add_data always returns a non-zero value. This type of
+                 * entry should ONLY be present in slot 0 of the array. */
+                assert(i == 0);
+                d->data[i]= ri->data->data[i];
+                break;
              default:
                  Perl_croak(aTHX_ "panic: re_dup_guts unknown data code '%c'",
                                                             ri->data->what[i]);
@@ -22284,14 +22573,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
  
      reti->name_list_idx = ri->name_list_idx;
  
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    if (ri->u.offsets) {
-        Newx(reti->u.offsets, 2*len+1, U32);
-        Copy(ri->u.offsets, reti->u.offsets, 2*len+1, U32);
-    }
-#else
      SetProgLen(reti, len);
-#endif
  
      return (void*)reti;
  }
@@ -23352,7 +23634,7 @@ S_compile_wildcard(pTHX_ const char * subpattern, const STRLEN len,
  
      U32 flags = PMf_MULTILINE|PMf_WILDCARD;
      U32 rx_flags;
-    SV * subpattern_sv = sv_2mortal(newSVpvn(subpattern, len));
+    SV * subpattern_sv = newSVpvn_flags(subpattern, len, SVs_TEMP);
      REGEXP * subpattern_re;
      DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
@@ -24592,11 +24874,10 @@ S_parse_uniprop_string(pTHX_
               * We start by constructing the hash key name, consisting of the
               * fully qualified subroutine name, preceded by the /i status, so
               * that there is a key for /i and a different key for non-/i */
-            key = newSVpvn(((to_fold) ? "1" : "0"), 1);
+            key = newSVpvn_flags(((to_fold) ? "1" : "0"), 1, SVs_TEMP);
              fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
                                            non_pkg_begin != 0);
              sv_catsv(key, fq_name);
-            sv_2mortal(key);
  
              /* We only call the sub once throughout the life of the program
               * (with the /i, non-/i exception noted above).  That means the