regcomp.c: calculate variants instead of assuming worst case

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 04e2228..2f72faf 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -139,23 +139,31 @@ struct RExC_state_t {
                                             corresponding to copy_start */
      SSize_t    whilem_seen;            /* number of WHILEM in this expr */
      regnode    *emit_start;            /* Start of emitted-code area */
                                             corresponding to copy_start */
      SSize_t    whilem_seen;            /* number of WHILEM in this expr */
      regnode    *emit_start;            /* Start of emitted-code area */
-    regnode    *emit_bound;            /* First regnode outside of the
-                                           allocated space */
      regnode_offset emit;               /* Code-emit pointer */
      I32                naughty;                /* How bad is this pattern? */
      I32                sawback;                /* Did we see \1, ...? */
      U32                seen;
      SSize_t    size;                   /* Number of regnode equivalents in
                                             pattern */
      regnode_offset emit;               /* Code-emit pointer */
      I32                naughty;                /* How bad is this pattern? */
      I32                sawback;                /* Did we see \1, ...? */
      U32                seen;
      SSize_t    size;                   /* Number of regnode equivalents in
                                             pattern */
-    I32         npar;                   /* Capture buffer count, (OPEN) plus
-                                           one. ("par" 0 is the whole
-                                           pattern)*/
-    I32         total_par;              /* Capture buffer count after parse
-                                           completed, (OPEN) plus one. ("par" 0
-                                           is the whole pattern)*/
+
+    /* position beyond 'precomp' of the warning message furthest away from
+     * 'precomp'.  During the parse, no warnings are raised for any problems
+     * earlier in the parse than this position.  This works if warnings are
+     * raised the first time a given spot is parsed, and if only one
+     * independent warning is raised for any given spot */
+    Size_t     latest_warn_offset;
+
+    I32         npar;                   /* Capture buffer count so far in the
+                                           parse, (OPEN) plus one. ("par" 0 is
+                                           the whole pattern)*/
+    I32         total_par;              /* During initial parse, is either 0,
+                                           or -1; the latter indicating a
+                                           reparse is needed.  After that pass,
+                                           it is what 'npar' became after the
+                                           pass.  Hence, it being > 0 indicates
+                                           we are in a reparse situation */
      I32                nestroot;               /* root parens we are in - used by
                                             accept */
      I32                nestroot;               /* root parens we are in - used by
                                             accept */
-    I32                extralen;
      I32                seen_zerolen;
      regnode_offset *open_parens;       /* offsets to open parens */
      regnode_offset *close_parens;      /* offsets to close parens */
      I32                seen_zerolen;
      regnode_offset *open_parens;       /* offsets to open parens */
      regnode_offset *close_parens;      /* offsets to close parens */
@@ -215,7 +223,7 @@ struct RExC_state_t {
      bool        strict;
      bool        study_started;
      bool        in_script_run;
      bool        strict;
      bool        study_started;
      bool        in_script_run;
-    bool        pass1;
+    bool        use_BRANCHJ;
  };
  
  #define RExC_flags     (pRExC_state->flags)
  };
  
  #define RExC_flags     (pRExC_state->flags)
@@ -230,6 +238,7 @@ struct RExC_state_t {
  #define RExC_start     (pRExC_state->start)
  #define RExC_end       (pRExC_state->end)
  #define RExC_parse     (pRExC_state->parse)
  #define RExC_start     (pRExC_state->start)
  #define RExC_end       (pRExC_state->end)
  #define RExC_parse     (pRExC_state->parse)
+#define RExC_latest_warn_offset (pRExC_state->latest_warn_offset )
  #define RExC_whilem_seen       (pRExC_state->whilem_seen)
  
  /* Set during the sizing pass when there is a LATIN SMALL LETTER SHARP S in any
  #define RExC_whilem_seen       (pRExC_state->whilem_seen)
  
  /* Set during the sizing pass when there is a LATIN SMALL LETTER SHARP S in any
@@ -247,9 +256,7 @@ struct RExC_state_t {
                                                           others */
  #endif
  #define RExC_emit      (pRExC_state->emit)
                                                           others */
  #endif
  #define RExC_emit      (pRExC_state->emit)
-#define RExC_pass1     (pRExC_state->pass1)
  #define RExC_emit_start        (pRExC_state->emit_start)
  #define RExC_emit_start        (pRExC_state->emit_start)
-#define RExC_emit_bound        (pRExC_state->emit_bound)
  #define RExC_sawback   (pRExC_state->sawback)
  #define RExC_seen      (pRExC_state->seen)
  #define RExC_size      (pRExC_state->size)
  #define RExC_sawback   (pRExC_state->sawback)
  #define RExC_seen      (pRExC_state->seen)
  #define RExC_size      (pRExC_state->size)
@@ -257,7 +264,6 @@ struct RExC_state_t {
  #define RExC_npar      (pRExC_state->npar)
  #define RExC_total_parens      (pRExC_state->total_par)
  #define RExC_nestroot   (pRExC_state->nestroot)
  #define RExC_npar      (pRExC_state->npar)
  #define RExC_total_parens      (pRExC_state->total_par)
  #define RExC_nestroot   (pRExC_state->nestroot)
-#define RExC_extralen  (pRExC_state->extralen)
  #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
  #define RExC_utf8      (pRExC_state->utf8)
  #define RExC_uni_semantics     (pRExC_state->uni_semantics)
  #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
  #define RExC_utf8      (pRExC_state->utf8)
  #define RExC_uni_semantics     (pRExC_state->uni_semantics)
@@ -284,7 +290,7 @@ struct RExC_state_t {
  #define RExC_study_started      (pRExC_state->study_started)
  #define RExC_warn_text (pRExC_state->warn_text)
  #define RExC_in_script_run      (pRExC_state->in_script_run)
  #define RExC_study_started      (pRExC_state->study_started)
  #define RExC_warn_text (pRExC_state->warn_text)
  #define RExC_in_script_run      (pRExC_state->in_script_run)
-#define RExC_use_BRANCHJ        (!SIZE_ONLY && RExC_extralen)
+#define RExC_use_BRANCHJ        (pRExC_state->use_BRANCHJ)
  
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
   * a flag to disable back-off on the fixed/floating substrings - if it's
  
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
   * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -341,7 +347,6 @@ struct RExC_state_t {
  
  #define REQUIRE_UTF8(flagp) STMT_START {                                   \
                                       if (!UTF) {                           \
  
  #define REQUIRE_UTF8(flagp) STMT_START {                                   \
                                       if (!UTF) {                           \
-                                         assert(PASS1);                    \
                                           *flagp = RESTART_PARSE|NEED_UTF8; \
                                           return 0;                         \
                                       }                                     \
                                           *flagp = RESTART_PARSE|NEED_UTF8; \
                                           return 0;                         \
                                       }                                     \
@@ -354,16 +359,26 @@ struct RExC_state_t {
  #define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
      STMT_START {                                                            \
              if (DEPENDS_SEMANTICS) {                                        \
  #define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
      STMT_START {                                                            \
              if (DEPENDS_SEMANTICS) {                                        \
-                assert(PASS1);                                              \
                  set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
                  RExC_uni_semantics = 1;                                     \
                  set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
                  RExC_uni_semantics = 1;                                     \
-                if (RExC_seen_unfolded_sharp_s) {                           \
                      *flagp |= RESTART_PARSE;                                \
                      return restart_retval;                                  \
                      *flagp |= RESTART_PARSE;                                \
                      return restart_retval;                                  \
-                }                                                           \
              }                                                               \
      } STMT_END
  
              }                                                               \
      } STMT_END
  
+#define BRANCH_MAX_OFFSET   U16_MAX
+#define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
+    STMT_START {                                                            \
+                RExC_use_BRANCHJ = 1;                                       \
+                *flagp |= RESTART_PARSE;                                    \
+                return restart_retval;                                      \
+    } STMT_END
+
+#define REQUIRE_PARENS_PASS                                                 \
+    STMT_START {                                                            \
+                    if (RExC_total_parens == 0) RExC_total_parens = -1;     \
+    } STMT_END
+
  /* Executes a return statement with the value 'X', if 'flags' contains any of
   * 'RESTART_PARSE', 'NEED_UTF8', or 'extra'.  If so, *flagp is set to those
   * flags */
  /* Executes a return statement with the value 'X', if 'flags' contains any of
   * 'RESTART_PARSE', 'NEED_UTF8', or 'extra'.  If so, *flagp is set to those
   * flags */
@@ -671,7 +686,7 @@ static const scan_data_t zero_scan_data = {
  #define REPORT_LOCATION_ARGS(xC)                                            \
      UTF8fARG(UTF,                                                           \
               (xI(xC) > eI) /* Don't run off end */                          \
  #define REPORT_LOCATION_ARGS(xC)                                            \
      UTF8fARG(UTF,                                                           \
               (xI(xC) > eI) /* Don't run off end */                          \
-              ? eC - sC   /* Length before the <--HERE */                   \
+              ? eI - sI   /* Length before the <--HERE */                   \
                : ((xI_offset(xC) >= 0)                                       \
                   ? xI_offset(xC)                                            \
                   : (Perl_croak(aTHX_ "panic: %s: %d: negative offset: %"    \
                : ((xI_offset(xC) >= 0)                                       \
                   ? xI_offset(xC)                                            \
                   : (Perl_croak(aTHX_ "panic: %s: %d: negative offset: %"    \
@@ -693,6 +708,10 @@ static const scan_data_t zero_scan_data = {
      STMT_START {                                                           \
          if (RExC_rx_sv)                                                     \
              SAVEFREESV(RExC_rx_sv);                                         \
      STMT_START {                                                           \
          if (RExC_rx_sv)                                                     \
              SAVEFREESV(RExC_rx_sv);                                         \
+        if (RExC_open_parens)                                               \
+            SAVEFREEPV(RExC_open_parens);                                   \
+        if (RExC_close_parens)                                              \
+            SAVEFREEPV(RExC_close_parens);                                  \
      } STMT_END
  
  /*
      } STMT_END
  
  /*
@@ -800,17 +819,22 @@ static const scan_data_t zero_scan_data = {
  #define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE RExC_copy_start_in_constructed = NULL
  #define RESTORE_WARNINGS RExC_copy_start_in_constructed = RExC_precomp
  
  #define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE RExC_copy_start_in_constructed = NULL
  #define RESTORE_WARNINGS RExC_copy_start_in_constructed = RExC_precomp
  
-/* Outputting warnings is generally deferred until the 2nd pass.  This is
- * because the first pass can be restarted, for example if the pattern has to
- * be converted to UTF-8.  If a warning had already been output earlier in the
- * pass, it would be re-output after the restart.  Pass 2 is never restarted,
- * so the problem simply goes away if we defer the output to that pass.  See
- * [perl #122671]. 'RExC_copy_start_in_constructed' being NULL is a flag to
- * not generate any warnings */
+/* Since a warning can be generated multiple times as the input is reparsed, we
+ * output it the first time we come to that point in the parse, but suppress it
+ * otherwise.  'RExC_copy_start_in_constructed' being NULL is a flag to not
+ * generate any warnings */
  #define TO_OUTPUT_WARNINGS(loc)                                         \
  #define TO_OUTPUT_WARNINGS(loc)                                         \
-  (PASS2 && RExC_copy_start_in_constructed)
+  (   RExC_copy_start_in_constructed                                    \
+   && ((xI(loc)) - RExC_precomp) > (Ptrdiff_t) RExC_latest_warn_offset)
  
  
-#define UPDATE_WARNINGS_LOC(loc)  NOOP
+/* After we've emitted a warning, we save the position in the input so we don't
+ * output it again */
+#define UPDATE_WARNINGS_LOC(loc)                                        \
+    STMT_START {                                                        \
+        if (TO_OUTPUT_WARNINGS(loc)) {                                  \
+            RExC_latest_warn_offset = (xI(loc)) - RExC_precomp;         \
+        }                                                               \
+    } STMT_END
  
  /* 'warns' is the output of the packWARNx macro used in 'code' */
  #define _WARN_HELPER(loc, warns, code)                                  \
  
  /* 'warns' is the output of the packWARNx macro used in 'code' */
  #define _WARN_HELPER(loc, warns, code)                                  \
@@ -941,11 +965,11 @@ static const scan_data_t zero_scan_data = {
  #define Set_Node_Offset_Length(node,offset,len)
  #define ProgLen(ri) ri->u.proglen
  #define SetProgLen(ri,x) ri->u.proglen = x
  #define Set_Node_Offset_Length(node,offset,len)
  #define ProgLen(ri) ri->u.proglen
  #define SetProgLen(ri,x) ri->u.proglen = x
+#define Track_Code(code)
  #else
  #define ProgLen(ri) ri->u.offsets[0]
  #define SetProgLen(ri,x) ri->u.offsets[0] = x
  #define Set_Node_Offset_To_R(offset,byte) STMT_START {                 \
  #else
  #define ProgLen(ri) ri->u.offsets[0]
  #define SetProgLen(ri,x) ri->u.offsets[0] = x
  #define Set_Node_Offset_To_R(offset,byte) STMT_START {                 \
-    if (! SIZE_ONLY) {                                                 \
         MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n",         \
                     __LINE__, (int)(offset), (int)(byte)));             \
         if((offset) < 0) {                                              \
         MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n",         \
                     __LINE__, (int)(offset), (int)(byte)));             \
         if((offset) < 0) {                                              \
@@ -954,7 +978,6 @@ static const scan_data_t zero_scan_data = {
         } else {                                                        \
              RExC_offsets[2*(offset)-1] = (byte);                       \
         }                                                               \
         } else {                                                        \
              RExC_offsets[2*(offset)-1] = (byte);                       \
         }                                                               \
-    }                                                                  \
  } STMT_END
  
  #define Set_Node_Offset(node,byte)                                      \
  } STMT_END
  
  #define Set_Node_Offset(node,byte)                                      \
@@ -962,7 +985,6 @@ static const scan_data_t zero_scan_data = {
  #define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
  
  #define Set_Node_Length_To_R(node,len) STMT_START {                    \
  #define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
  
  #define Set_Node_Length_To_R(node,len) STMT_START {                    \
-    if (! SIZE_ONLY) {                                                 \
         MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n",           \
                 __LINE__, (int)(node), (int)(len)));                    \
         if((node) < 0) {                                                \
         MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n",           \
                 __LINE__, (int)(node), (int)(len)));                    \
         if((node) < 0) {                                                \
@@ -971,7 +993,6 @@ static const scan_data_t zero_scan_data = {
         } else {                                                        \
             RExC_offsets[2*(node)] = (len);                             \
         }                                                               \
         } else {                                                        \
             RExC_offsets[2*(node)] = (len);                             \
         }                                                               \
-    }                                                                  \
  } STMT_END
  
  #define Set_Node_Length(node,len) \
  } STMT_END
  
  #define Set_Node_Length(node,len) \
@@ -987,6 +1008,8 @@ static const scan_data_t zero_scan_data = {
      Set_Node_Offset_To_R(REGNODE_OFFSET(node), (offset));      \
      Set_Node_Length_To_R(REGNODE_OFFSET(node), (len)); \
  } STMT_END
      Set_Node_Offset_To_R(REGNODE_OFFSET(node), (offset));      \
      Set_Node_Length_To_R(REGNODE_OFFSET(node), (len)); \
  } STMT_END
+
+#define Track_Code(code) STMT_START { code } STMT_END
  #endif
  
  #if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
  #endif
  
  #if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
@@ -3583,9 +3606,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                  optimisation.
               */
              while( optimize < jumper ) {
                  optimisation.
               */
              while( optimize < jumper ) {
-#ifdef RE_TRACK_PATTERN_OFFSETS
-                mjd_nodelen += Node_Length((optimize));
-#endif
+                Track_Code( mjd_nodelen += Node_Length((optimize)); );
                  OP( optimize ) = OPTIMIZED;
                  Set_Node_Offset_Length(optimize, 0, 0);
                  optimize++;
                  OP( optimize ) = OPTIMIZED;
                  Set_Node_Offset_Length(optimize, 0, 0);
                  optimize++;
@@ -6328,8 +6349,10 @@ S_free_codeblocks(pTHX_ struct reg_code_blocks *cbs)
          return;
      for (n = 0; n < cbs->count; n++) {
          REGEXP *rx = cbs->cb[n].src_regex;
          return;
      for (n = 0; n < cbs->count; n++) {
          REGEXP *rx = cbs->cb[n].src_regex;
-        cbs->cb[n].src_regex = NULL;
-        SvREFCNT_dec(rx);
+        if (rx) {
+            cbs->cb[n].src_regex = NULL;
+            SvREFCNT_dec_NN(rx);
+        }
      }
      Safefree(cbs->cb);
      Safefree(cbs);
      }
      Safefree(cbs->cb);
      Safefree(cbs);
@@ -6373,7 +6396,8 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
      DEBUG_PARSE_r(Perl_re_printf( aTHX_
          "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
  
      DEBUG_PARSE_r(Perl_re_printf( aTHX_
          "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
  
-    Newx(dst, *plen_p * 2 + 1, U8);
+    /* 1 for each byte + 1 for each byte that expands to two, + trailing NUL */
+    Newx(dst, *plen_p + variant_under_utf8_count(src, src + *plen_p) + 1, U8);
      d = dst;
  
      while (s < *plen_p) {
      d = dst;
  
      while (s < *plen_p) {
@@ -7068,7 +7092,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                      bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags)
  {
      REGEXP *Rx;         /* Capital 'R' means points to a REGEXP */
                      bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags)
  {
      REGEXP *Rx;         /* Capital 'R' means points to a REGEXP */
-    struct regexp *r;
      STRLEN plen;
      char *exp;
      regnode *scan;
      STRLEN plen;
      char *exp;
      regnode *scan;
@@ -7232,7 +7255,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      /* ignore the utf8ness if the pattern is 0 length */
      RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
  
      /* ignore the utf8ness if the pattern is 0 length */
      RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
  
-    RExC_rx_sv = NULL;                                                     \
      RExC_uni_semantics = 0;
      RExC_seen_unfolded_sharp_s = 0;
      RExC_contains_locale = 0;
      RExC_uni_semantics = 0;
      RExC_seen_unfolded_sharp_s = 0;
      RExC_contains_locale = 0;
@@ -7243,7 +7265,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_frame_head= NULL;
      RExC_frame_last= NULL;
      RExC_frame_count= 0;
      RExC_frame_head= NULL;
      RExC_frame_last= NULL;
      RExC_frame_count= 0;
+    RExC_latest_warn_offset = 0;
+    RExC_use_BRANCHJ = 0;
      RExC_total_parens = 0;
      RExC_total_parens = 0;
+    RExC_open_parens = NULL;
+    RExC_close_parens = NULL;
+    RExC_paren_names = NULL;
+    RExC_size = 0;
+#ifdef DEBUGGING
+    RExC_paren_name_list = NULL;
+#endif
  
      DEBUG_r({
          RExC_mysv1= sv_newmortal();
  
      DEBUG_r({
          RExC_mysv1= sv_newmortal();
@@ -7287,6 +7318,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          return old_re;
      }
  
          return old_re;
      }
  
+    /* Allocate the pattern's SV */
+    RExC_rx_sv = Rx = (REGEXP*) newSV_type(SVt_REGEXP);
+    RExC_rx = ReANY(Rx);
+    if ( RExC_rx == NULL )
+        FAIL("Regexp out of space");
+
      rx_flags = orig_rx_flags;
  
      if (   initial_charset == REGEX_DEPENDS_CHARSET
      rx_flags = orig_rx_flags;
  
      if (   initial_charset == REGEX_DEPENDS_CHARSET
@@ -7298,8 +7335,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
      }
  
         set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
      }
  
-    RExC_copy_start_in_constructed = RExC_copy_start_in_input = RExC_precomp = exp;
-    RExC_flags = rx_flags;
      RExC_pm_flags = pm_flags;
  
      if (runtime_code) {
      RExC_pm_flags = pm_flags;
  
      if (runtime_code) {
@@ -7323,43 +7358,82 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_maxlen = 0;
      RExC_in_lookbehind = 0;
      RExC_seen_zerolen = *exp == '^' ? -1 : 0;
      RExC_maxlen = 0;
      RExC_in_lookbehind = 0;
      RExC_seen_zerolen = *exp == '^' ? -1 : 0;
-    RExC_extralen = 0;
  #ifdef EBCDIC
      RExC_recode_x_to_native = 0;
  #endif
      RExC_in_multi_char_class = 0;
  
  #ifdef EBCDIC
      RExC_recode_x_to_native = 0;
  #endif
      RExC_in_multi_char_class = 0;
  
-    /* First pass: determine size, legality. */
-    RExC_pass1 = TRUE;
-    RExC_parse = exp;
-    RExC_start = RExC_copy_start_in_constructed = exp;
-    RExC_end = exp + plen;
-    RExC_precomp_end = RExC_end;
-    RExC_naughty = 0;
-    RExC_npar = 1;
+    RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = RExC_precomp = exp;
+    RExC_precomp_end = RExC_end = exp + plen;
      RExC_nestroot = 0;
      RExC_nestroot = 0;
-    RExC_size = 0L;
-    RExC_emit = 1;
      RExC_whilem_seen = 0;
      RExC_whilem_seen = 0;
-    RExC_open_parens = 0;
-    RExC_close_parens = 0;
      RExC_end_op = NULL;
      RExC_end_op = NULL;
-    RExC_paren_names = NULL;
-#ifdef DEBUGGING
-    RExC_paren_name_list = NULL;
-#endif
      RExC_recurse = NULL;
      RExC_study_chunk_recursed = NULL;
      RExC_study_chunk_recursed_bytes= 0;
      RExC_recurse_count = 0;
      pRExC_state->code_index = 0;
  
      RExC_recurse = NULL;
      RExC_study_chunk_recursed = NULL;
      RExC_study_chunk_recursed_bytes= 0;
      RExC_recurse_count = 0;
      pRExC_state->code_index = 0;
  
-    /* We allocate scratch space as large as the largest node, for use in the
-     * first pass.  Since many functions return RExC_emit on success, and '0'
-     * if an error, RExC_emit must never be 0, so we set it to 1 and double
-     * the scratch space */
-    Newxc(RExC_emit_start, 2 * sizeof(regnode_ssc), char, regnode);
-    SAVEFREEPV(RExC_emit_start);
+    /* Initialize the string in the compiled pattern.  This is so that there is
+     * something to output if necessary */
+    set_regex_pv(pRExC_state, Rx);
+
+    DEBUG_PARSE_r({
+        Perl_re_printf( aTHX_
+            "Starting parse and generation\n");
+        RExC_lastnum=0;
+        RExC_lastparse=NULL;
+    });
+
+    /* Allocate space and zero-initialize. Note, the two step process
+       of zeroing when in debug mode, thus anything assigned has to
+       happen after that */
+    if (!  RExC_size) {
+
+        /* On the first pass of the parse, we guess how big this will be.  Then
+         * we grow in one operation to that amount and then give it back.  As
+         * we go along, we re-allocate what we need.
+         *
+         * XXX Currently the guess is essentially that the pattern will be an
+         * EXACT node with one byte input, one byte output.  This is crude, and
+         * better heuristics are welcome.
+         *
+         * On any subsequent passes, we guess what we actually computed in the
+         * latest earlier pass.  Such a pass probably didn't complete so is
+         * missing stuff.  We could improve those guesses by knowing where the
+         * parse stopped, and use the length so far plus apply the above
+         * assumption to what's left. */
+        RExC_size = STR_SZ(RExC_end - RExC_start);
+    }
+
+    Newxc(RExC_rxi, sizeof(regexp_internal) + RExC_size, char, regexp_internal);
+    if ( RExC_rxi == NULL )
+        FAIL("Regexp out of space");
+
+    Zero(RExC_rxi, sizeof(regexp_internal) + RExC_size, char);
+    RXi_SET( RExC_rx, RExC_rxi );
+
+    /* We start from 0 (over from 0 in the case this is a reparse.  The first
+     * node parsed will give back any excess memory we have allocated so far).
+     * */
+    RExC_size = 0;
+
+    /* non-zero initialization begins here */
+    RExC_rx->engine= eng;
+    RExC_rx->extflags = rx_flags;
+    RXp_COMPFLAGS(RExC_rx) = orig_rx_flags & RXf_PMf_FLAGCOPYMASK;
+
+    if (pm_flags & PMf_IS_QR) {
+       RExC_rxi->code_blocks = pRExC_state->code_blocks;
+        if (RExC_rxi->code_blocks) {
+            RExC_rxi->code_blocks->refcnt++;
+        }
+    }
+
+    RExC_rx->intflags = 0;
+
+    RExC_flags = rx_flags;     /* don't let top level (?i) bleed */
+    RExC_parse = exp;
  
      /* This NUL is guaranteed because the pattern comes from an SV*, and the sv
       * code makes sure the final byte is an uncounted NUL.  But should this
  
      /* This NUL is guaranteed because the pattern comes from an SV*, and the sv
       * code makes sure the final byte is an uncounted NUL.  But should this
@@ -7370,13 +7444,34 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
       * etc.  So it is worth noting. */
      assert(*RExC_end == '\0');
  
       * etc.  So it is worth noting. */
      assert(*RExC_end == '\0');
  
-    DEBUG_PARSE_r(
-        Perl_re_printf( aTHX_  "Starting first pass (sizing)\n");
-        RExC_lastnum=0;
-        RExC_lastparse=NULL;
-    );
+    RExC_naughty = 0;
+    RExC_npar = 1;
+    RExC_emit_start = RExC_rxi->program;
+    pRExC_state->code_index = 0;
+
+    *((char*) RExC_emit_start) = (char) REG_MAGIC;
+    RExC_emit = 1;
+
+    /* Do the parse */
+    if (reg(pRExC_state, 0, &flags, 1)) {
+
+        /* Success!, But if RExC_total_parens < 0, we need to redo the parse
+         * knowing how many parens there actually are */
+        if (RExC_total_parens < 0) {
+            flags |= RESTART_PARSE;
+        }
+
+        /* We have that number in RExC_npar */
+        RExC_total_parens = RExC_npar;
+    }
+    else if (! MUST_RESTART(flags)) {
+       ReREFCNT_dec(Rx);
+        Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile, flags=%#" UVxf, (UV) flags);
+    }
+
+    /* Here, we either have success, or we have to redo the parse for some reason */
+    if (MUST_RESTART(flags)) {
  
  
-    if (reg(pRExC_state, 0, &flags, 1) == 0) {
          /* It's possible to write a regexp in ascii that represents Unicode
          codepoints outside of the byte range, such as via \x{100}. If we
          detect such a sequence we have to convert the entire pattern to utf8
          /* It's possible to write a regexp in ascii that represents Unicode
          codepoints outside of the byte range, such as via \x{100}. If we
          detect such a sequence we have to convert the entire pattern to utf8
@@ -7385,142 +7480,91 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          at least some part of the pattern, and therefore must convert the whole
          thing.
          -- dmq */
          at least some part of the pattern, and therefore must convert the whole
          thing.
          -- dmq */
-        if (MUST_RESTART(flags)) {
-            if (flags & NEED_UTF8) {
-                S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
-                pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
-                DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse after upgrade\n"));
-            }
-            else {
-                DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse\n"));
+        if (flags & NEED_UTF8) {
+
+            /* We have stored the offset of the final warning output so far.
+             * That must be adjusted.  Any variant characters between the start
+             * of the pattern and this warning count for 2 bytes in the final,
+             * so just add them again */
+            if (UNLIKELY(RExC_latest_warn_offset > 0)) {
+                RExC_latest_warn_offset +=
+                            variant_under_utf8_count((U8 *) exp, (U8 *) exp
+                                                + RExC_latest_warn_offset);
              }
              }
+            S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
+            pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
+            DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse after upgrade\n"));
+        }
+        else {
+            DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse\n"));
+        }
  
  
-            goto redo_parse;
+        if (RExC_total_parens > 0) {
+            /* Make enough room for all the known parens, and zero it */
+            Renew(RExC_open_parens, RExC_total_parens, regnode_offset);
+            Zero(RExC_open_parens, RExC_total_parens, regnode_offset);
+            RExC_open_parens[0] = 1;    /* +1 for REG_MAGIC */
+
+            Renew(RExC_close_parens, RExC_total_parens, regnode_offset);
+            Zero(RExC_close_parens, RExC_total_parens, regnode_offset);
+        }
+        else { /* Parse did not complete.  Reinitialize the parentheses
+                  structures */
+            RExC_total_parens = 0;
+            if (RExC_open_parens) {
+                Safefree(RExC_open_parens);
+                RExC_open_parens = NULL;
+            }
+            if (RExC_close_parens) {
+                Safefree(RExC_close_parens);
+                RExC_close_parens = NULL;
+            }
          }
          }
-        Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile for sizing pass, flags=%#" UVxf, (UV) flags);
-    }
  
  
-    DEBUG_PARSE_r({
-        Perl_re_printf( aTHX_
-            "Required size %" IVdf " nodes\n"
-            "Starting second pass (creation)\n",
-            (IV)RExC_size);
-        RExC_lastnum=0;
-        RExC_lastparse=NULL;
-    });
+        /* Clean up what we did in this parse */
+        SvREFCNT_dec_NN(RExC_rx_sv);
  
  
-    /* The first pass could have found things that force Unicode semantics */
-    if ((RExC_utf8 || RExC_uni_semantics)
-        && get_regex_charset(rx_flags) == REGEX_DEPENDS_CHARSET)
-    {
-       set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
+        goto redo_parse;
      }
  
      }
  
-    /* Small enough for pointer-storage convention?
-       If extralen==0, this means that we will not need long jumps. */
-    if (RExC_size >= 0x10000L && RExC_extralen)
-        RExC_size += RExC_extralen;
-    else
-       RExC_extralen = 0;
-    if (RExC_whilem_seen > 15)
-       RExC_whilem_seen = 15;
+    /* Here, we have successfully parsed and generated the pattern's program
+     * for the regex engine.  We are ready to finish things up and look for
+     * optimizations. */
  
  
-    /* Allocate space and zero-initialize. Note, the two step process
-       of zeroing when in debug mode, thus anything assigned has to
-       happen after that */
-    RExC_rx_sv = Rx = (REGEXP*) newSV_type(SVt_REGEXP);
-    RExC_rx = ReANY(Rx);
-    Newxc(RExC_rxi, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
-        char, regexp_internal);
-    if ( RExC_rx == NULL || RExC_rxi == NULL )
-       FAIL("Regexp out of space");
-#ifdef DEBUGGING
-    /* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
-    Zero(RExC_rxi, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
-         char);
-#else
-    /* bulk initialize base fields with 0. */
-    Zero(RExC_rxi, sizeof(regexp_internal), char);
-#endif
-
-    /* non-zero initialization begins here */
-    RXi_SET( RExC_rx, RExC_rxi );
-    RExC_rx->engine= eng;
-    RExC_rx->extflags = rx_flags;
-    RXp_COMPFLAGS(RExC_rx) = orig_rx_flags & RXf_PMf_FLAGCOPYMASK;
+    SetProgLen(RExC_rxi,RExC_size);
  
  
-    if (pm_flags & PMf_IS_QR) {
-       RExC_rxi->code_blocks = pRExC_state->code_blocks;
-       if (RExC_rxi->code_blocks)
-            RExC_rxi->code_blocks->refcnt++;
+    /* The values for the two variables below are now immutable, we can add
+     * them to the list to free without overwhelming it */
+    if (RExC_open_parens) {
+        SAVEFREEPV(RExC_open_parens);
+    }
+    if (RExC_close_parens) {
+        SAVEFREEPV(RExC_close_parens);
      }
  
      }
  
-    /* Set up the string to compile, with correct modifiers, etc */
+    /* Update the string to compile, with correct modifiers, etc */
      set_regex_pv(pRExC_state, Rx);
  
      set_regex_pv(pRExC_state, Rx);
  
-    RExC_rx->intflags = 0;
-    RExC_total_parens = RExC_npar;
-    RExC_rx->nparens = RExC_total_parens - 1;  /* set early to validate backrefs */
+    RExC_rx->nparens = RExC_total_parens - 1;
+
+    /* Uses the upper 4 bits of the FLAGS field, so keep within that size */
+    if (RExC_whilem_seen > 15)
+        RExC_whilem_seen = 15;
+
+    DEBUG_PARSE_r({
+        Perl_re_printf( aTHX_
+            "Required size %" IVdf " nodes\n", (IV)RExC_size);
+        RExC_lastnum=0;
+        RExC_lastparse=NULL;
+    });
  
  
-    /* Useful during FAIL. */
  #ifdef RE_TRACK_PATTERN_OFFSETS
  #ifdef RE_TRACK_PATTERN_OFFSETS
-    Newxz(RExC_offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
      DEBUG_OFFSETS_r(Perl_re_printf( aTHX_
                            "%s %" UVuf " bytes for offset annotations.\n",
                            RExC_offsets ? "Got" : "Couldn't get",
                            (UV)((2*RExC_size+1) * sizeof(U32))));
  #endif
      DEBUG_OFFSETS_r(Perl_re_printf( aTHX_
                            "%s %" UVuf " bytes for offset annotations.\n",
                            RExC_offsets ? "Got" : "Couldn't get",
                            (UV)((2*RExC_size+1) * sizeof(U32))));
  #endif
-    SetProgLen(RExC_rxi, RExC_size);
-    RExC_rx_sv = Rx;
-
-    /* Second pass: emit code. */
-    RExC_pass1 = FALSE;
-    RExC_flags = rx_flags;     /* don't let top level (?i) bleed */
-    RExC_pm_flags = pm_flags;
-    RExC_parse = exp;
-    RExC_end = exp + plen;
-    RExC_naughty = 0;
-    RExC_emit_start = RExC_rxi->program;
-    RExC_emit = 1;
-    RExC_emit_bound = RExC_rxi->program + RExC_size + 1;
-    pRExC_state->code_index = 0;
-
-    *((char*) RExC_emit_start) = (char) REG_MAGIC;
-    /* setup various meta data about recursion, this all requires
-     * RExC_npar to be correctly set, and a bit later on we clear it */
-    if (RExC_seen & REG_RECURSE_SEEN) {
-        DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-            "%*s%*s Setting up open/close parens\n",
-                  22, "|    |", (int)(0 * 2 + 1), ""));
-
-        /* setup RExC_open_parens, which holds the address of each
-         * OPEN tag, and to make things simpler for the 0 index
-         * the start of the program - this is used later for offsets */
-        Newxz(RExC_open_parens, RExC_npar, regnode_offset);
-        SAVEFREEPV(RExC_open_parens);
-        RExC_open_parens[0] = RExC_emit;
  
  
-        /* setup RExC_close_parens, which holds the address of each
-         * CLOSE tag, and to make things simpler for the 0 index
-         * the end of the program - this is used later for offsets */
-        Newxz(RExC_close_parens, RExC_npar, regnode_offset);
-        SAVEFREEPV(RExC_close_parens);
-        /* we dont know where end op starts yet, so we dont
-         * need to set RExC_close_parens[0] like we do RExC_open_parens[0] above */
-
-        /* Note, RExC_npar is 1 + the number of parens in a pattern.
-         * So its 1 if there are no parens. */
-        RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) +
-                                         ((RExC_npar & 0x07) != 0);
-        Newx(RExC_study_chunk_recursed,
-             RExC_study_chunk_recursed_bytes * RExC_npar, U8);
-        SAVEFREEPV(RExC_study_chunk_recursed);
-    }
-    RExC_npar = 1;
-    if (reg(pRExC_state, 0, &flags, 1) == 0) {
-       ReREFCNT_dec(Rx);
-        Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile for generation pass, flags=%#" UVxf, (UV) flags);
-    }
      DEBUG_OPTIMISE_r(
          Perl_re_printf( aTHX_  "Starting post parse optimization\n");
      );
      DEBUG_OPTIMISE_r(
          Perl_re_printf( aTHX_  "Starting post parse optimization\n");
      );
@@ -7533,6 +7577,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          SAVEFREEPV(RExC_recurse);
      }
  
          SAVEFREEPV(RExC_recurse);
      }
  
+    if (RExC_seen & REG_RECURSE_SEEN) {
+        /* Note, RExC_total_parens is 1 + the number of parens in a pattern.
+         * So its 1 if there are no parens. */
+        RExC_study_chunk_recursed_bytes= (RExC_total_parens >> 3) +
+                                         ((RExC_total_parens & 0x07) != 0);
+        Newx(RExC_study_chunk_recursed,
+             RExC_study_chunk_recursed_bytes * RExC_total_parens, U8);
+        SAVEFREEPV(RExC_study_chunk_recursed);
+    }
+
    reStudy:
      RExC_rx->minlen = minlen = sawlookahead = sawplus = sawopen = sawminmod = 0;
      DEBUG_r(
    reStudy:
      RExC_rx->minlen = minlen = sawlookahead = sawplus = sawopen = sawminmod = 0;
      DEBUG_r(
@@ -8522,31 +8576,38 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
                           character */
          vFAIL("Group name must start with a non-digit word character");
      }
                           character */
          vFAIL("Group name must start with a non-digit word character");
      }
-    if ( flags ) {
      sv_name = newSVpvn_flags(name_start, (int)(RExC_parse - name_start),
                              SVs_TEMP | (UTF ? SVf_UTF8 : 0));
      sv_name = newSVpvn_flags(name_start, (int)(RExC_parse - name_start),
                              SVs_TEMP | (UTF ? SVf_UTF8 : 0));
-        if ( flags == REG_RSN_RETURN_NAME)
-            return sv_name;
-        else if (flags==REG_RSN_RETURN_DATA) {
-            HE *he_str = NULL;
-            SV *sv_dat = NULL;
-            if ( ! sv_name )      /* should not happen*/
-                Perl_croak(aTHX_ "panic: no svname in reg_scan_name");
-            if (RExC_paren_names)
-                he_str = hv_fetch_ent( RExC_paren_names, sv_name, 0, 0 );
-            if ( he_str )
-                sv_dat = HeVAL(he_str);
-            if ( ! sv_dat )     /* Didn't find group */
+    if ( flags == REG_RSN_RETURN_NAME)
+        return sv_name;
+    else if (flags==REG_RSN_RETURN_DATA) {
+        HE *he_str = NULL;
+        SV *sv_dat = NULL;
+        if ( ! sv_name )      /* should not happen*/
+            Perl_croak(aTHX_ "panic: no svname in reg_scan_name");
+        if (RExC_paren_names)
+            he_str = hv_fetch_ent( RExC_paren_names, sv_name, 0, 0 );
+        if ( he_str )
+            sv_dat = HeVAL(he_str);
+        if ( ! sv_dat ) {   /* Didn't find group */
+
+            /* It might be a forward reference; we can't fail until we
+                * know, by completing the parse to get all the groups, and
+                * then reparsing */
+            if (RExC_total_parens > 0)  {
                  vFAIL("Reference to nonexistent named group");
                  vFAIL("Reference to nonexistent named group");
-            return sv_dat;
-        }
-        else {
-            Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
-                      (unsigned long) flags);
+            }
+            else {
+                REQUIRE_PARENS_PASS;
+            }
          }
          }
-        NOT_REACHED; /* NOTREACHED */
+        return sv_dat;
      }
      }
-    return NULL;
+    else {
+        Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
+                   (unsigned long) flags);
+    }
+    NOT_REACHED; /* NOTREACHED */
  }
  
  #define DEBUG_PARSE_MSG(funcname)     DEBUG_PARSE_r({           \
  }
  
  #define DEBUG_PARSE_MSG(funcname)     DEBUG_PARSE_r({           \
@@ -8566,10 +8627,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
      } else                                                      \
          Perl_re_printf( aTHX_ "%16s","");                       \
                                                                  \
      } else                                                      \
          Perl_re_printf( aTHX_ "%16s","");                       \
                                                                  \
-    if (SIZE_ONLY)                                              \
-       num = RExC_size + 1;                                     \
-    else                                                        \
-       num=REG_NODE_NUM(REGNODE_p(RExC_emit));                  \
+    num=REG_NODE_NUM(REGNODE_p(RExC_emit));                     \
      if (RExC_lastnum!=num)                                      \
         Perl_re_printf( aTHX_ "|%4d", num);                      \
      else                                                        \
      if (RExC_lastnum!=num)                                      \
         Perl_re_printf( aTHX_ "|%4d", num);                      \
      else                                                        \
@@ -10614,7 +10672,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  NOT_REACHED; /*NOTREACHED*/
              case ONCE_PAT_MOD: /* 'o' */
              case GLOBAL_PAT_MOD: /* 'g' */
                  NOT_REACHED; /*NOTREACHED*/
              case ONCE_PAT_MOD: /* 'o' */
              case GLOBAL_PAT_MOD: /* 'g' */
-                if (PASS2 && ckWARN(WARN_REGEXP)) {
+                if (ckWARN(WARN_REGEXP)) {
                      const I32 wflagbit = *RExC_parse == 'o'
                                           ? WASTED_O
                                           : WASTED_G;
                      const I32 wflagbit = *RExC_parse == 'o'
                                           ? WASTED_O
                                           : WASTED_G;
@@ -10634,7 +10692,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  break;
  
              case CONTINUE_PAT_MOD: /* 'c' */
                  break;
  
              case CONTINUE_PAT_MOD: /* 'c' */
-                if (PASS2 && ckWARN(WARN_REGEXP)) {
+                if (ckWARN(WARN_REGEXP)) {
                      if (! (wastedflags & WASTED_C) ) {
                          wastedflags |= WASTED_GC;
                         /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */
                      if (! (wastedflags & WASTED_C) ) {
                          wastedflags |= WASTED_GC;
                         /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */
@@ -10721,9 +10779,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
      regnode_offset ret;
      char* name_start = RExC_parse;
      U32 num = 0;
      regnode_offset ret;
      char* name_start = RExC_parse;
      U32 num = 0;
-    SV *sv_dat = reg_scan_name(pRExC_state, SIZE_ONLY
-                                            ? REG_RSN_RETURN_NULL
-                                            : REG_RSN_RETURN_DATA);
+    SV *sv_dat = reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF;
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF;
@@ -11109,20 +11165,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  ret = reg2Lanode(pRExC_state, op, 0, internal_argval);
              }
              RExC_seen |= REG_VERBARG_SEEN;
                  ret = reg2Lanode(pRExC_state, op, 0, internal_argval);
              }
              RExC_seen |= REG_VERBARG_SEEN;
-            if ( ! SIZE_ONLY ) {
-                if (start_arg) {
-                    SV *sv = newSVpvn( start_arg,
-                                       RExC_parse - start_arg);
-                    ARG(REGNODE_p(ret)) = add_data( pRExC_state,
-                                         STR_WITH_LEN("S"));
-                    RExC_rxi->data->data[ARG(REGNODE_p(ret))]=(void*)sv;
-                    FLAGS(REGNODE_p(ret)) = 1;
-                } else {
-                    FLAGS(REGNODE_p(ret)) = 0;
-                }
-                if ( internal_argval != -1 )
-                    ARG2L_SET(REGNODE_p(ret), internal_argval);
+            if (start_arg) {
+                SV *sv = newSVpvn( start_arg,
+                                    RExC_parse - start_arg);
+                ARG(REGNODE_p(ret)) = add_data( pRExC_state,
+                                        STR_WITH_LEN("S"));
+                RExC_rxi->data->data[ARG(REGNODE_p(ret))]=(void*)sv;
+                FLAGS(REGNODE_p(ret)) = 1;
+            } else {
+                FLAGS(REGNODE_p(ret)) = 0;
              }
              }
+            if ( internal_argval != -1 )
+                ARG2L_SET(REGNODE_p(ret), internal_argval);
             nextchar(pRExC_state);
             return ret;
          }
             nextchar(pRExC_state);
             return ret;
          }
@@ -11183,10 +11237,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  /* FALLTHROUGH */
              case '\'':          /* (?'...') */
                      name_start = RExC_parse;
                  /* FALLTHROUGH */
              case '\'':          /* (?'...') */
                      name_start = RExC_parse;
-                    svname = reg_scan_name(pRExC_state,
-                        SIZE_ONLY    /* reverse test from the others */
-                        ? REG_RSN_RETURN_NAME
-                        : REG_RSN_RETURN_NULL);
+                    svname = reg_scan_name(pRExC_state, REG_RSN_RETURN_NAME);
                     if (   RExC_parse == name_start
                          || RExC_parse >= RExC_end
                          || *RExC_parse != paren)
                     if (   RExC_parse == name_start
                          || RExC_parse >= RExC_end
                          || *RExC_parse != paren)
@@ -11194,7 +11245,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                         vFAIL2("Sequence (?%c... not terminated",
                             paren=='>' ? '<' : paren);
                      }
                         vFAIL2("Sequence (?%c... not terminated",
                             paren=='>' ? '<' : paren);
                      }
-                   if (SIZE_ONLY) {
+                   {
                         HE *he_str;
                         SV *sv_dat = NULL;
                          if (!svname) /* shouldn't happen */
                         HE *he_str;
                         SV *sv_dat = NULL;
                          if (!svname) /* shouldn't happen */
@@ -11286,6 +11337,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                    buffers in alternations share the same numbers */
                 paren = ':';
                 after_freeze = freeze_paren = RExC_npar;
                    buffers in alternations share the same numbers */
                 paren = ':';
                 after_freeze = freeze_paren = RExC_npar;
+
+                /* XXX This construct currently requires an extra pass.
+                 * Investigation would be required to see if that could be
+                 * changed */
+                REQUIRE_PARENS_PASS;
                 break;
             case ':':           /* (?:...) */
             case '>':           /* (?>...) */
                 break;
             case ':':           /* (?:...) */
             case '>':           /* (?>...) */
@@ -11300,6 +11356,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     FAIL("Sequence (?R) not terminated");
                  num = 0;
                  RExC_seen |= REG_RECURSE_SEEN;
                     FAIL("Sequence (?R) not terminated");
                  num = 0;
                  RExC_seen |= REG_RECURSE_SEEN;
+
+                /* XXX These constructs currently require an extra pass.
+                 * It probably could be changed */
+                REQUIRE_PARENS_PASS;
+
                 *flagp |= POSTPONED;
                  goto gen_recurse_regop;
                 /*notreached*/
                 *flagp |= POSTPONED;
                  goto gen_recurse_regop;
                 /*notreached*/
@@ -11308,9 +11369,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  parse_start = RExC_parse - 1;
                named_recursion:
                  {
                  parse_start = RExC_parse - 1;
                named_recursion:
                  {
-                   SV *sv_dat = reg_scan_name(pRExC_state,
-                       SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
-                    num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
+                    SV *sv_dat = reg_scan_name(pRExC_state,
+                                               REG_RSN_RETURN_DATA);
+                   num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
                  }
                  if (RExC_parse >= RExC_end || *RExC_parse != ')')
                      vFAIL("Sequence (?&... not terminated");
                  }
                  if (RExC_parse >= RExC_end || *RExC_parse != ')')
                      vFAIL("Sequence (?&... not terminated");
@@ -11372,8 +11433,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      */
                      num = RExC_npar + num;
                      if (num < 1)  {
                      */
                      num = RExC_npar + num;
                      if (num < 1)  {
-                        RExC_parse++;
-                        vFAIL("Reference to nonexistent group");
+
+                        /* It might be a forward reference; we can't fail until
+                         * we know, by completing the parse to get all the
+                         * groups, and then reparsing */
+                        if (RExC_total_parens > 0)  {
+                            RExC_parse++;
+                            vFAIL("Reference to nonexistent group");
+                        }
+                        else {
+                            REQUIRE_PARENS_PASS;
+                        }
                      }
                  } else if ( paren == '+' ) {
                      num = RExC_npar + num - 1;
                      }
                  } else if ( paren == '+' ) {
                      num = RExC_npar + num - 1;
@@ -11388,18 +11458,27 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                   */
  
                  ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
                   */
  
                  ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
-                if (!SIZE_ONLY) {
-                   if (num > (I32)RExC_rx->nparens) {
-                       RExC_parse++;
-                       vFAIL("Reference to nonexistent group");
-                   }
-                   RExC_recurse_count++;
-                    DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Recurse #%" UVuf " to %" IVdf "\n",
-                              22, "|    |", (int)(depth * 2 + 1), "",
-                              (UV)ARG(REGNODE_p(ret)),
-                              (IV)ARG2L(REGNODE_p(ret))));
+                if (num >= RExC_npar) {
+
+                    /* It might be a forward reference; we can't fail until we
+                     * know, by completing the parse to get all the groups, and
+                     * then reparsing */
+                    if (RExC_total_parens > 0)  {
+                        if (num >= RExC_total_parens) {
+                            RExC_parse++;
+                            vFAIL("Reference to nonexistent group");
+                        }
+                    }
+                    else {
+                        REQUIRE_PARENS_PASS;
+                    }
                  }
                  }
+                RExC_recurse_count++;
+                DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+                    "%*s%*s Recurse #%" UVuf " to %" IVdf "\n",
+                            22, "|    |", (int)(depth * 2 + 1), "",
+                            (UV)ARG(REGNODE_p(ret)),
+                            (IV)ARG2L(REGNODE_p(ret))));
                  RExC_seen |= REG_RECURSE_SEEN;
  
                  Set_Node_Length(REGNODE_p(ret),
                  RExC_seen |= REG_RECURSE_SEEN;
  
                  Set_Node_Length(REGNODE_p(ret),
@@ -11449,20 +11528,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 /* this is a pre-compiled code block (?{...}) */
                 cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index];
                 RExC_parse = RExC_start + cb->end;
                 /* this is a pre-compiled code block (?{...}) */
                 cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index];
                 RExC_parse = RExC_start + cb->end;
-               if (!SIZE_ONLY) {
                 o = cb->block;
                 o = cb->block;
-                   if (cb->src_regex) {
-                       n = add_data(pRExC_state, STR_WITH_LEN("rl"));
-                       RExC_rxi->data->data[n] =
-                           (void*)SvREFCNT_inc((SV*)cb->src_regex);
-                       RExC_rxi->data->data[n+1] = (void*)o;
-                   }
-                   else {
-                       n = add_data(pRExC_state,
-                              (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l", 1);
-                       RExC_rxi->data->data[n] = (void*)o;
-                   }
-               }
+                if (cb->src_regex) {
+                    n = add_data(pRExC_state, STR_WITH_LEN("rl"));
+                    RExC_rxi->data->data[n] =
+                        (void*)SvREFCNT_inc((SV*)cb->src_regex);
+                    RExC_rxi->data->data[n+1] = (void*)o;
+                }
+                else {
+                    n = add_data(pRExC_state,
+                            (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l", 1);
+                    RExC_rxi->data->data[n] = (void*)o;
+                }
                 pRExC_state->code_index++;
                 nextchar(pRExC_state);
  
                 pRExC_state->code_index++;
                 nextchar(pRExC_state);
  
@@ -11477,9 +11554,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                          * return value */
                                         RExC_flags & RXf_PMf_COMPILETIME
                                        );
                                          * return value */
                                         RExC_flags & RXf_PMf_COMPILETIME
                                        );
-                   if (!SIZE_ONLY) {
-                       FLAGS(REGNODE_p(ret)) = 2;
-                    }
+                    FLAGS(REGNODE_p(ret)) = 2;
                      REGTAIL(pRExC_state, ret, eval);
                      /* deal with the length of this later - MJD */
                     return ret;
                      REGTAIL(pRExC_state, ret, eval);
                      /* deal with the length of this later - MJD */
                     return ret;
@@ -11529,8 +11604,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      regnode_offset tail;
  
                      ret = reg_node(pRExC_state, LOGICAL);
                      regnode_offset tail;
  
                      ret = reg_node(pRExC_state, LOGICAL);
-                    if (!SIZE_ONLY)
-                        FLAGS(REGNODE_p(ret)) = 1;
+                    FLAGS(REGNODE_p(ret)) = 1;
  
                      tail = reg(pRExC_state, 1, &flag, depth+1);
                      RETURN_FAIL_ON_RESTART(flag, flagp);
  
                      tail = reg(pRExC_state, 1, &flag, depth+1);
                      RETURN_FAIL_ON_RESTART(flag, flagp);
@@ -11543,8 +11617,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     char ch = RExC_parse[0] == '<' ? '>' : '\'';
                     char *name_start= RExC_parse++;
                     U32 num = 0;
                     char ch = RExC_parse[0] == '<' ? '>' : '\'';
                     char *name_start= RExC_parse++;
                     U32 num = 0;
-                   SV *sv_dat=reg_scan_name(pRExC_state,
-                       SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
+                   SV *sv_dat=reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
                     if (   RExC_parse == name_start
                          || RExC_parse >= RExC_end
                          || *RExC_parse != ch)
                     if (   RExC_parse == name_start
                          || RExC_parse >= RExC_end
                          || *RExC_parse != ch)
@@ -11595,20 +11668,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                         SV *sv_dat;
                         RExC_parse++;
                         sv_dat = reg_scan_name(pRExC_state,
                         SV *sv_dat;
                         RExC_parse++;
                         sv_dat = reg_scan_name(pRExC_state,
-                            SIZE_ONLY
-                            ? REG_RSN_RETURN_NULL
-                            : REG_RSN_RETURN_DATA);
-
-                        /* we should only have a false sv_dat when
-                         * SIZE_ONLY is true, and we always have false
-                         * sv_dat when SIZE_ONLY is true.
-                         * reg_scan_name() will VFAIL() if the name is
-                         * unknown when SIZE_ONLY is false, and otherwise
-                         * will return something, and when SIZE_ONLY is
-                         * true, reg_scan_name() just parses the string,
-                         * and doesnt return anything. (in theory) */
-                        assert(SIZE_ONLY ? !sv_dat : !!sv_dat);
-
+                                               REG_RSN_RETURN_DATA);
                          if (sv_dat)
                              parno = 1 + *((I32 *)SvPVX(sv_dat));
                     }
                          if (sv_dat)
                              parno = 1 + *((I32 *)SvPVX(sv_dat));
                     }
@@ -11688,9 +11748,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     }
                     else
                          REGTAIL(pRExC_state, ret, ender);
                     }
                     else
                          REGTAIL(pRExC_state, ret, ender);
+#if 0  /* Removing this doesn't cause failures in the test suite -- khw */
                      RExC_size++; /* XXX WHY do we need this?!!
                                      For large programs it seems to be required
                                      but I can't figure out why. -- dmq*/
                      RExC_size++; /* XXX WHY do we need this?!!
                                      For large programs it seems to be required
                                      but I can't figure out why. -- dmq*/
+#endif
                     return ret;
                 }
                  RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
                     return ret;
                 }
                  RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
@@ -11732,20 +11794,48 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
           capturing_parens:
             parno = RExC_npar;
             RExC_npar++;
           capturing_parens:
             parno = RExC_npar;
             RExC_npar++;
+            if (RExC_total_parens <= 0) {
+                /* If we are in our first pass through (and maybe only pass),
+                 * we  need to allocate memory for the capturing parentheses
+                 * data structures.  Since we start at npar=1, when it reaches
+                 * 2, for the first time it has something to put in it.  Above
+                 * 2 means we extend what we already have */
+                if (RExC_npar == 2) {
+                    /* setup RExC_open_parens, which holds the address of each
+                     * OPEN tag, and to make things simpler for the 0 index the
+                     * start of the program - this is used later for offsets */
+                    Newxz(RExC_open_parens, RExC_npar, regnode_offset);
+                    RExC_open_parens[0] = 1;    /* +1 for REG_MAGIC */
+
+                    /* setup RExC_close_parens, which holds the address of each
+                     * CLOSE tag, and to make things simpler for the 0 index
+                     * the end of the program - this is used later for offsets
+                     * */
+                    Newxz(RExC_close_parens, RExC_npar, regnode_offset);
+                    /* we dont know where end op starts yet, so we dont need to
+                     * set RExC_close_parens[0] like we do RExC_open_parens[0]
+                     * above */
+                }
+                else {
+                    Renew(RExC_open_parens, RExC_npar, regnode_offset);
+                    Zero(RExC_open_parens + RExC_npar - 1, 1, regnode_offset);
+
+                    Renew(RExC_close_parens, RExC_npar, regnode_offset);
+                    Zero(RExC_close_parens + RExC_npar - 1, 1, regnode_offset);
+                }
+            }
  
             ret = reganode(pRExC_state, OPEN, parno);
  
             ret = reganode(pRExC_state, OPEN, parno);
-           if (!SIZE_ONLY ){
-               if (!RExC_nestroot)
-                   RExC_nestroot = parno;
-                if (RExC_open_parens && !RExC_open_parens[parno])
-               {
-                    DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Setting open paren #%" IVdf " to %d\n",
-                        22, "|    |", (int)(depth * 2 + 1), "",
-                       (IV)parno, REG_NODE_NUM(REGNODE_p(ret))));
-                    RExC_open_parens[parno]= ret;
-               }
-           }
+            if (!RExC_nestroot)
+                RExC_nestroot = parno;
+            if (RExC_open_parens && !RExC_open_parens[parno])
+            {
+                DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+                    "%*s%*s Setting open paren #%" IVdf " to %d\n",
+                    22, "|    |", (int)(depth * 2 + 1), "",
+                    (IV)parno, REG_NODE_NUM(REGNODE_p(ret))));
+                RExC_open_parens[parno]= ret;
+            }
  
              Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
              Set_Node_Offset(REGNODE_p(ret), RExC_parse); /* MJD */
  
              Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
              Set_Node_Offset(REGNODE_p(ret), RExC_parse); /* MJD */
@@ -11781,8 +11871,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              Set_Node_Offset_To_R(br, parse_start-RExC_start);
          }
         have_branch = 1;
              Set_Node_Offset_To_R(br, parse_start-RExC_start);
          }
         have_branch = 1;
-       if (SIZE_ONLY)
-           RExC_extralen += 1;         /* For BRANCHJ-BRANCH. */
      }
      else if (paren == ':') {
         *flagp |= flags&SIMPLE;
      }
      else if (paren == ':') {
         *flagp |= flags&SIMPLE;
@@ -11803,8 +11891,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
                      ender);
         }
                      REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
                      ender);
         }
-       if (SIZE_ONLY)
-           RExC_extralen += 2;         /* Account for LONGJMP. */
         nextchar(pRExC_state);
         if (freeze_paren) {
             if (RExC_npar > after_freeze)
         nextchar(pRExC_state);
         if (freeze_paren) {
             if (RExC_npar > after_freeze)
@@ -11864,21 +11950,19 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             break;
         case 0:
             ender = reg_node(pRExC_state, END);
             break;
         case 0:
             ender = reg_node(pRExC_state, END);
-           if (!SIZE_ONLY) {
-                assert(!RExC_end_op); /* there can only be one! */
-                RExC_end_op = REGNODE_p(ender);
-                if (RExC_close_parens) {
-                    DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Setting close paren #0 (END) to %d\n",
-                        22, "|    |", (int)(depth * 2 + 1), "",
-                        REG_NODE_NUM(REGNODE_p(ender))));
+            assert(!RExC_end_op); /* there can only be one! */
+            RExC_end_op = REGNODE_p(ender);
+            if (RExC_close_parens) {
+                DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+                    "%*s%*s Setting close paren #0 (END) to %d\n",
+                    22, "|    |", (int)(depth * 2 + 1), "",
+                    REG_NODE_NUM(REGNODE_p(ender))));
  
  
-                    RExC_close_parens[0]= ender;
-                }
+                RExC_close_parens[0]= ender;
              }
             break;
         }
              }
             break;
         }
-        DEBUG_PARSE_r(if (!SIZE_ONLY) {
+        DEBUG_PARSE_r(
              DEBUG_PARSE_MSG("lsbr");
              regprop(RExC_rx, RExC_mysv1, REGNODE_p(lastbr), NULL, pRExC_state);
              regprop(RExC_rx, RExC_mysv2, REGNODE_p(ender), NULL, pRExC_state);
              DEBUG_PARSE_MSG("lsbr");
              regprop(RExC_rx, RExC_mysv1, REGNODE_p(lastbr), NULL, pRExC_state);
              regprop(RExC_rx, RExC_mysv2, REGNODE_p(ender), NULL, pRExC_state);
@@ -11889,10 +11973,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                            (IV)REG_NODE_NUM(REGNODE_p(ender)),
                            (IV)(ender - lastbr)
              );
                            (IV)REG_NODE_NUM(REGNODE_p(ender)),
                            (IV)(ender - lastbr)
              );
-        });
+        );
          REGTAIL(pRExC_state, lastbr, ender);
  
          REGTAIL(pRExC_state, lastbr, ender);
  
-       if (have_branch && !SIZE_ONLY) {
+       if (have_branch) {
              char is_nothing= 1;
             if (depth==1)
                  RExC_seen |= REG_TOP_LEVEL_BRANCHES_SEEN;
              char is_nothing= 1;
             if (depth==1)
                  RExC_seen |= REG_TOP_LEVEL_BRANCHES_SEEN;
@@ -11924,7 +12008,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  br= PL_regkind[OP(ret_as_regnode)] != BRANCH
                                 ? regnext(ret_as_regnode)
                                 : ret_as_regnode;
                  br= PL_regkind[OP(ret_as_regnode)] != BRANCH
                                 ? regnext(ret_as_regnode)
                                 : ret_as_regnode;
-                DEBUG_PARSE_r(if (!SIZE_ONLY) {
+                DEBUG_PARSE_r(
                      DEBUG_PARSE_MSG("NADA");
                      regprop(RExC_rx, RExC_mysv1, ret_as_regnode,
                                       NULL, pRExC_state);
                      DEBUG_PARSE_MSG("NADA");
                      regprop(RExC_rx, RExC_mysv1, ret_as_regnode,
                                       NULL, pRExC_state);
@@ -11937,7 +12021,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                    (IV)REG_NODE_NUM(REGNODE_p(ender)),
                                    (IV)(ender - ret)
                      );
                                    (IV)REG_NODE_NUM(REGNODE_p(ender)),
                                    (IV)(ender - ret)
                      );
-                });
+                );
                  OP(br)= NOTHING;
                  if (OP(REGNODE_p(ender)) == TAIL) {
                      NEXT_OFF(br)= 0;
                  OP(br)= NOTHING;
                  if (OP(REGNODE_p(ender)) == TAIL) {
                      NEXT_OFF(br)= 0;
@@ -12042,9 +12126,6 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
          }
      }
  
          }
      }
  
-    if (!first && SIZE_ONLY)
-       RExC_extralen += 1;                     /* BRANCHJ */
-
      *flagp = WORST;                    /* Tentatively. */
  
      skip_to_be_ignored_text(pRExC_state, &RExC_parse,
      *flagp = WORST;                    /* Tentatively. */
  
      skip_to_be_ignored_text(pRExC_state, &RExC_parse,
@@ -12067,6 +12148,11 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
             /* FIXME adding one for every branch after the first is probably
              * excessive now we have TRIE support. (hv) */
             MARK_NAUGHTY(1);
             /* FIXME adding one for every branch after the first is probably
              * excessive now we have TRIE support. (hv) */
             MARK_NAUGHTY(1);
+            if (     chain > (SSize_t) BRANCH_MAX_OFFSET
+                && ! RExC_use_BRANCHJ)
+            {
+                REQUIRE_BRANCHJ(flagp, 0);
+            }
              REGTAIL(pRExC_state, chain, latest);
         }
         chain = latest;
              REGTAIL(pRExC_state, chain, latest);
         }
         chain = latest;
@@ -12183,11 +12269,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              if (max < min) {    /* If can't match, warn and optimize to fail
                                     unconditionally */
                  reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
              if (max < min) {    /* If can't match, warn and optimize to fail
                                     unconditionally */
                  reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
-                if (PASS2) {
-                    ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
-                    NEXT_OFF(REGNODE_p(orig_emit)) =
-                                        regarglen[OPFAIL] + NODE_STEP_REGNODE;
-                }
+                ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
+                NEXT_OFF(REGNODE_p(orig_emit)) =
+                                    regarglen[OPFAIL] + NODE_STEP_REGNODE;
                  return ret;
              }
              else if (min == max && *RExC_parse == '?')
                  return ret;
              }
              else if (min == max && *RExC_parse == '?')
@@ -12236,8 +12320,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      NEXT_OFF(REGNODE_p(ret)) = 3;   /* Go over NOTHING to
                                                         LONGJMP. */
                  REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
                      NEXT_OFF(REGNODE_p(ret)) = 3;   /* Go over NOTHING to
                                                         LONGJMP. */
                  REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
-               if (SIZE_ONLY)
-                   RExC_whilem_seen++, RExC_extralen += 3;
+                RExC_whilem_seen++;
                  MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
             }
             FLAGS(REGNODE_p(ret)) = 0;
                  MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
             }
             FLAGS(REGNODE_p(ret)) = 0;
@@ -12246,10 +12329,8 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 *flagp = WORST;
             if (max > 0)
                 *flagp |= HASWIDTH;
                 *flagp = WORST;
             if (max > 0)
                 *flagp |= HASWIDTH;
-           if (!SIZE_ONLY) {
-               ARG1_SET(REGNODE_p(ret), (U16)min);
-               ARG2_SET(REGNODE_p(ret), (U16)max);
-           }
+            ARG1_SET(REGNODE_p(ret), (U16)min);
+            ARG2_SET(REGNODE_p(ret), (U16)max);
              if (max == REG_INFTY)
                  RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
  
              if (max == REG_INFTY)
                  RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
  
@@ -12298,7 +12379,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         goto do_curly;
      }
    nest_check:
         goto do_curly;
      }
    nest_check:
-    if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
+    if (!(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
         ckWARN2reg(RExC_parse,
                    "%" UTF8f " matches null string many times",
                    UTF8fARG(UTF, (RExC_parse >= origparse
         ckWARN2reg(RExC_parse,
                    "%" UTF8f " matches null string many times",
                    UTF8fARG(UTF, (RExC_parse >= origparse
@@ -12744,22 +12825,13 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
  
      PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT;
  
  
      PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT;
  
-    /* Don't bother to check for downgrading in PASS1, as it doesn't make any
-     * sizing difference, and is extra work that is thrown away */
-    if (downgradable && ! PASS2) {
-        downgradable = FALSE;
-    }
-
      if (! len_passed_in) {
          if (UTF) {
              if (UVCHR_IS_INVARIANT(code_point)) {
                  if (LOC || ! FOLD) {    /* /l defers folding until runtime */
                      *character = (U8) code_point;
                  }
      if (! len_passed_in) {
          if (UTF) {
              if (UVCHR_IS_INVARIANT(code_point)) {
                  if (LOC || ! FOLD) {    /* /l defers folding until runtime */
                      *character = (U8) code_point;
                  }
-                else { /* Here is /i and not /l. (toFOLD() is defined on just
-                          ASCII, which isn't the same thing as INVARIANT on
-                          EBCDIC, but it works there, as the extra invariants
-                          fold to themselves) */
+                else { /* Here is /i and not /l. */
                      *character = toFOLD((U8) code_point);
  
                      /* We can downgrade to an EXACT node if this character
                      *character = toFOLD((U8) code_point);
  
                      /* We can downgrade to an EXACT node if this character
@@ -12848,15 +12920,14 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
          }
      }
  
          }
      }
  
-    if (SIZE_ONLY) {
-        RExC_size += STR_SZ(len);
+    if (downgradable) {
+        change_engine_size(pRExC_state, STR_SZ(len));
      }
      }
-    else {
-        RExC_emit += STR_SZ(len);
-        STR_LEN(REGNODE_p(node)) = len;
-        if (! len_passed_in) {
-            Copy((char *) character, STRING(REGNODE_p(node)), len, char);
-        }
+
+    RExC_emit += STR_SZ(len);
+    STR_LEN(REGNODE_p(node)) = len;
+    if (! len_passed_in) {
+        Copy((char *) character, STRING(REGNODE_p(node)), len, char);
      }
  
      *flagp |= HASWIDTH;
      }
  
      *flagp |= HASWIDTH;
@@ -12874,8 +12945,7 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
          *flagp |= SIMPLE;
      }
  
          *flagp |= SIMPLE;
      }
  
-    /* The OP may not be well defined in PASS1 */
-    if (PASS2 && OP(REGNODE_p(node)) == EXACTFL) {
+    if (OP(REGNODE_p(node)) == EXACTFL) {
          RExC_contains_locale = 1;
      }
  }
          RExC_contains_locale = 1;
      }
  }
@@ -13140,8 +13210,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              /* SBOL is shared with /^/ so we set the flags so we can tell
               * /\A/ from /^/ in split. We check ret because first pass we
               * have no regop struct to set the flags on. */
              /* SBOL is shared with /^/ so we set the flags so we can tell
               * /\A/ from /^/ in split. We check ret because first pass we
               * have no regop struct to set the flags on. */
-            if (PASS2)
-                FLAGS(REGNODE_p(ret)) = 1;
+            FLAGS(REGNODE_p(ret)) = 1;
             *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 'G':
             *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 'G':
@@ -13202,7 +13271,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             *flagp |= SIMPLE;
             if (RExC_parse >= RExC_end || *(RExC_parse + 1) != '{') {
                  FLAGS(REGNODE_p(ret)) = TRADITIONAL_BOUND;
             *flagp |= SIMPLE;
             if (RExC_parse >= RExC_end || *(RExC_parse + 1) != '{') {
                  FLAGS(REGNODE_p(ret)) = TRADITIONAL_BOUND;
-                if (PASS2 && op > BOUNDA) {  /* /aa is same as /a */
+                if (op > BOUNDA) {  /* /aa is same as /a */
                      OP(REGNODE_p(ret)) = BOUNDA;
                  }
              }
                      OP(REGNODE_p(ret)) = BOUNDA;
                  }
              }
@@ -13270,7 +13339,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  RExC_parse = endbrace;
                  REQUIRE_UNI_RULES(flagp, 0);
  
                  RExC_parse = endbrace;
                  REQUIRE_UNI_RULES(flagp, 0);
  
-                if (PASS2 && op >= BOUNDA) {  /* /aa is same as /a */
+                if (op >= BOUNDA) {  /* /aa is same as /a */
                      OP(REGNODE_p(ret)) = BOUNDU;
                      length += 4;
  
                      OP(REGNODE_p(ret)) = BOUNDU;
                      length += 4;
  
@@ -13286,7 +13355,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  }
             }
  
                  }
             }
  
-            if (PASS2 && invert) {
+            if (invert) {
                  OP(REGNODE_p(ret)) += NBOUND - BOUND;
              }
             goto finish_meta_pat;
                  OP(REGNODE_p(ret)) += NBOUND - BOUND;
              }
             goto finish_meta_pat;
@@ -13351,9 +13420,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              }
  
             ret = reg_node(pRExC_state, op);
              }
  
             ret = reg_node(pRExC_state, op);
-            if (! SIZE_ONLY) {
-                FLAGS(REGNODE_p(ret)) = namedclass_to_classnum(arg);
-            }
+            FLAGS(REGNODE_p(ret)) = namedclass_to_classnum(arg);
  
             *flagp |= HASWIDTH|SIMPLE;
              /* FALLTHROUGH */
  
             *flagp |= HASWIDTH|SIMPLE;
              /* FALLTHROUGH */
@@ -13537,9 +13604,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          vFAIL("Unterminated \\g{...} pattern");
                      RExC_parse++;
                  }
                          vFAIL("Unterminated \\g{...} pattern");
                      RExC_parse++;
                  }
-                if (!SIZE_ONLY) {
-                    if (num > (I32)RExC_rx->nparens)
-                        vFAIL("Reference to nonexistent group");
+                if (num >= (I32)RExC_npar) {
+
+                    /* It might be a forward reference; we can't fail until we
+                     * know, by completing the parse to get all the groups, and
+                     * then reparsing */
+                    if (RExC_total_parens > 0)  {
+                        if (num >= RExC_total_parens)  {
+                            vFAIL("Reference to nonexistent group");
+                        }
+                    }
+                    else {
+                        REQUIRE_PARENS_PASS;
+                    }
                  }
                  RExC_sawback = 1;
                  ret = reganode(pRExC_state,
                  }
                  RExC_sawback = 1;
                  ret = reganode(pRExC_state,
@@ -13605,7 +13682,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  /* This allows us to fill a node with just enough spare so that if the final
   * character folds, its expansion is guaranteed to fit */
  #define MAX_NODE_STRING_SIZE (255-UTF8_MAXBYTES_CASE)
  /* This allows us to fill a node with just enough spare so that if the final
   * character folds, its expansion is guaranteed to fit */
  #define MAX_NODE_STRING_SIZE (255-UTF8_MAXBYTES_CASE)
-           char foldbuf[MAX_NODE_STRING_SIZE+UTF8_MAXBYTES_CASE+1];
  
             char *s0;
             U8 upper_parse = MAX_NODE_STRING_SIZE;
  
             char *s0;
             U8 upper_parse = MAX_NODE_STRING_SIZE;
@@ -13621,6 +13697,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
               * created for the new category. */
              U8 node_type = EXACT;
  
               * created for the new category. */
              U8 node_type = EXACT;
  
+            /* Assume node will be fully used; the excess is given back at the end */
+            Ptrdiff_t initial_size = STR_SZ(256);
+
              bool next_is_quantifier;
              char * oldp = NULL;
  
              bool next_is_quantifier;
              char * oldp = NULL;
  
@@ -13634,19 +13713,20 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
               * which don't participate in folds with Latin1-range characters,
               * as the latter's folds aren't known until runtime.  (We don't
               * need to figure this out until pass 2) */
               * which don't participate in folds with Latin1-range characters,
               * as the latter's folds aren't known until runtime.  (We don't
               * need to figure this out until pass 2) */
-            bool maybe_exactfu = PASS2;
+            bool maybe_exactfu = TRUE;
  
              /* To see if RExC_uni_semantics changes during parsing of the node.
               * */
              bool uni_semantics_at_node_start;
  
  
              /* To see if RExC_uni_semantics changes during parsing of the node.
               * */
              bool uni_semantics_at_node_start;
  
-            /* The node_type may change below, but since the size of the node
-             * doesn't change, it works */
-           ret = reg_node(pRExC_state, node_type);
+            /* Allocate an EXACT node.  The node_type may change below to
+             * another EXACTish node, but since the size of the node doesn't
+             * change, it works */
+            ret = regnode_guts(pRExC_state, node_type, initial_size, "exact");
+            FILL_NODE(ret, node_type);
+            RExC_emit++;
  
  
-            /* In pass1, folded, we use a temporary buffer instead of the
-             * actual node, as the node doesn't exist yet */
-           s = (SIZE_ONLY && FOLD) ? foldbuf : STRING(REGNODE_p(ret));
+           s = STRING(REGNODE_p(ret));
  
              s0 = s;
  
  
              s0 = s;
  
@@ -14001,19 +14081,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
                  if (! FOLD) {  /* The simple case, just append the literal */
  
  
                  if (! FOLD) {  /* The simple case, just append the literal */
  
-                    /* In the sizing pass, we need only the size of the
-                     * character we are appending, hence we can delay getting
-                     * its representation until PASS2. */
-                    if (SIZE_ONLY) {
-                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
-                            const STRLEN unilen = UVCHR_SKIP(ender);
-                            s += unilen;
-                            added_len = unilen;
-                        }
-                        else {
-                            s++;
-                        }
-                    } else { /* PASS2 */
                        not_fold_common:
                          if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                              U8 * new_s = uvchr_to_utf8((U8*)s, ender);
                        not_fold_common:
                          if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                              U8 * new_s = uvchr_to_utf8((U8*)s, ender);
@@ -14023,7 +14090,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          else {
                              *(s++) = (char) ender;
                          }
                          else {
                              *(s++) = (char) ender;
                          }
-                    }
                  }
                  else if (LOC && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)) {
  
                  }
                  else if (LOC && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)) {
  
@@ -14402,6 +14468,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            loopdone:   /* Jumped to when encounters something that shouldn't be
                           in the node */
  
            loopdone:   /* Jumped to when encounters something that shouldn't be
                           in the node */
  
+            change_engine_size(pRExC_state, - (initial_size - STR_SZ(len)));
+
              /* I (khw) don't know if you can get here with zero length, but the
               * old code handled this situation by creating a zero-length EXACT
               * node.  Might as well be NOTHING instead */
              /* I (khw) don't know if you can get here with zero length, but the
               * old code handled this situation by creating a zero-length EXACT
               * node.  Might as well be NOTHING instead */
@@ -14458,7 +14526,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      /* Position parse to next real character */
      skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                              FALSE /* Don't force to /x */ );
      /* Position parse to next real character */
      skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                              FALSE /* Don't force to /x */ );
-    if (   PASS2 && *RExC_parse == '{'
+    if (   *RExC_parse == '{'
          && OP(REGNODE_p(ret)) != SBOL && ! regcurly(RExC_parse))
      {
          if (RExC_strict || new_regcurly(RExC_parse, RExC_end)) {
          && OP(REGNODE_p(ret)) != SBOL && ! regcurly(RExC_parse))
      {
          if (RExC_strict || new_regcurly(RExC_parse, RExC_end)) {
@@ -15425,7 +15493,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      const bool save_fold = FOLD;    /* Temporary */
      char *save_end, *save_parse;    /* Temporaries */
      const bool in_locale = LOC;     /* we turn off /l during processing */
      const bool save_fold = FOLD;    /* Temporary */
      char *save_end, *save_parse;    /* Temporaries */
      const bool in_locale = LOC;     /* we turn off /l during processing */
-    AV* posix_warnings = NULL;
  
      GET_RE_DEBUG_FLAGS_DECL;
  
  
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -15441,117 +15508,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
       * compile time values are valid in all runtime cases */
      REQUIRE_UNI_RULES(flagp, 0);
  
       * compile time values are valid in all runtime cases */
      REQUIRE_UNI_RULES(flagp, 0);
  
-    /* This will return only an ANYOF regnode, or (unlikely) something smaller
-     * (such as EXACT).  Thus we can skip most everything if just sizing.  We
-     * call regclass to handle '[]' so as to not have to reinvent its parsing
-     * rules here (throwing away the size it computes each time).  And, we exit
-     * upon an unescaped ']' that isn't one ending a regclass.  To do both
-     * these things, we need to realize that something preceded by a backslash
-     * is escaped, so we have to keep track of backslashes */
-    if (SIZE_ONLY) {
-        UV nest_depth = 0; /* how many nested (?[...]) constructs */
-
-        while (RExC_parse < RExC_end) {
-            SV* current = NULL;
-
-            skip_to_be_ignored_text(pRExC_state, &RExC_parse,
-                                    TRUE /* Force /x */ );
-
-            switch (*RExC_parse) {
-                case '(':
-                    if (RExC_parse[1] == '?' && RExC_parse[2] == '[')
-                        nest_depth++, RExC_parse+=2;
-                    /* FALLTHROUGH */
-                default:
-                    break;
-                case '\\':
-                    /* Skip past this, so the next character gets skipped, after
-                     * the switch */
-                    RExC_parse++;
-                    if (*RExC_parse == 'c') {
-                            /* Skip the \cX notation for control characters */
-                            RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-                    }
-                    break;
-
-                case '[':
-                {
-                    /* See if this is a [:posix:] class. */
-                    bool is_posix_class = (OOB_NAMEDCLASS
-                            < handle_possible_posix(pRExC_state,
-                                                RExC_parse + 1,
-                                                NULL,
-                                                NULL,
-                                                TRUE /* checking only */));
-                    /* If it is a posix class, leave the parse pointer at the
-                     * '[' to fool regclass() into thinking it is part of a
-                     * '[[:posix:]]'. */
-                    if (! is_posix_class) {
-                        RExC_parse++;
-                    }
-
-                    /* regclass() can only return RESTART_PARSE and NEED_UTF8
-                     * if multi-char folds are allowed.  */
-                    if (!regclass(pRExC_state, flagp, depth+1,
-                                  is_posix_class, /* parse the whole char
-                                                     class only if not a
-                                                     posix class */
-                                  FALSE, /* don't allow multi-char folds */
-                                  TRUE, /* silence non-portable warnings. */
-                                  TRUE, /* strict */
-                                  FALSE, /* Require return to be an ANYOF */
-                                  &current,
-                                  &posix_warnings
-                                 ))
-                        FAIL2("panic: regclass returned failure to handle_sets, "
-                              "flags=%#" UVxf, (UV) *flagp);
-
-                    /* function call leaves parse pointing to the ']', except
-                     * if we faked it */
-                    if (is_posix_class) {
-                        RExC_parse--;
-                    }
-
-                    SvREFCNT_dec(current);   /* In case it returned something */
-                    break;
-                }
-
-                case ']':
-                    if (RExC_parse[1] == ')') {
-                        RExC_parse++;
-                        if (nest_depth--) break;
-                        node = reganode(pRExC_state, ANYOF, 0);
-                        nextchar(pRExC_state);
-                        Set_Node_Length(REGNODE_p(node),
-                                RExC_parse - oregcomp_parse + 1); /* MJD */
-                        if (in_locale) {
-                            set_regex_charset(&RExC_flags, REGEX_LOCALE_CHARSET);
-                        }
-
-                        return node;
-                    }
-                    /* We output the messages even if warnings are off, because we'll fail
-                     * the very next thing, and these give a likely diagnosis for that */
-                    if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
-                        output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
-                    }
-                    RExC_parse++;
-                    vFAIL("Unexpected ']' with no following ')' in (?[...");
-            }
-
-            RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-        }
-
-        /* We output the messages even if warnings are off, because we'll fail
-         * the very next thing, and these give a likely diagnosis for that */
-        if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
-            output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
-        }
-
-        vFAIL("Syntax error in (?[...])");
-    }
-
-    /* Pass 2 only after this. */
      ckWARNexperimental(RExC_parse,
                         WARN_EXPERIMENTAL__REGEX_SETS,
                         "The regex_sets feature is experimental");
      ckWARNexperimental(RExC_parse,
                         WARN_EXPERIMENTAL__REGEX_SETS,
                         "The regex_sets feature is experimental");
@@ -15644,8 +15600,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
  
          skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                  TRUE /* Force /x */ );
  
          skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                  TRUE /* Force /x */ );
-        if (RExC_parse >= RExC_end) {
-            Perl_croak(aTHX_ "panic: Read past end of '(?[ ])'");
+        if (RExC_parse >= RExC_end) {   /* Fail */
+            break;
          }
  
          curchar = UCHARAT(RExC_parse);
          }
  
          curchar = UCHARAT(RExC_parse);
@@ -15820,6 +15776,10 @@ redo_curchar:
                            "flags=%#" UVxf, (UV) *flagp);
                  }
  
                            "flags=%#" UVxf, (UV) *flagp);
                  }
  
+                if (! current) {
+                    break;
+                }
+
                  /* function call leaves parse pointing to the ']', except if we
                   * faked it */
                  if (is_posix_class) {
                  /* function call leaves parse pointing to the ']', except if we
                   * faked it */
                  if (is_posix_class) {
@@ -15839,6 +15799,9 @@ redo_curchar:
  
              case ')':
                  if (av_tindex_skip_len_mg(fence_stack) < 0) {
  
              case ')':
                  if (av_tindex_skip_len_mg(fence_stack) < 0) {
+                    if (UCHARAT(RExC_parse - 1) == ']')  {
+                        break;
+                    }
                      RExC_parse++;
                      vFAIL("Unexpected ')'");
                  }
                      RExC_parse++;
                      vFAIL("Unexpected ')'");
                  }
@@ -16025,6 +15988,9 @@ redo_curchar:
  
              default:
                  RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
  
              default:
                  RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+                if (RExC_parse >= RExC_end) {
+                    break;
+                }
                  vFAIL("Unexpected character");
  
            handle_operand:
                  vFAIL("Unexpected character");
  
            handle_operand:
@@ -16084,7 +16050,18 @@ redo_curchar:
          RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
      } /* End of loop parsing through the construct */
  
          RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
      } /* End of loop parsing through the construct */
  
+    vFAIL("Syntax error in (?[...])");
+
    done:
    done:
+
+    if (RExC_parse >= RExC_end || RExC_parse[1] != ')') {
+        if (RExC_parse < RExC_end) {
+            RExC_parse++;
+        }
+
+        vFAIL("Unexpected ']' with no following ')' in (?[...");
+    }
+
      if (av_tindex_skip_len_mg(fence_stack) >= 0) {
          vFAIL("Unmatched (");
      }
      if (av_tindex_skip_len_mg(fence_stack) >= 0) {
          vFAIL("Unmatched (");
      }
@@ -16368,8 +16345,10 @@ S_output_or_return_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_war
                      PREPARE_TO_DIE;
                  }
              }
                      PREPARE_TO_DIE;
                  }
              }
-            Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s",
+            if (TO_OUTPUT_WARNINGS(RExC_parse)) {
+                Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s",
                                             SvPVX(msg));
                                             SvPVX(msg));
+            }
              SvREFCNT_dec_NN(msg);
          }
      }
              SvREFCNT_dec_NN(msg);
          }
      }
@@ -16576,7 +16555,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      const regnode_offset orig_emit = RExC_emit; /* Save the original RExC_emit in
          case we need to change the emitted regop to an EXACT. */
      const char * orig_parse = RExC_parse;
      const regnode_offset orig_emit = RExC_emit; /* Save the original RExC_emit in
          case we need to change the emitted regop to an EXACT. */
      const char * orig_parse = RExC_parse;
-    const SSize_t orig_size = RExC_size;
      bool posixl_matches_all = FALSE; /* Does /l class have both e.g. \W,\w ? */
  
      /* This variable is used to mark where the end in the input is of something
      bool posixl_matches_all = FALSE; /* Does /l class have both e.g. \W,\w ? */
  
      /* This variable is used to mark where the end in the input is of something
@@ -16587,8 +16565,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      char *not_posix_region_end = RExC_parse - 1;
  
      AV* posix_warnings = NULL;
      char *not_posix_region_end = RExC_parse - 1;
  
      AV* posix_warnings = NULL;
-    const bool do_posix_warnings =     return_posix_warnings
-                                   || (PASS2 && ckWARN(WARN_REGEXP));
+    const bool do_posix_warnings = return_posix_warnings || ckWARN(WARN_REGEXP);
      U8 op = END;    /* The returned node-type, initialized to an impossible
                         one.  */
      U8 anyof_flags = 0;   /* flag bits if the node is an ANYOF-type */
      U8 op = END;    /* The returned node-type, initialized to an impossible
                         one.  */
      U8 anyof_flags = 0;   /* flag bits if the node is an ANYOF-type */
@@ -16610,14 +16587,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      allow_multi_folds = FALSE;
  #endif
  
      allow_multi_folds = FALSE;
  #endif
  
-    if (SIZE_ONLY) {
-       listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
-    }
-    else {
-       listsv = newSVpvs_flags("# comment\n", SVs_TEMP);
-       initial_listsv_len = SvCUR(listsv);
-        SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated.  */
-    }
+    listsv = newSVpvs_flags("# comment\n", SVs_TEMP);
+    initial_listsv_len = SvCUR(listsv);
+    SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated.  */
  
      SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
  
  
      SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
  
@@ -16638,7 +16610,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                  &not_posix_region_end,
                                                  NULL,
                                                  TRUE /* checking only */);
                                                  &not_posix_region_end,
                                                  NULL,
                                                  TRUE /* checking only */);
-        if (PASS2 && maybe_class >= OOB_NAMEDCLASS && do_posix_warnings) {
+        if (maybe_class >= OOB_NAMEDCLASS && do_posix_warnings) {
              ckWARN4reg(not_posix_region_end,
                      "POSIX syntax [%c %c] belongs inside character classes%s",
                      *RExC_parse, *RExC_parse,
              ckWARN4reg(not_posix_region_end,
                      "POSIX syntax [%c %c] belongs inside character classes%s",
                      *RExC_parse, *RExC_parse,
@@ -16920,7 +16892,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     e = RExC_parse;
                     n = 1;
                 }
                     e = RExC_parse;
                     n = 1;
                 }
-               if (!SIZE_ONLY) {
+               {
                      char* name = RExC_parse;
                      char* base_name;    /* name after any packages are stripped */
                      char* lookup_name = NULL;
                      char* name = RExC_parse;
                      char* base_name;    /* name after any packages are stripped */
                      char* lookup_name = NULL;
@@ -17111,7 +17083,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              }
                         }
                      }
                              }
                         }
                      }
-                } /* End of actually getting the values in pass 2 */
+                }
  
                 RExC_parse = e + 1;
                  namedclass = ANYOF_UNIPROP;  /* no official name, but it's
  
                 RExC_parse = e + 1;
                  namedclass = ANYOF_UNIPROP;  /* no official name, but it's
@@ -17182,8 +17154,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
                              vFAIL("Need exactly 3 octal digits");
                          }
                              RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
                              vFAIL("Need exactly 3 octal digits");
                          }
-                        else if (! SIZE_ONLY /* like \08, \178 */
-                                 && numlen < 3
+                        else if (   numlen < 3 /* like \08, \178 */
                                   && RExC_parse < RExC_end
                                   && isDIGIT(*RExC_parse)
                                   && ckWARN(WARN_REGEXP))
                                   && RExC_parse < RExC_end
                                   && isDIGIT(*RExC_parse)
                                   && ckWARN(WARN_REGEXP))
@@ -17198,7 +17169,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 }
             default:
                 /* Allow \_ to not give an error */
                 }
             default:
                 /* Allow \_ to not give an error */
-               if (!SIZE_ONLY && isWORDCHAR(value) && value != '_') {
+               if (isWORDCHAR(value) && value != '_') {
                      if (strict) {
                          vFAIL2("Unrecognized escape \\%c in character class",
                                 (int)value);
                      if (strict) {
                          vFAIL2("Unrecognized escape \\%c in character class",
                                 (int)value);
@@ -17222,24 +17193,22 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              * literal, as is the character that began the false range, i.e.
              * the 'a' in the examples */
             if (range) {
              * literal, as is the character that began the false range, i.e.
              * the 'a' in the examples */
             if (range) {
-               if (!SIZE_ONLY) {
-                   const int w = (RExC_parse >= rangebegin)
-                                  ? RExC_parse - rangebegin
-                                  : 0;
-                    if (strict) {
-                        vFAIL2utf8f(
-                            "False [] range \"%" UTF8f "\"",
-                            UTF8fARG(UTF, w, rangebegin));
-                    }
-                    else {
-                        ckWARN2reg(RExC_parse,
-                            "False [] range \"%" UTF8f "\"",
-                            UTF8fARG(UTF, w, rangebegin));
-                        cp_list = add_cp_to_invlist(cp_list, '-');
-                        cp_foldable_list = add_cp_to_invlist(cp_foldable_list,
-                                                             prevvalue);
-                    }
-               }
+                const int w = (RExC_parse >= rangebegin)
+                                ? RExC_parse - rangebegin
+                                : 0;
+                if (strict) {
+                    vFAIL2utf8f(
+                        "False [] range \"%" UTF8f "\"",
+                        UTF8fARG(UTF, w, rangebegin));
+                }
+                else {
+                    ckWARN2reg(RExC_parse,
+                        "False [] range \"%" UTF8f "\"",
+                        UTF8fARG(UTF, w, rangebegin));
+                    cp_list = add_cp_to_invlist(cp_list, '-');
+                    cp_foldable_list = add_cp_to_invlist(cp_foldable_list,
+                                                            prevvalue);
+                }
  
                 range = 0; /* this was not a true range */
                  element_count += 2; /* So counts for three values */
  
                 range = 0; /* this was not a true range */
                  element_count += 2; /* So counts for three values */
@@ -17296,32 +17265,30 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  /* The above-Latin1 characters are not subject to locale rules.
                   * Just add them, in the second pass, to the
                   * unconditionally-matched list */
                  /* The above-Latin1 characters are not subject to locale rules.
                   * Just add them, in the second pass, to the
                   * unconditionally-matched list */
-                if (! SIZE_ONLY) {
-
-                    /* Get the list of the above-Latin1 code points this
-                     * matches */
-                    _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
-                                          PL_XPosix_ptrs[classnum],
-
-                                          /* Odd numbers are complements, like
-                                           * NDIGIT, NASCII, ... */
-                                          namedclass % 2 != 0,
-                                          &scratch_list);
-                    /* Checking if 'cp_list' is NULL first saves an extra
-                     * clone.  Its reference count will be decremented at the
-                     * next union, etc, or if this is the only instance, at the
-                     * end of the routine */
-                    if (! cp_list) {
-                        cp_list = scratch_list;
-                    }
-                    else {
-                        _invlist_union(cp_list, scratch_list, &cp_list);
-                        SvREFCNT_dec_NN(scratch_list);
-                    }
-                    continue;   /* Go get next character */
+
+                /* Get the list of the above-Latin1 code points this
+                    * matches */
+                _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
+                                        PL_XPosix_ptrs[classnum],
+
+                                        /* Odd numbers are complements, like
+                                        * NDIGIT, NASCII, ... */
+                                        namedclass % 2 != 0,
+                                        &scratch_list);
+                /* Checking if 'cp_list' is NULL first saves an extra
+                    * clone.  Its reference count will be decremented at the
+                    * next union, etc, or if this is the only instance, at the
+                    * end of the routine */
+                if (! cp_list) {
+                    cp_list = scratch_list;
                  }
                  }
+                else {
+                    _invlist_union(cp_list, scratch_list, &cp_list);
+                    SvREFCNT_dec_NN(scratch_list);
+                }
+                continue;   /* Go get next character */
              }
              }
-            else if (! SIZE_ONLY) {
+            else {
  
                  /* Here, not in pass1 (in that pass we skip calculating the
                   * contents of this class), and is not /l, or is a POSIX class
  
                  /* Here, not in pass1 (in that pass we skip calculating the
                   * contents of this class), and is not /l, or is a POSIX class
@@ -17439,7 +17406,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
                      /* a bad range like \w-, [:word:]- ? */
                      if (namedclass > OOB_NAMEDCLASS) {
  
                      /* a bad range like \w-, [:word:]- ? */
                      if (namedclass > OOB_NAMEDCLASS) {
-                        if (strict || (PASS2 && ckWARN(WARN_REGEXP))) {
+                        if (strict || ckWARN(WARN_REGEXP)) {
                              const int w = RExC_parse >= rangebegin
                                            ?  RExC_parse - rangebegin
                                            : 0;
                              const int w = RExC_parse >= rangebegin
                                            ?  RExC_parse - rangebegin
                                            : 0;
@@ -17453,9 +17420,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                      w, w, rangebegin);
                              }
                          }
                                      w, w, rangebegin);
                              }
                          }
-                        if (!SIZE_ONLY) {
-                            cp_list = add_cp_to_invlist(cp_list, '-');
-                        }
+                        cp_list = add_cp_to_invlist(cp_list, '-');
                          element_count++;
                      } else
                          range = 1;     /* yeah, it's a range! */
                          element_count++;
                      } else
                          range = 1;     /* yeah, it's a range! */
@@ -17541,7 +17506,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              }
          }
  
              }
          }
  
-        if (strict && PASS2 && ckWARN(WARN_REGEXP)) {
+        if (strict && ckWARN(WARN_REGEXP)) {
              if (range) {
  
                  /* If the range starts above 255, everything is portable and
              if (range) {
  
                  /* If the range starts above 255, everything is portable and
@@ -17673,48 +17638,45 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          }
  
          /* Deal with this element of the class */
          }
  
          /* Deal with this element of the class */
-       if (! SIZE_ONLY) {
  
  #ifndef EBCDIC
  
  #ifndef EBCDIC
-            cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
-                                                     prevvalue, value);
+        cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
+                                                    prevvalue, value);
  #else
  #else
-            /* On non-ASCII platforms, for ranges that span all of 0..255, and
-             * ones that don't require special handling, we can just add the
-             * range like we do for ASCII platforms */
-            if ((UNLIKELY(prevvalue == 0) && value >= 255)
-                || ! (prevvalue < 256
-                      && (unicode_range
-                          || (! non_portable_endpoint
-                              && ((isLOWER_A(prevvalue) && isLOWER_A(value))
-                                  || (isUPPER_A(prevvalue)
-                                      && isUPPER_A(value)))))))
-            {
-                cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
-                                                         prevvalue, value);
+        /* On non-ASCII platforms, for ranges that span all of 0..255, and ones
+         * that don't require special handling, we can just add the range like
+         * we do for ASCII platforms */
+        if ((UNLIKELY(prevvalue == 0) && value >= 255)
+            || ! (prevvalue < 256
+                    && (unicode_range
+                        || (! non_portable_endpoint
+                            && ((isLOWER_A(prevvalue) && isLOWER_A(value))
+                                || (isUPPER_A(prevvalue)
+                                    && isUPPER_A(value)))))))
+        {
+            cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
+                                                        prevvalue, value);
+        }
+        else {
+            /* Here, requires special handling.  This can be because it is a
+             * range whose code points are considered to be Unicode, and so
+             * must be individually translated into native, or because its a
+             * subrange of 'A-Z' or 'a-z' which each aren't contiguous in
+             * EBCDIC, but we have defined them to include only the "expected"
+             * upper or lower case ASCII alphabetics.  Subranges above 255 are
+             * the same in native and Unicode, so can be added as a range */
+            U8 start = NATIVE_TO_LATIN1(prevvalue);
+            unsigned j;
+            U8 end = (value < 256) ? NATIVE_TO_LATIN1(value) : 255;
+            for (j = start; j <= end; j++) {
+                cp_foldable_list = add_cp_to_invlist(cp_foldable_list, LATIN1_TO_NATIVE(j));
              }
              }
-            else {
-                /* Here, requires special handling.  This can be because it is
-                 * a range whose code points are considered to be Unicode, and
-                 * so must be individually translated into native, or because
-                 * its a subrange of 'A-Z' or 'a-z' which each aren't
-                 * contiguous in EBCDIC, but we have defined them to include
-                 * only the "expected" upper or lower case ASCII alphabetics.
-                 * Subranges above 255 are the same in native and Unicode, so
-                 * can be added as a range */
-                U8 start = NATIVE_TO_LATIN1(prevvalue);
-                unsigned j;
-                U8 end = (value < 256) ? NATIVE_TO_LATIN1(value) : 255;
-                for (j = start; j <= end; j++) {
-                    cp_foldable_list = add_cp_to_invlist(cp_foldable_list, LATIN1_TO_NATIVE(j));
-                }
-                if (value > 255) {
-                    cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
-                                                             256, value);
-                }
+            if (value > 255) {
+                cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
+                                                            256, value);
              }
              }
-#endif
          }
          }
+#endif
  
         range = 0; /* this range (if it was one) is done now */
      } /* End of loop through all the text within the brackets */
  
         range = 0; /* this range (if it was one) is done now */
      } /* End of loop through all the text within the brackets */
@@ -17990,21 +17952,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
               * the parse */
              const char * cur_parse = RExC_parse;
              RExC_parse = (char *)orig_parse;
               * the parse */
              const char * cur_parse = RExC_parse;
              RExC_parse = (char *)orig_parse;
-                if (PL_regkind[op] == POSIXD) {
-                    if (op == POSIXL) {
-                        RExC_contains_locale = 1;
-                    }
-                    if (invert) {
-                        op += NPOSIXD - POSIXD;
-                    }
+            if (PL_regkind[op] == POSIXD) {
+                if (op == POSIXL) {
+                    RExC_contains_locale = 1;
+                }
+                if (invert) {
+                    op += NPOSIXD - POSIXD;
                  }
                  }
+            }
  
              ret = reg_node(pRExC_state, op);
  
              if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
  
              ret = reg_node(pRExC_state, op);
  
              if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
-                if (! SIZE_ONLY) {
-                    FLAGS(REGNODE_p(ret)) = arg;
-                }
+                FLAGS(REGNODE_p(ret)) = arg;
                  *flagp |= HASWIDTH|SIMPLE;
              }
              else if (PL_regkind[op] == EXACT) {
                  *flagp |= HASWIDTH|SIMPLE;
              }
              else if (PL_regkind[op] == EXACT) {
@@ -18027,20 +17987,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          }
      }
  
          }
      }
  
-    /* Assume we are going to generate an ANYOF-type node. */
-    op = (posixl)
-          ? ANYOFPOSIXL
-          : (LOC)
-             ? ANYOFL
-             : ANYOF;
-    ret = reganode(pRExC_state, op, 0);
-
-    if (SIZE_ONLY) {
-        return ret;
-    }
-
-    /****** !SIZE_ONLY (Pass 2) AFTER HERE *********/
-
      /* If folding, we calculate all characters that could fold to or from the
       * ones already on the list */
      if (cp_foldable_list) {
      /* If folding, we calculate all characters that could fold to or from the
       * ones already on the list */
      if (cp_foldable_list) {
@@ -18422,19 +18368,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      }
  
      if (ret_invlist) {
      }
  
      if (ret_invlist) {
-        assert(cp_list);
-
          *ret_invlist = cp_list;
          SvREFCNT_dec(swash);
  
          *ret_invlist = cp_list;
          SvREFCNT_dec(swash);
  
-        /* Discard the generated node */
-        if (SIZE_ONLY) {
-            RExC_size = orig_size;
-        }
-        else {
-            RExC_emit = orig_emit;
-        }
-        return orig_emit;
+        return RExC_emit;
      }
  
      /* Some character classes are equivalent to other nodes.  Such nodes take
      }
  
      /* Some character classes are equivalent to other nodes.  Such nodes take
@@ -18727,13 +18664,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      }
  
      /* It's going to be an ANYOF node. */
      }
  
      /* It's going to be an ANYOF node. */
-    OP(REGNODE_p(ret)) = (use_anyofd)
+    op = (use_anyofd)
           ? ANYOFD
           : ((posixl)
              ? ANYOFPOSIXL
              : ((LOC)
                 ? ANYOFL
                 : ANYOF));
           ? ANYOFD
           : ((posixl)
              ? ANYOFPOSIXL
              : ((LOC)
                 ? ANYOFL
                 : ANYOF));
+    ret = regnode_guts(pRExC_state, op, regarglen[op], "anyof");
+    FILL_NODE(ret, op);        /* We set the argument later */
+    RExC_emit += 1 + regarglen[op];
      ANYOF_FLAGS(REGNODE_p(ret)) = anyof_flags;
  
      /* Here, <cp_list> contains all the code points we can determine at
      ANYOF_FLAGS(REGNODE_p(ret)) = anyof_flags;
  
      /* Here, <cp_list> contains all the code points we can determine at
@@ -19233,6 +19173,28 @@ S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size)
      PERL_ARGS_ASSERT_CHANGE_ENGINE_SIZE;
  
      RExC_size += size;
      PERL_ARGS_ASSERT_CHANGE_ENGINE_SIZE;
  
      RExC_size += size;
+
+    Renewc(RExC_rxi,
+           sizeof(regexp_internal) + (RExC_size + 1) * sizeof(regnode),
+                                                /* +1 for REG_MAGIC */
+           char,
+           regexp_internal);
+    if ( RExC_rxi == NULL )
+       FAIL("Regexp out of space");
+    RXi_SET(RExC_rx, RExC_rxi);
+
+    RExC_emit_start = RExC_rxi->program;
+    if (size > 0) {
+        Zero(REGNODE_p(RExC_emit), size, regnode);
+    }
+
+#ifdef RE_TRACK_PATTERN_OFFSETS
+    Renew(RExC_offsets, 2*RExC_size+1, U32);
+    if (size > 0) {
+        Zero(RExC_offsets + 2*(RExC_size - size) + 1, 2 * size, U32);
+    }
+    RExC_offsets[0] = 2*RExC_size+1;
+#endif
  }
  
  STATIC regnode_offset
  }
  
  STATIC regnode_offset
@@ -19250,21 +19212,15 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
  
      PERL_ARGS_ASSERT_REGNODE_GUTS;
  
  
      PERL_ARGS_ASSERT_REGNODE_GUTS;
  
-    assert(extra_size >= regarglen[op]);
-
-    if (SIZE_ONLY) {
-       SIZE_ALIGN(RExC_size);
+    SIZE_ALIGN(RExC_size);
      change_engine_size(pRExC_state, (Ptrdiff_t) 1 + extra_size);
      change_engine_size(pRExC_state, (Ptrdiff_t) 1 + extra_size);
-       return(ret);
-    }
-    if (REGNODE_p(RExC_emit) >= RExC_emit_bound)
-        Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d, %p>=%p",
-                  op, (void*)REGNODE_p(RExC_emit), (void*)RExC_emit_bound);
-
      NODE_ALIGN_FILL(REGNODE_p(ret));
  #ifndef RE_TRACK_PATTERN_OFFSETS
      PERL_UNUSED_ARG(name);
      NODE_ALIGN_FILL(REGNODE_p(ret));
  #ifndef RE_TRACK_PATTERN_OFFSETS
      PERL_UNUSED_ARG(name);
+    PERL_UNUSED_ARG(op);
  #else
  #else
+    assert(extra_size >= regarglen[op] || PL_regkind[op] == ANYOF);
+
      if (RExC_offsets) {         /* MJD */
         MJD_OFFSET_DEBUG(
                ("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n",
      if (RExC_offsets) {         /* MJD */
         MJD_OFFSET_DEBUG(
                ("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n",
@@ -19288,16 +19244,14 @@ STATIC regnode_offset /* Location. */
  S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
  {
      const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reg_node");
  S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
  {
      const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reg_node");
+    regnode_offset ptr = ret;
  
      PERL_ARGS_ASSERT_REG_NODE;
  
      assert(regarglen[op] == 0);
  
  
      PERL_ARGS_ASSERT_REG_NODE;
  
      assert(regarglen[op] == 0);
  
-    if (PASS2) {
-        regnode_offset ptr = ret;
-        FILL_ADVANCE_NODE(ptr, op);
-        RExC_emit = ptr;
-    }
+    FILL_ADVANCE_NODE(ptr, op);
+    RExC_emit = ptr;
      return(ret);
  }
  
      return(ret);
  }
  
@@ -19308,17 +19262,15 @@ STATIC regnode_offset /* Location. */
  S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
  {
      const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reganode");
  S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
  {
      const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reganode");
+    regnode_offset ptr = ret;
  
      PERL_ARGS_ASSERT_REGANODE;
  
      /* ANYOF are special cased to allow non-length 1 args */
  
      PERL_ARGS_ASSERT_REGANODE;
  
      /* ANYOF are special cased to allow non-length 1 args */
-    assert(regarglen[op] == 1 || PL_regkind[op] == ANYOF);
+    assert(regarglen[op] == 1);
  
  
-    if (PASS2) {
-        regnode_offset ptr = ret;
-        FILL_ADVANCE_NODE_ARG(ptr, op, arg);
-        RExC_emit = ptr;
-    }
+    FILL_ADVANCE_NODE_ARG(ptr, op, arg);
+    RExC_emit = ptr;
      return(ret);
  }
  
      return(ret);
  }
  
@@ -19328,16 +19280,14 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
      /* emit a node with U32 and I32 arguments */
  
      const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reg2Lanode");
      /* emit a node with U32 and I32 arguments */
  
      const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reg2Lanode");
+    regnode_offset ptr = ret;
  
      PERL_ARGS_ASSERT_REG2LANODE;
  
      assert(regarglen[op] == 2);
  
  
      PERL_ARGS_ASSERT_REG2LANODE;
  
      assert(regarglen[op] == 2);
  
-    if (PASS2) {
-        regnode_offset ptr = ret;
-        FILL_ADVANCE_NODE_2L_ARG(ptr, op, arg1, arg2);
-        RExC_emit = ptr;
-    }
+    FILL_ADVANCE_NODE_2L_ARG(ptr, op, arg1, arg2);
+    RExC_emit = ptr;
      return(ret);
  }
  
      return(ret);
  }
  
@@ -19350,9 +19300,8 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
  * IMPORTANT NOTE - it is the *callers* responsibility to correctly
  * set up NEXT_OFF() of the inserted node if needed. Something like this:
  *
  * IMPORTANT NOTE - it is the *callers* responsibility to correctly
  * set up NEXT_OFF() of the inserted node if needed. Something like this:
  *
-* reginsert(pRExC, OPFAIL, orig_emit, depth+1);
-* if (PASS2)
-*     NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
+*   reginsert(pRExC, OPFAIL, orig_emit, depth+1);
+*   NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
  *
  * ALSO NOTE - FLAGS(newly-inserted-operator) will be set to 0 as well.
  */
  *
  * ALSO NOTE - FLAGS(newly-inserted-operator) will be set to 0 as well.
  */
@@ -19372,13 +19321,10 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
      PERL_UNUSED_ARG(depth);
  /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
      DEBUG_PARSE_FMT("inst"," - %s", PL_reg_name[op]);
      PERL_UNUSED_ARG(depth);
  /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
      DEBUG_PARSE_FMT("inst"," - %s", PL_reg_name[op]);
-    if (SIZE_ONLY) {
-       RExC_size += size;
-       return;
-    }
      assert(!RExC_study_started); /* I believe we should never use reginsert once we have started
                                      studying. If this is wrong then we need to adjust RExC_recurse
                                      below like we do with RExC_open_parens/RExC_close_parens. */
      assert(!RExC_study_started); /* I believe we should never use reginsert once we have started
                                      studying. If this is wrong then we need to adjust RExC_recurse
                                      below like we do with RExC_open_parens/RExC_close_parens. */
+    change_engine_size(pRExC_state, (Ptrdiff_t) size);
      src = REGNODE_p(RExC_emit);
      RExC_emit += size;
      dst = REGNODE_p(RExC_emit);
      src = REGNODE_p(RExC_emit);
      RExC_emit += size;
      dst = REGNODE_p(RExC_emit);
@@ -19472,9 +19418,6 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
      PERL_UNUSED_ARG(depth);
  #endif
  
      PERL_UNUSED_ARG(depth);
  #endif
  
-    if (SIZE_ONLY)
-       return;
-
      /* Find last node. */
      scan = (regnode_offset) p;
      for (;;) {
      /* Find last node. */
      scan = (regnode_offset) p;
      for (;;) {
@@ -19532,9 +19475,6 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
      PERL_ARGS_ASSERT_REGTAIL_STUDY;
  
  
      PERL_ARGS_ASSERT_REGTAIL_STUDY;
  
  
-    if (SIZE_ONLY)
-        return exact;
-
      /* Find last node. */
  
      scan = p;
      /* Find last node. */
  
      scan = p;
@@ -20275,6 +20215,9 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
  
      PERL_ARGS_ASSERT_PREGFREE2;
  
  
      PERL_ARGS_ASSERT_PREGFREE2;
  
+    if (! r)
+        return;
+
      if (r->mother_re) {
          ReREFCNT_dec(r->mother_re);
      } else {
      if (r->mother_re) {
          ReREFCNT_dec(r->mother_re);
      } else {
@@ -20420,6 +20363,10 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
  
      PERL_ARGS_ASSERT_REGFREE_INTERNAL;
  
  
      PERL_ARGS_ASSERT_REGFREE_INTERNAL;
  
+    if (! ri) {
+        return;
+    }
+
      DEBUG_COMPILE_r({
         if (!PL_colorset)
             reginitcolors();
      DEBUG_COMPILE_r({
         if (!PL_colorset)
             reginitcolors();