This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Remove another sizing pass relict
[perl5.git] / regcomp.c
index 8f338ae..1d9ff63 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -139,23 +139,31 @@ struct RExC_state_t {
                                            corresponding to copy_start */
     SSize_t    whilem_seen;            /* number of WHILEM in this expr */
     regnode    *emit_start;            /* Start of emitted-code area */
-    regnode    *emit_bound;            /* First regnode outside of the
-                                           allocated space */
     regnode_offset emit;               /* Code-emit pointer */
     I32                naughty;                /* How bad is this pattern? */
     I32                sawback;                /* Did we see \1, ...? */
     U32                seen;
     SSize_t    size;                   /* Number of regnode equivalents in
                                            pattern */
-    I32         npar;                   /* Capture buffer count, (OPEN) plus
-                                           one. ("par" 0 is the whole
-                                           pattern)*/
-    I32         total_par;              /* Capture buffer count after parse
-                                           completed, (OPEN) plus one. ("par" 0
-                                           is the whole pattern)*/
+
+    /* position beyond 'precomp' of the warning message furthest away from
+     * 'precomp'.  During the parse, no warnings are raised for any problems
+     * earlier in the parse than this position.  This works if warnings are
+     * raised the first time a given spot is parsed, and if only one
+     * independent warning is raised for any given spot */
+    Size_t     latest_warn_offset;
+
+    I32         npar;                   /* Capture buffer count so far in the
+                                           parse, (OPEN) plus one. ("par" 0 is
+                                           the whole pattern)*/
+    I32         total_par;              /* During initial parse, is either 0,
+                                           or -1; the latter indicating a
+                                           reparse is needed.  After that pass,
+                                           it is what 'npar' became after the
+                                           pass.  Hence, it being > 0 indicates
+                                           we are in a reparse situation */
     I32                nestroot;               /* root parens we are in - used by
                                            accept */
-    I32                extralen;
     I32                seen_zerolen;
     regnode_offset *open_parens;       /* offsets to open parens */
     regnode_offset *close_parens;      /* offsets to close parens */
@@ -211,11 +219,11 @@ struct RExC_state_t {
 #define RExC_mysv2     (pRExC_state->mysv2)
 
 #endif
-    bool        seen_unfolded_sharp_s;
+    bool        seen_d_op;
     bool        strict;
     bool        study_started;
     bool        in_script_run;
-    bool        pass1;
+    bool        use_BRANCHJ;
 };
 
 #define RExC_flags     (pRExC_state->flags)
@@ -230,26 +238,18 @@ struct RExC_state_t {
 #define RExC_start     (pRExC_state->start)
 #define RExC_end       (pRExC_state->end)
 #define RExC_parse     (pRExC_state->parse)
+#define RExC_latest_warn_offset (pRExC_state->latest_warn_offset )
 #define RExC_whilem_seen       (pRExC_state->whilem_seen)
+#define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
+                                                   under /d from /u ? */
 
-/* Set during the sizing pass when there is a LATIN SMALL LETTER SHARP S in any
- * EXACTF node, hence was parsed under /di rules.  If later in the parse,
- * something forces the pattern into using /ui rules, the sharp s should be
- * folded into the sequence 'ss', which takes up more space than previously
- * calculated.  This means that the sizing pass needs to be restarted.  (The
- * node also becomes an EXACTFU_SS.)  For all other characters, an EXACTF node
- * that gets converted to /ui (and EXACTFU) occupies the same amount of space,
- * so there is no need to resize [perl #125990]. */
-#define RExC_seen_unfolded_sharp_s (pRExC_state->seen_unfolded_sharp_s)
 
 #ifdef RE_TRACK_PATTERN_OFFSETS
 #  define RExC_offsets (RExC_rxi->u.offsets) /* I am not like the
                                                          others */
 #endif
 #define RExC_emit      (pRExC_state->emit)
-#define RExC_pass1     (pRExC_state->pass1)
 #define RExC_emit_start        (pRExC_state->emit_start)
-#define RExC_emit_bound        (pRExC_state->emit_bound)
 #define RExC_sawback   (pRExC_state->sawback)
 #define RExC_seen      (pRExC_state->seen)
 #define RExC_size      (pRExC_state->size)
@@ -257,7 +257,6 @@ struct RExC_state_t {
 #define RExC_npar      (pRExC_state->npar)
 #define RExC_total_parens      (pRExC_state->total_par)
 #define RExC_nestroot   (pRExC_state->nestroot)
-#define RExC_extralen  (pRExC_state->extralen)
 #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
 #define RExC_utf8      (pRExC_state->utf8)
 #define RExC_uni_semantics     (pRExC_state->uni_semantics)
@@ -284,7 +283,7 @@ struct RExC_state_t {
 #define RExC_study_started      (pRExC_state->study_started)
 #define RExC_warn_text (pRExC_state->warn_text)
 #define RExC_in_script_run      (pRExC_state->in_script_run)
-#define RExC_use_BRANCHJ        (!SIZE_ONLY && RExC_extralen)
+#define RExC_use_BRANCHJ        (pRExC_state->use_BRANCHJ)
 
 /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
  * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -341,56 +340,66 @@ struct RExC_state_t {
 
 #define REQUIRE_UTF8(flagp) STMT_START {                                   \
                                      if (!UTF) {                           \
-                                         assert(PASS1);                    \
                                          *flagp = RESTART_PARSE|NEED_UTF8; \
                                          return 0;                         \
                                      }                                     \
                              } STMT_END
 
-/* Change from /d into /u rules, and restart the parse if we've already seen
- * something whose size would increase as a result, by setting *flagp and
- * returning 'restart_retval'.  RExC_uni_semantics is a flag that indicates
- * we've changed to /u during the parse.  */
+/* Change from /d into /u rules, and restart the parse.  RExC_uni_semantics is
+ * a flag that indicates we've changed to /u during the parse.  */
 #define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
     STMT_START {                                                            \
             if (DEPENDS_SEMANTICS) {                                        \
-                assert(PASS1);                                              \
                 set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
                 RExC_uni_semantics = 1;                                     \
-                if (RExC_seen_unfolded_sharp_s) {                           \
+                if (RExC_seen_d_op && LIKELY(RExC_total_parens >= 0)) {     \
+                    /* No need to restart the parse if we haven't seen      \
+                     * anything that differs between /u and /d, and no need \
+                     * to restart immediately if we're going to reparse     \
+                     * anyway to count parens */                            \
                     *flagp |= RESTART_PARSE;                                \
                     return restart_retval;                                  \
                 }                                                           \
             }                                                               \
     } STMT_END
 
-/* Executes a return statement with the value 'X', if 'flags' contains any of
- * 'RESTART_PARSE', 'NEED_UTF8', or 'extra'.  If so, *flagp is set to those
- * flags */
-#define RETURN_X_ON_RESTART_OR_FLAGS(X, flags, flagp, extra)                \
+#define BRANCH_MAX_OFFSET   U16_MAX
+#define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
+    STMT_START {                                                            \
+                RExC_use_BRANCHJ = 1;                                       \
+                if (LIKELY(RExC_total_parens >= 0)) {                       \
+                    /* No need to restart the parse immediately if we're    \
+                     * going to reparse anyway to count parens */           \
+                    *flagp |= RESTART_PARSE;                                \
+                    return restart_retval;                                  \
+                }                                                           \
+    } STMT_END
+
+#define REQUIRE_PARENS_PASS                                                 \
+    STMT_START {                                                            \
+                    if (RExC_total_parens == 0) RExC_total_parens = -1;     \
+    } STMT_END
+
+/* This is used to return failure (zero) early from the calling function if
+ * various flags in 'flags' are set.  Two flags always cause a return:
+ * 'RESTART_PARSE' and 'NEED_UTF8'.   'extra' can be used to specify any
+ * additional flags that should cause a return; 0 if none.  If the return will
+ * be done, '*flagp' is first set to be all of the flags that caused the
+ * return. */
+#define RETURN_FAIL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
     STMT_START {                                                            \
             if ((flags) & (RESTART_PARSE|NEED_UTF8|(extra))) {              \
                 *(flagp) = (flags) & (RESTART_PARSE|NEED_UTF8|(extra));     \
-                return X;                                                   \
+                return 0;                                                   \
             }                                                               \
     } STMT_END
 
-#define RETURN_FAIL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
-                    RETURN_X_ON_RESTART_OR_FLAGS(0,flags,flagp,extra)
-
-#define RETURN_X_ON_RESTART(X, flags,flagp)                                 \
-                        RETURN_X_ON_RESTART_OR_FLAGS( X, flags, flagp, 0)
-
-
-#define RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp,extra)                  \
-            if (*(flagp) & (RESTART_PARSE|(extra))) return 0
-
 #define MUST_RESTART(flags) ((flags) & (RESTART_PARSE))
 
 #define RETURN_FAIL_ON_RESTART(flags,flagp)                                 \
-                                    RETURN_X_ON_RESTART(0, flags,flagp)
+                        RETURN_FAIL_ON_RESTART_OR_FLAGS( flags, flagp, 0)
 #define RETURN_FAIL_ON_RESTART_FLAGP(flagp)                                 \
-                            RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp, 0)
+                                    if (MUST_RESTART(*(flagp))) return 0
 
 /* This converts the named class defined in regcomp.h to its equivalent class
  * number defined in handy.h. */
@@ -671,7 +680,7 @@ static const scan_data_t zero_scan_data = {
 #define REPORT_LOCATION_ARGS(xC)                                            \
     UTF8fARG(UTF,                                                           \
              (xI(xC) > eI) /* Don't run off end */                          \
-              ? eC - sC   /* Length before the <--HERE */                   \
+              ? eI - sI   /* Length before the <--HERE */                   \
               : ((xI_offset(xC) >= 0)                                       \
                  ? xI_offset(xC)                                            \
                  : (Perl_croak(aTHX_ "panic: %s: %d: negative offset: %"    \
@@ -693,6 +702,10 @@ static const scan_data_t zero_scan_data = {
     STMT_START {                                                           \
         if (RExC_rx_sv)                                                     \
             SAVEFREESV(RExC_rx_sv);                                         \
+        if (RExC_open_parens)                                               \
+            SAVEFREEPV(RExC_open_parens);                                   \
+        if (RExC_close_parens)                                              \
+            SAVEFREEPV(RExC_close_parens);                                  \
     } STMT_END
 
 /*
@@ -800,17 +813,22 @@ static const scan_data_t zero_scan_data = {
 #define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE RExC_copy_start_in_constructed = NULL
 #define RESTORE_WARNINGS RExC_copy_start_in_constructed = RExC_precomp
 
-/* Outputting warnings is generally deferred until the 2nd pass.  This is
- * because the first pass can be restarted, for example if the pattern has to
- * be converted to UTF-8.  If a warning had already been output earlier in the
- * pass, it would be re-output after the restart.  Pass 2 is never restarted,
- * so the problem simply goes away if we defer the output to that pass.  See
- * [perl #122671]. 'RExC_copy_start_in_constructed' being NULL is a flag to
- * not generate any warnings */
+/* Since a warning can be generated multiple times as the input is reparsed, we
+ * output it the first time we come to that point in the parse, but suppress it
+ * otherwise.  'RExC_copy_start_in_constructed' being NULL is a flag to not
+ * generate any warnings */
 #define TO_OUTPUT_WARNINGS(loc)                                         \
-  (PASS2 && RExC_copy_start_in_constructed)
+  (   RExC_copy_start_in_constructed                                    \
+   && ((xI(loc)) - RExC_precomp) > (Ptrdiff_t) RExC_latest_warn_offset)
 
-#define UPDATE_WARNINGS_LOC(loc)  NOOP
+/* After we've emitted a warning, we save the position in the input so we don't
+ * output it again */
+#define UPDATE_WARNINGS_LOC(loc)                                        \
+    STMT_START {                                                        \
+        if (TO_OUTPUT_WARNINGS(loc)) {                                  \
+            RExC_latest_warn_offset = (xI(loc)) - RExC_precomp;         \
+        }                                                               \
+    } STMT_END
 
 /* 'warns' is the output of the packWARNx macro used in 'code' */
 #define _WARN_HELPER(loc, warns, code)                                  \
@@ -941,11 +959,11 @@ static const scan_data_t zero_scan_data = {
 #define Set_Node_Offset_Length(node,offset,len)
 #define ProgLen(ri) ri->u.proglen
 #define SetProgLen(ri,x) ri->u.proglen = x
+#define Track_Code(code)
 #else
 #define ProgLen(ri) ri->u.offsets[0]
 #define SetProgLen(ri,x) ri->u.offsets[0] = x
 #define Set_Node_Offset_To_R(offset,byte) STMT_START {                 \
-    if (! SIZE_ONLY) {                                                 \
        MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n",         \
                    __LINE__, (int)(offset), (int)(byte)));             \
        if((offset) < 0) {                                              \
@@ -954,7 +972,6 @@ static const scan_data_t zero_scan_data = {
        } else {                                                        \
             RExC_offsets[2*(offset)-1] = (byte);                       \
        }                                                               \
-    }                                                                  \
 } STMT_END
 
 #define Set_Node_Offset(node,byte)                                      \
@@ -962,7 +979,6 @@ static const scan_data_t zero_scan_data = {
 #define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
 
 #define Set_Node_Length_To_R(node,len) STMT_START {                    \
-    if (! SIZE_ONLY) {                                                 \
        MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n",           \
                __LINE__, (int)(node), (int)(len)));                    \
        if((node) < 0) {                                                \
@@ -971,7 +987,6 @@ static const scan_data_t zero_scan_data = {
        } else {                                                        \
            RExC_offsets[2*(node)] = (len);                             \
        }                                                               \
-    }                                                                  \
 } STMT_END
 
 #define Set_Node_Length(node,len) \
@@ -987,6 +1002,8 @@ static const scan_data_t zero_scan_data = {
     Set_Node_Offset_To_R(REGNODE_OFFSET(node), (offset));      \
     Set_Node_Length_To_R(REGNODE_OFFSET(node), (len)); \
 } STMT_END
+
+#define Track_Code(code) STMT_START { code } STMT_END
 #endif
 
 #if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
@@ -3583,9 +3600,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                 optimisation.
              */
             while( optimize < jumper ) {
-#ifdef RE_TRACK_PATTERN_OFFSETS
-                mjd_nodelen += Node_Length((optimize));
-#endif
+                Track_Code( mjd_nodelen += Node_Length((optimize)); );
                 OP( optimize ) = OPTIMIZED;
                 Set_Node_Offset_Length(optimize, 0, 0);
                 optimize++;
@@ -5615,21 +5630,16 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                                           (regnode_charclass *) scan);
                    break;
 
+                case NANYOFM:
                 case ANYOFM:
                   {
                     SV* cp_list = get_ANYOFM_contents(scan);
 
                     if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_union(data->start_class,
-                                  cp_list,
-                                  FALSE /* don't invert */
-                                  );
+                        ssc_union(data->start_class, cp_list, invert);
                     }
                     else if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class,
-                                         cp_list,
-                                         FALSE /* don't invert */
-                                         );
+                        ssc_intersection(data->start_class, cp_list, invert);
                     }
 
                     SvREFCNT_dec_NN(cp_list);
@@ -6375,7 +6385,8 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
     DEBUG_PARSE_r(Perl_re_printf( aTHX_
         "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
 
-    Newx(dst, *plen_p * 2 + 1, U8);
+    /* 1 for each byte + 1 for each byte that expands to two, + trailing NUL */
+    Newx(dst, *plen_p + variant_under_utf8_count(src, src + *plen_p) + 1, U8);
     d = dst;
 
     while (s < *plen_p) {
@@ -6945,7 +6956,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
      * properly wrapped with the right modifiers */
 
     bool has_p     = ((RExC_rx->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
-    bool has_charset = (get_regex_charset(RExC_rx->extflags)
+    bool has_charset = RExC_utf8 || (get_regex_charset(RExC_rx->extflags)
                                                 != REGEX_DEPENDS_CHARSET);
 
     /* The caret is output if there are any defaults: if not all the STD
@@ -6989,7 +7000,14 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
     }
     if (has_charset) {
         STRLEN len;
-        const char* const name = get_regex_charset_name(RExC_rx->extflags, &len);
+        const char* name;
+
+        name = get_regex_charset_name(RExC_rx->extflags, &len);
+        if strEQ(name, DEPENDS_PAT_MODS) {  /* /d under UTF-8 => /u */
+            assert(RExC_utf8);
+            name = UNICODE_PAT_MODS;
+            len = sizeof(UNICODE_PAT_MODS) - 1;
+        }
         Copy(name, p, len, char);
         p += len;
     }
@@ -7051,14 +7069,25 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
  * pm_flags field of the related PMOP. Currently we're only interested in
  * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL.
  *
- * We can't allocate space until we know how big the compiled form will be,
- * but we can't compile it (and thus know how big it is) until we've got a
- * place to put the code.  So we cheat:  we compile it twice, once with code
- * generation turned off and size counting turned on, and once "for real".
- * This also means that we don't allocate space until we are sure that the
- * thing really will compile successfully, and we never have to move the
- * code and thus invalidate pointers into it.  (Note that it has to be in
- * one piece because free() must be able to free it all.) [NB: not true in perl]
+ * For many years this code had an initial sizing pass that calculated
+ * (sometimes incorrectly, leading to security holes) the size needed for the
+ * compiled pattern.  That was changed by commit
+ * 7c932d07cab18751bfc7515b4320436273a459e2 in 5.29, which reallocs the size, a
+ * node at a time, as parsing goes along.  Patches welcome to fix any obsolete
+ * references to this sizing pass.
+ *
+ * Now, an initial crude guess as to the size needed is made, based on the
+ * length of the pattern.  Patches welcome to improve that guess.  That amount
+ * of space is malloc'd and then immediately freed, and then clawed back node
+ * by node.  This design is to minimze, to the extent possible, memory churn
+ * when doing the the reallocs.
+ *
+ * A separate parentheses counting pass may be needed in some cases.
+ * (Previously the sizing pass did this.)  Patches welcome to reduce the number
+ * of these cases.
+ *
+ * The existence of a sizing pass necessitated design decisions that are no
+ * longer needed.  There are potential areas of simplification.
  *
  * Beware that the optimization-preparation code in here knows about some
  * of the structure of the compiled regexp.  [I'll say.]
@@ -7070,7 +7099,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                     bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags)
 {
     REGEXP *Rx;         /* Capital 'R' means points to a REGEXP */
-    struct regexp *r;
     STRLEN plen;
     char *exp;
     regnode *scan;
@@ -7234,9 +7262,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /* ignore the utf8ness if the pattern is 0 length */
     RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
 
-    RExC_rx_sv = NULL;                                                     \
-    RExC_uni_semantics = 0;
-    RExC_seen_unfolded_sharp_s = 0;
+    RExC_uni_semantics = RExC_utf8; /* UTF-8 implies unicode semantics;
+                                       otherwise we may find later this should
+                                       be 1 */
     RExC_contains_locale = 0;
     RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
     RExC_in_script_run = 0;
@@ -7245,7 +7273,17 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_frame_head= NULL;
     RExC_frame_last= NULL;
     RExC_frame_count= 0;
+    RExC_latest_warn_offset = 0;
+    RExC_use_BRANCHJ = 0;
     RExC_total_parens = 0;
+    RExC_open_parens = NULL;
+    RExC_close_parens = NULL;
+    RExC_paren_names = NULL;
+    RExC_size = 0;
+    RExC_seen_d_op = FALSE;
+#ifdef DEBUGGING
+    RExC_paren_name_list = NULL;
+#endif
 
     DEBUG_r({
         RExC_mysv1= sv_newmortal();
@@ -7289,19 +7327,21 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         return old_re;
     }
 
+    /* Allocate the pattern's SV */
+    RExC_rx_sv = Rx = (REGEXP*) newSV_type(SVt_REGEXP);
+    RExC_rx = ReANY(Rx);
+    if ( RExC_rx == NULL )
+        FAIL("Regexp out of space");
+
     rx_flags = orig_rx_flags;
 
-    if (   initial_charset == REGEX_DEPENDS_CHARSET
-        && (RExC_utf8 ||RExC_uni_semantics))
-    {
+    if (initial_charset == REGEX_DEPENDS_CHARSET && RExC_uni_semantics) {
 
        /* Set to use unicode semantics if the pattern is in utf8 and has the
         * 'depends' charset specified, as it means unicode when utf8  */
        set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
     }
 
-    RExC_copy_start_in_constructed = RExC_copy_start_in_input = RExC_precomp = exp;
-    RExC_flags = rx_flags;
     RExC_pm_flags = pm_flags;
 
     if (runtime_code) {
@@ -7325,43 +7365,82 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_maxlen = 0;
     RExC_in_lookbehind = 0;
     RExC_seen_zerolen = *exp == '^' ? -1 : 0;
-    RExC_extralen = 0;
 #ifdef EBCDIC
     RExC_recode_x_to_native = 0;
 #endif
     RExC_in_multi_char_class = 0;
 
-    /* First pass: determine size, legality. */
-    RExC_pass1 = TRUE;
-    RExC_parse = exp;
-    RExC_start = RExC_copy_start_in_constructed = exp;
-    RExC_end = exp + plen;
-    RExC_precomp_end = RExC_end;
-    RExC_naughty = 0;
-    RExC_npar = 1;
+    RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = RExC_precomp = exp;
+    RExC_precomp_end = RExC_end = exp + plen;
     RExC_nestroot = 0;
-    RExC_size = 0L;
-    RExC_emit = 1;
     RExC_whilem_seen = 0;
-    RExC_open_parens = 0;
-    RExC_close_parens = 0;
     RExC_end_op = NULL;
-    RExC_paren_names = NULL;
-#ifdef DEBUGGING
-    RExC_paren_name_list = NULL;
-#endif
     RExC_recurse = NULL;
     RExC_study_chunk_recursed = NULL;
     RExC_study_chunk_recursed_bytes= 0;
     RExC_recurse_count = 0;
     pRExC_state->code_index = 0;
 
-    /* We allocate scratch space as large as the largest node, for use in the
-     * first pass.  Since many functions return RExC_emit on success, and '0'
-     * if an error, RExC_emit must never be 0, so we set it to 1 and double
-     * the scratch space */
-    Newxc(RExC_emit_start, 2 * sizeof(regnode_ssc), char, regnode);
-    SAVEFREEPV(RExC_emit_start);
+    /* Initialize the string in the compiled pattern.  This is so that there is
+     * something to output if necessary */
+    set_regex_pv(pRExC_state, Rx);
+
+    DEBUG_PARSE_r({
+        Perl_re_printf( aTHX_
+            "Starting parse and generation\n");
+        RExC_lastnum=0;
+        RExC_lastparse=NULL;
+    });
+
+    /* Allocate space and zero-initialize. Note, the two step process
+       of zeroing when in debug mode, thus anything assigned has to
+       happen after that */
+    if (!  RExC_size) {
+
+        /* On the first pass of the parse, we guess how big this will be.  Then
+         * we grow in one operation to that amount and then give it back.  As
+         * we go along, we re-allocate what we need.
+         *
+         * XXX Currently the guess is essentially that the pattern will be an
+         * EXACT node with one byte input, one byte output.  This is crude, and
+         * better heuristics are welcome.
+         *
+         * On any subsequent passes, we guess what we actually computed in the
+         * latest earlier pass.  Such a pass probably didn't complete so is
+         * missing stuff.  We could improve those guesses by knowing where the
+         * parse stopped, and use the length so far plus apply the above
+         * assumption to what's left. */
+        RExC_size = STR_SZ(RExC_end - RExC_start);
+    }
+
+    Newxc(RExC_rxi, sizeof(regexp_internal) + RExC_size, char, regexp_internal);
+    if ( RExC_rxi == NULL )
+        FAIL("Regexp out of space");
+
+    Zero(RExC_rxi, sizeof(regexp_internal) + RExC_size, char);
+    RXi_SET( RExC_rx, RExC_rxi );
+
+    /* We start from 0 (over from 0 in the case this is a reparse.  The first
+     * node parsed will give back any excess memory we have allocated so far).
+     * */
+    RExC_size = 0;
+
+    /* non-zero initialization begins here */
+    RExC_rx->engine= eng;
+    RExC_rx->extflags = rx_flags;
+    RXp_COMPFLAGS(RExC_rx) = orig_rx_flags & RXf_PMf_FLAGCOPYMASK;
+
+    if (pm_flags & PMf_IS_QR) {
+       RExC_rxi->code_blocks = pRExC_state->code_blocks;
+        if (RExC_rxi->code_blocks) {
+            RExC_rxi->code_blocks->refcnt++;
+        }
+    }
+
+    RExC_rx->intflags = 0;
+
+    RExC_flags = rx_flags;     /* don't let top level (?i) bleed */
+    RExC_parse = exp;
 
     /* This NUL is guaranteed because the pattern comes from an SV*, and the sv
      * code makes sure the final byte is an uncounted NUL.  But should this
@@ -7372,13 +7451,34 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      * etc.  So it is worth noting. */
     assert(*RExC_end == '\0');
 
-    DEBUG_PARSE_r(
-        Perl_re_printf( aTHX_  "Starting first pass (sizing)\n");
-        RExC_lastnum=0;
-        RExC_lastparse=NULL;
-    );
+    RExC_naughty = 0;
+    RExC_npar = 1;
+    RExC_emit_start = RExC_rxi->program;
+    pRExC_state->code_index = 0;
+
+    *((char*) RExC_emit_start) = (char) REG_MAGIC;
+    RExC_emit = 1;
+
+    /* Do the parse */
+    if (reg(pRExC_state, 0, &flags, 1)) {
+
+        /* Success!, But if RExC_total_parens < 0, we need to redo the parse
+         * knowing how many parens there actually are */
+        if (RExC_total_parens < 0) {
+            flags |= RESTART_PARSE;
+        }
+
+        /* We have that number in RExC_npar */
+        RExC_total_parens = RExC_npar;
+    }
+    else if (! MUST_RESTART(flags)) {
+       ReREFCNT_dec(Rx);
+        Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile, flags=%#" UVxf, (UV) flags);
+    }
+
+    /* Here, we either have success, or we have to redo the parse for some reason */
+    if (MUST_RESTART(flags)) {
 
-    if (reg(pRExC_state, 0, &flags, 1) == 0) {
         /* It's possible to write a regexp in ascii that represents Unicode
         codepoints outside of the byte range, such as via \x{100}. If we
         detect such a sequence we have to convert the entire pattern to utf8
@@ -7387,142 +7487,96 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         at least some part of the pattern, and therefore must convert the whole
         thing.
         -- dmq */
-        if (MUST_RESTART(flags)) {
-            if (flags & NEED_UTF8) {
-                S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
-                pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
-                DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse after upgrade\n"));
-            }
-            else {
-                DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse\n"));
+        if (flags & NEED_UTF8) {
+
+            /* We have stored the offset of the final warning output so far.
+             * That must be adjusted.  Any variant characters between the start
+             * of the pattern and this warning count for 2 bytes in the final,
+             * so just add them again */
+            if (UNLIKELY(RExC_latest_warn_offset > 0)) {
+                RExC_latest_warn_offset +=
+                            variant_under_utf8_count((U8 *) exp, (U8 *) exp
+                                                + RExC_latest_warn_offset);
             }
+            S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
+            pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
+            DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse after upgrade\n"));
+        }
+        else {
+            DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse\n"));
+        }
 
-            goto redo_parse;
+        if (RExC_total_parens > 0) {
+            /* Make enough room for all the known parens, and zero it */
+            Renew(RExC_open_parens, RExC_total_parens, regnode_offset);
+            Zero(RExC_open_parens, RExC_total_parens, regnode_offset);
+            RExC_open_parens[0] = 1;    /* +1 for REG_MAGIC */
+
+            Renew(RExC_close_parens, RExC_total_parens, regnode_offset);
+            Zero(RExC_close_parens, RExC_total_parens, regnode_offset);
+        }
+        else { /* Parse did not complete.  Reinitialize the parentheses
+                  structures */
+            RExC_total_parens = 0;
+            if (RExC_open_parens) {
+                Safefree(RExC_open_parens);
+                RExC_open_parens = NULL;
+            }
+            if (RExC_close_parens) {
+                Safefree(RExC_close_parens);
+                RExC_close_parens = NULL;
+            }
         }
-        Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile for sizing pass, flags=%#" UVxf, (UV) flags);
-    }
 
-    DEBUG_PARSE_r({
-        Perl_re_printf( aTHX_
-            "Required size %" IVdf " nodes\n"
-            "Starting second pass (creation)\n",
-            (IV)RExC_size);
-        RExC_lastnum=0;
-        RExC_lastparse=NULL;
-    });
+        /* Clean up what we did in this parse */
+        SvREFCNT_dec_NN(RExC_rx_sv);
 
-    /* The first pass could have found things that force Unicode semantics */
-    if ((RExC_utf8 || RExC_uni_semantics)
-        && get_regex_charset(rx_flags) == REGEX_DEPENDS_CHARSET)
-    {
-       set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
+        goto redo_parse;
     }
 
-    /* Small enough for pointer-storage convention?
-       If extralen==0, this means that we will not need long jumps. */
-    if (RExC_size >= 0x10000L && RExC_extralen)
-        RExC_size += RExC_extralen;
-    else
-       RExC_extralen = 0;
-    if (RExC_whilem_seen > 15)
-       RExC_whilem_seen = 15;
-
-    /* Allocate space and zero-initialize. Note, the two step process
-       of zeroing when in debug mode, thus anything assigned has to
-       happen after that */
-    RExC_rx_sv = Rx = (REGEXP*) newSV_type(SVt_REGEXP);
-    RExC_rx = ReANY(Rx);
-    Newxc(RExC_rxi, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
-        char, regexp_internal);
-    if ( RExC_rx == NULL || RExC_rxi == NULL )
-       FAIL("Regexp out of space");
-#ifdef DEBUGGING
-    /* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
-    Zero(RExC_rxi, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
-         char);
-#else
-    /* bulk initialize base fields with 0. */
-    Zero(RExC_rxi, sizeof(regexp_internal), char);
-#endif
+    /* Here, we have successfully parsed and generated the pattern's program
+     * for the regex engine.  We are ready to finish things up and look for
+     * optimizations. */
 
-    /* non-zero initialization begins here */
-    RXi_SET( RExC_rx, RExC_rxi );
-    RExC_rx->engine= eng;
-    RExC_rx->extflags = rx_flags;
-    RXp_COMPFLAGS(RExC_rx) = orig_rx_flags & RXf_PMf_FLAGCOPYMASK;
+    /* Update the string to compile, with correct modifiers, etc */
+    set_regex_pv(pRExC_state, Rx);
 
-    if (pm_flags & PMf_IS_QR) {
-       RExC_rxi->code_blocks = pRExC_state->code_blocks;
-       if (RExC_rxi->code_blocks)
-            RExC_rxi->code_blocks->refcnt++;
-    }
+    RExC_rx->nparens = RExC_total_parens - 1;
 
-    /* Set up the string to compile, with correct modifiers, etc */
-    set_regex_pv(pRExC_state, Rx);
+    /* Uses the upper 4 bits of the FLAGS field, so keep within that size */
+    if (RExC_whilem_seen > 15)
+        RExC_whilem_seen = 15;
 
-    RExC_rx->intflags = 0;
-    RExC_total_parens = RExC_npar;
-    RExC_rx->nparens = RExC_total_parens - 1;  /* set early to validate backrefs */
+    DEBUG_PARSE_r({
+        Perl_re_printf( aTHX_
+            "Required size %" IVdf " nodes\n", (IV)RExC_size);
+        RExC_lastnum=0;
+        RExC_lastparse=NULL;
+    });
 
-    /* Useful during FAIL. */
 #ifdef RE_TRACK_PATTERN_OFFSETS
-    Newxz(RExC_offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
     DEBUG_OFFSETS_r(Perl_re_printf( aTHX_
                           "%s %" UVuf " bytes for offset annotations.\n",
                           RExC_offsets ? "Got" : "Couldn't get",
-                          (UV)((2*RExC_size+1) * sizeof(U32))));
-#endif
-    SetProgLen(RExC_rxi, RExC_size);
-    RExC_rx_sv = Rx;
+                          (UV)((RExC_offsets[0] * 2 + 1))));
+    DEBUG_OFFSETS_r(if (RExC_offsets) {
+        const STRLEN len = RExC_offsets[0];
+        STRLEN i;
+        GET_RE_DEBUG_FLAGS_DECL;
+        Perl_re_printf( aTHX_
+                      "Offsets: [%" UVuf "]\n\t", (UV)RExC_offsets[0]);
+        for (i = 1; i <= len; i++) {
+            if (RExC_offsets[i*2-1] || RExC_offsets[i*2])
+                Perl_re_printf( aTHX_  "%" UVuf ":%" UVuf "[%" UVuf "] ",
+                (UV)i, (UV)RExC_offsets[i*2-1], (UV)RExC_offsets[i*2]);
+        }
+        Perl_re_printf( aTHX_  "\n");
+    });
 
-    /* Second pass: emit code. */
-    RExC_pass1 = FALSE;
-    RExC_flags = rx_flags;     /* don't let top level (?i) bleed */
-    RExC_pm_flags = pm_flags;
-    RExC_parse = exp;
-    RExC_end = exp + plen;
-    RExC_naughty = 0;
-    RExC_emit_start = RExC_rxi->program;
-    RExC_emit = 1;
-    RExC_emit_bound = RExC_rxi->program + RExC_size + 1;
-    pRExC_state->code_index = 0;
+#else
+    SetProgLen(RExC_rxi,RExC_size);
+#endif
 
-    *((char*) RExC_emit_start) = (char) REG_MAGIC;
-    /* setup various meta data about recursion, this all requires
-     * RExC_npar to be correctly set, and a bit later on we clear it */
-    if (RExC_seen & REG_RECURSE_SEEN) {
-        DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-            "%*s%*s Setting up open/close parens\n",
-                  22, "|    |", (int)(0 * 2 + 1), ""));
-
-        /* setup RExC_open_parens, which holds the address of each
-         * OPEN tag, and to make things simpler for the 0 index
-         * the start of the program - this is used later for offsets */
-        Newxz(RExC_open_parens, RExC_npar, regnode_offset);
-        SAVEFREEPV(RExC_open_parens);
-        RExC_open_parens[0] = RExC_emit;
-
-        /* setup RExC_close_parens, which holds the address of each
-         * CLOSE tag, and to make things simpler for the 0 index
-         * the end of the program - this is used later for offsets */
-        Newxz(RExC_close_parens, RExC_npar, regnode_offset);
-        SAVEFREEPV(RExC_close_parens);
-        /* we dont know where end op starts yet, so we dont
-         * need to set RExC_close_parens[0] like we do RExC_open_parens[0] above */
-
-        /* Note, RExC_npar is 1 + the number of parens in a pattern.
-         * So its 1 if there are no parens. */
-        RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) +
-                                         ((RExC_npar & 0x07) != 0);
-        Newx(RExC_study_chunk_recursed,
-             RExC_study_chunk_recursed_bytes * RExC_npar, U8);
-        SAVEFREEPV(RExC_study_chunk_recursed);
-    }
-    RExC_npar = 1;
-    if (reg(pRExC_state, 0, &flags, 1) == 0) {
-       ReREFCNT_dec(Rx);
-        Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile for generation pass, flags=%#" UVxf, (UV) flags);
-    }
     DEBUG_OPTIMISE_r(
         Perl_re_printf( aTHX_  "Starting post parse optimization\n");
     );
@@ -7535,6 +7589,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         SAVEFREEPV(RExC_recurse);
     }
 
+    if (RExC_seen & REG_RECURSE_SEEN) {
+        /* Note, RExC_total_parens is 1 + the number of parens in a pattern.
+         * So its 1 if there are no parens. */
+        RExC_study_chunk_recursed_bytes= (RExC_total_parens >> 3) +
+                                         ((RExC_total_parens & 0x07) != 0);
+        Newx(RExC_study_chunk_recursed,
+             RExC_study_chunk_recursed_bytes * RExC_total_parens, U8);
+        SAVEFREEPV(RExC_study_chunk_recursed);
+    }
+
   reStudy:
     RExC_rx->minlen = minlen = sawlookahead = sawplus = sawopen = sawminmod = 0;
     DEBUG_r(
@@ -8026,21 +8090,15 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         Perl_re_printf( aTHX_ "Final program:\n");
         regdump(RExC_rx);
     });
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    DEBUG_OFFSETS_r(if (RExC_offsets) {
-        const STRLEN len = RExC_offsets[0];
-        STRLEN i;
-        GET_RE_DEBUG_FLAGS_DECL;
-        Perl_re_printf( aTHX_
-                      "Offsets: [%" UVuf "]\n\t", (UV)RExC_offsets[0]);
-        for (i = 1; i <= len; i++) {
-            if (RExC_offsets[i*2-1] || RExC_offsets[i*2])
-                Perl_re_printf( aTHX_  "%" UVuf ":%" UVuf "[%" UVuf "] ",
-                (UV)i, (UV)RExC_offsets[i*2-1], (UV)RExC_offsets[i*2]);
-            }
-        Perl_re_printf( aTHX_  "\n");
-    });
-#endif
+
+    if (RExC_open_parens) {
+        Safefree(RExC_open_parens);
+        RExC_open_parens = NULL;
+    }
+    if (RExC_close_parens) {
+        Safefree(RExC_close_parens);
+        RExC_close_parens = NULL;
+    }
 
 #ifdef USE_ITHREADS
     /* under ithreads the ?pat? PMf_USED flag on the pmop is simulated
@@ -8524,31 +8582,36 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
                          character */
         vFAIL("Group name must start with a non-digit word character");
     }
-    if ( flags ) {
     sv_name = newSVpvn_flags(name_start, (int)(RExC_parse - name_start),
                             SVs_TEMP | (UTF ? SVf_UTF8 : 0));
-        if ( flags == REG_RSN_RETURN_NAME)
-            return sv_name;
-        else if (flags==REG_RSN_RETURN_DATA) {
-            HE *he_str = NULL;
-            SV *sv_dat = NULL;
-            if ( ! sv_name )      /* should not happen*/
-                Perl_croak(aTHX_ "panic: no svname in reg_scan_name");
-            if (RExC_paren_names)
-                he_str = hv_fetch_ent( RExC_paren_names, sv_name, 0, 0 );
-            if ( he_str )
-                sv_dat = HeVAL(he_str);
-            if ( ! sv_dat )     /* Didn't find group */
+    if ( flags == REG_RSN_RETURN_NAME)
+        return sv_name;
+    else if (flags==REG_RSN_RETURN_DATA) {
+        HE *he_str = NULL;
+        SV *sv_dat = NULL;
+        if ( ! sv_name )      /* should not happen*/
+            Perl_croak(aTHX_ "panic: no svname in reg_scan_name");
+        if (RExC_paren_names)
+            he_str = hv_fetch_ent( RExC_paren_names, sv_name, 0, 0 );
+        if ( he_str )
+            sv_dat = HeVAL(he_str);
+        if ( ! sv_dat ) {   /* Didn't find group */
+
+            /* It might be a forward reference; we can't fail until we
+                * know, by completing the parse to get all the groups, and
+                * then reparsing */
+            if (RExC_total_parens > 0)  {
                 vFAIL("Reference to nonexistent named group");
-            return sv_dat;
-        }
-        else {
-            Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
-                      (unsigned long) flags);
+            }
+            else {
+                REQUIRE_PARENS_PASS;
+            }
         }
-        NOT_REACHED; /* NOTREACHED */
+        return sv_dat;
     }
-    return NULL;
+
+    Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
+                     (unsigned long) flags);
 }
 
 #define DEBUG_PARSE_MSG(funcname)     DEBUG_PARSE_r({           \
@@ -8568,10 +8631,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
     } else                                                      \
         Perl_re_printf( aTHX_ "%16s","");                       \
                                                                 \
-    if (SIZE_ONLY)                                              \
-       num = RExC_size + 1;                                     \
-    else                                                        \
-       num=REG_NODE_NUM(REGNODE_p(RExC_emit));                  \
+    num=REG_NODE_NUM(REGNODE_p(RExC_emit));                     \
     if (RExC_lastnum!=num)                                      \
        Perl_re_printf( aTHX_ "|%4d", num);                      \
     else                                                        \
@@ -10519,14 +10579,14 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
         RExC_parse++;
         has_use_defaults = TRUE;
         STD_PMMOD_FLAGS_CLEAR(&RExC_flags);
-        set_regex_charset(&RExC_flags, (RExC_utf8 || RExC_uni_semantics)
+        set_regex_charset(&RExC_flags, (RExC_uni_semantics)
                                         ? REGEX_UNICODE_CHARSET
                                         : REGEX_DEPENDS_CHARSET);
     }
 
     cs = get_regex_charset(RExC_flags);
     if (cs == REGEX_DEPENDS_CHARSET
-        && (RExC_utf8 || RExC_uni_semantics))
+        && (RExC_uni_semantics))
     {
         cs = REGEX_UNICODE_CHARSET;
     }
@@ -10591,7 +10651,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  * pattern (or target, not known until runtime) are
                  * utf8, or something in the pattern indicates unicode
                  * semantics */
-                cs = (RExC_utf8 || RExC_uni_semantics)
+                cs = (RExC_uni_semantics)
                      ? REGEX_UNICODE_CHARSET
                      : REGEX_DEPENDS_CHARSET;
                 has_charset_modifier = DEPENDS_PAT_MOD;
@@ -10616,7 +10676,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 NOT_REACHED; /*NOTREACHED*/
             case ONCE_PAT_MOD: /* 'o' */
             case GLOBAL_PAT_MOD: /* 'g' */
-                if (PASS2 && ckWARN(WARN_REGEXP)) {
+                if (ckWARN(WARN_REGEXP)) {
                     const I32 wflagbit = *RExC_parse == 'o'
                                          ? WASTED_O
                                          : WASTED_G;
@@ -10636,7 +10696,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 break;
 
             case CONTINUE_PAT_MOD: /* 'c' */
-                if (PASS2 && ckWARN(WARN_REGEXP)) {
+                if (ckWARN(WARN_REGEXP)) {
                     if (! (wastedflags & WASTED_C) ) {
                         wastedflags |= WASTED_GC;
                        /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */
@@ -10723,9 +10783,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
     regnode_offset ret;
     char* name_start = RExC_parse;
     U32 num = 0;
-    SV *sv_dat = reg_scan_name(pRExC_state, SIZE_ONLY
-                                            ? REG_RSN_RETURN_NULL
-                                            : REG_RSN_RETURN_DATA);
+    SV *sv_dat = reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF;
@@ -10766,7 +10824,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
  *
  * Returns 0 otherwise, with *flagp set to indicate why:
  *  TRYAGAIN        at the end of (?) that only sets flags.
- *  RESTART_PARSE   if the sizing scan needs to be restarted, or'd with
+ *  RESTART_PARSE   if the parse needs to be restarted, or'd with
  *                  NEED_UTF8 if the pattern needs to be upgraded to UTF-8.
  *  Otherwise would only return 0 if regbranch() returns 0, which cannot
  *  happen.  */
@@ -11111,20 +11169,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 ret = reg2Lanode(pRExC_state, op, 0, internal_argval);
             }
             RExC_seen |= REG_VERBARG_SEEN;
-            if ( ! SIZE_ONLY ) {
-                if (start_arg) {
-                    SV *sv = newSVpvn( start_arg,
-                                       RExC_parse - start_arg);
-                    ARG(REGNODE_p(ret)) = add_data( pRExC_state,
-                                         STR_WITH_LEN("S"));
-                    RExC_rxi->data->data[ARG(REGNODE_p(ret))]=(void*)sv;
-                    FLAGS(REGNODE_p(ret)) = 1;
-                } else {
-                    FLAGS(REGNODE_p(ret)) = 0;
-                }
-                if ( internal_argval != -1 )
-                    ARG2L_SET(REGNODE_p(ret), internal_argval);
+            if (start_arg) {
+                SV *sv = newSVpvn( start_arg,
+                                    RExC_parse - start_arg);
+                ARG(REGNODE_p(ret)) = add_data( pRExC_state,
+                                        STR_WITH_LEN("S"));
+                RExC_rxi->data->data[ARG(REGNODE_p(ret))]=(void*)sv;
+                FLAGS(REGNODE_p(ret)) = 1;
+            } else {
+                FLAGS(REGNODE_p(ret)) = 0;
             }
+            if ( internal_argval != -1 )
+                ARG2L_SET(REGNODE_p(ret), internal_argval);
            nextchar(pRExC_state);
            return ret;
         }
@@ -11185,10 +11241,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 /* FALLTHROUGH */
             case '\'':          /* (?'...') */
                     name_start = RExC_parse;
-                    svname = reg_scan_name(pRExC_state,
-                        SIZE_ONLY    /* reverse test from the others */
-                        ? REG_RSN_RETURN_NAME
-                        : REG_RSN_RETURN_NULL);
+                    svname = reg_scan_name(pRExC_state, REG_RSN_RETURN_NAME);
                    if (   RExC_parse == name_start
                         || RExC_parse >= RExC_end
                         || *RExC_parse != paren)
@@ -11196,7 +11249,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                        vFAIL2("Sequence (?%c... not terminated",
                            paren=='>' ? '<' : paren);
                     }
-                   if (SIZE_ONLY) {
+                   {
                        HE *he_str;
                        SV *sv_dat = NULL;
                         if (!svname) /* shouldn't happen */
@@ -11288,6 +11341,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                   buffers in alternations share the same numbers */
                paren = ':';
                after_freeze = freeze_paren = RExC_npar;
+
+                /* XXX This construct currently requires an extra pass.
+                 * Investigation would be required to see if that could be
+                 * changed */
+                REQUIRE_PARENS_PASS;
                break;
            case ':':           /* (?:...) */
            case '>':           /* (?>...) */
@@ -11302,6 +11360,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                    FAIL("Sequence (?R) not terminated");
                 num = 0;
                 RExC_seen |= REG_RECURSE_SEEN;
+
+                /* XXX These constructs currently require an extra pass.
+                 * It probably could be changed */
+                REQUIRE_PARENS_PASS;
+
                *flagp |= POSTPONED;
                 goto gen_recurse_regop;
                /*notreached*/
@@ -11310,9 +11373,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 parse_start = RExC_parse - 1;
               named_recursion:
                 {
-                   SV *sv_dat = reg_scan_name(pRExC_state,
-                       SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
-                    num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
+                    SV *sv_dat = reg_scan_name(pRExC_state,
+                                               REG_RSN_RETURN_DATA);
+                   num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
                 }
                 if (RExC_parse >= RExC_end || *RExC_parse != ')')
                     vFAIL("Sequence (?&... not terminated");
@@ -11374,8 +11437,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     */
                     num = RExC_npar + num;
                     if (num < 1)  {
-                        RExC_parse++;
-                        vFAIL("Reference to nonexistent group");
+
+                        /* It might be a forward reference; we can't fail until
+                         * we know, by completing the parse to get all the
+                         * groups, and then reparsing */
+                        if (RExC_total_parens > 0)  {
+                            RExC_parse++;
+                            vFAIL("Reference to nonexistent group");
+                        }
+                        else {
+                            REQUIRE_PARENS_PASS;
+                        }
                     }
                 } else if ( paren == '+' ) {
                     num = RExC_npar + num - 1;
@@ -11390,18 +11462,27 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  */
 
                 ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
-                if (!SIZE_ONLY) {
-                   if (num > (I32)RExC_rx->nparens) {
-                       RExC_parse++;
-                       vFAIL("Reference to nonexistent group");
-                   }
-                   RExC_recurse_count++;
-                    DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Recurse #%" UVuf " to %" IVdf "\n",
-                              22, "|    |", (int)(depth * 2 + 1), "",
-                              (UV)ARG(REGNODE_p(ret)),
-                              (IV)ARG2L(REGNODE_p(ret))));
+                if (num >= RExC_npar) {
+
+                    /* It might be a forward reference; we can't fail until we
+                     * know, by completing the parse to get all the groups, and
+                     * then reparsing */
+                    if (RExC_total_parens > 0)  {
+                        if (num >= RExC_total_parens) {
+                            RExC_parse++;
+                            vFAIL("Reference to nonexistent group");
+                        }
+                    }
+                    else {
+                        REQUIRE_PARENS_PASS;
+                    }
                 }
+                RExC_recurse_count++;
+                DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+                    "%*s%*s Recurse #%" UVuf " to %" IVdf "\n",
+                            22, "|    |", (int)(depth * 2 + 1), "",
+                            (UV)ARG(REGNODE_p(ret)),
+                            (IV)ARG2L(REGNODE_p(ret))));
                 RExC_seen |= REG_RECURSE_SEEN;
 
                 Set_Node_Length(REGNODE_p(ret),
@@ -11451,20 +11532,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                /* this is a pre-compiled code block (?{...}) */
                cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index];
                RExC_parse = RExC_start + cb->end;
-               if (!SIZE_ONLY) {
                o = cb->block;
-                   if (cb->src_regex) {
-                       n = add_data(pRExC_state, STR_WITH_LEN("rl"));
-                       RExC_rxi->data->data[n] =
-                           (void*)SvREFCNT_inc((SV*)cb->src_regex);
-                       RExC_rxi->data->data[n+1] = (void*)o;
-                   }
-                   else {
-                       n = add_data(pRExC_state,
-                              (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l", 1);
-                       RExC_rxi->data->data[n] = (void*)o;
-                   }
-               }
+                if (cb->src_regex) {
+                    n = add_data(pRExC_state, STR_WITH_LEN("rl"));
+                    RExC_rxi->data->data[n] =
+                        (void*)SvREFCNT_inc((SV*)cb->src_regex);
+                    RExC_rxi->data->data[n+1] = (void*)o;
+                }
+                else {
+                    n = add_data(pRExC_state,
+                            (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l", 1);
+                    RExC_rxi->data->data[n] = (void*)o;
+                }
                pRExC_state->code_index++;
                nextchar(pRExC_state);
 
@@ -11479,9 +11558,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                         * return value */
                                        RExC_flags & RXf_PMf_COMPILETIME
                                       );
-                   if (!SIZE_ONLY) {
-                       FLAGS(REGNODE_p(ret)) = 2;
-                    }
+                    FLAGS(REGNODE_p(ret)) = 2;
                     REGTAIL(pRExC_state, ret, eval);
                     /* deal with the length of this later - MJD */
                    return ret;
@@ -11531,8 +11608,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     regnode_offset tail;
 
                     ret = reg_node(pRExC_state, LOGICAL);
-                    if (!SIZE_ONLY)
-                        FLAGS(REGNODE_p(ret)) = 1;
+                    FLAGS(REGNODE_p(ret)) = 1;
 
                     tail = reg(pRExC_state, 1, &flag, depth+1);
                     RETURN_FAIL_ON_RESTART(flag, flagp);
@@ -11545,8 +11621,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                    char ch = RExC_parse[0] == '<' ? '>' : '\'';
                    char *name_start= RExC_parse++;
                    U32 num = 0;
-                   SV *sv_dat=reg_scan_name(pRExC_state,
-                       SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
+                   SV *sv_dat=reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
                    if (   RExC_parse == name_start
                         || RExC_parse >= RExC_end
                         || *RExC_parse != ch)
@@ -11597,20 +11672,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                        SV *sv_dat;
                        RExC_parse++;
                        sv_dat = reg_scan_name(pRExC_state,
-                            SIZE_ONLY
-                            ? REG_RSN_RETURN_NULL
-                            : REG_RSN_RETURN_DATA);
-
-                        /* we should only have a false sv_dat when
-                         * SIZE_ONLY is true, and we always have false
-                         * sv_dat when SIZE_ONLY is true.
-                         * reg_scan_name() will VFAIL() if the name is
-                         * unknown when SIZE_ONLY is false, and otherwise
-                         * will return something, and when SIZE_ONLY is
-                         * true, reg_scan_name() just parses the string,
-                         * and doesnt return anything. (in theory) */
-                        assert(SIZE_ONLY ? !sv_dat : !!sv_dat);
-
+                                               REG_RSN_RETURN_DATA);
                         if (sv_dat)
                             parno = 1 + *((I32 *)SvPVX(sv_dat));
                    }
@@ -11690,9 +11752,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                    }
                    else
                         REGTAIL(pRExC_state, ret, ender);
+#if 0  /* Removing this doesn't cause failures in the test suite -- khw */
                     RExC_size++; /* XXX WHY do we need this?!!
                                     For large programs it seems to be required
                                     but I can't figure out why. -- dmq*/
+#endif
                    return ret;
                }
                 RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
@@ -11734,20 +11798,48 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
          capturing_parens:
            parno = RExC_npar;
            RExC_npar++;
+            if (RExC_total_parens <= 0) {
+                /* If we are in our first pass through (and maybe only pass),
+                 * we  need to allocate memory for the capturing parentheses
+                 * data structures.  Since we start at npar=1, when it reaches
+                 * 2, for the first time it has something to put in it.  Above
+                 * 2 means we extend what we already have */
+                if (RExC_npar == 2) {
+                    /* setup RExC_open_parens, which holds the address of each
+                     * OPEN tag, and to make things simpler for the 0 index the
+                     * start of the program - this is used later for offsets */
+                    Newxz(RExC_open_parens, RExC_npar, regnode_offset);
+                    RExC_open_parens[0] = 1;    /* +1 for REG_MAGIC */
+
+                    /* setup RExC_close_parens, which holds the address of each
+                     * CLOSE tag, and to make things simpler for the 0 index
+                     * the end of the program - this is used later for offsets
+                     * */
+                    Newxz(RExC_close_parens, RExC_npar, regnode_offset);
+                    /* we dont know where end op starts yet, so we dont need to
+                     * set RExC_close_parens[0] like we do RExC_open_parens[0]
+                     * above */
+                }
+                else {
+                    Renew(RExC_open_parens, RExC_npar, regnode_offset);
+                    Zero(RExC_open_parens + RExC_npar - 1, 1, regnode_offset);
+
+                    Renew(RExC_close_parens, RExC_npar, regnode_offset);
+                    Zero(RExC_close_parens + RExC_npar - 1, 1, regnode_offset);
+                }
+            }
 
            ret = reganode(pRExC_state, OPEN, parno);
-           if (!SIZE_ONLY ){
-               if (!RExC_nestroot)
-                   RExC_nestroot = parno;
-                if (RExC_open_parens && !RExC_open_parens[parno])
-               {
-                    DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Setting open paren #%" IVdf " to %d\n",
-                        22, "|    |", (int)(depth * 2 + 1), "",
-                       (IV)parno, REG_NODE_NUM(REGNODE_p(ret))));
-                    RExC_open_parens[parno]= ret;
-               }
-           }
+            if (!RExC_nestroot)
+                RExC_nestroot = parno;
+            if (RExC_open_parens && !RExC_open_parens[parno])
+            {
+                DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+                    "%*s%*s Setting open paren #%" IVdf " to %d\n",
+                    22, "|    |", (int)(depth * 2 + 1), "",
+                    (IV)parno, REG_NODE_NUM(REGNODE_p(ret))));
+                RExC_open_parens[parno]= ret;
+            }
 
             Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
             Set_Node_Offset(REGNODE_p(ret), RExC_parse); /* MJD */
@@ -11783,8 +11875,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             Set_Node_Offset_To_R(br, parse_start-RExC_start);
         }
        have_branch = 1;
-       if (SIZE_ONLY)
-           RExC_extralen += 1;         /* For BRANCHJ-BRANCH. */
     }
     else if (paren == ':') {
        *flagp |= flags&SIMPLE;
@@ -11805,8 +11895,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
                     ender);
        }
-       if (SIZE_ONLY)
-           RExC_extralen += 2;         /* Account for LONGJMP. */
        nextchar(pRExC_state);
        if (freeze_paren) {
            if (RExC_npar > after_freeze)
@@ -11866,21 +11954,19 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
            break;
        case 0:
            ender = reg_node(pRExC_state, END);
-           if (!SIZE_ONLY) {
-                assert(!RExC_end_op); /* there can only be one! */
-                RExC_end_op = REGNODE_p(ender);
-                if (RExC_close_parens) {
-                    DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Setting close paren #0 (END) to %d\n",
-                        22, "|    |", (int)(depth * 2 + 1), "",
-                        REG_NODE_NUM(REGNODE_p(ender))));
+            assert(!RExC_end_op); /* there can only be one! */
+            RExC_end_op = REGNODE_p(ender);
+            if (RExC_close_parens) {
+                DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+                    "%*s%*s Setting close paren #0 (END) to %d\n",
+                    22, "|    |", (int)(depth * 2 + 1), "",
+                    REG_NODE_NUM(REGNODE_p(ender))));
 
-                    RExC_close_parens[0]= ender;
-                }
+                RExC_close_parens[0]= ender;
             }
            break;
        }
-        DEBUG_PARSE_r(if (!SIZE_ONLY) {
+        DEBUG_PARSE_r(
             DEBUG_PARSE_MSG("lsbr");
             regprop(RExC_rx, RExC_mysv1, REGNODE_p(lastbr), NULL, pRExC_state);
             regprop(RExC_rx, RExC_mysv2, REGNODE_p(ender), NULL, pRExC_state);
@@ -11891,10 +11977,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                           (IV)REG_NODE_NUM(REGNODE_p(ender)),
                           (IV)(ender - lastbr)
             );
-        });
+        );
         REGTAIL(pRExC_state, lastbr, ender);
 
-       if (have_branch && !SIZE_ONLY) {
+       if (have_branch) {
             char is_nothing= 1;
            if (depth==1)
                 RExC_seen |= REG_TOP_LEVEL_BRANCHES_SEEN;
@@ -11926,7 +12012,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 br= PL_regkind[OP(ret_as_regnode)] != BRANCH
                                ? regnext(ret_as_regnode)
                                : ret_as_regnode;
-                DEBUG_PARSE_r(if (!SIZE_ONLY) {
+                DEBUG_PARSE_r(
                     DEBUG_PARSE_MSG("NADA");
                     regprop(RExC_rx, RExC_mysv1, ret_as_regnode,
                                      NULL, pRExC_state);
@@ -11939,7 +12025,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                   (IV)REG_NODE_NUM(REGNODE_p(ender)),
                                   (IV)(ender - ret)
                     );
-                });
+                );
                 OP(br)= NOTHING;
                 if (OP(REGNODE_p(ender)) == TAIL) {
                     NEXT_OFF(br)= 0;
@@ -12016,7 +12102,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
  * On success, returns the offset at which any next node should be placed into
  * the regex engine program being compiled.
  *
- * Returns 0 otherwise, setting flagp to RESTART_PARSE if the sizing scan needs
+ * Returns 0 otherwise, setting flagp to RESTART_PARSE if the parse needs
  * to be restarted, or'd with NEED_UTF8 if the pattern needs to be upgraded to
  * UTF-8
  */
@@ -12044,9 +12130,6 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
         }
     }
 
-    if (!first && SIZE_ONLY)
-       RExC_extralen += 1;                     /* BRANCHJ */
-
     *flagp = WORST;                    /* Tentatively. */
 
     skip_to_be_ignored_text(pRExC_state, &RExC_parse,
@@ -12069,6 +12152,13 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
            /* FIXME adding one for every branch after the first is probably
             * excessive now we have TRIE support. (hv) */
            MARK_NAUGHTY(1);
+            if (     chain > (SSize_t) BRANCH_MAX_OFFSET
+                && ! RExC_use_BRANCHJ)
+            {
+                /* XXX We could just redo this branch, but figuring out what
+                 * bookkeeping needs to be reset is a pain */
+                REQUIRE_BRANCHJ(flagp, 0);
+            }
             REGTAIL(pRExC_state, chain, latest);
        }
        chain = latest;
@@ -12100,7 +12190,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
  *
  * Returns 0 otherwise, with *flagp set to indicate why:
  *  TRYAGAIN        if regatom() returns 0 with TRYAGAIN.
- *  RESTART_PARSE   if the sizing scan needs to be restarted, or'd with
+ *  RESTART_PARSE   if the parse needs to be restarted, or'd with
  *                  NEED_UTF8 if the pattern needs to be upgraded to UTF-8.
  */
 STATIC regnode_offset
@@ -12185,11 +12275,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             if (max < min) {    /* If can't match, warn and optimize to fail
                                    unconditionally */
                 reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
-                if (PASS2) {
-                    ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
-                    NEXT_OFF(REGNODE_p(orig_emit)) =
-                                        regarglen[OPFAIL] + NODE_STEP_REGNODE;
-                }
+                ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
+                NEXT_OFF(REGNODE_p(orig_emit)) =
+                                    regarglen[OPFAIL] + NODE_STEP_REGNODE;
                 return ret;
             }
             else if (min == max && *RExC_parse == '?')
@@ -12238,8 +12326,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     NEXT_OFF(REGNODE_p(ret)) = 3;   /* Go over NOTHING to
                                                        LONGJMP. */
                 REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
-               if (SIZE_ONLY)
-                   RExC_whilem_seen++, RExC_extralen += 3;
+                RExC_whilem_seen++;
                 MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
            }
            FLAGS(REGNODE_p(ret)) = 0;
@@ -12248,10 +12335,8 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                *flagp = WORST;
            if (max > 0)
                *flagp |= HASWIDTH;
-           if (!SIZE_ONLY) {
-               ARG1_SET(REGNODE_p(ret), (U16)min);
-               ARG2_SET(REGNODE_p(ret), (U16)max);
-           }
+            ARG1_SET(REGNODE_p(ret), (U16)min);
+            ARG2_SET(REGNODE_p(ret), (U16)max);
             if (max == REG_INFTY)
                 RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
 
@@ -12300,7 +12385,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        goto do_curly;
     }
   nest_check:
-    if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
+    if (!(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
        ckWARN2reg(RExC_parse,
                   "%" UTF8f " matches null string many times",
                   UTF8fARG(UTF, (RExC_parse >= origparse
@@ -12393,7 +12478,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   * function calling S_reg().
   *
   * The final possibility is that it is premature to be calling this function;
-  * that pass1 needs to be restarted.  This can happen when this changes from
+  * the parse needs to be restarted.  This can happen when this changes from
   * /d to /u rules, or when the pattern needs to be upgraded to UTF-8.  The
   * latter occurs only when the fourth possibility would otherwise be in
   * effect, and is because one of those code points requires the pattern to be
@@ -12675,7 +12760,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
     SvREFCNT_dec_NN(substitute_parse);
 
     if (! *node_p) {
-        RETURN_X_ON_RESTART(FALSE, flags, flagp);
+        RETURN_FAIL_ON_RESTART(flags, flagp);
         FAIL2("panic: reg returned failure to grok_bslash_N, flags=%#" UVxf,
             (UV) flags);
     }
@@ -12726,8 +12811,7 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
      *
      * If <len> is zero, the function assumes that the node is to contain only
      * the single character given by <code_point> and calculates what <len>
-     * should be.  In pass 1, it sizes the node appropriately.  In pass 2, it
-     * additionally will populate the node's STRING with <code_point> or its
+     * should be.  It populates the node's STRING with <code_point> or its
      * fold if folding.
      *
      * In both cases <*flagp> is appropriately set
@@ -12746,22 +12830,13 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
 
     PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT;
 
-    /* Don't bother to check for downgrading in PASS1, as it doesn't make any
-     * sizing difference, and is extra work that is thrown away */
-    if (downgradable && ! PASS2) {
-        downgradable = FALSE;
-    }
-
     if (! len_passed_in) {
         if (UTF) {
             if (UVCHR_IS_INVARIANT(code_point)) {
                 if (LOC || ! FOLD) {    /* /l defers folding until runtime */
                     *character = (U8) code_point;
                 }
-                else { /* Here is /i and not /l. (toFOLD() is defined on just
-                          ASCII, which isn't the same thing as INVARIANT on
-                          EBCDIC, but it works there, as the extra invariants
-                          fold to themselves) */
+                else { /* Here is /i and not /l. */
                     *character = toFOLD((U8) code_point);
 
                     /* We can downgrade to an EXACT node if this character
@@ -12778,7 +12853,7 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
                 }
                 len = 1;
             }
-            else if (FOLD && (! LOC
+            else if (FOLD && (   ! LOC
                               || ! is_PROBLEMATIC_LOCALE_FOLD_cp(code_point)))
             {   /* Folding, and ok to do so now */
                 UV folded = _to_uni_fold_flags(
@@ -12850,15 +12925,14 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
         }
     }
 
-    if (SIZE_ONLY) {
-        RExC_size += STR_SZ(len);
+    if (downgradable) {
+        change_engine_size(pRExC_state, STR_SZ(len));
     }
-    else {
-        RExC_emit += STR_SZ(len);
-        STR_LEN(REGNODE_p(node)) = len;
-        if (! len_passed_in) {
-            Copy((char *) character, STRING(REGNODE_p(node)), len, char);
-        }
+
+    RExC_emit += STR_SZ(len);
+    STR_LEN(REGNODE_p(node)) = len;
+    if (! len_passed_in) {
+        Copy((char *) character, STRING(REGNODE_p(node)), len, char);
     }
 
     *flagp |= HASWIDTH;
@@ -12876,8 +12950,7 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
         *flagp |= SIMPLE;
     }
 
-    /* The OP may not be well defined in PASS1 */
-    if (PASS2 && OP(REGNODE_p(node)) == EXACTFL) {
+    if (OP(REGNODE_p(node)) == EXACTFL) {
         RExC_contains_locale = 1;
     }
 }
@@ -12961,7 +13034,7 @@ S_backref_value(char *p, char *e)
    at which any  next regnode should be placed.
 
    Returns 0, setting *flagp to TRYAGAIN if reg() returns 0 with TRYAGAIN.
-   Returns 0, setting *flagp to RESTART_PARSE if the sizing scan needs to be
+   Returns 0, setting *flagp to RESTART_PARSE if the parse needs to be
    restarted, or'd with NEED_UTF8 if the pattern needs to be upgraded to UTF-8
    Otherwise does not return 0.
 
@@ -13072,10 +13145,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                        FALSE, /* don't silence non-portable warnings. */
                        (bool) RExC_strict,
                        TRUE, /* Allow an optimized regnode result */
-                       NULL,
                        NULL);
         if (ret == 0) {
-            RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp, NEED_UTF8);
+            RETURN_FAIL_ON_RESTART_FLAGP(flagp);
             FAIL2("panic: regclass returned failure to regatom, flags=%#" UVxf,
                   (UV) *flagp);
         }
@@ -13124,7 +13196,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        /* Special Escapes
 
           This switch handles escape sequences that resolve to some kind
-          of special regop and not to literal text. Escape sequnces that
+          of special regop and not to literal text. Escape sequences that
           resolve to literal text are handled below in the switch marked
           "Literal Escapes".
 
@@ -13140,10 +13212,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            RExC_seen_zerolen++;
            ret = reg_node(pRExC_state, SBOL);
             /* SBOL is shared with /^/ so we set the flags so we can tell
-             * /\A/ from /^/ in split. We check ret because first pass we
-             * have no regop struct to set the flags on. */
-            if (PASS2)
-                FLAGS(REGNODE_p(ret)) = 1;
+             * /\A/ from /^/ in split. */
+            FLAGS(REGNODE_p(ret)) = 1;
            *flagp |= SIMPLE;
            goto finish_meta_pat;
        case 'G':
@@ -13196,7 +13266,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             RExC_seen |= REG_LOOKBEHIND_SEEN;
            op = BOUND + charset;
 
-            if (op == BOUNDL) {
+            if (op == BOUND) {
+                RExC_seen_d_op = TRUE;
+            }
+            else if (op == BOUNDL) {
                 RExC_contains_locale = 1;
             }
 
@@ -13204,7 +13277,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            *flagp |= SIMPLE;
            if (RExC_parse >= RExC_end || *(RExC_parse + 1) != '{') {
                 FLAGS(REGNODE_p(ret)) = TRADITIONAL_BOUND;
-                if (PASS2 && op > BOUNDA) {  /* /aa is same as /a */
+                if (op > BOUNDA) {  /* /aa is same as /a */
                     OP(REGNODE_p(ret)) = BOUNDA;
                 }
             }
@@ -13272,7 +13345,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 RExC_parse = endbrace;
                 REQUIRE_UNI_RULES(flagp, 0);
 
-                if (PASS2 && op >= BOUNDA) {  /* /aa is same as /a */
+                if (op >= BOUNDA) {  /* /aa is same as /a */
                     OP(REGNODE_p(ret)) = BOUNDU;
                     length += 4;
 
@@ -13288,7 +13361,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 }
            }
 
-            if (PASS2 && invert) {
+            if (invert) {
                 OP(REGNODE_p(ret)) += NBOUND - BOUND;
             }
            goto finish_meta_pat;
@@ -13345,6 +13418,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             else if (op == POSIXL) {
                 RExC_contains_locale = 1;
             }
+            else if (op == POSIXD) {
+                RExC_seen_d_op = TRUE;
+            }
 
           join_posix_op_known:
 
@@ -13353,9 +13429,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             }
 
            ret = reg_node(pRExC_state, op);
-            if (! SIZE_ONLY) {
-                FLAGS(REGNODE_p(ret)) = namedclass_to_classnum(arg);
-            }
+            FLAGS(REGNODE_p(ret)) = namedclass_to_classnum(arg);
 
            *flagp |= HASWIDTH|SIMPLE;
             /* FALLTHROUGH */
@@ -13382,7 +13456,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                      non-portables */
                            (bool) RExC_strict,
                            TRUE, /* Allow an optimized regnode result */
-                           NULL,
                            NULL);
             RETURN_FAIL_ON_RESTART_FLAGP(flagp);
             /* regclass() can only return RESTART_PARSE and NEED_UTF8 if
@@ -13539,9 +13612,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         vFAIL("Unterminated \\g{...} pattern");
                     RExC_parse++;
                 }
-                if (!SIZE_ONLY) {
-                    if (num > (I32)RExC_rx->nparens)
-                        vFAIL("Reference to nonexistent group");
+                if (num >= (I32)RExC_npar) {
+
+                    /* It might be a forward reference; we can't fail until we
+                     * know, by completing the parse to get all the groups, and
+                     * then reparsing */
+                    if (RExC_total_parens > 0)  {
+                        if (num >= RExC_total_parens)  {
+                            vFAIL("Reference to nonexistent group");
+                        }
+                    }
+                    else {
+                        REQUIRE_PARENS_PASS;
+                    }
                 }
                 RExC_sawback = 1;
                 ret = reganode(pRExC_state,
@@ -13555,6 +13638,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                        ? REFFL
                                        : REFF),
                                 num);
+                if (OP(REGNODE_p(ret)) == REFF) {
+                    RExC_seen_d_op = TRUE;
+                }
                 *flagp |= HASWIDTH;
 
                 /* override incorrect value set in reganode MJD */
@@ -13607,7 +13693,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 /* This allows us to fill a node with just enough spare so that if the final
  * character folds, its expansion is guaranteed to fit */
 #define MAX_NODE_STRING_SIZE (255-UTF8_MAXBYTES_CASE)
-           char foldbuf[MAX_NODE_STRING_SIZE+UTF8_MAXBYTES_CASE+1];
 
            char *s0;
            U8 upper_parse = MAX_NODE_STRING_SIZE;
@@ -13615,14 +13700,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             /* We start out as an EXACT node, even if under /i, until we find a
              * character which is in a fold.  The algorithm now segregates into
              * separate nodes, characters that fold from those that don't under
-             * /i.  (This hopefull will create nodes that are fixed strings
-             * even under /i, giving the optimizer something to grab onto to.)
+             * /i.  (This hopefully will create nodes that are fixed strings
+             * even under /i, giving the optimizer something to grab on to.)
              * So, if a node has something in it and the next character is in
              * the opposite category, that node is closed up, and the function
              * returns.  Then regatom is called again, and a new node is
              * created for the new category. */
             U8 node_type = EXACT;
 
+            /* Assume the node will be fully used; the excess is given back at
+             * the end.  We can't make any other length assumptions, as a byte
+             * input sequence could shrink down. */
+            Ptrdiff_t initial_size = STR_SZ(256);
+
             bool next_is_quantifier;
             char * oldp = NULL;
 
@@ -13634,21 +13724,17 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              * Similarly, we can convert EXACTFL nodes to EXACTFLU8 if they
              * contain only above-Latin1 characters (hence must be in UTF8),
              * which don't participate in folds with Latin1-range characters,
-             * as the latter's folds aren't known until runtime.  (We don't
-             * need to figure this out until pass 2) */
-            bool maybe_exactfu = PASS2;
+             * as the latter's folds aren't known until runtime. */
+            bool maybe_exactfu = TRUE;
 
-            /* To see if RExC_uni_semantics changes during parsing of the node.
-             * */
-            bool uni_semantics_at_node_start;
-
-            /* The node_type may change below, but since the size of the node
-             * doesn't change, it works */
-           ret = reg_node(pRExC_state, node_type);
+            /* Allocate an EXACT node.  The node_type may change below to
+             * another EXACTish node, but since the size of the node doesn't
+             * change, it works */
+            ret = regnode_guts(pRExC_state, node_type, initial_size, "exact");
+            FILL_NODE(ret, node_type);
+            RExC_emit++;
 
-            /* In pass1, folded, we use a temporary buffer instead of the
-             * actual node, as the node doesn't exist yet */
-           s = (SIZE_ONLY && FOLD) ? foldbuf : STRING(REGNODE_p(ret));
+           s = STRING(REGNODE_p(ret));
 
             s0 = s;
 
@@ -13668,7 +13754,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                    || UTF8_IS_START(UCHARAT(RExC_parse)));
 
-            uni_semantics_at_node_start = cBOOL(RExC_uni_semantics);
 
             /* Here, we have a literal character.  Find the maximal string of
              * them in the input that we can fit into a single EXACTish node.
@@ -13767,6 +13852,24 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         if (ender > 0xff) {
                             REQUIRE_UTF8(flagp);
                         }
+
+                        /* The \N{} means the pattern, if previously /d,
+                         * becomes /u.  That means it can't be an EXACTF node,
+                         * but an EXACTFU */
+                        if (node_type == EXACTF) {
+                            node_type = EXACTFU;
+
+                            /* If the node already contains something that
+                             * differs between EXACTF and EXACTFU, reparse it
+                             * as EXACTFU */
+                            if (! maybe_exactfu) {
+                                len = 0;
+                                s = s0;
+                                maybe_exactfu = TRUE;   /* Prob. unnecessary */
+                                goto reparse;
+                            }
+                        }
+
                         break;
                    case 'r':
                        ender = '\r';
@@ -14003,29 +14106,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
                 if (! FOLD) {  /* The simple case, just append the literal */
 
-                    /* In the sizing pass, we need only the size of the
-                     * character we are appending, hence we can delay getting
-                     * its representation until PASS2. */
-                    if (SIZE_ONLY) {
-                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
-                            const STRLEN unilen = UVCHR_SKIP(ender);
-                            s += unilen;
-                            added_len = unilen;
+                      not_fold_common:
+                        if (UVCHR_IS_INVARIANT(ender) || ! UTF) {
+                            *(s++) = (char) ender;
                         }
                         else {
-                            s++;
-                        }
-                    } else { /* PASS2 */
-                      not_fold_common:
-                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                             U8 * new_s = uvchr_to_utf8((U8*)s, ender);
                             added_len = (char *) new_s - s;
                             s = (char *) new_s;
                         }
-                        else {
-                            *(s++) = (char) ender;
-                        }
-                    }
                 }
                 else if (LOC && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)) {
 
@@ -14045,192 +14134,115 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     /* This code point means we can't simplify things */
                     maybe_exactfu = FALSE;
 
-                    /* A problematic code point in this context means that its
-                     * fold isn't known until runtime, so we can't fold it now.
-                     * (The non-problematic code points are the above-Latin1
-                     * ones that fold to also all above-Latin1.  Their folds
-                     * don't vary no matter what the locale is.) But here we
-                     * have characters whose fold depends on the locale.
-                     * Unlike the non-folding case above, we have to keep track
-                     * of these in the sizing pass, so that we can make sure we
-                     * don't split too-long nodes in the middle of a potential
-                     * multi-char fold.  And unlike the regular fold case
-                     * handled in the else clauses below, we don't actually
-                     * fold and don't have special cases to consider.  What we
-                     * do for both passes is the PASS2 code for non-folding */
+                    /* Here, we are adding a problematic fold character.
+                     * "Problematic" in this context means that its fold isn't
+                     * known until runtime.  (The non-problematic code points
+                     * are the above-Latin1 ones that fold to also all
+                     * above-Latin1.  Their folds don't vary no matter what the
+                     * locale is.) But here we have characters whose fold
+                     * depends on the locale.  We just add in the unfolded
+                     * character, and wait until runtime to fold it */
                     goto not_fold_common;
                 }
-                else                /* A regular FOLD code point */
-                     if (! UTF)
+                else /* regular fold; see if actually is in a fold */
+                     if (   (ender < 256 && ! IS_IN_SOME_FOLD_L1(ender))
+                         || (ender > 255
+                            && ! _invlist_contains_cp(PL_utf8_foldable, ender)))
                 {
-                    /* Here, are folding and are not UTF-8 encoded; therefore
-                     * the character must be in the range 0-255, and is not /l.
-                     * (Not /l because we already handled these under /l in
-                     * is_PROBLEMATIC_LOCALE_FOLD_cp) */
-                    if (! IS_IN_SOME_FOLD_L1(ender)) {
-
-                        /* Start a new node for this non-folding character if
-                         * previous ones in the node were folded */
-                        if (len && node_type != EXACT) {
-                            p = oldp;
-                            goto loopdone;
-                        }
+                    /* Here, folding, but the character isn't in a fold.
+                     *
+                     * Start a new node if previous characters in the node were
+                     * folded */
+                    if (len && node_type != EXACT) {
+                        p = oldp;
+                        goto loopdone;
+                    }
+
+                    /* Here, continuing a node with non-folded characters.  Add
+                     * this one */
 
+                    if (UVCHR_IS_INVARIANT(ender) || ! UTF) {
                         *(s++) = (char) ender;
                     }
-                    else {  /* Here, does participate in some fold */
-
-                        /* if this is the first character in the node, change
-                         * its type to folding.  Otherwise, if this is the
-                         * first folding character in the node, close up the
-                         * existing node, so can start a new node with this
-                         * one.  */
-                        if (! len) {
-                            node_type = compute_EXACTish(pRExC_state);
+                    else {
+                        s = (char *) uvchr_to_utf8((U8 *) s, ender);
+                        added_len = UVCHR_SKIP(ender);
+                    }
+                }
+                else {  /* Here, does participate in some fold */
+
+                    /* If this is the first character in the node, change its
+                     * type to folding.  Otherwise, if this is the first
+                     * folding character in the node, close up the existing
+                     * node, so can start a new node with this one.  */
+                    if (! len) {
+                        node_type = compute_EXACTish(pRExC_state);
+                    }
+                    else if (node_type == EXACT) {
+                        p = oldp;
+                        goto loopdone;
+                    }
+
+                    if (UTF) {  /* For UTF-8, we add the folded value */
+                        if (UVCHR_IS_INVARIANT(ender)) {
+                            *(s)++ = (U8) toFOLD(ender);
                         }
-                        else if (node_type == EXACT) {
-                            p = oldp;
-                            goto loopdone;
+                        else {
+                            ender = _to_uni_fold_flags(
+                                    ender,
+                                    (U8 *) s,
+                                    &added_len,
+                                    FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
+                                                    ? FOLD_FLAGS_NOMIX_ASCII
+                                                    : 0));
+                            s += added_len;
                         }
+                    }
+                    else {
 
-                        /* See if the character's fold differs between /d and
-                         * /u.  On non-ancient Unicode versions, this includes
-                         * the multi-char fold SHARP S to 'ss' */
+                        /* Here is non-UTF8; we don't normally store the folded
+                         * value.  First, see if the character's fold differs
+                         * between /d and /u. */
+                        if (PL_fold[ender] != PL_fold_latin1[ender]) {
+                            maybe_exactfu = FALSE;
+                        }
 
 #if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
    || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
                                       || UNICODE_DOT_DOT_VERSION > 0)
 
-                        if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
+                        /* On non-ancient Unicode versions, this includes the
+                         * multi-char fold SHARP S to 'ss' */
 
-                            /* See comments for join_exact() as to why we fold
-                             * this non-UTF at compile time */
-                            if (node_type == EXACTFU) {
-                                *(s++) = 's';
+                        else if (UNLIKELY(   ender == LATIN_SMALL_LETTER_SHARP_S
+                                          || (   len
+                                              && isALPHA_FOLD_EQ(ender, 's')
+                                              && isALPHA_FOLD_EQ(*(s-1), 's'))))
+                        {
 
-                                /* Let the code below add in the extra 's' */
-                                ender = 's';
-                                added_len = 2;
-                            }
-                            else if (   uni_semantics_at_node_start
-                                     != RExC_uni_semantics)
-                            {
-                                /* Here, we are supossed to be using Unicode
-                                 * rules, but this folding node is not.  This
-                                 * happens during pass 1 when the node started
-                                 * out not under Unicode rules, but a \N{} was
-                                 * encountered during the processing of it,
-                                 * causing Unicode rules to be switched into.
-                                 * Pass 1 continues uninterrupted, as by the
-                                 * time we get to pass 2, we will know enough
-                                 * to generate the correct folds.  Except in
-                                 * this one case, we need to restart the node,
-                                 * because the fold of the sharp s requires 2
-                                 * characters, and the sizing needs to account
-                                 * for that. */
-                                p = oldp;
-                                goto loopdone;
+                            if (node_type == EXACTFU) {
+                                /* See comments for join_exact() as to why we
+                                 * fold this non-UTF at compile time */
+                                if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
+                                    *(s++) = 's';
+
+                                    /* Let the code below add in the extra 's' */
+                                    ender = 's';
+                                    added_len = 2;
+                                }
                             }
                             else {
-                                RExC_seen_unfolded_sharp_s = 1;
                                 maybe_exactfu = FALSE;
                             }
                         }
-                        else if (   len
-                                 && isALPHA_FOLD_EQ(ender, 's')
-                                 && isALPHA_FOLD_EQ(*(s-1), 's'))
-                        {
-                            maybe_exactfu = FALSE;
-                        }
-                        else
 #endif
 
-                        if (PL_fold[ender] != PL_fold_latin1[ender]) {
-                            maybe_exactfu = FALSE;
-                        }
-
                         /* Even when folding, we store just the input
                          * character, as we have an array that finds its fold
                          * quickly */
                         *(s++) = (char) ender;
                     }
-                }
-                else {  /* FOLD, and UTF */
-                    /* Unlike the non-fold case, we do actually have to
-                     * calculate the fold in pass 1.  This is for two reasons,
-                     * the folded length may be longer than the unfolded, and
-                     * we have to calculate how many EXACTish nodes it will
-                     * take; and we may run out of room in a node in the middle
-                     * of a potential multi-char fold, and have to back off
-                     * accordingly.  */
-
-                    if (isASCII_uni(ender)) {
-
-                        /* As above, we close up and start a new node if the
-                         * previous characters don't match the fold/non-fold
-                         * state of this one.  And if this is the first
-                         * character in the node, and it folds, we change the
-                         * node away from being EXACT */
-                        if (! IS_IN_SOME_FOLD_L1(ender)) {
-                            if (len && node_type != EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            *(s)++ = (U8) ender;
-                        }
-                        else {  /* Is in a fold */
-
-                            if (! len) {
-                                node_type = compute_EXACTish(pRExC_state);
-                            }
-                            else if (node_type == EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            *(s)++ = (U8) toFOLD(ender);
-                        }
-                    }
-                    else {  /* Not ASCII */
-                        STRLEN foldlen;
-
-                        /* As above, we close up and start a new node if the
-                         * previous characters don't match the fold/non-fold
-                         * state of this one.  And if this is the first
-                         * character in the node, and it folds, we change the
-                         * node away from being EXACT */
-                        if (! _invlist_contains_cp(PL_utf8_foldable, ender)) {
-                            if (len && node_type != EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            s = (char *) uvchr_to_utf8((U8 *) s, ender);
-                            added_len = UVCHR_SKIP(ender);
-                        }
-                        else {
-
-                            if (! len) {
-                                node_type = compute_EXACTish(pRExC_state);
-                            }
-                            else if (node_type == EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            ender = _to_uni_fold_flags(
-                                     ender,
-                                     (U8 *) s,
-                                     &foldlen,
-                                     FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
-                                                        ? FOLD_FLAGS_NOMIX_ASCII
-                                                        : 0));
-                            s += foldlen;
-                            added_len = foldlen;
-                        }
-                    }
-               }
+               } /* End of adding current character to the node */
 
                 len += added_len;
 
@@ -14404,6 +14416,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
           loopdone:   /* Jumped to when encounters something that shouldn't be
                          in the node */
 
+            /* Free up any over-allocated space */
+            change_engine_size(pRExC_state, - (initial_size - STR_SZ(len)));
+
             /* I (khw) don't know if you can get here with zero length, but the
              * old code handled this situation by creating a zero-length EXACT
              * node.  Might as well be NOTHING instead */
@@ -14433,6 +14448,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             OP(REGNODE_p(ret)) = EXACTFLU8;
                         }
                     }
+                    else if (node_type == EXACTF) {
+                        RExC_seen_d_op = TRUE;
+                    }
                 }
 
                 alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender,
@@ -14460,7 +14478,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     /* Position parse to next real character */
     skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                             FALSE /* Don't force to /x */ );
-    if (   PASS2 && *RExC_parse == '{'
+    if (   *RExC_parse == '{'
         && OP(REGNODE_p(ret)) != SBOL && ! regcurly(RExC_parse))
     {
         if (RExC_strict || new_regcurly(RExC_parse, RExC_end)) {
@@ -14622,17 +14640,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
      * In b) there may be errors or warnings generated.  If 'check_only' is
      * TRUE, then any errors are discarded.  Warnings are returned to the
      * caller via an AV* created into '*posix_warnings' if it is not NULL.  If
-     * instead it is NULL, warnings are suppressed.  This is done in all
-     * passes.  The reason for this is that the rest of the parsing is heavily
-     * dependent on whether this routine found a valid posix class or not.  If
-     * it did, the closing ']' is absorbed as part of the class.  If no class,
-     * or an invalid one is found, any ']' will be considered the terminator of
-     * the outer bracketed character class, leading to very different results.
-     * In particular, a '(?[ ])' construct will likely have a syntax error if
-     * the class is parsed other than intended, and this will happen in pass1,
-     * before the warnings would normally be output.  This mechanism allows the
-     * caller to output those warnings in pass1 just before dieing, giving a
-     * much better clue as to what is wrong.
+     * instead it is NULL, warnings are suppressed.
      *
      * The reason for this function, and its complexity is that a bracketed
      * character class can contain just about anything.  But it's easy to
@@ -15427,7 +15435,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
     const bool save_fold = FOLD;    /* Temporary */
     char *save_end, *save_parse;    /* Temporaries */
     const bool in_locale = LOC;     /* we turn off /l during processing */
-    AV* posix_warnings = NULL;
 
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -15443,117 +15450,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      * compile time values are valid in all runtime cases */
     REQUIRE_UNI_RULES(flagp, 0);
 
-    /* This will return only an ANYOF regnode, or (unlikely) something smaller
-     * (such as EXACT).  Thus we can skip most everything if just sizing.  We
-     * call regclass to handle '[]' so as to not have to reinvent its parsing
-     * rules here (throwing away the size it computes each time).  And, we exit
-     * upon an unescaped ']' that isn't one ending a regclass.  To do both
-     * these things, we need to realize that something preceded by a backslash
-     * is escaped, so we have to keep track of backslashes */
-    if (SIZE_ONLY) {
-        UV nest_depth = 0; /* how many nested (?[...]) constructs */
-
-        while (RExC_parse < RExC_end) {
-            SV* current = NULL;
-
-            skip_to_be_ignored_text(pRExC_state, &RExC_parse,
-                                    TRUE /* Force /x */ );
-
-            switch (*RExC_parse) {
-                case '(':
-                    if (RExC_parse[1] == '?' && RExC_parse[2] == '[')
-                        nest_depth++, RExC_parse+=2;
-                    /* FALLTHROUGH */
-                default:
-                    break;
-                case '\\':
-                    /* Skip past this, so the next character gets skipped, after
-                     * the switch */
-                    RExC_parse++;
-                    if (*RExC_parse == 'c') {
-                            /* Skip the \cX notation for control characters */
-                            RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-                    }
-                    break;
-
-                case '[':
-                {
-                    /* See if this is a [:posix:] class. */
-                    bool is_posix_class = (OOB_NAMEDCLASS
-                            < handle_possible_posix(pRExC_state,
-                                                RExC_parse + 1,
-                                                NULL,
-                                                NULL,
-                                                TRUE /* checking only */));
-                    /* If it is a posix class, leave the parse pointer at the
-                     * '[' to fool regclass() into thinking it is part of a
-                     * '[[:posix:]]'. */
-                    if (! is_posix_class) {
-                        RExC_parse++;
-                    }
-
-                    /* regclass() can only return RESTART_PARSE and NEED_UTF8
-                     * if multi-char folds are allowed.  */
-                    if (!regclass(pRExC_state, flagp, depth+1,
-                                  is_posix_class, /* parse the whole char
-                                                     class only if not a
-                                                     posix class */
-                                  FALSE, /* don't allow multi-char folds */
-                                  TRUE, /* silence non-portable warnings. */
-                                  TRUE, /* strict */
-                                  FALSE, /* Require return to be an ANYOF */
-                                  &current,
-                                  &posix_warnings
-                                 ))
-                        FAIL2("panic: regclass returned failure to handle_sets, "
-                              "flags=%#" UVxf, (UV) *flagp);
-
-                    /* function call leaves parse pointing to the ']', except
-                     * if we faked it */
-                    if (is_posix_class) {
-                        RExC_parse--;
-                    }
-
-                    SvREFCNT_dec(current);   /* In case it returned something */
-                    break;
-                }
-
-                case ']':
-                    if (RExC_parse[1] == ')') {
-                        RExC_parse++;
-                        if (nest_depth--) break;
-                        node = reganode(pRExC_state, ANYOF, 0);
-                        nextchar(pRExC_state);
-                        Set_Node_Length(REGNODE_p(node),
-                                RExC_parse - oregcomp_parse + 1); /* MJD */
-                        if (in_locale) {
-                            set_regex_charset(&RExC_flags, REGEX_LOCALE_CHARSET);
-                        }
-
-                        return node;
-                    }
-                    /* We output the messages even if warnings are off, because we'll fail
-                     * the very next thing, and these give a likely diagnosis for that */
-                    if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
-                        output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
-                    }
-                    RExC_parse++;
-                    vFAIL("Unexpected ']' with no following ')' in (?[...");
-            }
-
-            RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-        }
-
-        /* We output the messages even if warnings are off, because we'll fail
-         * the very next thing, and these give a likely diagnosis for that */
-        if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
-            output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
-        }
-
-        vFAIL("Syntax error in (?[...])");
-    }
-
-    /* Pass 2 only after this. */
     ckWARNexperimental(RExC_parse,
                        WARN_EXPERIMENTAL__REGEX_SETS,
                        "The regex_sets feature is experimental");
@@ -15646,8 +15542,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
 
         skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                 TRUE /* Force /x */ );
-        if (RExC_parse >= RExC_end) {
-            Perl_croak(aTHX_ "panic: Read past end of '(?[ ])'");
+        if (RExC_parse >= RExC_end) {   /* Fail */
+            break;
         }
 
         curchar = UCHARAT(RExC_parse);
@@ -15776,8 +15672,7 @@ redo_curchar:
                               FALSE, /* don't silence non-portable warnings.  */
                               TRUE,  /* strict */
                               FALSE, /* Require return to be an ANYOF */
-                              &current,
-                              NULL))
+                              &current))
                 {
                     FAIL2("panic: regclass returned failure to handle_sets, "
                           "flags=%#" UVxf, (UV) *flagp);
@@ -15814,14 +15709,16 @@ redo_curchar:
                                 TRUE, /* silence non-portable warnings. */
                                 TRUE, /* strict */
                                 FALSE, /* Require return to be an ANYOF */
-                                &current,
-                                NULL
-                                ))
+                                &current))
                 {
                     FAIL2("panic: regclass returned failure to handle_sets, "
                           "flags=%#" UVxf, (UV) *flagp);
                 }
 
+                if (! current) {
+                    break;
+                }
+
                 /* function call leaves parse pointing to the ']', except if we
                  * faked it */
                 if (is_posix_class) {
@@ -15841,6 +15738,9 @@ redo_curchar:
 
             case ')':
                 if (av_tindex_skip_len_mg(fence_stack) < 0) {
+                    if (UCHARAT(RExC_parse - 1) == ']')  {
+                        break;
+                    }
                     RExC_parse++;
                     vFAIL("Unexpected ')'");
                 }
@@ -16027,6 +15927,9 @@ redo_curchar:
 
             default:
                 RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+                if (RExC_parse >= RExC_end) {
+                    break;
+                }
                 vFAIL("Unexpected character");
 
           handle_operand:
@@ -16086,7 +15989,18 @@ redo_curchar:
         RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
     } /* End of loop parsing through the construct */
 
+    vFAIL("Syntax error in (?[...])");
+
   done:
+
+    if (RExC_parse >= RExC_end || RExC_parse[1] != ')') {
+        if (RExC_parse < RExC_end) {
+            RExC_parse++;
+        }
+
+        vFAIL("Unexpected ']' with no following ')' in (?[...");
+    }
+
     if (av_tindex_skip_len_mg(fence_stack) >= 0) {
         vFAIL("Unmatched (");
     }
@@ -16145,7 +16059,6 @@ redo_curchar:
                              they're valid on this machine */
                     FALSE, /* similarly, no need for strict */
                     FALSE, /* Require return to be an ANYOF */
-                    NULL,
                     NULL
                 );
 
@@ -16340,45 +16253,33 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
 }
 
 STATIC void
-S_output_or_return_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings, AV** return_posix_warnings)
+S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings)
 {
-    /* If the final parameter is NULL, output the elements of the array given
-     * by '*posix_warnings' as REGEXP warnings.  Otherwise, the elements are
-     * pushed onto it, (creating if necessary) */
+    /* Output the elements of the array given by '*posix_warnings' as REGEXP
+     * warnings. */
 
     SV * msg;
-    const bool first_is_fatal =  ! return_posix_warnings
-                                && ckDEAD(packWARN(WARN_REGEXP));
+    const bool first_is_fatal = ckDEAD(packWARN(WARN_REGEXP));
 
-    PERL_ARGS_ASSERT_OUTPUT_OR_RETURN_POSIX_WARNINGS;
+    PERL_ARGS_ASSERT_OUTPUT_POSIX_WARNINGS;
+
+    if (! TO_OUTPUT_WARNINGS(RExC_parse)) {
+        return;
+    }
 
     while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
-        if (return_posix_warnings) {
-            if (! *return_posix_warnings) { /* mortalize to not leak if
-                                               warnings are fatal */
-                *return_posix_warnings = (AV *) sv_2mortal((SV *) newAV());
-            }
-            av_push(*return_posix_warnings, msg);
-        }
-        else {
-            if (first_is_fatal) {           /* Avoid leaking this */
-                av_undef(posix_warnings);   /* This isn't necessary if the
-                                               array is mortal, but is a
-                                               fail-safe */
-                (void) sv_2mortal(msg);
-                if (ckDEAD(packWARN(WARN_REGEXP))) {
-                    PREPARE_TO_DIE;
-                }
-            }
-            Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s",
-                                           SvPVX(msg));
-            SvREFCNT_dec_NN(msg);
+        if (first_is_fatal) {           /* Avoid leaking this */
+            av_undef(posix_warnings);   /* This isn't necessary if the
+                                            array is mortal, but is a
+                                            fail-safe */
+            (void) sv_2mortal(msg);
+            PREPARE_TO_DIE;
         }
+        Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
+        SvREFCNT_dec_NN(msg);
     }
 
-    if (! return_posix_warnings) {
-        UPDATE_WARNINGS_LOC(RExC_parse);
-    }
+    UPDATE_WARNINGS_LOC(RExC_parse);
 }
 
 STATIC AV *
@@ -16461,8 +16362,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  const bool strict,
                  bool optimizable,                  /* ? Allow a non-ANYOF return
                                                        node */
-                 SV** ret_invlist, /* Return an inversion list, not a node */
-                 AV** return_posix_warnings
+                 SV** ret_invlist  /* Return an inversion list, not a node */
           )
 {
     /* parse a bracketed class specification.  Most of these will produce an
@@ -16489,7 +16389,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      * On success, returns the offset at which any next node should be placed
      * into the regex engine program being compiled.
      *
-     * Returns 0 otherwise, setting flagp to RESTART_PARSE if the sizing scan needs
+     * Returns 0 otherwise, setting flagp to RESTART_PARSE if the parse needs
      * to be restarted, or'd with NEED_UTF8 if the pattern needs to be upgraded to
      * UTF-8
      */
@@ -16575,10 +16475,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
     bool warn_super = ALWAYS_WARN_SUPER;
 
-    const regnode_offset orig_emit = RExC_emit; /* Save the original RExC_emit in
-        case we need to change the emitted regop to an EXACT. */
     const char * orig_parse = RExC_parse;
-    const SSize_t orig_size = RExC_size;
     bool posixl_matches_all = FALSE; /* Does /l class have both e.g. \W,\w ? */
 
     /* This variable is used to mark where the end in the input is of something
@@ -16589,8 +16486,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     char *not_posix_region_end = RExC_parse - 1;
 
     AV* posix_warnings = NULL;
-    const bool do_posix_warnings =     return_posix_warnings
-                                   || (PASS2 && ckWARN(WARN_REGEXP));
+    const bool do_posix_warnings = ckWARN(WARN_REGEXP);
     U8 op = END;    /* The returned node-type, initialized to an impossible
                        one.  */
     U8 anyof_flags = 0;   /* flag bits if the node is an ANYOF-type */
@@ -16604,6 +16500,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     PERL_UNUSED_ARG(depth);
 #endif
 
+
+    /* If wants an inversion list returned, we can't optimize to something
+     * else. */
+    if (ret_invlist) {
+        optimizable = FALSE;
+    }
+
     DEBUG_PARSE("clas");
 
 #if UNICODE_MAJOR_VERSION < 3 /* no multifolds in early Unicode */      \
@@ -16612,14 +16515,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     allow_multi_folds = FALSE;
 #endif
 
-    if (SIZE_ONLY) {
-       listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
-    }
-    else {
-       listsv = newSVpvs_flags("# comment\n", SVs_TEMP);
-       initial_listsv_len = SvCUR(listsv);
-        SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated.  */
-    }
+    listsv = newSVpvs_flags("# comment\n", SVs_TEMP);
+    initial_listsv_len = SvCUR(listsv);
+    SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated.  */
 
     SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
 
@@ -16640,7 +16538,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                 &not_posix_region_end,
                                                 NULL,
                                                 TRUE /* checking only */);
-        if (PASS2 && maybe_class >= OOB_NAMEDCLASS && do_posix_warnings) {
+        if (maybe_class >= OOB_NAMEDCLASS && do_posix_warnings) {
             ckWARN4reg(not_posix_region_end,
                     "POSIX syntax [%c %c] belongs inside character classes%s",
                     *RExC_parse, *RExC_parse,
@@ -16671,12 +16569,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         {
             /* Warnings about posix class issues are considered tentative until
              * we are far enough along in the parse that we can no longer
-             * change our mind, at which point we either output them or add
-             * them, if it has so specified, to what gets returned to the
-             * caller.  This is done each time through the loop so that a later
-             * class won't zap them before they have been dealt with. */
-            output_or_return_posix_warnings(pRExC_state, posix_warnings,
-                                            return_posix_warnings);
+             * change our mind, at which point we output them.  This is done
+             * each time through the loop so that a later class won't zap them
+             * before they have been dealt with. */
+            output_posix_warnings(pRExC_state, posix_warnings);
         }
 
         if  (RExC_parse >= stop_ptr) {
@@ -16922,7 +16818,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                    e = RExC_parse;
                    n = 1;
                }
-               if (!SIZE_ONLY) {
+               {
                     char* name = RExC_parse;
                     char* base_name;    /* name after any packages are stripped */
                     char* lookup_name = NULL;
@@ -17113,7 +17009,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             }
                        }
                     }
-                } /* End of actually getting the values in pass 2 */
+                }
 
                RExC_parse = e + 1;
                 namedclass = ANYOF_UNIPROP;  /* no official name, but it's
@@ -17184,8 +17080,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
                             vFAIL("Need exactly 3 octal digits");
                         }
-                        else if (! SIZE_ONLY /* like \08, \178 */
-                                 && numlen < 3
+                        else if (   numlen < 3 /* like \08, \178 */
                                  && RExC_parse < RExC_end
                                  && isDIGIT(*RExC_parse)
                                  && ckWARN(WARN_REGEXP))
@@ -17200,7 +17095,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                }
            default:
                /* Allow \_ to not give an error */
-               if (!SIZE_ONLY && isWORDCHAR(value) && value != '_') {
+               if (isWORDCHAR(value) && value != '_') {
                     if (strict) {
                         vFAIL2("Unrecognized escape \\%c in character class",
                                (int)value);
@@ -17224,24 +17119,22 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             * literal, as is the character that began the false range, i.e.
             * the 'a' in the examples */
            if (range) {
-               if (!SIZE_ONLY) {
-                   const int w = (RExC_parse >= rangebegin)
-                                  ? RExC_parse - rangebegin
-                                  : 0;
-                    if (strict) {
-                        vFAIL2utf8f(
-                            "False [] range \"%" UTF8f "\"",
-                            UTF8fARG(UTF, w, rangebegin));
-                    }
-                    else {
-                        ckWARN2reg(RExC_parse,
-                            "False [] range \"%" UTF8f "\"",
-                            UTF8fARG(UTF, w, rangebegin));
-                        cp_list = add_cp_to_invlist(cp_list, '-');
-                        cp_foldable_list = add_cp_to_invlist(cp_foldable_list,
-                                                             prevvalue);
-                    }
-               }
+                const int w = (RExC_parse >= rangebegin)
+                                ? RExC_parse - rangebegin
+                                : 0;
+                if (strict) {
+                    vFAIL2utf8f(
+                        "False [] range \"%" UTF8f "\"",
+                        UTF8fARG(UTF, w, rangebegin));
+                }
+                else {
+                    ckWARN2reg(RExC_parse,
+                        "False [] range \"%" UTF8f "\"",
+                        UTF8fARG(UTF, w, rangebegin));
+                    cp_list = add_cp_to_invlist(cp_list, '-');
+                    cp_foldable_list = add_cp_to_invlist(cp_foldable_list,
+                                                            prevvalue);
+                }
 
                range = 0; /* this was not a true range */
                 element_count += 2; /* So counts for three values */
@@ -17296,39 +17189,33 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 POSIXL_SET(posixl, namedclass);
 
                 /* The above-Latin1 characters are not subject to locale rules.
-                 * Just add them, in the second pass, to the
-                 * unconditionally-matched list */
-                if (! SIZE_ONLY) {
-
-                    /* Get the list of the above-Latin1 code points this
-                     * matches */
-                    _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
-                                          PL_XPosix_ptrs[classnum],
-
-                                          /* Odd numbers are complements, like
-                                           * NDIGIT, NASCII, ... */
-                                          namedclass % 2 != 0,
-                                          &scratch_list);
-                    /* Checking if 'cp_list' is NULL first saves an extra
-                     * clone.  Its reference count will be decremented at the
-                     * next union, etc, or if this is the only instance, at the
-                     * end of the routine */
-                    if (! cp_list) {
-                        cp_list = scratch_list;
-                    }
-                    else {
-                        _invlist_union(cp_list, scratch_list, &cp_list);
-                        SvREFCNT_dec_NN(scratch_list);
-                    }
-                    continue;   /* Go get next character */
+                 * Just add them to the unconditionally-matched list */
+
+                /* Get the list of the above-Latin1 code points this matches */
+                _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
+                                        PL_XPosix_ptrs[classnum],
+
+                                        /* Odd numbers are complements, like
+                                        * NDIGIT, NASCII, ... */
+                                        namedclass % 2 != 0,
+                                        &scratch_list);
+                /* Checking if 'cp_list' is NULL first saves an extra clone.
+                 * Its reference count will be decremented at the next union,
+                 * etc, or if this is the only instance, at the end of the
+                 * routine */
+                if (! cp_list) {
+                    cp_list = scratch_list;
                 }
+                else {
+                    _invlist_union(cp_list, scratch_list, &cp_list);
+                    SvREFCNT_dec_NN(scratch_list);
+                }
+                continue;   /* Go get next character */
             }
-            else if (! SIZE_ONLY) {
+            else {
 
-                /* Here, not in pass1 (in that pass we skip calculating the
-                 * contents of this class), and is not /l, or is a POSIX class
-                 * for which /l doesn't matter (or is a Unicode property, which
-                 * is skipped here). */
+                /* Here, is not /l, or is a POSIX class for which /l doesn't
+                 * matter (or is a Unicode property, which is skipped here). */
                 if (namedclass >= ANYOF_POSIXL_MAX) {  /* If a special class */
                     if (namedclass != ANYOF_UNIPROP) { /* UNIPROP = \p and \P */
 
@@ -17441,7 +17328,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
                     /* a bad range like \w-, [:word:]- ? */
                     if (namedclass > OOB_NAMEDCLASS) {
-                        if (strict || (PASS2 && ckWARN(WARN_REGEXP))) {
+                        if (strict || ckWARN(WARN_REGEXP)) {
                             const int w = RExC_parse >= rangebegin
                                           ?  RExC_parse - rangebegin
                                           : 0;
@@ -17455,9 +17342,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                     w, w, rangebegin);
                             }
                         }
-                        if (!SIZE_ONLY) {
-                            cp_list = add_cp_to_invlist(cp_list, '-');
-                        }
+                        cp_list = add_cp_to_invlist(cp_list, '-');
                         element_count++;
                     } else
                         range = 1;     /* yeah, it's a range! */
@@ -17474,8 +17359,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          * <prevvalue> is the beginning of the range, if any; or <value> if
          * not. */
 
-       /* non-Latin1 code point implies unicode semantics.  Must be set in
-        * pass1 so is there for the whole of pass 2 */
+       /* non-Latin1 code point implies unicode semantics. */
        if (value > 255) {
             REQUIRE_UNI_RULES(flagp, 0);
        }
@@ -17543,7 +17427,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             }
         }
 
-        if (strict && PASS2 && ckWARN(WARN_REGEXP)) {
+        if (strict && ckWARN(WARN_REGEXP)) {
             if (range) {
 
                 /* If the range starts above 255, everything is portable and
@@ -17675,56 +17559,51 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         }
 
         /* Deal with this element of the class */
-       if (! SIZE_ONLY) {
 
 #ifndef EBCDIC
-            cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
-                                                     prevvalue, value);
+        cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
+                                                    prevvalue, value);
 #else
-            /* On non-ASCII platforms, for ranges that span all of 0..255, and
-             * ones that don't require special handling, we can just add the
-             * range like we do for ASCII platforms */
-            if ((UNLIKELY(prevvalue == 0) && value >= 255)
-                || ! (prevvalue < 256
-                      && (unicode_range
-                          || (! non_portable_endpoint
-                              && ((isLOWER_A(prevvalue) && isLOWER_A(value))
-                                  || (isUPPER_A(prevvalue)
-                                      && isUPPER_A(value)))))))
-            {
-                cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
-                                                         prevvalue, value);
+        /* On non-ASCII platforms, for ranges that span all of 0..255, and ones
+         * that don't require special handling, we can just add the range like
+         * we do for ASCII platforms */
+        if ((UNLIKELY(prevvalue == 0) && value >= 255)
+            || ! (prevvalue < 256
+                    && (unicode_range
+                        || (! non_portable_endpoint
+                            && ((isLOWER_A(prevvalue) && isLOWER_A(value))
+                                || (isUPPER_A(prevvalue)
+                                    && isUPPER_A(value)))))))
+        {
+            cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
+                                                        prevvalue, value);
+        }
+        else {
+            /* Here, requires special handling.  This can be because it is a
+             * range whose code points are considered to be Unicode, and so
+             * must be individually translated into native, or because its a
+             * subrange of 'A-Z' or 'a-z' which each aren't contiguous in
+             * EBCDIC, but we have defined them to include only the "expected"
+             * upper or lower case ASCII alphabetics.  Subranges above 255 are
+             * the same in native and Unicode, so can be added as a range */
+            U8 start = NATIVE_TO_LATIN1(prevvalue);
+            unsigned j;
+            U8 end = (value < 256) ? NATIVE_TO_LATIN1(value) : 255;
+            for (j = start; j <= end; j++) {
+                cp_foldable_list = add_cp_to_invlist(cp_foldable_list, LATIN1_TO_NATIVE(j));
             }
-            else {
-                /* Here, requires special handling.  This can be because it is
-                 * a range whose code points are considered to be Unicode, and
-                 * so must be individually translated into native, or because
-                 * its a subrange of 'A-Z' or 'a-z' which each aren't
-                 * contiguous in EBCDIC, but we have defined them to include
-                 * only the "expected" upper or lower case ASCII alphabetics.
-                 * Subranges above 255 are the same in native and Unicode, so
-                 * can be added as a range */
-                U8 start = NATIVE_TO_LATIN1(prevvalue);
-                unsigned j;
-                U8 end = (value < 256) ? NATIVE_TO_LATIN1(value) : 255;
-                for (j = start; j <= end; j++) {
-                    cp_foldable_list = add_cp_to_invlist(cp_foldable_list, LATIN1_TO_NATIVE(j));
-                }
-                if (value > 255) {
-                    cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
-                                                             256, value);
-                }
+            if (value > 255) {
+                cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
+                                                            256, value);
             }
-#endif
         }
+#endif
 
        range = 0; /* this range (if it was one) is done now */
     } /* End of loop through all the text within the brackets */
 
-
     if (   posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
-        output_or_return_posix_warnings(pRExC_state, posix_warnings,
-                                        return_posix_warnings);
+        output_posix_warnings(pRExC_state, posix_warnings);
     }
 
     /* If anything in the class expands to more than one character, we have to
@@ -17826,223 +17705,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         return ret;
     }
 
-    /* Here, we've gone through the entire class and dealt with multi-char
-     * folds.  We are now in a position that we can do some checks to see if we
-     * can optimize this ANYOF node into a simpler one, even in Pass 1.
-     * Currently we only do two checks:
-     * 1) is in the unlikely event that the user has specified both, eg. \w and
-     *    \W under /l, then the class matches everything.  (This optimization
-     *    is done only to make the optimizer code run later work.)
-     * 2) if the character class contains only a single element (including a
-     *    single range), we see if there is an equivalent node for it.
-     * Other checks are possible */
-    if (   optimizable
-        && ! ret_invlist   /* Can't optimize if returning the constructed
-                              inversion list */
-        && (UNLIKELY(posixl_matches_all) || element_count == 1))
-    {
-        U8 op = END;
-        U8 arg = 0;
-
-        if (UNLIKELY(posixl_matches_all)) {
-            op = SANY;
-        }
-        else if (namedclass > OOB_NAMEDCLASS) { /* this is a single named
-                                                   class, like \w or [:digit:]
-                                                   or \p{foo} */
-
-            /* All named classes are mapped into POSIXish nodes, with its FLAG
-             * argument giving which class it is */
-            switch ((I32)namedclass) {
-                case ANYOF_UNIPROP:
-                    break;
-
-                /* These don't depend on the charset modifiers.  They always
-                 * match under /u rules */
-                case ANYOF_NHORIZWS:
-                case ANYOF_HORIZWS:
-                    namedclass = ANYOF_BLANK + namedclass - ANYOF_HORIZWS;
-                    /* FALLTHROUGH */
-
-                case ANYOF_NVERTWS:
-                case ANYOF_VERTWS:
-                    op = POSIXU;
-                    goto join_posix;
-
-                /* The actual POSIXish node for all the rest depends on the
-                 * charset modifier.  The ones in the first set depend only on
-                 * ASCII or, if available on this platform, also locale */
-
-                case ANYOF_ASCII:
-                case ANYOF_NASCII:
-
-#ifdef HAS_ISASCII
-                    if (LOC) {
-                        op = POSIXL;
-                        goto join_posix;
-                    }
-#endif
-                    /* (named_class - ANYOF_ASCII) is 0 or 1. xor'ing with
-                     * invert converts that to 1 or 0 */
-                    op = ASCII + ((namedclass - ANYOF_ASCII) ^ invert);
-                    break;
-
-                /* The following don't have any matches in the upper Latin1
-                 * range, hence /d is equivalent to /u for them.  Making it /u
-                 * saves some branches at runtime */
-                case ANYOF_DIGIT:
-                case ANYOF_NDIGIT:
-                case ANYOF_XDIGIT:
-                case ANYOF_NXDIGIT:
-                    if (! DEPENDS_SEMANTICS) {
-                        goto treat_as_default;
-                    }
-
-                    op = POSIXU;
-                    goto join_posix;
-
-                /* The following change to CASED under /i */
-                case ANYOF_LOWER:
-                case ANYOF_NLOWER:
-                case ANYOF_UPPER:
-                case ANYOF_NUPPER:
-                    if (FOLD) {
-                        namedclass = ANYOF_CASED + (namedclass % 2);
-                    }
-                    /* FALLTHROUGH */
-
-                /* The rest have more possibilities depending on the charset.
-                 * We take advantage of the enum ordering of the charset
-                 * modifiers to get the exact node type, */
-                default:
-                  treat_as_default:
-                    op = POSIXD + get_regex_charset(RExC_flags);
-                    if (op > POSIXA) { /* /aa is same as /a */
-                        op = POSIXA;
-                    }
-
-                  join_posix:
-                    /* The odd numbered ones are the complements of the
-                     * next-lower even number one */
-                    if (namedclass % 2 == 1) {
-                        invert = ! invert;
-                        namedclass--;
-                    }
-                    arg = namedclass_to_classnum(namedclass);
-                    break;
-            }
-        }
-        else if (value == prevvalue) {
-
-            /* Here, the class consists of just a single code point */
-
-            if (invert) {
-                if (! LOC && value == '\n') {
-                    op = REG_ANY; /* Optimize [^\n] */
-                    *flagp |= HASWIDTH|SIMPLE;
-                    MARK_NAUGHTY(1);
-                }
-            }
-            else if (value < 256 || UTF) {
-
-                /* Optimize a single value into an EXACTish node, but not if it
-                 * would require converting the pattern to UTF-8. */
-                op = compute_EXACTish(pRExC_state);
-            }
-        } /* Otherwise is a range */
-        else if (! LOC) {   /* locale could vary these */
-            if (prevvalue == '0') {
-                if (value == '9') {
-                    arg = _CC_DIGIT;
-                    op = POSIXA;
-                }
-            }
-            else if (! FOLD || ASCII_FOLD_RESTRICTED) {
-                /* We can optimize A-Z or a-z, but not if they could match
-                 * something like the KELVIN SIGN under /i. */
-                if (prevvalue == 'A') {
-                    if (value == 'Z'
-#ifdef EBCDIC
-                        && ! non_portable_endpoint
-#endif
-                    ) {
-                        arg = (FOLD) ? _CC_ALPHA : _CC_UPPER;
-                        op = POSIXA;
-                    }
-                }
-                else if (prevvalue == 'a') {
-                    if (value == 'z'
-#ifdef EBCDIC
-                        && ! non_portable_endpoint
-#endif
-                    ) {
-                        arg = (FOLD) ? _CC_ALPHA : _CC_LOWER;
-                        op = POSIXA;
-                    }
-                }
-            }
-        }
-
-        /* Here, we have changed <op> away from its initial value iff we found
-         * an optimization */
-        if (op != END) {
-
-            /* Emit the calculated regnode,
-             * which should correspond to the beginning, not current, state of
-             * the parse */
-            const char * cur_parse = RExC_parse;
-            RExC_parse = (char *)orig_parse;
-                if (PL_regkind[op] == POSIXD) {
-                    if (op == POSIXL) {
-                        RExC_contains_locale = 1;
-                    }
-                    if (invert) {
-                        op += NPOSIXD - POSIXD;
-                    }
-                }
-
-            ret = reg_node(pRExC_state, op);
-
-            if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
-                if (! SIZE_ONLY) {
-                    FLAGS(REGNODE_p(ret)) = arg;
-                }
-                *flagp |= HASWIDTH|SIMPLE;
-            }
-            else if (PL_regkind[op] == EXACT) {
-                alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value,
-                                           TRUE /* downgradable to EXACT */
-                                           );
-            }
-            else {
-                *flagp |= HASWIDTH|SIMPLE;
-            }
-
-            RExC_parse = (char *) cur_parse;
-
-            SvREFCNT_dec(posixes);
-            SvREFCNT_dec(nposixes);
-            SvREFCNT_dec(simple_posixes);
-            SvREFCNT_dec(cp_list);
-            SvREFCNT_dec(cp_foldable_list);
-            return ret;
-        }
-    }
-
-    /* Assume we are going to generate an ANYOF-type node. */
-    op = (posixl)
-          ? ANYOFPOSIXL
-          : (LOC)
-             ? ANYOFL
-             : ANYOF;
-    ret = reganode(pRExC_state, op, 0);
-
-    if (SIZE_ONLY) {
-        return ret;
-    }
-
-    /****** !SIZE_ONLY (Pass 2) AFTER HERE *********/
-
     /* If folding, we calculate all characters that could fold to or from the
      * ones already on the list */
     if (cp_foldable_list) {
@@ -18397,10 +18059,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  || (anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)))
     {
         use_anyofd = TRUE;
+        RExC_seen_d_op = TRUE;
         optimizable = FALSE;
     }
 
-
     /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
      * at compile time.  Besides not inverting folded locale now, we can't
      * invert if there are things such as \w, which aren't known until runtime
@@ -18424,284 +18086,285 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     }
 
     if (ret_invlist) {
-        assert(cp_list);
-
         *ret_invlist = cp_list;
         SvREFCNT_dec(swash);
 
-        /* Discard the generated node */
-        if (SIZE_ONLY) {
-            RExC_size = orig_size;
-        }
-        else {
-            RExC_emit = orig_emit;
-        }
-        return orig_emit;
+        return RExC_emit;
     }
 
     /* Some character classes are equivalent to other nodes.  Such nodes take
      * up less room and generally fewer operations to execute than ANYOF nodes.
-     * Above, we checked for and optimized into some such equivalents for
-     * certain common classes that are easy to test.  Getting to this point in
-     * the code means that the class didn't get optimized there.  Since this
-     * code is only executed in Pass 2, it is too late to save space--it has
-     * been allocated in Pass 1, and currently isn't given back.  XXX Why not?
-     * But turning things into an EXACTish node can allow the optimizer to join
-     * it to any adjacent such nodes.  And if the class is equivalent to things
-     * like /./, expensive run-time swashes can be avoided.  Now that we have
-     * more complete information, we can find things necessarily missed by the
-     * earlier code. */
-
-    if (optimizable && cp_list && ! invert) {
-        UV start, end;
-        U8 op = END;  /* The optimzation node-type */
+     * */
+
+    if (optimizable) {
         int posix_class = -1;   /* Illegal value */
         const char * cur_parse= RExC_parse;
         U8 ANYOFM_mask = 0xFF;
         U32 anode_arg = 0;
+        UV start, end;
 
-        invlist_iterinit(cp_list);
-        if (! invlist_iternext(cp_list, &start, &end)) {
-
-            /* Here, the list is empty.  This happens, for example, when a
-             * Unicode property that doesn't match anything is the only element
-             * in the character class (perluniprops.pod notes such properties).
-             * */
-            op = OPFAIL;
-            *flagp |= HASWIDTH|SIMPLE;
+        if (UNLIKELY(posixl_matches_all)) {
+            op = SANY;
         }
-        else if (start == end) {    /* The range is a single code point */
-            if (! invlist_iternext(cp_list, &start, &end)
+        else if (cp_list && ! invert) {
 
-                    /* Don't do this optimization if it would require changing
-                     * the pattern to UTF-8 */
-                && (start < 256 || UTF))
-            {
-                /* Here, the list contains a single code point.  Can optimize
-                 * into an EXACTish node */
+            invlist_iterinit(cp_list);
+            if (! invlist_iternext(cp_list, &start, &end)) {
 
-                value = start;
+                /* Here, the list is empty.  This happens, for example, when a
+                 * Unicode property that doesn't match anything is the only
+                 * element in the character class (perluniprops.pod notes such
+                 * properties).  */
+                op = OPFAIL;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (start == end) {    /* The range is a single code point */
+                if (! invlist_iternext(cp_list, &start, &end)
 
-                if (! FOLD) {
-                    op = (LOC)
-                         ? EXACTL
-                         : EXACT;
-                }
-                else if (LOC) {
+                        /* Don't do this optimization if it would require
+                         * changing the pattern to UTF-8 */
+                    && (start < 256 || UTF))
+                {
+                    /* Here, the list contains a single code point.  Can
+                     * optimize into an EXACTish node */
 
-                    /* A locale node under folding with one code point can be
-                     * an EXACTFL, as its fold won't be calculated until
-                     * runtime */
-                    op = EXACTFL;
-                }
-                else {
+                    value = start;
 
-                    /* Here, we are generally folding, but there is only one
-                     * code point to match.  If we have to, we use an EXACT
-                     * node, but it would be better for joining with adjacent
-                     * nodes in the optimization pass if we used the same
-                     * EXACTFish node that any such are likely to be.  We can
-                     * do this iff the code point doesn't participate in any
-                     * folds.  For example, an EXACTF of a colon is the same as
-                     * an EXACT one, since nothing folds to or from a colon. */
-                    if (value < 256) {
-                        if (IS_IN_SOME_FOLD_L1(value)) {
-                            op = EXACT;
-                        }
+                    if (! FOLD) {
+                        op = (LOC)
+                             ? EXACTL
+                             : EXACT;
+                    }
+                    else if (LOC) {
+
+                        /* A locale node under folding with one code point can
+                         * be an EXACTFL, as its fold won't be calculated until
+                         * runtime */
+                        op = EXACTFL;
                     }
                     else {
-                        if (_invlist_contains_cp(PL_utf8_foldable, value)) {
-                            op = EXACT;
+
+                        /* Here, we are generally folding, but there is only
+                         * one code point to match.  If we have to, we use an
+                         * EXACT node, but it would be better for joining with
+                         * adjacent nodes in the optimization phase if we used
+                         * the same EXACTFish node that any such are likely to
+                         * be.  We can do this iff the code point doesn't
+                         * participate in any folds.  For example, an EXACTF of
+                         * a colon is the same as an EXACT one, since nothing
+                         * folds to or from a colon. */
+                        if (value < 256) {
+                            if (IS_IN_SOME_FOLD_L1(value)) {
+                                op = EXACT;
+                            }
+                        }
+                        else {
+                            if (_invlist_contains_cp(PL_utf8_foldable, value)) {
+                                op = EXACT;
+                            }
                         }
-                    }
 
-                    /* If we haven't found the node type, above, it means we
-                     * can use the prevailing one */
-                    if (op == END) {
-                        op = compute_EXACTish(pRExC_state);
+                        /* If we haven't found the node type, above, it means
+                         * we can use the prevailing one */
+                        if (op == END) {
+                            op = compute_EXACTish(pRExC_state);
+                        }
                     }
                 }
+            }   /* End of first range contains just a single code point */
+            else if (start == 0) {
+                if (end == UV_MAX) {
+                    op = SANY;
+                    *flagp |= HASWIDTH|SIMPLE;
+                    MARK_NAUGHTY(1);
+                }
+                else if (end == '\n' - 1
+                        && invlist_iternext(cp_list, &start, &end)
+                        && start == '\n' + 1 && end == UV_MAX)
+                {
+                    op = REG_ANY;
+                    *flagp |= HASWIDTH|SIMPLE;
+                    MARK_NAUGHTY(1);
+                }
             }
-        }   /* End of first range contains just a single code point */
-        else if (start == 0) {
-            if (end == UV_MAX) {
-                op = SANY;
-                *flagp |= HASWIDTH|SIMPLE;
-                MARK_NAUGHTY(1);
-            }
-            else if (end == '\n' - 1
-                    && invlist_iternext(cp_list, &start, &end)
-                    && start == '\n' + 1 && end == UV_MAX)
-            {
-                op = REG_ANY;
-                *flagp |= HASWIDTH|SIMPLE;
-                MARK_NAUGHTY(1);
-            }
-        }
-        invlist_iterfinish(cp_list);
+            invlist_iterfinish(cp_list);
 
-        if (op == END) {
+            if (op == END) {
 
-            /* Here, didn't find an optimization.  See if this matches any of
-             * the POSIX classes.  First try ASCII */
+                /* Here, didn't find an optimization.  See if this matches any
+                 * of the POSIX classes.  First try ASCII */
 
-            if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 0)) {
-                op = ASCII;
-                *flagp |= HASWIDTH|SIMPLE;
-            }
-            else if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 1)) {
-                op = NASCII;
-                *flagp |= HASWIDTH|SIMPLE;
-            }
-            else if (invlist_highest(cp_list) >= 0x2029) {
-
-                /* Then try the other POSIX classes.  The POSIXA ones are about
-                 * the same speed as ANYOF ops, but the ones that have
-                 * above-Latin1 code point matches are somewhat faster than
-                 * ANYOF.  So optimize those, but don't bother with the POSIXA
-                 * ones nor [:cntrl:] which has no above-Latin1 matches.  If
-                 * this ANYOF node has a lower highest possible matching code
-                 * point than any of the XPosix ones, we know that it can't
-                 * possibly be the same as any of them, so we can avoid
-                 * executing this code.  The 0x2029 above for the lowest max
-                 * was determined by manual inspection of the classes, and
-                 * comes from \v.  Suppose Unicode in a later version adds a
-                 * higher code point to \v.  All that means is that this code
-                 * can be executed unnecessarily.  It will still give the
-                 * correct answer. */
-
-                for (posix_class = 0;
-                     posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
-                     posix_class++)
-                {
-                    int try_inverted;
+                if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 0)) {
+                    op = ASCII;
+                    *flagp |= HASWIDTH|SIMPLE;
+                }
+                else if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 1)) {
+                    op = NASCII;
+                    *flagp |= HASWIDTH|SIMPLE;
+                }
+                else {
 
-                    if (posix_class == _CC_CNTRL) {
-                        continue;
-                    }
+                    /* Then try the other POSIX classes.  The POSIXA ones are
+                     * about the same speed as ANYOF ops, but take less room;
+                     * the ones that have above-Latin1 code point matches are
+                     * somewhat faster than ANYOF. */
 
-                    for (try_inverted = 0; try_inverted < 2; try_inverted++) {
+                    for (posix_class = 0;
+                         posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
+                         posix_class++)
+                    {
+                        int try_inverted;
 
-                        /* Check if matches normal or inverted */
-                        if (_invlistEQ(cp_list,
-                                       PL_XPosix_ptrs[posix_class],
-                                       try_inverted))
+                        for (try_inverted = 0; try_inverted < 2; try_inverted++)
                         {
-                            op = (try_inverted)
-                                 ? NPOSIXU
-                                 : POSIXU;
-                            *flagp |= HASWIDTH|SIMPLE;
-                            goto found_posix;
+
+                            /* Check if matches POSIXA, normal or inverted */
+                            if (PL_Posix_ptrs[posix_class]) {
+                                if (_invlistEQ(cp_list,
+                                               PL_Posix_ptrs[posix_class],
+                                               try_inverted))
+                                {
+                                    op = (try_inverted)
+                                        ? NPOSIXA
+                                        : POSIXA;
+                                    *flagp |= HASWIDTH|SIMPLE;
+                                    goto found_posix;
+                                }
+                            }
+
+                            /* Check if matches POSIXU, normal or inverted */
+                            if (_invlistEQ(cp_list,
+                                           PL_XPosix_ptrs[posix_class],
+                                           try_inverted))
+                            {
+                                op = (try_inverted)
+                                     ? NPOSIXU
+                                     : POSIXU;
+                                *flagp |= HASWIDTH|SIMPLE;
+                                goto found_posix;
+                            }
                         }
                     }
-                }
-              found_posix: ;
-            }
-
-            /* If it didn't match a POSIX class, it might be able to be turned
-             * into an ANYOFM node.  Compare two different bytes, bit-by-bit.
-             * In some positions, the bits in each will be 1; and in other
-             * positions both will be 0; and in some positions the bit will be
-             * 1 in one byte, and 0 in the other.  Let 'n' be the number of
-             * positions where the bits differ.  We create a mask which has
-             * exactly 'n' 0 bits, each in a position where the two bytes
-             * differ.  Now take the set of all bytes that when ANDed with the
-             * mask yield the same result.  That set has 2**n elements, and is
-             * representable by just two 8 bit numbers: the result and the
-             * mask.  Importantly, matching the set can be vectorized by
-             * creating a word full of the result bytes, and a word full of the
-             * mask bytes, yielding a significant speed up.  Here, see if this
-             * node matches such a set.  As a concrete example consider [01],
-             * and the byte representing '0' which is 0x30 on ASCII machines.
-             * It has the bits 0011 0000.  Take the mask 1111 1110.  If we AND
-             * 0x31 and 0x30 with that mask we get 0x30.  Any other bytes ANDed
-             * yield something else.  So [01], which is a common usage, is
-             * optimizable into ANYOFM, and can benefit from the speed up.  We
-             * can only do this on UTF-8 invariant bytes, because the variance
-             * would throw this off.  */
-            if (   op == END
-                && invlist_highest(cp_list) <=
+                  found_posix: ;
+                }
+
+                /* If it didn't match a POSIX class, it might be able to be
+                 * turned into an ANYOFM node.  Compare two different bytes,
+                 * bit-by-bit.  In some positions, the bits in each will be 1;
+                 * and in other positions both will be 0; and in some positions
+                 * the bit will be 1 in one byte, and 0 in the other.  Let 'n'
+                 * be the number of positions where the bits differ.  We create
+                 * a mask which has exactly 'n' 0 bits, each in a position
+                 * where the two bytes differ.  Now take the set of all bytes
+                 * that when ANDed with the mask yield the same result.  That
+                 * set has 2**n elements, and is representable by just two 8
+                 * bit numbers: the result and the mask.  Importantly, matching
+                 * the set can be vectorized by creating a word full of the
+                 * result bytes, and a word full of the mask bytes, yielding a
+                 * significant speed up.  Here, see if this node matches such a
+                 * set.  As a concrete example consider [01], and the byte
+                 * representing '0' which is 0x30 on ASCII machines.  It has
+                 * the bits 0011 0000.  Take the mask 1111 1110.  If we AND
+                 * 0x31 and 0x30 with that mask we get 0x30.  Any other bytes
+                 * ANDed yield something else.  So [01], which is a common
+                 * usage, is optimizable into ANYOFM, and can benefit from the
+                 * speed up.  We can only do this on UTF-8 invariant bytes,
+                 * because the variance would throw this off.  */
+                if (op == END) {
+                    PERL_UINT_FAST8_T inverted = 0;
 #ifdef EBCDIC
-                                               0xFF
+                    const PERL_UINT_FAST8_T max_permissible = 0xFF;
 #else
-                                               0x7F
+                    const PERL_UINT_FAST8_T max_permissible = 0x7F;
 #endif
-            ) {
-                Size_t cp_count = 0;
-                bool first_time = TRUE;
-                unsigned int lowest_cp = 0xFF;
-                U8 bits_differing = 0;
-
-                /* Only needed on EBCDIC, as there, variants and non- are mixed
-                 * together.  Could #ifdef it out on ASCII, but probably the
-                 * compiler will optimize it out */
-                bool has_variant = FALSE;
-
-                /* Go through the bytes and find the bit positions that differ */
-                invlist_iterinit(cp_list);
-                while (invlist_iternext(cp_list, &start, &end)) {
-                    unsigned int i = start;
-
-                    cp_count += end - start + 1;
-
-                    if (first_time) {
-                        if (! UVCHR_IS_INVARIANT(i)) {
-                            has_variant = TRUE;
-                            continue;
-                        }
+                    if (invlist_highest(cp_list) > max_permissible) {
+                        _invlist_invert(cp_list);
+                        inverted = 1;
+                    }
 
-                        first_time = FALSE;
-                        lowest_cp = start;
+                    if (invlist_highest(cp_list) <= max_permissible) {
+                    Size_t cp_count = 0;
+                    bool first_time = TRUE;
+                    unsigned int lowest_cp = 0xFF;
+                    U8 bits_differing = 0;
 
-                        i++;
-                    }
+                    /* Only needed on EBCDIC, as there, variants and non- are mixed
+                     * together.  Could #ifdef it out on ASCII, but probably the
+                     * compiler will optimize it out */
+                    bool has_variant = FALSE;
 
-                    /* Find the bit positions that differ from the lowest code
-                     * point in the node.  Keep track of all such positions by
-                     * OR'ing */
-                    for (; i <= end; i++) {
-                        if (! UVCHR_IS_INVARIANT(i)) {
-                            has_variant = TRUE;
-                            continue;
+                    /* Go through the bytes and find the bit positions that differ */
+                    invlist_iterinit(cp_list);
+                    while (invlist_iternext(cp_list, &start, &end)) {
+                        unsigned int i = start;
+
+                        cp_count += end - start + 1;
+
+                        if (first_time) {
+                            if (! UVCHR_IS_INVARIANT(i)) {
+                                has_variant = TRUE;
+                                continue;
+                            }
+
+                            first_time = FALSE;
+                            lowest_cp = start;
+
+                            i++;
                         }
 
-                        bits_differing  |= i ^ lowest_cp;
+                        /* Find the bit positions that differ from the lowest
+                         * code point in the node.  Keep track of all such
+                         * positions by OR'ing */
+                        for (; i <= end; i++) {
+                            if (! UVCHR_IS_INVARIANT(i)) {
+                                has_variant = TRUE;
+                                continue;
+                            }
+
+                            bits_differing  |= i ^ lowest_cp;
+                        }
                     }
-                }
-                invlist_iterfinish(cp_list);
-
-                /* At the end of the loop, we count how many bits differ from
-                 * the bits in lowest code point, call the count 'd'.  If the
-                 * set we found contains 2**d elements, it is the closure of
-                 * all code points that differ only in those bit positions.  To
-                 * convince yourself of that, first note that the number in the
-                 * closure must be a power of 2, which we test for.  The only
-                 * way we could have that count and it be some differing set,
-                 * is if we got some code points that don't differ from the
-                 * lowest code point in any position, but do differ from each
-                 * other in some other position.  That means one code point has
-                 * a 1 in that position, and another has a 0.  But that would
-                 * mean that one of them differs from the lowest code point in
-                 * that position, which possibility we've already excluded. */
-                if ( ! has_variant
-                    && cp_count == 1U << PL_bitcount[bits_differing])
-                {
-                    assert(cp_count > 1);
-                    op = ANYOFM;
+                    invlist_iterfinish(cp_list);
+
+                    /* At the end of the loop, we count how many bits differ
+                     * from the bits in lowest code point, call the count 'd'.
+                     * If the set we found contains 2**d elements, it is the
+                     * closure of all code points that differ only in those bit
+                     * positions.  To convince yourself of that, first note
+                     * that the number in the closure must be a power of 2,
+                     * which we test for.  The only way we could have that
+                     * count and it be some differing set, is if we got some
+                     * code points that don't differ from the lowest code point
+                     * in any position, but do differ from each other in some
+                     * other position.  That means one code point has a 1 in
+                     * that position, and another has a 0.  But that would mean
+                     * that one of them differs from the lowest code point in
+                     * that position, which possibility we've already excluded.
+                     * */
+                    if ( ! has_variant
+                        && cp_count == 1U << PL_bitcount[bits_differing])
+                    {
+                        assert(inverted || cp_count > 1);
+                        op = ANYOFM + inverted;;
 
-                    /* We need to make the bits that differ be 0's */
-                    ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS */
+                        /* We need to make the bits that differ be 0's */
+                        ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS
+                                                         */
 
-                    /* The argument is the lowest code point */
-                    anode_arg = lowest_cp;
-                    *flagp |= HASWIDTH|SIMPLE;
+                        /* The argument is the lowest code point */
+                        anode_arg = lowest_cp;
+                        *flagp |= HASWIDTH|SIMPLE;
+                    }
                 }
+                if (inverted) {
+                    _invlist_invert(cp_list);
+                }
+            }
             }
         }
 
         if (op != END) {
             RExC_parse = (char *)orig_parse;
-            RExC_emit = orig_emit;
 
             if (regarglen[op]) {
                 ret = reganode(pRExC_state, op, anode_arg);
@@ -18726,16 +18389,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             SvREFCNT_dec_NN(cp_list);
             return ret;
         }
-    }
+    }   /* End of seeing if can optimize it into a different node */
 
     /* It's going to be an ANYOF node. */
-    OP(REGNODE_p(ret)) = (use_anyofd)
+    op = (use_anyofd)
          ? ANYOFD
          : ((posixl)
             ? ANYOFPOSIXL
             : ((LOC)
                ? ANYOFL
                : ANYOF));
+    ret = regnode_guts(pRExC_state, op, regarglen[op], "anyof");
+    FILL_NODE(ret, op);        /* We set the argument later */
+    RExC_emit += 1 + regarglen[op];
     ANYOF_FLAGS(REGNODE_p(ret)) = anyof_flags;
 
     /* Here, <cp_list> contains all the code points we can determine at
@@ -19235,16 +18901,37 @@ S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size)
     PERL_ARGS_ASSERT_CHANGE_ENGINE_SIZE;
 
     RExC_size += size;
+
+    Renewc(RExC_rxi,
+           sizeof(regexp_internal) + (RExC_size + 1) * sizeof(regnode),
+                                                /* +1 for REG_MAGIC */
+           char,
+           regexp_internal);
+    if ( RExC_rxi == NULL )
+       FAIL("Regexp out of space");
+    RXi_SET(RExC_rx, RExC_rxi);
+
+    RExC_emit_start = RExC_rxi->program;
+    if (size > 0) {
+        Zero(REGNODE_p(RExC_emit), size, regnode);
+    }
+
+#ifdef RE_TRACK_PATTERN_OFFSETS
+    Renew(RExC_offsets, 2*RExC_size+1, U32);
+    if (size > 0) {
+        Zero(RExC_offsets + 2*(RExC_size - size) + 1, 2 * size, U32);
+    }
+    RExC_offsets[0] = RExC_size;
+#endif
 }
 
 STATIC regnode_offset
 S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_size, const char* const name)
 {
-    /* Allocate a regnode for 'op', with 'extra_size' extra space.  In pass1,
-     * it aligns and increments RExC_size; in pass2, RExC_emit
+    /* Allocate a regnode for 'op', with 'extra_size' extra space.  It aligns
+     * and increments RExC_size and RExC_emit
      *
-     * It returns the renode's offset into the regex engine program (meaningful
-     * only in pass2 */
+     * It returns the regnode's offset into the regex engine program */
 
     const regnode_offset ret = RExC_emit;
 
@@ -19252,21 +18939,15 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
 
     PERL_ARGS_ASSERT_REGNODE_GUTS;
 
-    assert(extra_size >= regarglen[op]);
-
-    if (SIZE_ONLY) {
-       SIZE_ALIGN(RExC_size);
+    SIZE_ALIGN(RExC_size);
     change_engine_size(pRExC_state, (Ptrdiff_t) 1 + extra_size);
-       return(ret);
-    }
-    if (REGNODE_p(RExC_emit) >= RExC_emit_bound)
-        Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d, %p>=%p",
-                  op, (void*)REGNODE_p(RExC_emit), (void*)RExC_emit_bound);
-
     NODE_ALIGN_FILL(REGNODE_p(ret));
 #ifndef RE_TRACK_PATTERN_OFFSETS
     PERL_UNUSED_ARG(name);
+    PERL_UNUSED_ARG(op);
 #else
+    assert(extra_size >= regarglen[op] || PL_regkind[op] == ANYOF);
+
     if (RExC_offsets) {         /* MJD */
        MJD_OFFSET_DEBUG(
               ("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n",
@@ -19290,16 +18971,14 @@ STATIC regnode_offset /* Location. */
 S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
 {
     const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reg_node");
+    regnode_offset ptr = ret;
 
     PERL_ARGS_ASSERT_REG_NODE;
 
     assert(regarglen[op] == 0);
 
-    if (PASS2) {
-        regnode_offset ptr = ret;
-        FILL_ADVANCE_NODE(ptr, op);
-        RExC_emit = ptr;
-    }
+    FILL_ADVANCE_NODE(ptr, op);
+    RExC_emit = ptr;
     return(ret);
 }
 
@@ -19310,17 +18989,15 @@ STATIC regnode_offset /* Location. */
 S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
 {
     const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reganode");
+    regnode_offset ptr = ret;
 
     PERL_ARGS_ASSERT_REGANODE;
 
     /* ANYOF are special cased to allow non-length 1 args */
-    assert(regarglen[op] == 1 || PL_regkind[op] == ANYOF);
+    assert(regarglen[op] == 1);
 
-    if (PASS2) {
-        regnode_offset ptr = ret;
-        FILL_ADVANCE_NODE_ARG(ptr, op, arg);
-        RExC_emit = ptr;
-    }
+    FILL_ADVANCE_NODE_ARG(ptr, op, arg);
+    RExC_emit = ptr;
     return(ret);
 }
 
@@ -19330,16 +19007,14 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
     /* emit a node with U32 and I32 arguments */
 
     const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reg2Lanode");
+    regnode_offset ptr = ret;
 
     PERL_ARGS_ASSERT_REG2LANODE;
 
     assert(regarglen[op] == 2);
 
-    if (PASS2) {
-        regnode_offset ptr = ret;
-        FILL_ADVANCE_NODE_2L_ARG(ptr, op, arg1, arg2);
-        RExC_emit = ptr;
-    }
+    FILL_ADVANCE_NODE_2L_ARG(ptr, op, arg1, arg2);
+    RExC_emit = ptr;
     return(ret);
 }
 
@@ -19352,9 +19027,8 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
 * IMPORTANT NOTE - it is the *callers* responsibility to correctly
 * set up NEXT_OFF() of the inserted node if needed. Something like this:
 *
-* reginsert(pRExC, OPFAIL, orig_emit, depth+1);
-* if (PASS2)
-*     NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
+*   reginsert(pRExC, OPFAIL, orig_emit, depth+1);
+*   NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
 *
 * ALSO NOTE - FLAGS(newly-inserted-operator) will be set to 0 as well.
 */
@@ -19374,13 +19048,10 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
     PERL_UNUSED_ARG(depth);
 /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
     DEBUG_PARSE_FMT("inst"," - %s", PL_reg_name[op]);
-    if (SIZE_ONLY) {
-       RExC_size += size;
-       return;
-    }
     assert(!RExC_study_started); /* I believe we should never use reginsert once we have started
                                     studying. If this is wrong then we need to adjust RExC_recurse
                                     below like we do with RExC_open_parens/RExC_close_parens. */
+    change_engine_size(pRExC_state, (Ptrdiff_t) size);
     src = REGNODE_p(RExC_emit);
     RExC_emit += size;
     dst = REGNODE_p(RExC_emit);
@@ -19474,9 +19145,6 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
     PERL_UNUSED_ARG(depth);
 #endif
 
-    if (SIZE_ONLY)
-       return;
-
     /* Find last node. */
     scan = (regnode_offset) p;
     for (;;) {
@@ -19534,9 +19202,6 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
     PERL_ARGS_ASSERT_REGTAIL_STUDY;
 
 
-    if (SIZE_ONLY)
-        return exact;
-
     /* Find last node. */
 
     scan = p;
@@ -19607,8 +19272,8 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
 STATIC SV*
 S_get_ANYOFM_contents(pTHX_ const regnode * n) {
 
-    /* Returns an inversion list of all the code points matched by the ANYOFM
-     * node 'n' */
+    /* Returns an inversion list of all the code points matched by the
+     * ANYOFM/NANYOFM node 'n' */
 
     SV * cp_list = _new_invlist(-1);
     const U8 lowest = (U8) ARG(n);
@@ -19631,6 +19296,9 @@ S_get_ANYOFM_contents(pTHX_ const regnode * n) {
         }
     }
 
+    if (OP(n) == NANYOFM) {
+        _invlist_invert(cp_list);
+    }
     return cp_list;
 }
 
@@ -20164,6 +19832,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         SV * cp_list = get_ANYOFM_contents(o);
 
        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+        if (OP(o) == NANYOFM) {
+            _invlist_invert(cp_list);
+        }
+
         put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, TRUE);
        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);