Rmv unused regex implementation structure element

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index bbb998f..64d8907 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -101,14 +101,6 @@ EXTERN_C const struct regexp_engine my_reg_engine;
  #define        STATIC  static
  #endif
  
-#ifndef MIN
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-#endif
-
-#ifndef MAX
-#define MAX(a,b) ((a) > (b) ? (a) : (b))
-#endif
-
  /* this is a chain of data about sub patterns we are processing that
     need to be handled separately/specially in study_chunk. Its so
     we can simulate recursion without losing state.  */
@@ -168,7 +160,7 @@ struct RExC_state_t {
      I32                seen_zerolen;
      regnode    **open_parens;          /* pointers to open parens */
      regnode    **close_parens;         /* pointers to close parens */
-    regnode    *opend;                 /* END node in program */
+    regnode     *end_op;                /* END node in program */
      I32                utf8;           /* whether the pattern is utf8 or not */
      I32                orig_utf8;      /* whether the pattern was originally in utf8 */
                                 /* XXX use this for future optimisation of case
@@ -179,13 +171,12 @@ struct RExC_state_t {
      HV         *paren_names;           /* Paren names */
  
      regnode    **recurse;              /* Recurse regops */
-    I32                recurse_count;          /* Number of recurse regops */
+    I32                recurse_count;                /* Number of recurse regops we have generated */
      U8          *study_chunk_recursed;  /* bitmap of which subs we have moved
                                             through */
      U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
      I32                in_lookbehind;
      I32                contains_locale;
-    I32                contains_i;
      I32                override_recoding;
  #ifdef EBCDIC
      I32                recode_x_to_native;
@@ -199,6 +190,7 @@ struct RExC_state_t {
      scan_frame *frame_head;
      scan_frame *frame_last;
      U32         frame_count;
+    AV         *warn_text;
  #ifdef ADD_TO_REGEXEC
      char       *starttry;              /* -Dr: where regtry was called. */
  #define RExC_starttry  (pRExC_state->starttry)
@@ -222,6 +214,7 @@ struct RExC_state_t {
  #endif
      bool        seen_unfolded_sharp_s;
      bool        strict;
+    bool        study_started;
  };
  
  #define RExC_flags     (pRExC_state->flags)
@@ -269,7 +262,7 @@ struct RExC_state_t {
  #define RExC_orig_utf8 (pRExC_state->orig_utf8)
  #define RExC_open_parens       (pRExC_state->open_parens)
  #define RExC_close_parens      (pRExC_state->close_parens)
-#define RExC_opend     (pRExC_state->opend)
+#define RExC_end_op    (pRExC_state->end_op)
  #define RExC_paren_names       (pRExC_state->paren_names)
  #define RExC_recurse   (pRExC_state->recurse)
  #define RExC_recurse_count     (pRExC_state->recurse_count)
@@ -278,7 +271,6 @@ struct RExC_state_t {
                                     (pRExC_state->study_chunk_recursed_bytes)
  #define RExC_in_lookbehind     (pRExC_state->in_lookbehind)
  #define RExC_contains_locale   (pRExC_state->contains_locale)
-#define RExC_contains_i (pRExC_state->contains_i)
  #define RExC_override_recoding (pRExC_state->override_recoding)
  #ifdef EBCDIC
  #   define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
@@ -288,6 +280,8 @@ struct RExC_state_t {
  #define RExC_frame_last (pRExC_state->frame_last)
  #define RExC_frame_count (pRExC_state->frame_count)
  #define RExC_strict (pRExC_state->strict)
+#define RExC_study_started      (pRExC_state->study_started)
+#define RExC_warn_text (pRExC_state->warn_text)
  
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
   * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -502,7 +496,20 @@ static const scan_data_t zero_scan_data =
  #define SF_HAS_PAR             0x0080
  #define SF_IN_PAR              0x0100
  #define SF_HAS_EVAL            0x0200
+
+
+/* SCF_DO_SUBSTR is the flag that tells the regexp analyzer to track the
+ * longest substring in the pattern. When it is not set the optimiser keeps
+ * track of position, but does not keep track of the actual strings seen,
+ *
+ * So for instance /foo/ will be parsed with SCF_DO_SUBSTR being true, but
+ * /foo/i will not.
+ *
+ * Similarly, /foo.*(blah|erm|huh).*fnorble/ will have "foo" and "fnorble"
+ * parsed with SCF_DO_SUBSTR on, but while processing the (...) it will be
+ * turned off because of the alternation (BRANCH). */
  #define SCF_DO_SUBSTR          0x0400
+
  #define SCF_DO_STCLASS_AND     0x0800
  #define SCF_DO_STCLASS_OR      0x1000
  #define SCF_DO_STCLASS         (SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
@@ -547,7 +554,6 @@ static const scan_data_t zero_scan_data =
  #define OOB_UNICODE            0xDEADBEEF
  
  #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
-#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
  
  
  /* length of regex to show in messages that don't mark a position within */
@@ -562,7 +568,7 @@ static const scan_data_t zero_scan_data =
  #define MARKER2 " <-- HERE "  /* marker as it appears within the regex */
  
  #define REPORT_LOCATION " in regex; marked by " MARKER1    \
-                        " in m/%"UTF8f MARKER2 "%"UTF8f"/"
+                        " in m/%" UTF8f MARKER2 "%" UTF8f "/"
  
  /* The code in this file in places uses one level of recursion with parsing
   * rebased to an alternate string constructed by us in memory.  This can take
@@ -650,11 +656,11 @@ static const scan_data_t zero_scan_data =
  } STMT_END
  
  #define        FAIL(msg) _FAIL(                            \
-    Perl_croak(aTHX_ "%s in regex m/%"UTF8f"%s/",          \
+    Perl_croak(aTHX_ "%s in regex m/%" UTF8f "%s/",        \
             msg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
  
  #define        FAIL2(msg,arg) _FAIL(                       \
-    Perl_croak(aTHX_ msg " in regex m/%"UTF8f"%s/",        \
+    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",      \
             arg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
  
  /*
@@ -898,52 +904,78 @@ static const scan_data_t zero_scan_data =
  #define EXPERIMENTAL_INPLACESCAN
  #endif /*PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS*/
  
-#define DEBUG_RExC_seen() \
+#ifdef DEBUGGING
+int
+Perl_re_printf(pTHX_ const char *fmt, ...)
+{
+    va_list ap;
+    int result;
+    PerlIO *f= Perl_debug_log;
+    PERL_ARGS_ASSERT_RE_PRINTF;
+    va_start(ap, fmt);
+    result = PerlIO_vprintf(f, fmt, ap);
+    va_end(ap);
+    return result;
+}
+
+int
+Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
+{
+    va_list ap;
+    int result;
+    PerlIO *f= Perl_debug_log;
+    PERL_ARGS_ASSERT_RE_INDENTF;
+    va_start(ap, depth);
+    PerlIO_printf(f, "%*s", ( (int)depth % 20 ) * 2, "");
+    result = PerlIO_vprintf(f, fmt, ap);
+    va_end(ap);
+    return result;
+}
+#endif /* DEBUGGING */
+
+#define DEBUG_RExC_seen()                                                   \
          DEBUG_OPTIMISE_MORE_r({                                             \
-            PerlIO_printf(Perl_debug_log,"RExC_seen: ");                    \
+            Perl_re_printf( aTHX_ "RExC_seen: ");                                       \
                                                                              \
              if (RExC_seen & REG_ZERO_LEN_SEEN)                              \
-                PerlIO_printf(Perl_debug_log,"REG_ZERO_LEN_SEEN ");         \
+                Perl_re_printf( aTHX_ "REG_ZERO_LEN_SEEN ");                            \
                                                                              \
              if (RExC_seen & REG_LOOKBEHIND_SEEN)                            \
-                PerlIO_printf(Perl_debug_log,"REG_LOOKBEHIND_SEEN ");       \
+                Perl_re_printf( aTHX_ "REG_LOOKBEHIND_SEEN ");                          \
                                                                              \
              if (RExC_seen & REG_GPOS_SEEN)                                  \
-                PerlIO_printf(Perl_debug_log,"REG_GPOS_SEEN ");             \
+                Perl_re_printf( aTHX_ "REG_GPOS_SEEN ");                                \
                                                                              \
              if (RExC_seen & REG_RECURSE_SEEN)                               \
-                PerlIO_printf(Perl_debug_log,"REG_RECURSE_SEEN ");          \
+                Perl_re_printf( aTHX_ "REG_RECURSE_SEEN ");                             \
                                                                              \
-            if (RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)                         \
-                PerlIO_printf(Perl_debug_log,"REG_TOP_LEVEL_BRANCHES_SEEN ");    \
+            if (RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)                    \
+                Perl_re_printf( aTHX_ "REG_TOP_LEVEL_BRANCHES_SEEN ");                  \
                                                                              \
              if (RExC_seen & REG_VERBARG_SEEN)                               \
-                PerlIO_printf(Perl_debug_log,"REG_VERBARG_SEEN ");          \
+                Perl_re_printf( aTHX_ "REG_VERBARG_SEEN ");                             \
                                                                              \
              if (RExC_seen & REG_CUTGROUP_SEEN)                              \
-                PerlIO_printf(Perl_debug_log,"REG_CUTGROUP_SEEN ");         \
+                Perl_re_printf( aTHX_ "REG_CUTGROUP_SEEN ");                            \
                                                                              \
              if (RExC_seen & REG_RUN_ON_COMMENT_SEEN)                        \
-                PerlIO_printf(Perl_debug_log,"REG_RUN_ON_COMMENT_SEEN ");   \
+                Perl_re_printf( aTHX_ "REG_RUN_ON_COMMENT_SEEN ");                      \
                                                                              \
              if (RExC_seen & REG_UNFOLDED_MULTI_SEEN)                        \
-                PerlIO_printf(Perl_debug_log,"REG_UNFOLDED_MULTI_SEEN ");   \
-                                                                            \
-            if (RExC_seen & REG_GOSTART_SEEN)                               \
-                PerlIO_printf(Perl_debug_log,"REG_GOSTART_SEEN ");          \
+                Perl_re_printf( aTHX_ "REG_UNFOLDED_MULTI_SEEN ");                      \
                                                                              \
-            if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)                               \
-                PerlIO_printf(Perl_debug_log,"REG_UNBOUNDED_QUANTIFIER_SEEN ");          \
+            if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)                  \
+                Perl_re_printf( aTHX_ "REG_UNBOUNDED_QUANTIFIER_SEEN ");                \
                                                                              \
-            PerlIO_printf(Perl_debug_log,"\n");                             \
+            Perl_re_printf( aTHX_ "\n");                                                \
          });
  
  #define DEBUG_SHOW_STUDY_FLAG(flags,flag) \
-  if ((flags) & flag) PerlIO_printf(Perl_debug_log, "%s ", #flag)
+  if ((flags) & flag) Perl_re_printf( aTHX_  "%s ", #flag)
  
  #define DEBUG_SHOW_STUDY_FLAGS(flags,open_str,close_str)                    \
      if ( ( flags ) ) {                                                      \
-        PerlIO_printf(Perl_debug_log, "%s", open_str);                      \
+        Perl_re_printf( aTHX_  "%s", open_str);                                         \
          DEBUG_SHOW_STUDY_FLAG(flags,SF_FL_BEFORE_SEOL);                     \
          DEBUG_SHOW_STUDY_FLAG(flags,SF_FL_BEFORE_MEOL);                     \
          DEBUG_SHOW_STUDY_FLAG(flags,SF_IS_INF);                             \
@@ -959,31 +991,31 @@ static const scan_data_t zero_scan_data =
          DEBUG_SHOW_STUDY_FLAG(flags,SCF_SEEN_ACCEPT);                       \
          DEBUG_SHOW_STUDY_FLAG(flags,SCF_TRIE_DOING_RESTUDY);                \
          DEBUG_SHOW_STUDY_FLAG(flags,SCF_IN_DEFINE);                         \
-        PerlIO_printf(Perl_debug_log, "%s", close_str);                     \
+        Perl_re_printf( aTHX_  "%s", close_str);                                        \
      }
  
  
  #define DEBUG_STUDYDATA(str,data,depth)                              \
  DEBUG_OPTIMISE_MORE_r(if(data){                                      \
-    PerlIO_printf(Perl_debug_log,                                    \
-        "%*s" str "Pos:%"IVdf"/%"IVdf                                \
-        " Flags: 0x%"UVXf,                                           \
-        (int)(depth)*2, "",                                          \
+    Perl_re_indentf( aTHX_  "" str "Pos:%" IVdf "/%" IVdf            \
+        " Flags: 0x%" UVXf,                                          \
+        depth,                                                       \
          (IV)((data)->pos_min),                                       \
          (IV)((data)->pos_delta),                                     \
          (UV)((data)->flags)                                          \
      );                                                               \
      DEBUG_SHOW_STUDY_FLAGS((data)->flags," [ ","]");                 \
-    PerlIO_printf(Perl_debug_log,                                    \
-        " Whilem_c: %"IVdf" Lcp: %"IVdf" %s",                        \
+    Perl_re_printf( aTHX_                                            \
+        " Whilem_c: %" IVdf " Lcp: %" IVdf " %s",                    \
          (IV)((data)->whilem_c),                                      \
          (IV)((data)->last_closep ? *((data)->last_closep) : -1),     \
          is_inf ? "INF " : ""                                         \
      );                                                               \
      if ((data)->last_found)                                          \
-        PerlIO_printf(Perl_debug_log,                                \
-            "Last:'%s' %"IVdf":%"IVdf"/%"IVdf" %sFixed:'%s' @ %"IVdf \
-            " %sFloat: '%s' @ %"IVdf"/%"IVdf"",                      \
+        Perl_re_printf( aTHX_                                        \
+            "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf                   \
+            " %sFixed:'%s' @ %" IVdf                                 \
+            " %sFloat: '%s' @ %" IVdf "/%" IVdf,                     \
              SvPVX_const((data)->last_found),                         \
              (IV)((data)->last_end),                                  \
              (IV)((data)->last_start_min),                            \
@@ -998,9 +1030,10 @@ DEBUG_OPTIMISE_MORE_r(if(data){                                      \
              (IV)((data)->offset_float_min),                          \
              (IV)((data)->offset_float_max)                           \
          );                                                           \
-    PerlIO_printf(Perl_debug_log,"\n");                              \
+    Perl_re_printf( aTHX_ "\n");                                                 \
  });
  
+
  /* =========================================================
   * BEGIN edit_distance stuff.
   *
@@ -1240,8 +1273,8 @@ S_ssc_anything(pTHX_ regnode_ssc *ssc)
  
      assert(is_ANYOF_SYNTHETIC(ssc));
  
-    ssc->invlist = sv_2mortal(_new_invlist(2)); /* mortalize so won't leak */
-    _append_range_to_invlist(ssc->invlist, 0, UV_MAX);
+    /* mortalize so won't leak */
+    ssc->invlist = sv_2mortal(_add_range_to_invlist(NULL, 0, UV_MAX));
      ANYOF_FLAGS(ssc) |= SSC_MATCHES_EMPTY_STRING;  /* Plus matches empty */
  }
  
@@ -1423,7 +1456,12 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      /* Add in the points from the bit map */
      for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
          if (ANYOF_BITMAP_TEST(node, i)) {
-            invlist = add_cp_to_invlist(invlist, i);
+            unsigned int start = i++;
+
+            for (; i < NUM_ANYOF_CODE_POINTS && ANYOF_BITMAP_TEST(node, i); ++i) {
+                /* empty */
+            }
+            invlist = _add_range_to_invlist(invlist, start, i-1);
              new_node_has_latin1 = TRUE;
          }
      }
@@ -1946,14 +1984,13 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
  
      PERL_ARGS_ASSERT_DUMP_TRIE;
  
-    PerlIO_printf( Perl_debug_log, "%*sChar : %-6s%-6s%-4s ",
-        (int)depth * 2 + 2,"",
-        "Match","Base","Ofs" );
+    Perl_re_indentf( aTHX_  "Char : %-6s%-6s%-4s ",
+        depth+1, "Match","Base","Ofs" );
  
      for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
         SV ** const tmp = av_fetch( revcharmap, state, 0);
          if ( tmp ) {
-            PerlIO_printf( Perl_debug_log, "%*s",
+            Perl_re_printf( aTHX_  "%*s",
                  colwidth,
                  pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth,
                             PL_colors[0], PL_colors[1],
@@ -1963,27 +2000,25 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
              );
          }
      }
-    PerlIO_printf( Perl_debug_log, "\n%*sState|-----------------------",
-        (int)depth * 2 + 2,"");
+    Perl_re_printf( aTHX_  "\n");
+    Perl_re_indentf( aTHX_ "State|-----------------------", depth+1);
  
      for( state = 0 ; state < trie->uniquecharcount ; state++ )
-        PerlIO_printf( Perl_debug_log, "%.*s", colwidth, "--------");
-    PerlIO_printf( Perl_debug_log, "\n");
+        Perl_re_printf( aTHX_  "%.*s", colwidth, "--------");
+    Perl_re_printf( aTHX_  "\n");
  
      for( state = 1 ; state < trie->statecount ; state++ ) {
         const U32 base = trie->states[ state ].trans.base;
  
-        PerlIO_printf( Perl_debug_log, "%*s#%4"UVXf"|",
-                                       (int)depth * 2 + 2,"", (UV)state);
+        Perl_re_indentf( aTHX_  "#%4" UVXf "|", depth+1, (UV)state);
  
          if ( trie->states[ state ].wordnum ) {
-            PerlIO_printf( Perl_debug_log, " W%4X",
-                                           trie->states[ state ].wordnum );
+            Perl_re_printf( aTHX_  " W%4X", trie->states[ state ].wordnum );
          } else {
-            PerlIO_printf( Perl_debug_log, "%6s", "" );
+            Perl_re_printf( aTHX_  "%6s", "" );
          }
  
-        PerlIO_printf( Perl_debug_log, " @%4"UVXf" ", (UV)base );
+        Perl_re_printf( aTHX_  " @%4" UVXf " ", (UV)base );
  
          if ( base ) {
              U32 ofs = 0;
@@ -1994,7 +2029,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
                                                                      != state))
                      ofs++;
  
-            PerlIO_printf( Perl_debug_log, "+%2"UVXf"[ ", (UV)ofs);
+            Perl_re_printf( aTHX_  "+%2" UVXf "[ ", (UV)ofs);
  
              for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
                  if ( ( base + ofs >= trie->uniquecharcount )
@@ -2003,28 +2038,27 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
                          && trie->trans[ base + ofs
                                      - trie->uniquecharcount ].check == state )
                  {
-                   PerlIO_printf( Perl_debug_log, "%*"UVXf,
-                    colwidth,
-                    (UV)trie->trans[ base + ofs
-                                             - trie->uniquecharcount ].next );
+                   Perl_re_printf( aTHX_  "%*" UVXf, colwidth,
+                    (UV)trie->trans[ base + ofs - trie->uniquecharcount ].next
+                   );
                  } else {
-                    PerlIO_printf( Perl_debug_log, "%*s",colwidth,"   ." );
+                    Perl_re_printf( aTHX_  "%*s",colwidth,"   ." );
                  }
              }
  
-            PerlIO_printf( Perl_debug_log, "]");
+            Perl_re_printf( aTHX_  "]");
  
          }
-        PerlIO_printf( Perl_debug_log, "\n" );
+        Perl_re_printf( aTHX_  "\n" );
      }
-    PerlIO_printf(Perl_debug_log, "%*sword_info N:(prev,len)=",
-                                (int)depth*2, "");
+    Perl_re_indentf( aTHX_  "word_info N:(prev,len)=",
+                                depth);
      for (word=1; word <= trie->wordcount; word++) {
-       PerlIO_printf(Perl_debug_log, " %d:(%d,%d)",
+        Perl_re_printf( aTHX_  " %d:(%d,%d)",
             (int)word, (int)(trie->wordinfo[word].prev),
             (int)(trie->wordinfo[word].len));
      }
-    PerlIO_printf(Perl_debug_log, "\n" );
+    Perl_re_printf( aTHX_  "\n" );
  }
  /*
    Dumps a fully constructed but uncompressed trie in list form.
@@ -2045,19 +2079,20 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
      PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST;
  
      /* print out the table precompression.  */
-    PerlIO_printf( Perl_debug_log, "%*sState :Word | Transition Data\n%*s%s",
-        (int)depth * 2 + 2,"", (int)depth * 2 + 2,"",
-        "------:-----+-----------------\n" );
+    Perl_re_indentf( aTHX_  "State :Word | Transition Data\n",
+            depth+1 );
+    Perl_re_indentf( aTHX_  "%s",
+            depth+1, "------:-----+-----------------\n" );
  
      for( state=1 ; state < next_alloc ; state ++ ) {
          U16 charid;
  
-        PerlIO_printf( Perl_debug_log, "%*s %4"UVXf" :",
-            (int)depth * 2 + 2,"", (UV)state  );
+        Perl_re_indentf( aTHX_  " %4" UVXf " :",
+            depth+1, (UV)state  );
          if ( ! trie->states[ state ].wordnum ) {
-            PerlIO_printf( Perl_debug_log, "%5s| ","");
+            Perl_re_printf( aTHX_  "%5s| ","");
          } else {
-            PerlIO_printf( Perl_debug_log, "W%4x| ",
+            Perl_re_printf( aTHX_  "W%4x| ",
                  trie->states[ state ].wordnum
              );
          }
@@ -2065,7 +2100,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
             SV ** const tmp = av_fetch( revcharmap,
                                          TRIE_LIST_ITEM(state,charid).forid, 0);
             if ( tmp ) {
-                PerlIO_printf( Perl_debug_log, "%*s:%3X=%4"UVXf" | ",
+                Perl_re_printf( aTHX_  "%*s:%3X=%4" UVXf " | ",
                      colwidth,
                      pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp),
                                colwidth,
@@ -2077,11 +2112,11 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
                      (UV)TRIE_LIST_ITEM(state,charid).newstate
                  );
                  if (!(charid % 10))
-                    PerlIO_printf(Perl_debug_log, "\n%*s| ",
+                    Perl_re_printf( aTHX_  "\n%*s| ",
                          (int)((depth * 2) + 14), "");
              }
          }
-        PerlIO_printf( Perl_debug_log, "\n");
+        Perl_re_printf( aTHX_  "\n");
      }
  }
  
@@ -2109,12 +2144,12 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
         that they are identical.
       */
  
-    PerlIO_printf( Perl_debug_log, "%*sChar : ",(int)depth * 2 + 2,"" );
+    Perl_re_indentf( aTHX_  "Char : ", depth+1 );
  
      for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
         SV ** const tmp = av_fetch( revcharmap, charid, 0);
          if ( tmp ) {
-            PerlIO_printf( Perl_debug_log, "%*s",
+            Perl_re_printf( aTHX_  "%*s",
                  colwidth,
                  pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth,
                             PL_colors[0], PL_colors[1],
@@ -2125,32 +2160,33 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
          }
      }
  
-    PerlIO_printf( Perl_debug_log, "\n%*sState+-",(int)depth * 2 + 2,"" );
+    Perl_re_printf( aTHX_ "\n");
+    Perl_re_indentf( aTHX_  "State+-", depth+1 );
  
      for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) {
-        PerlIO_printf( Perl_debug_log, "%.*s", colwidth,"--------");
+        Perl_re_printf( aTHX_  "%.*s", colwidth,"--------");
      }
  
-    PerlIO_printf( Perl_debug_log, "\n" );
+    Perl_re_printf( aTHX_  "\n" );
  
      for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) {
  
-        PerlIO_printf( Perl_debug_log, "%*s%4"UVXf" : ",
-            (int)depth * 2 + 2,"",
+        Perl_re_indentf( aTHX_  "%4" UVXf " : ",
+            depth+1,
              (UV)TRIE_NODENUM( state ) );
  
          for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
              UV v=(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next );
              if (v)
-                PerlIO_printf( Perl_debug_log, "%*"UVXf, colwidth, v );
+                Perl_re_printf( aTHX_  "%*" UVXf, colwidth, v );
              else
-                PerlIO_printf( Perl_debug_log, "%*s", colwidth, "." );
+                Perl_re_printf( aTHX_  "%*s", colwidth, "." );
          }
          if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) {
-            PerlIO_printf( Perl_debug_log, " (%4"UVXf")\n",
+            Perl_re_printf( aTHX_  " (%4" UVXf ")\n",
                                              (UV)trie->trans[ state ].check );
          } else {
-            PerlIO_printf( Perl_debug_log, " (%4"UVXf") W%4X\n",
+            Perl_re_printf( aTHX_  " (%4" UVXf ") W%4X\n",
                                              (UV)trie->trans[ state ].check,
              trie->states[ TRIE_NODENUM( state ) ].wordnum );
          }
@@ -2388,6 +2424,21 @@ is the recommended Unicode-aware way of saying
             : ( state==1 ? special : 0 )                                        \
        )
  
+#define TRIE_BITMAP_SET_FOLDED(trie, uvc, folder)           \
+STMT_START {                                                \
+    TRIE_BITMAP_SET(trie, uvc);                             \
+    /* store the folded codepoint */                        \
+    if ( folder )                                           \
+        TRIE_BITMAP_SET(trie, folder[(U8) uvc ]);           \
+                                                            \
+    if ( !UTF ) {                                           \
+        /* store first byte of utf8 representation of */    \
+        /* variant codepoints */                            \
+        if (! UVCHR_IS_INVARIANT(uvc)) {                    \
+            TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(uvc));   \
+        }                                                   \
+    }                                                       \
+} STMT_END
  #define MADE_TRIE       1
  #define MADE_JUMP_TRIE  2
  #define MADE_EXACT_TRIE 4
@@ -2462,9 +2513,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
      }
      DEBUG_TRIE_COMPILE_r({
-        PerlIO_printf( Perl_debug_log,
-          "%*smake_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n",
-          (int)depth * 2 + 2, "",
+        Perl_re_indentf( aTHX_
+          "make_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n",
+          depth+1,
            REG_NODE_NUM(startbranch),REG_NODE_NUM(first),
            REG_NODE_NUM(last), REG_NODE_NUM(tail), (int)depth);
      });
@@ -2503,8 +2554,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
  
      for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
          regnode *noper = NEXTOPER( cur );
-        const U8 *uc = (U8*)STRING( noper );
-        const U8 *e  = uc + STR_LEN( noper );
+        const U8 *uc;
+        const U8 *e;
          int foldlen = 0;
          U32 wordlen      = 0;         /* required init */
          STRLEN minchars = 0;
@@ -2513,18 +2564,31 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                                                 bitmap?*/
  
          if (OP(noper) == NOTHING) {
+            /* skip past a NOTHING at the start of an alternation
+             * eg, /(?:)a|(?:b)/ should be the same as /a|b/
+             */
              regnode *noper_next= regnext(noper);
-            if (noper_next != tail && OP(noper_next) == flags) {
-                noper = noper_next;
-                uc= (U8*)STRING(noper);
-                e= uc + STR_LEN(noper);
-               trie->minlen= STR_LEN(noper);
-            } else {
-               trie->minlen= 0;
-               continue;
-           }
+            if (noper_next < tail)
+                noper= noper_next;
          }
  
+        if ( noper < tail &&
+                (
+                    OP(noper) == flags ||
+                    (
+                        flags == EXACTFU &&
+                        OP(noper) == EXACTFU_SS
+                    )
+                )
+        ) {
+            uc= (U8*)STRING(noper);
+            e= uc + STR_LEN(noper);
+        } else {
+            trie->minlen= 0;
+            continue;
+        }
+
+
          if ( set_bit ) { /* bitmap only alloced when !(UTF&&Folding) */
              TRIE_BITMAP_SET(trie,*uc); /* store the raw first byte
                                            regardless of encoding */
@@ -2533,6 +2597,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                  TRIE_BITMAP_SET(trie, LATIN_SMALL_LETTER_SHARP_S);
              }
          }
+
          for ( ; uc < e ; uc += len ) {  /* Look at each char in the current
                                             branch */
              TRIE_CHARCOUNT(trie)++;
@@ -2616,18 +2681,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                  if ( set_bit ) {
                     /* store the codepoint in the bitmap, and its folded
                      * equivalent. */
-                    TRIE_BITMAP_SET(trie, uvc);
-
-                   /* store the folded codepoint */
-                    if ( folder ) TRIE_BITMAP_SET(trie, folder[(U8) uvc ]);
-
-                   if ( !UTF ) {
-                       /* store first byte of utf8 representation of
-                          variant codepoints */
-                       if (! UVCHR_IS_INVARIANT(uvc)) {
-                           TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(uvc));
-                       }
-                   }
+                    TRIE_BITMAP_SET_FOLDED(trie, uvc, folder);
                      set_bit = 0; /* We've done our bit :-) */
                  }
              } else {
@@ -2646,7 +2700,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                  svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
  
                  if ( !svpp )
-                    Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc );
+                    Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%" UVXf, uvc );
  
                  if ( !SvTRUE( *svpp ) ) {
                      sv_setiv( *svpp, ++trie->uniquecharcount );
@@ -2667,9 +2721,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          }
      } /* end first pass */
      DEBUG_TRIE_COMPILE_r(
-        PerlIO_printf( Perl_debug_log,
-                "%*sTRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
-                (int)depth * 2 + 2,"",
+        Perl_re_indentf( aTHX_
+                "TRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
+                depth+1,
                  ( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
                 (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
                 (int)trie->minlen, (int)trie->maxlen )
@@ -2717,9 +2771,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
  
          STRLEN transcount = 1;
  
-        DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log,
-            "%*sCompiling trie using list compiler\n",
-            (int)depth * 2 + 2, ""));
+        DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_  "Compiling trie using list compiler\n",
+            depth+1));
  
         trie->states = (reg_trie_state *)
             PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
@@ -2730,22 +2783,20 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
  
              regnode *noper   = NEXTOPER( cur );
-           U8 *uc           = (U8*)STRING( noper );
-            const U8 *e      = uc + STR_LEN( noper );
             U32 state        = 1;         /* required init */
             U16 charid       = 0;         /* sanity init */
              U32 wordlen      = 0;         /* required init */
  
              if (OP(noper) == NOTHING) {
                  regnode *noper_next= regnext(noper);
-                if (noper_next != tail && OP(noper_next) == flags) {
-                    noper = noper_next;
-                    uc= (U8*)STRING(noper);
-                    e= uc + STR_LEN(noper);
-                }
+                if (noper_next < tail)
+                    noper= noper_next;
              }
  
-            if (OP(noper) != NOTHING) {
+            if ( noper < tail && ( OP(noper) == flags || ( flags == EXACTFU && OP(noper) == EXACTFU_SS ) ) ) {
+                const U8 *uc= (U8*)STRING(noper);
+                const U8 *e= uc + STR_LEN(noper);
+
                  for ( ; uc < e ; uc += len ) {
  
                      TRIE_READ_CHAR;
@@ -2793,7 +2844,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                          }
                          state = newstate;
                      } else {
-                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
+                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
                     }
                 }
             }
@@ -2827,7 +2878,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
  
                  /*
                  DEBUG_TRIE_COMPILE_MORE_r(
-                    PerlIO_printf( Perl_debug_log, "tp: %d zp: %d ",tp,zp)
+                    Perl_re_printf( aTHX_  "tp: %d zp: %d ",tp,zp)
                  );
                  */
  
@@ -2889,7 +2940,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                  }
                  /*
                  DEBUG_TRIE_COMPILE_MORE_r(
-                    PerlIO_printf( Perl_debug_log, " base: %d\n",base);
+                    Perl_re_printf( aTHX_  " base: %d\n",base);
                  );
                  */
                  trie->states[ state ].trans.base=base;
@@ -2932,9 +2983,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
             we have to use TRIE_NODENUM() to convert.
  
          */
-        DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log,
-            "%*sCompiling trie using table compiler\n",
-            (int)depth * 2 + 2, ""));
+        DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_  "Compiling trie using table compiler\n",
+            depth+1));
  
         trie->trans = (reg_trie_trans *)
             PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
@@ -2949,8 +2999,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
  
              regnode *noper   = NEXTOPER( cur );
-           const U8 *uc     = (U8*)STRING( noper );
-            const U8 *e      = uc + STR_LEN( noper );
  
              U32 state        = 1;         /* required init */
  
@@ -2961,14 +3009,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
  
              if (OP(noper) == NOTHING) {
                  regnode *noper_next= regnext(noper);
-                if (noper_next != tail && OP(noper_next) == flags) {
-                    noper = noper_next;
-                    uc= (U8*)STRING(noper);
-                    e= uc + STR_LEN(noper);
-                }
+                if (noper_next < tail)
+                    noper= noper_next;
              }
  
-            if ( OP(noper) != NOTHING ) {
+            if ( noper < tail && ( OP(noper) == flags || ( flags == EXACTFU && OP(noper) == EXACTFU_SS ) ) ) {
+                const U8 *uc= (U8*)STRING(noper);
+                const U8 *e= uc + STR_LEN(noper);
+
                  for ( ; uc < e ; uc += len ) {
  
                      TRIE_READ_CHAR;
@@ -2993,7 +3041,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                          }
                          state = trie->trans[ state + charid ].next;
                      } else {
-                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
+                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
                      }
                      /* charid is now 0 if we dont know the char read, or
                       * nonzero if we do */
@@ -3126,9 +3174,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
             PerlMemShared_realloc( trie->states, laststate
                                    * sizeof(reg_trie_state) );
          DEBUG_TRIE_COMPILE_MORE_r(
-            PerlIO_printf( Perl_debug_log,
-                "%*sAlloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
-                (int)depth * 2 + 2,"",
+            Perl_re_indentf( aTHX_  "Alloc: %d Orig: %" IVdf " elements, Final:%" IVdf ". Savings of %%%5.2f\n",
+                depth+1,
                  (int)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount
                         + 1 ),
                  (IV)next_alloc,
@@ -3139,9 +3186,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          } /* end table compress */
      }
      DEBUG_TRIE_COMPILE_MORE_r(
-            PerlIO_printf(Perl_debug_log,
-                "%*sStatecount:%"UVxf" Lasttrans:%"UVxf"\n",
-                (int)depth * 2 + 2, "",
+            Perl_re_indentf( aTHX_  "Statecount:%" UVxf " Lasttrans:%" UVxf "\n",
+                depth+1,
                  (UV)trie->statecount,
                  (UV)trie->lasttrans)
      );
@@ -3191,9 +3237,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
              });
          }
          DEBUG_OPTIMISE_r(
-            PerlIO_printf(Perl_debug_log,
-                "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n",
-                (int)depth * 2 + 2, "",
+            Perl_re_indentf( aTHX_  "MJD offset:%" UVuf " MJD length:%" UVuf "\n",
+                depth+1,
                  (UV)mjd_offset, (UV)mjd_nodelen)
          );
  #endif
@@ -3201,13 +3246,19 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
             split out as an EXACT and put in front of the TRIE node.  */
          trie->startstate= 1;
          if ( trie->bitmap && !widecharmap && !trie->jump  ) {
+            /* we want to find the first state that has more than
+             * one transition, if that state is not the first state
+             * then we have a common prefix which we can remove.
+             */
              U32 state;
              for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
                  U32 ofs = 0;
-                I32 idx = -1;
+                I32 first_ofs = -1; /* keeps track of the ofs of the first
+                                       transition, -1 means none */
                  U32 count = 0;
                  const U32 base = trie->states[ state ].trans.base;
  
+                /* does this state terminate an alternation? */
                  if ( trie->states[state].wordnum )
                          count = 1;
  
@@ -3217,46 +3268,54 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                           trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
                      {
                          if ( ++count > 1 ) {
-                            SV **tmp = av_fetch( revcharmap, ofs, 0);
-                           const U8 *ch = (U8*)SvPV_nolen_const( *tmp );
+                            /* we have more than one transition */
+                            SV **tmp;
+                            U8 *ch;
+                            /* if this is the first state there is no common prefix
+                             * to extract, so we can exit */
                              if ( state == 1 ) break;
+                            tmp = av_fetch( revcharmap, ofs, 0);
+                            ch = (U8*)SvPV_nolen_const( *tmp );
+
+                            /* if we are on count 2 then we need to initialize the
+                             * bitmap, and store the previous char if there was one
+                             * in it*/
                              if ( count == 2 ) {
+                                /* clear the bitmap */
                                  Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char);
                                  DEBUG_OPTIMISE_r(
-                                    PerlIO_printf(Perl_debug_log,
-                                       "%*sNew Start State=%"UVuf" Class: [",
-                                        (int)depth * 2 + 2, "",
+                                    Perl_re_indentf( aTHX_  "New Start State=%" UVuf " Class: [",
+                                        depth+1,
                                          (UV)state));
-                               if (idx >= 0) {
-                                   SV ** const tmp = av_fetch( revcharmap, idx, 0);
+                                if (first_ofs >= 0) {
+                                    SV ** const tmp = av_fetch( revcharmap, first_ofs, 0);
                                     const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
  
-                                    TRIE_BITMAP_SET(trie,*ch);
-                                    if ( folder )
-                                        TRIE_BITMAP_SET(trie, folder[ *ch ]);
+                                    TRIE_BITMAP_SET_FOLDED(trie,*ch,folder);
                                      DEBUG_OPTIMISE_r(
-                                        PerlIO_printf(Perl_debug_log, "%s", (char*)ch)
+                                        Perl_re_printf( aTHX_  "%s", (char*)ch)
                                      );
                                 }
                             }
-                           TRIE_BITMAP_SET(trie,*ch);
-                           if ( folder )
-                               TRIE_BITMAP_SET(trie,folder[ *ch ]);
-                           DEBUG_OPTIMISE_r(PerlIO_printf( Perl_debug_log,"%s", ch));
+                            /* store the current firstchar in the bitmap */
+                            TRIE_BITMAP_SET_FOLDED(trie,*ch,folder);
+                            DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "%s", ch));
                         }
-                        idx = ofs;
+                        first_ofs = ofs;
                     }
                  }
                  if ( count == 1 ) {
-                    SV **tmp = av_fetch( revcharmap, idx, 0);
+                    /* This state has only one transition, its transition is part
+                     * of a common prefix - we need to concatenate the char it
+                     * represents to what we have so far. */
+                    SV **tmp = av_fetch( revcharmap, first_ofs, 0);
                      STRLEN len;
                      char *ch = SvPV( *tmp, len );
                      DEBUG_OPTIMISE_r({
                          SV *sv=sv_newmortal();
-                        PerlIO_printf( Perl_debug_log,
-                           "%*sPrefix State: %"UVuf" Idx:%"UVuf" Char='%s'\n",
-                            (int)depth * 2 + 2, "",
-                            (UV)state, (UV)idx,
+                        Perl_re_indentf( aTHX_  "Prefix State: %" UVuf " Ofs:%" UVuf " Char='%s'\n",
+                            depth+1,
+                            (UV)state, (UV)first_ofs,
                              pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6,
                                 PL_colors[0], PL_colors[1],
                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
@@ -3275,7 +3334,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                 } else {
  #ifdef DEBUGGING
                     if (state>1)
-                       DEBUG_OPTIMISE_r(PerlIO_printf( Perl_debug_log,"]\n"));
+                        DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "]\n"));
  #endif
                     break;
                 }
@@ -3547,14 +3606,13 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
       */
      fail[ 0 ] = fail[ 1 ] = 0;
      DEBUG_TRIE_COMPILE_r({
-        PerlIO_printf(Perl_debug_log,
-                     "%*sStclass Failtable (%"UVuf" states): 0",
-                     (int)(depth * 2), "", (UV)numstates
+        Perl_re_indentf( aTHX_  "Stclass Failtable (%" UVuf " states): 0",
+                      depth, (UV)numstates
          );
          for( q_read=1; q_read<numstates; q_read++ ) {
-            PerlIO_printf(Perl_debug_log, ", %"UVuf, (UV)fail[q_read]);
+            Perl_re_printf( aTHX_  ", %" UVuf, (UV)fail[q_read]);
          }
-        PerlIO_printf(Perl_debug_log, "\n");
+        Perl_re_printf( aTHX_  "\n");
      });
      Safefree(q);
      /*RExC_seen |= REG_TRIEDFA_SEEN;*/
@@ -3562,15 +3620,15 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  }
  
  
-#define DEBUG_PEEP(str,scan,depth) \
-    DEBUG_OPTIMISE_r({if (scan){ \
-       regnode *Next = regnext(scan); \
-       regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state); \
-       PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)", \
-           (int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),\
-           Next ? (REG_NODE_NUM(Next)) : 0 ); \
+#define DEBUG_PEEP(str,scan,depth)         \
+    DEBUG_OPTIMISE_r({if (scan){           \
+       regnode *Next = regnext(scan);      \
+       regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);\
+       Perl_re_indentf( aTHX_  "" str ">%3d: %s (%d)", \
+           depth, REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),\
+           Next ? (REG_NODE_NUM(Next)) : 0 );\
         DEBUG_SHOW_STUDY_FLAGS(flags," [ ","]");\
-       PerlIO_printf(Perl_debug_log, "\n"); \
+       Perl_re_printf( aTHX_  "\n");                   \
     }});
  
  /* The below joins as many adjacent EXACTish nodes as possible into a single
@@ -3850,7 +3908,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                      }
                      else {
                          STRLEN len;
-                        _to_utf8_fold_flags(s, d, &len, FOLD_FLAGS_FULL);
+                        _toFOLD_utf8_flags(s, s_end, d, &len, FOLD_FLAGS_FULL);
                          d += len;
                      }
                      s += s_len;
@@ -4070,6 +4128,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_STUDY_CHUNK;
+    RExC_study_started= 1;
  
  
      if ( depth == 0 ) {
@@ -4084,9 +4143,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
      );
      DEBUG_OPTIMISE_MORE_r(
      {
-        PerlIO_printf(Perl_debug_log,
-            "%*sstudy_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu scan=%p last=%p",
-            (int)(depth*2), "", (long)stopparen,
+        Perl_re_indentf( aTHX_  "study_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu scan=%p last=%p",
+            depth, (long)stopparen,
              (unsigned long)RExC_study_chunk_recursed_count,
              (unsigned long)depth, (unsigned long)recursed_depth,
              scan,
@@ -4105,16 +4163,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                     (( j - 1 ) * RExC_study_chunk_recursed_bytes), i)
                          )
                      ) {
-                        PerlIO_printf(Perl_debug_log," %d",(int)i);
+                        Perl_re_printf( aTHX_ " %d",(int)i);
                          break;
                      }
                  }
                  if ( j + 1 < recursed_depth ) {
-                    PerlIO_printf(Perl_debug_log, ",");
+                    Perl_re_printf( aTHX_  ",");
                  }
              }
          }
-        PerlIO_printf(Perl_debug_log,"\n");
+        Perl_re_printf( aTHX_ "\n");
      }
      );
      while ( scan && OP(scan) != END && scan < last ){
@@ -4127,9 +4185,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
          DEBUG_PEEP("Peep", scan, depth);
  
  
-        /* The reason we do this here we need to deal with things like /(?:f)(?:o)(?:o)/
-         * which cant be dealt with by the normal EXACT parsing code, as each (?:..) is handled
-         * by a different invocation of reg() -- Yves
+        /* The reason we do this here is that we need to deal with things like
+         * /(?:f)(?:o)(?:o)/ which cant be dealt with by the normal EXACT
+         * parsing code, as each (?:..) is handled by a different invocation of
+         * reg() -- Yves
           */
          JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
  
@@ -4390,9 +4449,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
                          DEBUG_TRIE_COMPILE_r({
                              regprop(RExC_rx, RExC_mysv, tail, NULL, pRExC_state);
-                            PerlIO_printf( Perl_debug_log, "%*s%s%s\n",
-                              (int)depth * 2 + 2, "",
-                              "Looking for TRIE'able sequences. Tail node is: ",
+                            Perl_re_indentf( aTHX_  "%s %" UVuf ":%s\n",
+                              depth+1,
+                              "Looking for TRIE'able sequences. Tail node is ",
+                              (UV)(tail - RExC_emit_start),
                                SvPV_nolen_const( RExC_mysv )
                              );
                          });
@@ -4475,25 +4535,26 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                              U8 noper_trietype = TRIE_TYPE( noper_type );
  #if defined(DEBUGGING) || defined(NOJUMPTRIE)
                              regnode * const noper_next = regnext( noper );
-                           U8 noper_next_type = (noper_next && noper_next != tail) ? OP(noper_next) : 0;
-                           U8 noper_next_trietype = (noper_next && noper_next != tail) ? TRIE_TYPE( noper_next_type ) :0;
+                            U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
+                            U8 noper_next_trietype = (noper_next && noper_next < tail) ? TRIE_TYPE( noper_next_type ) :0;
  #endif
  
                              DEBUG_TRIE_COMPILE_r({
                                  regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
-                                PerlIO_printf( Perl_debug_log, "%*s- %s (%d)",
-                                   (int)depth * 2 + 2,"", SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) );
+                                Perl_re_indentf( aTHX_  "- %d:%s (%d)",
+                                   depth+1,
+                                   REG_NODE_NUM(cur), SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) );
  
                                  regprop(RExC_rx, RExC_mysv, noper, NULL, pRExC_state);
-                                PerlIO_printf( Perl_debug_log, " -> %s",
-                                    SvPV_nolen_const(RExC_mysv));
+                                Perl_re_printf( aTHX_  " -> %d:%s",
+                                    REG_NODE_NUM(noper), SvPV_nolen_const(RExC_mysv));
  
                                  if ( noper_next ) {
                                    regprop(RExC_rx, RExC_mysv, noper_next, NULL, pRExC_state);
-                                  PerlIO_printf( Perl_debug_log,"\t=> %s\t",
-                                    SvPV_nolen_const(RExC_mysv));
+                                  Perl_re_printf( aTHX_ "\t=> %d:%s\t",
+                                    REG_NODE_NUM(noper_next), SvPV_nolen_const(RExC_mysv));
                                  }
-                                PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d,tt==%s,nt==%s,nnt==%s)\n",
+                                Perl_re_printf( aTHX_  "(First==%d,Last==%d,Cur==%d,tt==%s,ntt==%s,nntt==%s)\n",
                                     REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur),
                                    PL_reg_name[trietype], PL_reg_name[noper_trietype], PL_reg_name[noper_next_trietype]
                                 );
@@ -4504,12 +4565,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                              if ( noper_trietype
                                    &&
                                    (
-                                        ( noper_trietype == NOTHING)
+                                        ( noper_trietype == NOTHING )
                                          || ( trietype == NOTHING )
                                          || ( trietype == noper_trietype )
                                    )
  #ifdef NOJUMPTRIE
-                                  && noper_next == tail
+                                  && noper_next >= tail
  #endif
                                    && count < U16_MAX)
                              {
@@ -4522,7 +4583,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                     if ( noper_trietype == NOTHING ) {
  #if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
                                         regnode * const noper_next = regnext( noper );
-                                        U8 noper_next_type = (noper_next && noper_next!=tail) ? OP(noper_next) : 0;
+                                        U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
                                         U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
  #endif
  
@@ -4569,7 +4630,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  }
                                  if ( noper_trietype
  #ifdef NOJUMPTRIE
-                                     && noper_next == tail
+                                     && noper_next >= tail
  #endif
                                  ){
                                      /* noper is triable, so we can start a new
@@ -4589,10 +4650,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          } /* loop over branches */
                          DEBUG_TRIE_COMPILE_r({
                              regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
-                            PerlIO_printf( Perl_debug_log,
-                              "%*s- %s (%d) <SCAN FINISHED>\n",
-                              (int)depth * 2 + 2,
-                              "", SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur));
+                            Perl_re_indentf( aTHX_  "- %s (%d) <SCAN FINISHED> ",
+                              depth+1, SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur));
+                            Perl_re_printf( aTHX_  "(First==%d, Last==%d, Cur==%d, tt==%s)\n",
+                               REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur),
+                               PL_reg_name[trietype]
+                            );
  
                          });
                          if ( last && trietype ) {
@@ -4610,7 +4673,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                       depth==0 ) {
                                      flags |= SCF_TRIE_RESTUDY;
                                      if ( startbranch == first
-                                         && scan == tail )
+                                         && scan >= tail )
                                      {
                                          RExC_seen &=~REG_TOP_LEVEL_BRANCHES_SEEN;
                                      }
@@ -4629,9 +4692,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                       * turn it into a plain NOTHING op. */
                                      DEBUG_TRIE_COMPILE_r({
                                          regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
-                                        PerlIO_printf( Perl_debug_log,
-                                          "%*s- %s (%d) <NOTHING BRANCH SEQUENCE>\n", (int)depth * 2 + 2,
-                                          "", SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur));
+                                        Perl_re_indentf( aTHX_  "- %s (%d) <NOTHING BRANCH SEQUENCE>\n",
+                                          depth+1,
+                                          SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur));
  
                                      });
                                      OP(startbranch)= NOTHING;
@@ -4651,29 +4714,24 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             } else                      /* single branch is optimized. */
                 scan = NEXTOPER(scan);
             continue;
-       } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB || OP(scan) == GOSTART) {
+        } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB) {
              I32 paren = 0;
              regnode *start = NULL;
              regnode *end = NULL;
              U32 my_recursed_depth= recursed_depth;
  
-
-            if (OP(scan) != SUSPEND) { /* GOSUB/GOSTART */
+            if (OP(scan) != SUSPEND) { /* GOSUB */
                  /* Do setup, note this code has side effects beyond
                   * the rest of this block. Specifically setting
                   * RExC_recurse[] must happen at least once during
                   * study_chunk(). */
-               if (OP(scan) == GOSUB) {
-                   paren = ARG(scan);
-                   RExC_recurse[ARG2L(scan)] = scan;
-                    start = RExC_open_parens[paren-1];
-                    end   = RExC_close_parens[paren-1];
-                } else {
-                    start = RExC_rxi->program + 1;
-                    end   = RExC_opend;
-                }
+                paren = ARG(scan);
+                RExC_recurse[ARG2L(scan)] = scan;
+                start = RExC_open_parens[paren];
+                end   = RExC_close_parens[paren];
+
                  /* NOTE we MUST always execute the above code, even
-                 * if we do nothing with a GOSUB/GOSTART */
+                 * if we do nothing with a GOSUB */
                  if (
                      ( flags & SCF_IN_DEFINE )
                      ||
@@ -4722,11 +4780,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                               RExC_study_chunk_recursed_bytes, U8);
                      }
                      /* we havent recursed into this paren yet, so recurse into it */
-                   DEBUG_STUDYDATA("set:", data,depth);
+                    DEBUG_STUDYDATA("gosub-set:", data,depth);
                      PAREN_SET(RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes), paren);
                      my_recursed_depth= recursed_depth + 1;
                  } else {
-                   DEBUG_STUDYDATA("inf:", data,depth);
+                    DEBUG_STUDYDATA("gosub-inf:", data,depth);
                      /* some form of infinite recursion, assume infinite length
                       * */
                      if (flags & SCF_DO_SUBSTR) {
@@ -5028,7 +5086,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     SAVEFREESV(RExC_rx_sv);
                     Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
                         "Quantifier unexpected on zero-length expression "
-                       "in regex m/%"UTF8f"/",
+                       "in regex m/%" UTF8f "/",
                          UTF8fARG(UTF, RExC_precomp_end - RExC_precomp,
                                   RExC_precomp));
                     (void)ReREFCNT_inc(RExC_rx_sv);
@@ -5069,8 +5127,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     if (OP(nxt) != CLOSE)
                         goto nogo;
                     if (RExC_open_parens) {
-                       RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/
-                       RExC_close_parens[ARG(nxt1)-1]=nxt+2; /*close->while*/
+                        RExC_open_parens[ARG(nxt1)]=oscan; /*open->CURLYM*/
+                        RExC_close_parens[ARG(nxt1)]=nxt+2; /*close->while*/
                     }
                     /* Now we know that nxt2 is the only contents: */
                     oscan->flags = (U8)ARG(nxt);
@@ -5116,8 +5174,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
                         oscan->flags = (U8)ARG(nxt);
                         if (RExC_open_parens) {
-                           RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/
-                           RExC_close_parens[ARG(nxt1)-1]=nxt2+1; /*close->NOTHING*/
+                            RExC_open_parens[ARG(nxt1)]=oscan; /*open->CURLYM*/
+                            RExC_close_parens[ARG(nxt1)]=nxt2+1; /*close->NOTHING*/
                         }
                         OP(nxt1) = OPTIMIZED;   /* was OPEN. */
                         OP(nxt) = OPTIMIZED;    /* was CLOSE. */
@@ -5224,13 +5282,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     /* It is counted once already... */
                     data->pos_min += minnext * (mincount - counted);
  #if 0
-PerlIO_printf(Perl_debug_log, "counted=%"UVuf" deltanext=%"UVuf
-                              " SSize_t_MAX=%"UVuf" minnext=%"UVuf
-                              " maxcount=%"UVuf" mincount=%"UVuf"\n",
+Perl_re_printf( aTHX_  "counted=%" UVuf " deltanext=%" UVuf
+                              " SSize_t_MAX=%" UVuf " minnext=%" UVuf
+                              " maxcount=%" UVuf " mincount=%" UVuf "\n",
      (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount,
      (UV)mincount);
  if (deltanext != SSize_t_MAX)
-PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
+Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
      (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
            - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta));
  #endif
@@ -5535,7 +5593,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                         FAIL("Variable length lookbehind not implemented");
                      }
                      else if (minnext > (I32)U8_MAX) {
-                       FAIL2("Lookbehind longer than %"UVuf" not implemented",
+                       FAIL2("Lookbehind longer than %" UVuf " not implemented",
                                (UV)U8_MAX);
                      }
                      scan->flags = (U8)minnext;
@@ -5624,7 +5682,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                         FAIL("Variable length lookbehind not implemented");
                      }
                      else if (*minnextp > (I32)U8_MAX) {
-                       FAIL2("Lookbehind longer than %"UVuf" not implemented",
+                       FAIL2("Lookbehind longer than %" UVuf " not implemented",
                                (UV)U8_MAX);
                      }
                      scan->flags = (U8)*minnextp;
@@ -5868,15 +5926,10 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
         /* Else: zero-length, ignore. */
         scan = regnext(scan);
      }
-    /* If we are exiting a recursion we can unset its recursed bit
-     * and allow ourselves to enter it again - no danger of an
-     * infinite loop there.
-    if (stopparen > -1 && recursed) {
-       DEBUG_STUDYDATA("unset:", data,depth);
-        PAREN_UNSET( recursed, stopparen);
-    }
-    */
+
+  finish:
      if (frame) {
+        /* we need to unwind recursion. */
          depth = depth - 1;
  
          DEBUG_STUDYDATA("frame-end:",data,depth);
@@ -5893,7 +5946,6 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
          goto fake_study_recurse;
      }
  
-  finish:
      assert(!frame);
      DEBUG_STUDYDATA("pre-fin:",data,depth);
  
@@ -6044,7 +6096,7 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
  
      /* Dispatch a request to compile a regexp to correct regexp engine. */
      DEBUG_COMPILE_r({
-       PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
+        Perl_re_printf( aTHX_  "Using engine %" UVxf "\n",
                         PTR2UV(eng));
      });
      return CALLREGCOMP_ENG(eng, pattern, flags);
@@ -6089,7 +6141,7 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
      bool do_end = 0;
      GET_RE_DEBUG_FLAGS_DECL;
  
-    DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
+    DEBUG_PARSE_r(Perl_re_printf( aTHX_
          "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
  
      Newx(dst, *plen_p * 2 + 1, U8);
@@ -6291,8 +6343,20 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                  sv_catsv_nomg(pat, msv);
                  rx = msv;
              }
-            else
-                pat = msv;
+            else {
+                /* We have only one SV to process, but we need to verify
+                 * it is properly null terminated or we will fail asserts
+                 * later. In theory we probably shouldn't get such SV's,
+                 * but if we do we should handle it gracefully. */
+                if ( SvTYPE(msv) != SVt_PV || (SvLEN(msv) > SvCUR(msv) && *(SvEND(msv)) == 0) ) {
+                    /* not a string, or a string with a trailing null */
+                    pat = msv;
+                } else {
+                    /* a string with no trailing null, we need to copy it
+                     * so it we have a trailing null */
+                    pat = newSVsv(msv);
+                }
+            }
  
              if (code)
                  pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
@@ -6457,7 +6521,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
             *p++ = 'x';
         *p++ = '\0';
         DEBUG_COMPILE_r({
-           PerlIO_printf(Perl_debug_log,
+            Perl_re_printf( aTHX_
                 "%sre-parsing pattern for runtime code:%s %s\n",
                 PL_colors[4],PL_colors[5],newpat);
         });
@@ -6483,7 +6547,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
             {
                 Safefree(pRExC_state->code_blocks);
                  /* use croak_sv ? */
-               Perl_croak_nocontext("%"SVf, SVfARG(errsv));
+               Perl_croak_nocontext("%" SVf, SVfARG(errsv));
             }
         }
         assert(SvROK(qr_ref));
@@ -6611,7 +6675,11 @@ S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest,
          calculate it.*/
      ml = minlen ? *(minlen) : (SSize_t)longest_length;
      *rx_end_shift = ml - offset
-        - longest_length + (SvTAIL(sv_longest) != 0)
+        - longest_length
+            /* XXX SvTAIL is always false here - did you mean FBMcf_TAIL
+             * intead? - DAPM
+            + (SvTAIL(sv_longest) != 0)
+            */
          + lookbehind;
  
      t = (eol/* Can't have SEOL and MULTI */
@@ -6736,6 +6804,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  #endif
      }
  
+    pRExC_state->warn_text = NULL;
      pRExC_state->code_blocks = NULL;
      pRExC_state->num_code_blocks = 0;
  
@@ -6792,7 +6861,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      }
  
-    DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
+    DEBUG_PARSE_r(Perl_re_printf( aTHX_
          "Assembling pattern from %d elements%s\n", pat_count,
              orig_rx_flags & RXf_SPLIT ? " for split" : ""));
  
@@ -6821,7 +6890,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                  *is_bare_re = TRUE;
              SvREFCNT_inc(re);
              Safefree(pRExC_state->code_blocks);
-            DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
+            DEBUG_PARSE_r(Perl_re_printf( aTHX_
                  "Precompiled pattern%s\n",
                      orig_rx_flags & RXf_SPLIT ? " for split" : ""));
  
@@ -6850,8 +6919,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_uni_semantics = 0;
      RExC_seen_unfolded_sharp_s = 0;
      RExC_contains_locale = 0;
-    RExC_contains_i = 0;
      RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
+    RExC_study_started = 0;
      pRExC_state->runtime_code_qr = NULL;
      RExC_frame_head= NULL;
      RExC_frame_last= NULL;
@@ -6864,7 +6933,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      DEBUG_COMPILE_r({
              SV *dsv= sv_newmortal();
              RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, 60);
-            PerlIO_printf(Perl_debug_log, "%sCompiling REx%s %s\n",
+            Perl_re_printf( aTHX_  "%sCompiling REx%s %s\n",
                            PL_colors[4],PL_colors[5],s);
          });
  
@@ -6901,9 +6970,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      rx_flags = orig_rx_flags;
  
-    if (rx_flags & PMf_FOLD) {
-        RExC_contains_i = 1;
-    }
      if (   initial_charset == REGEX_DEPENDS_CHARSET
          && (RExC_utf8 ||RExC_uni_semantics))
      {
@@ -6959,7 +7025,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_whilem_seen = 0;
      RExC_open_parens = NULL;
      RExC_close_parens = NULL;
-    RExC_opend = NULL;
+    RExC_end_op = NULL;
      RExC_paren_names = NULL;
  #ifdef DEBUGGING
      RExC_paren_name_list = NULL;
@@ -6980,7 +7046,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      assert(*RExC_end == '\0');
  
      DEBUG_PARSE_r(
-       PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n");
+        Perl_re_printf( aTHX_  "Starting first pass (sizing)\n");
          RExC_lastnum=0;
          RExC_lastparse=NULL;
      );
@@ -7010,20 +7076,20 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                                      pRExC_state->num_code_blocks);
              }
              else {
-                DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
+                DEBUG_PARSE_r(Perl_re_printf( aTHX_
                  "Need to redo pass 1\n"));
              }
  
              goto redo_first_pass;
          }
-        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#"UVxf"", (UV) flags);
+        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#" UVxf, (UV) flags);
      }
      if (code_blocksv)
         SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */
  
      DEBUG_PARSE_r({
-        PerlIO_printf(Perl_debug_log,
-            "Required size %"IVdf" nodes\n"
+        Perl_re_printf( aTHX_
+            "Required size %" IVdf " nodes\n"
              "Starting second pass (creation)\n",
              (IV)RExC_size);
          RExC_lastnum=0;
@@ -7157,29 +7223,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      r->intflags = 0;
      r->nparens = RExC_npar - 1;        /* set early to validate backrefs */
  
-    /* setup various meta data about recursion, this all requires
-     * RExC_npar to be correctly set, and a bit later on we clear it */
-    if (RExC_seen & REG_RECURSE_SEEN) {
-        Newxz(RExC_open_parens, RExC_npar,regnode *);
-        SAVEFREEPV(RExC_open_parens);
-        Newxz(RExC_close_parens,RExC_npar,regnode *);
-        SAVEFREEPV(RExC_close_parens);
-    }
-    if (RExC_seen & (REG_RECURSE_SEEN | REG_GOSTART_SEEN)) {
-        /* Note, RExC_npar is 1 + the number of parens in a pattern.
-         * So its 1 if there are no parens. */
-        RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) +
-                                         ((RExC_npar & 0x07) != 0);
-        Newx(RExC_study_chunk_recursed,
-             RExC_study_chunk_recursed_bytes * RExC_npar, U8);
-        SAVEFREEPV(RExC_study_chunk_recursed);
-    }
-
      /* Useful during FAIL. */
  #ifdef RE_TRACK_PATTERN_OFFSETS
      Newxz(ri->u.offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
-    DEBUG_OFFSETS_r(PerlIO_printf(Perl_debug_log,
-                          "%s %"UVuf" bytes for offset annotations.\n",
+    DEBUG_OFFSETS_r(Perl_re_printf( aTHX_
+                          "%s %" UVuf " bytes for offset annotations.\n",
                            ri->u.offsets ? "Got" : "Couldn't get",
                            (UV)((2*RExC_size+1) * sizeof(U32))));
  #endif
@@ -7194,17 +7242,51 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_parse = exp;
      RExC_end = exp + plen;
      RExC_naughty = 0;
-    RExC_npar = 1;
      RExC_emit_start = ri->program;
      RExC_emit = ri->program;
      RExC_emit_bound = ri->program + RExC_size + 1;
      pRExC_state->code_index = 0;
  
      *((char*) RExC_emit++) = (char) REG_MAGIC;
+    /* setup various meta data about recursion, this all requires
+     * RExC_npar to be correctly set, and a bit later on we clear it */
+    if (RExC_seen & REG_RECURSE_SEEN) {
+        DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+            "%*s%*s Setting up open/close parens\n",
+                  22, "|    |", (int)(0 * 2 + 1), ""));
+
+        /* setup RExC_open_parens, which holds the address of each
+         * OPEN tag, and to make things simpler for the 0 index
+         * the start of the program - this is used later for offsets */
+        Newxz(RExC_open_parens, RExC_npar,regnode *);
+        SAVEFREEPV(RExC_open_parens);
+        RExC_open_parens[0] = RExC_emit;
+
+        /* setup RExC_close_parens, which holds the address of each
+         * CLOSE tag, and to make things simpler for the 0 index
+         * the end of the program - this is used later for offsets */
+        Newxz(RExC_close_parens, RExC_npar,regnode *);
+        SAVEFREEPV(RExC_close_parens);
+        /* we dont know where end op starts yet, so we dont
+         * need to set RExC_close_parens[0] like we do RExC_open_parens[0] above */
+
+        /* Note, RExC_npar is 1 + the number of parens in a pattern.
+         * So its 1 if there are no parens. */
+        RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) +
+                                         ((RExC_npar & 0x07) != 0);
+        Newx(RExC_study_chunk_recursed,
+             RExC_study_chunk_recursed_bytes * RExC_npar, U8);
+        SAVEFREEPV(RExC_study_chunk_recursed);
+    }
+    RExC_npar = 1;
      if (reg(pRExC_state, 0, &flags,1) == NULL) {
         ReREFCNT_dec(rx);
-        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags);
+        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#" UVxf, (UV) flags);
      }
+    DEBUG_OPTIMISE_r(
+        Perl_re_printf( aTHX_  "Starting post parse optimization\n");
+    );
+
      /* XXXX To minimize changes to RE engine we always allocate
         3-units-long substrs field. */
      Newx(r->substrs, 1, struct reg_substr_data);
@@ -7231,7 +7313,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          copyRExC_state = RExC_state;
      } else {
          U32 seen=RExC_seen;
-        DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log,"Restudying\n"));
+        DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "Restudying\n"));
  
          RExC_state = copyRExC_state;
          if (seen & REG_TOP_LEVEL_BRANCHES_SEEN)
@@ -7365,12 +7447,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  #ifdef TRIE_STUDY_OPT
         DEBUG_PARSE_r(
             if (!restudied)
-               PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
+                Perl_re_printf( aTHX_  "first at %" IVdf "\n",
                               (IV)(first - scan + 1))
          );
  #else
         DEBUG_PARSE_r(
-           PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
+            Perl_re_printf( aTHX_  "first at %" IVdf "\n",
                 (IV)(first - scan + 1))
          );
  #endif
@@ -7499,7 +7581,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
             DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
                        regprop(r, sv, (regnode*)data.start_class, NULL, pRExC_state);
-                     PerlIO_printf(Perl_debug_log,
+                      Perl_re_printf( aTHX_
                                     "synthetic stclass \"%s\".\n",
                                     SvPVX_const(sv));});
              data.start_class = NULL;
@@ -7544,7 +7626,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         regnode_ssc ch_class;
         SSize_t last_close = 0;
  
-       DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "\nMulti Top Level\n"));
+        DEBUG_PARSE_r(Perl_re_printf( aTHX_  "\nMulti Top Level\n"));
  
         scan = ri->program + 1;
         ssc_init(pRExC_state, &ch_class);
@@ -7579,7 +7661,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
             DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
                        regprop(r, sv, (regnode*)data.start_class, NULL, pRExC_state);
-                     PerlIO_printf(Perl_debug_log,
+                      Perl_re_printf( aTHX_
                                     "synthetic stclass \"%s\".\n",
                                     SvPVX_const(sv));});
              data.start_class = NULL;
@@ -7597,13 +7679,17 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      /* Guard against an embedded (?=) or (?<=) with a longer minlen than
         the "real" pattern. */
      DEBUG_OPTIMISE_r({
-        PerlIO_printf(Perl_debug_log,"minlen: %"IVdf" r->minlen:%"IVdf" maxlen:%"IVdf"\n",
+        Perl_re_printf( aTHX_ "minlen: %" IVdf " r->minlen:%" IVdf " maxlen:%" IVdf "\n",
                        (IV)minlen, (IV)r->minlen, (IV)RExC_maxlen);
      });
      r->minlenret = minlen;
      if (r->minlen < minlen)
          r->minlen = minlen;
  
+    if (RExC_seen & REG_RECURSE_SEEN ) {
+        r->intflags |= PREGf_RECURSE_SEEN;
+        Newxz(r->recurse_locinput, r->nparens + 1, char *);
+    }
      if (RExC_seen & REG_GPOS_SEEN)
          r->intflags |= PREGf_GPOS_SEEN;
      if (RExC_seen & REG_LOOKBEHIND_SEEN)
@@ -7677,23 +7763,22 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                                     = (void*)SvREFCNT_inc(RExC_paren_name_list);
      } else
  #endif
-        ri->name_list_idx = 0;
+    ri->name_list_idx = 0;
  
-    if (RExC_recurse_count) {
-        for ( ; RExC_recurse_count ; RExC_recurse_count-- ) {
-            const regnode *scan = RExC_recurse[RExC_recurse_count-1];
-            ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan );
-        }
+    while ( RExC_recurse_count > 0 ) {
+        const regnode *scan = RExC_recurse[ --RExC_recurse_count ];
+        ARG2L_SET( scan, RExC_open_parens[ARG(scan)] - scan );
      }
+
      Newxz(r->offs, RExC_npar, regexp_paren_pair);
      /* assume we don't need to swap parens around before we match */
      DEBUG_TEST_r({
-        PerlIO_printf(Perl_debug_log,"study_chunk_recursed_count: %lu\n",
+        Perl_re_printf( aTHX_ "study_chunk_recursed_count: %lu\n",
              (unsigned long)RExC_study_chunk_recursed_count);
      });
      DEBUG_DUMP_r({
          DEBUG_RExC_seen();
-        PerlIO_printf(Perl_debug_log,"Final program:\n");
+        Perl_re_printf( aTHX_ "Final program:\n");
          regdump(r);
      });
  #ifdef RE_TRACK_PATTERN_OFFSETS
@@ -7701,14 +7786,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          const STRLEN len = ri->u.offsets[0];
          STRLEN i;
          GET_RE_DEBUG_FLAGS_DECL;
-        PerlIO_printf(Perl_debug_log,
-                      "Offsets: [%"UVuf"]\n\t", (UV)ri->u.offsets[0]);
+        Perl_re_printf( aTHX_
+                      "Offsets: [%" UVuf "]\n\t", (UV)ri->u.offsets[0]);
          for (i = 1; i <= len; i++) {
              if (ri->u.offsets[i*2-1] || ri->u.offsets[i*2])
-                PerlIO_printf(Perl_debug_log, "%"UVuf":%"UVuf"[%"UVuf"] ",
+                Perl_re_printf( aTHX_  "%" UVuf ":%" UVuf "[%" UVuf "] ",
                  (UV)i, (UV)ri->u.offsets[i*2-1], (UV)ri->u.offsets[i*2]);
              }
-        PerlIO_printf(Perl_debug_log, "\n");
+        Perl_re_printf( aTHX_  "\n");
      });
  #endif
  
@@ -8047,7 +8132,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
          }
      } else {
        ret_undef:
-        sv_setsv(sv,&PL_sv_undef);
+        sv_set_undef(sv);
          return;
      }
  }
@@ -8179,17 +8264,18 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  
      assert (RExC_parse <= RExC_end);
      if (RExC_parse == RExC_end) NOOP;
-    else if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
+    else if (isIDFIRST_lazy_if_safe(RExC_parse, RExC_end, UTF)) {
           /* Note that the code here assumes well-formed UTF-8.  Skip IDFIRST by
            * using do...while */
         if (UTF)
             do {
                 RExC_parse += UTF8SKIP(RExC_parse);
-           } while (isWORDCHAR_utf8((U8*)RExC_parse));
+           } while (   RExC_parse < RExC_end
+                     && isWORDCHAR_utf8_safe((U8*)RExC_parse, (U8*) RExC_end));
         else
             do {
                 RExC_parse++;
-           } while (isWORDCHAR(*RExC_parse));
+           } while (RExC_parse < RExC_end && isWORDCHAR(*RExC_parse));
      } else {
          RExC_parse++; /* so the <- from the vFAIL is after the offending
                           character */
@@ -8226,7 +8312,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  #define DEBUG_PARSE_MSG(funcname)     DEBUG_PARSE_r({           \
      int num;                                                    \
      if (RExC_lastparse!=RExC_parse) {                           \
-        PerlIO_printf(Perl_debug_log, "%s",                     \
+        Perl_re_printf( aTHX_  "%s",                                        \
              Perl_pv_pretty(aTHX_ RExC_mysv1, RExC_parse,        \
                  RExC_end - RExC_parse, 16,                      \
                  "", "",                                         \
@@ -8238,17 +8324,17 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
              )                                                   \
          );                                                      \
      } else                                                      \
-        PerlIO_printf(Perl_debug_log,"%16s","");                \
+        Perl_re_printf( aTHX_ "%16s","");                                   \
                                                                  \
      if (SIZE_ONLY)                                              \
         num = RExC_size + 1;                                     \
      else                                                        \
         num=REG_NODE_NUM(RExC_emit);                             \
      if (RExC_lastnum!=num)                                      \
-       PerlIO_printf(Perl_debug_log,"|%4d",num);                \
+       Perl_re_printf( aTHX_ "|%4d",num);                                   \
      else                                                        \
-       PerlIO_printf(Perl_debug_log,"|%4s","");                 \
-    PerlIO_printf(Perl_debug_log,"|%*s%-4s",                    \
+       Perl_re_printf( aTHX_ "|%4s","");                                    \
+    Perl_re_printf( aTHX_ "|%*s%-4s",                                       \
          (int)((depth*2)), "",                                   \
          (funcname)                                              \
      );                                                          \
@@ -8260,11 +8346,11 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  
  #define DEBUG_PARSE(funcname)     DEBUG_PARSE_r({           \
      DEBUG_PARSE_MSG((funcname));                            \
-    PerlIO_printf(Perl_debug_log,"%4s","\n");               \
+    Perl_re_printf( aTHX_ "%4s","\n");                                  \
  })
-#define DEBUG_PARSE_FMT(funcname,fmt,args)     DEBUG_PARSE_r({           \
+#define DEBUG_PARSE_FMT(funcname,fmt,args)     DEBUG_PARSE_r({\
      DEBUG_PARSE_MSG((funcname));                            \
-    PerlIO_printf(Perl_debug_log,fmt "\n",args);               \
+    Perl_re_printf( aTHX_ fmt "\n",args);                               \
  })
  
  /* This section of code defines the inversion list object and its methods.  The
@@ -8273,33 +8359,47 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
   * as an SVt_INVLIST scalar.
   *
   * An inversion list for Unicode is an array of code points, sorted by ordinal
- * number.  The zeroth element is the first code point in the list.  The 1th
- * element is the first element beyond that not in the list.  In other words,
- * the first range is
- *  invlist[0]..(invlist[1]-1)
- * The other ranges follow.  Thus every element whose index is divisible by two
- * marks the beginning of a range that is in the list, and every element not
- * divisible by two marks the beginning of a range not in the list.  A single
- * element inversion list that contains the single code point N generally
- * consists of two elements
- *  invlist[0] == N
- *  invlist[1] == N+1
- * (The exception is when N is the highest representable value on the
- * machine, in which case the list containing just it would be a single
- * element, itself.  By extension, if the last range in the list extends to
- * infinity, then the first element of that range will be in the inversion list
- * at a position that is divisible by two, and is the final element in the
- * list.)
+ * number.  Each element gives the code point that begins a range that extends
+ * up-to but not including the code point given by the next element.  The final
+ * element gives the first code point of a range that extends to the platform's
+ * infinity.  The even-numbered elements (invlist[0], invlist[2], invlist[4],
+ * ...) give ranges whose code points are all in the inversion list.  We say
+ * that those ranges are in the set.  The odd-numbered elements give ranges
+ * whose code points are not in the inversion list, and hence not in the set.
+ * Thus, element [0] is the first code point in the list.  Element [1]
+ * is the first code point beyond that not in the list; and element [2] is the
+ * first code point beyond that that is in the list.  In other words, the first
+ * range is invlist[0]..(invlist[1]-1), and all code points in that range are
+ * in the inversion list.  The second range is invlist[1]..(invlist[2]-1), and
+ * all code points in that range are not in the inversion list.  The third
+ * range invlist[2]..(invlist[3]-1) gives code points that are in the inversion
+ * list, and so forth.  Thus every element whose index is divisible by two
+ * gives the beginning of a range that is in the list, and every element whose
+ * index is not divisible by two gives the beginning of a range not in the
+ * list.  If the final element's index is divisible by two, the inversion list
+ * extends to the platform's infinity; otherwise the highest code point in the
+ * inversion list is the contents of that element minus 1.
+ *
+ * A range that contains just a single code point N will look like
+ *  invlist[i]   == N
+ *  invlist[i+1] == N+1
+ *
+ * If N is UV_MAX (the highest representable code point on the machine), N+1 is
+ * impossible to represent, so element [i+1] is omitted.  The single element
+ * inversion list
+ *  invlist[0] == UV_MAX
+ * contains just UV_MAX, but is interpreted as matching to infinity.
+ *
   * Taking the complement (inverting) an inversion list is quite simple, if the
   * first element is 0, remove it; otherwise add a 0 element at the beginning.
   * This implementation reserves an element at the beginning of each inversion
   * list to always contain 0; there is an additional flag in the header which
   * indicates if the list begins at the 0, or is offset to begin at the next
- * element.
+ * element.  This means that the inversion list can be inverted without any
+ * copying; just flip the flag.
   *
   * More about inversion lists can be found in "Unicode Demystified"
   * Chapter 13 by Richard Gillam, published by Addison-Wesley.
- * More will be coming when functionality is added later.
   *
   * The inversion list data structure is currently implemented as an SV pointing
   * to an array of UVs that the SV thinks are bytes.  This allows us to have an
@@ -8311,6 +8411,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  
  /* The header definitions are in F<invlist_inline.h> */
  
+#ifndef PERL_IN_XSUB_RE
+
  PERL_STATIC_INLINE UV*
  S__invlist_array_init(SV* const invlist, const bool will_have_0)
  {
@@ -8337,6 +8439,8 @@ S__invlist_array_init(SV* const invlist, const bool will_have_0)
      return zero_addr + *offset;
  }
  
+#endif
+
  PERL_STATIC_INLINE void
  S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  {
@@ -8359,9 +8463,9 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  STATIC void
  S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
  {
-    /* Replaces the inversion list in 'src' with the one in 'dest'.  It steals
-     * the list from 'src', so 'src' is made to have a NULL list.  This is
-     * similar to what SvSetMagicSV() would do, if it were implemented on
+    /* Replaces the inversion list in 'dest' with the one from 'src'.  It
+     * steals the list from 'src', so 'src' is made to have a NULL list.  This
+     * is similar to what SvSetMagicSV() would do, if it were implemented on
       * inversion lists, though this routine avoids a copy */
  
      const UV src_len          = _invlist_len(src);
@@ -8473,6 +8577,8 @@ S_invlist_is_iterating(SV* const invlist)
      return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
  }
  
+#ifndef PERL_IN_XSUB_RE
+
  PERL_STATIC_INLINE UV
  S_invlist_max(SV* const invlist)
  {
@@ -8489,8 +8595,6 @@ S_invlist_max(SV* const invlist)
             ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
             : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
  }
-
-#ifndef PERL_IN_XSUB_RE
  SV*
  Perl__new_invlist(pTHX_ IV initial_size)
  {
@@ -8576,7 +8680,6 @@ Perl__new_invlist_C_array(pTHX_ const UV* const list)
  
      return invlist;
  }
-#endif /* ifndef PERL_IN_XSUB_RE */
  
  STATIC void
  S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
@@ -8620,18 +8723,18 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
  
         UV final_element = len - 1;
         array = invlist_array(invlist);
-       if (array[final_element] > start
+       if (   array[final_element] > start
             || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
         {
-           Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%"UVuf", start=%"UVuf", match=%c",
+           Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c",
                      array[final_element], start,
                      ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f');
         }
  
-       /* Here, it is a legal append.  If the new range begins with the first
-        * value not in the set, it is extending the set, so the new first
-        * value not in the set is one greater than the newly extended range.
-        * */
+        /* Here, it is a legal append.  If the new range begins 1 above the end
+         * of the range below it, it is extending the range below it, so the
+         * new first value not in the set is one greater than the newly
+         * extended range.  */
          offset = *get_invlist_offset_addr(invlist);
         if (array[final_element] == start) {
             if (end != UV_MAX) {
@@ -8639,7 +8742,8 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
             }
             else {
                 /* But if the end is the maximum representable on the machine,
-                * just let the range that this would extend to have no end */
+                 * assume that infinity was actually what was meant.  Just let
+                 * the range that this would extend to have no end */
                 invlist_set_len(invlist, len - 1, offset);
             }
             return;
@@ -8677,9 +8781,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
      }
  }
  
-#ifndef PERL_IN_XSUB_RE
-
-IV
+SSize_t
  Perl__invlist_search(SV* const invlist, const UV cp)
  {
      /* Searches the inversion list for the entry that contains the input code
@@ -8870,26 +8972,20 @@ void
  Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
                                           const bool complement_b, SV** output)
  {
-    /* Take the union of two inversion lists and point <output> to it.  *output
-     * SHOULD BE DEFINED upon input, and if it points to one of the two lists,
-     * the reference count to that list will be decremented if not already a
-     * temporary (mortal); otherwise just its contents will be modified to be
-     * the union.  The first list, <a>, may be NULL, in which case a copy of
-     * the second list is returned.  If <complement_b> is TRUE, the union is
-     * taken of the complement (inversion) of <b> instead of b itself.
+    /* Take the union of two inversion lists and point '*output' to it.  On
+     * input, '*output' MUST POINT TO NULL OR TO AN SV* INVERSION LIST (possibly
+     * even 'a' or 'b').  If to an inversion list, the contents of the original
+     * list will be replaced by the union.  The first list, 'a', may be
+     * NULL, in which case a copy of the second list is placed in '*output'.
+     * If 'complement_b' is TRUE, the union is taken of the complement
+     * (inversion) of 'b' instead of b itself.
       *
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
       * length there.  The preface says to incorporate its examples into your
       * code at your own risk.
       *
-     * The algorithm is like a merge sort.
-     *
-     * XXX A potential performance improvement is to keep track as we go along
-     * if only one of the inputs contributes to the result, meaning the other
-     * is a subset of that one.  In that case, we can skip the final copy and
-     * return the larger of the input lists, but then outside code might need
-     * to keep track of whether to free the input list or not */
+     * The algorithm is like a merge sort. */
  
      const UV* array_a;    /* a's array */
      const UV* array_b;
@@ -8898,7 +8994,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
      SV* u;                     /* the resulting union */
      UV* array_u;
-    UV len_u;
+    UV len_u = 0;
  
      UV i_a = 0;                    /* current index into a's array */
      UV i_b = 0;
@@ -8906,110 +9002,94 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
      /* running count, as explained in the algorithm source book; items are
       * stopped accumulating and are output when the count changes to/from 0.
-     * The count is incremented when we start a range that's in the set, and
-     * decremented when we start a range that's not in the set.  So its range
-     * is 0 to 2.  Only when the count is zero is something not in the set.
-     */
+     * The count is incremented when we start a range that's in an input's set,
+     * and decremented when we start a range that's not in a set.  So this
+     * variable can be 0, 1, or 2.  When it is 0 neither input is in their set,
+     * and hence nothing goes into the union; 1, just one of the inputs is in
+     * its set (and its current range gets added to the union); and 2 when both
+     * inputs are in their sets.  */
      UV count = 0;
  
      PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
      assert(a != b);
+    assert(*output == NULL || SvTYPE(*output) == SVt_INVLIST);
  
      len_b = _invlist_len(b);
      if (len_b == 0) {
  
-        /* Here, 'b' is empty.  If the output is the complement of 'b', the
-         * union is all possible code points, and we need not even look at 'a'.
-         * It's easiest to create a new inversion list that matches everything.
-         * */
+        /* Here, 'b' is empty, hence it's complement is all possible code
+         * points.  So if the union includes the complement of 'b', it includes
+         * everything, and we need not even look at 'a'.  It's easiest to
+         * create a new inversion list that matches everything.  */
          if (complement_b) {
-            SV* everything = _new_invlist(1);
-            _append_range_to_invlist(everything, 0, UV_MAX);
+            SV* everything = _add_range_to_invlist(NULL, 0, UV_MAX);
  
-            /* If the output didn't exist, just point it at the new list */
-            if (*output == NULL) {
+            if (*output == NULL) { /* If the output didn't exist, just point it
+                                      at the new list */
                  *output = everything;
-                return;
+            }
+            else { /* Otherwise, replace its contents with the new list */
+                invlist_replace_list_destroys_src(*output, everything);
+                SvREFCNT_dec_NN(everything);
              }
  
-            /* Otherwise, replace its contents with the new list */
-            invlist_replace_list_destroys_src(*output, everything);
-            SvREFCNT_dec_NN(everything);
              return;
          }
  
-        /* Here, we don't want the complement of 'b', and since it is empty,
+        /* Here, we don't want the complement of 'b', and since 'b' is empty,
           * the union will come entirely from 'a'.  If 'a' is NULL or empty, the
           * output will be empty */
  
-        if (a == NULL) {
-            *output = _new_invlist(0);
+        if (a == NULL || _invlist_len(a) == 0) {
+            if (*output == NULL) {
+                *output = _new_invlist(0);
+            }
+            else {
+                invlist_clear(*output);
+            }
              return;
          }
  
-        if (_invlist_len(a) == 0) {
-            invlist_clear(*output);
+        /* Here, 'a' is not empty, but 'b' is, so 'a' entirely determines the
+         * union.  We can just return a copy of 'a' if '*output' doesn't point
+         * to an existing list */
+        if (*output == NULL) {
+            *output = invlist_clone(a);
              return;
          }
  
-        /* Here, 'a' is not empty, and entirely determines the union.  If the
-         * output is not to overwrite 'b', we can just return 'a'. */
-        if (*output != b) {
-
-            /* If the output is to overwrite 'a', we have a no-op, as it's
-             * already in 'a' */
-            if (*output == a) {
-                return;
-            }
-
-            /* But otherwise we have to copy 'a' to the output */
-            *output = invlist_clone(a);
+        /* If the output is to overwrite 'a', we have a no-op, as it's
+         * already in 'a' */
+        if (*output == a) {
              return;
          }
  
-        /* Here, 'b' is to be overwritten by the output, which will be 'a' */
+        /* Here, '*output' is to be overwritten by 'a' */
          u = invlist_clone(a);
          invlist_replace_list_destroys_src(*output, u);
          SvREFCNT_dec_NN(u);
  
-       return;
+        return;
      }
  
-    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
-
-        /* Here, 'a' is empty.  That means the union will come entirely from
-         * 'b'.  If the output is not to overwrite 'a', we can just return
-         * what's in 'b'.  */
-        if (*output != a) {
+    /* Here 'b' is not empty.  See about 'a' */
  
-            /* If the output is to overwrite 'b', it's already in 'b', but
-             * otherwise we have to copy 'b' to the output */
-            if (*output != b) {
-                *output = invlist_clone(b);
-            }
+    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
  
-            /* And if the output is to be the inversion of 'b', do that */
-            if (complement_b) {
-                _invlist_invert(*output);
-            }
+        /* Here, 'a' is empty (and b is not).  That means the union will come
+         * entirely from 'b'.  If '*output' is NULL, we can directly return a
+         * clone of 'b'.  Otherwise, we replace the contents of '*output' with
+         * the clone */
  
-            return;
+        SV ** dest = (*output == NULL) ? output : &u;
+        *dest = invlist_clone(b);
+        if (complement_b) {
+            _invlist_invert(*dest);
          }
  
-        /* Here, 'a', which is empty or even NULL, is to be overwritten by the
-         * output, which will either be 'b' or the complement of 'b' */
-
-        if (a == NULL) {
-            *output = invlist_clone(b);
-        }
-        else {
-            u = invlist_clone(b);
+        if (dest == &u) {
              invlist_replace_list_destroys_src(*output, u);
              SvREFCNT_dec_NN(u);
-       }
-
-        if (complement_b) {
-            _invlist_invert(*output);
          }
  
         return;
@@ -9043,32 +9123,32 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      u = _new_invlist(len_a + len_b);
  
      /* Will contain U+0000 if either component does */
-    array_u = _invlist_array_init(u, (len_a > 0 && array_a[0] == 0)
-                                     || (len_b > 0 && array_b[0] == 0));
+    array_u = _invlist_array_init(u, (    len_a > 0 && array_a[0] == 0)
+                                      || (len_b > 0 && array_b[0] == 0));
  
-    /* Go through each list item by item, stopping when exhausted one of
-     * them */
+    /* Go through each input list item by item, stopping when have exhausted
+     * one of them */
      while (i_a < len_a && i_b < len_b) {
         UV cp;      /* The element to potentially add to the union's array */
         bool cp_in_set;   /* is it in the the input list's set or not */
  
         /* We need to take one or the other of the two inputs for the union.
          * Since we are merging two sorted lists, we take the smaller of the
-        * next items.  In case of a tie, we take the one that is in its set
-        * first.  If we took one not in the set first, it would decrement the
-        * count, possibly to 0 which would cause it to be output as ending the
-        * range, and the next time through we would take the same number, and
-        * output it again as beginning the next range.  By doing it the
-        * opposite way, there is no possibility that the count will be
-        * momentarily decremented to 0, and thus the two adjoining ranges will
-        * be seamlessly merged.  (In a tie and both are in the set or both not
-        * in the set, it doesn't matter which we take first.) */
-       if (array_a[i_a] < array_b[i_b]
-           || (array_a[i_a] == array_b[i_b]
+         * next items.  In case of a tie, we take first the one that is in its
+         * set.  If we first took the one not in its set, it would decrement
+         * the count, possibly to 0 which would cause it to be output as ending
+         * the range, and the next time through we would take the same number,
+         * and output it again as beginning the next range.  By doing it the
+         * opposite way, there is no possibility that the count will be
+         * momentarily decremented to 0, and thus the two adjoining ranges will
+         * be seamlessly merged.  (In a tie and both are in the set or both not
+         * in the set, it doesn't matter which we take first.) */
+       if (       array_a[i_a] < array_b[i_b]
+           || (   array_a[i_a] == array_b[i_b]
                 && ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
         {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
-           cp= array_a[i_a++];
+           cp = array_a[i_a++];
         }
         else {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
@@ -9092,87 +9172,75 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         }
      }
  
-    /* Here, we are finished going through at least one of the lists, which
-     * means there is something remaining in at most one.  We check if the list
-     * that hasn't been exhausted is positioned such that we are in the middle
-     * of a range in its set or not.  (i_a and i_b point to the element beyond
-     * the one we care about.) If in the set, we decrement 'count'; if 0, there
-     * is potentially more to output.
-     * There are four cases:
-     * 1) Both weren't in their sets, count is 0, and remains 0.  What's left
-     *    in the union is entirely from the non-exhausted set.
-     * 2) Both were in their sets, count is 2.  Nothing further should
-     *    be output, as everything that remains will be in the exhausted
-     *    list's set, hence in the union; decrementing to 1 but not 0 insures
-     *    that
-     * 3) the exhausted was in its set, non-exhausted isn't, count is 1.
-     *    Nothing further should be output because the union includes
-     *    everything from the exhausted set.  Not decrementing ensures that.
-     * 4) the exhausted wasn't in its set, non-exhausted is, count is 1;
-     *    decrementing to 0 insures that we look at the remainder of the
-     *    non-exhausted set */
-    if ((i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
+
+    /* The loop above increments the index into exactly one of the input lists
+     * each iteration, and ends when either index gets to its list end.  That
+     * means the other index is lower than its end, and so something is
+     * remaining in that one.  We decrement 'count', as explained below, if
+     * that list is in its set.  (i_a and i_b each currently index the element
+     * beyond the one we care about.) */
+    if (   (i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
         || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count--;
      }
  
-    /* The final length is what we've output so far, plus what else is about to
-     * be output.  (If 'count' is non-zero, then the input list we exhausted
-     * has everything remaining up to the machine's limit in its set, and hence
-     * in the union, so there will be no further output. */
-    len_u = i_u;
-    if (count == 0) {
-       /* At most one of the subexpressions will be non-zero */
-       len_u += (len_a - i_a) + (len_b - i_b);
+    /* Above we decremented 'count' if the list that had unexamined elements in
+     * it was in its set.  This has made it so that 'count' being non-zero
+     * means there isn't anything left to output; and 'count' equal to 0 means
+     * that what is left to output is precisely that which is left in the
+     * non-exhausted input list.
+     *
+     * To see why, note first that the exhausted input obviously has nothing
+     * left to add to the union.  If it was in its set at its end, that means
+     * the set extends from here to the platform's infinity, and hence so does
+     * the union and the non-exhausted set is irrelevant.  The exhausted set
+     * also contributed 1 to 'count'.  If 'count' was 2, it got decremented to
+     * 1, but if it was 1, the non-exhausted set wasn't in its set, and so
+     * 'count' remains at 1.  This is consistent with the decremented 'count'
+     * != 0 meaning there's nothing left to add to the union.
+     *
+     * But if the exhausted input wasn't in its set, it contributed 0 to
+     * 'count', and the rest of the union will be whatever the other input is.
+     * If 'count' was 0, neither list was in its set, and 'count' remains 0;
+     * otherwise it gets decremented to 0.  This is consistent with 'count'
+     * == 0 meaning the remainder of the union is whatever is left in the
+     * non-exhausted list. */
+    if (count != 0) {
+        len_u = i_u;
+    }
+    else {
+        IV copy_count = len_a - i_a;
+        if (copy_count > 0) {   /* The non-exhausted input is 'a' */
+           Copy(array_a + i_a, array_u + i_u, copy_count, UV);
+        }
+        else { /* The non-exhausted input is b */
+            copy_count = len_b - i_b;
+           Copy(array_b + i_b, array_u + i_u, copy_count, UV);
+        }
+        len_u = i_u + copy_count;
      }
  
-    /* Set result to final length, which can change the pointer to array_u, so
-     * re-find it */
+    /* Set the result to the final length, which can change the pointer to
+     * array_u, so re-find it.  (Note that it is unlikely that this will
+     * change, as we are shrinking the space, not enlarging it) */
      if (len_u != _invlist_len(u)) {
         invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
         invlist_trim(u);
         array_u = invlist_array(u);
      }
  
-    /* When 'count' is 0, the list that was exhausted (if one was shorter than
-     * the other) ended with everything above it not in its set.  That means
-     * that the remaining part of the union is precisely the same as the
-     * non-exhausted list, so can just copy it unchanged.  (If both lists were
-     * exhausted at the same time, then the operations below will be both 0.)
-     */
-    if (count == 0) {
-       IV copy_count; /* At most one will have a non-zero copy count */
-       if ((copy_count = len_a - i_a) > 0) {
-           Copy(array_a + i_a, array_u + i_u, copy_count, UV);
-       }
-       else if ((copy_count = len_b - i_b) > 0) {
-           Copy(array_b + i_b, array_u + i_u, copy_count, UV);
-       }
-    }
-
-    if (a != *output && b != *output) {
+    if (*output == NULL) {  /* Simply return the new inversion list */
          *output = u;
      }
      else {
-        /*  Here, the output is to be the same as one of the input scalars,
-         *  hence replacing it.  The simple thing to do is to free the input
-         *  scalar, making it instead be the output one.  But experience has
-         *  shown [perl #127392] that if the input is a mortal, we can get a
-         *  huge build-up of these during regex compilation before they get
-         *  freed.  So for that case, replace just the input's interior with
-         *  the output's, and then free the output */
-
-        assert(! invlist_is_iterating(*output));
-
-        if (! SvTEMP(*output)) {
-            SvREFCNT_dec_NN(*output);
-            *output = u;
-        }
-        else {
-            invlist_replace_list_destroys_src(*output, u);
-            SvREFCNT_dec_NN(u);
-        }
+        /* Otherwise, overwrite the inversion list that was in '*output'.  We
+         * could instead free '*output', and then set it to 'u', but experience
+         * has shown [perl #127392] that if the input is a mortal, we can get a
+         * huge build-up of these during regex compilation before they get
+         * freed. */
+        invlist_replace_list_destroys_src(*output, u);
+        SvREFCNT_dec_NN(u);
      }
  
      return;
@@ -9182,14 +9250,13 @@ void
  Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
                                                 const bool complement_b, SV** i)
  {
-    /* Take the intersection of two inversion lists and point <i> to it.  *i
-     * SHOULD BE DEFINED upon input, and if it points to one of the two lists,
-     * the reference count to that list will be decremented if not already a
-     * temporary (mortal); otherwise just its contents will be modified to be
-     * the intersection.  The first list, <a>, may be NULL, in which case an
-     * empty list is returned.  If <complement_b> is TRUE, the result will be
-     * the intersection of <a> and the complement (or inversion) of <b> instead
-     * of <b> directly.
+    /* Take the intersection of two inversion lists and point '*i' to it.  On
+     * input, '*i' MUST POINT TO NULL OR TO AN SV* INVERSION LIST (possibly
+     * even 'a' or 'b').  If to an inversion list, the contents of the original
+     * list will be replaced by the intersection.  The first list, 'a', may be
+     * NULL, in which case '*i' will be an empty list.  If 'complement_b' is
+     * TRUE, the result will be the intersection of 'a' and the complement (or
+     * inversion) of 'b' instead of 'b' directly.
       *
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
@@ -9207,22 +9274,23 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
      SV* r;                  /* the resulting intersection */
      UV* array_r;
-    UV len_r;
+    UV len_r = 0;
  
      UV i_a = 0;                    /* current index into a's array */
      UV i_b = 0;
      UV i_r = 0;
  
-    /* running count, as explained in the algorithm source book; items are
-     * stopped accumulating and are output when the count changes to/from 2.
-     * The count is incremented when we start a range that's in the set, and
-     * decremented when we start a range that's not in the set.  So its range
-     * is 0 to 2.  Only when the count is 2 is something in the intersection.
-     */
+    /* running count of how many of the two inputs are postitioned at ranges
+     * that are in their sets.  As explained in the algorithm source book,
+     * items are stopped accumulating and are output when the count changes
+     * to/from 2.  The count is incremented when we start a range that's in an
+     * input's set, and decremented when we start a range that's not in a set.
+     * Only when it is 2 are we in the intersection. */
      UV count = 0;
  
      PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
      assert(a != b);
+    assert(*i == NULL || SvTYPE(*i) == SVt_INVLIST);
  
      /* Special case if either one is empty */
      len_a = (a == NULL) ? 0 : _invlist_len(a);
@@ -9238,13 +9306,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
                  return;
              }
  
-            /* If not overwriting either input, just make a copy of 'a' */
-            if (*i != b) {
+            if (*i == NULL) {
                  *i = invlist_clone(a);
                  return;
              }
  
-            /* Here we are overwriting 'b' with 'a's contents */
              r = invlist_clone(a);
              invlist_replace_list_destroys_src(*i, r);
              SvREFCNT_dec_NN(r);
@@ -9290,10 +9356,10 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      r= _new_invlist(len_a + len_b);
  
      /* Will contain U+0000 iff both components do */
-    array_r = _invlist_array_init(r, len_a > 0 && array_a[0] == 0
-                                    && len_b > 0 && array_b[0] == 0);
+    array_r = _invlist_array_init(r,    len_a > 0 && array_a[0] == 0
+                                     && len_b > 0 && array_b[0] == 0);
  
-    /* Go through each list item by item, stopping when exhausted one of
+    /* Go through each list item by item, stopping when have exhausted one of
       * them */
      while (i_a < len_a && i_b < len_b) {
         UV cp;      /* The element to potentially add to the intersection's
@@ -9302,21 +9368,21 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
         /* We need to take one or the other of the two inputs for the
          * intersection.  Since we are merging two sorted lists, we take the
-        * smaller of the next items.  In case of a tie, we take the one that
-        * is not in its set first (a difference from the union algorithm).  If
-        * we took one in the set first, it would increment the count, possibly
-        * to 2 which would cause it to be output as starting a range in the
-        * intersection, and the next time through we would take that same
-        * number, and output it again as ending the set.  By doing it the
-        * opposite of this, there is no possibility that the count will be
-        * momentarily incremented to 2.  (In a tie and both are in the set or
-        * both not in the set, it doesn't matter which we take first.) */
-       if (array_a[i_a] < array_b[i_b]
-           || (array_a[i_a] == array_b[i_b]
+         * smaller of the next items.  In case of a tie, we take first the one
+         * that is not in its set (a difference from the union algorithm).  If
+         * we first took the one in its set, it would increment the count,
+         * possibly to 2 which would cause it to be output as starting a range
+         * in the intersection, and the next time through we would take that
+         * same number, and output it again as ending the set.  By doing the
+         * opposite of this, there is no possibility that the count will be
+         * momentarily incremented to 2.  (In a tie and both are in the set or
+         * both not in the set, it doesn't matter which we take first.) */
+       if (       array_a[i_a] < array_b[i_b]
+           || (   array_a[i_a] == array_b[i_b]
                 && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
         {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
-           cp= array_a[i_a++];
+           cp = array_a[i_a++];
         }
         else {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
@@ -9338,135 +9404,342 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
             }
             count--;
         }
+
      }
  
-    /* Here, we are finished going through at least one of the lists, which
-     * means there is something remaining in at most one.  We check if the list
-     * that has been exhausted is positioned such that we are in the middle
-     * of a range in its set or not.  (i_a and i_b point to elements 1 beyond
-     * the ones we care about.)  There are four cases:
-     * 1) Both weren't in their sets, count is 0, and remains 0.  There's
-     *    nothing left in the intersection.
-     * 2) Both were in their sets, count is 2 and perhaps is incremented to
-     *    above 2.  What should be output is exactly that which is in the
-     *    non-exhausted set, as everything it has is also in the intersection
-     *    set, and everything it doesn't have can't be in the intersection
-     * 3) The exhausted was in its set, non-exhausted isn't, count is 1, and
-     *    gets incremented to 2.  Like the previous case, the intersection is
-     *    everything that remains in the non-exhausted set.
-     * 4) the exhausted wasn't in its set, non-exhausted is, count is 1, and
-     *    remains 1.  And the intersection has nothing more. */
-    if ((i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
-       || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
+    /* The loop above increments the index into exactly one of the input lists
+     * each iteration, and ends when either index gets to its list end.  That
+     * means the other index is lower than its end, and so something is
+     * remaining in that one.  We increment 'count', as explained below, if the
+     * exhausted list was in its set.  (i_a and i_b each currently index the
+     * element beyond the one we care about.) */
+    if (   (i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
+        || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count++;
      }
  
-    /* The final length is what we've output so far plus what else is in the
-     * intersection.  At most one of the subexpressions below will be non-zero
-     * */
-    len_r = i_r;
-    if (count >= 2) {
-       len_r += (len_a - i_a) + (len_b - i_b);
+    /* Above we incremented 'count' if the exhausted list was in its set.  This
+     * has made it so that 'count' being below 2 means there is nothing left to
+     * output; otheriwse what's left to add to the intersection is precisely
+     * that which is left in the non-exhausted input list.
+     *
+     * To see why, note first that the exhausted input obviously has nothing
+     * left to affect the intersection.  If it was in its set at its end, that
+     * means the set extends from here to the platform's infinity, and hence
+     * anything in the non-exhausted's list will be in the intersection, and
+     * anything not in it won't be.  Hence, the rest of the intersection is
+     * precisely what's in the non-exhausted list  The exhausted set also
+     * contributed 1 to 'count', meaning 'count' was at least 1.  Incrementing
+     * it means 'count' is now at least 2.  This is consistent with the
+     * incremented 'count' being >= 2 means to add the non-exhausted list to
+     * the intersection.
+     *
+     * But if the exhausted input wasn't in its set, it contributed 0 to
+     * 'count', and the intersection can't include anything further; the
+     * non-exhausted set is irrelevant.  'count' was at most 1, and doesn't get
+     * incremented.  This is consistent with 'count' being < 2 meaning nothing
+     * further to add to the intersection. */
+    if (count < 2) { /* Nothing left to put in the intersection. */
+        len_r = i_r;
+    }
+    else { /* copy the non-exhausted list, unchanged. */
+        IV copy_count = len_a - i_a;
+        if (copy_count > 0) {   /* a is the one with stuff left */
+           Copy(array_a + i_a, array_r + i_r, copy_count, UV);
+        }
+        else {  /* b is the one with stuff left */
+            copy_count = len_b - i_b;
+           Copy(array_b + i_b, array_r + i_r, copy_count, UV);
+        }
+        len_r = i_r + copy_count;
      }
  
-    /* Set result to final length, which can change the pointer to array_r, so
-     * re-find it */
+    /* Set the result to the final length, which can change the pointer to
+     * array_r, so re-find it.  (Note that it is unlikely that this will
+     * change, as we are shrinking the space, not enlarging it) */
      if (len_r != _invlist_len(r)) {
         invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
         invlist_trim(r);
         array_r = invlist_array(r);
      }
  
-    /* Finish outputting any remaining */
-    if (count >= 2) { /* At most one will have a non-zero copy count */
-       IV copy_count;
-       if ((copy_count = len_a - i_a) > 0) {
-           Copy(array_a + i_a, array_r + i_r, copy_count, UV);
-       }
-       else if ((copy_count = len_b - i_b) > 0) {
-           Copy(array_b + i_b, array_r + i_r, copy_count, UV);
-       }
-    }
-
-    if (a != *i && b != *i) {
+    if (*i == NULL) { /* Simply return the calculated intersection */
          *i = r;
      }
-    else {
-        /*  Here, the output is to be the same as one of the input scalars,
-         *  hence replacing it.  The simple thing to do is to free the input
-         *  scalar, making it instead be the output one.  But experience has
-         *  shown [perl #127392] that if the input is a mortal, we can get a
-         *  huge build-up of these during regex compilation before they get
-         *  freed.  So for that case, replace just the input's interior with
-         *  the output's, and then free the output.  A short-cut in this case
-         *  is if the output is empty, we can just set the input to be empty */
-
-        assert(! invlist_is_iterating(*i));
-
-        if (! SvTEMP(*i)) {
-            SvREFCNT_dec_NN(*i);
-            *i = r;
+    else { /* Otherwise, replace the existing inversion list in '*i'.  We could
+              instead free '*i', and then set it to 'r', but experience has
+              shown [perl #127392] that if the input is a mortal, we can get a
+              huge build-up of these during regex compilation before they get
+              freed. */
+        if (len_r) {
+            invlist_replace_list_destroys_src(*i, r);
          }
          else {
-            if (len_r) {
-                invlist_replace_list_destroys_src(*i, r);
-            }
-            else {
-                invlist_clear(*i);
-            }
-            SvREFCNT_dec_NN(r);
+            invlist_clear(*i);
          }
+        SvREFCNT_dec_NN(r);
      }
  
      return;
  }
  
  SV*
-Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
+Perl__add_range_to_invlist(pTHX_ SV* invlist, UV start, UV end)
  {
      /* Add the range from 'start' to 'end' inclusive to the inversion list's
       * set.  A pointer to the inversion list is returned.  This may actually be
       * a new list, in which case the passed in one has been destroyed.  The
       * passed-in inversion list can be NULL, in which case a new one is created
-     * with just the one range in it */
-
-    SV* range_invlist;
-    UV len;
-
+     * with just the one range in it.  The new list is not necessarily
+     * NUL-terminated.  Space is not freed if the inversion list shrinks as a
+     * result of this function.  The gain would not be large, and in many
+     * cases, this is called multiple times on a single inversion list, so
+     * anything freed may almost immediately be needed again.
+     *
+     * This used to mostly call the 'union' routine, but that is much more
+     * heavyweight than really needed for a single range addition */
+
+    UV* array;              /* The array implementing the inversion list */
+    UV len;                 /* How many elements in 'array' */
+    SSize_t i_s;            /* index into the invlist array where 'start'
+                               should go */
+    SSize_t i_e = 0;        /* And the index where 'end' should go */
+    UV cur_highest;         /* The highest code point in the inversion list
+                               upon entry to this function */
+
+    /* This range becomes the whole inversion list if none already existed */
      if (invlist == NULL) {
         invlist = _new_invlist(2);
-       len = 0;
+        _append_range_to_invlist(invlist, start, end);
+        return invlist;
      }
-    else {
-       len = _invlist_len(invlist);
+
+    /* Likewise, if the inversion list is currently empty */
+    len = _invlist_len(invlist);
+    if (len == 0) {
+        _append_range_to_invlist(invlist, start, end);
+        return invlist;
      }
  
-    /* If comes after the final entry actually in the list, can just append it
-     * to the end, */
-    if (len == 0
-       || (! ELEMENT_RANGE_MATCHES_INVLIST(len - 1)
-            && start >= invlist_array(invlist)[len - 1]))
-    {
-       _append_range_to_invlist(invlist, start, end);
-       return invlist;
+    /* Starting here, we have to know the internals of the list */
+    array = invlist_array(invlist);
+
+    /* If the new range ends higher than the current highest ... */
+    cur_highest = invlist_highest(invlist);
+    if (end > cur_highest) {
+
+        /* If the whole range is higher, we can just append it */
+        if (start > cur_highest) {
+            _append_range_to_invlist(invlist, start, end);
+            return invlist;
+        }
+
+        /* Otherwise, add the portion that is higher ... */
+        _append_range_to_invlist(invlist, cur_highest + 1, end);
+
+        /* ... and continue on below to handle the rest.  As a result of the
+         * above append, we know that the index of the end of the range is the
+         * final even numbered one of the array.  Recall that the final element
+         * always starts a range that extends to infinity.  If that range is in
+         * the set (meaning the set goes from here to infinity), it will be an
+         * even index, but if it isn't in the set, it's odd, and the final
+         * range in the set is one less, which is even. */
+        if (end == UV_MAX) {
+            i_e = len;
+        }
+        else {
+            i_e = len - 2;
+        }
+    }
+
+    /* We have dealt with appending, now see about prepending.  If the new
+     * range starts lower than the current lowest ... */
+    if (start < array[0]) {
+
+        /* Adding something which has 0 in it is somewhat tricky, and uncommon.
+         * Let the union code handle it, rather than having to know the
+         * trickiness in two code places.  */
+        if (UNLIKELY(start == 0)) {
+            SV* range_invlist;
+
+            range_invlist = _new_invlist(2);
+            _append_range_to_invlist(range_invlist, start, end);
+
+            _invlist_union(invlist, range_invlist, &invlist);
+
+            SvREFCNT_dec_NN(range_invlist);
+
+            return invlist;
+        }
+
+        /* If the whole new range comes before the first entry, and doesn't
+         * extend it, we have to insert it as an additional range */
+        if (end < array[0] - 1) {
+            i_s = i_e = -1;
+            goto splice_in_new_range;
+        }
+
+        /* Here the new range adjoins the existing first range, extending it
+         * downwards. */
+        array[0] = start;
+
+        /* And continue on below to handle the rest.  We know that the index of
+         * the beginning of the range is the first one of the array */
+        i_s = 0;
+    }
+    else { /* Not prepending any part of the new range to the existing list.
+            * Find where in the list it should go.  This finds i_s, such that:
+            *     invlist[i_s] <= start < array[i_s+1]
+            */
+        i_s = _invlist_search(invlist, start);
+    }
+
+    /* At this point, any extending before the beginning of the inversion list
+     * and/or after the end has been done.  This has made it so that, in the
+     * code below, each endpoint of the new range is either in a range that is
+     * in the set, or is in a gap between two ranges that are.  This means we
+     * don't have to worry about exceeding the array bounds.
+     *
+     * Find where in the list the new range ends (but we can skip this if we
+     * have already determined what it is, or if it will be the same as i_s,
+     * which we already have computed) */
+    if (i_e == 0) {
+        i_e = (start == end)
+              ? i_s
+              : _invlist_search(invlist, end);
      }
  
-    /* Here, can't just append things, create and return a new inversion list
-     * which is the union of this range and the existing inversion list.  (If
-     * the new range is well-behaved wrt to the old one, we could just insert
-     * it, doing a Move() down on the tail of the old one (potentially growing
-     * it first).  But to determine that means we would have the extra
-     * (possibly throw-away) work of first finding where the new one goes and
-     * whether it disrupts (splits) an existing range, so it doesn't appear to
-     * me (khw) that it's worth it) */
-    range_invlist = _new_invlist(2);
-    _append_range_to_invlist(range_invlist, start, end);
+    /* Here generally invlist[i_e] <= end < array[i_e+1].  But if invlist[i_e]
+     * is a range that goes to infinity there is no element at invlist[i_e+1],
+     * so only the first relation holds. */
+
+    if ( ! ELEMENT_RANGE_MATCHES_INVLIST(i_s)) {
+
+        /* Here, the ranges on either side of the beginning of the new range
+         * are in the set, and this range starts in the gap between them.
+         *
+         * The new range extends the range above it downwards if the new range
+         * ends at or above that range's start */
+        const bool extends_the_range_above = (   end == UV_MAX
+                                              || end + 1 >= array[i_s+1]);
+
+        /* The new range extends the range below it upwards if it begins just
+         * after where that range ends */
+        if (start == array[i_s]) {
+
+            /* If the new range fills the entire gap between the other ranges,
+             * they will get merged together.  Other ranges may also get
+             * merged, depending on how many of them the new range spans.  In
+             * the general case, we do the merge later, just once, after we
+             * figure out how many to merge.  But in the case where the new
+             * range exactly spans just this one gap (possibly extending into
+             * the one above), we do the merge here, and an early exit.  This
+             * is done here to avoid having to special case later. */
+            if (i_e - i_s <= 1) {
+
+                /* If i_e - i_s == 1, it means that the new range terminates
+                 * within the range above, and hence 'extends_the_range_above'
+                 * must be true.  (If the range above it extends to infinity,
+                 * 'i_s+2' will be above the array's limit, but 'len-i_s-2'
+                 * will be 0, so no harm done.) */
+                if (extends_the_range_above) {
+                    Move(array + i_s + 2, array + i_s, len - i_s - 2, UV);
+                    invlist_set_len(invlist,
+                                    len - 2,
+                                    *(get_invlist_offset_addr(invlist)));
+                    return invlist;
+                }
+
+                /* Here, i_e must == i_s.  We keep them in sync, as they apply
+                 * to the same range, and below we are about to decrement i_s
+                 * */
+                i_e--;
+            }
+
+            /* Here, the new range is adjacent to the one below.  (It may also
+             * span beyond the range above, but that will get resolved later.)
+             * Extend the range below to include this one. */
+            array[i_s] = (end == UV_MAX) ? UV_MAX : end + 1;
+            i_s--;
+            start = array[i_s];
+        }
+        else if (extends_the_range_above) {
  
-    _invlist_union(invlist, range_invlist, &invlist);
+            /* Here the new range only extends the range above it, but not the
+             * one below.  It merges with the one above.  Again, we keep i_e
+             * and i_s in sync if they point to the same range */
+            if (i_e == i_s) {
+                i_e++;
+            }
+            i_s++;
+            array[i_s] = start;
+        }
+    }
+
+    /* Here, we've dealt with the new range start extending any adjoining
+     * existing ranges.
+     *
+     * If the new range extends to infinity, it is now the final one,
+     * regardless of what was there before */
+    if (UNLIKELY(end == UV_MAX)) {
+        invlist_set_len(invlist, i_s + 1, *(get_invlist_offset_addr(invlist)));
+        return invlist;
+    }
+
+    /* If i_e started as == i_s, it has also been dealt with,
+     * and been updated to the new i_s, which will fail the following if */
+    if (! ELEMENT_RANGE_MATCHES_INVLIST(i_e)) {
+
+        /* Here, the ranges on either side of the end of the new range are in
+         * the set, and this range ends in the gap between them.
+         *
+         * If this range is adjacent to (hence extends) the range above it, it
+         * becomes part of that range; likewise if it extends the range below,
+         * it becomes part of that range */
+        if (end + 1 == array[i_e+1]) {
+            i_e++;
+            array[i_e] = start;
+        }
+        else if (start <= array[i_e]) {
+            array[i_e] = end + 1;
+            i_e--;
+        }
+    }
+
+    if (i_s == i_e) {
+
+        /* If the range fits entirely in an existing range (as possibly already
+         * extended above), it doesn't add anything new */
+        if (ELEMENT_RANGE_MATCHES_INVLIST(i_s)) {
+            return invlist;
+        }
  
-    /* The temporary can be freed */
-    SvREFCNT_dec_NN(range_invlist);
+        /* Here, no part of the range is in the list.  Must add it.  It will
+         * occupy 2 more slots */
+      splice_in_new_range:
+
+        invlist_extend(invlist, len + 2);
+        array = invlist_array(invlist);
+        /* Move the rest of the array down two slots. Don't include any
+         * trailing NUL */
+        Move(array + i_e + 1, array + i_e + 3, len - i_e - 1, UV);
+
+        /* Do the actual splice */
+        array[i_e+1] = start;
+        array[i_e+2] = end + 1;
+        invlist_set_len(invlist, len + 2, *(get_invlist_offset_addr(invlist)));
+        return invlist;
+    }
+
+    /* Here the new range crossed the boundaries of a pre-existing range.  The
+     * code above has adjusted things so that both ends are in ranges that are
+     * in the set.  This means everything in between must also be in the set.
+     * Just squash things together */
+    Move(array + i_e + 1, array + i_s + 1, len - i_e - 1, UV);
+    invlist_set_len(invlist,
+                    len - i_e + i_s,
+                    *(get_invlist_offset_addr(invlist)));
  
      return invlist;
  }
@@ -9492,7 +9765,7 @@ Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV element0,
  
      PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST;
  
-    _append_range_to_invlist(invlist, element0, element0);
+    invlist = add_cp_to_invlist(invlist, element0);
      offset = *get_invlist_offset_addr(invlist);
  
      invlist_set_len(invlist, size, offset);
@@ -9682,18 +9955,18 @@ S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
      invlist_iterinit(invlist);
      while (invlist_iternext(invlist, &start, &end)) {
         if (end == UV_MAX) {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%cINFINITY%c",
+           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFINITY%c",
                                            start, intra_range_delimiter,
                                                   inter_range_delimiter);
         }
         else if (end != start) {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c%04"UVXf"%c",
+           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
                                           start,
                                                     intra_range_delimiter,
                                                    end, inter_range_delimiter);
         }
         else {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c",
+           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
                                            start, inter_range_delimiter);
         }
      }
@@ -9740,16 +10013,16 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
      while (invlist_iternext(invlist, &start, &end)) {
         if (end == UV_MAX) {
             Perl_dump_indent(aTHX_ level, file,
-                                       "%s[%"UVuf"] 0x%04"UVXf" .. INFINITY\n",
+                                       "%s[%" UVuf "] 0x%04" UVXf " .. INFINITY\n",
                                     indent, (UV)count, start);
         }
         else if (end != start) {
             Perl_dump_indent(aTHX_ level, file,
-                                    "%s[%"UVuf"] 0x%04"UVXf" .. 0x%04"UVXf"\n",
+                                    "%s[%" UVuf "] 0x%04" UVXf " .. 0x%04" UVXf "\n",
                                 indent, (UV)count, start,         end);
         }
         else {
-           Perl_dump_indent(aTHX_ level, file, "%s[%"UVuf"] 0x%04"UVXf"\n",
+           Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n",
                                              indent, (UV)count, start);
         }
          count += 2;
@@ -9765,9 +10038,10 @@ Perl__load_PL_utf8_foldclosures (pTHX)
       * to force that */
      if (! PL_utf8_tofold) {
          U8 dummy[UTF8_MAXBYTES_CASE+1];
+        const U8 hyphen[] = HYPHEN_UTF8;
  
          /* This string is just a short named one above \xff */
-        to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
+        toFOLD_utf8_safe(hyphen, hyphen + sizeof(hyphen) - 1, dummy, NULL);
          assert(PL_utf8_tofold); /* Verify that worked */
      }
      PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
@@ -9787,9 +10061,6 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
      UV len_a = _invlist_len(a);
      UV len_b = _invlist_len(b);
  
-    UV i = 0;              /* current index into the arrays */
-    bool retval = TRUE;     /* Assume are identical until proven otherwise */
-
      PERL_ARGS_ASSERT__INVLISTEQ;
  
      /* If are to compare 'a' with the complement of b, set it
@@ -9819,20 +10090,9 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
          }
      }
  
-    /* Make sure that the lengths are the same, as well as the final element
-     * before looping through the remainder.  (Thus we test the length, final,
-     * and first elements right off the bat) */
-    if (len_a != len_b || array_a[len_a-1] != array_b[len_a-1]) {
-        retval = FALSE;
-    }
-    else for (i = 0; i < len_a - 1; i++) {
-        if (array_a[i] != array_b[i]) {
-            retval = FALSE;
-            break;
-        }
-    }
+    return    len_a == len_b
+           && memEQ(array_a, array_b, len_a * sizeof(array_a[0]));
  
-    return retval;
  }
  #endif
  
@@ -9918,7 +10178,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
                  }
                  else {
                      STRLEN len;
-                    to_utf8_fold(s, d, &len);
+                    toFOLD_utf8_safe(s, e, d, &len);
                      d += len;
                      s += UTF8SKIP(s);
                  }
@@ -9959,7 +10219,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
              {
                  AV* list = (AV*) *listp;
                  IV k;
-                for (k = 0; k <= av_tindex(list); k++) {
+                for (k = 0; k <= av_tindex_nomg(list); k++) {
                      SV** c_p = av_fetch(list, k, FALSE);
                      UV c;
                      assert(c_p);
@@ -10174,11 +10434,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  RExC_flags |= posflags;
                  RExC_flags &= ~negflags;
                  set_regex_charset(&RExC_flags, cs);
-                if (RExC_flags & RXf_PMf_FOLD) {
-                    RExC_contains_i = 1;
-                }
-                if (PASS2) {
-                    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+
+                if (UNLIKELY((x_mod_count) > 1)) {
+                    vFAIL("Only one /x regex modifier is allowed");
                  }
                  return;
                  /*NOTREACHED*/
@@ -10186,7 +10444,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                fail_modifiers:
                  RExC_parse += SKIP_IF_CHAR(RExC_parse);
                 /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
-                vFAIL2utf8f("Sequence (%"UTF8f"...) not recognized",
+                vFAIL2utf8f("Sequence (%" UTF8f "...) not recognized",
                        UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
                  NOT_REACHED; /*NOTREACHED*/
          }
@@ -10398,7 +10656,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             if ( ! op ) {
                 RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
                  vFAIL2utf8f(
-                    "Unknown verb pattern '%"UTF8f"'",
+                    "Unknown verb pattern '%" UTF8f "'",
                      UTF8fARG(UTF, verb_len, start_verb));
             }
              if ( arg_required && !start_arg ) {
@@ -10563,7 +10821,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  RExC_seen |= REG_LOOKBEHIND_SEEN;
                 RExC_in_lookbehind++;
                 RExC_parse++;
-                assert(RExC_parse < RExC_end);
+                if (RExC_parse >= RExC_end) {
+                    vFAIL("Sequence (?... not terminated");
+                }
+
                  /* FALLTHROUGH */
             case '=':           /* (?=...) */
                 RExC_seen_zerolen++;
@@ -10594,13 +10855,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 break;
             case '0' :           /* (?0) */
             case 'R' :           /* (?R) */
-               if (*RExC_parse != ')')
+                if (RExC_parse == RExC_end || *RExC_parse != ')')
                     FAIL("Sequence (?R) not terminated");
-               ret = reg_node(pRExC_state, GOSTART);
-                    RExC_seen |= REG_GOSTART_SEEN;
+                num = 0;
+                RExC_seen |= REG_RECURSE_SEEN;
                 *flagp |= POSTPONED;
-               nextchar(pRExC_state);
-               return ret;
+                goto gen_recurse_regop;
                 /*notreached*/
              /* named and numeric backreferences */
              case '&':            /* (?&NAME) */
@@ -10676,6 +10936,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  } else if ( paren == '+' ) {
                      num = RExC_npar + num - 1;
                  }
+                /* We keep track how many GOSUB items we have produced.
+                   To start off the ARG2L() of the GOSUB holds its "id",
+                   which is used later in conjunction with RExC_recurse
+                   to calculate the offset we need to jump for the GOSUB,
+                   which it will store in the final representation.
+                   We have to defer the actual calculation until much later
+                   as the regop may move.
+                 */
  
                  ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
                  if (!SIZE_ONLY) {
@@ -10684,16 +10952,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         vFAIL("Reference to nonexistent group");
                     }
                     RExC_recurse_count++;
-                   DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
-                        "%*s%*s Recurse #%"UVuf" to %"IVdf"\n",
+                    DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+                        "%*s%*s Recurse #%" UVuf " to %" IVdf "\n",
                                22, "|    |", (int)(depth * 2 + 1), "",
                                (UV)ARG(ret), (IV)ARG2L(ret)));
                  }
                  RExC_seen |= REG_RECURSE_SEEN;
+
                  Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */
                 Set_Node_Offset(ret, parse_start); /* MJD */
  
                  *flagp |= POSTPONED;
+                assert(*RExC_parse == ')');
                  nextchar(pRExC_state);
                  return ret;
  
@@ -10705,7 +10975,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      RExC_parse += SKIP_IF_CHAR(RExC_parse);
                      /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                      vFAIL2utf8f(
-                        "Sequence (%"UTF8f"...) not recognized",
+                        "Sequence (%" UTF8f "...) not recognized",
                          UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
                     NOT_REACHED; /*NOTREACHED*/
                 }
@@ -10836,13 +11106,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 }
                 else if (RExC_parse[0] == 'R') {
                     RExC_parse++;
+                    /* parno == 0 => /(?(R)YES|NO)/  "in any form of recursion OR eval"
+                     * parno == 1 => /(?(R0)YES|NO)/ "in GOSUB (?0) / (?R)"
+                     * parno == 2 => /(?(R1)YES|NO)/ "in GOSUB (?1) (parno-1)"
+                     */
                     parno = 0;
-                   if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
+                    if (RExC_parse[0] == '0') {
+                        parno = 1;
+                        RExC_parse++;
+                    }
+                    else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
                          UV uv;
                          if (grok_atoUV(RExC_parse, &uv, &endptr)
                              && uv <= I32_MAX
                          ) {
-                            parno = (I32)uv;
+                            parno = (I32)uv + 1;
                              RExC_parse = (char*)endptr;
                          }
                          /* else "Switch condition not recognized" below */
@@ -10853,7 +11131,19 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                              SIZE_ONLY
                              ? REG_RSN_RETURN_NULL
                              : REG_RSN_RETURN_DATA);
-                       parno = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
+
+                        /* we should only have a false sv_dat when
+                         * SIZE_ONLY is true, and we always have false
+                         * sv_dat when SIZE_ONLY is true.
+                         * reg_scan_name() will VFAIL() if the name is
+                         * unknown when SIZE_ONLY is false, and otherwise
+                         * will return something, and when SIZE_ONLY is
+                         * true, reg_scan_name() just parses the string,
+                         * and doesnt return anything. (in theory) */
+                        assert(SIZE_ONLY ? !sv_dat : !!sv_dat);
+
+                        if (sv_dat)
+                            parno = 1 + *((I32 *)SvPVX(sv_dat));
                     }
                     ret = reganode(pRExC_state,INSUBP,parno);
                     goto insert_if_check_paren;
@@ -10887,7 +11177,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                              *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                              return NULL;
                          }
-                        FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"",
+                        FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
                                (UV) flags);
                      } else
                          REGTAIL(pRExC_state, br, reganode(pRExC_state,
@@ -10908,7 +11198,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                  *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                                  return NULL;
                              }
-                            FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"",
+                            FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
                                    (UV) flags);
                          }
                          REGTAIL(pRExC_state, ret, lastbr);
@@ -10973,14 +11263,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             if (!SIZE_ONLY ){
                 if (!RExC_nestroot)
                     RExC_nestroot = parno;
-                if (RExC_seen & REG_RECURSE_SEEN
-                   && !RExC_open_parens[parno-1])
+                if (RExC_open_parens && !RExC_open_parens[parno])
                 {
-                   DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
-                        "%*s%*s Setting open paren #%"IVdf" to %d\n",
+                    DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+                        "%*s%*s Setting open paren #%" IVdf " to %d\n",
                          22, "|    |", (int)(depth * 2 + 1), "",
                         (IV)parno, REG_NODE_NUM(ret)));
-                   RExC_open_parens[parno-1]= ret;
+                    RExC_open_parens[parno]= ret;
                 }
             }
              Set_Node_Length(ret, 1); /* MJD */
@@ -11007,7 +11296,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              *flagp = flags & (RESTART_PASS1|NEED_UTF8);
              return NULL;
          }
-        FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags);
+        FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
      }
      if (*RExC_parse == '|') {
         if (!SIZE_ONLY && RExC_extralen) {
@@ -11054,7 +11343,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                  return NULL;
              }
-            FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
          }
          REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
         lastbr = br;
@@ -11069,11 +11358,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             break;
         case 1: case 2:
             ender = reganode(pRExC_state, CLOSE, parno);
-            if (!SIZE_ONLY && RExC_seen & REG_RECURSE_SEEN) {
-               DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
-                        "%*s%*s Setting close paren #%"IVdf" to %d\n",
+            if ( RExC_close_parens ) {
+                DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+                        "%*s%*s Setting close paren #%" IVdf " to %d\n",
                          22, "|    |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ender)));
-               RExC_close_parens[parno-1]= ender;
+                RExC_close_parens[parno]= ender;
                 if (RExC_nestroot == parno)
                     RExC_nestroot = 0;
             }
@@ -11092,8 +11381,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
         case 0:
             ender = reg_node(pRExC_state, END);
             if (!SIZE_ONLY) {
-                assert(!RExC_opend); /* there can only be one! */
-                RExC_opend = ender;
+                assert(!RExC_end_op); /* there can only be one! */
+                RExC_end_op = ender;
+                if (RExC_close_parens) {
+                    DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
+                        "%*s%*s Setting close paren #0 (END) to %d\n",
+                        22, "|    |", (int)(depth * 2 + 1), "", REG_NODE_NUM(ender)));
+
+                    RExC_close_parens[0]= ender;
+                }
              }
             break;
         }
@@ -11101,7 +11397,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              DEBUG_PARSE_MSG("lsbr");
              regprop(RExC_rx, RExC_mysv1, lastbr, NULL, pRExC_state);
              regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state);
-            PerlIO_printf(Perl_debug_log, "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+            Perl_re_printf( aTHX_  "~ tying lastbr %s (%" IVdf ") to ender %s (%" IVdf ") offset %" IVdf "\n",
                            SvPV_nolen_const(RExC_mysv1),
                            (IV)REG_NODE_NUM(lastbr),
                            SvPV_nolen_const(RExC_mysv2),
@@ -11140,7 +11436,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      DEBUG_PARSE_MSG("NADA");
                      regprop(RExC_rx, RExC_mysv1, ret, NULL, pRExC_state);
                      regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state);
-                    PerlIO_printf(Perl_debug_log, "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+                    Perl_re_printf( aTHX_  "~ converting ret %s (%" IVdf ") to ender %s (%" IVdf ") offset %" IVdf "\n",
                                    SvPV_nolen_const(RExC_mysv1),
                                    (IV)REG_NODE_NUM(ret),
                                    SvPV_nolen_const(RExC_mysv2),
@@ -11261,10 +11557,10 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
                  *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                  return NULL;
              }
-            FAIL2("panic: regpiece returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regpiece returned NULL, flags=%#" UVxf, (UV) flags);
         }
         else if (ret == NULL)
-           ret = latest;
+            ret = latest;
         *flagp |= flags&(HASWIDTH|POSTPONED);
         if (chain == NULL)      /* First piece. */
             *flagp |= flags&SPSTART;
@@ -11333,7 +11629,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         if (flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8))
             *flagp |= flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8);
          else
-            FAIL2("panic: regatom returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regatom returned NULL, flags=%#" UVxf, (UV) flags);
         return(NULL);
      }
  
@@ -11514,7 +11810,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
         SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */
         ckWARN2reg(RExC_parse,
-                  "%"UTF8f" matches null string many times",
+                  "%" UTF8f " matches null string many times",
                    UTF8fARG(UTF, (RExC_parse >= origparse
                                   ? RExC_parse - origparse
                                   : 0),
@@ -11688,13 +11984,15 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
      RExC_parse++;      /* Skip past the '{' */
  
-    if (! (endbrace = strchr(RExC_parse, '}'))  /* no trailing brace */
-       || ! (endbrace == RExC_parse            /* nothing between the {} */
+    if (! (endbrace = strchr(RExC_parse, '}'))) { /* no trailing brace */
+        vFAIL2("Missing right brace on \\%c{}", 'N');
+    }
+    else if(!(endbrace == RExC_parse           /* nothing between the {} */
                || (endbrace - RExC_parse >= 2   /* U+ (bad hex is checked... */
                    && strnEQ(RExC_parse, "U+", 2)))) /* ... below for a better
                                                         error msg) */
      {
-       if (endbrace) RExC_parse = endbrace;    /* position msg's '<--HERE' */
+       RExC_parse = endbrace;  /* position msg's '<--HERE' */
         vFAIL("\\N{NAME} must be resolved by the lexer");
      }
  
@@ -11851,7 +12149,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                      *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                      return FALSE;
                  }
-                FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#"UVxf"",
+                FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
                      (UV) flags);
              }
              *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
@@ -11874,39 +12172,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  }
  
  
-/*
- * reg_recode
- *
- * It returns the code point in utf8 for the value in *encp.
- *    value: a code value in the source encoding
- *    encp:  a pointer to an Encode object
- *
- * If the result from Encode is not a single character,
- * it returns U+FFFD (Replacement character) and sets *encp to NULL.
- */
-STATIC UV
-S_reg_recode(pTHX_ const U8 value, SV **encp)
-{
-    STRLEN numlen = 1;
-    SV * const sv = newSVpvn_flags((const char *) &value, numlen, SVs_TEMP);
-    const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
-    const STRLEN newlen = SvCUR(sv);
-    UV uv = UNICODE_REPLACEMENT;
-
-    PERL_ARGS_ASSERT_REG_RECODE;
-
-    if (newlen)
-       uv = SvUTF8(sv)
-            ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
-            : *(U8*)s;
-
-    if (!newlen || numlen != newlen) {
-       uv = UNICODE_REPLACEMENT;
-       *encp = NULL;
-    }
-    return uv;
-}
-
  PERL_STATIC_INLINE U8
  S_compute_EXACTish(RExC_state_t *pRExC_state)
  {
@@ -12120,13 +12385,15 @@ S_backref_value(char *p)
  /*
   - regatom - the lowest level
  
-   Try to identify anything special at the start of the pattern. If there
-   is, then handle it as required. This may involve generating a single regop,
-   such as for an assertion; or it may involve recursing, such as to
-   handle a () structure.
+   Try to identify anything special at the start of the current parse position.
+   If there is, then handle it as required. This may involve generating a
+   single regop, such as for an assertion; or it may involve recursing, such as
+   to handle a () structure.
  
     If the string doesn't start with something special then we gobble up
-   as much literal text as we can.
+   as much literal text as we can.  If we encounter a quantifier, we have to
+   back off the final literal character, as that quantifier applies to just it
+   and not to the whole string of literals.
  
     Once we have been able to handle whatever type of thing started the
     sequence, we return.
@@ -12248,7 +12515,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          if (ret == NULL) {
              if (*flagp & (RESTART_PASS1|NEED_UTF8))
                  return NULL;
-            FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"",
+            FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf,
                    (UV) *flagp);
          }
         if (*RExC_parse != ']') {
@@ -12275,7 +12542,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                      return NULL;
                  }
-                FAIL2("panic: reg returned NULL to regatom, flags=%#"UVxf"",
+                FAIL2("panic: reg returned NULL to regatom, flags=%#" UVxf,
                                                                   (UV) flags);
         }
         *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
@@ -12438,7 +12705,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                        bad_bound_type:
                          RExC_parse = endbrace;
                         vFAIL2utf8f(
-                            "'%"UTF8f"' is an unknown bound type",
+                            "'%" UTF8f "' is an unknown bound type",
                             UTF8fARG(UTF, length, endbrace - length));
                          NOT_REACHED; /*NOTREACHED*/
                  }
@@ -12556,7 +12823,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if
               * multi-char folds are allowed.  */
              if (!ret)
-                FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"",
+                FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf,
                        (UV) *flagp);
  
              RExC_parse--;
@@ -12841,6 +13108,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                     || UTF8_IS_START(UCHARAT(RExC_parse)));
  
+            /* Here, we have a literal character.  Find the maximal string of
+             * them in the input that we can fit into a single EXACTish node.
+             * We quit at the first non-literal or when the node gets full */
             for (p = RExC_parse;
                  len < upper_parse && p < RExC_end;
                  len++)
@@ -12971,9 +13241,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                 vFAIL(error_msg);
                             }
                              ender = result;
-                           if (IN_ENCODING && ender < 0x100) {
-                               goto recode_encoding;
-                           }
                             if (ender > 0xff) {
                                 REQUIRE_UTF8(flagp);
                             }
@@ -13006,11 +13273,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                  if (RExC_recode_x_to_native) {
                                      ender = LATIN1_TO_NATIVE(ender);
                                  }
-                                else
  #endif
-                                if (IN_ENCODING) {
-                                    goto recode_encoding;
-                                }
                             }
                              else {
                                 REQUIRE_UTF8(flagp);
@@ -13070,17 +13333,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                           form_short_octal_warning(p, numlen));
                              }
                         }
-                       if (IN_ENCODING && ender < 0x100)
-                           goto recode_encoding;
-                       break;
-                     recode_encoding:
-                       if (! RExC_override_recoding) {
-                           SV* enc = _get_encoding();
-                           ender = reg_recode((U8)ender, &enc);
-                           if (!enc && PASS2)
-                               ckWARNreg(p, "Invalid escape in the specified encoding");
-                           REQUIRE_UTF8(flagp);
-                       }
                         break;
                     case '\0':
                         if (p >= RExC_end)
@@ -13098,14 +13350,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     } /* End of switch on '\' */
                     break;
                 case '{':
-                   /* Currently we don't warn when the lbrace is at the start
+                   /* Currently we don't care if the lbrace is at the start
                      * of a construct.  This catches it in the middle of a
                      * literal string, or when it's the first thing after
                      * something like "\b" */
-                   if (! SIZE_ONLY
-                       && (len || (p > RExC_start && isALPHA_A(*(p -1)))))
-                   {
-                       ckWARNregdep(p + 1, "Unescaped left brace in regex is deprecated, passed through");
+                   if (len || (p > RExC_start && isALPHA_A(*(p -1)))) {
+                        RExC_parse = p + 1;
+                       vFAIL("Unescaped left brace in regex is illegal here");
                     }
                     /*FALLTHROUGH*/
                 default:    /* A literal character */
@@ -13137,6 +13388,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                   * the node, close the node with just them, and set up to do
                   * this character again next time through, when it will be the
                   * only thing in its new node */
+
                  if ((next_is_quantifier = (   LIKELY(p < RExC_end)
                                             && UNLIKELY(ISMULT2(p))))
                      && LIKELY(len))
@@ -13510,8 +13762,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             RExC_parse = p - 1;
              Set_Node_Cur_Length(ret, parse_start);
             RExC_parse = p;
-            skip_to_be_ignored_text(pRExC_state, &RExC_parse,
-                                    FALSE /* Don't force to /x */ );
             {
                 /* len is STRLEN which is unsigned, need to copy to signed */
                 IV iv = len;
@@ -13523,6 +13773,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         break;
      } /* End of giant switch on input character */
  
+    /* Position parse to next real character */
+    skip_to_be_ignored_text(pRExC_state, &RExC_parse,
+                                            FALSE /* Don't force to /x */ );
+    if (PASS2 && *RExC_parse == '{' && OP(ret) != SBOL && ! regcurly(RExC_parse)) {
+        ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here, passed through");
+    }
+
      return(ret);
  }
  
@@ -13613,8 +13870,8 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
   * routine. q.v. */
  #define ADD_POSIX_WARNING(p, text)  STMT_START {                            \
          if (posix_warnings) {                                               \
-            if (! warn_text) warn_text = newAV();                           \
-            av_push(warn_text, Perl_newSVpvf(aTHX_                          \
+            if (! RExC_warn_text ) RExC_warn_text = (AV *) sv_2mortal((SV *) newAV()); \
+            av_push(RExC_warn_text, Perl_newSVpvf(aTHX_                          \
                                               WARNING_PREFIX                 \
                                               text                           \
                                               REPORT_LOCATION,               \
@@ -13745,7 +14002,6 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
      bool has_opening_colon    = FALSE;
      int class_number          = OOB_NAMEDCLASS; /* Out-of-bounds until find
                                                     valid class */
-    AV* warn_text             = NULL;   /* any warning messages */
      const char * possible_end = NULL;   /* used for a 2nd parse pass */
      const char* name_start;             /* ptr to class name first char */
  
@@ -13761,6 +14017,9 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
  
      PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX;
  
+    if (posix_warnings && RExC_warn_text)
+        av_clear(RExC_warn_text);
+
      if (p >= e) {
          return NOT_MEANT_TO_BE_A_POSIX_CLASS;
      }
@@ -13787,85 +14046,78 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
  
      /* For [. .] and [= =].  These are quite different internally from [: :],
       * so they are handled separately.  */
-    if (POSIXCC_NOTYET(*p)) {
+    if (POSIXCC_NOTYET(*p) && p < e - 3) /* 1 for the close, and 1 for the ']'
+                                            and 1 for at least one char in it
+                                          */
+    {
          const char open_char  = *p;
          const char * temp_ptr = p + 1;
-        unsigned int len      = 0;
  
          /* These two constructs are not handled by perl, and if we find a
-         * syntactically valid one, we croak.  It looks like just about any
-         * byte can be in them, but they are likely very short, like [.ch.] to
-         * denote a ligature 'ch' single character.  If we find something that
-         * started out to look like one of these constructs, but isn't, we
-         * break so that it can be checked for being a class name with a typo
-         * of '.' or '=' instead of a colon */
-        while (temp_ptr < e) {
-            len++;
-
-            /* qr/[[.].]]/, for example, is valid.  But otherwise we quit on an
-             * unexpected ']'.  It is possible, it appears, for such a ']' to
-             * be not in the final position, but that's so unlikely that that
-             * case is not handled. */
-            if (*temp_ptr == ']' && temp_ptr[1] != open_char) {
-                break;
-            }
-
-            /* XXX this could be cut down, but this value is certainly large
-             * enough */
-            if (len > 10) {
-                break;
-            }
+         * syntactically valid one, we croak.  khw, who wrote this code, finds
+         * this explanation of them very unclear:
+         * http://pubs.opengroup.org/onlinepubs/009696899/basedefs/xbd_chap09.html
+         * And searching the rest of the internet wasn't very helpful either.
+         * It looks like just about any byte can be in these constructs,
+         * depending on the locale.  But unless the pattern is being compiled
+         * under /l, which is very rare, Perl runs under the C or POSIX locale.
+         * In that case, it looks like [= =] isn't allowed at all, and that
+         * [. .] could be any single code point, but for longer strings the
+         * constituent characters would have to be the ASCII alphabetics plus
+         * the minus-hyphen.  Any sensible locale definition would limit itself
+         * to these.  And any portable one definitely should.  Trying to parse
+         * the general case is a nightmare (see [perl #127604]).  So, this code
+         * looks only for interiors of these constructs that match:
+         *      qr/.|[-\w]{2,}/
+         * Using \w relaxes the apparent rules a little, without adding much
+         * danger of mistaking something else for one of these constructs.
+         *
+         * [. .] in some implementations described on the internet is usable to
+         * escape a character that otherwise is special in bracketed character
+         * classes.  For example [.].] means a literal right bracket instead of
+         * the ending of the class
+         *
+         * [= =] can legitimately contain a [. .] construct, but we don't
+         * handle this case, as that [. .] construct will later get parsed
+         * itself and croak then.  And [= =] is checked for even when not under
+         * /l, as Perl has long done so.
+         *
+         * The code below relies on there being a trailing NUL, so it doesn't
+         * have to keep checking if the parse ptr < e.
+         */
+        if (temp_ptr[1] == open_char) {
+            temp_ptr++;
+        }
+        else while (    temp_ptr < e
+                    && (isWORDCHAR(*temp_ptr) || *temp_ptr == '-'))
+        {
+            temp_ptr++;
+        }
  
-            if (*temp_ptr == open_char) {
+        if (*temp_ptr == open_char) {
+            temp_ptr++;
+            if (*temp_ptr == ']') {
                  temp_ptr++;
-                if (*temp_ptr == ']') {
-                    temp_ptr++;
-                    if (! found_problem && ! check_only) {
-                        RExC_parse = (char *) temp_ptr;
-                        vFAIL3("POSIX syntax [%c %c] is reserved for future "
-                               "extensions", open_char, open_char);
-                    }
-
-                    /* Here, the syntax wasn't completely valid, or else the
-                     * call is to check-only */
-                    if (updated_parse_ptr) {
-                        *updated_parse_ptr = (char *) temp_ptr;
-                    }
-
-                    return OOB_NAMEDCLASS;
+                if (! found_problem && ! check_only) {
+                    RExC_parse = (char *) temp_ptr;
+                    vFAIL3("POSIX syntax [%c %c] is reserved for future "
+                            "extensions", open_char, open_char);
                  }
-            }
-            else if (*temp_ptr == '\\') {
-
-                /* A backslash is treate as like any other character, unless it
-                 * precedes a comment starter.  XXX multiple backslashes in a
-                 * row are not handled specially here, nor would they ever
-                 * likely to be handled specially in one of these constructs */
-                if (temp_ptr[1] == '#' && (RExC_flags & RXf_PMf_EXTENDED)) {
-                    temp_ptr++;
+
+                /* Here, the syntax wasn't completely valid, or else the call
+                 * is to check-only */
+                if (updated_parse_ptr) {
+                    *updated_parse_ptr = (char *) temp_ptr;
                  }
-                temp_ptr++;
-            }
-            else if (*temp_ptr == '#' && (RExC_flags & RXf_PMf_EXTENDED)) {
-                break;  /* Under no circumstances can we look at the interior
-                           of a comment */
-            }
-            else if (*temp_ptr == '\n') {   /* And we don't allow newlines
-                                               either as it's extremely
-                                               unlikely that one could be in an
-                                               intended class */
-                break;
-            }
-            else if (UTF && ! UTF8_IS_INVARIANT(*temp_ptr)) {
-                /* XXX Since perl will never handle multi-byte locales, except
-                 * for UTF-8, we could break if we found a byte above latin1,
-                 * but perhaps the person intended to use one. */
-                temp_ptr += UTF8SKIP(temp_ptr);
-            }
-            else {
-                temp_ptr++;
+
+                return OOB_NAMEDCLASS;
              }
          }
+
+        /* If we find something that started out to look like one of these
+         * constructs, but isn't, we continue below so that it can be checked
+         * for being a class name with a typo of '.' or '=' instead of a colon.
+         * */
      }
  
      /* Here, we think there is a possibility that a [: :] class was meant, and
@@ -14385,14 +14637,8 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                  ADD_POSIX_WARNING(p, "there is no terminating ']'");
              }
  
-            if (warn_text) {
-                if (posix_warnings) {
-                    /* mortalize to avoid a leak with FATAL warnings */
-                    *posix_warnings = (AV *) sv_2mortal((SV *) warn_text);
-                }
-                else {
-                    SvREFCNT_dec_NN(warn_text);
-                }
+            if (posix_warnings && RExC_warn_text && av_top_index(RExC_warn_text) > -1) {
+                *posix_warnings = RExC_warn_text;
              }
          }
          else if (class_number != OOB_NAMEDCLASS) {
@@ -14409,7 +14655,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                                                     ? "^"
                                                     : "";
              RExC_parse = (char *) p;
-            vFAIL3utf8f("POSIX class [:%s%"UTF8f":] unknown",
+            vFAIL3utf8f("POSIX class [:%s%" UTF8f ":] unknown",
                          complement_string,
                          UTF8fARG(UTF, RExC_parse - name_start - 2, name_start));
          }
@@ -14464,7 +14710,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                         'stack' of where the undealt-with left
                                         parens would be if they were actually
                                         put there */
-    IV fence = 0;                   /* Position of where most recent undealt-
+    /* The 'VOL' (expanding to 'volatile') is a workaround for an optimiser bug
+     * in Solaris Studio 12.3. See RT #127455 */
+    VOL IV fence = 0;               /* Position of where most recent undealt-
                                         with left paren in stack is; -1 if none.
                                       */
      STRLEN len;                     /* Temporary */
@@ -14550,7 +14798,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                    &posix_warnings
                                   ))
                          FAIL2("panic: regclass returned NULL to handle_sets, "
-                              "flags=%#"UVxf"", (UV) *flagp);
+                              "flags=%#" UVxf, (UV) *flagp);
  
                      /* function call leaves parse pointing to the ']', except
                       * if we faked it */
@@ -14586,7 +14834,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
        no_close:
          /* We output the messages even if warnings are off, because we'll fail
           * the very next thing, and these give a likely diagnosis for that */
-        if (posix_warnings && av_tindex(posix_warnings) >= 0) {
+        if (posix_warnings && av_tindex_nomg(posix_warnings) >= 0) {
              output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
          }
  
@@ -14695,7 +14943,13 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
  
  redo_curchar:
  
-        top_index = av_tindex(stack);
+#ifdef ENABLE_REGEX_SETS_DEBUGGING
+                    /* Enable with -Accflags=-DENABLE_REGEX_SETS_DEBUGGING */
+        DEBUG_U(dump_regex_sets_structures(pRExC_state,
+                                           stack, fence, fence_stack));
+#endif
+
+        top_index = av_tindex_nomg(stack);
  
          switch (curchar) {
              SV** stacked_ptr;       /* Ptr to something already on 'stack' */
@@ -14796,8 +15050,8 @@ redo_curchar:
                  }
  
                  /* Stack the position of this undealt-with left paren */
-                fence = top_index + 1;
                  av_push(fence_stack, newSViv(fence));
+                fence = top_index + 1;
                  break;
  
              case '\\':
@@ -14813,7 +15067,7 @@ redo_curchar:
                                NULL))
                  {
                      FAIL2("panic: regclass returned NULL to handle_sets, "
-                          "flags=%#"UVxf"", (UV) *flagp);
+                          "flags=%#" UVxf, (UV) *flagp);
                  }
  
                  /* regclass() will return with parsing just the \ sequence,
@@ -14852,7 +15106,7 @@ redo_curchar:
                                  ))
                  {
                      FAIL2("panic: regclass returned NULL to handle_sets, "
-                          "flags=%#"UVxf"", (UV) *flagp);
+                          "flags=%#" UVxf, (UV) *flagp);
                  }
  
                  /* function call leaves parse pointing to the ']', except if we
@@ -14873,12 +15127,17 @@ redo_curchar:
                  goto done;
  
              case ')':
-                if (av_tindex(fence_stack) < 0) {
+                if (av_tindex_nomg(fence_stack) < 0) {
                      RExC_parse++;
                      vFAIL("Unexpected ')'");
                  }
  
-                 /* If at least two thing on the stack, treat this as an
+                /* If nothing after the fence, is missing an operand */
+                if (top_index - fence < 0) {
+                    RExC_parse++;
+                    goto bad_syntax;
+                }
+                /* If at least two things on the stack, treat this as an
                    * operator */
                  if (top_index - fence >= 1) {
                      goto join_operators;
@@ -15016,17 +15275,12 @@ redo_curchar:
                      {
                          SV* i = NULL;
                          SV* u = NULL;
-                        SV* element;
  
                          _invlist_union(lhs, rhs, &u);
                          _invlist_intersection(lhs, rhs, &i);
-                        /* _invlist_subtract will overwrite rhs
-                            without freeing what it already contains */
-                        element = rhs;
                          _invlist_subtract(u, i, &rhs);
                          SvREFCNT_dec_NN(i);
                          SvREFCNT_dec_NN(u);
-                        SvREFCNT_dec_NN(element);
                          break;
                      }
                  }
@@ -15065,10 +15319,11 @@ redo_curchar:
            handle_operand:
  
              /* Here 'current' is the operand.  If something is already on the
-             * stack, we have to check if it is a !. */
-            top_index = av_tindex(stack);   /* Code above may have altered the
-                                             * stack in the time since we
-                                             * earlier set 'top_index'. */
+             * stack, we have to check if it is a !.  But first, the code above
+             * may have altered the stack in the time since we earlier set
+             * 'top_index'.  */
+
+            top_index = av_tindex_nomg(stack);
              if (top_index - fence >= 0) {
                  /* If the top entry on the stack is an operator, it had better
                   * be a '!', otherwise the entry below the top operand should
@@ -15090,7 +15345,6 @@ redo_curchar:
  
                      only_to_avoid_leaks = av_pop(stack);
                      SvREFCNT_dec(only_to_avoid_leaks);
-                    top_index = av_tindex(stack);
  
                      /* And we redo with the inverted operand.  This allows
                       * handling multiple ! in a row */
@@ -15120,15 +15374,15 @@ redo_curchar:
      } /* End of loop parsing through the construct */
  
    done:
-    if (av_tindex(fence_stack) >= 0) {
+    if (av_tindex_nomg(fence_stack) >= 0) {
          vFAIL("Unmatched (");
      }
  
-    if (av_tindex(stack) < 0   /* Was empty */
+    if (av_tindex_nomg(stack) < 0   /* Was empty */
          || ((final = av_pop(stack)) == NULL)
          || ! IS_OPERAND(final)
          || SvTYPE(final) != SVt_INVLIST
-        || av_tindex(stack) >= 0)  /* More left on stack */
+        || av_tindex_nomg(stack) >= 0)  /* More left on stack */
      {
        bad_syntax:
          SvREFCNT_dec(final);
@@ -15148,10 +15402,10 @@ redo_curchar:
      result_string = newSVpvs("");
      while (invlist_iternext(final, &start, &end)) {
          if (start == end) {
-            Perl_sv_catpvf(aTHX_ result_string, "\\x{%"UVXf"}", start);
+            Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}", start);
          }
          else {
-            Perl_sv_catpvf(aTHX_ result_string, "\\x{%"UVXf"}-\\x{%"UVXf"}",
+            Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}-\\x{%" UVXf "}",
                                                       start,          end);
          }
      }
@@ -15181,7 +15435,7 @@ redo_curchar:
                      NULL
                  );
      if (!node)
-        FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf,
+        FAIL2("panic: regclass returned NULL to handle_sets, flags=%#" UVxf,
                      PTR2UV(flagp));
  
      /* Fix up the node type if we are in locale.  (We have pretended we are
@@ -15223,6 +15477,61 @@ redo_curchar:
      Set_Node_Length(node, RExC_parse - oregcomp_parse + 1); /* MJD */
      return node;
  }
+
+#ifdef ENABLE_REGEX_SETS_DEBUGGING
+
+STATIC void
+S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state,
+                             AV * stack, const IV fence, AV * fence_stack)
+{   /* Dumps the stacks in handle_regex_sets() */
+
+    const SSize_t stack_top = av_tindex_nomg(stack);
+    const SSize_t fence_stack_top = av_tindex_nomg(fence_stack);
+    SSize_t i;
+
+    PERL_ARGS_ASSERT_DUMP_REGEX_SETS_STRUCTURES;
+
+    PerlIO_printf(Perl_debug_log, "\nParse position is:%s\n", RExC_parse);
+
+    if (stack_top < 0) {
+        PerlIO_printf(Perl_debug_log, "Nothing on stack\n");
+    }
+    else {
+        PerlIO_printf(Perl_debug_log, "Stack: (fence=%d)\n", (int) fence);
+        for (i = stack_top; i >= 0; i--) {
+            SV ** element_ptr = av_fetch(stack, i, FALSE);
+            if (! element_ptr) {
+            }
+
+            if (IS_OPERATOR(*element_ptr)) {
+                PerlIO_printf(Perl_debug_log, "[%d]: %c\n",
+                                            (int) i, (int) SvIV(*element_ptr));
+            }
+            else {
+                PerlIO_printf(Perl_debug_log, "[%d] ", (int) i);
+                sv_dump(*element_ptr);
+            }
+        }
+    }
+
+    if (fence_stack_top < 0) {
+        PerlIO_printf(Perl_debug_log, "Nothing on fence_stack\n");
+    }
+    else {
+        PerlIO_printf(Perl_debug_log, "Fence_stack: \n");
+        for (i = fence_stack_top; i >= 0; i--) {
+            SV ** element_ptr = av_fetch(fence_stack, i, FALSE);
+            if (! element_ptr) {
+            }
+
+            PerlIO_printf(Perl_debug_log, "[%d]: %d\n",
+                                            (int) i, (int) SvIV(*element_ptr));
+        }
+    }
+}
+
+#endif
+
  #undef IS_OPERATOR
  #undef IS_OPERAND
  
@@ -15629,7 +15938,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      while (1) {
  
          if (   posix_warnings
-            && av_tindex(posix_warnings) >= 0
+            && av_tindex_nomg(posix_warnings) >= 0
              && RExC_parse > not_posix_region_end)
          {
              /* Warnings about posix class issues are considered tentative until
@@ -15685,7 +15994,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   * posix class, and it failed, it was a false alarm, as this
                   * successful one proves */
                  if (   posix_warnings
-                    && av_tindex(posix_warnings) >= 0
+                    && av_tindex_nomg(posix_warnings) >= 0
                      && not_posix_region_end >= RExC_parse
                      && not_posix_region_end <= posix_class_end)
                  {
@@ -15714,6 +16023,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          }
          else if (value == '\\') {
              /* Is a backslash; get the code point of the char after it */
+
+            if (RExC_parse >= RExC_end) {
+                vFAIL("Unmatched [");
+            }
+
             if (UTF && ! UTF8_IS_INVARIANT(UCHARAT(RExC_parse))) {
                 value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
@@ -15881,8 +16195,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                       * discussed in commit
                       * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
                      name = savepv(Perl_form(aTHX_ "%.*s", (int)n, RExC_parse));
+                    SAVEFREEPV(name);
                      if (FOLD) {
                          lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
+
+                        /* The function call just below that uses this can fail
+                         * to return, leaking memory if we don't do this */
+                        SAVEFREEPV(lookup_name);
                      }
  
                      /* Look up the property name, and get its swash and
@@ -15898,9 +16217,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                               NULL, /* No inversion list */
                                               &swash_init_flags
                                              );
-                    if (lookup_name) {
-                        Safefree(lookup_name);
-                    }
                      if (! swash || ! (invlist = _get_swash_invlist(swash))) {
                          HV* curpkg = (IN_PERL_COMPILETIME)
                                        ? PL_curstash
@@ -15948,7 +16264,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              RExC_parse = e + 1;
  
                              /* diag_listed_as: Can't find Unicode property definition "%s" */
-                            vFAIL3utf8f("%s \"%"UTF8f"\"",
+                            vFAIL3utf8f("%s \"%" UTF8f "\"",
                                  msg, UTF8fARG(UTF, n, name));
                          }
  
@@ -15963,11 +16279,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                              pkgname,
                                                              name);
                                  n = strlen(full_name);
-                                Safefree(name);
                                  name = savepvn(full_name, n);
+                                SAVEFREEPV(name);
                              }
                          }
-                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%"UTF8f"%s\n",
+                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%" UTF8f "%s\n",
                                          (value == 'p' ? '+' : '!'),
                                          (FOLD) ? "__" : "",
                                          UTF8fARG(UTF, n, name),
@@ -16019,7 +16335,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              _invlist_union(properties, invlist, &properties);
                         }
                     }
-                   Safefree(name);
                 }
                 RExC_parse = e + 1;
                  namedclass = ANYOF_UNIPROP;  /* no official name, but it's
@@ -16053,9 +16368,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     }
                 }
                  non_portable_endpoint++;
-               if (IN_ENCODING && value < 0x100) {
-                   goto recode_encoding;
-               }
                 break;
             case 'x':
                 RExC_parse--;   /* function expects to be pointed at the 'x' */
@@ -16073,8 +16385,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     }
                 }
                  non_portable_endpoint++;
-               if (IN_ENCODING && value < 0x100)
-                   goto recode_encoding;
                 break;
             case 'c':
                 value = grok_bslash_c(*RExC_parse++, PASS2);
@@ -16107,23 +16417,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          }
                      }
                      non_portable_endpoint++;
-                   if (IN_ENCODING && value < 0x100)
-                       goto recode_encoding;
-                   break;
-               }
-             recode_encoding:
-               if (! RExC_override_recoding) {
-                   SV* enc = _get_encoding();
-                   value = reg_recode((U8)value, &enc);
-                   if (!enc) {
-                        if (strict) {
-                            vFAIL("Invalid escape in the specified encoding");
-                        }
-                        else if (PASS2) {
-                            ckWARNreg(RExC_parse,
-                                 "Invalid escape in the specified encoding");
-                        }
-                    }
                     break;
                 }
             default:
@@ -16160,13 +16453,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                    : 0;
                      if (strict) {
                          vFAIL2utf8f(
-                            "False [] range \"%"UTF8f"\"",
+                            "False [] range \"%" UTF8f "\"",
                              UTF8fARG(UTF, w, rangebegin));
                      }
                      else {
                          SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */
                          ckWARN2reg(RExC_parse,
-                            "False [] range \"%"UTF8f"\"",
+                            "False [] range \"%" UTF8f "\"",
                              UTF8fARG(UTF, w, rangebegin));
                          (void)ReREFCNT_inc(RExC_rx_sv);
                          cp_list = add_cp_to_invlist(cp_list, '-');
@@ -16264,9 +16557,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              else if (! SIZE_ONLY) {
  
                  /* Here, not in pass1 (in that pass we skip calculating the
-                 * contents of this class), and is /l, or is a POSIX class for
-                 * which /l doesn't matter (or is a Unicode property, which is
-                 * skipped here). */
+                 * contents of this class), and is not /l, or is a POSIX class
+                 * for which /l doesn't matter (or is a Unicode property, which
+                 * is skipped here). */
                  if (namedclass >= ANYOF_POSIXL_MAX) {  /* If a special class */
                      if (namedclass != ANYOF_UNIPROP) { /* UNIPROP = \p and \P */
  
@@ -16291,9 +16584,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                  &cp_list);
                      }
                  }
-                else if (UNI_SEMANTICS
+                else if (  UNI_SEMANTICS
                          || classnum == _CC_ASCII
-                        || (DEPENDS_SEMANTICS && (classnum == _CC_DIGIT
+                        || (DEPENDS_SEMANTICS && (   classnum == _CC_DIGIT
                                                    || classnum == _CC_XDIGIT)))
                  {
                      /* We usually have to worry about /d and /a affecting what
@@ -16354,7 +16647,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  #endif
                  w = RExC_parse - rangebegin;
                  vFAIL2utf8f(
-                    "Invalid [] range \"%"UTF8f"\"",
+                    "Invalid [] range \"%" UTF8f "\"",
                      UTF8fARG(UTF, w, rangebegin));
                  NOT_REACHED; /* NOTREACHED */
             }
@@ -16459,7 +16752,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                        foldbuf + foldlen);
                          SV* multi_fold = sv_2mortal(newSVpvs(""));
  
-                        Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%"UVXf"}", value);
+                        Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%" UVXf "}", value);
  
                          multi_char_matches
                                          = add_multi_match(multi_char_matches,
@@ -16626,7 +16919,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      } /* End of loop through all the text within the brackets */
  
  
-    if (   posix_warnings && av_tindex(posix_warnings) >= 0) {
+    if (   posix_warnings && av_tindex_nomg(posix_warnings) >= 0) {
          output_or_return_posix_warnings(pRExC_state, posix_warnings,
                                          return_posix_warnings);
      }
@@ -16659,7 +16952,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  #endif
  
          /* Look at the longest folds first */
-        for (cp_count = av_tindex(multi_char_matches); cp_count > 0; cp_count--) {
+        for (cp_count = av_tindex_nomg(multi_char_matches);
+                        cp_count > 0;
+                        cp_count--)
+        {
  
              if (av_exists(multi_char_matches, cp_count)) {
                  AV** this_array_ptr;
@@ -17040,7 +17336,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      {
                          AV* list = (AV*) *listp;
                          IV k;
-                        for (k = 0; k <= av_tindex(list); k++) {
+                        for (k = 0; k <= av_tindex_nomg(list); k++) {
                              SV** c_p = av_fetch(list, k, FALSE);
                              UV c;
                              assert(c_p);
@@ -17087,76 +17383,152 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         SvREFCNT_dec_NN(cp_foldable_list);
      }
  
-    /* And combine the result (if any) with any inversion list from posix
+    /* And combine the result (if any) with any inversion lists from posix
       * classes.  The lists are kept separate up to now because we don't want to
       * fold the classes (folding of those is automatically handled by the swash
       * fetching code) */
-    if (simple_posixes) {
-        _invlist_union(cp_list, simple_posixes, &cp_list);
-        SvREFCNT_dec_NN(simple_posixes);
+    if (simple_posixes) {   /* These are the classes known to be unaffected by
+                               /a, /aa, and /d */
+        if (cp_list) {
+            _invlist_union(cp_list, simple_posixes, &cp_list);
+            SvREFCNT_dec_NN(simple_posixes);
+        }
+        else {
+            cp_list = simple_posixes;
+        }
      }
      if (posixes || nposixes) {
-        if (posixes && AT_LEAST_ASCII_RESTRICTED) {
+
+        /* We have to adjust /a and /aa */
+        if (AT_LEAST_ASCII_RESTRICTED) {
+
              /* Under /a and /aa, nothing above ASCII matches these */
-            _invlist_intersection(posixes,
-                                  PL_XPosix_ptrs[_CC_ASCII],
-                                  &posixes);
-        }
-        if (nposixes) {
-            if (DEPENDS_SEMANTICS) {
-                /* Under /d, everything in the upper half of the Latin1 range
-                 * matches these complements */
-                ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+            if (posixes) {
+                _invlist_intersection(posixes,
+                                    PL_XPosix_ptrs[_CC_ASCII],
+                                    &posixes);
              }
-            else if (AT_LEAST_ASCII_RESTRICTED) {
-                /* Under /a and /aa, everything above ASCII matches these
-                 * complements */
+
+            /* Under /a and /aa, everything above ASCII matches these
+             * complements */
+            if (nposixes) {
                  _invlist_union_complement_2nd(nposixes,
                                                PL_XPosix_ptrs[_CC_ASCII],
                                                &nposixes);
              }
-            if (posixes) {
-                _invlist_union(posixes, nposixes, &posixes);
-                SvREFCNT_dec_NN(nposixes);
-            }
-            else {
-                posixes = nposixes;
-            }
          }
+
          if (! DEPENDS_SEMANTICS) {
-            if (cp_list) {
-                _invlist_union(cp_list, posixes, &cp_list);
-                SvREFCNT_dec_NN(posixes);
+
+            /* For everything but /d, we can just add the current 'posixes' and
+             * 'nposixes' to the main list */
+            if (posixes) {
+                if (cp_list) {
+                    _invlist_union(cp_list, posixes, &cp_list);
+                    SvREFCNT_dec_NN(posixes);
+                }
+                else {
+                    cp_list = posixes;
+                }
              }
-            else {
-                cp_list = posixes;
+            if (nposixes) {
+                if (cp_list) {
+                    _invlist_union(cp_list, nposixes, &cp_list);
+                    SvREFCNT_dec_NN(nposixes);
+                }
+                else {
+                    cp_list = nposixes;
+                }
              }
          }
          else {
-            /* Under /d, we put into a separate list the Latin1 things that
-             * match only when the target string is utf8 */
-            SV* nonascii_but_latin1_properties = NULL;
-            _invlist_intersection(posixes, PL_UpperLatin1,
-                                  &nonascii_but_latin1_properties);
-            _invlist_subtract(posixes, nonascii_but_latin1_properties,
-                              &posixes);
-            if (cp_list) {
-                _invlist_union(cp_list, posixes, &cp_list);
-                SvREFCNT_dec_NN(posixes);
+            /* Under /d, things like \w match upper Latin1 characters only if
+             * the target string is in UTF-8.  But things like \W match all the
+             * upper Latin1 characters if the target string is not in UTF-8.
+             *
+             * Handle the case where there something like \W separately */
+            if (nposixes) {
+                SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1);
+
+                /* A complemented posix class matches all upper Latin1
+                 * characters if not in UTF-8.  And it matches just certain
+                 * ones when in UTF-8.  That means those certain ones are
+                 * matched regardless, so can just be added to the
+                 * unconditional list */
+                if (cp_list) {
+                    _invlist_union(cp_list, nposixes, &cp_list);
+                    SvREFCNT_dec_NN(nposixes);
+                    nposixes = NULL;
+                }
+                else {
+                    cp_list = nposixes;
+                }
+
+                /* Likewise for 'posixes' */
+                _invlist_union(posixes, cp_list, &cp_list);
+
+                /* Likewise for anything else in the range that matched only
+                 * under UTF-8 */
+                if (has_upper_latin1_only_utf8_matches) {
+                    _invlist_union(cp_list,
+                                   has_upper_latin1_only_utf8_matches,
+                                   &cp_list);
+                    SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
+                    has_upper_latin1_only_utf8_matches = NULL;
+                }
+
+                /* If we don't match all the upper Latin1 characters regardless
+                 * of UTF-8ness, we have to set a flag to match the rest when
+                 * not in UTF-8 */
+                _invlist_subtract(only_non_utf8_list, cp_list,
+                                  &only_non_utf8_list);
+                if (_invlist_len(only_non_utf8_list) != 0) {
+                    ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+                }
              }
              else {
-                cp_list = posixes;
-            }
-
-            if (has_upper_latin1_only_utf8_matches) {
+                /* Here there were no complemented posix classes.  That means
+                 * the upper Latin1 characters in 'posixes' match only when the
+                 * target string is in UTF-8.  So we have to add them to the
+                 * list of those types of code points, while adding the
+                 * remainder to the unconditional list.
+                 *
+                 * First calculate what they are */
+                SV* nonascii_but_latin1_properties = NULL;
+                _invlist_intersection(posixes, PL_UpperLatin1,
+                                      &nonascii_but_latin1_properties);
+
+                /* And add them to the final list of such characters. */
                  _invlist_union(has_upper_latin1_only_utf8_matches,
                                 nonascii_but_latin1_properties,
                                 &has_upper_latin1_only_utf8_matches);
-                SvREFCNT_dec_NN(nonascii_but_latin1_properties);
-            }
-            else {
-                has_upper_latin1_only_utf8_matches
-                                            = nonascii_but_latin1_properties;
+
+                /* Remove them from what now becomes the unconditional list */
+                _invlist_subtract(posixes, nonascii_but_latin1_properties,
+                                  &posixes);
+
+                /* And add those unconditional ones to the final list */
+                if (cp_list) {
+                    _invlist_union(cp_list, posixes, &cp_list);
+                    SvREFCNT_dec_NN(posixes);
+                    posixes = NULL;
+                }
+                else {
+                    cp_list = posixes;
+                }
+
+                SvREFCNT_dec(nonascii_but_latin1_properties);
+
+                /* Get rid of any characters that we now know are matched
+                 * unconditionally from the conditional list, which may make
+                 * that list empty */
+                _invlist_subtract(has_upper_latin1_only_utf8_matches,
+                                  cp_list,
+                                  &has_upper_latin1_only_utf8_matches);
+                if (_invlist_len(has_upper_latin1_only_utf8_matches) == 0) {
+                    SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
+                    has_upper_latin1_only_utf8_matches = NULL;
+                }
              }
          }
      }
@@ -17246,79 +17618,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              invlist_iterfinish(cp_list);
          }
      }
-
-#define MATCHES_ALL_NON_UTF8_NON_ASCII(ret)                                 \
-    (   DEPENDS_SEMANTICS                                                   \
-     && (ANYOF_FLAGS(ret)                                                   \
-        & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
-
-    /* See if we can simplify things under /d */
-    if (   has_upper_latin1_only_utf8_matches
-        || MATCHES_ALL_NON_UTF8_NON_ASCII(ret))
+    else if (   DEPENDS_SEMANTICS
+             && (    has_upper_latin1_only_utf8_matches
+                 || (ANYOF_FLAGS(ret) & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)))
      {
-        /* But not if we are inverting, as that screws it up */
-        if (! invert) {
-            if (has_upper_latin1_only_utf8_matches) {
-                if (MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
-
-                    /* Here, we have both the flag and inversion list.  Any
-                     * character in 'has_upper_latin1_only_utf8_matches'
-                     * matches when UTF-8 is in effect, but it also matches
-                     * when UTF-8 is not in effect because of
-                     * MATCHES_ALL_NON_UTF8_NON_ASCII.  Therefore it matches
-                     * unconditionally, so can be added to the regular list,
-                     * and 'has_upper_latin1_only_utf8_matches' cleared */
-                    _invlist_union(cp_list,
-                                   has_upper_latin1_only_utf8_matches,
-                                   &cp_list);
-                    SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
-                    has_upper_latin1_only_utf8_matches = NULL;
-                }
-                else if (cp_list) {
-
-                    /* Here, 'cp_list' gives chars that always match, and
-                     * 'has_upper_latin1_only_utf8_matches' gives chars that
-                     * were specified to match only if the target string is in
-                     * UTF-8.  It may be that these overlap, so we can subtract
-                     * the unconditionally matching from the conditional ones,
-                     * to make the conditional list as small as possible,
-                     * perhaps even clearing it, in which case more
-                     * optimizations are possible later */
-                    _invlist_subtract(has_upper_latin1_only_utf8_matches,
-                                      cp_list,
-                                      &has_upper_latin1_only_utf8_matches);
-                    if (_invlist_len(has_upper_latin1_only_utf8_matches) == 0) {
-                        SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
-                        has_upper_latin1_only_utf8_matches = NULL;
-                    }
-                }
-            }
-
-            /* Similarly, if the unconditional matches include every upper
-             * latin1 character, we can clear that flag to permit later
-             * optimizations */
-            if (cp_list && MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
-                SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1);
-                _invlist_subtract(only_non_utf8_list, cp_list,
-                                  &only_non_utf8_list);
-                if (_invlist_len(only_non_utf8_list) == 0) {
-                    ANYOF_FLAGS(ret) &= ~ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
-                }
-                SvREFCNT_dec_NN(only_non_utf8_list);
-                only_non_utf8_list = NULL;;
-            }
-        }
-
-        /* If we haven't gotten rid of all conditional matching, we change the
-         * regnode type to indicate that */
-        if (   has_upper_latin1_only_utf8_matches
-            || MATCHES_ALL_NON_UTF8_NON_ASCII(ret))
-        {
-            OP(ret) = ANYOFD;
-            optimizable = FALSE;
-        }
+        OP(ret) = ANYOFD;
+        optimizable = FALSE;
      }
-#undef MATCHES_ALL_NON_UTF8_NON_ASCII
+
  
      /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
       * at compile time.  Besides not inverting folded locale now, we can't
@@ -17720,7 +18027,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
  
             si = *ary;  /* ary[0] = the string to initialize the swash with */
  
-            if (av_tindex(av) >= 2) {
+            if (av_tindex_nomg(av) >= 2) {
                  if (only_utf8_locale_ptr
                      && ary[2]
                      && ary[2] != &PL_sv_undef)
@@ -17736,7 +18043,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                   * is any inversion list generated at compile time; [4]
                   * indicates if that inversion list has any user-defined
                   * properties in it. */
-                if (av_tindex(av) >= 3) {
+                if (av_tindex_nomg(av) >= 3) {
                      invlist = ary[3];
                      if (SvUV(ary[4])) {
                          swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
@@ -18016,7 +18323,7 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
          RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
  
          skip_to_be_ignored_text(pRExC_state, &RExC_parse,
-                                FALSE /* Don't assume /x */ );
+                                FALSE /* Don't force /x */ );
      }
  }
  
@@ -18049,7 +18356,7 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
  #else
      if (RExC_offsets) {         /* MJD */
         MJD_OFFSET_DEBUG(
-              ("%s:%d: (op %s) %s %"UVuf" (len %"UVuf") (max %"UVuf").\n",
+              ("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n",
                name, __LINE__,
                PL_reg_name[op],
                (UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0]
@@ -18146,15 +18453,23 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
         RExC_size += size;
         return;
      }
-
+    assert(!RExC_study_started); /* I believe we should never use reginsert once we have started
+                                    studying. If this is wrong then we need to adjust RExC_recurse
+                                    below like we do with RExC_open_parens/RExC_close_parens. */
      src = RExC_emit;
      RExC_emit += size;
      dst = RExC_emit;
      if (RExC_open_parens) {
          int paren;
-        /*DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);*/
+        /*DEBUG_PARSE_FMT("inst"," - %" IVdf, (IV)RExC_npar);*/
+        /* remember that RExC_npar is rex->nparens + 1,
+         * iow it is 1 more than the number of parens seen in
+         * the pattern so far. */
          for ( paren=0 ; paren < RExC_npar ; paren++ ) {
-            if ( RExC_open_parens[paren] >= opnd ) {
+            /* note, RExC_open_parens[0] is the start of the
+             * regex, it can't move. RExC_close_parens[0] is the end
+             * of the regex, it *can* move. */
+            if ( paren && RExC_open_parens[paren] >= opnd ) {
                  /*DEBUG_PARSE_FMT("open"," - %d",size);*/
                  RExC_open_parens[paren] += size;
              } else {
@@ -18168,13 +18483,15 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
              }
          }
      }
+    if (RExC_end_op)
+        RExC_end_op += size;
  
      while (src > opnd) {
         StructCopy(--src, --dst, regnode);
  #ifdef RE_TRACK_PATTERN_OFFSETS
          if (RExC_offsets) {     /* MJD 20010112 */
             MJD_OFFSET_DEBUG(
-                 ("%s(%d): (op %s) %s copy %"UVuf" -> %"UVuf" (max %"UVuf").\n",
+                 ("%s(%d): (op %s) %s copy %" UVuf " -> %" UVuf " (max %" UVuf ").\n",
                    "reg_insert",
                   __LINE__,
                   PL_reg_name[op],
@@ -18194,7 +18511,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
  #ifdef RE_TRACK_PATTERN_OFFSETS
      if (RExC_offsets) {         /* MJD */
         MJD_OFFSET_DEBUG(
-              ("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n",
+              ("%s(%d): (op %s) %s %" UVuf " <- %" UVuf " (max %" UVuf ").\n",
                "reginsert",
               __LINE__,
               PL_reg_name[op],
@@ -18240,7 +18557,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
          DEBUG_PARSE_r({
              DEBUG_PARSE_MSG((scan==p ? "tail" : ""));
              regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);
-            PerlIO_printf(Perl_debug_log, "~ %s (%d) %s %s\n",
+            Perl_re_printf( aTHX_  "~ %s (%d) %s %s\n",
                  SvPV_nolen_const(RExC_mysv), REG_NODE_NUM(scan),
                      (temp == NULL ? "->" : ""),
                      (temp == NULL ? PL_reg_name[OP(val)] : "")
@@ -18330,7 +18647,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
          DEBUG_PARSE_r({
              DEBUG_PARSE_MSG((scan==p ? "tsdy" : ""));
              regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);
-            PerlIO_printf(Perl_debug_log, "~ %s (%d) -> %s\n",
+            Perl_re_printf( aTHX_  "~ %s (%d) -> %s\n",
                  SvPV_nolen_const(RExC_mysv),
                  REG_NODE_NUM(scan),
                  PL_reg_name[exact]);
@@ -18342,8 +18659,8 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
      DEBUG_PARSE_r({
          DEBUG_PARSE_MSG("");
          regprop(RExC_rx, RExC_mysv, val, NULL, pRExC_state);
-        PerlIO_printf(Perl_debug_log,
-                      "~ attach to %s (%"IVdf") offset to %"IVdf"\n",
+        Perl_re_printf( aTHX_
+                      "~ attach to %s (%" IVdf ") offset to %" IVdf "\n",
                       SvPV_nolen_const(RExC_mysv),
                       (IV)REG_NODE_NUM(val),
                       (IV)(val - scan)
@@ -18376,15 +18693,15 @@ S_regdump_intflags(pTHX_ const char *lead, const U32 flags)
      for (bit=0; bit<REG_INTFLAGS_NAME_SIZE; bit++) {
          if (flags & (1<<bit)) {
              if (!set++ && lead)
-                PerlIO_printf(Perl_debug_log, "%s",lead);
-            PerlIO_printf(Perl_debug_log, "%s ",PL_reg_intflags_name[bit]);
+                Perl_re_printf( aTHX_  "%s",lead);
+            Perl_re_printf( aTHX_  "%s ",PL_reg_intflags_name[bit]);
          }
      }
      if (lead)  {
          if (set)
-            PerlIO_printf(Perl_debug_log, "\n");
+            Perl_re_printf( aTHX_  "\n");
          else
-            PerlIO_printf(Perl_debug_log, "%s[none-set]\n",lead);
+            Perl_re_printf( aTHX_  "%s[none-set]\n",lead);
      }
  }
  
@@ -18403,37 +18720,37 @@ S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
                 continue;
             }
              if (!set++ && lead)
-                PerlIO_printf(Perl_debug_log, "%s",lead);
-            PerlIO_printf(Perl_debug_log, "%s ",PL_reg_extflags_name[bit]);
+                Perl_re_printf( aTHX_  "%s",lead);
+            Perl_re_printf( aTHX_  "%s ",PL_reg_extflags_name[bit]);
          }
      }
      if ((cs = get_regex_charset(flags)) != REGEX_DEPENDS_CHARSET) {
              if (!set++ && lead) {
-                PerlIO_printf(Perl_debug_log, "%s",lead);
+                Perl_re_printf( aTHX_  "%s",lead);
              }
              switch (cs) {
                  case REGEX_UNICODE_CHARSET:
-                    PerlIO_printf(Perl_debug_log, "UNICODE");
+                    Perl_re_printf( aTHX_  "UNICODE");
                      break;
                  case REGEX_LOCALE_CHARSET:
-                    PerlIO_printf(Perl_debug_log, "LOCALE");
+                    Perl_re_printf( aTHX_  "LOCALE");
                      break;
                  case REGEX_ASCII_RESTRICTED_CHARSET:
-                    PerlIO_printf(Perl_debug_log, "ASCII-RESTRICTED");
+                    Perl_re_printf( aTHX_  "ASCII-RESTRICTED");
                      break;
                  case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                    PerlIO_printf(Perl_debug_log, "ASCII-MORE_RESTRICTED");
+                    Perl_re_printf( aTHX_  "ASCII-MORE_RESTRICTED");
                      break;
                  default:
-                    PerlIO_printf(Perl_debug_log, "UNKNOWN CHARACTER SET");
+                    Perl_re_printf( aTHX_  "UNKNOWN CHARACTER SET");
                      break;
              }
      }
      if (lead)  {
          if (set)
-            PerlIO_printf(Perl_debug_log, "\n");
+            Perl_re_printf( aTHX_  "\n");
          else
-            PerlIO_printf(Perl_debug_log, "%s[none-set]\n",lead);
+            Perl_re_printf( aTHX_  "%s[none-set]\n",lead);
      }
  }
  #endif
@@ -18455,70 +18772,70 @@ Perl_regdump(pTHX_ const regexp *r)
      if (r->anchored_substr) {
         RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->anchored_substr),
             RE_SV_DUMPLEN(r->anchored_substr), 30);
-       PerlIO_printf(Perl_debug_log,
-                     "anchored %s%s at %"IVdf" ",
+        Perl_re_printf( aTHX_
+                     "anchored %s%s at %" IVdf " ",
                       s, RE_SV_TAIL(r->anchored_substr),
                       (IV)r->anchored_offset);
      } else if (r->anchored_utf8) {
         RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->anchored_utf8),
             RE_SV_DUMPLEN(r->anchored_utf8), 30);
-       PerlIO_printf(Perl_debug_log,
-                     "anchored utf8 %s%s at %"IVdf" ",
+        Perl_re_printf( aTHX_
+                     "anchored utf8 %s%s at %" IVdf " ",
                       s, RE_SV_TAIL(r->anchored_utf8),
                       (IV)r->anchored_offset);
      }
      if (r->float_substr) {
         RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->float_substr),
             RE_SV_DUMPLEN(r->float_substr), 30);
-       PerlIO_printf(Perl_debug_log,
-                     "floating %s%s at %"IVdf"..%"UVuf" ",
+        Perl_re_printf( aTHX_
+                     "floating %s%s at %" IVdf "..%" UVuf " ",
                       s, RE_SV_TAIL(r->float_substr),
                       (IV)r->float_min_offset, (UV)r->float_max_offset);
      } else if (r->float_utf8) {
         RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->float_utf8),
             RE_SV_DUMPLEN(r->float_utf8), 30);
-       PerlIO_printf(Perl_debug_log,
-                     "floating utf8 %s%s at %"IVdf"..%"UVuf" ",
+        Perl_re_printf( aTHX_
+                     "floating utf8 %s%s at %" IVdf "..%" UVuf " ",
                       s, RE_SV_TAIL(r->float_utf8),
                       (IV)r->float_min_offset, (UV)r->float_max_offset);
      }
      if (r->check_substr || r->check_utf8)
-       PerlIO_printf(Perl_debug_log,
+        Perl_re_printf( aTHX_
                       (const char *)
                       (r->check_substr == r->float_substr
                        && r->check_utf8 == r->float_utf8
                        ? "(checking floating" : "(checking anchored"));
      if (r->intflags & PREGf_NOSCAN)
-       PerlIO_printf(Perl_debug_log, " noscan");
+        Perl_re_printf( aTHX_  " noscan");
      if (r->extflags & RXf_CHECK_ALL)
-       PerlIO_printf(Perl_debug_log, " isall");
+        Perl_re_printf( aTHX_  " isall");
      if (r->check_substr || r->check_utf8)
-       PerlIO_printf(Perl_debug_log, ") ");
+        Perl_re_printf( aTHX_  ") ");
  
      if (ri->regstclass) {
          regprop(r, sv, ri->regstclass, NULL, NULL);
-       PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv));
+        Perl_re_printf( aTHX_  "stclass %s ", SvPVX_const(sv));
      }
      if (r->intflags & PREGf_ANCH) {
-       PerlIO_printf(Perl_debug_log, "anchored");
+        Perl_re_printf( aTHX_  "anchored");
          if (r->intflags & PREGf_ANCH_MBOL)
-           PerlIO_printf(Perl_debug_log, "(MBOL)");
+            Perl_re_printf( aTHX_  "(MBOL)");
          if (r->intflags & PREGf_ANCH_SBOL)
-           PerlIO_printf(Perl_debug_log, "(SBOL)");
+            Perl_re_printf( aTHX_  "(SBOL)");
          if (r->intflags & PREGf_ANCH_GPOS)
-           PerlIO_printf(Perl_debug_log, "(GPOS)");
-       (void)PerlIO_putc(Perl_debug_log, ' ');
+            Perl_re_printf( aTHX_  "(GPOS)");
+        Perl_re_printf( aTHX_ " ");
      }
      if (r->intflags & PREGf_GPOS_SEEN)
-       PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", (UV)r->gofs);
+        Perl_re_printf( aTHX_  "GPOS:%" UVuf " ", (UV)r->gofs);
      if (r->intflags & PREGf_SKIP)
-       PerlIO_printf(Perl_debug_log, "plus ");
+        Perl_re_printf( aTHX_  "plus ");
      if (r->intflags & PREGf_IMPLICIT)
-       PerlIO_printf(Perl_debug_log, "implicit ");
-    PerlIO_printf(Perl_debug_log, "minlen %"IVdf" ", (IV)r->minlen);
+        Perl_re_printf( aTHX_  "implicit ");
+    Perl_re_printf( aTHX_  "minlen %" IVdf " ", (IV)r->minlen);
      if (r->extflags & RXf_EVAL_SEEN)
-       PerlIO_printf(Perl_debug_log, "with eval ");
-    PerlIO_printf(Perl_debug_log, "\n");
+        Perl_re_printf( aTHX_  "with eval ");
+    Perl_re_printf( aTHX_  "\n");
      DEBUG_FLAGS_r({
          regdump_extflags("r->extflags: ",r->extflags);
          regdump_intflags("r->intflags: ",r->intflags);
@@ -18591,7 +18908,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
      PERL_ARGS_ASSERT_REGPROP;
  
-    sv_setpvn(sv, "", 0);
+    SvPVCLEAR(sv);
  
      if (OP(o) > REGNODE_MAX)           /* regnode.type is unsigned */
         /* It would be nice to FAIL() here, but this may be called from
@@ -18627,9 +18944,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
             = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
  
          Perl_sv_catpvf(aTHX_ sv, "-%s",PL_reg_name[o->flags]);
-        DEBUG_TRIE_COMPILE_r(
+        DEBUG_TRIE_COMPILE_r({
+          if (trie->jump)
+            sv_catpvs(sv, "(JUMP)");
            Perl_sv_catpvf(aTHX_ sv,
-            "<S:%"UVuf"/%"IVdf" W:%"UVuf" L:%"UVuf"/%"UVuf" C:%"UVuf"/%"UVuf">",
+            "<S:%" UVuf "/%" IVdf " W:%" UVuf " L:%" UVuf "/%" UVuf " C:%" UVuf "/%" UVuf ">",
              (UV)trie->startstate,
              (IV)trie->statecount-1, /* -1 because of the unused 0 element */
              (UV)trie->wordcount,
@@ -18638,7 +18957,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              (UV)TRIE_CHARCOUNT(trie),
              (UV)trie->uniquecharcount
            );
-        );
+        });
          if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
              sv_catpvs(sv, "[");
              (void) put_charclass_bitmap_innards(sv,
@@ -18647,11 +18966,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                                                   : TRIE_BITMAP(trie)),
                                                  NULL,
                                                  NULL,
-                                                NULL
+                                                NULL,
+                                                FALSE
                                                 );
              sv_catpvs(sv, "]");
          }
-
      } else if (k == CURLY) {
          U32 lo = ARG1(o), hi = ARG2(o);
         if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX)
@@ -18670,7 +18989,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
      {
          AV *name_list= NULL;
          U32 parno= OP(o) == ACCEPT ? (U32)ARG2L(o) : ARG(o);
-        Perl_sv_catpvf(aTHX_ sv, "%"UVuf, (UV)parno);        /* Parenth number */
+        Perl_sv_catpvf(aTHX_ sv, "%" UVuf, (UV)parno);        /* Parenth number */
         if ( RXp_PAREN_NAMES(prog) ) {
              name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
          } else if ( pRExC_state ) {
@@ -18680,7 +18999,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              if ( k != REF || (OP(o) < NREF)) {
                  SV **name= av_fetch(name_list, parno, 0 );
                 if (name)
-                   Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+                   Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
              }
              else {
                  SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]);
@@ -18689,10 +19008,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                  I32 n;
                  if (name) {
                      for ( n=0; n<SvIVX(sv_dat); n++ ) {
-                        Perl_sv_catpvf(aTHX_ sv, "%s%"IVdf,
+                        Perl_sv_catpvf(aTHX_ sv, "%s%" IVdf,
                                     (n ? "," : ""), (IV)nums[n]);
                      }
-                    Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
                  }
              }
          }
@@ -18719,11 +19038,12 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          }
  
          /* Paren and offset */
-       Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o));
+        Perl_sv_catpvf(aTHX_ sv, "%d[%+d:%d]", (int)ARG(o),(int)ARG2L(o),
+                (int)((o + (int)ARG2L(o)) - progi->program) );
          if (name_list) {
              SV **name= av_fetch(name_list, ARG(o), 0 );
              if (name)
-                Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+                Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
          }
      }
      else if (k == LOGICAL)
@@ -18745,6 +19065,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          /* And things that aren't in the bitmap, but are small enough to be */
          SV* bitmap_range_not_in_bitmap = NULL;
  
+        const bool inverted = flags & ANYOF_INVERT;
+
         if (OP(o) == ANYOFL) {
              if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
                  sv_catpvs(sv, "{utf8-locale-reqd}");
@@ -18789,21 +19111,37 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                                                ANYOF_BITMAP(o),
                                                bitmap_range_not_in_bitmap,
                                                only_utf8_locale_invlist,
-                                              o);
+                                              o,
+
+                                              /* Can't try inverting for a
+                                               * better display if there are
+                                               * things that haven't been
+                                               * resolved */
+                                              unresolved != NULL);
          SvREFCNT_dec(bitmap_range_not_in_bitmap);
  
          /* If there are user-defined properties which haven't been defined yet,
-         * output them, in a separate [] from the bitmap range stuff */
+         * output them.  If the result is not to be inverted, it is clearest to
+         * output them in a separate [] from the bitmap range stuff.  If the
+         * result is to be complemented, we have to show everything in one [],
+         * as the inversion applies to the whole thing.  Use {braces} to
+         * separate them from anything in the bitmap and anything above the
+         * bitmap. */
          if (unresolved) {
-            if (do_sep) {
-                Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
+            if (inverted) {
+                if (! do_sep) { /* If didn't output anything in the bitmap */
+                    sv_catpvs(sv, "^");
+                }
+                sv_catpvs(sv, "{");
              }
-            if (flags & ANYOF_INVERT) {
-                sv_catpvs(sv, "^");
+            else if (do_sep) {
+                Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
              }
              sv_catsv(sv, unresolved);
-            do_sep = TRUE;
-            SvREFCNT_dec_NN(unresolved);
+            if (inverted) {
+                sv_catpvs(sv, "}");
+            }
+            do_sep = ! inverted;
          }
  
          /* And, finally, add the above-the-bitmap stuff */
@@ -18820,9 +19158,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                  Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
              }
  
-            /* And, for easy of understanding, it is always output not-shown as
-             * complemented */
-            if (flags & ANYOF_INVERT) {
+            /* And, for easy of understanding, it is shown in the
+             * uncomplemented form if possible.  The one exception being if
+             * there are unresolved items, where the inversion has to be
+             * delayed until runtime */
+            if (inverted && ! unresolved) {
                  _invlist_invert(nonbitmap_invlist);
                  _invlist_subtract(nonbitmap_invlist, PL_InBitmap, &nonbitmap_invlist);
              }
@@ -18859,6 +19199,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
          /* And finally the matching, closing ']' */
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
+
+        SvREFCNT_dec(unresolved);
      }
      else if (k == POSIXD || k == NPOSIXD) {
          U8 index = FLAGS(o) * 2;
@@ -18894,7 +19236,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
      /* add on the verb argument if there is one */
      if ( ( k == VERB || OP(o) == ACCEPT || OP(o) == OPFAIL ) && o->flags) {
-        Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
+        Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
                         SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
      }
  #else
@@ -18924,7 +19266,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
                       ? prog->check_utf8 : prog->check_substr);
  
             if (!PL_colorset) reginitcolors();
-           PerlIO_printf(Perl_debug_log,
+            Perl_re_printf( aTHX_
                       "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
                       PL_colors[4],
                       RX_UTF8(r) ? "utf8 " : "",
@@ -18985,6 +19327,8 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
  #endif
      Safefree(r->offs);
      SvREFCNT_dec(r->qr_anoncv);
+    if (r->recurse_locinput)
+        Safefree(r->recurse_locinput);
      rx->sv_u.svu_rx = 0;
  }
  
@@ -19068,6 +19412,8 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
  #endif
      ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
      SvREFCNT_inc_void(ret->qr_anoncv);
+    if (r->recurse_locinput)
+        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
  
      return ret_x;
  }
@@ -19101,7 +19447,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
             SV *dsv= sv_newmortal();
              RE_PV_QUOTED_DECL(s, RX_UTF8(rx),
                  dsv, RX_PRECOMP(rx), RX_PRELEN(rx), 60);
-            PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n",
+            Perl_re_printf( aTHX_ "%sFreeing REx:%s %s\n",
                  PL_colors[4],PL_colors[5],s);
          }
      });
@@ -19206,7 +19552,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
  #define SAVEPVN(p,n)   ((p) ? savepvn(p,n) : NULL)
  
  /*
-   re_dup - duplicate a regexp.
+   re_dup_guts - duplicate a regexp.
  
     This routine is expected to clone a given regexp structure. It is only
     compiled under USE_ITHREADS.
@@ -19274,6 +19620,8 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
  
      RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
      ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param));
+    if (r->recurse_locinput)
+        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
  
      if (ret->pprivate)
         RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
@@ -19328,6 +19676,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
            char, regexp_internal);
      Copy(ri->program, reti->program, len+1, regnode);
  
+
      reti->num_code_blocks = ri->num_code_blocks;
      if (ri->code_blocks) {
         int n;
@@ -19388,7 +19737,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
                 d->data[i] = ri->data->data[i];
                 break;
              default:
-               Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'",
+                Perl_croak(aTHX_ "panic: re_dup_guts unknown data code '%c'",
                                                             ri->data->what[i]);
             }
         }
@@ -19468,7 +19817,7 @@ S_re_croak2(pTHX_ bool utf8, const char* pat1,const char* pat2,...)
         l1 = 512;
      Copy(message, buf, l1 , char);
      /* l1-1 to avoid \n */
-    Perl_croak(aTHX_ "%"UTF8f, UTF8fARG(utf8, l1-1, buf));
+    Perl_croak(aTHX_ "%" UTF8f, UTF8fARG(utf8, l1-1, buf));
  }
  
  /* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
@@ -19520,7 +19869,7 @@ S_put_code_point(pTHX_ SV *sv, UV c)
      PERL_ARGS_ASSERT_PUT_CODE_POINT;
  
      if (c > 255) {
-        Perl_sv_catpvf(aTHX_ sv, "\\x{%04"UVXf"}", c);
+        Perl_sv_catpvf(aTHX_ sv, "\\x{%04" UVXf "}", c);
      }
      else if (isPRINT(c)) {
         const char string = (char) c;
@@ -19665,30 +20014,36 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
           * mnemonic names.  Split off any of those at the beginning and end of
           * the range to print mnemonically.  It isn't possible for many of
           * these to be in a row, so this won't overwhelm with output */
-        while (isMNEMONIC_CNTRL(start) && start <= end) {
-            put_code_point(sv, start);
-            start++;
-        }
-        if (start < end && isMNEMONIC_CNTRL(end)) {
-
-            /* Here, the final character in the range has a mnemonic name.
-             * Work backwards from the end to find the final non-mnemonic */
-            UV temp_end = end - 1;
-            while (isMNEMONIC_CNTRL(temp_end)) {
-                temp_end--;
+        if (   start <= end
+            && (isMNEMONIC_CNTRL(start) || isMNEMONIC_CNTRL(end)))
+        {
+            while (isMNEMONIC_CNTRL(start) && start <= end) {
+                put_code_point(sv, start);
+                start++;
              }
  
-            /* And separately output the interior range that doesn't start or
-             * end with mnemonics */
-            put_range(sv, start, temp_end, FALSE);
+            /* If this didn't take care of the whole range ... */
+            if (start <= end) {
  
-            /* Then output the mnemonic trailing controls */
-            start = temp_end + 1;
-            while (start <= end) {
-                put_code_point(sv, start);
-                start++;
+                /* Look backwards from the end to find the final non-mnemonic
+                 * */
+                UV temp_end = end;
+                while (isMNEMONIC_CNTRL(temp_end)) {
+                    temp_end--;
+                }
+
+                /* And separately output the interior range that doesn't start
+                 * or end with mnemonics */
+                put_range(sv, start, temp_end, FALSE);
+
+                /* Then output the mnemonic trailing controls */
+                start = temp_end + 1;
+                while (start <= end) {
+                    put_code_point(sv, start);
+                    start++;
+                }
+                break;
              }
-            break;
          }
  
          /* As a final resort, output the range or subrange as hex. */
@@ -19698,10 +20053,10 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
                      : NUM_ANYOF_CODE_POINTS - 1;
  #if NUM_ANYOF_CODE_POINTS > 256
          format = (this_end < 256)
-                 ? "\\x%02"UVXf"-\\x%02"UVXf""
-                 : "\\x{%04"UVXf"}-\\x{%04"UVXf"}";
+                 ? "\\x%02" UVXf "-\\x%02" UVXf
+                 : "\\x{%04" UVXf "}-\\x{%04" UVXf "}";
  #else
-        format = "\\x%02"UVXf"-\\x%02"UVXf"";
+        format = "\\x%02" UVXf "-\\x%02" UVXf;
  #endif
          GCC_DIAG_IGNORE(-Wformat-nonliteral);
          Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
@@ -19776,7 +20131,9 @@ S_put_charclass_bitmap_innards_common(pTHX_
  )
  {
      /* Create and return an SV containing a displayable version of the bitmap
-     * and associated information determined by the input parameters. */
+     * and associated information determined by the input parameters.  If the
+     * output would have been only the inversion indicator '^', NULL is instead
+     * returned. */
  
      SV * output;
  
@@ -19835,9 +20192,8 @@ S_put_charclass_bitmap_innards_common(pTHX_
          }
      }
  
-    /* If the only thing we output is the '^', clear it */
      if (invert && SvCUR(output) == 1) {
-        SvCUR_set(output, 0);
+        return NULL;
      }
  
      return output;
@@ -19848,7 +20204,8 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
                                       char *bitmap,
                                       SV *nonbitmap_invlist,
                                       SV *only_utf8_locale_invlist,
-                                     const regnode * const node)
+                                     const regnode * const node,
+                                     const bool force_as_is_display)
  {
      /* Appends to 'sv' a displayable version of the innards of the bracketed
       * character class defined by the other arguments:
@@ -19864,13 +20221,16 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
       *  'node' is the regex pattern node.  It is needed only when the above two
       *      parameters are not null, and is passed so that this routine can
       *      tease apart the various reasons for them.
+     *  'force_as_is_display' is TRUE if this routine should definitely NOT try
+     *      to invert things to see if that leads to a cleaner display.  If
+     *      FALSE, this routine is free to use its judgment about doing this.
       *
       * It returns TRUE if there was actually something output.  (It may be that
       * the bitmap, etc is empty.)
       *
       * When called for outputting the bitmap of a non-ANYOF node, just pass the
-     * bitmap, with the succeeding parameters set to NULL.
-     *
+     * bitmap, with the succeeding parameters set to NULL, and the final one to
+     * FALSE.
       */
  
      /* In general, it tries to display the 'cleanest' representation of the
@@ -19878,7 +20238,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
       * whether the class itself is to be inverted.  However,  there are some
       * cases where it can't try inverting, as what actually matches isn't known
       * until runtime, and hence the inversion isn't either. */
-    bool inverting_allowed = TRUE;
+    bool inverting_allowed = ! force_as_is_display;
  
      int i;
      STRLEN orig_sv_cur = SvCUR(sv);
@@ -19894,7 +20254,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
                                         is UTF-8 */
  
      SV* as_is_display;      /* The output string when we take the inputs
-                              literally */
+                               literally */
      SV* inverted_display;   /* The output string when we invert the inputs */
  
      U8 flags = (node) ? ANYOF_FLAGS(node) : 0;
@@ -19932,14 +20292,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
              /* And this flag for matching all non-ASCII 0xFF and below */
              if (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
              {
-                if (invert) {
-                    not_utf8 = _new_invlist(0);
-                }
-                else {
-                    not_utf8 = invlist_clone(PL_UpperLatin1);
-                }
-                inverting_allowed = FALSE;  /* XXX needs more work to be able
-                                               to allow this */
+                not_utf8 = invlist_clone(PL_UpperLatin1);
              }
          }
          else if (OP(node) == ANYOFL) {
@@ -19969,7 +20322,11 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
      /* Accumulate the bit map into the unconditional match list */
      for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
          if (BITMAP_TEST(bitmap, i)) {
-            invlist = add_cp_to_invlist(invlist, i);
+            int start = i++;
+            for (; i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i); i++) {
+                /* empty */
+            }
+            invlist = _add_range_to_invlist(invlist, start, i-1);
          }
      }
  
@@ -20010,7 +20367,10 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
  
      /* If have to take the output as-is, just do that */
      if (! inverting_allowed) {
-        sv_catsv(sv, as_is_display);
+        if (as_is_display) {
+            sv_catsv(sv, as_is_display);
+            SvREFCNT_dec_NN(as_is_display);
+        }
      }
      else { /* But otherwise, create the output again on the inverted input, and
                use whichever version is shorter */
@@ -20037,6 +20397,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
           * conditional code points, so that when inverted, they will be gone
           * from it */
          _invlist_union(only_utf8, invlist, &invlist);
+        _invlist_union(not_utf8, invlist, &invlist);
          _invlist_union(only_utf8_locale, invlist, &invlist);
          _invlist_invert(invlist);
          _invlist_intersection(invlist, PL_InBitmap, &invlist);
@@ -20045,10 +20406,13 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
              _invlist_invert(only_utf8);
              _invlist_intersection(only_utf8, PL_UpperLatin1, &only_utf8);
          }
+        else if (not_utf8) {
  
-        if (not_utf8) {
-            _invlist_invert(not_utf8);
-            _invlist_intersection(not_utf8, PL_UpperLatin1, &not_utf8);
+            /* If a code point matches iff the target string is not in UTF-8,
+             * then complementing the result has it not match iff not in UTF-8,
+             * which is the same thing as matching iff it is UTF-8. */
+            only_utf8 = not_utf8;
+            not_utf8 = NULL;
          }
  
          if (only_utf8_locale) {
@@ -20067,17 +20431,19 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
  
          /* Use the shortest representation, taking into account our bias
           * against showing it inverted */
-        if (SvCUR(inverted_display) + inverted_bias
-            < SvCUR(as_is_display) + as_is_bias)
+        if (   inverted_display
+            && (   ! as_is_display
+                || (  SvCUR(inverted_display) + inverted_bias
+                    < SvCUR(as_is_display)    + as_is_bias)))
          {
             sv_catsv(sv, inverted_display);
          }
-        else {
+        else if (as_is_display) {
             sv_catsv(sv, as_is_display);
          }
  
-        SvREFCNT_dec_NN(as_is_display);
-        SvREFCNT_dec_NN(inverted_display);
+        SvREFCNT_dec(as_is_display);
+        SvREFCNT_dec(inverted_display);
      }
  
      SvREFCNT_dec_NN(invlist);
@@ -20089,11 +20455,11 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
      return SvCUR(sv) > orig_sv_cur;
  }
  
-#define CLEAR_OPTSTART \
+#define CLEAR_OPTSTART                                                       \
      if (optstart) STMT_START {                                               \
-        DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log,                       \
-                              " (%"IVdf" nodes)\n", (IV)(node - optstart))); \
-       optstart=NULL;                                                       \
+        DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_                                           \
+                              " (%" IVdf " nodes)\n", (IV)(node - optstart))); \
+        optstart=NULL;                                                       \
      } STMT_END
  
  #define DUMPUNTIL(b,e)                                                       \
@@ -20115,7 +20481,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
      PERL_ARGS_ASSERT_DUMPUNTIL;
  
  #ifdef DEBUG_DUMPUNTIL
-    PerlIO_printf(Perl_debug_log, "--- %d : %d - %d - %d\n",indent,node-start,
+    Perl_re_printf( aTHX_  "--- %d : %d - %d - %d\n",indent,node-start,
          last ? last-start : 0,plast ? plast-start : 0);
  #endif
  
@@ -20141,18 +20507,18 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
             CLEAR_OPTSTART;
  
          regprop(r, sv, node, NULL, NULL);
-       PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
+        Perl_re_printf( aTHX_  "%4" IVdf ":%*s%s", (IV)(node - start),
                       (int)(2*indent + 1), "", SvPVX_const(sv));
  
          if (OP(node) != OPTIMIZED) {
              if (next == NULL)          /* Next ptr. */
-                PerlIO_printf(Perl_debug_log, " (0)");
+                Perl_re_printf( aTHX_  " (0)");
              else if (PL_regkind[(U8)op] == BRANCH
                       && PL_regkind[OP(next)] != BRANCH )
-                PerlIO_printf(Perl_debug_log, " (FAIL)");
+                Perl_re_printf( aTHX_  " (FAIL)");
              else
-                PerlIO_printf(Perl_debug_log, " (%"IVdf")", (IV)(next - start));
-            (void)PerlIO_putc(Perl_debug_log, '\n');
+                Perl_re_printf( aTHX_  " (%" IVdf ")", (IV)(next - start));
+            Perl_re_printf( aTHX_ "\n");
          }
  
        after_print:
@@ -20186,12 +20552,12 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
  #endif
             const regnode *nextbranch= NULL;
             I32 word_idx;
-            sv_setpvs(sv, "");
+            SvPVCLEAR(sv);
             for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
                 SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);
  
-                PerlIO_printf(Perl_debug_log, "%*s%s ",
-                   (int)(2*(indent+3)), "",
+                Perl_re_indentf( aTHX_  "%s ",
+                    indent+3,
                      elem_ptr
                      ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
                                  SvCUR(*elem_ptr), 60,
@@ -20206,7 +20572,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                  );
                  if (trie->jump) {
                      U16 dist= trie->jump[word_idx+1];
-                   PerlIO_printf(Perl_debug_log, "(%"UVuf")\n",
+                    Perl_re_printf( aTHX_  "(%" UVuf ")\n",
                                 (UV)((dist ? this_trie + dist : next) - start));
                      if (dist) {
                          if (!nextbranch)
@@ -20216,7 +20582,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                      if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                          nextbranch= regnext((regnode *)nextbranch);
                  } else {
-                    PerlIO_printf(Perl_debug_log, "\n");
+                    Perl_re_printf( aTHX_  "\n");
                 }
             }
             if (last && next > last)
@@ -20256,7 +20622,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
      }
      CLEAR_OPTSTART;
  #ifdef DEBUG_DUMPUNTIL
-    PerlIO_printf(Perl_debug_log, "--- %d\n", (int)indent);
+    Perl_re_printf( aTHX_  "--- %d\n", (int)indent);
  #endif
      return node;
  }