Deparse: better handle BEGIN { use_ok() }

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index ac66432..97c5949 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -119,8 +119,7 @@ typedef struct scan_frame {
  
  /* Certain characters are output as a sequence with the first being a
   * backslash. */
-#define isBACKSLASHED_PUNCT(c)                                              \
-                    ((c) == '-' || (c) == ']' || (c) == '\\' || (c) == '^')
+#define isBACKSLASHED_PUNCT(c)  strchr("-[]\\^", c) /* NOTE(review): unlike the removed form this also matches '[', and strchr() matches the string terminator, so c == '\0' yields non-NULL -- confirm callers never pass NUL */
  
  
  struct RExC_state_t {
@@ -177,15 +176,13 @@ struct RExC_state_t {
      U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
      I32                in_lookbehind;
      I32                contains_locale;
-    I32                contains_i;
      I32                override_recoding;
  #ifdef EBCDIC
      I32                recode_x_to_native;
  #endif
      I32                in_multi_char_class;
-    struct reg_code_block *code_blocks;        /* positions of literal (?{})
+    struct reg_code_blocks *code_blocks;/* positions of literal (?{})
                                             within pattern */
-    int                num_code_blocks;        /* size of code_blocks[] */
      int                code_index;             /* next code_blocks[] slot */
      SSize_t     maxlen;                        /* mininum possible number of chars in string to match */
      scan_frame *frame_head;
@@ -272,8 +269,6 @@ struct RExC_state_t {
                                     (pRExC_state->study_chunk_recursed_bytes)
  #define RExC_in_lookbehind     (pRExC_state->in_lookbehind)
  #define RExC_contains_locale   (pRExC_state->contains_locale)
-#define RExC_contains_i (pRExC_state->contains_i)
-#define RExC_override_recoding (pRExC_state->override_recoding)
  #ifdef EBCDIC
  #   define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
  #endif
@@ -556,7 +551,6 @@ static const scan_data_t zero_scan_data =
  #define OOB_UNICODE            0xDEADBEEF
  
  #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
-#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
  
  
  /* length of regex to show in messages that don't mark a position within */
@@ -571,7 +565,7 @@ static const scan_data_t zero_scan_data =
  #define MARKER2 " <-- HERE "  /* marker as it appears within the regex */
  
  #define REPORT_LOCATION " in regex; marked by " MARKER1    \
-                        " in m/%"UTF8f MARKER2 "%"UTF8f"/"
+                        " in m/%" UTF8f MARKER2 "%" UTF8f "/"
  
  /* The code in this file in places uses one level of recursion with parsing
   * rebased to an alternate string constructed by us in memory.  This can take
@@ -659,11 +653,11 @@ static const scan_data_t zero_scan_data =
  } STMT_END
  
  #define        FAIL(msg) _FAIL(                            \
-    Perl_croak(aTHX_ "%s in regex m/%"UTF8f"%s/",          \
+    Perl_croak(aTHX_ "%s in regex m/%" UTF8f "%s/",        \
             msg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
  
  #define        FAIL2(msg,arg) _FAIL(                       \
-    Perl_croak(aTHX_ msg " in regex m/%"UTF8f"%s/",        \
+    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",      \
             arg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
  
  /*
@@ -1000,24 +994,25 @@ Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
  
  #define DEBUG_STUDYDATA(str,data,depth)                              \
  DEBUG_OPTIMISE_MORE_r(if(data){                                      \
-    Perl_re_indentf( aTHX_  "" str "Pos:%"IVdf"/%"IVdf                           \
-        " Flags: 0x%"UVXf,                                           \
+    Perl_re_indentf( aTHX_  "" str "Pos:%" IVdf "/%" IVdf            \
+        " Flags: 0x%" UVXf,                                          \
          depth,                                                       \
          (IV)((data)->pos_min),                                       \
          (IV)((data)->pos_delta),                                     \
          (UV)((data)->flags)                                          \
      );                                                               \
      DEBUG_SHOW_STUDY_FLAGS((data)->flags," [ ","]");                 \
-    Perl_re_printf( aTHX_                                                        \
-        " Whilem_c: %"IVdf" Lcp: %"IVdf" %s",                        \
+    Perl_re_printf( aTHX_                                            \
+        " Whilem_c: %" IVdf " Lcp: %" IVdf " %s",                    \
          (IV)((data)->whilem_c),                                      \
          (IV)((data)->last_closep ? *((data)->last_closep) : -1),     \
          is_inf ? "INF " : ""                                         \
      );                                                               \
      if ((data)->last_found)                                          \
-        Perl_re_printf( aTHX_                                                    \
-            "Last:'%s' %"IVdf":%"IVdf"/%"IVdf" %sFixed:'%s' @ %"IVdf \
-            " %sFloat: '%s' @ %"IVdf"/%"IVdf"",                      \
+        Perl_re_printf( aTHX_                                        \
+            "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf                   \
+            " %sFixed:'%s' @ %" IVdf                                 \
+            " %sFloat: '%s' @ %" IVdf "/%" IVdf,                     \
              SvPVX_const((data)->last_found),                         \
              (IV)((data)->last_end),                                  \
              (IV)((data)->last_start_min),                            \
@@ -2012,7 +2007,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
      for( state = 1 ; state < trie->statecount ; state++ ) {
         const U32 base = trie->states[ state ].trans.base;
  
-        Perl_re_indentf( aTHX_  "#%4"UVXf"|", depth+1, (UV)state);
+        Perl_re_indentf( aTHX_  "#%4" UVXf "|", depth+1, (UV)state);
  
          if ( trie->states[ state ].wordnum ) {
              Perl_re_printf( aTHX_  " W%4X", trie->states[ state ].wordnum );
@@ -2020,7 +2015,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
              Perl_re_printf( aTHX_  "%6s", "" );
          }
  
-        Perl_re_printf( aTHX_  " @%4"UVXf" ", (UV)base );
+        Perl_re_printf( aTHX_  " @%4" UVXf " ", (UV)base );
  
          if ( base ) {
              U32 ofs = 0;
@@ -2031,7 +2026,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
                                                                      != state))
                      ofs++;
  
-            Perl_re_printf( aTHX_  "+%2"UVXf"[ ", (UV)ofs);
+            Perl_re_printf( aTHX_  "+%2" UVXf "[ ", (UV)ofs);
  
              for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
                  if ( ( base + ofs >= trie->uniquecharcount )
@@ -2040,7 +2035,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
                          && trie->trans[ base + ofs
                                      - trie->uniquecharcount ].check == state )
                  {
-                   Perl_re_printf( aTHX_  "%*"UVXf, colwidth,
+                   Perl_re_printf( aTHX_  "%*" UVXf, colwidth,
                      (UV)trie->trans[ base + ofs - trie->uniquecharcount ].next
                     );
                  } else {
@@ -2089,7 +2084,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
      for( state=1 ; state < next_alloc ; state ++ ) {
          U16 charid;
  
-        Perl_re_indentf( aTHX_  " %4"UVXf" :",
+        Perl_re_indentf( aTHX_  " %4" UVXf " :",
              depth+1, (UV)state  );
          if ( ! trie->states[ state ].wordnum ) {
              Perl_re_printf( aTHX_  "%5s| ","");
@@ -2102,7 +2097,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
             SV ** const tmp = av_fetch( revcharmap,
                                          TRIE_LIST_ITEM(state,charid).forid, 0);
             if ( tmp ) {
-                Perl_re_printf( aTHX_  "%*s:%3X=%4"UVXf" | ",
+                Perl_re_printf( aTHX_  "%*s:%3X=%4" UVXf " | ",
                      colwidth,
                      pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp),
                                colwidth,
@@ -2173,22 +2168,22 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
  
      for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) {
  
-        Perl_re_indentf( aTHX_  "%4"UVXf" : ",
+        Perl_re_indentf( aTHX_  "%4" UVXf " : ",
              depth+1,
              (UV)TRIE_NODENUM( state ) );
  
          for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
              UV v=(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next );
              if (v)
-                Perl_re_printf( aTHX_  "%*"UVXf, colwidth, v );
+                Perl_re_printf( aTHX_  "%*" UVXf, colwidth, v );
              else
                  Perl_re_printf( aTHX_  "%*s", colwidth, "." );
          }
          if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) {
-            Perl_re_printf( aTHX_  " (%4"UVXf")\n",
+            Perl_re_printf( aTHX_  " (%4" UVXf ")\n",
                                              (UV)trie->trans[ state ].check );
          } else {
-            Perl_re_printf( aTHX_  " (%4"UVXf") W%4X\n",
+            Perl_re_printf( aTHX_  " (%4" UVXf ") W%4X\n",
                                              (UV)trie->trans[ state ].check,
              trie->states[ TRIE_NODENUM( state ) ].wordnum );
          }
@@ -2359,8 +2354,9 @@ is the recommended Unicode-aware way of saying
  
  #define TRIE_LIST_PUSH(state,fid,ns) STMT_START {               \
      if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) {    \
-       U32 ging = TRIE_LIST_LEN( state ) *= 2;                 \
+       U32 ging = TRIE_LIST_LEN( state ) * 2;                  \
         Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \
+        TRIE_LIST_LEN( state ) = ging;                          \
      }                                                           \
      TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid;     \
      TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns;   \
@@ -2702,7 +2698,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                  svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
  
                  if ( !svpp )
-                    Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc );
+                    Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%" UVXf, uvc );
  
                  if ( !SvTRUE( *svpp ) ) {
                      sv_setiv( *svpp, ++trie->uniquecharcount );
@@ -2846,7 +2842,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                          }
                          state = newstate;
                      } else {
-                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
+                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
                     }
                 }
             }
@@ -3043,7 +3039,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                          }
                          state = trie->trans[ state + charid ].next;
                      } else {
-                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
+                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
                      }
                      /* charid is now 0 if we dont know the char read, or
                       * nonzero if we do */
@@ -3176,7 +3172,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
             PerlMemShared_realloc( trie->states, laststate
                                    * sizeof(reg_trie_state) );
          DEBUG_TRIE_COMPILE_MORE_r(
-            Perl_re_indentf( aTHX_  "Alloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
+            Perl_re_indentf( aTHX_  "Alloc: %d Orig: %" IVdf " elements, Final:%" IVdf ". Savings of %%%5.2f\n",
                  depth+1,
                  (int)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount
                         + 1 ),
@@ -3188,7 +3184,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          } /* end table compress */
      }
      DEBUG_TRIE_COMPILE_MORE_r(
-            Perl_re_indentf( aTHX_  "Statecount:%"UVxf" Lasttrans:%"UVxf"\n",
+            Perl_re_indentf( aTHX_  "Statecount:%" UVxf " Lasttrans:%" UVxf "\n",
                  depth+1,
                  (UV)trie->statecount,
                  (UV)trie->lasttrans)
@@ -3239,7 +3235,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
              });
          }
          DEBUG_OPTIMISE_r(
-            Perl_re_indentf( aTHX_  "MJD offset:%"UVuf" MJD length:%"UVuf"\n",
+            Perl_re_indentf( aTHX_  "MJD offset:%" UVuf " MJD length:%" UVuf "\n",
                  depth+1,
                  (UV)mjd_offset, (UV)mjd_nodelen)
          );
@@ -3286,7 +3282,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                                  /* clear the bitmap */
                                  Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char);
                                  DEBUG_OPTIMISE_r(
-                                    Perl_re_indentf( aTHX_  "New Start State=%"UVuf" Class: [",
+                                    Perl_re_indentf( aTHX_  "New Start State=%" UVuf " Class: [",
                                          depth+1,
                                          (UV)state));
                                  if (first_ofs >= 0) {
@@ -3315,7 +3311,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                      char *ch = SvPV( *tmp, len );
                      DEBUG_OPTIMISE_r({
                          SV *sv=sv_newmortal();
-                        Perl_re_indentf( aTHX_  "Prefix State: %"UVuf" Ofs:%"UVuf" Char='%s'\n",
+                        Perl_re_indentf( aTHX_  "Prefix State: %" UVuf " Ofs:%" UVuf " Char='%s'\n",
                              depth+1,
                              (UV)state, (UV)first_ofs,
                              pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6,
@@ -3608,11 +3604,11 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
       */
      fail[ 0 ] = fail[ 1 ] = 0;
      DEBUG_TRIE_COMPILE_r({
-        Perl_re_indentf( aTHX_  "Stclass Failtable (%"UVuf" states): 0",
+        Perl_re_indentf( aTHX_  "Stclass Failtable (%" UVuf " states): 0",
                        depth, (UV)numstates
          );
          for( q_read=1; q_read<numstates; q_read++ ) {
-            Perl_re_printf( aTHX_  ", %"UVuf, (UV)fail[q_read]);
+            Perl_re_printf( aTHX_  ", %" UVuf, (UV)fail[q_read]);
          }
          Perl_re_printf( aTHX_  "\n");
      });
@@ -3910,7 +3906,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                      }
                      else {
                          STRLEN len;
-                        _to_utf8_fold_flags(s, d, &len, FOLD_FLAGS_FULL);
+                        _toFOLD_utf8_flags(s, s_end, d, &len, FOLD_FLAGS_FULL);
                          d += len;
                      }
                      s += s_len;
@@ -4451,7 +4447,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
                          DEBUG_TRIE_COMPILE_r({
                              regprop(RExC_rx, RExC_mysv, tail, NULL, pRExC_state);
-                            Perl_re_indentf( aTHX_  "%s %"UVuf":%s\n",
+                            Perl_re_indentf( aTHX_  "%s %" UVuf ":%s\n",
                                depth+1,
                                "Looking for TRIE'able sequences. Tail node is ",
                                (UV)(tail - RExC_emit_start),
@@ -5088,7 +5084,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     SAVEFREESV(RExC_rx_sv);
                     Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
                         "Quantifier unexpected on zero-length expression "
-                       "in regex m/%"UTF8f"/",
+                       "in regex m/%" UTF8f "/",
                          UTF8fARG(UTF, RExC_precomp_end - RExC_precomp,
                                   RExC_precomp));
                     (void)ReREFCNT_inc(RExC_rx_sv);
@@ -5215,15 +5211,21 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                             However, this time it's not a subexpression
                             we care about, but the expression itself. */
                          && (maxcount == REG_INFTY)
-                        && data && ++data->whilem_c < 16) {
+                        && data) {
                     /* This stays as CURLYX, we can put the count/of pair. */
                     /* Find WHILEM (as in regexec.c) */
                     regnode *nxt = oscan + NEXT_OFF(oscan);
  
                     if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
                         nxt += ARG(nxt);
-                   PREVOPER(nxt)->flags = (U8)(data->whilem_c
-                       | (RExC_whilem_seen << 4)); /* On WHILEM */
+                    nxt = PREVOPER(nxt);
+                    if (nxt->flags & 0xf) {
+                        /* we've already set whilem count on this node */
+                    } else if (++data->whilem_c < 16) {
+                        assert(data->whilem_c <= RExC_whilem_seen);
+                        nxt->flags = (U8)(data->whilem_c
+                            | (RExC_whilem_seen << 4)); /* On WHILEM */
+                    }
                 }
                 if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
                     pars++;
@@ -5284,13 +5286,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     /* It is counted once already... */
                     data->pos_min += minnext * (mincount - counted);
  #if 0
-Perl_re_printf( aTHX_  "counted=%"UVuf" deltanext=%"UVuf
-                              " SSize_t_MAX=%"UVuf" minnext=%"UVuf
-                              " maxcount=%"UVuf" mincount=%"UVuf"\n",
+Perl_re_printf( aTHX_  "counted=%" UVuf " deltanext=%" UVuf
+                              " SSize_t_MAX=%" UVuf " minnext=%" UVuf
+                              " maxcount=%" UVuf " mincount=%" UVuf "\n",
      (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount,
      (UV)mincount);
  if (deltanext != SSize_t_MAX)
-Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
+Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
      (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
            - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta));
  #endif
@@ -5595,7 +5597,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                         FAIL("Variable length lookbehind not implemented");
                      }
                      else if (minnext > (I32)U8_MAX) {
-                       FAIL2("Lookbehind longer than %"UVuf" not implemented",
+                       FAIL2("Lookbehind longer than %" UVuf " not implemented",
                                (UV)U8_MAX);
                      }
                      scan->flags = (U8)minnext;
@@ -5684,7 +5686,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                         FAIL("Variable length lookbehind not implemented");
                      }
                      else if (*minnextp > (I32)U8_MAX) {
-                       FAIL2("Lookbehind longer than %"UVuf" not implemented",
+                       FAIL2("Lookbehind longer than %" UVuf " not implemented",
                                (UV)U8_MAX);
                      }
                      scan->flags = (U8)*minnextp;
@@ -6098,7 +6100,7 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
  
      /* Dispatch a request to compile a regexp to correct regexp engine. */
      DEBUG_COMPILE_r({
-        Perl_re_printf( aTHX_  "Using engine %"UVxf"\n",
+        Perl_re_printf( aTHX_  "Using engine %" UVxf "\n",
                         PTR2UV(eng));
      });
      return CALLREGCOMP_ENG(eng, pattern, flags);
@@ -6125,6 +6127,39 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
  }
  
  
+static void /* Drop one reference to cbs; once the count is no longer positive, release every entry's src_regex, the entry array, and cbs itself. */
+S_free_codeblocks(pTHX_ struct reg_code_blocks *cbs)
+{
+    int n;
+
+    if (--cbs->refcnt > 0) /* count still positive after the decrement: nothing to free yet */
+        return;
+    for (n = 0; n < cbs->count; n++) {
+        REGEXP *rx = cbs->cb[n].src_regex;
+        cbs->cb[n].src_regex = NULL; /* clear the slot before dropping the reference it held */
+        SvREFCNT_dec(rx);
+    }
+    Safefree(cbs->cb); /* entry array first, then the header that pointed at it */
+    Safefree(cbs);
+}
+
+
+static struct reg_code_blocks * /* Allocate a reg_code_blocks header with count = ncode and refcnt = 1, plus an ncode-entry cb[] array (NULL when ncode is 0). */
+S_alloc_code_blocks(pTHX_  int ncode)
+{
+     struct reg_code_blocks *cbs;
+    Newx(cbs, 1, struct reg_code_blocks);
+    cbs->count = ncode;
+    cbs->refcnt = 1; /* starts with a single reference */
+    SAVEDESTRUCTOR_X(S_free_codeblocks, cbs); /* hands cbs to S_free_codeblocks as a destructor; presumably run on scope unwind -- confirm. NOTE(review): registered before cbs->cb is assigned below */
+    if (ncode)
+        Newx(cbs->cb, ncode, struct reg_code_block); /* NOTE(review): entries are not initialised here; callers appear to fill them in -- confirm */
+    else
+        cbs->cb = NULL; /* no entries requested */
+    return cbs;
+}
+
+
  /* upgrade pattern pat_p of length plen_p to UTF8, and if there are code
   * blocks, recalculate the indices. Update pat_p and plen_p in-place to
   * point to the realloced string and length.
@@ -6151,14 +6186,16 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
  
      while (s < *plen_p) {
          append_utf8_from_native_byte(src[s], &d);
+
          if (n < num_code_blocks) {
-            if (!do_end && pRExC_state->code_blocks[n].start == s) {
-                pRExC_state->code_blocks[n].start = d - dst - 1;
+            assert(pRExC_state->code_blocks);
+            if (!do_end && pRExC_state->code_blocks->cb[n].start == s) {
+                pRExC_state->code_blocks->cb[n].start = d - dst - 1;
                  assert(*(d - 1) == '(');
                  do_end = 1;
              }
-            else if (do_end && pRExC_state->code_blocks[n].end == s) {
-                pRExC_state->code_blocks[n].end = d - dst - 1;
+            else if (do_end && pRExC_state->code_blocks->cb[n].end == s) {
+                pRExC_state->code_blocks->cb[n].end = d - dst - 1;
                  assert(*(d - 1) == ')');
                  do_end = 0;
                  n++;
@@ -6278,10 +6315,10 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
              if (oplist->op_type == OP_NULL
                  && (oplist->op_flags & OPf_SPECIAL))
              {
-                assert(n < pRExC_state->num_code_blocks);
-                pRExC_state->code_blocks[n].start = pat ? SvCUR(pat) : 0;
-                pRExC_state->code_blocks[n].block = oplist;
-                pRExC_state->code_blocks[n].src_regex = NULL;
+                assert(n < pRExC_state->code_blocks->count);
+                pRExC_state->code_blocks->cb[n].start = pat ? SvCUR(pat) : 0;
+                pRExC_state->code_blocks->cb[n].block = oplist;
+                pRExC_state->code_blocks->cb[n].src_regex = NULL;
                  n++;
                  code = 1;
                  oplist = OpSIBLING(oplist); /* skip CONST */
@@ -6311,7 +6348,8 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
              sv_setsv(pat, sv);
              /* overloading involved: all bets are off over literal
               * code. Pretend we haven't seen it */
-            pRExC_state->num_code_blocks -= n;
+            if (n)
+                pRExC_state->code_blocks->count -= n;
              n = 0;
          }
          else  {
@@ -6361,7 +6399,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
              }
  
              if (code)
-                pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
+                pRExC_state->code_blocks->cb[n-1].end = SvCUR(pat)-1;
          }
  
          /* extract any code blocks within any embedded qr//'s */
@@ -6370,25 +6408,31 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
          {
  
              RXi_GET_DECL(ReANY((REGEXP *)rx), ri);
-            if (ri->num_code_blocks) {
+            if (ri->code_blocks && ri->code_blocks->count) {
                  int i;
                  /* the presence of an embedded qr// with code means
                   * we should always recompile: the text of the
                   * qr// may not have changed, but it may be a
                   * different closure than last time */
                  *recompile_p = 1;
-                Renew(pRExC_state->code_blocks,
-                    pRExC_state->num_code_blocks + ri->num_code_blocks,
-                    struct reg_code_block);
-                pRExC_state->num_code_blocks += ri->num_code_blocks;
+                if (pRExC_state->code_blocks) {
+                    int new_count = pRExC_state->code_blocks->count
+                            + ri->code_blocks->count;
+                    Renew(pRExC_state->code_blocks->cb,
+                            new_count, struct reg_code_block);
+                    pRExC_state->code_blocks->count = new_count;
+                }
+                else
+                    pRExC_state->code_blocks = S_alloc_code_blocks(aTHX_
+                                                    ri->code_blocks->count);
  
-                for (i=0; i < ri->num_code_blocks; i++) {
+                for (i=0; i < ri->code_blocks->count; i++) {
                      struct reg_code_block *src, *dst;
                      STRLEN offset =  orig_patlen
                          + ReANY((REGEXP *)rx)->pre_prefix;
-                    assert(n < pRExC_state->num_code_blocks);
-                    src = &ri->code_blocks[i];
-                    dst = &pRExC_state->code_blocks[n];
+                    assert(n < pRExC_state->code_blocks->count);
+                    src = &ri->code_blocks->cb[i];
+                    dst = &pRExC_state->code_blocks->cb[n];
                      dst->start     = src->start + offset;
                      dst->end       = src->end   + offset;
                      dst->block     = src->block;
@@ -6423,10 +6467,11 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
      PERL_UNUSED_CONTEXT;
  
      for (s = 0; s < plen; s++) {
-       if (n < pRExC_state->num_code_blocks
-           && s == pRExC_state->code_blocks[n].start)
+       if (   pRExC_state->code_blocks
+            && n < pRExC_state->code_blocks->count
+           && s == pRExC_state->code_blocks->cb[n].start)
         {
-           s = pRExC_state->code_blocks[n].end;
+           s = pRExC_state->code_blocks->cb[n].end;
             n++;
             continue;
         }
@@ -6486,7 +6531,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
         int n = 0;
         STRLEN s;
         char *p, *newpat;
-       int newlen = plen + 6; /* allow for "qr''x\0" extra chars */
+       int newlen = plen + 7; /* allow for "qr''xx\0" extra chars */
         SV *sv, *qr_ref;
         dSP;
  
@@ -6501,12 +6546,13 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
         *p++ = 'q'; *p++ = 'r'; *p++ = '\'';
  
         for (s = 0; s < plen; s++) {
-           if (n < pRExC_state->num_code_blocks
-               && s == pRExC_state->code_blocks[n].start)
+           if (   pRExC_state->code_blocks
+               && n < pRExC_state->code_blocks->count
+               && s == pRExC_state->code_blocks->cb[n].start)
             {
                 /* blank out literal code block */
                 assert(pat[s] == '(');
-               while (s <= pRExC_state->code_blocks[n].end) {
+               while (s <= pRExC_state->code_blocks->cb[n].end) {
                     *p++ = '_';
                     s++;
                 }
@@ -6519,8 +6565,12 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
             *p++ = pat[s];
         }
         *p++ = '\'';
-       if (pRExC_state->pm_flags & RXf_PMf_EXTENDED)
+       if (pRExC_state->pm_flags & RXf_PMf_EXTENDED) {
             *p++ = 'x';
+            if (pRExC_state->pm_flags & RXf_PMf_EXTENDED_MORE) {
+                *p++ = 'x';
+            }
+        }
         *p++ = '\0';
         DEBUG_COMPILE_r({
              Perl_re_printf( aTHX_
@@ -6546,11 +6596,8 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
         {
             SV * const errsv = ERRSV;
             if (SvTRUE_NN(errsv))
-           {
-               Safefree(pRExC_state->code_blocks);
                  /* use croak_sv ? */
-               Perl_croak_nocontext("%"SVf, SVfARG(errsv));
-           }
+               Perl_croak_nocontext("%" SVf, SVfARG(errsv));
         }
         assert(SvROK(qr_ref));
         qr = SvRV(qr_ref);
@@ -6582,42 +6629,46 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
         struct reg_code_block *new_block, *dst;
         RExC_state_t * const r1 = pRExC_state; /* convenient alias */
         int i1 = 0, i2 = 0;
+        int r1c, r2c;
  
-       if (!r2->num_code_blocks) /* we guessed wrong */
+       if (!r2->code_blocks || !r2->code_blocks->count) /* we guessed wrong */
         {
             SvREFCNT_dec_NN(qr);
             return 1;
         }
  
-       Newx(new_block,
-           r1->num_code_blocks + r2->num_code_blocks,
-           struct reg_code_block);
+        if (!r1->code_blocks)
+            r1->code_blocks = S_alloc_code_blocks(aTHX_ 0);
+
+        r1c = r1->code_blocks->count;
+        r2c = r2->code_blocks->count;
+
+       Newx(new_block, r1c + r2c, struct reg_code_block);
+
         dst = new_block;
  
-       while (    i1 < r1->num_code_blocks
-               || i2 < r2->num_code_blocks)
-       {
+       while (i1 < r1c || i2 < r2c) {
             struct reg_code_block *src;
             bool is_qr = 0;
  
-           if (i1 == r1->num_code_blocks) {
-               src = &r2->code_blocks[i2++];
+           if (i1 == r1c) {
+               src = &r2->code_blocks->cb[i2++];
                 is_qr = 1;
             }
-           else if (i2 == r2->num_code_blocks)
-               src = &r1->code_blocks[i1++];
-           else if (  r1->code_blocks[i1].start
-                    < r2->code_blocks[i2].start)
+           else if (i2 == r2c)
+               src = &r1->code_blocks->cb[i1++];
+           else if (  r1->code_blocks->cb[i1].start
+                    < r2->code_blocks->cb[i2].start)
             {
-               src = &r1->code_blocks[i1++];
-               assert(src->end < r2->code_blocks[i2].start);
+               src = &r1->code_blocks->cb[i1++];
+               assert(src->end < r2->code_blocks->cb[i2].start);
             }
             else {
-               assert(  r1->code_blocks[i1].start
-                      > r2->code_blocks[i2].start);
-               src = &r2->code_blocks[i2++];
+               assert(  r1->code_blocks->cb[i1].start
+                      > r2->code_blocks->cb[i2].start);
+               src = &r2->code_blocks->cb[i2++];
                 is_qr = 1;
-               assert(src->end < r1->code_blocks[i1].start);
+               assert(src->end < r1->code_blocks->cb[i1].start);
             }
  
             assert(pat[src->start] == '(');
@@ -6629,9 +6680,9 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
                                     : src->src_regex;
             dst++;
         }
-       r1->num_code_blocks += r2->num_code_blocks;
-       Safefree(r1->code_blocks);
-       r1->code_blocks = new_block;
+       r1->code_blocks->count += r2c;
+       Safefree(r1->code_blocks->cb);
+       r1->code_blocks->cb = new_block;
      }
  
      SvREFCNT_dec_NN(qr);
@@ -6750,7 +6801,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      SSize_t minlen = 0;
      U32 rx_flags;
      SV *pat;
-    SV *code_blocksv = NULL;
      SV** new_patternp = patternp;
  
      /* these are all flags - maybe they should be turned
@@ -6808,7 +6858,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      pRExC_state->warn_text = NULL;
      pRExC_state->code_blocks = NULL;
-    pRExC_state->num_code_blocks = 0;
  
      if (is_bare_re)
         *is_bare_re = FALSE;
@@ -6822,10 +6871,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o))
             if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
                 ncode++; /* count of DO blocks */
-       if (ncode) {
-           pRExC_state->num_code_blocks = ncode;
-           Newx(pRExC_state->code_blocks, ncode, struct reg_code_block);
-       }
+
+       if (ncode)
+            pRExC_state->code_blocks = S_alloc_code_blocks(aTHX_ ncode);
      }
  
      if (!pat_count) {
@@ -6869,7 +6917,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      /* set expr to the first arg op */
  
-    if (pRExC_state->num_code_blocks
+    if (pRExC_state->code_blocks && pRExC_state->code_blocks->count
           && expr->op_type != OP_CONST)
      {
              expr = cLISTOPx(expr)->op_first;
@@ -6891,7 +6939,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
              if (is_bare_re)
                  *is_bare_re = TRUE;
              SvREFCNT_inc(re);
-            Safefree(pRExC_state->code_blocks);
              DEBUG_PARSE_r(Perl_re_printf( aTHX_
                  "Precompiled pattern%s\n",
                      orig_rx_flags & RXf_SPLIT ? " for split" : ""));
@@ -6911,7 +6958,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             pat = newSVpvn_flags(exp, plen, SVs_TEMP |
                                         (IN_BYTES ? 0 : SvUTF8(pat)));
         }
-       Safefree(pRExC_state->code_blocks);
         return CALLREGCOMP_ENG(eng, pat, orig_rx_flags);
      }
  
@@ -6921,7 +6967,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_uni_semantics = 0;
      RExC_seen_unfolded_sharp_s = 0;
      RExC_contains_locale = 0;
-    RExC_contains_i = 0;
      RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
      RExC_study_started = 0;
      pRExC_state->runtime_code_qr = NULL;
@@ -6967,15 +7012,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          && memEQ(RX_PRECOMP(old_re), exp, plen)
         && !runtime_code /* with runtime code, always recompile */ )
      {
-        Safefree(pRExC_state->code_blocks);
          return old_re;
      }
  
      rx_flags = orig_rx_flags;
  
-    if (rx_flags & PMf_FOLD) {
-        RExC_contains_i = 1;
-    }
      if (   initial_charset == REGEX_DEPENDS_CHARSET
          && (RExC_utf8 ||RExC_uni_semantics))
      {
@@ -6999,7 +7040,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             /* whoops, we have a non-utf8 pattern, whilst run-time code
              * got compiled as utf8. Try again with a utf8 pattern */
              S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
-                                    pRExC_state->num_code_blocks);
+                pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
              goto redo_first_pass;
         }
      }
@@ -7012,7 +7053,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_in_lookbehind = 0;
      RExC_seen_zerolen = *exp == '^' ? -1 : 0;
      RExC_extralen = 0;
-    RExC_override_recoding = 0;
  #ifdef EBCDIC
      RExC_recode_x_to_native = 0;
  #endif
@@ -7056,17 +7096,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          RExC_lastnum=0;
          RExC_lastparse=NULL;
      );
-    /* reg may croak on us, not giving us a chance to free
-       pRExC_state->code_blocks.  We cannot SAVEFREEPV it now, as we may
-       need it to survive as long as the regexp (qr/(?{})/).
-       We must check that code_blocksv is not already set, because we may
-       have jumped back to restart the sizing pass. */
-    if (pRExC_state->code_blocks && !code_blocksv) {
-       code_blocksv = newSV_type(SVt_PV);
-       SAVEFREESV(code_blocksv);
-       SvPV_set(code_blocksv, (char *)pRExC_state->code_blocks);
-       SvLEN_set(code_blocksv, 1); /*sufficient to make sv_clear free it*/
-    }
+
      if (reg(pRExC_state, 0, &flags,1) == NULL) {
          /* It's possible to write a regexp in ascii that represents Unicode
          codepoints outside of the byte range, such as via \x{100}. If we
@@ -7079,7 +7109,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          if (flags & RESTART_PASS1) {
              if (flags & NEED_UTF8) {
                  S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
-                                    pRExC_state->num_code_blocks);
+                pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
              }
              else {
                  DEBUG_PARSE_r(Perl_re_printf( aTHX_
@@ -7088,14 +7118,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
              goto redo_first_pass;
          }
-        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#"UVxf"", (UV) flags);
+        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#" UVxf, (UV) flags);
      }
-    if (code_blocksv)
-       SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */
  
      DEBUG_PARSE_r({
          Perl_re_printf( aTHX_
-            "Required size %"IVdf" nodes\n"
+            "Required size %" IVdf " nodes\n"
              "Starting second pass (creation)\n",
              (IV)RExC_size);
          RExC_lastnum=0;
@@ -7144,16 +7172,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      if (pm_flags & PMf_IS_QR) {
         ri->code_blocks = pRExC_state->code_blocks;
-       ri->num_code_blocks = pRExC_state->num_code_blocks;
-    }
-    else
-    {
-       int n;
-       for (n = 0; n < pRExC_state->num_code_blocks; n++)
-           if (pRExC_state->code_blocks[n].src_regex)
-               SAVEFREESV(pRExC_state->code_blocks[n].src_regex);
-       if(pRExC_state->code_blocks)
-           SAVEFREEPV(pRExC_state->code_blocks); /* often null */
+       if (ri->code_blocks)
+            ri->code_blocks->refcnt++;
      }
  
      {
@@ -7170,7 +7190,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                                                     == REG_RUN_ON_COMMENT_SEEN);
         U8 reganch = (U8)((r->extflags & RXf_PMf_STD_PMMOD)
                             >> RXf_PMf_STD_PMMOD_SHIFT);
-       const char *fptr = STD_PAT_MODS;        /*"msixn"*/
+       const char *fptr = STD_PAT_MODS;        /*"msixxn"*/
         char *p;
  
          /* We output all the necessary flags; we never output a minus, as all
@@ -7233,7 +7253,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  #ifdef RE_TRACK_PATTERN_OFFSETS
      Newxz(ri->u.offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
      DEBUG_OFFSETS_r(Perl_re_printf( aTHX_
-                          "%s %"UVuf" bytes for offset annotations.\n",
+                          "%s %" UVuf " bytes for offset annotations.\n",
                            ri->u.offsets ? "Got" : "Couldn't get",
                            (UV)((2*RExC_size+1) * sizeof(U32))));
  #endif
@@ -7287,7 +7307,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_npar = 1;
      if (reg(pRExC_state, 0, &flags,1) == NULL) {
         ReREFCNT_dec(rx);
-        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags);
+        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#" UVxf, (UV) flags);
      }
      DEBUG_OPTIMISE_r(
          Perl_re_printf( aTHX_  "Starting post parse optimization\n");
@@ -7432,7 +7452,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
              !sawlookahead &&
             (OP(first) == STAR &&
             PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
-            !(r->intflags & PREGf_ANCH) && !pRExC_state->num_code_blocks)
+            !(r->intflags & PREGf_ANCH) && !pRExC_state->code_blocks)
         {
             /* turn .* into ^.* with an implied $*=1 */
             const int type =
@@ -7445,7 +7465,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         }
          if (sawplus && !sawminmod && !sawlookahead
              && (!sawopen || !RExC_sawback)
-           && !pRExC_state->num_code_blocks) /* May examine pos and $& */
+           && !pRExC_state->code_blocks) /* May examine pos and $& */
             /* x+ must match at the 1st pos of run of x's */
             r->intflags |= PREGf_SKIP;
  
@@ -7453,12 +7473,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  #ifdef TRIE_STUDY_OPT
         DEBUG_PARSE_r(
             if (!restudied)
-                Perl_re_printf( aTHX_  "first at %"IVdf"\n",
+                Perl_re_printf( aTHX_  "first at %" IVdf "\n",
                               (IV)(first - scan + 1))
          );
  #else
         DEBUG_PARSE_r(
-            Perl_re_printf( aTHX_  "first at %"IVdf"\n",
+            Perl_re_printf( aTHX_  "first at %" IVdf "\n",
                 (IV)(first - scan + 1))
          );
  #endif
@@ -7685,7 +7705,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      /* Guard against an embedded (?=) or (?<=) with a longer minlen than
         the "real" pattern. */
      DEBUG_OPTIMISE_r({
-        Perl_re_printf( aTHX_ "minlen: %"IVdf" r->minlen:%"IVdf" maxlen:%"IVdf"\n",
+        Perl_re_printf( aTHX_ "minlen: %" IVdf " r->minlen:%" IVdf " maxlen:%" IVdf "\n",
                        (IV)minlen, (IV)r->minlen, (IV)RExC_maxlen);
      });
      r->minlenret = minlen;
@@ -7701,7 +7721,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      if (RExC_seen & REG_LOOKBEHIND_SEEN)
          r->extflags |= RXf_NO_INPLACE_SUBST; /* inplace might break the
                                                  lookbehind */
-    if (pRExC_state->num_code_blocks)
+    if (pRExC_state->code_blocks)
         r->extflags |= RXf_EVAL_SEEN;
      if (RExC_seen & REG_VERBARG_SEEN)
      {
@@ -7773,6 +7793,18 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      while ( RExC_recurse_count > 0 ) {
          const regnode *scan = RExC_recurse[ --RExC_recurse_count ];
+        /*
+         * This data structure is set up in study_chunk() and is used
+         * to calculate the distance between a GOSUB regopcode and
+         * the OPEN/CURLYM (CURLYM's are special and can act like OPEN's)
+         * it refers to.
+         *
+         * If for some reason someone writes code that optimises
+         * away a GOSUB opcode then the assert should be changed to
+         * an if(scan) to guard the ARG2L_SET() - Yves
+         *
+         */
+        assert(scan && OP(scan) == GOSUB);
          ARG2L_SET( scan, RExC_open_parens[ARG(scan)] - scan );
      }
  
@@ -7793,10 +7825,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          STRLEN i;
          GET_RE_DEBUG_FLAGS_DECL;
          Perl_re_printf( aTHX_
-                      "Offsets: [%"UVuf"]\n\t", (UV)ri->u.offsets[0]);
+                      "Offsets: [%" UVuf "]\n\t", (UV)ri->u.offsets[0]);
          for (i = 1; i <= len; i++) {
              if (ri->u.offsets[i*2-1] || ri->u.offsets[i*2])
-                Perl_re_printf( aTHX_  "%"UVuf":%"UVuf"[%"UVuf"] ",
+                Perl_re_printf( aTHX_  "%" UVuf ":%" UVuf "[%" UVuf "] ",
                  (UV)i, (UV)ri->u.offsets[i*2-1], (UV)ri->u.offsets[i*2]);
              }
          Perl_re_printf( aTHX_  "\n");
@@ -7863,21 +7895,18 @@ SV*
  Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
                           const U32 flags)
  {
-    AV *retarray = NULL;
      SV *ret;
      struct regexp *const rx = ReANY(r);
  
      PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
  
-    if (flags & RXapif_ALL)
-        retarray=newAV();
-
      if (rx && RXp_PAREN_NAMES(rx)) {
          HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
          if (he_str) {
              IV i;
              SV* sv_dat=HeVAL(he_str);
              I32 *nums=(I32*)SvPVX(sv_dat);
+            AV * const retarray = (flags & RXapif_ALL) ? newAV() : NULL;
              for ( i=0; i<SvIVX(sv_dat); i++ ) {
                  if ((I32)(rx->nparens) >= nums[i]
                      && rx->offs[nums[i]].start != -1
@@ -8138,7 +8167,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
          }
      } else {
        ret_undef:
-        sv_setsv(sv,&PL_sv_undef);
+        sv_set_undef(sv);
          return;
      }
  }
@@ -8270,17 +8299,18 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  
      assert (RExC_parse <= RExC_end);
      if (RExC_parse == RExC_end) NOOP;
-    else if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
+    else if (isIDFIRST_lazy_if_safe(RExC_parse, RExC_end, UTF)) {
           /* Note that the code here assumes well-formed UTF-8.  Skip IDFIRST by
            * using do...while */
         if (UTF)
             do {
                 RExC_parse += UTF8SKIP(RExC_parse);
-           } while (isWORDCHAR_utf8((U8*)RExC_parse));
+           } while (   RExC_parse < RExC_end
+                     && isWORDCHAR_utf8_safe((U8*)RExC_parse, (U8*) RExC_end));
         else
             do {
                 RExC_parse++;
-           } while (isWORDCHAR(*RExC_parse));
+           } while (RExC_parse < RExC_end && isWORDCHAR(*RExC_parse));
      } else {
          RExC_parse++; /* so the <- from the vFAIL is after the offending
                           character */
@@ -8731,7 +8761,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
         if (   array[final_element] > start
             || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
         {
-           Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%"UVuf", start=%"UVuf", match=%c",
+           Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c",
                      array[final_element], start,
                      ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f');
         }
@@ -9960,18 +9990,18 @@ S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
      invlist_iterinit(invlist);
      while (invlist_iternext(invlist, &start, &end)) {
         if (end == UV_MAX) {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%cINFINITY%c",
+           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFINITY%c",
                                            start, intra_range_delimiter,
                                                   inter_range_delimiter);
         }
         else if (end != start) {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c%04"UVXf"%c",
+           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
                                           start,
                                                     intra_range_delimiter,
                                                    end, inter_range_delimiter);
         }
         else {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c",
+           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
                                            start, inter_range_delimiter);
         }
      }
@@ -10018,16 +10048,16 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
      while (invlist_iternext(invlist, &start, &end)) {
         if (end == UV_MAX) {
             Perl_dump_indent(aTHX_ level, file,
-                                       "%s[%"UVuf"] 0x%04"UVXf" .. INFINITY\n",
+                                       "%s[%" UVuf "] 0x%04" UVXf " .. INFINITY\n",
                                     indent, (UV)count, start);
         }
         else if (end != start) {
             Perl_dump_indent(aTHX_ level, file,
-                                    "%s[%"UVuf"] 0x%04"UVXf" .. 0x%04"UVXf"\n",
+                                    "%s[%" UVuf "] 0x%04" UVXf " .. 0x%04" UVXf "\n",
                                 indent, (UV)count, start,         end);
         }
         else {
-           Perl_dump_indent(aTHX_ level, file, "%s[%"UVuf"] 0x%04"UVXf"\n",
+           Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n",
                                              indent, (UV)count, start);
         }
          count += 2;
@@ -10043,9 +10073,10 @@ Perl__load_PL_utf8_foldclosures (pTHX)
       * to force that */
      if (! PL_utf8_tofold) {
          U8 dummy[UTF8_MAXBYTES_CASE+1];
+        const U8 hyphen[] = HYPHEN_UTF8;
  
          /* This string is just a short named one above \xff */
-        to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
+        toFOLD_utf8_safe(hyphen, hyphen + sizeof(hyphen) - 1, dummy, NULL);
          assert(PL_utf8_tofold); /* Verify that worked */
      }
      PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
@@ -10065,9 +10096,6 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
      UV len_a = _invlist_len(a);
      UV len_b = _invlist_len(b);
  
-    UV i = 0;              /* current index into the arrays */
-    bool retval = TRUE;     /* Assume are identical until proven otherwise */
-
      PERL_ARGS_ASSERT__INVLISTEQ;
  
      /* If are to compare 'a' with the complement of b, set it
@@ -10097,20 +10125,9 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
          }
      }
  
-    /* Make sure that the lengths are the same, as well as the final element
-     * before looping through the remainder.  (Thus we test the length, final,
-     * and first elements right off the bat) */
-    if (len_a != len_b || array_a[len_a-1] != array_b[len_a-1]) {
-        retval = FALSE;
-    }
-    else for (i = 0; i < len_a - 1; i++) {
-        if (array_a[i] != array_b[i]) {
-            retval = FALSE;
-            break;
-        }
-    }
+    return    len_a == len_b
+           && memEQ(array_a, array_b, len_a * sizeof(array_a[0]));
  
-    return retval;
  }
  #endif
  
@@ -10196,7 +10213,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
                  }
                  else {
                      STRLEN len;
-                    to_utf8_fold(s, d, &len);
+                    toFOLD_utf8_safe(s, e, d, &len);
                      d += len;
                      s += UTF8SKIP(s);
                  }
@@ -10237,7 +10254,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
              {
                  AV* list = (AV*) *listp;
                  IV k;
-                for (k = 0; k <= av_tindex_nomg(list); k++) {
+                for (k = 0; k <= av_tindex_skip_len_mg(list); k++) {
                      SV** c_p = av_fetch(list, k, FALSE);
                      UV c;
                      assert(c_p);
@@ -10446,26 +10463,28 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  }
                  flagsp = &negflags;
                  wastedflags = 0;  /* reset so (?g-c) warns twice */
+                x_mod_count = 0;
                  break;
              case ':':
              case ')':
+
+                if ((posflags & (RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE)) == RXf_PMf_EXTENDED) {
+                    negflags |= RXf_PMf_EXTENDED_MORE;
+                }
                  RExC_flags |= posflags;
+
+                if (negflags & RXf_PMf_EXTENDED) {
+                    negflags |= RXf_PMf_EXTENDED_MORE;
+                }
                  RExC_flags &= ~negflags;
                  set_regex_charset(&RExC_flags, cs);
-                if (RExC_flags & RXf_PMf_FOLD) {
-                    RExC_contains_i = 1;
-                }
  
-                if (UNLIKELY((x_mod_count) > 1)) {
-                    vFAIL("Only one /x regex modifier is allowed");
-                }
                  return;
-                /*NOTREACHED*/
              default:
                fail_modifiers:
                  RExC_parse += SKIP_IF_CHAR(RExC_parse);
                 /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
-                vFAIL2utf8f("Sequence (%"UTF8f"...) not recognized",
+                vFAIL2utf8f("Sequence (%" UTF8f "...) not recognized",
                        UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
                  NOT_REACHED; /*NOTREACHED*/
          }
@@ -10677,7 +10696,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             if ( ! op ) {
                 RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
                  vFAIL2utf8f(
-                    "Unknown verb pattern '%"UTF8f"'",
+                    "Unknown verb pattern '%" UTF8f "'",
                      UTF8fARG(UTF, verb_len, start_verb));
             }
              if ( arg_required && !start_arg ) {
@@ -10974,7 +10993,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     }
                     RExC_recurse_count++;
                      DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Recurse #%"UVuf" to %"IVdf"\n",
+                        "%*s%*s Recurse #%" UVuf " to %" IVdf "\n",
                                22, "|    |", (int)(depth * 2 + 1), "",
                                (UV)ARG(ret), (IV)ARG2L(ret)));
                  }
@@ -10996,7 +11015,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      RExC_parse += SKIP_IF_CHAR(RExC_parse);
                      /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                      vFAIL2utf8f(
-                        "Sequence (%"UTF8f"...) not recognized",
+                        "Sequence (%" UTF8f "...) not recognized",
                          UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
                     NOT_REACHED; /*NOTREACHED*/
                 }
@@ -11011,9 +11030,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
  
                 RExC_seen_zerolen++;
  
-               if (   !pRExC_state->num_code_blocks
-                   || pRExC_state->code_index >= pRExC_state->num_code_blocks
-                   || pRExC_state->code_blocks[pRExC_state->code_index].start
+               if (   !pRExC_state->code_blocks
+                   || pRExC_state->code_index
+                                        >= pRExC_state->code_blocks->count
+                   || pRExC_state->code_blocks->cb[pRExC_state->code_index].start
                         != (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
                             - RExC_start)
                 ) {
@@ -11022,7 +11042,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     FAIL("Eval-group not allowed at runtime, use re 'eval'");
                 }
                 /* this is a pre-compiled code block (?{...}) */
-               cb = &pRExC_state->code_blocks[pRExC_state->code_index];
+               cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index];
                 RExC_parse = RExC_start + cb->end;
                 if (!SIZE_ONLY) {
                     OP *o = cb->block;
@@ -11198,7 +11218,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                              *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                              return NULL;
                          }
-                        FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"",
+                        FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
                                (UV) flags);
                      } else
                          REGTAIL(pRExC_state, br, reganode(pRExC_state,
@@ -11219,7 +11239,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                  *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                                  return NULL;
                              }
-                            FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"",
+                            FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
                                    (UV) flags);
                          }
                          REGTAIL(pRExC_state, ret, lastbr);
@@ -11287,7 +11307,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  if (RExC_open_parens && !RExC_open_parens[parno])
                 {
                      DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Setting open paren #%"IVdf" to %d\n",
+                        "%*s%*s Setting open paren #%" IVdf " to %d\n",
                          22, "|    |", (int)(depth * 2 + 1), "",
                         (IV)parno, REG_NODE_NUM(ret)));
                      RExC_open_parens[parno]= ret;
@@ -11317,7 +11337,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              *flagp = flags & (RESTART_PASS1|NEED_UTF8);
              return NULL;
          }
-        FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags);
+        FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
      }
      if (*RExC_parse == '|') {
         if (!SIZE_ONLY && RExC_extralen) {
@@ -11364,7 +11384,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                  return NULL;
              }
-            FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
          }
          REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
         lastbr = br;
@@ -11381,7 +11401,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             ender = reganode(pRExC_state, CLOSE, parno);
              if ( RExC_close_parens ) {
                  DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Setting close paren #%"IVdf" to %d\n",
+                        "%*s%*s Setting close paren #%" IVdf " to %d\n",
                          22, "|    |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ender)));
                  RExC_close_parens[parno]= ender;
                 if (RExC_nestroot == parno)
@@ -11418,7 +11438,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              DEBUG_PARSE_MSG("lsbr");
              regprop(RExC_rx, RExC_mysv1, lastbr, NULL, pRExC_state);
              regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state);
-            Perl_re_printf( aTHX_  "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+            Perl_re_printf( aTHX_  "~ tying lastbr %s (%" IVdf ") to ender %s (%" IVdf ") offset %" IVdf "\n",
                            SvPV_nolen_const(RExC_mysv1),
                            (IV)REG_NODE_NUM(lastbr),
                            SvPV_nolen_const(RExC_mysv2),
@@ -11457,7 +11477,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      DEBUG_PARSE_MSG("NADA");
                      regprop(RExC_rx, RExC_mysv1, ret, NULL, pRExC_state);
                      regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state);
-                    Perl_re_printf( aTHX_  "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+                    Perl_re_printf( aTHX_  "~ converting ret %s (%" IVdf ") to ender %s (%" IVdf ") offset %" IVdf "\n",
                                    SvPV_nolen_const(RExC_mysv1),
                                    (IV)REG_NODE_NUM(ret),
                                    SvPV_nolen_const(RExC_mysv2),
@@ -11578,7 +11598,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
                  *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                  return NULL;
              }
-            FAIL2("panic: regpiece returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regpiece returned NULL, flags=%#" UVxf, (UV) flags);
         }
         else if (ret == NULL)
              ret = latest;
@@ -11607,7 +11627,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
  }
  
  /*
- - regpiece - something followed by possible [*+?]
+ - regpiece - something followed by possible quantifier * + ? {n,m}
   *
   * Note that the branching code sequences used for ? and the general cases
   * of * and + are somewhat optimized:  they use the same NOTHING node as
@@ -11650,7 +11670,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         if (flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8))
             *flagp |= flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8);
          else
-            FAIL2("panic: regatom returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regatom returned NULL, flags=%#" UVxf, (UV) flags);
         return(NULL);
      }
  
@@ -11702,19 +11722,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             nextchar(pRExC_state);
              if (max < min) {    /* If can't match, warn and optimize to fail
                                     unconditionally */
-                if (SIZE_ONLY) {
-
-                    /* We can't back off the size because we have to reserve
-                     * enough space for all the things we are about to throw
-                     * away, but we can shrink it by the amount we are about
-                     * to re-use here */
-                    RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
-                }
-                else {
+                reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
+                if (PASS2) {
                      ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
-                    RExC_emit = orig_emit;
+                    NEXT_OFF(orig_emit)= regarglen[OPFAIL] + NODE_STEP_REGNODE;
                  }
-                ret = reganode(pRExC_state, OPFAIL, 0);
                  return ret;
              }
              else if (min == max && *RExC_parse == '?')
@@ -11831,7 +11843,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
         SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */
         ckWARN2reg(RExC_parse,
-                  "%"UTF8f" matches null string many times",
+                  "%" UTF8f " matches null string many times",
                    UTF8fARG(UTF, (RExC_parse >= origparse
                                   ? RExC_parse - origparse
                                   : 0),
@@ -12005,13 +12017,16 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
      RExC_parse++;      /* Skip past the '{' */
  
-    if (! (endbrace = strchr(RExC_parse, '}'))  /* no trailing brace */
-       || ! (endbrace == RExC_parse            /* nothing between the {} */
+    endbrace = strchr(RExC_parse, '}');
+    if (! endbrace) { /* no trailing brace */
+        vFAIL2("Missing right brace on \\%c{}", 'N');
+    }
+    else if(!(endbrace == RExC_parse           /* nothing between the {} */
                || (endbrace - RExC_parse >= 2   /* U+ (bad hex is checked... */
                    && strnEQ(RExC_parse, "U+", 2)))) /* ... below for a better
                                                         error msg) */
      {
-       if (endbrace) RExC_parse = endbrace;    /* position msg's '<--HERE' */
+       RExC_parse = endbrace;  /* position msg's '<--HERE' */
         vFAIL("\\N{NAME} must be resolved by the lexer");
      }
  
@@ -12157,7 +12172,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          /* The values are Unicode, and therefore not subject to recoding, but
           * have to be converted to native on a non-Unicode (meaning non-ASCII)
           * platform. */
-       RExC_override_recoding = 1;
  #ifdef EBCDIC
          RExC_recode_x_to_native = 1;
  #endif
@@ -12168,7 +12182,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                      *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                      return FALSE;
                  }
-                FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#"UVxf"",
+                FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
                      (UV) flags);
              }
              *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
@@ -12178,7 +12192,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         RExC_start = RExC_adjusted_start = save_start;
         RExC_parse = endbrace;
         RExC_end = orig_end;
-       RExC_override_recoding = 0;
  #ifdef EBCDIC
          RExC_recode_x_to_native = 0;
  #endif
@@ -12386,6 +12399,52 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
      }
  }
  
+STATIC bool
+S_new_regcurly(const char *s, const char *e)
+{
+    /* This is a temporary function designed to match the most lenient form of
+     * a {m,n} quantifier we ever envision, with either number omitted, and
+     * spaces anywhere between/before/after them.  's' points at the candidate
+     * '{'; nothing at or beyond 'e' is examined.  If this function fails, then
+     * the string it matches is very unlikely to ever be considered a valid
+     * quantifier, so we can allow the '{' that begins it to be considered as
+     * a literal */
+
+    bool has_min = FALSE;
+    bool has_max = FALSE;
+
+    PERL_ARGS_ASSERT_NEW_REGCURLY;
+
+    if (s >= e || *s++ != '{')
+        return FALSE;
+
+    while (s < e && isSPACE(*s)) {
+        s++;
+    }
+    while (s < e && isDIGIT(*s)) {
+        has_min = TRUE;
+        s++;
+    }
+    while (s < e && isSPACE(*s)) {
+        s++;
+    }
+    /* The loops above may leave s == e; bound-check before dereferencing */
+    if (s < e && *s == ',') {
+        s++;
+        while (s < e && isSPACE(*s)) {
+            s++;
+        }
+        while (s < e && isDIGIT(*s)) {
+            has_max = TRUE;
+            s++;
+        }
+        while (s < e && isSPACE(*s)) {
+            s++;
+        }
+    }
+
+    return s < e && *s == '}' && (has_min || has_max);
+}
  
  /* Parse backref decimal value, unless it's too big to sensibly be a backref,
   * in which case return I32_MAX (rather than possibly 32-bit wrapping) */
@@ -12534,7 +12593,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          if (ret == NULL) {
              if (*flagp & (RESTART_PASS1|NEED_UTF8))
                  return NULL;
-            FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"",
+            FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf,
                    (UV) *flagp);
          }
         if (*RExC_parse != ']') {
@@ -12561,7 +12620,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                      return NULL;
                  }
-                FAIL2("panic: reg returned NULL to regatom, flags=%#"UVxf"",
+                FAIL2("panic: reg returned NULL to regatom, flags=%#" UVxf,
                                                                   (UV) flags);
         }
         *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
@@ -12724,7 +12783,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                        bad_bound_type:
                          RExC_parse = endbrace;
                         vFAIL2utf8f(
-                            "'%"UTF8f"' is an unknown bound type",
+                            "'%" UTF8f "' is an unknown bound type",
                             UTF8fARG(UTF, length, endbrace - length));
                          NOT_REACHED; /*NOTREACHED*/
                  }
@@ -12820,6 +12879,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              /* FALLTHROUGH */
  
            finish_meta_pat:
+            if (   UCHARAT(RExC_parse + 1) == '{'
+                && UNLIKELY(! new_regcurly(RExC_parse + 1, RExC_end)))
+            {
+                RExC_parse += 2;
+                vFAIL("Unescaped left brace in regex is illegal here");
+            }
             nextchar(pRExC_state);
              Set_Node_Length(ret, 2); /* MJD */
             break;
@@ -12842,7 +12907,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if
               * multi-char folds are allowed.  */
              if (!ret)
-                FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"",
+                FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf,
                        (UV) *flagp);
  
              RExC_parse--;
@@ -13369,14 +13434,43 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     } /* End of switch on '\' */
                     break;
                 case '{':
-                   /* Currently we don't care if the lbrace is at the start
-                    * of a construct.  This catches it in the middle of a
-                    * literal string, or when it's the first thing after
-                    * something like "\b" */
-                   if (len || (p > RExC_start && isALPHA_A(*(p -1)))) {
-                        RExC_parse = p + 1;
-                       vFAIL("Unescaped left brace in regex is illegal here");
+                    /* Currently we allow an lbrace at the start of a construct
+                     * without raising a warning.  This is because we think we
+                     * will never want such a brace to be meant to be other
+                     * than taken literally. */
+                   if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
+
+                        /* But, we raise a fatal warning otherwise, as the
+                         * deprecation cycle has come and gone.  Except that it
+                         * turns out that some heavily-relied on upstream
+                         * software, notably GNU Autoconf, have failed to fix
+                         * their uses.  For these, don't make it fatal unless
+                         * we anticipate using the '{' for something else.
+                         * This happens after any alpha, and for a looser {m,n}
+                         * quantifier specification */
+                        if (      RExC_strict
+                            || (  p > parse_start + 1
+                                && isALPHA_A(*(p - 1))
+                                && *(p - 2) == '\\')
+                            || new_regcurly(p, RExC_end))
+                        {
+                            RExC_parse = p + 1;
+                            vFAIL("Unescaped left brace in regex is "
+                                  "illegal here");
+                        }
+                        if (PASS2) {
+                            ckWARNregdep(p + 1,
+                                        "Unescaped left brace in regex is "
+                                        "deprecated here (and will be fatal "
+                                        "in Perl 5.30), passed through");
+                        }
                     }
+                   goto normal_default;
+                case '}':
+                case ']':
+                    if (PASS2 && p > RExC_parse && RExC_strict) {
+                        ckWARN2reg(p + 1, "Unescaped literal '%c'", *p);
+                    }
                     /*FALLTHROUGH*/
                 default:    /* A literal character */
                   normal_default:
@@ -13408,10 +13502,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                   * this character again next time through, when it will be the
                   * only thing in its new node */
  
-                if ((next_is_quantifier = (   LIKELY(p < RExC_end)
-                                           && UNLIKELY(ISMULT2(p))))
-                    && LIKELY(len))
-               {
+                next_is_quantifier =    LIKELY(p < RExC_end)
+                                     && UNLIKELY(ISMULT2(p));
+
+                if (next_is_quantifier && LIKELY(len)) {
                      p = oldp;
                      goto loopdone;
                  }
@@ -13796,7 +13890,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                              FALSE /* Don't force to /x */ );
      if (PASS2 && *RExC_parse == '{' && OP(ret) != SBOL && ! regcurly(RExC_parse)) {
-        ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here, passed through");
+        ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through");
      }
  
      return(ret);
@@ -14674,7 +14768,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                                                     ? "^"
                                                     : "";
              RExC_parse = (char *) p;
-            vFAIL3utf8f("POSIX class [:%s%"UTF8f":] unknown",
+            vFAIL3utf8f("POSIX class [:%s%" UTF8f ":] unknown",
                          complement_string,
                          UTF8fARG(UTF, RExC_parse - name_start - 2, name_start));
          }
@@ -14817,7 +14911,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                    &posix_warnings
                                   ))
                          FAIL2("panic: regclass returned NULL to handle_sets, "
-                              "flags=%#"UVxf"", (UV) *flagp);
+                              "flags=%#" UVxf, (UV) *flagp);
  
                      /* function call leaves parse pointing to the ']', except
                       * if we faked it */
@@ -14853,7 +14947,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
        no_close:
          /* We output the messages even if warnings are off, because we'll fail
           * the very next thing, and these give a likely diagnosis for that */
-        if (posix_warnings && av_tindex_nomg(posix_warnings) >= 0) {
+        if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
              output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
          }
  
@@ -14968,7 +15062,7 @@ redo_curchar:
                                             stack, fence, fence_stack));
  #endif
  
-        top_index = av_tindex_nomg(stack);
+        top_index = av_tindex_skip_len_mg(stack);
  
          switch (curchar) {
              SV** stacked_ptr;       /* Ptr to something already on 'stack' */
@@ -15086,7 +15180,7 @@ redo_curchar:
                                NULL))
                  {
                      FAIL2("panic: regclass returned NULL to handle_sets, "
-                          "flags=%#"UVxf"", (UV) *flagp);
+                          "flags=%#" UVxf, (UV) *flagp);
                  }
  
                  /* regclass() will return with parsing just the \ sequence,
@@ -15125,7 +15219,7 @@ redo_curchar:
                                  ))
                  {
                      FAIL2("panic: regclass returned NULL to handle_sets, "
-                          "flags=%#"UVxf"", (UV) *flagp);
+                          "flags=%#" UVxf, (UV) *flagp);
                  }
  
                  /* function call leaves parse pointing to the ']', except if we
@@ -15146,7 +15240,7 @@ redo_curchar:
                  goto done;
  
              case ')':
-                if (av_tindex_nomg(fence_stack) < 0) {
+                if (av_tindex_skip_len_mg(fence_stack) < 0) {
                      RExC_parse++;
                      vFAIL("Unexpected ')'");
                  }
@@ -15342,7 +15436,7 @@ redo_curchar:
               * may have altered the stack in the time since we earlier set
               * 'top_index'.  */
  
-            top_index = av_tindex_nomg(stack);
+            top_index = av_tindex_skip_len_mg(stack);
              if (top_index - fence >= 0) {
                  /* If the top entry on the stack is an operator, it had better
                   * be a '!', otherwise the entry below the top operand should
@@ -15393,15 +15487,15 @@ redo_curchar:
      } /* End of loop parsing through the construct */
  
    done:
-    if (av_tindex_nomg(fence_stack) >= 0) {
+    if (av_tindex_skip_len_mg(fence_stack) >= 0) {
          vFAIL("Unmatched (");
      }
  
-    if (av_tindex_nomg(stack) < 0   /* Was empty */
+    if (av_tindex_skip_len_mg(stack) < 0   /* Was empty */
          || ((final = av_pop(stack)) == NULL)
          || ! IS_OPERAND(final)
          || SvTYPE(final) != SVt_INVLIST
-        || av_tindex_nomg(stack) >= 0)  /* More left on stack */
+        || av_tindex_skip_len_mg(stack) >= 0)  /* More left on stack */
      {
        bad_syntax:
          SvREFCNT_dec(final);
@@ -15421,10 +15515,10 @@ redo_curchar:
      result_string = newSVpvs("");
      while (invlist_iternext(final, &start, &end)) {
          if (start == end) {
-            Perl_sv_catpvf(aTHX_ result_string, "\\x{%"UVXf"}", start);
+            Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}", start);
          }
          else {
-            Perl_sv_catpvf(aTHX_ result_string, "\\x{%"UVXf"}-\\x{%"UVXf"}",
+            Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}-\\x{%" UVXf "}",
                                                       start,          end);
          }
      }
@@ -15454,7 +15548,7 @@ redo_curchar:
                      NULL
                  );
      if (!node)
-        FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf,
+        FAIL2("panic: regclass returned NULL to handle_sets, flags=%#" UVxf,
                      PTR2UV(flagp));
  
      /* Fix up the node type if we are in locale.  (We have pretended we are
@@ -15504,8 +15598,8 @@ S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state,
                               AV * stack, const IV fence, AV * fence_stack)
  {   /* Dumps the stacks in handle_regex_sets() */
  
-    const SSize_t stack_top = av_tindex_nomg(stack);
-    const SSize_t fence_stack_top = av_tindex_nomg(fence_stack);
+    const SSize_t stack_top = av_tindex_skip_len_mg(stack);
+    const SSize_t fence_stack_top = av_tindex_skip_len_mg(fence_stack);
      SSize_t i;
  
      PERL_ARGS_ASSERT_DUMP_REGEX_SETS_STRUCTURES;
@@ -15811,8 +15905,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                         character; used under /i */
      UV n;
      char * stop_ptr = RExC_end;    /* where to stop parsing */
-    const bool skip_white = cBOOL(ret_invlist); /* ignore unescaped white
-                                                   space? */
+
+    /* ignore unescaped whitespace? */
+    const bool skip_white = cBOOL(   ret_invlist
+                                  || (RExC_flags & RXf_PMf_EXTENDED_MORE));
  
      /* Unicode properties are stored in a swash; this holds the current one
       * being parsed.  If this swash is the only above-latin1 component of the
@@ -15957,7 +16053,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      while (1) {
  
          if (   posix_warnings
-            && av_tindex_nomg(posix_warnings) >= 0
+            && av_tindex_skip_len_mg(posix_warnings) >= 0
              && RExC_parse > not_posix_region_end)
          {
              /* Warnings about posix class issues are considered tentative until
@@ -16013,7 +16109,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   * posix class, and it failed, it was a false alarm, as this
                   * successful one proves */
                  if (   posix_warnings
-                    && av_tindex_nomg(posix_warnings) >= 0
+                    && av_tindex_skip_len_mg(posix_warnings) >= 0
                      && not_posix_region_end >= RExC_parse
                      && not_posix_region_end <= posix_class_end)
                  {
@@ -16283,7 +16379,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              RExC_parse = e + 1;
  
                              /* diag_listed_as: Can't find Unicode property definition "%s" */
-                            vFAIL3utf8f("%s \"%"UTF8f"\"",
+                            vFAIL3utf8f("%s \"%" UTF8f "\"",
                                  msg, UTF8fARG(UTF, n, name));
                          }
  
@@ -16302,7 +16398,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                  SAVEFREEPV(name);
                              }
                          }
-                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%"UTF8f"%s\n",
+                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%" UTF8f "%s\n",
                                          (value == 'p' ? '+' : '!'),
                                          (FOLD) ? "__" : "",
                                          UTF8fARG(UTF, n, name),
@@ -16472,13 +16568,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                    : 0;
                      if (strict) {
                          vFAIL2utf8f(
-                            "False [] range \"%"UTF8f"\"",
+                            "False [] range \"%" UTF8f "\"",
                              UTF8fARG(UTF, w, rangebegin));
                      }
                      else {
                          SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */
                          ckWARN2reg(RExC_parse,
-                            "False [] range \"%"UTF8f"\"",
+                            "False [] range \"%" UTF8f "\"",
                              UTF8fARG(UTF, w, rangebegin));
                          (void)ReREFCNT_inc(RExC_rx_sv);
                          cp_list = add_cp_to_invlist(cp_list, '-');
@@ -16666,7 +16762,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  #endif
                  w = RExC_parse - rangebegin;
                  vFAIL2utf8f(
-                    "Invalid [] range \"%"UTF8f"\"",
+                    "Invalid [] range \"%" UTF8f "\"",
                      UTF8fARG(UTF, w, rangebegin));
                  NOT_REACHED; /* NOTREACHED */
             }
@@ -16771,7 +16867,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                        foldbuf + foldlen);
                          SV* multi_fold = sv_2mortal(newSVpvs(""));
  
-                        Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%"UVXf"}", value);
+                        Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%" UVXf "}", value);
  
                          multi_char_matches
                                          = add_multi_match(multi_char_matches,
@@ -16808,15 +16904,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                       * must be be all digits or all letters of the same case.
                       * Otherwise, the range is non-portable and unclear as to
                       * what it contains */
-                    if ((isPRINT_A(prevvalue) || isPRINT_A(value))
-                        && (non_portable_endpoint
-                            || ! ((isDIGIT_A(prevvalue) && isDIGIT_A(value))
-                                   || (isLOWER_A(prevvalue) && isLOWER_A(value))
-                                   || (isUPPER_A(prevvalue) && isUPPER_A(value)))))
-                    {
-                        vWARN(RExC_parse, "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\"");
+                    if (             (isPRINT_A(prevvalue) || isPRINT_A(value))
+                        && (          non_portable_endpoint
+                            || ! (   (isDIGIT_A(prevvalue) && isDIGIT_A(value))
+                                  || (isLOWER_A(prevvalue) && isLOWER_A(value))
+                                  || (isUPPER_A(prevvalue) && isUPPER_A(value))
+                    ))) {
+                        vWARN(RExC_parse, "Ranges of ASCII printables should"
+                                          " be some subset of \"0-9\","
+                                          " \"A-Z\", or \"a-z\"");
                      }
                      else if (prevvalue >= 0x660) { /* ARABIC_INDIC_DIGIT_ZERO */
+                        SSize_t index_start;
+                        SSize_t index_final;
  
                          /* But the nature of Unicode and languages mean we
                           * can't do the same checks for above-ASCII ranges,
@@ -16824,40 +16924,68 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                           * contain only digits from the same group of 10.  The
                           * ASCII case is handled just above.  0x660 is the
                           * first digit character beyond ASCII.  Hence here, the
-                         * range could be a range of digits.  Find out.  */
-                        IV index_start = _invlist_search(PL_XPosix_ptrs[_CC_DIGIT],
-                                                         prevvalue);
-                        IV index_final = _invlist_search(PL_XPosix_ptrs[_CC_DIGIT],
-                                                         value);
-
-                        /* If the range start and final points are in the same
-                         * inversion list element, it means that either both
-                         * are not digits, or both are digits in a consecutive
-                         * sequence of digits.  (So far, Unicode has kept all
-                         * such sequences as distinct groups of 10, but assert
-                         * to make sure).  If the end points are not in the
-                         * same element, neither should be a digit. */
-                        if (index_start == index_final) {
-                            assert(! ELEMENT_RANGE_MATCHES_INVLIST(index_start)
-                            || (invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start+1]
-                               - invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
-                               == 10)
-                               /* But actually Unicode did have one group of 11
-                                * 'digits' in 5.2, so in case we are operating
-                                * on that version, let that pass */
-                            || (invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start+1]
-                               - invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
-                                == 11
-                               && invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
-                                == 0x19D0)
-                            );
+                         * range could be a range of digits.  First some
+                         * unlikely special cases.  Grandfather in that a range
+                         * ending in 19DA (NEW TAI LUE THAM DIGIT ONE) is bad
+                         * if its starting value is one of the 10 digits prior
+                         * to it.  This is because it is an alternate way of
+                         * writing 19D1, and some people may expect it to be in
+                         * that group.  But it is bad, because it won't give
+                         * the expected results.  In Unicode 5.2 it was
+                         * considered to be in that group (of 11, hence), but
+                         * this was fixed in the next version */
+
+                        if (UNLIKELY(value == 0x19DA && prevvalue >= 0x19D0)) {
+                            goto warn_bad_digit_range;
                          }
-                        else if ((index_start >= 0
-                                  && ELEMENT_RANGE_MATCHES_INVLIST(index_start))
-                                 || (index_final >= 0
-                                     && ELEMENT_RANGE_MATCHES_INVLIST(index_final)))
+                        else if (UNLIKELY(   prevvalue >= 0x1D7CE
+                                          &&     value <= 0x1D7FF))
                          {
-                            vWARN(RExC_parse, "Ranges of digits should be from the same group of 10");
+                            /* This is the only other case currently in Unicode
+                             * where the algorithm below fails.  The code
+                             * points just above are the end points of a single
+                             * range containing only decimal digits.  It is 5
+                             * different series of 0-9.  All other ranges of
+                             * digits currently in Unicode are just a single
+                             * series.  (And mktables will notify us if a later
+                             * Unicode version breaks this.)
+                             *
+                             * If the range being checked is at most 9 long,
+                             * and the digit values represented are in
+                             * numerical order, they are from the same series.
+                             * */
+                            if (         value - prevvalue > 9
+                                ||    (((    value - 0x1D7CE) % 10)
+                                     <= (prevvalue - 0x1D7CE) % 10))
+                            {
+                                goto warn_bad_digit_range;
+                            }
+                        }
+                        else {
+
+                            /* For all other ranges of digits in Unicode, the
+                             * algorithm is just to check if both end points
+                             * are in the same series, which is the same range.
+                             * */
+                            index_start = _invlist_search(
+                                                    PL_XPosix_ptrs[_CC_DIGIT],
+                                                    prevvalue);
+
+                            /* Warn if the range starts and ends with a digit,
+                             * and they are not in the same group of 10. */
+                            if (   index_start >= 0
+                                && ELEMENT_RANGE_MATCHES_INVLIST(index_start)
+                                && (index_final =
+                                    _invlist_search(PL_XPosix_ptrs[_CC_DIGIT],
+                                                    value)) != index_start
+                                && index_final >= 0
+                                && ELEMENT_RANGE_MATCHES_INVLIST(index_final))
+                            {
+                              warn_bad_digit_range:
+                                vWARN(RExC_parse, "Ranges of digits should be"
+                                                  " from the same group of"
+                                                  " 10");
+                            }
                          }
                      }
                  }
@@ -16938,7 +17066,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      } /* End of loop through all the text within the brackets */
  
  
-    if (   posix_warnings && av_tindex_nomg(posix_warnings) >= 0) {
+    if (   posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
          output_or_return_posix_warnings(pRExC_state, posix_warnings,
                                          return_posix_warnings);
      }
@@ -16971,7 +17099,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  #endif
  
          /* Look at the longest folds first */
-        for (cp_count = av_tindex_nomg(multi_char_matches);
+        for (cp_count = av_tindex_skip_len_mg(multi_char_matches);
                          cp_count > 0;
                          cp_count--)
          {
@@ -17027,7 +17155,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          RExC_adjusted_start = RExC_start + prefix_end;
         RExC_end = RExC_parse + len;
          RExC_in_multi_char_class = 1;
-       RExC_override_recoding = 1;
          RExC_emit = (regnode *)orig_emit;
  
         ret = reg(pRExC_state, 1, &reg_flags, depth+1);
@@ -17040,7 +17167,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          RExC_precomp_adj = 0;
         RExC_end = save_end;
         RExC_in_multi_char_class = 0;
-       RExC_override_recoding = 0;
          SvREFCNT_dec_NN(multi_char_matches);
          return ret;
      }
@@ -17355,7 +17481,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      {
                          AV* list = (AV*) *listp;
                          IV k;
-                        for (k = 0; k <= av_tindex_nomg(list); k++) {
+                        for (k = 0; k <= av_tindex_skip_len_mg(list); k++) {
                              SV** c_p = av_fetch(list, k, FALSE);
                              UV c;
                              assert(c_p);
@@ -18046,7 +18172,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
  
             si = *ary;  /* ary[0] = the string to initialize the swash with */
  
-            if (av_tindex_nomg(av) >= 2) {
+            if (av_tindex_skip_len_mg(av) >= 2) {
                  if (only_utf8_locale_ptr
                      && ary[2]
                      && ary[2] != &PL_sv_undef)
@@ -18062,7 +18188,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                   * is any inversion list generated at compile time; [4]
                   * indicates if that inversion list has any user-defined
                   * properties in it. */
-                if (av_tindex_nomg(av) >= 3) {
+                if (av_tindex_skip_len_mg(av) >= 3) {
                      invlist = ary[3];
                      if (SvUV(ary[4])) {
                          swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
@@ -18375,7 +18501,7 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
  #else
      if (RExC_offsets) {         /* MJD */
         MJD_OFFSET_DEBUG(
-              ("%s:%d: (op %s) %s %"UVuf" (len %"UVuf") (max %"UVuf").\n",
+              ("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n",
                name, __LINE__,
                PL_reg_name[op],
                (UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0]
@@ -18452,9 +18578,17 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
  - reginsert - insert an operator in front of already-emitted operand
  *
  * Means relocating the operand.
+*
+* IMPORTANT NOTE - it is the *caller's* responsibility to correctly
+* set up NEXT_OFF() of the inserted node if needed. Something like this:
+*
+* reginsert(pRExC, OPFAIL, orig_emit, depth+1);
+* if (PASS2)
+*     NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
+*
  */
  STATIC void
-S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
+S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
  {
      regnode *src;
      regnode *dst;
@@ -18480,7 +18614,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
      dst = RExC_emit;
      if (RExC_open_parens) {
          int paren;
-        /*DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);*/
+        /*DEBUG_PARSE_FMT("inst"," - %" IVdf, (IV)RExC_npar);*/
          /* remember that RExC_npar is rex->nparens + 1,
           * iow it is 1 more than the number of parens seen in
           * the pattern so far. */
@@ -18488,13 +18622,13 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
              /* note, RExC_open_parens[0] is the start of the
               * regex, it can't move. RExC_close_parens[0] is the end
               * of the regex, it *can* move. */
-            if ( paren && RExC_open_parens[paren] >= opnd ) {
+            if ( paren && RExC_open_parens[paren] >= operand ) {
                  /*DEBUG_PARSE_FMT("open"," - %d",size);*/
                  RExC_open_parens[paren] += size;
              } else {
                  /*DEBUG_PARSE_FMT("open"," - %s","ok");*/
              }
-            if ( RExC_close_parens[paren] >= opnd ) {
+            if ( RExC_close_parens[paren] >= operand ) {
                  /*DEBUG_PARSE_FMT("close"," - %d",size);*/
                  RExC_close_parens[paren] += size;
              } else {
@@ -18505,12 +18639,12 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
      if (RExC_end_op)
          RExC_end_op += size;
  
-    while (src > opnd) {
+    while (src > operand) {
         StructCopy(--src, --dst, regnode);
  #ifdef RE_TRACK_PATTERN_OFFSETS
          if (RExC_offsets) {     /* MJD 20010112 */
             MJD_OFFSET_DEBUG(
-                 ("%s(%d): (op %s) %s copy %"UVuf" -> %"UVuf" (max %"UVuf").\n",
+                 ("%s(%d): (op %s) %s copy %" UVuf " -> %" UVuf " (max %" UVuf ").\n",
                    "reg_insert",
                   __LINE__,
                   PL_reg_name[op],
@@ -18526,11 +18660,11 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
      }
  
  
-    place = opnd;              /* Op node, where operand used to be. */
+    place = operand;           /* Op node, where operand used to be. */
  #ifdef RE_TRACK_PATTERN_OFFSETS
      if (RExC_offsets) {         /* MJD */
         MJD_OFFSET_DEBUG(
-              ("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n",
+              ("%s(%d): (op %s) %s %" UVuf " <- %" UVuf " (max %" UVuf ").\n",
                "reginsert",
               __LINE__,
               PL_reg_name[op],
@@ -18679,7 +18813,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
          DEBUG_PARSE_MSG("");
          regprop(RExC_rx, RExC_mysv, val, NULL, pRExC_state);
          Perl_re_printf( aTHX_
-                      "~ attach to %s (%"IVdf") offset to %"IVdf"\n",
+                      "~ attach to %s (%" IVdf ") offset to %" IVdf "\n",
                       SvPV_nolen_const(RExC_mysv),
                       (IV)REG_NODE_NUM(val),
                       (IV)(val - scan)
@@ -18792,14 +18926,14 @@ Perl_regdump(pTHX_ const regexp *r)
         RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->anchored_substr),
             RE_SV_DUMPLEN(r->anchored_substr), 30);
          Perl_re_printf( aTHX_
-                     "anchored %s%s at %"IVdf" ",
+                     "anchored %s%s at %" IVdf " ",
                       s, RE_SV_TAIL(r->anchored_substr),
                       (IV)r->anchored_offset);
      } else if (r->anchored_utf8) {
         RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->anchored_utf8),
             RE_SV_DUMPLEN(r->anchored_utf8), 30);
          Perl_re_printf( aTHX_
-                     "anchored utf8 %s%s at %"IVdf" ",
+                     "anchored utf8 %s%s at %" IVdf " ",
                       s, RE_SV_TAIL(r->anchored_utf8),
                       (IV)r->anchored_offset);
      }
@@ -18807,14 +18941,14 @@ Perl_regdump(pTHX_ const regexp *r)
         RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->float_substr),
             RE_SV_DUMPLEN(r->float_substr), 30);
          Perl_re_printf( aTHX_
-                     "floating %s%s at %"IVdf"..%"UVuf" ",
+                     "floating %s%s at %" IVdf "..%" UVuf " ",
                       s, RE_SV_TAIL(r->float_substr),
                       (IV)r->float_min_offset, (UV)r->float_max_offset);
      } else if (r->float_utf8) {
         RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->float_utf8),
             RE_SV_DUMPLEN(r->float_utf8), 30);
          Perl_re_printf( aTHX_
-                     "floating utf8 %s%s at %"IVdf"..%"UVuf" ",
+                     "floating utf8 %s%s at %" IVdf "..%" UVuf " ",
                       s, RE_SV_TAIL(r->float_utf8),
                       (IV)r->float_min_offset, (UV)r->float_max_offset);
      }
@@ -18846,12 +18980,12 @@ Perl_regdump(pTHX_ const regexp *r)
          Perl_re_printf( aTHX_ " ");
      }
      if (r->intflags & PREGf_GPOS_SEEN)
-        Perl_re_printf( aTHX_  "GPOS:%"UVuf" ", (UV)r->gofs);
+        Perl_re_printf( aTHX_  "GPOS:%" UVuf " ", (UV)r->gofs);
      if (r->intflags & PREGf_SKIP)
          Perl_re_printf( aTHX_  "plus ");
      if (r->intflags & PREGf_IMPLICIT)
          Perl_re_printf( aTHX_  "implicit ");
-    Perl_re_printf( aTHX_  "minlen %"IVdf" ", (IV)r->minlen);
+    Perl_re_printf( aTHX_  "minlen %" IVdf " ", (IV)r->minlen);
      if (r->extflags & RXf_EVAL_SEEN)
          Perl_re_printf( aTHX_  "with eval ");
      Perl_re_printf( aTHX_  "\n");
@@ -18967,7 +19101,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
            if (trie->jump)
              sv_catpvs(sv, "(JUMP)");
            Perl_sv_catpvf(aTHX_ sv,
-            "<S:%"UVuf"/%"IVdf" W:%"UVuf" L:%"UVuf"/%"UVuf" C:%"UVuf"/%"UVuf">",
+            "<S:%" UVuf "/%" IVdf " W:%" UVuf " L:%" UVuf "/%" UVuf " C:%" UVuf "/%" UVuf ">",
              (UV)trie->startstate,
              (IV)trie->statecount-1, /* -1 because of the unused 0 element */
              (UV)trie->wordcount,
@@ -19008,7 +19142,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
      {
          AV *name_list= NULL;
          U32 parno= OP(o) == ACCEPT ? (U32)ARG2L(o) : ARG(o);
-        Perl_sv_catpvf(aTHX_ sv, "%"UVuf, (UV)parno);        /* Parenth number */
+        Perl_sv_catpvf(aTHX_ sv, "%" UVuf, (UV)parno);        /* Parenth number */
         if ( RXp_PAREN_NAMES(prog) ) {
              name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
          } else if ( pRExC_state ) {
@@ -19018,7 +19152,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              if ( k != REF || (OP(o) < NREF)) {
                  SV **name= av_fetch(name_list, parno, 0 );
                 if (name)
-                   Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+                   Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
              }
              else {
                  SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]);
@@ -19027,10 +19161,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                  I32 n;
                  if (name) {
                      for ( n=0; n<SvIVX(sv_dat); n++ ) {
-                        Perl_sv_catpvf(aTHX_ sv, "%s%"IVdf,
+                        Perl_sv_catpvf(aTHX_ sv, "%s%" IVdf,
                                     (n ? "," : ""), (IV)nums[n]);
                      }
-                    Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
                  }
              }
          }
@@ -19062,7 +19196,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          if (name_list) {
              SV **name= av_fetch(name_list, ARG(o), 0 );
              if (name)
-                Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+                Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
          }
      }
      else if (k == LOGICAL)
@@ -19255,7 +19389,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
      /* add on the verb argument if there is one */
      if ( ( k == VERB || OP(o) == ACCEPT || OP(o) == OPFAIL ) && o->flags) {
-        Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
+        Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
                         SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
      }
  #else
@@ -19474,12 +19608,8 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
      if (ri->u.offsets)
          Safefree(ri->u.offsets);             /* 20010421 MJD */
  #endif
-    if (ri->code_blocks) {
-       int n;
-       for (n = 0; n < ri->num_code_blocks; n++)
-           SvREFCNT_dec(ri->code_blocks[n].src_regex);
-       Safefree(ri->code_blocks);
-    }
+    if (ri->code_blocks)
+        S_free_codeblocks(aTHX_ ri->code_blocks);
  
      if (ri->data) {
         int n = ri->data->count;
@@ -19696,16 +19826,18 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
      Copy(ri->program, reti->program, len+1, regnode);
  
  
-    reti->num_code_blocks = ri->num_code_blocks;
      if (ri->code_blocks) {
         int n;
-       Newxc(reti->code_blocks, ri->num_code_blocks, struct reg_code_block,
-               struct reg_code_block);
-       Copy(ri->code_blocks, reti->code_blocks, ri->num_code_blocks,
-               struct reg_code_block);
-       for (n = 0; n < ri->num_code_blocks; n++)
-            reti->code_blocks[n].src_regex = (REGEXP*)
-                   sv_dup_inc((SV*)(ri->code_blocks[n].src_regex), param);
+       Newx(reti->code_blocks, 1, struct reg_code_blocks);
+       Newx(reti->code_blocks->cb, ri->code_blocks->count,
+                    struct reg_code_block);
+       Copy(ri->code_blocks->cb, reti->code_blocks->cb,
+             ri->code_blocks->count, struct reg_code_block);
+       for (n = 0; n < ri->code_blocks->count; n++)
+            reti->code_blocks->cb[n].src_regex = (REGEXP*)
+                   sv_dup_inc((SV*)(ri->code_blocks->cb[n].src_regex), param);
+        reti->code_blocks->count = ri->code_blocks->count;
+        reti->code_blocks->refcnt = 1;
      }
      else
         reti->code_blocks = NULL;
@@ -19836,7 +19968,7 @@ S_re_croak2(pTHX_ bool utf8, const char* pat1,const char* pat2,...)
         l1 = 512;
      Copy(message, buf, l1 , char);
      /* l1-1 to avoid \n */
-    Perl_croak(aTHX_ "%"UTF8f, UTF8fARG(utf8, l1-1, buf));
+    Perl_croak(aTHX_ "%" UTF8f, UTF8fARG(utf8, l1-1, buf));
  }
  
  /* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
@@ -19888,7 +20020,7 @@ S_put_code_point(pTHX_ SV *sv, UV c)
      PERL_ARGS_ASSERT_PUT_CODE_POINT;
  
      if (c > 255) {
-        Perl_sv_catpvf(aTHX_ sv, "\\x{%04"UVXf"}", c);
+        Perl_sv_catpvf(aTHX_ sv, "\\x{%04" UVXf "}", c);
      }
      else if (isPRINT(c)) {
         const char string = (char) c;
@@ -20072,10 +20204,10 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
                      : NUM_ANYOF_CODE_POINTS - 1;
  #if NUM_ANYOF_CODE_POINTS > 256
          format = (this_end < 256)
-                 ? "\\x%02"UVXf"-\\x%02"UVXf""
-                 : "\\x{%04"UVXf"}-\\x{%04"UVXf"}";
+                 ? "\\x%02" UVXf "-\\x%02" UVXf
+                 : "\\x{%04" UVXf "}-\\x{%04" UVXf "}";
  #else
-        format = "\\x%02"UVXf"-\\x%02"UVXf"";
+        format = "\\x%02" UVXf "-\\x%02" UVXf;
  #endif
          GCC_DIAG_IGNORE(-Wformat-nonliteral);
          Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
@@ -20477,7 +20609,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
  #define CLEAR_OPTSTART                                                       \
      if (optstart) STMT_START {                                               \
          DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_                                           \
-                              " (%"IVdf" nodes)\n", (IV)(node - optstart))); \
+                              " (%" IVdf " nodes)\n", (IV)(node - optstart))); \
          optstart=NULL;                                                       \
      } STMT_END
  
@@ -20526,7 +20658,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
             CLEAR_OPTSTART;
  
          regprop(r, sv, node, NULL, NULL);
-        Perl_re_printf( aTHX_  "%4"IVdf":%*s%s", (IV)(node - start),
+        Perl_re_printf( aTHX_  "%4" IVdf ":%*s%s", (IV)(node - start),
                       (int)(2*indent + 1), "", SvPVX_const(sv));
  
          if (OP(node) != OPTIMIZED) {
@@ -20536,7 +20668,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                       && PL_regkind[OP(next)] != BRANCH )
                  Perl_re_printf( aTHX_  " (FAIL)");
              else
-                Perl_re_printf( aTHX_  " (%"IVdf")", (IV)(next - start));
+                Perl_re_printf( aTHX_  " (%" IVdf ")", (IV)(next - start));
              Perl_re_printf( aTHX_ "\n");
          }
  
@@ -20591,7 +20723,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                  );
                  if (trie->jump) {
                      U16 dist= trie->jump[word_idx+1];
-                    Perl_re_printf( aTHX_  "(%"UVuf")\n",
+                    Perl_re_printf( aTHX_  "(%" UVuf ")\n",
                                 (UV)((dist ? this_trie + dist : next) - start));
                      if (dist) {
                          if (!nextbranch)