This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #131730] Fix exec PROGRAM LIST with empty LIST on VMS
[perl5.git] / regcomp.c
index a0782d3..cc0ff96 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -119,8 +119,7 @@ typedef struct scan_frame {
 
 /* Certain characters are output as a sequence with the first being a
  * backslash. */
-#define isBACKSLASHED_PUNCT(c)                                              \
-                    ((c) == '-' || (c) == ']' || (c) == '\\' || (c) == '^')
+#define isBACKSLASHED_PUNCT(c)  strchr("-[]\\^", c)
 
 
 struct RExC_state_t {
@@ -177,15 +176,13 @@ struct RExC_state_t {
     U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
     I32                in_lookbehind;
     I32                contains_locale;
-    I32                contains_i;
     I32                override_recoding;
 #ifdef EBCDIC
     I32                recode_x_to_native;
 #endif
     I32                in_multi_char_class;
-    struct reg_code_block *code_blocks;        /* positions of literal (?{})
+    struct reg_code_blocks *code_blocks;/* positions of literal (?{})
                                            within pattern */
-    int                num_code_blocks;        /* size of code_blocks[] */
     int                code_index;             /* next code_blocks[] slot */
     SSize_t     maxlen;                        /* mininum possible number of chars in string to match */
     scan_frame *frame_head;
@@ -272,8 +269,6 @@ struct RExC_state_t {
                                    (pRExC_state->study_chunk_recursed_bytes)
 #define RExC_in_lookbehind     (pRExC_state->in_lookbehind)
 #define RExC_contains_locale   (pRExC_state->contains_locale)
-#define RExC_contains_i (pRExC_state->contains_i)
-#define RExC_override_recoding (pRExC_state->override_recoding)
 #ifdef EBCDIC
 #   define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
 #endif
@@ -397,7 +392,7 @@ struct RExC_state_t {
 
   For each string some basic information is maintained:
 
-  - offset or min_offset
+  - min_offset
     This is the position the string must appear at, or not before.
     It also implicitly (when combined with minlenp) tells us how many
     characters must match before the string we are searching for.
@@ -409,6 +404,7 @@ struct RExC_state_t {
     Only used for floating strings. This is the rightmost point that
     the string can appear at. If set to SSize_t_MAX it indicates that the
     string can occur infinitely far to the right.
+    For fixed strings, it is equal to min_offset.
 
   - minlenp
     A pointer to the minimum number of characters of the pattern that the
@@ -448,6 +444,15 @@ struct RExC_state_t {
 
 */
 
+struct scan_data_substrs {
+    SV      *str;       /* longest substring found in pattern */
+    SSize_t min_offset; /* earliest point in string it can appear */
+    SSize_t max_offset; /* latest point in string it can appear */
+    SSize_t *minlenp;   /* pointer to the minlen relevant to the string */
+    SSize_t lookbehind; /* is the pos of the string modified by LB */
+    I32 flags;          /* per substring SF_* and SCF_* flags */
+};
+
 typedef struct scan_data_t {
     /*I32 len_min;      unused */
     /*I32 len_delta;    unused */
@@ -457,17 +462,14 @@ typedef struct scan_data_t {
     SSize_t last_end;      /* min value, <0 unless valid. */
     SSize_t last_start_min;
     SSize_t last_start_max;
-    SV **longest;          /* Either &l_fixed, or &l_float. */
-    SV *longest_fixed;      /* longest fixed string found in pattern */
-    SSize_t offset_fixed;   /* offset where it starts */
-    SSize_t *minlen_fixed;  /* pointer to the minlen relevant to the string */
-    I32 lookbehind_fixed;   /* is the position of the string modfied by LB */
-    SV *longest_float;      /* longest floating string found in pattern */
-    SSize_t offset_float_min; /* earliest point in string it can appear */
-    SSize_t offset_float_max; /* latest point in string it can appear */
-    SSize_t *minlen_float;  /* pointer to the minlen relevant to the string */
-    SSize_t lookbehind_float; /* is the pos of the string modified by LB */
-    I32 flags;
+    U8      cur_is_floating; /* whether the last_* values should be set as
+                              * the next fixed (0) or floating (1)
+                              * substring */
+
+    /* [0] is longest fixed substring so far, [1] is longest float so far */
+    struct scan_data_substrs  substrs[2];
+
+    I32 flags;             /* common SF_* and SCF_* flags */
     I32 whilem_c;
     SSize_t *last_closep;
     regnode_ssc *start_class;
@@ -477,23 +479,21 @@ typedef struct scan_data_t {
  * Forward declarations for pregcomp()'s friends.
  */
 
-static const scan_data_t zero_scan_data =
-  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0};
+static const scan_data_t zero_scan_data = {
+    0, 0, NULL, 0, 0, 0, 0,
+    {
+        { NULL, 0, 0, 0, 0, 0 },
+        { NULL, 0, 0, 0, 0, 0 },
+    },
+    0, 0, NULL, NULL
+};
+
+/* study flags */
 
-#define SF_BEFORE_EOL          (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
 #define SF_BEFORE_SEOL         0x0001
 #define SF_BEFORE_MEOL         0x0002
-#define SF_FIX_BEFORE_EOL      (SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL)
-#define SF_FL_BEFORE_EOL       (SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL)
-
-#define SF_FIX_SHIFT_EOL       (+2)
-#define SF_FL_SHIFT_EOL                (+4)
-
-#define SF_FIX_BEFORE_SEOL     (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
-#define SF_FIX_BEFORE_MEOL     (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
+#define SF_BEFORE_EOL          (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
 
-#define SF_FL_BEFORE_SEOL      (SF_BEFORE_SEOL << SF_FL_SHIFT_EOL)
-#define SF_FL_BEFORE_MEOL      (SF_BEFORE_MEOL << SF_FL_SHIFT_EOL) /* 0x20 */
 #define SF_IS_INF              0x0040
 #define SF_HAS_PAR             0x0080
 #define SF_IN_PAR              0x0100
@@ -556,7 +556,6 @@ static const scan_data_t zero_scan_data =
 #define OOB_UNICODE            0xDEADBEEF
 
 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
-#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
 
 
 /* length of regex to show in messages that don't mark a position within */
@@ -571,7 +570,7 @@ static const scan_data_t zero_scan_data =
 #define MARKER2 " <-- HERE "  /* marker as it appears within the regex */
 
 #define REPORT_LOCATION " in regex; marked by " MARKER1    \
-                        " in m/%"UTF8f MARKER2 "%"UTF8f"/"
+                        " in m/%" UTF8f MARKER2 "%" UTF8f "/"
 
 /* The code in this file in places uses one level of recursion with parsing
  * rebased to an alternate string constructed by us in memory.  This can take
@@ -659,11 +658,11 @@ static const scan_data_t zero_scan_data =
 } STMT_END
 
 #define        FAIL(msg) _FAIL(                            \
-    Perl_croak(aTHX_ "%s in regex m/%"UTF8f"%s/",          \
+    Perl_croak(aTHX_ "%s in regex m/%" UTF8f "%s/",        \
            msg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
 
 #define        FAIL2(msg,arg) _FAIL(                       \
-    Perl_croak(aTHX_ msg " in regex m/%"UTF8f"%s/",        \
+    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",      \
            arg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
 
 /*
@@ -976,64 +975,123 @@ Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
 #define DEBUG_SHOW_STUDY_FLAG(flags,flag) \
   if ((flags) & flag) Perl_re_printf( aTHX_  "%s ", #flag)
 
-#define DEBUG_SHOW_STUDY_FLAGS(flags,open_str,close_str)                    \
-    if ( ( flags ) ) {                                                      \
-        Perl_re_printf( aTHX_  "%s", open_str);                                         \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_FL_BEFORE_SEOL);                     \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_FL_BEFORE_MEOL);                     \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_IS_INF);                             \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_HAS_PAR);                            \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_IN_PAR);                             \
-        DEBUG_SHOW_STUDY_FLAG(flags,SF_HAS_EVAL);                           \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_SUBSTR);                         \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_STCLASS_AND);                    \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_STCLASS_OR);                     \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_STCLASS);                        \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_WHILEM_VISITED_POS);                \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_TRIE_RESTUDY);                      \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_SEEN_ACCEPT);                       \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_TRIE_DOING_RESTUDY);                \
-        DEBUG_SHOW_STUDY_FLAG(flags,SCF_IN_DEFINE);                         \
-        Perl_re_printf( aTHX_  "%s", close_str);                                        \
-    }
-
-
-#define DEBUG_STUDYDATA(str,data,depth)                              \
-DEBUG_OPTIMISE_MORE_r(if(data){                                      \
-    Perl_re_indentf( aTHX_  "" str "Pos:%"IVdf"/%"IVdf                           \
-        " Flags: 0x%"UVXf,                                           \
-        depth,                                                       \
-        (IV)((data)->pos_min),                                       \
-        (IV)((data)->pos_delta),                                     \
-        (UV)((data)->flags)                                          \
-    );                                                               \
-    DEBUG_SHOW_STUDY_FLAGS((data)->flags," [ ","]");                 \
-    Perl_re_printf( aTHX_                                                        \
-        " Whilem_c: %"IVdf" Lcp: %"IVdf" %s",                        \
-        (IV)((data)->whilem_c),                                      \
-        (IV)((data)->last_closep ? *((data)->last_closep) : -1),     \
-        is_inf ? "INF " : ""                                         \
-    );                                                               \
-    if ((data)->last_found)                                          \
-        Perl_re_printf( aTHX_                                                    \
-            "Last:'%s' %"IVdf":%"IVdf"/%"IVdf" %sFixed:'%s' @ %"IVdf \
-            " %sFloat: '%s' @ %"IVdf"/%"IVdf"",                      \
-            SvPVX_const((data)->last_found),                         \
-            (IV)((data)->last_end),                                  \
-            (IV)((data)->last_start_min),                            \
-            (IV)((data)->last_start_max),                            \
-            ((data)->longest &&                                      \
-             (data)->longest==&((data)->longest_fixed)) ? "*" : "",  \
-            SvPVX_const((data)->longest_fixed),                      \
-            (IV)((data)->offset_fixed),                              \
-            ((data)->longest &&                                      \
-             (data)->longest==&((data)->longest_float)) ? "*" : "",  \
-            SvPVX_const((data)->longest_float),                      \
-            (IV)((data)->offset_float_min),                          \
-            (IV)((data)->offset_float_max)                           \
-        );                                                           \
-    Perl_re_printf( aTHX_ "\n");                                                 \
-});
+
+#ifdef DEBUGGING
+static void
+S_debug_show_study_flags(pTHX_ U32 flags, const char *open_str,
+                                    const char *close_str)
+{
+    if (!flags)
+        return;
+
+    Perl_re_printf( aTHX_  "%s", open_str);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_SEOL);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_BEFORE_MEOL);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_IS_INF);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_PAR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_IN_PAR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SF_HAS_EVAL);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_SUBSTR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_AND);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS_OR);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_DO_STCLASS);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_WHILEM_VISITED_POS);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_RESTUDY);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_SEEN_ACCEPT);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_TRIE_DOING_RESTUDY);
+    DEBUG_SHOW_STUDY_FLAG(flags, SCF_IN_DEFINE);
+    Perl_re_printf( aTHX_  "%s", close_str);
+}
+
+
+static void
+S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
+                    U32 depth, int is_inf)
+{
+    GET_RE_DEBUG_FLAGS_DECL;
+
+    DEBUG_OPTIMISE_MORE_r({
+        if (!data)
+            return;
+        Perl_re_indentf(aTHX_  "%s: Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
+            depth,
+            where,
+            (IV)data->pos_min,
+            (IV)data->pos_delta,
+            (UV)data->flags
+        );
+
+        S_debug_show_study_flags(aTHX_ data->flags," [","]");
+
+        Perl_re_printf( aTHX_
+            " Whilem_c: %" IVdf " Lcp: %" IVdf " %s",
+            (IV)data->whilem_c,
+            (IV)(data->last_closep ? *((data)->last_closep) : -1),
+            is_inf ? "INF " : ""
+        );
+
+        if (data->last_found) {
+            int i;
+            Perl_re_printf(aTHX_
+                "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf,
+                    SvPVX_const(data->last_found),
+                    (IV)data->last_end,
+                    (IV)data->last_start_min,
+                    (IV)data->last_start_max
+            );
+
+            for (i = 0; i < 2; i++) {
+                Perl_re_printf(aTHX_
+                    " %s%s: '%s' @ %" IVdf "/%" IVdf,
+                    data->cur_is_floating == i ? "*" : "",
+                    i ? "Float" : "Fixed",
+                    SvPVX_const(data->substrs[i].str),
+                    (IV)data->substrs[i].min_offset,
+                    (IV)data->substrs[i].max_offset
+                );
+                S_debug_show_study_flags(aTHX_ data->substrs[i].flags," [","]");
+            }
+        }
+
+        Perl_re_printf( aTHX_ "\n");
+    });
+}
+
+
+static void
+S_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
+                regnode *scan, U32 depth, U32 flags)
+{
+    GET_RE_DEBUG_FLAGS_DECL;
+
+    DEBUG_OPTIMISE_r({
+        regnode *Next;
+
+        if (!scan)
+            return;
+        Next = regnext(scan);
+        regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);
+        Perl_re_indentf( aTHX_   "%s>%3d: %s (%d)",
+            depth,
+            str,
+            REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),
+            Next ? (REG_NODE_NUM(Next)) : 0 );
+        S_debug_show_study_flags(aTHX_ flags," [ ","]");
+        Perl_re_printf( aTHX_  "\n");
+   });
+}
+
+
+#  define DEBUG_STUDYDATA(where, data, depth, is_inf) \
+                    S_debug_studydata(aTHX_ where, data, depth, is_inf)
+
+#  define DEBUG_PEEP(str, scan, depth, flags)   \
+                    S_debug_peep(aTHX_ str, pRExC_state, scan, depth, flags)
+
+#else
+#  define DEBUG_STUDYDATA(where, data, depth, is_inf) NOOP
+#  define DEBUG_PEEP(str, scan, depth, flags)         NOOP
+#endif
 
 
 /* =========================================================
@@ -1205,7 +1263,7 @@ S_cntrl_to_mnemonic(const U8 c)
 }
 
 /* Mark that we cannot extend a found fixed substring at this point.
-   Update the longest found anchored substring and the longest found
+   Update the longest found anchored substring or the longest found
    floating substrings if needed. */
 
 STATIC void
@@ -1213,42 +1271,38 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
                     SSize_t *minlenp, int is_inf)
 {
     const STRLEN l = CHR_SVLEN(data->last_found);
-    const STRLEN old_l = CHR_SVLEN(*data->longest);
+    SV * const longest_sv = data->substrs[data->cur_is_floating].str;
+    const STRLEN old_l = CHR_SVLEN(longest_sv);
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_SCAN_COMMIT;
 
     if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
-       SvSetMagicSV(*data->longest, data->last_found);
-       if (*data->longest == data->longest_fixed) {
-           data->offset_fixed = l ? data->last_start_min : data->pos_min;
-           if (data->flags & SF_BEFORE_EOL)
-               data->flags
-                   |= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
-           else
-               data->flags &= ~SF_FIX_BEFORE_EOL;
-           data->minlen_fixed=minlenp;
-           data->lookbehind_fixed=0;
-       }
-       else { /* *data->longest == data->longest_float */
-           data->offset_float_min = l ? data->last_start_min : data->pos_min;
-           data->offset_float_max = (l
+        const U8 i = data->cur_is_floating;
+       SvSetMagicSV(longest_sv, data->last_found);
+        data->substrs[i].min_offset = l ? data->last_start_min : data->pos_min;
+
+       if (!i) /* fixed */
+           data->substrs[0].max_offset = data->substrs[0].min_offset;
+       else { /* float */
+           data->substrs[1].max_offset = (l
                           ? data->last_start_max
                           : (data->pos_delta > SSize_t_MAX - data->pos_min
                                         ? SSize_t_MAX
                                         : data->pos_min + data->pos_delta));
            if (is_inf
-                || (STRLEN)data->offset_float_max > (STRLEN)SSize_t_MAX)
-               data->offset_float_max = SSize_t_MAX;
-           if (data->flags & SF_BEFORE_EOL)
-               data->flags
-                   |= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
-           else
-               data->flags &= ~SF_FL_BEFORE_EOL;
-            data->minlen_float=minlenp;
-            data->lookbehind_float=0;
-       }
+                || (STRLEN)data->substrs[1].max_offset > (STRLEN)SSize_t_MAX)
+               data->substrs[1].max_offset = SSize_t_MAX;
+        }
+
+        if (data->flags & SF_BEFORE_EOL)
+            data->substrs[i].flags |= (data->flags & SF_BEFORE_EOL);
+        else
+            data->substrs[i].flags &= ~SF_BEFORE_EOL;
+        data->substrs[i].minlenp = minlenp;
+        data->substrs[i].lookbehind = 0;
     }
+
     SvCUR_set(data->last_found, 0);
     {
        SV * const sv = data->last_found;
@@ -1260,7 +1314,7 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
     }
     data->last_end = -1;
     data->flags &= ~SF_BEFORE_EOL;
-    DEBUG_STUDYDATA("commit: ",data,0);
+    DEBUG_STUDYDATA("commit", data, 0, is_inf);
 }
 
 /* An SSC is just a regnode_charclass_posix with an extra field: the inversion
@@ -2012,7 +2066,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
     for( state = 1 ; state < trie->statecount ; state++ ) {
        const U32 base = trie->states[ state ].trans.base;
 
-        Perl_re_indentf( aTHX_  "#%4"UVXf"|", depth+1, (UV)state);
+        Perl_re_indentf( aTHX_  "#%4" UVXf "|", depth+1, (UV)state);
 
         if ( trie->states[ state ].wordnum ) {
             Perl_re_printf( aTHX_  " W%4X", trie->states[ state ].wordnum );
@@ -2020,7 +2074,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
             Perl_re_printf( aTHX_  "%6s", "" );
         }
 
-        Perl_re_printf( aTHX_  " @%4"UVXf" ", (UV)base );
+        Perl_re_printf( aTHX_  " @%4" UVXf " ", (UV)base );
 
         if ( base ) {
             U32 ofs = 0;
@@ -2031,7 +2085,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
                                                                     != state))
                     ofs++;
 
-            Perl_re_printf( aTHX_  "+%2"UVXf"[ ", (UV)ofs);
+            Perl_re_printf( aTHX_  "+%2" UVXf "[ ", (UV)ofs);
 
             for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
                 if ( ( base + ofs >= trie->uniquecharcount )
@@ -2040,7 +2094,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
                         && trie->trans[ base + ofs
                                     - trie->uniquecharcount ].check == state )
                 {
-                   Perl_re_printf( aTHX_  "%*"UVXf, colwidth,
+                   Perl_re_printf( aTHX_  "%*" UVXf, colwidth,
                     (UV)trie->trans[ base + ofs - trie->uniquecharcount ].next
                    );
                 } else {
@@ -2089,7 +2143,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
     for( state=1 ; state < next_alloc ; state ++ ) {
         U16 charid;
 
-        Perl_re_indentf( aTHX_  " %4"UVXf" :",
+        Perl_re_indentf( aTHX_  " %4" UVXf " :",
             depth+1, (UV)state  );
         if ( ! trie->states[ state ].wordnum ) {
             Perl_re_printf( aTHX_  "%5s| ","");
@@ -2102,7 +2156,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
            SV ** const tmp = av_fetch( revcharmap,
                                         TRIE_LIST_ITEM(state,charid).forid, 0);
            if ( tmp ) {
-                Perl_re_printf( aTHX_  "%*s:%3X=%4"UVXf" | ",
+                Perl_re_printf( aTHX_  "%*s:%3X=%4" UVXf " | ",
                     colwidth,
                     pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp),
                               colwidth,
@@ -2173,22 +2227,22 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
 
     for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) {
 
-        Perl_re_indentf( aTHX_  "%4"UVXf" : ",
+        Perl_re_indentf( aTHX_  "%4" UVXf " : ",
             depth+1,
             (UV)TRIE_NODENUM( state ) );
 
         for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
             UV v=(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next );
             if (v)
-                Perl_re_printf( aTHX_  "%*"UVXf, colwidth, v );
+                Perl_re_printf( aTHX_  "%*" UVXf, colwidth, v );
             else
                 Perl_re_printf( aTHX_  "%*s", colwidth, "." );
         }
         if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) {
-            Perl_re_printf( aTHX_  " (%4"UVXf")\n",
+            Perl_re_printf( aTHX_  " (%4" UVXf ")\n",
                                             (UV)trie->trans[ state ].check );
         } else {
-            Perl_re_printf( aTHX_  " (%4"UVXf") W%4X\n",
+            Perl_re_printf( aTHX_  " (%4" UVXf ") W%4X\n",
                                             (UV)trie->trans[ state ].check,
             trie->states[ TRIE_NODENUM( state ) ].wordnum );
         }
@@ -2359,8 +2413,9 @@ is the recommended Unicode-aware way of saying
 
 #define TRIE_LIST_PUSH(state,fid,ns) STMT_START {               \
     if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) {    \
-       U32 ging = TRIE_LIST_LEN( state ) *= 2;                 \
+       U32 ging = TRIE_LIST_LEN( state ) * 2;                  \
        Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \
+        TRIE_LIST_LEN( state ) = ging;                          \
     }                                                           \
     TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid;     \
     TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns;   \
@@ -2426,6 +2481,21 @@ is the recommended Unicode-aware way of saying
            : ( state==1 ? special : 0 )                                        \
       )
 
+#define TRIE_BITMAP_SET_FOLDED(trie, uvc, folder)           \
+STMT_START {                                                \
+    TRIE_BITMAP_SET(trie, uvc);                             \
+    /* store the folded codepoint */                        \
+    if ( folder )                                           \
+        TRIE_BITMAP_SET(trie, folder[(U8) uvc ]);           \
+                                                            \
+    if ( !UTF ) {                                           \
+        /* store first byte of utf8 representation of */    \
+        /* variant codepoints */                            \
+        if (! UVCHR_IS_INVARIANT(uvc)) {                    \
+            TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(uvc));   \
+        }                                                   \
+    }                                                       \
+} STMT_END
 #define MADE_TRIE       1
 #define MADE_JUMP_TRIE  2
 #define MADE_EXACT_TRIE 4
@@ -2451,8 +2521,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
     /* we just use folder as a flag in utf8 */
     const U8 * folder = NULL;
 
+    /* in the below add_data call we are storing either 'tu' or 'tuaa'
+     * which stands for one trie structure, one hash, optionally followed
+     * by two arrays */
 #ifdef DEBUGGING
-    const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("tuuu"));
+    const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("tuaa"));
     AV *trie_words = NULL;
     /* along with revcharmap, this only used during construction but both are
      * useful during debugging so we store them in the struct when debugging.
@@ -2551,12 +2624,23 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                                                bitmap?*/
 
         if (OP(noper) == NOTHING) {
+            /* skip past a NOTHING at the start of an alternation
+             * eg, /(?:)a|(?:b)/ should be the same as /a|b/
+             */
             regnode *noper_next= regnext(noper);
             if (noper_next < tail)
                 noper= noper_next;
         }
 
-        if ( noper < tail && ( OP(noper) == flags || ( flags == EXACTFU && OP(noper) == EXACTFU_SS ) ) ) {
+        if ( noper < tail &&
+                (
+                    OP(noper) == flags ||
+                    (
+                        flags == EXACTFU &&
+                        OP(noper) == EXACTFU_SS
+                    )
+                )
+        ) {
             uc= (U8*)STRING(noper);
             e= uc + STR_LEN(noper);
         } else {
@@ -2573,6 +2657,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                 TRIE_BITMAP_SET(trie, LATIN_SMALL_LETTER_SHARP_S);
             }
         }
+
         for ( ; uc < e ; uc += len ) {  /* Look at each char in the current
                                            branch */
             TRIE_CHARCOUNT(trie)++;
@@ -2656,18 +2741,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                 if ( set_bit ) {
                    /* store the codepoint in the bitmap, and its folded
                     * equivalent. */
-                    TRIE_BITMAP_SET(trie, uvc);
-
-                   /* store the folded codepoint */
-                    if ( folder ) TRIE_BITMAP_SET(trie, folder[(U8) uvc ]);
-
-                   if ( !UTF ) {
-                       /* store first byte of utf8 representation of
-                          variant codepoints */
-                       if (! UVCHR_IS_INVARIANT(uvc)) {
-                           TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(uvc));
-                       }
-                   }
+                    TRIE_BITMAP_SET_FOLDED(trie, uvc, folder);
                     set_bit = 0; /* We've done our bit :-) */
                 }
             } else {
@@ -2686,7 +2760,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                 svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
 
                 if ( !svpp )
-                    Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc );
+                    Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%" UVXf, uvc );
 
                 if ( !SvTRUE( *svpp ) ) {
                     sv_setiv( *svpp, ++trie->uniquecharcount );
@@ -2830,7 +2904,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                         }
                         state = newstate;
                     } else {
-                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
+                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
                    }
                }
            }
@@ -3027,7 +3101,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                         }
                         state = trie->trans[ state + charid ].next;
                     } else {
-                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
+                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
                     }
                     /* charid is now 0 if we dont know the char read, or
                      * nonzero if we do */
@@ -3160,7 +3234,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
            PerlMemShared_realloc( trie->states, laststate
                                   * sizeof(reg_trie_state) );
         DEBUG_TRIE_COMPILE_MORE_r(
-            Perl_re_indentf( aTHX_  "Alloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
+            Perl_re_indentf( aTHX_  "Alloc: %d Orig: %" IVdf " elements, Final:%" IVdf ". Savings of %%%5.2f\n",
                 depth+1,
                 (int)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount
                        + 1 ),
@@ -3172,7 +3246,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
         } /* end table compress */
     }
     DEBUG_TRIE_COMPILE_MORE_r(
-            Perl_re_indentf( aTHX_  "Statecount:%"UVxf" Lasttrans:%"UVxf"\n",
+            Perl_re_indentf( aTHX_  "Statecount:%" UVxf " Lasttrans:%" UVxf "\n",
                 depth+1,
                 (UV)trie->statecount,
                 (UV)trie->lasttrans)
@@ -3223,7 +3297,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
             });
         }
         DEBUG_OPTIMISE_r(
-            Perl_re_indentf( aTHX_  "MJD offset:%"UVuf" MJD length:%"UVuf"\n",
+            Perl_re_indentf( aTHX_  "MJD offset:%" UVuf " MJD length:%" UVuf "\n",
                 depth+1,
                 (UV)mjd_offset, (UV)mjd_nodelen)
         );
@@ -3232,13 +3306,19 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
            split out as an EXACT and put in front of the TRIE node.  */
         trie->startstate= 1;
         if ( trie->bitmap && !widecharmap && !trie->jump  ) {
+            /* we want to find the first state that has more than
+             * one transition, if that state is not the first state
+             * then we have a common prefix which we can remove.
+             */
             U32 state;
             for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
                 U32 ofs = 0;
-                I32 idx = -1;
+                I32 first_ofs = -1; /* keeps track of the ofs of the first
+                                       transition, -1 means none */
                 U32 count = 0;
                 const U32 base = trie->states[ state ].trans.base;
 
+                /* does this state terminate an alternation? */
                 if ( trie->states[state].wordnum )
                         count = 1;
 
@@ -3248,58 +3328,54 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                          trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
                     {
                         if ( ++count > 1 ) {
-                            SV **tmp = av_fetch( revcharmap, ofs, 0);
-                           const U8 *ch = (U8*)SvPV_nolen_const( *tmp );
+                            /* we have more than one transition */
+                            SV **tmp;
+                            U8 *ch;
+                            /* if this is the first state there is no common prefix
+                             * to extract, so we can exit */
                             if ( state == 1 ) break;
+                            tmp = av_fetch( revcharmap, ofs, 0);
+                            ch = (U8*)SvPV_nolen_const( *tmp );
+
+                            /* if we are on count 2 then we need to initialize the
+                             * bitmap, and store the previous char if there was one
+                             * in it*/
                             if ( count == 2 ) {
+                                /* clear the bitmap */
                                 Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char);
                                 DEBUG_OPTIMISE_r(
-                                    Perl_re_indentf( aTHX_  "New Start State=%"UVuf" Class: [",
+                                    Perl_re_indentf( aTHX_  "New Start State=%" UVuf " Class: [",
                                         depth+1,
                                         (UV)state));
-                               if (idx >= 0) {
-                                   SV ** const tmp = av_fetch( revcharmap, idx, 0);
+                                if (first_ofs >= 0) {
+                                    SV ** const tmp = av_fetch( revcharmap, first_ofs, 0);
                                    const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
 
-                                    TRIE_BITMAP_SET(trie,*ch);
-                                    if ( folder )
-                                        TRIE_BITMAP_SET(trie, folder[ *ch ]);
-                                    if ( !UTF ) {
-                                        /* store first byte of utf8 representation of
-                                           variant codepoints */
-                                        if (! UVCHR_IS_INVARIANT(*ch)) {
-                                            TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(*ch));
-                                        }
-                                    }
+                                    TRIE_BITMAP_SET_FOLDED(trie,*ch,folder);
                                     DEBUG_OPTIMISE_r(
                                         Perl_re_printf( aTHX_  "%s", (char*)ch)
                                     );
                                }
                            }
-                           TRIE_BITMAP_SET(trie,*ch);
-                           if ( folder )
-                               TRIE_BITMAP_SET(trie,folder[ *ch ]);
-                            if ( !UTF ) {
-                                /* store first byte of utf8 representation of
-                                   variant codepoints */
-                                if (! UVCHR_IS_INVARIANT(*ch)) {
-                                    TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(*ch));
-                                }
-                            }
+                            /* store the current firstchar in the bitmap */
+                            TRIE_BITMAP_SET_FOLDED(trie,*ch,folder);
                             DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "%s", ch));
                        }
-                        idx = ofs;
+                        first_ofs = ofs;
                    }
                 }
                 if ( count == 1 ) {
-                    SV **tmp = av_fetch( revcharmap, idx, 0);
+                    /* This state has only one transition, its transition is part
+                     * of a common prefix - we need to concatenate the char it
+                     * represents to what we have so far. */
+                    SV **tmp = av_fetch( revcharmap, first_ofs, 0);
                     STRLEN len;
                     char *ch = SvPV( *tmp, len );
                     DEBUG_OPTIMISE_r({
                         SV *sv=sv_newmortal();
-                        Perl_re_indentf( aTHX_  "Prefix State: %"UVuf" Idx:%"UVuf" Char='%s'\n",
+                        Perl_re_indentf( aTHX_  "Prefix State: %" UVuf " Ofs:%" UVuf " Char='%s'\n",
                             depth+1,
-                            (UV)state, (UV)idx,
+                            (UV)state, (UV)first_ofs,
                             pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6,
                                PL_colors[0], PL_colors[1],
                                (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
@@ -3590,11 +3666,11 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
      */
     fail[ 0 ] = fail[ 1 ] = 0;
     DEBUG_TRIE_COMPILE_r({
-        Perl_re_indentf( aTHX_  "Stclass Failtable (%"UVuf" states): 0",
+        Perl_re_indentf( aTHX_  "Stclass Failtable (%" UVuf " states): 0",
                       depth, (UV)numstates
         );
         for( q_read=1; q_read<numstates; q_read++ ) {
-            Perl_re_printf( aTHX_  ", %"UVuf, (UV)fail[q_read]);
+            Perl_re_printf( aTHX_  ", %" UVuf, (UV)fail[q_read]);
         }
         Perl_re_printf( aTHX_  "\n");
     });
@@ -3604,17 +3680,6 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
 }
 
 
-#define DEBUG_PEEP(str,scan,depth)         \
-    DEBUG_OPTIMISE_r({if (scan){           \
-       regnode *Next = regnext(scan);      \
-       regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);\
-       Perl_re_indentf( aTHX_  "" str ">%3d: %s (%d)", \
-           depth, REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),\
-           Next ? (REG_NODE_NUM(Next)) : 0 );\
-       DEBUG_SHOW_STUDY_FLAGS(flags," [ ","]");\
-       Perl_re_printf( aTHX_  "\n");                   \
-   }});
-
 /* The below joins as many adjacent EXACTish nodes as possible into a single
  * one.  The regop may be changed if the node(s) contain certain sequences that
  * require special handling.  The joining is only done if:
@@ -3769,7 +3834,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
     PERL_UNUSED_ARG(flags);
     PERL_UNUSED_ARG(val);
 #endif
-    DEBUG_PEEP("join",scan,depth);
+    DEBUG_PEEP("join", scan, depth, 0);
 
     /* Look through the subsequent nodes in the chain.  Skip NOTHING, merge
      * EXACT ones that are mergeable to the current one. */
@@ -3783,7 +3848,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
         if (OP(n) == TAIL || n > next)
             stringok = 0;
         if (PL_regkind[OP(n)] == NOTHING) {
-            DEBUG_PEEP("skip:",n,depth);
+            DEBUG_PEEP("skip:", n, depth, 0);
             NEXT_OFF(scan) += NEXT_OFF(n);
             next = n + NODE_STEP_REGNODE;
 #ifdef DEBUGGING
@@ -3803,7 +3868,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
             if (oldl + STR_LEN(n) > U8_MAX)
                 break;
 
-            DEBUG_PEEP("merg",n,depth);
+            DEBUG_PEEP("merg", n, depth, 0);
             merged++;
 
             NEXT_OFF(scan) += NEXT_OFF(n);
@@ -3820,7 +3885,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
 
 #ifdef EXPERIMENTAL_INPLACESCAN
        if (flags && !NEXT_OFF(n)) {
-           DEBUG_PEEP("atch", val, depth);
+           DEBUG_PEEP("atch", val, depth, 0);
            if (reg_off_by_arg[OP(n)]) {
                ARG_SET(n, val - n);
            }
@@ -3892,7 +3957,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                     }
                     else {
                         STRLEN len;
-                        _to_utf8_fold_flags(s, d, &len, FOLD_FLAGS_FULL);
+                        _toFOLD_utf8_flags(s, s_end, d, &len, FOLD_FLAGS_FULL);
                         d += len;
                     }
                     s += s_len;
@@ -4051,7 +4116,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
         n++;
     }
 #endif
-    DEBUG_OPTIMISE_r(if (merged){DEBUG_PEEP("finl",scan,depth)});
+    DEBUG_OPTIMISE_r(if (merged){DEBUG_PEEP("finl", scan, depth, 0);});
     return stopnow;
 }
 
@@ -4165,8 +4230,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                    the folded version may be shorter) */
        bool unfolded_multi_char = FALSE;
        /* Peephole optimizer: */
-        DEBUG_STUDYDATA("Peep:", data, depth);
-        DEBUG_PEEP("Peep", scan, depth);
+        DEBUG_STUDYDATA("Peep", data, depth, is_inf);
+        DEBUG_PEEP("Peep", scan, depth, flags);
 
 
         /* The reason we do this here is that we need to deal with things like
@@ -4210,14 +4275,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             StructCopy(&zero_scan_data, &data_fake, scan_data_t);
             scan = regnext(scan);
             assert( OP(scan) == IFTHEN );
-            DEBUG_PEEP("expect IFTHEN", scan, depth);
+            DEBUG_PEEP("expect IFTHEN", scan, depth, flags);
 
             data_fake.last_closep= &fake_last_close;
             minlen = *minlenp;
             next = regnext(scan);
             scan = NEXTOPER(NEXTOPER(scan));
-            DEBUG_PEEP("scan", scan, depth);
-            DEBUG_PEEP("next", next, depth);
+            DEBUG_PEEP("scan", scan, depth, flags);
+            DEBUG_PEEP("next", next, depth, flags);
 
             /* we suppose the run is continuous, last=next...
              * NOTE we dont use the return here! */
@@ -4261,7 +4326,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    I32 f = 0;
                    regnode_ssc this_class;
 
-                    DEBUG_PEEP("Branch", scan, depth);
+                    DEBUG_PEEP("Branch", scan, depth, flags);
 
                    num++;
                     StructCopy(&zero_scan_data, &data_fake, scan_data_t);
@@ -4326,7 +4391,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    else
                        data->pos_delta += max1 - min1;
                    if (max1 != min1 || is_inf)
-                       data->longest = &(data->longest_float);
+                       data->cur_is_floating = 1;
                }
                min += min1;
                if (delta == SSize_t_MAX
@@ -4433,7 +4498,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
                         DEBUG_TRIE_COMPILE_r({
                             regprop(RExC_rx, RExC_mysv, tail, NULL, pRExC_state);
-                            Perl_re_indentf( aTHX_  "%s %"UVuf":%s\n",
+                            Perl_re_indentf( aTHX_  "%s %" UVuf ":%s\n",
                               depth+1,
                               "Looking for TRIE'able sequences. Tail node is ",
                               (UV)(tail - RExC_emit_start),
@@ -4764,16 +4829,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                              RExC_study_chunk_recursed_bytes, U8);
                     }
                     /* we havent recursed into this paren yet, so recurse into it */
-                    DEBUG_STUDYDATA("gosub-set:", data,depth);
+                    DEBUG_STUDYDATA("gosub-set", data, depth, is_inf);
                     PAREN_SET(RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes), paren);
                     my_recursed_depth= recursed_depth + 1;
                 } else {
-                    DEBUG_STUDYDATA("gosub-inf:", data,depth);
+                    DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf);
                     /* some form of infinite recursion, assume infinite length
                      * */
                     if (flags & SCF_DO_SUBSTR) {
                         scan_commit(pRExC_state, data, minlenp, is_inf);
-                        data->longest = &(data->longest_float);
+                        data->cur_is_floating = 1;
                     }
                     is_inf = is_inf_internal = 1;
                     if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
@@ -4811,8 +4876,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 newframe->prev_recursed_depth = recursed_depth;
                 newframe->this_prev_frame= frame;
 
-                DEBUG_STUDYDATA("frame-new:",data,depth);
-                DEBUG_PEEP("fnew", scan, depth);
+                DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
+                DEBUG_PEEP("fnew", scan, depth, flags);
 
                frame = newframe;
                scan =  start;
@@ -4901,7 +4966,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 }
                 data->pos_delta += min_subtract;
                if (min_subtract) {
-                   data->longest = &(data->longest_float);
+                   data->cur_is_floating = 1; /* float */
                }
            }
 
@@ -4968,7 +5033,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                if (flags & SCF_DO_SUBSTR) {
                     scan_commit(pRExC_state, data, minlenp, is_inf);
                     /* Cannot extend fixed substrings */
-                   data->longest = &(data->longest_float);
+                   data->cur_is_floating = 1; /* float */
                }
                 is_inf = is_inf_internal = 1;
                 scan = regnext(scan);
@@ -5058,7 +5123,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                }
                if (!scan)              /* It was not CURLYX, but CURLY. */
                    scan = next;
-               if (!(flags & SCF_TRIE_DOING_RESTUDY)
+               if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
                    /* ? quantifier ok, except for (?{ ... }) */
                    && (next_is_eval || !(mincount == 0 && maxcount == 1))
                    && (minnext == 0) && (deltanext == 0)
@@ -5070,7 +5135,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    SAVEFREESV(RExC_rx_sv);
                    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
                        "Quantifier unexpected on zero-length expression "
-                       "in regex m/%"UTF8f"/",
+                       "in regex m/%" UTF8f "/",
                         UTF8fARG(UTF, RExC_precomp_end - RExC_precomp,
                                  RExC_precomp));
                    (void)ReREFCNT_inc(RExC_rx_sv);
@@ -5197,15 +5262,21 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                            However, this time it's not a subexpression
                            we care about, but the expression itself. */
                         && (maxcount == REG_INFTY)
-                        && data && ++data->whilem_c < 16) {
+                        && data) {
                    /* This stays as CURLYX, we can put the count/of pair. */
                    /* Find WHILEM (as in regexec.c) */
                    regnode *nxt = oscan + NEXT_OFF(oscan);
 
                    if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
                        nxt += ARG(nxt);
-                   PREVOPER(nxt)->flags = (U8)(data->whilem_c
-                       | (RExC_whilem_seen << 4)); /* On WHILEM */
+                    nxt = PREVOPER(nxt);
+                    if (nxt->flags & 0xf) {
+                        /* we've already set whilem count on this node */
+                    } else if (++data->whilem_c < 16) {
+                        assert(data->whilem_c <= RExC_whilem_seen);
+                        nxt->flags = (U8)(data->whilem_c
+                            | (RExC_whilem_seen << 4)); /* On WHILEM */
+                    }
                }
                if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
                    pars++;
@@ -5266,13 +5337,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    /* It is counted once already... */
                    data->pos_min += minnext * (mincount - counted);
 #if 0
-Perl_re_printf( aTHX_  "counted=%"UVuf" deltanext=%"UVuf
-                              " SSize_t_MAX=%"UVuf" minnext=%"UVuf
-                              " maxcount=%"UVuf" mincount=%"UVuf"\n",
+Perl_re_printf( aTHX_  "counted=%" UVuf " deltanext=%" UVuf
+                              " SSize_t_MAX=%" UVuf " minnext=%" UVuf
+                              " maxcount=%" UVuf " mincount=%" UVuf "\n",
     (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount,
     (UV)mincount);
 if (deltanext != SSize_t_MAX)
-Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
+Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
     (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
           - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta));
 #endif
@@ -5300,7 +5371,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                                ? SSize_t_MAX
                                : data->pos_min + data->pos_delta - last_chrs;
                        }
-                       data->longest = &(data->longest_float);
+                       data->cur_is_floating = 1; /* float */
                    }
                    SvREFCNT_dec(last_str);
                }
@@ -5324,7 +5395,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                if (flags & SCF_DO_SUBSTR) {
                     /* Cannot expect anything... */
                     scan_commit(pRExC_state, data, minlenp, is_inf);
-                   data->longest = &(data->longest_float);
+                   data->cur_is_floating = 1; /* float */
                }
                is_inf = is_inf_internal = 1;
                if (flags & SCF_DO_STCLASS_OR) {
@@ -5371,7 +5442,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                 scan_commit(pRExC_state, data, minlenp, is_inf);
                data->pos_min += 1;
                data->pos_delta += 1;
-               data->longest = &(data->longest_float);
+               data->cur_is_floating = 1; /* float */
            }
        }
        else if (REGNODE_SIMPLE(OP(scan))) {
@@ -5577,7 +5648,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                        FAIL("Variable length lookbehind not implemented");
                     }
                     else if (minnext > (I32)U8_MAX) {
-                       FAIL2("Lookbehind longer than %"UVuf" not implemented",
+                       FAIL2("Lookbehind longer than %" UVuf " not implemented",
                               (UV)U8_MAX);
                     }
                     scan->flags = (U8)minnext;
@@ -5643,6 +5714,8 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                 else
                     data_fake.last_closep = &fake;
                 data_fake.flags = 0;
+                data_fake.substrs[0].flags = 0;
+                data_fake.substrs[1].flags = 0;
                data_fake.pos_delta = delta;
                 if (is_inf)
                    data_fake.flags |= SF_IS_INF;
@@ -5666,7 +5739,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                        FAIL("Variable length lookbehind not implemented");
                     }
                     else if (*minnextp > (I32)U8_MAX) {
-                       FAIL2("Lookbehind longer than %"UVuf" not implemented",
+                       FAIL2("Lookbehind longer than %" UVuf " not implemented",
                               (UV)U8_MAX);
                     }
                     scan->flags = (U8)*minnextp;
@@ -5685,29 +5758,29 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                         data->flags |= SF_HAS_EVAL;
                     data->whilem_c = data_fake.whilem_c;
                     if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
+                        int i;
                         if (RExC_rx->minlen<*minnextp)
                             RExC_rx->minlen=*minnextp;
                         scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
                         SvREFCNT_dec_NN(data_fake.last_found);
 
-                        if ( data_fake.minlen_fixed != minlenp )
-                        {
-                            data->offset_fixed= data_fake.offset_fixed;
-                            data->minlen_fixed= data_fake.minlen_fixed;
-                            data->lookbehind_fixed+= scan->flags;
-                        }
-                        if ( data_fake.minlen_float != minlenp )
-                        {
-                            data->minlen_float= data_fake.minlen_float;
-                            data->offset_float_min=data_fake.offset_float_min;
-                            data->offset_float_max=data_fake.offset_float_max;
-                            data->lookbehind_float+= scan->flags;
+                        for (i = 0; i < 2; i++) {
+                            if (data_fake.substrs[i].minlenp != minlenp) {
+                                data->substrs[i].min_offset =
+                                            data_fake.substrs[i].min_offset;
+                                data->substrs[i].max_offset =
+                                            data_fake.substrs[i].max_offset;
+                                data->substrs[i].minlenp =
+                                            data_fake.substrs[i].minlenp;
+                                data->substrs[i].lookbehind += scan->flags;
+                            }
                         }
                     }
                 }
            }
 #endif
        }
+
        else if (OP(scan) == OPEN) {
            if (stopparen != (I32)ARG(scan))
                pars++;
@@ -5744,7 +5817,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
        {
                if (flags & SCF_DO_SUBSTR) {
                     scan_commit(pRExC_state, data, minlenp, is_inf);
-                   data->longest = &(data->longest_float);
+                   data->cur_is_floating = 1; /* float */
                }
                is_inf = is_inf_internal = 1;
                if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
@@ -5855,7 +5928,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                 data->pos_min += min1;
                 data->pos_delta += max1 - min1;
                 if (max1 != min1 || is_inf)
-                    data->longest = &(data->longest_float);
+                    data->cur_is_floating = 1; /* float */
             }
             min += min1;
             if (delta != SSize_t_MAX)
@@ -5899,7 +5972,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
                data->pos_min += trie->minlen;
                data->pos_delta += (trie->maxlen - trie->minlen);
                if (trie->maxlen != trie->minlen)
-                   data->longest = &(data->longest_float);
+                   data->cur_is_floating = 1; /* float */
            }
            if (trie->jump) /* no more substrings -- for now /grr*/
                flags &= ~SCF_DO_SUBSTR;
@@ -5916,8 +5989,8 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
         /* we need to unwind recursion. */
         depth = depth - 1;
 
-        DEBUG_STUDYDATA("frame-end:",data,depth);
-        DEBUG_PEEP("fend", scan, depth);
+        DEBUG_STUDYDATA("frame-end", data, depth, is_inf);
+        DEBUG_PEEP("fend", scan, depth, flags);
 
         /* restore previous context */
         last = frame->last_regnode;
@@ -5931,7 +6004,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
     }
 
     assert(!frame);
-    DEBUG_STUDYDATA("pre-fin:",data,depth);
+    DEBUG_STUDYDATA("pre-fin", data, depth, is_inf);
 
     *scanp = scan;
     *deltap = is_inf_internal ? SSize_t_MAX : delta;
@@ -5953,7 +6026,7 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
     if (flags & SCF_TRIE_RESTUDY)
         data->flags |=         SCF_TRIE_RESTUDY;
 
-    DEBUG_STUDYDATA("post-fin:",data,depth);
+    DEBUG_STUDYDATA("post-fin", data, depth, is_inf);
 
     {
         SSize_t final_minlen= min < stopmin ? min : stopmin;
@@ -6080,7 +6153,7 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
 
     /* Dispatch a request to compile a regexp to correct regexp engine. */
     DEBUG_COMPILE_r({
-        Perl_re_printf( aTHX_  "Using engine %"UVxf"\n",
+        Perl_re_printf( aTHX_  "Using engine %" UVxf "\n",
                        PTR2UV(eng));
     });
     return CALLREGCOMP_ENG(eng, pattern, flags);
@@ -6107,6 +6180,39 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
 }
 
 
+static void
+S_free_codeblocks(pTHX_ struct reg_code_blocks *cbs)
+{
+    int n;
+
+    if (--cbs->refcnt > 0)
+        return;
+    for (n = 0; n < cbs->count; n++) {
+        REGEXP *rx = cbs->cb[n].src_regex;
+        cbs->cb[n].src_regex = NULL;
+        SvREFCNT_dec(rx);
+    }
+    Safefree(cbs->cb);
+    Safefree(cbs);
+}
+
+
+static struct reg_code_blocks *
+S_alloc_code_blocks(pTHX_  int ncode)
+{
+     struct reg_code_blocks *cbs;
+    Newx(cbs, 1, struct reg_code_blocks);
+    cbs->count = ncode;
+    cbs->refcnt = 1;
+    SAVEDESTRUCTOR_X(S_free_codeblocks, cbs);
+    if (ncode)
+        Newx(cbs->cb, ncode, struct reg_code_block);
+    else
+        cbs->cb = NULL;
+    return cbs;
+}
+
+
 /* upgrade pattern pat_p of length plen_p to UTF8, and if there are code
  * blocks, recalculate the indices. Update pat_p and plen_p in-place to
  * point to the realloced string and length.
@@ -6133,14 +6239,16 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
 
     while (s < *plen_p) {
         append_utf8_from_native_byte(src[s], &d);
+
         if (n < num_code_blocks) {
-            if (!do_end && pRExC_state->code_blocks[n].start == s) {
-                pRExC_state->code_blocks[n].start = d - dst - 1;
+            assert(pRExC_state->code_blocks);
+            if (!do_end && pRExC_state->code_blocks->cb[n].start == s) {
+                pRExC_state->code_blocks->cb[n].start = d - dst - 1;
                 assert(*(d - 1) == '(');
                 do_end = 1;
             }
-            else if (do_end && pRExC_state->code_blocks[n].end == s) {
-                pRExC_state->code_blocks[n].end = d - dst - 1;
+            else if (do_end && pRExC_state->code_blocks->cb[n].end == s) {
+                pRExC_state->code_blocks->cb[n].end = d - dst - 1;
                 assert(*(d - 1) == ')');
                 do_end = 0;
                 n++;
@@ -6260,10 +6368,10 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
             if (oplist->op_type == OP_NULL
                 && (oplist->op_flags & OPf_SPECIAL))
             {
-                assert(n < pRExC_state->num_code_blocks);
-                pRExC_state->code_blocks[n].start = pat ? SvCUR(pat) : 0;
-                pRExC_state->code_blocks[n].block = oplist;
-                pRExC_state->code_blocks[n].src_regex = NULL;
+                assert(n < pRExC_state->code_blocks->count);
+                pRExC_state->code_blocks->cb[n].start = pat ? SvCUR(pat) : 0;
+                pRExC_state->code_blocks->cb[n].block = oplist;
+                pRExC_state->code_blocks->cb[n].src_regex = NULL;
                 n++;
                 code = 1;
                 oplist = OpSIBLING(oplist); /* skip CONST */
@@ -6293,7 +6401,8 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
             sv_setsv(pat, sv);
             /* overloading involved: all bets are off over literal
              * code. Pretend we haven't seen it */
-            pRExC_state->num_code_blocks -= n;
+            if (n)
+                pRExC_state->code_blocks->count -= n;
             n = 0;
         }
         else  {
@@ -6343,7 +6452,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
             }
 
             if (code)
-                pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
+                pRExC_state->code_blocks->cb[n-1].end = SvCUR(pat)-1;
         }
 
         /* extract any code blocks within any embedded qr//'s */
@@ -6352,25 +6461,31 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
         {
 
             RXi_GET_DECL(ReANY((REGEXP *)rx), ri);
-            if (ri->num_code_blocks) {
+            if (ri->code_blocks && ri->code_blocks->count) {
                 int i;
                 /* the presence of an embedded qr// with code means
                  * we should always recompile: the text of the
                  * qr// may not have changed, but it may be a
                  * different closure than last time */
                 *recompile_p = 1;
-                Renew(pRExC_state->code_blocks,
-                    pRExC_state->num_code_blocks + ri->num_code_blocks,
-                    struct reg_code_block);
-                pRExC_state->num_code_blocks += ri->num_code_blocks;
+                if (pRExC_state->code_blocks) {
+                    int new_count = pRExC_state->code_blocks->count
+                            + ri->code_blocks->count;
+                    Renew(pRExC_state->code_blocks->cb,
+                            new_count, struct reg_code_block);
+                    pRExC_state->code_blocks->count = new_count;
+                }
+                else
+                    pRExC_state->code_blocks = S_alloc_code_blocks(aTHX_
+                                                    ri->code_blocks->count);
 
-                for (i=0; i < ri->num_code_blocks; i++) {
+                for (i=0; i < ri->code_blocks->count; i++) {
                     struct reg_code_block *src, *dst;
                     STRLEN offset =  orig_patlen
                         + ReANY((REGEXP *)rx)->pre_prefix;
-                    assert(n < pRExC_state->num_code_blocks);
-                    src = &ri->code_blocks[i];
-                    dst = &pRExC_state->code_blocks[n];
+                    assert(n < pRExC_state->code_blocks->count);
+                    src = &ri->code_blocks->cb[i];
+                    dst = &pRExC_state->code_blocks->cb[n];
                     dst->start     = src->start + offset;
                     dst->end       = src->end   + offset;
                     dst->block     = src->block;
@@ -6405,10 +6520,11 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
     PERL_UNUSED_CONTEXT;
 
     for (s = 0; s < plen; s++) {
-       if (n < pRExC_state->num_code_blocks
-           && s == pRExC_state->code_blocks[n].start)
+       if (   pRExC_state->code_blocks
+            && n < pRExC_state->code_blocks->count
+           && s == pRExC_state->code_blocks->cb[n].start)
        {
-           s = pRExC_state->code_blocks[n].end;
+           s = pRExC_state->code_blocks->cb[n].end;
            n++;
            continue;
        }
@@ -6468,7 +6584,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
        int n = 0;
        STRLEN s;
        char *p, *newpat;
-       int newlen = plen + 6; /* allow for "qr''x\0" extra chars */
+       int newlen = plen + 7; /* allow for "qr''xx\0" extra chars */
        SV *sv, *qr_ref;
        dSP;
 
@@ -6483,12 +6599,13 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
        *p++ = 'q'; *p++ = 'r'; *p++ = '\'';
 
        for (s = 0; s < plen; s++) {
-           if (n < pRExC_state->num_code_blocks
-               && s == pRExC_state->code_blocks[n].start)
+           if (   pRExC_state->code_blocks
+               && n < pRExC_state->code_blocks->count
+               && s == pRExC_state->code_blocks->cb[n].start)
            {
                /* blank out literal code block */
                assert(pat[s] == '(');
-               while (s <= pRExC_state->code_blocks[n].end) {
+               while (s <= pRExC_state->code_blocks->cb[n].end) {
                    *p++ = '_';
                    s++;
                }
@@ -6501,8 +6618,12 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
            *p++ = pat[s];
        }
        *p++ = '\'';
-       if (pRExC_state->pm_flags & RXf_PMf_EXTENDED)
+       if (pRExC_state->pm_flags & RXf_PMf_EXTENDED) {
            *p++ = 'x';
+            if (pRExC_state->pm_flags & RXf_PMf_EXTENDED_MORE) {
+                *p++ = 'x';
+            }
+        }
        *p++ = '\0';
        DEBUG_COMPILE_r({
             Perl_re_printf( aTHX_
@@ -6528,11 +6649,8 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
        {
            SV * const errsv = ERRSV;
            if (SvTRUE_NN(errsv))
-           {
-               Safefree(pRExC_state->code_blocks);
                 /* use croak_sv ? */
-               Perl_croak_nocontext("%"SVf, SVfARG(errsv));
-           }
+               Perl_croak_nocontext("%" SVf, SVfARG(errsv));
        }
        assert(SvROK(qr_ref));
        qr = SvRV(qr_ref);
@@ -6564,42 +6682,46 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
        struct reg_code_block *new_block, *dst;
        RExC_state_t * const r1 = pRExC_state; /* convenient alias */
        int i1 = 0, i2 = 0;
+        int r1c, r2c;
 
-       if (!r2->num_code_blocks) /* we guessed wrong */
+       if (!r2->code_blocks || !r2->code_blocks->count) /* we guessed wrong */
        {
            SvREFCNT_dec_NN(qr);
            return 1;
        }
 
-       Newx(new_block,
-           r1->num_code_blocks + r2->num_code_blocks,
-           struct reg_code_block);
+        if (!r1->code_blocks)
+            r1->code_blocks = S_alloc_code_blocks(aTHX_ 0);
+
+        r1c = r1->code_blocks->count;
+        r2c = r2->code_blocks->count;
+
+       Newx(new_block, r1c + r2c, struct reg_code_block);
+
        dst = new_block;
 
-       while (    i1 < r1->num_code_blocks
-               || i2 < r2->num_code_blocks)
-       {
+       while (i1 < r1c || i2 < r2c) {
            struct reg_code_block *src;
            bool is_qr = 0;
 
-           if (i1 == r1->num_code_blocks) {
-               src = &r2->code_blocks[i2++];
+           if (i1 == r1c) {
+               src = &r2->code_blocks->cb[i2++];
                is_qr = 1;
            }
-           else if (i2 == r2->num_code_blocks)
-               src = &r1->code_blocks[i1++];
-           else if (  r1->code_blocks[i1].start
-                    < r2->code_blocks[i2].start)
+           else if (i2 == r2c)
+               src = &r1->code_blocks->cb[i1++];
+           else if (  r1->code_blocks->cb[i1].start
+                    < r2->code_blocks->cb[i2].start)
            {
-               src = &r1->code_blocks[i1++];
-               assert(src->end < r2->code_blocks[i2].start);
+               src = &r1->code_blocks->cb[i1++];
+               assert(src->end < r2->code_blocks->cb[i2].start);
            }
            else {
-               assert(  r1->code_blocks[i1].start
-                      > r2->code_blocks[i2].start);
-               src = &r2->code_blocks[i2++];
+               assert(  r1->code_blocks->cb[i1].start
+                      > r2->code_blocks->cb[i2].start);
+               src = &r2->code_blocks->cb[i2++];
                is_qr = 1;
-               assert(src->end < r1->code_blocks[i1].start);
+               assert(src->end < r1->code_blocks->cb[i1].start);
            }
 
            assert(pat[src->start] == '(');
@@ -6611,9 +6733,9 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
                                    : src->src_regex;
            dst++;
        }
-       r1->num_code_blocks += r2->num_code_blocks;
-       Safefree(r1->code_blocks);
-       r1->code_blocks = new_block;
+       r1->code_blocks->count += r2c;
+       Safefree(r1->code_blocks->cb);
+       r1->code_blocks->cb = new_block;
     }
 
     SvREFCNT_dec_NN(qr);
@@ -6622,10 +6744,10 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 
 
 STATIC bool
-S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest,
-                      SV** rx_utf8, SV** rx_substr, SSize_t* rx_end_shift,
-                     SSize_t lookbehind, SSize_t offset, SSize_t *minlen,
-                      STRLEN longest_length, bool eol, bool meol)
+S_setup_longest(pTHX_ RExC_state_t *pRExC_state,
+                      struct reg_substr_datum  *rsd,
+                      struct scan_data_substrs *sub,
+                      STRLEN longest_length)
 {
     /* This is the common code for setting up the floating and fixed length
      * string data extracted from Perl_re_op_compile() below.  Returns a boolean
@@ -6633,6 +6755,8 @@ S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest,
 
     I32 t;
     SSize_t ml;
+    bool eol  = cBOOL(sub->flags & SF_BEFORE_EOL);
+    bool meol = cBOOL(sub->flags & SF_BEFORE_MEOL);
 
     if (! (longest_length
            || (eol /* Can't have SEOL and MULTI */
@@ -6646,25 +6770,29 @@ S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest,
 
     /* copy the information about the longest from the reg_scan_data
         over to the program. */
-    if (SvUTF8(sv_longest)) {
-        *rx_utf8 = sv_longest;
-        *rx_substr = NULL;
+    if (SvUTF8(sub->str)) {
+        rsd->substr      = NULL;
+        rsd->utf8_substr = sub->str;
     } else {
-        *rx_substr = sv_longest;
-        *rx_utf8 = NULL;
+        rsd->substr      = sub->str;
+        rsd->utf8_substr = NULL;
     }
     /* end_shift is how many chars that must be matched that
         follow this item. We calculate it ahead of time as once the
         lookbehind offset is added in we lose the ability to correctly
         calculate it.*/
-    ml = minlen ? *(minlen) : (SSize_t)longest_length;
-    *rx_end_shift = ml - offset
-        - longest_length + (SvTAIL(sv_longest) != 0)
-        + lookbehind;
+    ml = sub->minlenp ? *(sub->minlenp) : (SSize_t)longest_length;
+    rsd->end_shift = ml - sub->min_offset
+        - longest_length
+            /* XXX SvTAIL is always false here - did you mean FBMcf_TAIL
+             * intead? - DAPM
+            + (SvTAIL(sub->str) != 0)
+            */
+        + sub->lookbehind;
 
     t = (eol/* Can't have SEOL and MULTI */
          && (! meol || (RExC_flags & RXf_PMf_MULTILINE)));
-    fbm_compile(sv_longest, t ? FBMcf_TAIL : 0);
+    fbm_compile(sub->str, t ? FBMcf_TAIL : 0);
 
     return TRUE;
 }
@@ -6728,7 +6856,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     SSize_t minlen = 0;
     U32 rx_flags;
     SV *pat;
-    SV *code_blocksv = NULL;
     SV** new_patternp = patternp;
 
     /* these are all flags - maybe they should be turned
@@ -6786,7 +6913,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     pRExC_state->warn_text = NULL;
     pRExC_state->code_blocks = NULL;
-    pRExC_state->num_code_blocks = 0;
 
     if (is_bare_re)
        *is_bare_re = FALSE;
@@ -6800,10 +6926,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o))
            if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
                ncode++; /* count of DO blocks */
-       if (ncode) {
-           pRExC_state->num_code_blocks = ncode;
-           Newx(pRExC_state->code_blocks, ncode, struct reg_code_block);
-       }
+
+       if (ncode)
+            pRExC_state->code_blocks = S_alloc_code_blocks(aTHX_ ncode);
     }
 
     if (!pat_count) {
@@ -6847,7 +6972,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     /* set expr to the first arg op */
 
-    if (pRExC_state->num_code_blocks
+    if (pRExC_state->code_blocks && pRExC_state->code_blocks->count
          && expr->op_type != OP_CONST)
     {
             expr = cLISTOPx(expr)->op_first;
@@ -6869,7 +6994,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             if (is_bare_re)
                 *is_bare_re = TRUE;
             SvREFCNT_inc(re);
-            Safefree(pRExC_state->code_blocks);
             DEBUG_PARSE_r(Perl_re_printf( aTHX_
                 "Precompiled pattern%s\n",
                     orig_rx_flags & RXf_SPLIT ? " for split" : ""));
@@ -6889,7 +7013,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
            pat = newSVpvn_flags(exp, plen, SVs_TEMP |
                                        (IN_BYTES ? 0 : SvUTF8(pat)));
        }
-       Safefree(pRExC_state->code_blocks);
        return CALLREGCOMP_ENG(eng, pat, orig_rx_flags);
     }
 
@@ -6899,7 +7022,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_uni_semantics = 0;
     RExC_seen_unfolded_sharp_s = 0;
     RExC_contains_locale = 0;
-    RExC_contains_i = 0;
     RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
     RExC_study_started = 0;
     pRExC_state->runtime_code_qr = NULL;
@@ -6945,15 +7067,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         && memEQ(RX_PRECOMP(old_re), exp, plen)
        && !runtime_code /* with runtime code, always recompile */ )
     {
-        Safefree(pRExC_state->code_blocks);
         return old_re;
     }
 
     rx_flags = orig_rx_flags;
 
-    if (rx_flags & PMf_FOLD) {
-        RExC_contains_i = 1;
-    }
     if (   initial_charset == REGEX_DEPENDS_CHARSET
         && (RExC_utf8 ||RExC_uni_semantics))
     {
@@ -6977,7 +7095,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
            /* whoops, we have a non-utf8 pattern, whilst run-time code
             * got compiled as utf8. Try again with a utf8 pattern */
             S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
-                                    pRExC_state->num_code_blocks);
+                pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
             goto redo_first_pass;
        }
     }
@@ -6990,7 +7108,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_in_lookbehind = 0;
     RExC_seen_zerolen = *exp == '^' ? -1 : 0;
     RExC_extralen = 0;
-    RExC_override_recoding = 0;
 #ifdef EBCDIC
     RExC_recode_x_to_native = 0;
 #endif
@@ -7034,17 +7151,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         RExC_lastnum=0;
         RExC_lastparse=NULL;
     );
-    /* reg may croak on us, not giving us a chance to free
-       pRExC_state->code_blocks.  We cannot SAVEFREEPV it now, as we may
-       need it to survive as long as the regexp (qr/(?{})/).
-       We must check that code_blocksv is not already set, because we may
-       have jumped back to restart the sizing pass. */
-    if (pRExC_state->code_blocks && !code_blocksv) {
-       code_blocksv = newSV_type(SVt_PV);
-       SAVEFREESV(code_blocksv);
-       SvPV_set(code_blocksv, (char *)pRExC_state->code_blocks);
-       SvLEN_set(code_blocksv, 1); /*sufficient to make sv_clear free it*/
-    }
+
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
         /* It's possible to write a regexp in ascii that represents Unicode
         codepoints outside of the byte range, such as via \x{100}. If we
@@ -7057,7 +7164,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         if (flags & RESTART_PASS1) {
             if (flags & NEED_UTF8) {
                 S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
-                                    pRExC_state->num_code_blocks);
+                pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
             }
             else {
                 DEBUG_PARSE_r(Perl_re_printf( aTHX_
@@ -7066,14 +7173,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
             goto redo_first_pass;
         }
-        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#"UVxf"", (UV) flags);
+        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#" UVxf, (UV) flags);
     }
-    if (code_blocksv)
-       SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */
 
     DEBUG_PARSE_r({
         Perl_re_printf( aTHX_
-            "Required size %"IVdf" nodes\n"
+            "Required size %" IVdf " nodes\n"
             "Starting second pass (creation)\n",
             (IV)RExC_size);
         RExC_lastnum=0;
@@ -7122,16 +7227,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     if (pm_flags & PMf_IS_QR) {
        ri->code_blocks = pRExC_state->code_blocks;
-       ri->num_code_blocks = pRExC_state->num_code_blocks;
-    }
-    else
-    {
-       int n;
-       for (n = 0; n < pRExC_state->num_code_blocks; n++)
-           if (pRExC_state->code_blocks[n].src_regex)
-               SAVEFREESV(pRExC_state->code_blocks[n].src_regex);
-       if(pRExC_state->code_blocks)
-           SAVEFREEPV(pRExC_state->code_blocks); /* often null */
+       if (ri->code_blocks)
+            ri->code_blocks->refcnt++;
     }
 
     {
@@ -7148,7 +7245,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                                                    == REG_RUN_ON_COMMENT_SEEN);
        U8 reganch = (U8)((r->extflags & RXf_PMf_STD_PMMOD)
                            >> RXf_PMf_STD_PMMOD_SHIFT);
-       const char *fptr = STD_PAT_MODS;        /*"msixn"*/
+       const char *fptr = STD_PAT_MODS;        /*"msixxn"*/
        char *p;
 
         /* We output all the necessary flags; we never output a minus, as all
@@ -7165,8 +7262,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         /* make sure PL_bitcount bounds not exceeded */
         assert(sizeof(STD_PAT_MODS) <= 8);
 
-        Newx(p, wraplen + 1, char); /* +1 for the ending NUL */
-       r->xpv_len_u.xpvlenu_pv = p;
+        p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
+        SvPOK_on(rx);
        if (RExC_utf8)
            SvFLAGS(rx) |= SVf_UTF8;
         *p++='('; *p++='?';
@@ -7211,7 +7308,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 #ifdef RE_TRACK_PATTERN_OFFSETS
     Newxz(ri->u.offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
     DEBUG_OFFSETS_r(Perl_re_printf( aTHX_
-                          "%s %"UVuf" bytes for offset annotations.\n",
+                          "%s %" UVuf " bytes for offset annotations.\n",
                           ri->u.offsets ? "Got" : "Couldn't get",
                           (UV)((2*RExC_size+1) * sizeof(U32))));
 #endif
@@ -7265,7 +7362,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_npar = 1;
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
        ReREFCNT_dec(rx);
-        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags);
+        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#" UVxf, (UV) flags);
     }
     DEBUG_OPTIMISE_r(
         Perl_re_printf( aTHX_  "Starting post parse optimization\n");
@@ -7326,12 +7423,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)) { /*  Only one top-level choice.
                                                   */
        SSize_t fake;
-       STRLEN longest_float_length, longest_fixed_length;
+       STRLEN longest_length[2];
        regnode_ssc ch_class; /* pointed to by data */
        int stclass_flag;
        SSize_t last_close = 0; /* pointed to by data */
         regnode *first= scan;
         regnode *first_next= regnext(first);
+        int i;
+
        /*
         * Skip introductions and multiplicators >= 1
         * so that we can extract the 'meat' of the pattern that must
@@ -7373,7 +7472,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
        /* Starting-point info. */
       again:
-        DEBUG_PEEP("first:",first,0);
+        DEBUG_PEEP("first:", first, 0, 0);
         /* Ignore EXACT as we deal with it later. */
        if (PL_regkind[OP(first)] == EXACT) {
            if (OP(first) == EXACT || OP(first) == EXACTL)
@@ -7410,7 +7509,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             !sawlookahead &&
            (OP(first) == STAR &&
            PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
-            !(r->intflags & PREGf_ANCH) && !pRExC_state->num_code_blocks)
+            !(r->intflags & PREGf_ANCH) && !pRExC_state->code_blocks)
        {
            /* turn .* into ^.* with an implied $*=1 */
            const int type =
@@ -7423,7 +7522,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        }
         if (sawplus && !sawminmod && !sawlookahead
             && (!sawopen || !RExC_sawback)
-           && !pRExC_state->num_code_blocks) /* May examine pos and $& */
+           && !pRExC_state->code_blocks) /* May examine pos and $& */
            /* x+ must match at the 1st pos of run of x's */
            r->intflags |= PREGf_SKIP;
 
@@ -7431,12 +7530,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 #ifdef TRIE_STUDY_OPT
        DEBUG_PARSE_r(
            if (!restudied)
-                Perl_re_printf( aTHX_  "first at %"IVdf"\n",
+                Perl_re_printf( aTHX_  "first at %" IVdf "\n",
                              (IV)(first - scan + 1))
         );
 #else
        DEBUG_PARSE_r(
-            Perl_re_printf( aTHX_  "first at %"IVdf"\n",
+            Perl_re_printf( aTHX_  "first at %" IVdf "\n",
                (IV)(first - scan + 1))
         );
 #endif
@@ -7454,13 +7553,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        * earlier string may buy us something the later one won't.]
        */
 
-       data.longest_fixed = newSVpvs("");
-       data.longest_float = newSVpvs("");
+       data.substrs[0].str = newSVpvs("");
+       data.substrs[1].str = newSVpvs("");
        data.last_found = newSVpvs("");
-       data.longest = &(data.longest_fixed);
+       data.cur_is_floating = 0; /* initially any found substring is fixed */
        ENTER_with_name("study_chunk");
-       SAVEFREESV(data.longest_fixed);
-       SAVEFREESV(data.longest_float);
+       SAVEFREESV(data.substrs[0].str);
+       SAVEFREESV(data.substrs[1].str);
        SAVEFREESV(data.last_found);
        first = scan;
        if (!ri->regstclass) {
@@ -7483,7 +7582,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
 
 
-       if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
+       if ( RExC_npar == 1 && !data.cur_is_floating
             && data.last_start_min == 0 && data.last_end > 0
             && !RExC_seen_zerolen
              && !(RExC_seen & REG_VERBARG_SEEN)
@@ -7493,62 +7592,49 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         }
        scan_commit(pRExC_state, &data,&minlen,0);
 
-       longest_float_length = CHR_SVLEN(data.longest_float);
-
-        if (! ((SvCUR(data.longest_fixed)  /* ok to leave SvCUR */
-                   && data.offset_fixed == data.offset_float_min
-                   && SvCUR(data.longest_fixed) == SvCUR(data.longest_float)))
-            && S_setup_longest (aTHX_ pRExC_state,
-                                    data.longest_float,
-                                    &(r->float_utf8),
-                                    &(r->float_substr),
-                                    &(r->float_end_shift),
-                                    data.lookbehind_float,
-                                    data.offset_float_min,
-                                    data.minlen_float,
-                                    longest_float_length,
-                                    cBOOL(data.flags & SF_FL_BEFORE_EOL),
-                                    cBOOL(data.flags & SF_FL_BEFORE_MEOL)))
-        {
-           r->float_min_offset = data.offset_float_min - data.lookbehind_float;
-           r->float_max_offset = data.offset_float_max;
-           if (data.offset_float_max < SSize_t_MAX) /* Don't offset infinity */
-               r->float_max_offset -= data.lookbehind_float;
-           SvREFCNT_inc_simple_void_NN(data.longest_float);
-       }
-       else {
-           r->float_substr = r->float_utf8 = NULL;
-           longest_float_length = 0;
-       }
 
-       longest_fixed_length = CHR_SVLEN(data.longest_fixed);
-
-        if (S_setup_longest (aTHX_ pRExC_state,
-                                data.longest_fixed,
-                                &(r->anchored_utf8),
-                                &(r->anchored_substr),
-                                &(r->anchored_end_shift),
-                                data.lookbehind_fixed,
-                                data.offset_fixed,
-                                data.minlen_fixed,
-                                longest_fixed_length,
-                                cBOOL(data.flags & SF_FIX_BEFORE_EOL),
-                                cBOOL(data.flags & SF_FIX_BEFORE_MEOL)))
-        {
-           r->anchored_offset = data.offset_fixed - data.lookbehind_fixed;
-           SvREFCNT_inc_simple_void_NN(data.longest_fixed);
-       }
-       else {
-           r->anchored_substr = r->anchored_utf8 = NULL;
-           longest_fixed_length = 0;
-       }
+        /* XXX this is done in reverse order because that's the way the
+         * code was before it was parameterised. Don't know whether it
+         * actually needs doing in reverse order. DAPM */
+        for (i = 1; i >= 0; i--) {
+            longest_length[i] = CHR_SVLEN(data.substrs[i].str);
+
+            if (   !(   i
+                     && SvCUR(data.substrs[0].str)  /* ok to leave SvCUR */
+                     &&    data.substrs[0].min_offset
+                        == data.substrs[1].min_offset
+                     &&    SvCUR(data.substrs[0].str)
+                        == SvCUR(data.substrs[1].str)
+                    )
+                && S_setup_longest (aTHX_ pRExC_state,
+                                        &(r->substrs->data[i]),
+                                        &(data.substrs[i]),
+                                        longest_length[i]))
+            {
+                r->substrs->data[i].min_offset =
+                        data.substrs[i].min_offset - data.substrs[i].lookbehind;
+
+                r->substrs->data[i].max_offset = data.substrs[i].max_offset;
+                /* Don't offset infinity */
+                if (data.substrs[i].max_offset < SSize_t_MAX)
+                    r->substrs->data[i].max_offset -= data.substrs[i].lookbehind;
+                SvREFCNT_inc_simple_void_NN(data.substrs[i].str);
+            }
+            else {
+                r->substrs->data[i].substr      = NULL;
+                r->substrs->data[i].utf8_substr = NULL;
+                longest_length[i] = 0;
+            }
+        }
+
        LEAVE_with_name("study_chunk");
 
        if (ri->regstclass
            && (OP(ri->regstclass) == REG_ANY || OP(ri->regstclass) == SANY))
            ri->regstclass = NULL;
 
-       if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
+       if ((!(r->substrs->data[0].substr || r->substrs->data[0].utf8_substr)
+              || r->substrs->data[0].min_offset)
            && stclass_flag
             && ! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
            && is_ssc_worth_it(pRExC_state, data.start_class))
@@ -7571,37 +7657,29 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             data.start_class = NULL;
        }
 
-        /* A temporary algorithm prefers floated substr to fixed one to dig
-         * more info. */
-       if (longest_fixed_length > longest_float_length) {
-           r->substrs->check_ix = 0;
-           r->check_end_shift = r->anchored_end_shift;
-           r->check_substr = r->anchored_substr;
-           r->check_utf8 = r->anchored_utf8;
-           r->check_offset_min = r->check_offset_max = r->anchored_offset;
-            if (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS))
-                r->intflags |= PREGf_NOSCAN;
-       }
-       else {
-           r->substrs->check_ix = 1;
-           r->check_end_shift = r->float_end_shift;
-           r->check_substr = r->float_substr;
-           r->check_utf8 = r->float_utf8;
-           r->check_offset_min = r->float_min_offset;
-           r->check_offset_max = r->float_max_offset;
-       }
+        /* A temporary algorithm prefers floated substr to fixed one of
+         * same length to dig more info. */
+       i = (longest_length[0] <= longest_length[1]);
+        r->substrs->check_ix = i;
+        r->check_end_shift  = r->substrs->data[i].end_shift;
+        r->check_substr     = r->substrs->data[i].substr;
+        r->check_utf8       = r->substrs->data[i].utf8_substr;
+        r->check_offset_min = r->substrs->data[i].min_offset;
+        r->check_offset_max = r->substrs->data[i].max_offset;
+        if (!i && (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS)))
+            r->intflags |= PREGf_NOSCAN;
+
        if ((r->check_substr || r->check_utf8) ) {
            r->extflags |= RXf_USE_INTUIT;
            if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
                r->extflags |= RXf_INTUIT_TAIL;
        }
-        r->substrs->data[0].max_offset = r->substrs->data[0].min_offset;
 
        /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
-       if ( (STRLEN)minlen < longest_float_length )
-            minlen= longest_float_length;
-        if ( (STRLEN)minlen < longest_fixed_length )
-            minlen= longest_fixed_length;
+       if ( (STRLEN)minlen < longest_length[1] )
+            minlen= longest_length[1];
+        if ( (STRLEN)minlen < longest_length[0] )
+            minlen= longest_length[0];
         */
     }
     else {
@@ -7627,8 +7705,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
         CHECK_RESTUDY_GOTO_butfirst(NOOP);
 
-       r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
-               = r->float_substr = r->float_utf8 = NULL;
+       r->check_substr = NULL;
+        r->check_utf8 = NULL;
+        r->substrs->data[0].substr      = NULL;
+        r->substrs->data[0].utf8_substr = NULL;
+        r->substrs->data[1].substr      = NULL;
+        r->substrs->data[1].utf8_substr = NULL;
 
         if (! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
            && is_ssc_worth_it(pRExC_state, data.start_class))
@@ -7663,7 +7745,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /* Guard against an embedded (?=) or (?<=) with a longer minlen than
        the "real" pattern. */
     DEBUG_OPTIMISE_r({
-        Perl_re_printf( aTHX_ "minlen: %"IVdf" r->minlen:%"IVdf" maxlen:%"IVdf"\n",
+        Perl_re_printf( aTHX_ "minlen: %" IVdf " r->minlen:%" IVdf " maxlen:%" IVdf "\n",
                       (IV)minlen, (IV)r->minlen, (IV)RExC_maxlen);
     });
     r->minlenret = minlen;
@@ -7679,7 +7761,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     if (RExC_seen & REG_LOOKBEHIND_SEEN)
         r->extflags |= RXf_NO_INPLACE_SUBST; /* inplace might break the
                                                 lookbehind */
-    if (pRExC_state->num_code_blocks)
+    if (pRExC_state->code_blocks)
        r->extflags |= RXf_EVAL_SEEN;
     if (RExC_seen & REG_VERBARG_SEEN)
     {
@@ -7751,6 +7833,18 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     while ( RExC_recurse_count > 0 ) {
         const regnode *scan = RExC_recurse[ --RExC_recurse_count ];
+        /*
+         * This data structure is set up in study_chunk() and is used
+         * to calculate the distance between a GOSUB regopcode and
+         * the OPEN/CURLYM (CURLYM's are special and can act like OPEN's)
+         * it refers to.
+         *
+         * If for some reason someone writes code that optimises
+         * away a GOSUB opcode then the assert should be changed to
+         * an if(scan) to guard the ARG2L_SET() - Yves
+         *
+         */
+        assert(scan && OP(scan) == GOSUB);
         ARG2L_SET( scan, RExC_open_parens[ARG(scan)] - scan );
     }
 
@@ -7771,10 +7865,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         STRLEN i;
         GET_RE_DEBUG_FLAGS_DECL;
         Perl_re_printf( aTHX_
-                      "Offsets: [%"UVuf"]\n\t", (UV)ri->u.offsets[0]);
+                      "Offsets: [%" UVuf "]\n\t", (UV)ri->u.offsets[0]);
         for (i = 1; i <= len; i++) {
             if (ri->u.offsets[i*2-1] || ri->u.offsets[i*2])
-                Perl_re_printf( aTHX_  "%"UVuf":%"UVuf"[%"UVuf"] ",
+                Perl_re_printf( aTHX_  "%" UVuf ":%" UVuf "[%" UVuf "] ",
                 (UV)i, (UV)ri->u.offsets[i*2-1], (UV)ri->u.offsets[i*2]);
             }
         Perl_re_printf( aTHX_  "\n");
@@ -7841,21 +7935,18 @@ SV*
 Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
                          const U32 flags)
 {
-    AV *retarray = NULL;
     SV *ret;
     struct regexp *const rx = ReANY(r);
 
     PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
 
-    if (flags & RXapif_ALL)
-        retarray=newAV();
-
     if (rx && RXp_PAREN_NAMES(rx)) {
         HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
         if (he_str) {
             IV i;
             SV* sv_dat=HeVAL(he_str);
             I32 *nums=(I32*)SvPVX(sv_dat);
+            AV * const retarray = (flags & RXapif_ALL) ? newAV() : NULL;
             for ( i=0; i<SvIVX(sv_dat); i++ ) {
                 if ((I32)(rx->nparens) >= nums[i]
                     && rx->offs[nums[i]].start != -1
@@ -8116,7 +8207,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
         }
     } else {
       ret_undef:
-        sv_setsv(sv,&PL_sv_undef);
+        sv_set_undef(sv);
         return;
     }
 }
@@ -8248,17 +8339,18 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
 
     assert (RExC_parse <= RExC_end);
     if (RExC_parse == RExC_end) NOOP;
-    else if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
+    else if (isIDFIRST_lazy_if_safe(RExC_parse, RExC_end, UTF)) {
          /* Note that the code here assumes well-formed UTF-8.  Skip IDFIRST by
           * using do...while */
        if (UTF)
            do {
                RExC_parse += UTF8SKIP(RExC_parse);
-           } while (isWORDCHAR_utf8((U8*)RExC_parse));
+           } while (   RExC_parse < RExC_end
+                     && isWORDCHAR_utf8_safe((U8*)RExC_parse, (U8*) RExC_end));
        else
            do {
                RExC_parse++;
-           } while (isWORDCHAR(*RExC_parse));
+           } while (RExC_parse < RExC_end && isWORDCHAR(*RExC_parse));
     } else {
         RExC_parse++; /* so the <- from the vFAIL is after the offending
                          character */
@@ -8709,7 +8801,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
        if (   array[final_element] > start
            || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
        {
-           Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%"UVuf", start=%"UVuf", match=%c",
+           Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c",
                     array[final_element], start,
                     ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f');
        }
@@ -9938,18 +10030,18 @@ S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
     invlist_iterinit(invlist);
     while (invlist_iternext(invlist, &start, &end)) {
        if (end == UV_MAX) {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%cINFINITY%c",
+           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFINITY%c",
                                           start, intra_range_delimiter,
                                                  inter_range_delimiter);
        }
        else if (end != start) {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c%04"UVXf"%c",
+           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
                                          start,
                                                    intra_range_delimiter,
                                                   end, inter_range_delimiter);
        }
        else {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c",
+           Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
                                           start, inter_range_delimiter);
        }
     }
@@ -9996,16 +10088,16 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
     while (invlist_iternext(invlist, &start, &end)) {
        if (end == UV_MAX) {
            Perl_dump_indent(aTHX_ level, file,
-                                       "%s[%"UVuf"] 0x%04"UVXf" .. INFINITY\n",
+                                       "%s[%" UVuf "] 0x%04" UVXf " .. INFINITY\n",
                                    indent, (UV)count, start);
        }
        else if (end != start) {
            Perl_dump_indent(aTHX_ level, file,
-                                    "%s[%"UVuf"] 0x%04"UVXf" .. 0x%04"UVXf"\n",
+                                    "%s[%" UVuf "] 0x%04" UVXf " .. 0x%04" UVXf "\n",
                                indent, (UV)count, start,         end);
        }
        else {
-           Perl_dump_indent(aTHX_ level, file, "%s[%"UVuf"] 0x%04"UVXf"\n",
+           Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n",
                                             indent, (UV)count, start);
        }
         count += 2;
@@ -10021,9 +10113,10 @@ Perl__load_PL_utf8_foldclosures (pTHX)
      * to force that */
     if (! PL_utf8_tofold) {
         U8 dummy[UTF8_MAXBYTES_CASE+1];
+        const U8 hyphen[] = HYPHEN_UTF8;
 
         /* This string is just a short named one above \xff */
-        to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
+        toFOLD_utf8_safe(hyphen, hyphen + sizeof(hyphen) - 1, dummy, NULL);
         assert(PL_utf8_tofold); /* Verify that worked */
     }
     PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
@@ -10043,9 +10136,6 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
     UV len_a = _invlist_len(a);
     UV len_b = _invlist_len(b);
 
-    UV i = 0;              /* current index into the arrays */
-    bool retval = TRUE;     /* Assume are identical until proven otherwise */
-
     PERL_ARGS_ASSERT__INVLISTEQ;
 
     /* If are to compare 'a' with the complement of b, set it
@@ -10075,20 +10165,9 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
         }
     }
 
-    /* Make sure that the lengths are the same, as well as the final element
-     * before looping through the remainder.  (Thus we test the length, final,
-     * and first elements right off the bat) */
-    if (len_a != len_b || array_a[len_a-1] != array_b[len_a-1]) {
-        retval = FALSE;
-    }
-    else for (i = 0; i < len_a - 1; i++) {
-        if (array_a[i] != array_b[i]) {
-            retval = FALSE;
-            break;
-        }
-    }
+    return    len_a == len_b
+           && memEQ(array_a, array_b, len_a * sizeof(array_a[0]));
 
-    return retval;
 }
 #endif
 
@@ -10174,7 +10253,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
                 }
                 else {
                     STRLEN len;
-                    to_utf8_fold(s, d, &len);
+                    toFOLD_utf8_safe(s, e, d, &len);
                     d += len;
                     s += UTF8SKIP(s);
                 }
@@ -10215,7 +10294,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
             {
                 AV* list = (AV*) *listp;
                 IV k;
-                for (k = 0; k <= av_tindex_nomg(list); k++) {
+                for (k = 0; k <= av_tindex_skip_len_mg(list); k++) {
                     SV** c_p = av_fetch(list, k, FALSE);
                     UV c;
                     assert(c_p);
@@ -10424,26 +10503,28 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 }
                 flagsp = &negflags;
                 wastedflags = 0;  /* reset so (?g-c) warns twice */
+                x_mod_count = 0;
                 break;
             case ':':
             case ')':
+
+                if ((posflags & (RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE)) == RXf_PMf_EXTENDED) {
+                    negflags |= RXf_PMf_EXTENDED_MORE;
+                }
                 RExC_flags |= posflags;
+
+                if (negflags & RXf_PMf_EXTENDED) {
+                    negflags |= RXf_PMf_EXTENDED_MORE;
+                }
                 RExC_flags &= ~negflags;
                 set_regex_charset(&RExC_flags, cs);
-                if (RExC_flags & RXf_PMf_FOLD) {
-                    RExC_contains_i = 1;
-                }
 
-                if (UNLIKELY((x_mod_count) > 1)) {
-                    vFAIL("Only one /x regex modifier is allowed");
-                }
                 return;
-                /*NOTREACHED*/
             default:
               fail_modifiers:
                 RExC_parse += SKIP_IF_CHAR(RExC_parse);
                /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
-                vFAIL2utf8f("Sequence (%"UTF8f"...) not recognized",
+                vFAIL2utf8f("Sequence (%" UTF8f "...) not recognized",
                       UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
                 NOT_REACHED; /*NOTREACHED*/
         }
@@ -10655,7 +10736,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            if ( ! op ) {
                RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
                 vFAIL2utf8f(
-                    "Unknown verb pattern '%"UTF8f"'",
+                    "Unknown verb pattern '%" UTF8f "'",
                     UTF8fARG(UTF, verb_len, start_verb));
            }
             if ( arg_required && !start_arg ) {
@@ -10952,7 +11033,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                    }
                    RExC_recurse_count++;
                     DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Recurse #%"UVuf" to %"IVdf"\n",
+                        "%*s%*s Recurse #%" UVuf " to %" IVdf "\n",
                               22, "|    |", (int)(depth * 2 + 1), "",
                               (UV)ARG(ret), (IV)ARG2L(ret)));
                 }
@@ -10974,7 +11055,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     RExC_parse += SKIP_IF_CHAR(RExC_parse);
                     /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                     vFAIL2utf8f(
-                        "Sequence (%"UTF8f"...) not recognized",
+                        "Sequence (%" UTF8f "...) not recognized",
                         UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
                    NOT_REACHED; /*NOTREACHED*/
                }
@@ -10989,9 +11070,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 
                RExC_seen_zerolen++;
 
-               if (   !pRExC_state->num_code_blocks
-                   || pRExC_state->code_index >= pRExC_state->num_code_blocks
-                   || pRExC_state->code_blocks[pRExC_state->code_index].start
+               if (   !pRExC_state->code_blocks
+                   || pRExC_state->code_index
+                                        >= pRExC_state->code_blocks->count
+                   || pRExC_state->code_blocks->cb[pRExC_state->code_index].start
                        != (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
                            - RExC_start)
                ) {
@@ -11000,7 +11082,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                    FAIL("Eval-group not allowed at runtime, use re 'eval'");
                }
                /* this is a pre-compiled code block (?{...}) */
-               cb = &pRExC_state->code_blocks[pRExC_state->code_index];
+               cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index];
                RExC_parse = RExC_start + cb->end;
                if (!SIZE_ONLY) {
                    OP *o = cb->block;
@@ -11176,7 +11258,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                             *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                             return NULL;
                         }
-                        FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"",
+                        FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
                               (UV) flags);
                     } else
                         REGTAIL(pRExC_state, br, reganode(pRExC_state,
@@ -11197,7 +11279,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                 *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                                 return NULL;
                             }
-                            FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"",
+                            FAIL2("panic: regbranch returned NULL, flags=%#" UVxf,
                                   (UV) flags);
                         }
                         REGTAIL(pRExC_state, ret, lastbr);
@@ -11265,7 +11347,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 if (RExC_open_parens && !RExC_open_parens[parno])
                {
                     DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Setting open paren #%"IVdf" to %d\n",
+                        "%*s%*s Setting open paren #%" IVdf " to %d\n",
                         22, "|    |", (int)(depth * 2 + 1), "",
                        (IV)parno, REG_NODE_NUM(ret)));
                     RExC_open_parens[parno]= ret;
@@ -11295,7 +11377,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             *flagp = flags & (RESTART_PASS1|NEED_UTF8);
             return NULL;
         }
-        FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags);
+        FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
     }
     if (*RExC_parse == '|') {
        if (!SIZE_ONLY && RExC_extralen) {
@@ -11342,7 +11424,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                 return NULL;
             }
-            FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags);
         }
         REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
        lastbr = br;
@@ -11359,7 +11441,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            ender = reganode(pRExC_state, CLOSE, parno);
             if ( RExC_close_parens ) {
                 DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Setting close paren #%"IVdf" to %d\n",
+                        "%*s%*s Setting close paren #%" IVdf " to %d\n",
                         22, "|    |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ender)));
                 RExC_close_parens[parno]= ender;
                if (RExC_nestroot == parno)
@@ -11396,7 +11478,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             DEBUG_PARSE_MSG("lsbr");
             regprop(RExC_rx, RExC_mysv1, lastbr, NULL, pRExC_state);
             regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state);
-            Perl_re_printf( aTHX_  "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+            Perl_re_printf( aTHX_  "~ tying lastbr %s (%" IVdf ") to ender %s (%" IVdf ") offset %" IVdf "\n",
                           SvPV_nolen_const(RExC_mysv1),
                           (IV)REG_NODE_NUM(lastbr),
                           SvPV_nolen_const(RExC_mysv2),
@@ -11435,7 +11517,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     DEBUG_PARSE_MSG("NADA");
                     regprop(RExC_rx, RExC_mysv1, ret, NULL, pRExC_state);
                     regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state);
-                    Perl_re_printf( aTHX_  "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+                    Perl_re_printf( aTHX_  "~ converting ret %s (%" IVdf ") to ender %s (%" IVdf ") offset %" IVdf "\n",
                                   SvPV_nolen_const(RExC_mysv1),
                                   (IV)REG_NODE_NUM(ret),
                                   SvPV_nolen_const(RExC_mysv2),
@@ -11556,7 +11638,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
                 *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                 return NULL;
             }
-            FAIL2("panic: regpiece returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regpiece returned NULL, flags=%#" UVxf, (UV) flags);
        }
        else if (ret == NULL)
             ret = latest;
@@ -11585,7 +11667,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
 }
 
 /*
- - regpiece - something followed by possible [*+?]
+ - regpiece - something followed by possible quantifier * + ? {n,m}
  *
  * Note that the branching code sequences used for ? and the general cases
  * of * and + are somewhat optimized:  they use the same NOTHING node as
@@ -11628,7 +11710,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        if (flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8))
            *flagp |= flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8);
         else
-            FAIL2("panic: regatom returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regatom returned NULL, flags=%#" UVxf, (UV) flags);
        return(NULL);
     }
 
@@ -11680,19 +11762,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            nextchar(pRExC_state);
             if (max < min) {    /* If can't match, warn and optimize to fail
                                    unconditionally */
-                if (SIZE_ONLY) {
-
-                    /* We can't back off the size because we have to reserve
-                     * enough space for all the things we are about to throw
-                     * away, but we can shrink it by the amount we are about
-                     * to re-use here */
-                    RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
-                }
-                else {
+                reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
+                if (PASS2) {
                     ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
-                    RExC_emit = orig_emit;
+                    NEXT_OFF(orig_emit)= regarglen[OPFAIL] + NODE_STEP_REGNODE;
                 }
-                ret = reganode(pRExC_state, OPFAIL, 0);
                 return ret;
             }
             else if (min == max && *RExC_parse == '?')
@@ -11708,14 +11782,12 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            if ((flags&SIMPLE)) {
                 if (min == 0 && max == REG_INFTY) {
                     reginsert(pRExC_state, STAR, ret, depth+1);
-                    ret->flags = 0;
                     MARK_NAUGHTY(4);
                     RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
                     goto nest_check;
                 }
                 if (min == 1 && max == REG_INFTY) {
                     reginsert(pRExC_state, PLUS, ret, depth+1);
-                    ret->flags = 0;
                     MARK_NAUGHTY(3);
                     RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
                     goto nest_check;
@@ -11809,7 +11881,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
        SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */
        ckWARN2reg(RExC_parse,
-                  "%"UTF8f" matches null string many times",
+                  "%" UTF8f " matches null string many times",
                   UTF8fARG(UTF, (RExC_parse >= origparse
                                  ? RExC_parse - origparse
                                  : 0),
@@ -11828,7 +11900,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         ender = reg_node(pRExC_state, SUCCEED);
         REGTAIL(pRExC_state, ret, ender);
         reginsert(pRExC_state, SUSPEND, ret, depth+1);
-        ret->flags = 0;
         ender = reg_node(pRExC_state, TAIL);
         REGTAIL(pRExC_state, ret, ender);
     }
@@ -11983,13 +12054,16 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
 
     RExC_parse++;      /* Skip past the '{' */
 
-    if (! (endbrace = strchr(RExC_parse, '}'))  /* no trailing brace */
-       || ! (endbrace == RExC_parse            /* nothing between the {} */
+    endbrace = strchr(RExC_parse, '}');
+    if (! endbrace) { /* no trailing brace */
+        vFAIL2("Missing right brace on \\%c{}", 'N');
+    }
+    else if(!(endbrace == RExC_parse           /* nothing between the {} */
               || (endbrace - RExC_parse >= 2   /* U+ (bad hex is checked... */
                   && strnEQ(RExC_parse, "U+", 2)))) /* ... below for a better
                                                        error msg) */
     {
-       if (endbrace) RExC_parse = endbrace;    /* position msg's '<--HERE' */
+       RExC_parse = endbrace;  /* position msg's '<--HERE' */
        vFAIL("\\N{NAME} must be resolved by the lexer");
     }
 
@@ -12122,20 +12196,21 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
        }
         sv_catpv(substitute_parse, ")");
 
-        RExC_parse = RExC_start = RExC_adjusted_start = SvPV(substitute_parse,
-                                                             len);
+        len = SvCUR(substitute_parse);
 
        /* Don't allow empty number */
        if (len < (STRLEN) 8) {
             RExC_parse = endbrace;
            vFAIL("Invalid hexadecimal number in \\N{U+...}");
        }
+
+        RExC_parse = RExC_start = RExC_adjusted_start
+                                              = SvPV_nolen(substitute_parse);
        RExC_end = RExC_parse + len;
 
         /* The values are Unicode, and therefore not subject to recoding, but
          * have to be converted to native on a non-Unicode (meaning non-ASCII)
          * platform. */
-       RExC_override_recoding = 1;
 #ifdef EBCDIC
         RExC_recode_x_to_native = 1;
 #endif
@@ -12146,7 +12221,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                     *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                     return FALSE;
                 }
-                FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#"UVxf"",
+                FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
                     (UV) flags);
             }
             *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
@@ -12156,7 +12231,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
        RExC_start = RExC_adjusted_start = save_start;
        RExC_parse = endbrace;
        RExC_end = orig_end;
-       RExC_override_recoding = 0;
 #ifdef EBCDIC
         RExC_recode_x_to_native = 0;
 #endif
@@ -12364,6 +12438,52 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
     }
 }
 
+STATIC bool
+S_new_regcurly(const char *s, const char *e)
+{
+    /* This is a temporary function designed to match the most lenient form of
+     * a {m,n} quantifier we ever envision, with either number omitted, and
+     * spaces anywhere between/before/after them.
+     *
+     * If this function fails, then the string it matches is very unlikely to
+     * ever be considered a valid quantifier, so we can allow the '{' that
+     * begins it to be considered as a literal */
+
+    bool has_min = FALSE;
+    bool has_max = FALSE;
+
+    PERL_ARGS_ASSERT_NEW_REGCURLY;
+
+    if (s >= e || *s++ != '{')
+       return FALSE;
+
+    while (s < e && isSPACE(*s)) {
+        s++;
+    }
+    while (s < e && isDIGIT(*s)) {
+        has_min = TRUE;
+        s++;
+    }
+    while (s < e && isSPACE(*s)) {
+        s++;
+    }
+
+    if (*s == ',') {
+       s++;
+        while (s < e && isSPACE(*s)) {
+            s++;
+        }
+        while (s < e && isDIGIT(*s)) {
+            has_max = TRUE;
+            s++;
+        }
+        while (s < e && isSPACE(*s)) {
+            s++;
+        }
+    }
+
+    return s < e && *s == '}' && (has_min || has_max);
+}
 
 /* Parse backref decimal value, unless it's too big to sensibly be a backref,
  * in which case return I32_MAX (rather than possibly 32-bit wrapping) */
@@ -12512,7 +12632,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         if (ret == NULL) {
             if (*flagp & (RESTART_PASS1|NEED_UTF8))
                 return NULL;
-            FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"",
+            FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf,
                   (UV) *flagp);
         }
        if (*RExC_parse != ']') {
@@ -12539,7 +12659,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     *flagp = flags & (RESTART_PASS1|NEED_UTF8);
                     return NULL;
                 }
-                FAIL2("panic: reg returned NULL to regatom, flags=%#"UVxf"",
+                FAIL2("panic: reg returned NULL to regatom, flags=%#" UVxf,
                                                                  (UV) flags);
        }
        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
@@ -12702,7 +12822,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       bad_bound_type:
                         RExC_parse = endbrace;
                        vFAIL2utf8f(
-                            "'%"UTF8f"' is an unknown bound type",
+                            "'%" UTF8f "' is an unknown bound type",
                            UTF8fARG(UTF, length, endbrace - length));
                         NOT_REACHED; /*NOTREACHED*/
                 }
@@ -12798,6 +12918,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             /* FALLTHROUGH */
 
           finish_meta_pat:
+            if (   UCHARAT(RExC_parse + 1) == '{'
+                && UNLIKELY(! new_regcurly(RExC_parse + 1, RExC_end)))
+            {
+                RExC_parse += 2;
+                vFAIL("Unescaped left brace in regex is illegal here");
+            }
            nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
            break;
@@ -12820,7 +12946,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if
              * multi-char folds are allowed.  */
             if (!ret)
-                FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"",
+                FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf,
                       (UV) *flagp);
 
             RExC_parse--;
@@ -13194,6 +13320,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             goto loopdone;
                         }
                         p = RExC_parse;
+                        RExC_parse = parse_start;
                         if (ender > 0xff) {
                             REQUIRE_UTF8(flagp);
                         }
@@ -13347,14 +13474,43 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    } /* End of switch on '\' */
                    break;
                case '{':
-                   /* Currently we don't care if the lbrace is at the start
-                    * of a construct.  This catches it in the middle of a
-                    * literal string, or when it's the first thing after
-                    * something like "\b" */
-                   if (len || (p > RExC_start && isALPHA_A(*(p -1)))) {
-                        RExC_parse = p + 1;
-                       vFAIL("Unescaped left brace in regex is illegal here");
+                    /* Currently we allow an lbrace at the start of a construct
+                     * without raising a warning.  This is because we think we
+                     * will never want such a brace to be meant to be other
+                     * than taken literally. */
+                   if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
+
+                        /* But, we raise a fatal warning otherwise, as the
+                         * deprecation cycle has come and gone.  Except that it
+                         * turns out that some heavily-relied on upstream
+                         * software, notably GNU Autoconf, have failed to fix
+                         * their uses.  For these, don't make it fatal unless
+                         * we anticipate using the '{' for something else.
+                         * This happens after any alpha, and for a looser {m,n}
+                         * quantifier specification */
+                        if (      RExC_strict
+                            || (  p > parse_start + 1
+                                && isALPHA_A(*(p - 1))
+                                && *(p - 2) == '\\')
+                            || new_regcurly(p, RExC_end))
+                        {
+                            RExC_parse = p + 1;
+                            vFAIL("Unescaped left brace in regex is "
+                                  "illegal here");
+                        }
+                        if (PASS2) {
+                            ckWARNregdep(p + 1,
+                                        "Unescaped left brace in regex is "
+                                        "deprecated here (and will be fatal "
+                                        "in Perl 5.30), passed through");
+                        }
                    }
+                   goto normal_default;
+                case '}':
+                case ']':
+                    if (PASS2 && p > RExC_parse && RExC_strict) {
+                        ckWARN2reg(p + 1, "Unescaped literal '%c'", *p);
+                    }
                    /*FALLTHROUGH*/
                default:    /* A literal character */
                  normal_default:
@@ -13386,10 +13542,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  * this character again next time through, when it will be the
                  * only thing in its new node */
 
-                if ((next_is_quantifier = (   LIKELY(p < RExC_end)
-                                           && UNLIKELY(ISMULT2(p))))
-                    && LIKELY(len))
-               {
+                next_is_quantifier =    LIKELY(p < RExC_end)
+                                     && UNLIKELY(ISMULT2(p));
+
+                if (next_is_quantifier && LIKELY(len)) {
                     p = oldp;
                     goto loopdone;
                 }
@@ -13774,7 +13930,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                             FALSE /* Don't force to /x */ );
     if (PASS2 && *RExC_parse == '{' && OP(ret) != SBOL && ! regcurly(RExC_parse)) {
-        ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here, passed through");
+        ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through");
     }
 
     return(ret);
@@ -13875,6 +14031,17 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
                                              REPORT_LOCATION_ARGS(p)));     \
         }                                                                   \
     } STMT_END
+#define CLEAR_POSIX_WARNINGS()                                              \
+    STMT_START {                                                            \
+        if (posix_warnings && RExC_warn_text)                               \
+            av_clear(RExC_warn_text);                                       \
+    } STMT_END
+
+#define CLEAR_POSIX_WARNINGS_AND_RETURN(ret)                                \
+    STMT_START {                                                            \
+        CLEAR_POSIX_WARNINGS();                                             \
+        return ret;                                                         \
+    } STMT_END
 
 STATIC int
 S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
@@ -13947,7 +14114,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
      *
      * The syntax for a legal posix class is:
      *
-     * qr/(?xa: \[ : \^? [:lower:]{4,6} : \] )/
+     * qr/(?xa: \[ : \^? [[:lower:]]{4,6} : \] )/
      *
      * What this routine considers syntactically to be an intended posix class
      * is this (the comments indicate some restrictions that the pattern
@@ -13972,7 +14139,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
      *                                      # for it to be considered to be
      *                                      # an intended posix class.
      *          \h*
-     *          [:punct:]?                  # The closing class character,
+     *          [[:punct:]]?                # The closing class character,
      *                                      # possibly omitted.  If not a colon
      *                                      # nor semi colon, the class name
      *                                      # must be even closer to a valid
@@ -14011,11 +14178,11 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
      * decide that no posix class was intended.  Should be at least
      * sizeof("alphanumeric") */
     UV input_text[15];
+    STATIC_ASSERT_DECL(C_ARRAY_LENGTH(input_text) >= sizeof "alphanumeric");
 
     PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX;
 
-    if (posix_warnings && RExC_warn_text)
-        av_clear(RExC_warn_text);
+    CLEAR_POSIX_WARNINGS();
 
     if (p >= e) {
         return NOT_MEANT_TO_BE_A_POSIX_CLASS;
@@ -14107,7 +14274,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                     *updated_parse_ptr = (char *) temp_ptr;
                 }
 
-                return OOB_NAMEDCLASS;
+                CLEAR_POSIX_WARNINGS_AND_RETURN(OOB_NAMEDCLASS);
             }
         }
 
@@ -14177,7 +14344,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
         /* We consider something like [^:^alnum:]] to not have been intended to
          * be a posix class, but XXX maybe we should */
         if (complement) {
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
         }
 
         complement = 1;
@@ -14204,7 +14371,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
          * this leaves this construct looking like [:] or [:^], which almost
          * certainly weren't intended to be posix classes */
         if (has_opening_bracket) {
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
         }
 
         /* But this function can be called when we parse the colon for
@@ -14221,7 +14388,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
             /* XXX We are currently very restrictive here, so this code doesn't
              * consider the possibility that, say, /[alpha.]]/ was intended to
              * be a posix class. */
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
         }
 
         /* Here we have something like 'foo:]'.  There was no initial colon,
@@ -14391,7 +14558,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
             }
 
             /* Otherwise, it can't have meant to have been a class */
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
         }
 
         /* If we ran off the end, and the final character was a punctuation
@@ -14441,7 +14608,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
          * class name.  (We can do this on the first pass, as any second pass
          * will yield an even shorter name) */
         if (name_len < 3) {
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
         }
 
         /* Find which class it is.  Initially switch on the length of the name.
@@ -14600,7 +14767,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
             }
 
             /* Here neither pass found a close-enough class name */
-            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(NOT_MEANT_TO_BE_A_POSIX_CLASS);
         }
 
     probably_meant_to_be:
@@ -14642,7 +14809,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
             /* If it is a known class, return the class.  The class number
              * #defines are structured so each complement is +1 to the normal
              * one */
-            return class_number + complement;
+            CLEAR_POSIX_WARNINGS_AND_RETURN(class_number + complement);
         }
         else if (! check_only) {
 
@@ -14652,7 +14819,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                                                    ? "^"
                                                    : "";
             RExC_parse = (char *) p;
-            vFAIL3utf8f("POSIX class [:%s%"UTF8f":] unknown",
+            vFAIL3utf8f("POSIX class [:%s%" UTF8f ":] unknown",
                         complement_string,
                         UTF8fARG(UTF, RExC_parse - name_start - 2, name_start));
         }
@@ -14795,7 +14962,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                   &posix_warnings
                                  ))
                         FAIL2("panic: regclass returned NULL to handle_sets, "
-                              "flags=%#"UVxf"", (UV) *flagp);
+                              "flags=%#" UVxf, (UV) *flagp);
 
                     /* function call leaves parse pointing to the ']', except
                      * if we faked it */
@@ -14831,7 +14998,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
       no_close:
         /* We output the messages even if warnings are off, because we'll fail
          * the very next thing, and these give a likely diagnosis for that */
-        if (posix_warnings && av_tindex_nomg(posix_warnings) >= 0) {
+        if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
             output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
         }
 
@@ -14946,7 +15113,7 @@ redo_curchar:
                                            stack, fence, fence_stack));
 #endif
 
-        top_index = av_tindex_nomg(stack);
+        top_index = av_tindex_skip_len_mg(stack);
 
         switch (curchar) {
             SV** stacked_ptr;       /* Ptr to something already on 'stack' */
@@ -15064,7 +15231,7 @@ redo_curchar:
                               NULL))
                 {
                     FAIL2("panic: regclass returned NULL to handle_sets, "
-                          "flags=%#"UVxf"", (UV) *flagp);
+                          "flags=%#" UVxf, (UV) *flagp);
                 }
 
                 /* regclass() will return with parsing just the \ sequence,
@@ -15103,7 +15270,7 @@ redo_curchar:
                                 ))
                 {
                     FAIL2("panic: regclass returned NULL to handle_sets, "
-                          "flags=%#"UVxf"", (UV) *flagp);
+                          "flags=%#" UVxf, (UV) *flagp);
                 }
 
                 /* function call leaves parse pointing to the ']', except if we
@@ -15124,7 +15291,7 @@ redo_curchar:
                 goto done;
 
             case ')':
-                if (av_tindex_nomg(fence_stack) < 0) {
+                if (av_tindex_skip_len_mg(fence_stack) < 0) {
                     RExC_parse++;
                     vFAIL("Unexpected ')'");
                 }
@@ -15320,7 +15487,7 @@ redo_curchar:
              * may have altered the stack in the time since we earlier set
              * 'top_index'.  */
 
-            top_index = av_tindex_nomg(stack);
+            top_index = av_tindex_skip_len_mg(stack);
             if (top_index - fence >= 0) {
                 /* If the top entry on the stack is an operator, it had better
                  * be a '!', otherwise the entry below the top operand should
@@ -15371,15 +15538,15 @@ redo_curchar:
     } /* End of loop parsing through the construct */
 
   done:
-    if (av_tindex_nomg(fence_stack) >= 0) {
+    if (av_tindex_skip_len_mg(fence_stack) >= 0) {
         vFAIL("Unmatched (");
     }
 
-    if (av_tindex_nomg(stack) < 0   /* Was empty */
+    if (av_tindex_skip_len_mg(stack) < 0   /* Was empty */
         || ((final = av_pop(stack)) == NULL)
         || ! IS_OPERAND(final)
         || SvTYPE(final) != SVt_INVLIST
-        || av_tindex_nomg(stack) >= 0)  /* More left on stack */
+        || av_tindex_skip_len_mg(stack) >= 0)  /* More left on stack */
     {
       bad_syntax:
         SvREFCNT_dec(final);
@@ -15399,10 +15566,10 @@ redo_curchar:
     result_string = newSVpvs("");
     while (invlist_iternext(final, &start, &end)) {
         if (start == end) {
-            Perl_sv_catpvf(aTHX_ result_string, "\\x{%"UVXf"}", start);
+            Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}", start);
         }
         else {
-            Perl_sv_catpvf(aTHX_ result_string, "\\x{%"UVXf"}-\\x{%"UVXf"}",
+            Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}-\\x{%" UVXf "}",
                                                      start,          end);
         }
     }
@@ -15432,7 +15599,7 @@ redo_curchar:
                     NULL
                 );
     if (!node)
-        FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf,
+        FAIL2("panic: regclass returned NULL to handle_sets, flags=%#" UVxf,
                     PTR2UV(flagp));
 
     /* Fix up the node type if we are in locale.  (We have pretended we are
@@ -15482,8 +15649,8 @@ S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state,
                              AV * stack, const IV fence, AV * fence_stack)
 {   /* Dumps the stacks in handle_regex_sets() */
 
-    const SSize_t stack_top = av_tindex_nomg(stack);
-    const SSize_t fence_stack_top = av_tindex_nomg(fence_stack);
+    const SSize_t stack_top = av_tindex_skip_len_mg(stack);
+    const SSize_t fence_stack_top = av_tindex_skip_len_mg(fence_stack);
     SSize_t i;
 
     PERL_ARGS_ASSERT_DUMP_REGEX_SETS_STRUCTURES;
@@ -15789,8 +15956,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                        character; used under /i */
     UV n;
     char * stop_ptr = RExC_end;    /* where to stop parsing */
-    const bool skip_white = cBOOL(ret_invlist); /* ignore unescaped white
-                                                   space? */
+
+    /* ignore unescaped whitespace? */
+    const bool skip_white = cBOOL(   ret_invlist
+                                  || (RExC_flags & RXf_PMf_EXTENDED_MORE));
 
     /* Unicode properties are stored in a swash; this holds the current one
      * being parsed.  If this swash is the only above-latin1 component of the
@@ -15935,7 +16104,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     while (1) {
 
         if (   posix_warnings
-            && av_tindex_nomg(posix_warnings) >= 0
+            && av_tindex_skip_len_mg(posix_warnings) >= 0
             && RExC_parse > not_posix_region_end)
         {
             /* Warnings about posix class issues are considered tentative until
@@ -15991,7 +16160,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  * posix class, and it failed, it was a false alarm, as this
                  * successful one proves */
                 if (   posix_warnings
-                    && av_tindex_nomg(posix_warnings) >= 0
+                    && av_tindex_skip_len_mg(posix_warnings) >= 0
                     && not_posix_region_end >= RExC_parse
                     && not_posix_region_end <= posix_class_end)
                 {
@@ -16261,7 +16430,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             RExC_parse = e + 1;
 
                             /* diag_listed_as: Can't find Unicode property definition "%s" */
-                            vFAIL3utf8f("%s \"%"UTF8f"\"",
+                            vFAIL3utf8f("%s \"%" UTF8f "\"",
                                 msg, UTF8fARG(UTF, n, name));
                         }
 
@@ -16280,7 +16449,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                 SAVEFREEPV(name);
                             }
                         }
-                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%"UTF8f"%s\n",
+                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%" UTF8f "%s\n",
                                         (value == 'p' ? '+' : '!'),
                                         (FOLD) ? "__" : "",
                                         UTF8fARG(UTF, n, name),
@@ -16450,13 +16619,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                   : 0;
                     if (strict) {
                         vFAIL2utf8f(
-                            "False [] range \"%"UTF8f"\"",
+                            "False [] range \"%" UTF8f "\"",
                             UTF8fARG(UTF, w, rangebegin));
                     }
                     else {
                         SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */
                         ckWARN2reg(RExC_parse,
-                            "False [] range \"%"UTF8f"\"",
+                            "False [] range \"%" UTF8f "\"",
                             UTF8fARG(UTF, w, rangebegin));
                         (void)ReREFCNT_inc(RExC_rx_sv);
                         cp_list = add_cp_to_invlist(cp_list, '-');
@@ -16644,7 +16813,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 #endif
                 w = RExC_parse - rangebegin;
                 vFAIL2utf8f(
-                    "Invalid [] range \"%"UTF8f"\"",
+                    "Invalid [] range \"%" UTF8f "\"",
                     UTF8fARG(UTF, w, rangebegin));
                 NOT_REACHED; /* NOTREACHED */
            }
@@ -16749,7 +16918,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                       foldbuf + foldlen);
                         SV* multi_fold = sv_2mortal(newSVpvs(""));
 
-                        Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%"UVXf"}", value);
+                        Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%" UVXf "}", value);
 
                         multi_char_matches
                                         = add_multi_match(multi_char_matches,
@@ -16786,15 +16955,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      * must be be all digits or all letters of the same case.
                      * Otherwise, the range is non-portable and unclear as to
                      * what it contains */
-                    if ((isPRINT_A(prevvalue) || isPRINT_A(value))
-                        && (non_portable_endpoint
-                            || ! ((isDIGIT_A(prevvalue) && isDIGIT_A(value))
-                                   || (isLOWER_A(prevvalue) && isLOWER_A(value))
-                                   || (isUPPER_A(prevvalue) && isUPPER_A(value)))))
-                    {
-                        vWARN(RExC_parse, "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\"");
+                    if (             (isPRINT_A(prevvalue) || isPRINT_A(value))
+                        && (          non_portable_endpoint
+                            || ! (   (isDIGIT_A(prevvalue) && isDIGIT_A(value))
+                                  || (isLOWER_A(prevvalue) && isLOWER_A(value))
+                                  || (isUPPER_A(prevvalue) && isUPPER_A(value))
+                    ))) {
+                        vWARN(RExC_parse, "Ranges of ASCII printables should"
+                                          " be some subset of \"0-9\","
+                                          " \"A-Z\", or \"a-z\"");
                     }
                     else if (prevvalue >= 0x660) { /* ARABIC_INDIC_DIGIT_ZERO */
+                        SSize_t index_start;
+                        SSize_t index_final;
 
                         /* But the nature of Unicode and languages mean we
                          * can't do the same checks for above-ASCII ranges,
@@ -16802,40 +16975,68 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          * contain only digits from the same group of 10.  The
                          * ASCII case is handled just above.  0x660 is the
                          * first digit character beyond ASCII.  Hence here, the
-                         * range could be a range of digits.  Find out.  */
-                        IV index_start = _invlist_search(PL_XPosix_ptrs[_CC_DIGIT],
-                                                         prevvalue);
-                        IV index_final = _invlist_search(PL_XPosix_ptrs[_CC_DIGIT],
-                                                         value);
-
-                        /* If the range start and final points are in the same
-                         * inversion list element, it means that either both
-                         * are not digits, or both are digits in a consecutive
-                         * sequence of digits.  (So far, Unicode has kept all
-                         * such sequences as distinct groups of 10, but assert
-                         * to make sure).  If the end points are not in the
-                         * same element, neither should be a digit. */
-                        if (index_start == index_final) {
-                            assert(! ELEMENT_RANGE_MATCHES_INVLIST(index_start)
-                            || (invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start+1]
-                               - invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
-                               == 10)
-                               /* But actually Unicode did have one group of 11
-                                * 'digits' in 5.2, so in case we are operating
-                                * on that version, let that pass */
-                            || (invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start+1]
-                               - invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
-                                == 11
-                               && invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
-                                == 0x19D0)
-                            );
+                         * range could be a range of digits.  First some
+                         * unlikely special cases.  Grandfather in that a range
+                         * ending in 19DA (NEW TAI LUE THAM DIGIT ONE) is bad
+                         * if its starting value is one of the 10 digits prior
+                         * to it.  This is because it is an alternate way of
+                         * writing 19D1, and some people may expect it to be in
+                         * that group.  But it is bad, because it won't give
+                         * the expected results.  In Unicode 5.2 it was
+                         * considered to be in that group (of 11, hence), but
+                         * this was fixed in the next version */
+
+                        if (UNLIKELY(value == 0x19DA && prevvalue >= 0x19D0)) {
+                            goto warn_bad_digit_range;
                         }
-                        else if ((index_start >= 0
-                                  && ELEMENT_RANGE_MATCHES_INVLIST(index_start))
-                                 || (index_final >= 0
-                                     && ELEMENT_RANGE_MATCHES_INVLIST(index_final)))
+                        else if (UNLIKELY(   prevvalue >= 0x1D7CE
+                                          &&     value <= 0x1D7FF))
                         {
-                            vWARN(RExC_parse, "Ranges of digits should be from the same group of 10");
+                            /* This is the only other case currently in Unicode
+                             * where the algorithm below fails.  The code
+                             * points just above are the end points of a single
+                             * range containing only decimal digits.  It is 5
+                             * different series of 0-9.  All other ranges of
+                             * digits currently in Unicode are just a single
+                             * series.  (And mktables will notify us if a later
+                             * Unicode version breaks this.)
+                             *
+                             * If the range being checked is at most 9 long,
+                             * and the digit values represented are in
+                             * numerical order, they are from the same series.
+                             * */
+                            if (         value - prevvalue > 9
+                                ||    (((    value - 0x1D7CE) % 10)
+                                     <= (prevvalue - 0x1D7CE) % 10))
+                            {
+                                goto warn_bad_digit_range;
+                            }
+                        }
+                        else {
+
+                            /* For all other ranges of digits in Unicode, the
+                             * algorithm is just to check if both end points
+                             * are in the same series, which is the same range.
+                             * */
+                            index_start = _invlist_search(
+                                                    PL_XPosix_ptrs[_CC_DIGIT],
+                                                    prevvalue);
+
+                            /* Warn if the range starts and ends with a digit,
+                             * and they are not in the same group of 10. */
+                            if (   index_start >= 0
+                                && ELEMENT_RANGE_MATCHES_INVLIST(index_start)
+                                && (index_final =
+                                    _invlist_search(PL_XPosix_ptrs[_CC_DIGIT],
+                                                    value)) != index_start
+                                && index_final >= 0
+                                && ELEMENT_RANGE_MATCHES_INVLIST(index_final))
+                            {
+                              warn_bad_digit_range:
+                                vWARN(RExC_parse, "Ranges of digits should be"
+                                                  " from the same group of"
+                                                  " 10");
+                            }
                         }
                     }
                 }
@@ -16916,7 +17117,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     } /* End of loop through all the text within the brackets */
 
 
-    if (   posix_warnings && av_tindex_nomg(posix_warnings) >= 0) {
+    if (   posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
         output_or_return_posix_warnings(pRExC_state, posix_warnings,
                                         return_posix_warnings);
     }
@@ -16949,7 +17150,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 #endif
 
         /* Look at the longest folds first */
-        for (cp_count = av_tindex_nomg(multi_char_matches);
+        for (cp_count = av_tindex_skip_len_mg(multi_char_matches);
                         cp_count > 0;
                         cp_count--)
         {
@@ -17005,7 +17206,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         RExC_adjusted_start = RExC_start + prefix_end;
        RExC_end = RExC_parse + len;
         RExC_in_multi_char_class = 1;
-       RExC_override_recoding = 1;
         RExC_emit = (regnode *)orig_emit;
 
        ret = reg(pRExC_state, 1, &reg_flags, depth+1);
@@ -17018,7 +17218,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         RExC_precomp_adj = 0;
        RExC_end = save_end;
        RExC_in_multi_char_class = 0;
-       RExC_override_recoding = 0;
         SvREFCNT_dec_NN(multi_char_matches);
         return ret;
     }
@@ -17333,7 +17532,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     {
                         AV* list = (AV*) *listp;
                         IV k;
-                        for (k = 0; k <= av_tindex_nomg(list); k++) {
+                        for (k = 0; k <= av_tindex_skip_len_mg(list); k++) {
                             SV** c_p = av_fetch(list, k, FALSE);
                             UV c;
                             assert(c_p);
@@ -18024,7 +18223,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
 
            si = *ary;  /* ary[0] = the string to initialize the swash with */
 
-            if (av_tindex_nomg(av) >= 2) {
+            if (av_tindex_skip_len_mg(av) >= 2) {
                 if (only_utf8_locale_ptr
                     && ary[2]
                     && ary[2] != &PL_sv_undef)
@@ -18040,7 +18239,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                  * is any inversion list generated at compile time; [4]
                  * indicates if that inversion list has any user-defined
                  * properties in it. */
-                if (av_tindex_nomg(av) >= 3) {
+                if (av_tindex_skip_len_mg(av) >= 3) {
                     invlist = ary[3];
                     if (SvUV(ary[4])) {
                         swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
@@ -18353,7 +18552,7 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
 #else
     if (RExC_offsets) {         /* MJD */
        MJD_OFFSET_DEBUG(
-              ("%s:%d: (op %s) %s %"UVuf" (len %"UVuf") (max %"UVuf").\n",
+              ("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n",
               name, __LINE__,
               PL_reg_name[op],
               (UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0]
@@ -18430,9 +18629,18 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
 - reginsert - insert an operator in front of already-emitted operand
 *
 * Means relocating the operand.
+*
+* IMPORTANT NOTE - it is the *callers* responsibility to correctly
+* set up NEXT_OFF() of the inserted node if needed. Something like this:
+*
+* reginsert(pRExC, OPFAIL, orig_emit, depth+1);
+* if (PASS2)
+*     NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
+*
+* ALSO NOTE - operand->flags will be set to 0 as well.
 */
 STATIC void
-S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
+S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
 {
     regnode *src;
     regnode *dst;
@@ -18458,7 +18666,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
     dst = RExC_emit;
     if (RExC_open_parens) {
         int paren;
-        /*DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);*/
+        /*DEBUG_PARSE_FMT("inst"," - %" IVdf, (IV)RExC_npar);*/
         /* remember that RExC_npar is rex->nparens + 1,
          * iow it is 1 more than the number of parens seen in
          * the pattern so far. */
@@ -18466,13 +18674,13 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
             /* note, RExC_open_parens[0] is the start of the
              * regex, it can't move. RExC_close_parens[0] is the end
              * of the regex, it *can* move. */
-            if ( paren && RExC_open_parens[paren] >= opnd ) {
+            if ( paren && RExC_open_parens[paren] >= operand ) {
                 /*DEBUG_PARSE_FMT("open"," - %d",size);*/
                 RExC_open_parens[paren] += size;
             } else {
                 /*DEBUG_PARSE_FMT("open"," - %s","ok");*/
             }
-            if ( RExC_close_parens[paren] >= opnd ) {
+            if ( RExC_close_parens[paren] >= operand ) {
                 /*DEBUG_PARSE_FMT("close"," - %d",size);*/
                 RExC_close_parens[paren] += size;
             } else {
@@ -18483,12 +18691,12 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
     if (RExC_end_op)
         RExC_end_op += size;
 
-    while (src > opnd) {
+    while (src > operand) {
        StructCopy(--src, --dst, regnode);
 #ifdef RE_TRACK_PATTERN_OFFSETS
         if (RExC_offsets) {     /* MJD 20010112 */
            MJD_OFFSET_DEBUG(
-                 ("%s(%d): (op %s) %s copy %"UVuf" -> %"UVuf" (max %"UVuf").\n",
+                 ("%s(%d): (op %s) %s copy %" UVuf " -> %" UVuf " (max %" UVuf ").\n",
                   "reg_insert",
                  __LINE__,
                  PL_reg_name[op],
@@ -18503,12 +18711,11 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
 #endif
     }
 
-
-    place = opnd;              /* Op node, where operand used to be. */
+    place = operand;           /* Op node, where operand used to be. */
 #ifdef RE_TRACK_PATTERN_OFFSETS
     if (RExC_offsets) {         /* MJD */
        MJD_OFFSET_DEBUG(
-              ("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n",
+              ("%s(%d): (op %s) %s %" UVuf " <- %" UVuf " (max %" UVuf ").\n",
               "reginsert",
              __LINE__,
              PL_reg_name[op],
@@ -18522,6 +18729,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
     }
 #endif
     src = NEXTOPER(place);
+    place->flags = 0;
     FILL_ADVANCE_NODE(place, op);
     Zero(src, offset, regnode);
 }
@@ -18657,7 +18865,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
         DEBUG_PARSE_MSG("");
         regprop(RExC_rx, RExC_mysv, val, NULL, pRExC_state);
         Perl_re_printf( aTHX_
-                      "~ attach to %s (%"IVdf") offset to %"IVdf"\n",
+                      "~ attach to %s (%" IVdf ") offset to %" IVdf "\n",
                      SvPV_nolen_const(RExC_mysv),
                      (IV)REG_NODE_NUM(val),
                      (IV)(val - scan)
@@ -18756,6 +18964,7 @@ void
 Perl_regdump(pTHX_ const regexp *r)
 {
 #ifdef DEBUGGING
+    int i;
     SV * const sv = sv_newmortal();
     SV *dsv= sv_newmortal();
     RXi_GET_DECL(r,ri);
@@ -18766,41 +18975,40 @@ Perl_regdump(pTHX_ const regexp *r)
     (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
 
     /* Header fields of interest. */
-    if (r->anchored_substr) {
-       RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->anchored_substr),
-           RE_SV_DUMPLEN(r->anchored_substr), 30);
-        Perl_re_printf( aTHX_
-                     "anchored %s%s at %"IVdf" ",
-                     s, RE_SV_TAIL(r->anchored_substr),
-                     (IV)r->anchored_offset);
-    } else if (r->anchored_utf8) {
-       RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->anchored_utf8),
-           RE_SV_DUMPLEN(r->anchored_utf8), 30);
-        Perl_re_printf( aTHX_
-                     "anchored utf8 %s%s at %"IVdf" ",
-                     s, RE_SV_TAIL(r->anchored_utf8),
-                     (IV)r->anchored_offset);
-    }
-    if (r->float_substr) {
-       RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->float_substr),
-           RE_SV_DUMPLEN(r->float_substr), 30);
-        Perl_re_printf( aTHX_
-                     "floating %s%s at %"IVdf"..%"UVuf" ",
-                     s, RE_SV_TAIL(r->float_substr),
-                     (IV)r->float_min_offset, (UV)r->float_max_offset);
-    } else if (r->float_utf8) {
-       RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->float_utf8),
-           RE_SV_DUMPLEN(r->float_utf8), 30);
-        Perl_re_printf( aTHX_
-                     "floating utf8 %s%s at %"IVdf"..%"UVuf" ",
-                     s, RE_SV_TAIL(r->float_utf8),
-                     (IV)r->float_min_offset, (UV)r->float_max_offset);
+    for (i = 0; i < 2; i++) {
+        if (r->substrs->data[i].substr) {
+            RE_PV_QUOTED_DECL(s, 0, dsv,
+                            SvPVX_const(r->substrs->data[i].substr),
+                            RE_SV_DUMPLEN(r->substrs->data[i].substr),
+                            30);
+            Perl_re_printf( aTHX_
+                          "%s %s%s at %" IVdf "..%" UVuf " ",
+                          i ? "floating" : "anchored",
+                          s,
+                          RE_SV_TAIL(r->substrs->data[i].substr),
+                          (IV)r->substrs->data[i].min_offset,
+                          (UV)r->substrs->data[i].max_offset);
+        }
+        else if (r->substrs->data[i].utf8_substr) {
+            RE_PV_QUOTED_DECL(s, 1, dsv,
+                            SvPVX_const(r->substrs->data[i].utf8_substr),
+                            RE_SV_DUMPLEN(r->substrs->data[i].utf8_substr),
+                            30);
+            Perl_re_printf( aTHX_
+                          "%s utf8 %s%s at %" IVdf "..%" UVuf " ",
+                          i ? "floating" : "anchored",
+                          s,
+                          RE_SV_TAIL(r->substrs->data[i].utf8_substr),
+                          (IV)r->substrs->data[i].min_offset,
+                          (UV)r->substrs->data[i].max_offset);
+        }
     }
+
     if (r->check_substr || r->check_utf8)
         Perl_re_printf( aTHX_
                      (const char *)
-                     (r->check_substr == r->float_substr
-                      && r->check_utf8 == r->float_utf8
+                     (   r->check_substr == r->substrs->data[1].substr
+                      && r->check_utf8   == r->substrs->data[1].utf8_substr
                       ? "(checking floating" : "(checking anchored"));
     if (r->intflags & PREGf_NOSCAN)
         Perl_re_printf( aTHX_  " noscan");
@@ -18824,12 +19032,12 @@ Perl_regdump(pTHX_ const regexp *r)
         Perl_re_printf( aTHX_ " ");
     }
     if (r->intflags & PREGf_GPOS_SEEN)
-        Perl_re_printf( aTHX_  "GPOS:%"UVuf" ", (UV)r->gofs);
+        Perl_re_printf( aTHX_  "GPOS:%" UVuf " ", (UV)r->gofs);
     if (r->intflags & PREGf_SKIP)
         Perl_re_printf( aTHX_  "plus ");
     if (r->intflags & PREGf_IMPLICIT)
         Perl_re_printf( aTHX_  "implicit ");
-    Perl_re_printf( aTHX_  "minlen %"IVdf" ", (IV)r->minlen);
+    Perl_re_printf( aTHX_  "minlen %" IVdf " ", (IV)r->minlen);
     if (r->extflags & RXf_EVAL_SEEN)
         Perl_re_printf( aTHX_  "with eval ");
     Perl_re_printf( aTHX_  "\n");
@@ -18945,7 +19153,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
           if (trie->jump)
             sv_catpvs(sv, "(JUMP)");
           Perl_sv_catpvf(aTHX_ sv,
-            "<S:%"UVuf"/%"IVdf" W:%"UVuf" L:%"UVuf"/%"UVuf" C:%"UVuf"/%"UVuf">",
+            "<S:%" UVuf "/%" IVdf " W:%" UVuf " L:%" UVuf "/%" UVuf " C:%" UVuf "/%" UVuf ">",
             (UV)trie->startstate,
             (IV)trie->statecount-1, /* -1 because of the unused 0 element */
             (UV)trie->wordcount,
@@ -18986,7 +19194,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
     {
         AV *name_list= NULL;
         U32 parno= OP(o) == ACCEPT ? (U32)ARG2L(o) : ARG(o);
-        Perl_sv_catpvf(aTHX_ sv, "%"UVuf, (UV)parno);        /* Parenth number */
+        Perl_sv_catpvf(aTHX_ sv, "%" UVuf, (UV)parno);        /* Parenth number */
        if ( RXp_PAREN_NAMES(prog) ) {
             name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
         } else if ( pRExC_state ) {
@@ -18996,7 +19204,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
             if ( k != REF || (OP(o) < NREF)) {
                 SV **name= av_fetch(name_list, parno, 0 );
                if (name)
-                   Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+                   Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
             }
             else {
                 SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]);
@@ -19005,10 +19213,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                 I32 n;
                 if (name) {
                     for ( n=0; n<SvIVX(sv_dat); n++ ) {
-                        Perl_sv_catpvf(aTHX_ sv, "%s%"IVdf,
+                        Perl_sv_catpvf(aTHX_ sv, "%s%" IVdf,
                                    (n ? "," : ""), (IV)nums[n]);
                     }
-                    Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
                 }
             }
         }
@@ -19040,7 +19248,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         if (name_list) {
             SV **name= av_fetch(name_list, ARG(o), 0 );
             if (name)
-                Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+                Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
         }
     }
     else if (k == LOGICAL)
@@ -19233,8 +19441,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 
     /* add on the verb argument if there is one */
     if ( ( k == VERB || OP(o) == ACCEPT || OP(o) == OPFAIL ) && o->flags) {
-        Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
+        if ( ARG(o) )
+            Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
                        SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
+        else
+            sv_catpvs(sv, ":NULL");
     }
 #else
     PERL_UNUSED_CONTEXT;
@@ -19309,13 +19520,13 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
     } else {
         CALLREGFREE_PVT(rx); /* free the private data */
         SvREFCNT_dec(RXp_PAREN_NAMES(r));
-       Safefree(r->xpv_len_u.xpvlenu_pv);
     }
     if (r->substrs) {
-        SvREFCNT_dec(r->anchored_substr);
-        SvREFCNT_dec(r->anchored_utf8);
-        SvREFCNT_dec(r->float_substr);
-        SvREFCNT_dec(r->float_utf8);
+        int i;
+        for (i = 0; i < 2; i++) {
+            SvREFCNT_dec(r->substrs->data[i].substr);
+            SvREFCNT_dec(r->substrs->data[i].utf8_substr);
+        }
        Safefree(r->substrs);
     }
     RX_MATCH_COPY_FREE(rx);
@@ -19326,12 +19537,19 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
     SvREFCNT_dec(r->qr_anoncv);
     if (r->recurse_locinput)
         Safefree(r->recurse_locinput);
-    rx->sv_u.svu_rx = 0;
 }
 
+
 /*  reg_temp_copy()
 
-    This is a hacky workaround to the structural issue of match results
+    Copy ssv to dsv, both of which should of type SVt_REGEXP or SVt_PVLV,
+    except that dsv will be created if NULL.
+
+    This function is used in two main ways. First to implement
+        $r = qr/....; $s = $$r;
+
+    Secondly, it is used as a hacky workaround to the structural issue of
+    match results
     being stored in the regexp structure which is in turn stored in
     PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern
     could be PL_curpm in multiple contexts, and could require multiple
@@ -19347,75 +19565,80 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
 
 
 REGEXP *
-Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
+Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
 {
-    struct regexp *ret;
-    struct regexp *const r = ReANY(rx);
-    const bool islv = ret_x && SvTYPE(ret_x) == SVt_PVLV;
+    struct regexp *drx;
+    struct regexp *const srx = ReANY(ssv);
+    const bool islv = dsv && SvTYPE(dsv) == SVt_PVLV;
 
     PERL_ARGS_ASSERT_REG_TEMP_COPY;
 
-    if (!ret_x)
-       ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
+    if (!dsv)
+       dsv = (REGEXP*) newSV_type(SVt_REGEXP);
     else {
-       SvOK_off((SV *)ret_x);
+       SvOK_off((SV *)dsv);
        if (islv) {
-           /* For PVLVs, SvANY points to the xpvlv body while sv_u points
-              to the regexp.  (For SVt_REGEXPs, sv_upgrade has already
-              made both spots point to the same regexp body.) */
+           /* For PVLVs, the head (sv_any) points to an XPVLV, while
+             * the LV's xpvlenu_rx will point to a regexp body, which
+             * we allocate here */
            REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
-           assert(!SvPVX(ret_x));
-           ret_x->sv_u.svu_rx = temp->sv_any;
+           assert(!SvPVX(dsv));
+            ((XPV*)SvANY(dsv))->xpv_len_u.xpvlenu_rx = temp->sv_any;
            temp->sv_any = NULL;
            SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
            SvREFCNT_dec_NN(temp);
            /* SvCUR still resides in the xpvlv struct, so the regexp copy-
               ing below will not set it. */
-           SvCUR_set(ret_x, SvCUR(rx));
+           SvCUR_set(dsv, SvCUR(ssv));
        }
     }
     /* This ensures that SvTHINKFIRST(sv) is true, and hence that
        sv_force_normal(sv) is called.  */
-    SvFAKE_on(ret_x);
-    ret = ReANY(ret_x);
+    SvFAKE_on(dsv);
+    drx = ReANY(dsv);
 
-    SvFLAGS(ret_x) |= SvUTF8(rx);
+    SvFLAGS(dsv) |= SvFLAGS(ssv) & (SVf_POK|SVp_POK|SVf_UTF8);
+    SvPV_set(dsv, RX_WRAPPED(ssv));
     /* We share the same string buffer as the original regexp, on which we
        hold a reference count, incremented when mother_re is set below.
        The string pointer is copied here, being part of the regexp struct.
      */
-    memcpy(&(ret->xpv_cur), &(r->xpv_cur),
+    memcpy(&(drx->xpv_cur), &(srx->xpv_cur),
           sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
-    if (r->offs) {
-        const I32 npar = r->nparens+1;
-        Newx(ret->offs, npar, regexp_paren_pair);
-        Copy(r->offs, ret->offs, npar, regexp_paren_pair);
-    }
-    if (r->substrs) {
-        Newx(ret->substrs, 1, struct reg_substr_data);
-       StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
+    if (!islv)
+        SvLEN_set(dsv, 0);
+    if (srx->offs) {
+        const I32 npar = srx->nparens+1;
+        Newx(drx->offs, npar, regexp_paren_pair);
+        Copy(srx->offs, drx->offs, npar, regexp_paren_pair);
+    }
+    if (srx->substrs) {
+        int i;
+        Newx(drx->substrs, 1, struct reg_substr_data);
+       StructCopy(srx->substrs, drx->substrs, struct reg_substr_data);
 
-       SvREFCNT_inc_void(ret->anchored_substr);
-       SvREFCNT_inc_void(ret->anchored_utf8);
-       SvREFCNT_inc_void(ret->float_substr);
-       SvREFCNT_inc_void(ret->float_utf8);
+        for (i = 0; i < 2; i++) {
+            SvREFCNT_inc_void(drx->substrs->data[i].substr);
+            SvREFCNT_inc_void(drx->substrs->data[i].utf8_substr);
+        }
 
        /* check_substr and check_utf8, if non-NULL, point to either their
           anchored or float namesakes, and don't hold a second reference.  */
     }
-    RX_MATCH_COPIED_off(ret_x);
+    RX_MATCH_COPIED_off(dsv);
 #ifdef PERL_ANY_COW
-    ret->saved_copy = NULL;
+    drx->saved_copy = NULL;
 #endif
-    ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
-    SvREFCNT_inc_void(ret->qr_anoncv);
-    if (r->recurse_locinput)
-        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
+    drx->mother_re = ReREFCNT_inc(srx->mother_re ? srx->mother_re : ssv);
+    SvREFCNT_inc_void(drx->qr_anoncv);
+    if (srx->recurse_locinput)
+        Newxz(drx->recurse_locinput,srx->nparens + 1,char *);
 
-    return ret_x;
+    return dsv;
 }
 #endif
 
+
 /* regfree_internal()
 
    Free the private data in a regexp. This is overloadable by
@@ -19452,12 +19675,8 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
     if (ri->u.offsets)
         Safefree(ri->u.offsets);             /* 20010421 MJD */
 #endif
-    if (ri->code_blocks) {
-       int n;
-       for (n = 0; n < ri->num_code_blocks; n++)
-           SvREFCNT_dec(ri->code_blocks[n].src_regex);
-       Safefree(ri->code_blocks);
-    }
+    if (ri->code_blocks)
+        S_free_codeblocks(aTHX_ ri->code_blocks);
 
     if (ri->data) {
        int n = ri->data->count;
@@ -19581,36 +19800,41 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
        /* Do it this way to avoid reading from *r after the StructCopy().
           That way, if any of the sv_dup_inc()s dislodge *r from the L1
           cache, it doesn't matter.  */
+        int i;
        const bool anchored = r->check_substr
-           ? r->check_substr == r->anchored_substr
-           : r->check_utf8 == r->anchored_utf8;
+           ? r->check_substr == r->substrs->data[0].substr
+           : r->check_utf8   == r->substrs->data[0].utf8_substr;
         Newx(ret->substrs, 1, struct reg_substr_data);
        StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
 
-       ret->anchored_substr = sv_dup_inc(ret->anchored_substr, param);
-       ret->anchored_utf8 = sv_dup_inc(ret->anchored_utf8, param);
-       ret->float_substr = sv_dup_inc(ret->float_substr, param);
-       ret->float_utf8 = sv_dup_inc(ret->float_utf8, param);
+        for (i = 0; i < 2; i++) {
+            ret->substrs->data[i].substr =
+                        sv_dup_inc(ret->substrs->data[i].substr, param);
+            ret->substrs->data[i].utf8_substr =
+                        sv_dup_inc(ret->substrs->data[i].utf8_substr, param);
+        }
 
        /* check_substr and check_utf8, if non-NULL, point to either their
           anchored or float namesakes, and don't hold a second reference.  */
 
        if (ret->check_substr) {
            if (anchored) {
-               assert(r->check_utf8 == r->anchored_utf8);
-               ret->check_substr = ret->anchored_substr;
-               ret->check_utf8 = ret->anchored_utf8;
+               assert(r->check_utf8 == r->substrs->data[0].utf8_substr);
+
+               ret->check_substr = ret->substrs->data[0].substr;
+               ret->check_utf8   = ret->substrs->data[0].utf8_substr;
            } else {
-               assert(r->check_substr == r->float_substr);
-               assert(r->check_utf8 == r->float_utf8);
-               ret->check_substr = ret->float_substr;
-               ret->check_utf8 = ret->float_utf8;
+               assert(r->check_substr == r->substrs->data[1].substr);
+               assert(r->check_utf8   == r->substrs->data[1].utf8_substr);
+
+               ret->check_substr = ret->substrs->data[1].substr;
+               ret->check_utf8   = ret->substrs->data[1].utf8_substr;
            }
        } else if (ret->check_utf8) {
            if (anchored) {
-               ret->check_utf8 = ret->anchored_utf8;
+               ret->check_utf8 = ret->substrs->data[0].utf8_substr;
            } else {
-               ret->check_utf8 = ret->float_utf8;
+               ret->check_utf8 = ret->substrs->data[1].utf8_substr;
            }
        }
     }
@@ -19637,7 +19861,7 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
               1: a buffer in a different thread
               2: something we no longer hold a reference on
               so we need to copy it locally.  */
-    RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED(sstr), SvCUR(sstr)+1);
+    RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED_const(sstr), SvCUR(sstr)+1);
     ret->mother_re   = NULL;
 }
 #endif /* PERL_IN_XSUB_RE */
@@ -19674,16 +19898,18 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
     Copy(ri->program, reti->program, len+1, regnode);
 
 
-    reti->num_code_blocks = ri->num_code_blocks;
     if (ri->code_blocks) {
        int n;
-       Newxc(reti->code_blocks, ri->num_code_blocks, struct reg_code_block,
-               struct reg_code_block);
-       Copy(ri->code_blocks, reti->code_blocks, ri->num_code_blocks,
-               struct reg_code_block);
-       for (n = 0; n < ri->num_code_blocks; n++)
-            reti->code_blocks[n].src_regex = (REGEXP*)
-                   sv_dup_inc((SV*)(ri->code_blocks[n].src_regex), param);
+       Newx(reti->code_blocks, 1, struct reg_code_blocks);
+       Newx(reti->code_blocks->cb, ri->code_blocks->count,
+                    struct reg_code_block);
+       Copy(ri->code_blocks->cb, reti->code_blocks->cb,
+             ri->code_blocks->count, struct reg_code_block);
+       for (n = 0; n < ri->code_blocks->count; n++)
+            reti->code_blocks->cb[n].src_regex = (REGEXP*)
+                   sv_dup_inc((SV*)(ri->code_blocks->cb[n].src_regex), param);
+        reti->code_blocks->count = ri->code_blocks->count;
+        reti->code_blocks->refcnt = 1;
     }
     else
        reti->code_blocks = NULL;
@@ -19704,33 +19930,47 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
            d->what[i] = ri->data->what[i];
            switch (d->what[i]) {
                /* see also regcomp.h and regfree_internal() */
-           case 'a': /* actually an AV, but the dup function is identical.  */
-           case 'r':
-           case 's':
-           case 'S':
-           case 'u': /* actually an HV, but the dup function is identical.  */
+            case 'a': /* actually an AV, but the dup function is identical.
+                         values seem to be "plain sv's" generally. */
+            case 'r': /* a compiled regex (but still just another SV) */
+            case 's': /* an RV (currently only used for an RV to an AV by the ANYOF code)
+                         this use case should go away, the code could have used
+                         'a' instead - see S_set_ANYOF_arg() for array contents. */
+            case 'S': /* actually an SV, but the dup function is identical.  */
+            case 'u': /* actually an HV, but the dup function is identical.
+                         values are "plain sv's" */
                d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param);
                break;
            case 'f':
+                /* Synthetic Start Class - "Fake" charclass we generate to optimize
+                 * patterns which could start with several different things. Pre-TRIE
+                 * this was more important than it is now, however this still helps
+                 * in some places, for instance /x?a+/ might produce a SSC equivalent
+                 * to [xa]. This is used by Perl_re_intuit_start() and S_find_byclass()
+                 * in regexec.c
+                 */
                /* This is cheating. */
                Newx(d->data[i], 1, regnode_ssc);
                StructCopy(ri->data->data[i], d->data[i], regnode_ssc);
                reti->regstclass = (regnode*)d->data[i];
                break;
            case 'T':
-               /* Trie stclasses are readonly and can thus be shared
+                /* AHO-CORASICK fail table */
+                /* Trie stclasses are readonly and can thus be shared
                 * without duplication. We free the stclass in pregfree
                 * when the corresponding reg_ac_data struct is freed.
                 */
                reti->regstclass= ri->regstclass;
                /* FALLTHROUGH */
            case 't':
+                /* TRIE transition table */
                OP_REFCNT_LOCK;
                ((reg_trie_data*)ri->data->data[i])->refcount++;
                OP_REFCNT_UNLOCK;
                /* FALLTHROUGH */
-           case 'l':
-           case 'L':
+            case 'l': /* (?{...}) or (??{ ... }) code (cb->block) */
+            case 'L': /* same when RExC_pm_flags & PMf_HAS_CV and code
+                         is not from another regexp */
                d->data[i] = ri->data->data[i];
                break;
             default:
@@ -19814,7 +20054,7 @@ S_re_croak2(pTHX_ bool utf8, const char* pat1,const char* pat2,...)
        l1 = 512;
     Copy(message, buf, l1 , char);
     /* l1-1 to avoid \n */
-    Perl_croak(aTHX_ "%"UTF8f, UTF8fARG(utf8, l1-1, buf));
+    Perl_croak(aTHX_ "%" UTF8f, UTF8fARG(utf8, l1-1, buf));
 }
 
 /* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
@@ -19866,7 +20106,7 @@ S_put_code_point(pTHX_ SV *sv, UV c)
     PERL_ARGS_ASSERT_PUT_CODE_POINT;
 
     if (c > 255) {
-        Perl_sv_catpvf(aTHX_ sv, "\\x{%04"UVXf"}", c);
+        Perl_sv_catpvf(aTHX_ sv, "\\x{%04" UVXf "}", c);
     }
     else if (isPRINT(c)) {
        const char string = (char) c;
@@ -20050,10 +20290,10 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
                     : NUM_ANYOF_CODE_POINTS - 1;
 #if NUM_ANYOF_CODE_POINTS > 256
         format = (this_end < 256)
-                 ? "\\x%02"UVXf"-\\x%02"UVXf""
-                 : "\\x{%04"UVXf"}-\\x{%04"UVXf"}";
+                 ? "\\x%02" UVXf "-\\x%02" UVXf
+                 : "\\x{%04" UVXf "}-\\x{%04" UVXf "}";
 #else
-        format = "\\x%02"UVXf"-\\x%02"UVXf"";
+        format = "\\x%02" UVXf "-\\x%02" UVXf;
 #endif
         GCC_DIAG_IGNORE(-Wformat-nonliteral);
         Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
@@ -20455,7 +20695,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
 #define CLEAR_OPTSTART                                                       \
     if (optstart) STMT_START {                                               \
         DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_                                           \
-                              " (%"IVdf" nodes)\n", (IV)(node - optstart))); \
+                              " (%" IVdf " nodes)\n", (IV)(node - optstart))); \
         optstart=NULL;                                                       \
     } STMT_END
 
@@ -20504,7 +20744,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
            CLEAR_OPTSTART;
 
         regprop(r, sv, node, NULL, NULL);
-        Perl_re_printf( aTHX_  "%4"IVdf":%*s%s", (IV)(node - start),
+        Perl_re_printf( aTHX_  "%4" IVdf ":%*s%s", (IV)(node - start),
                      (int)(2*indent + 1), "", SvPVX_const(sv));
 
         if (OP(node) != OPTIMIZED) {
@@ -20514,7 +20754,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                      && PL_regkind[OP(next)] != BRANCH )
                 Perl_re_printf( aTHX_  " (FAIL)");
             else
-                Perl_re_printf( aTHX_  " (%"IVdf")", (IV)(next - start));
+                Perl_re_printf( aTHX_  " (%" IVdf ")", (IV)(next - start));
             Perl_re_printf( aTHX_ "\n");
         }
 
@@ -20569,7 +20809,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                 );
                 if (trie->jump) {
                     U16 dist= trie->jump[word_idx+1];
-                    Perl_re_printf( aTHX_  "(%"UVuf")\n",
+                    Perl_re_printf( aTHX_  "(%" UVuf ")\n",
                                (UV)((dist ? this_trie + dist : next) - start));
                     if (dist) {
                         if (!nextbranch)