This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
correct error returns from fast_abs_path()
[perl5.git] / regcomp.c
index 06e1433..376b697 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -110,7 +110,6 @@ typedef struct scan_frame {
     regnode *next_regnode;      /* next node to process when last is reached */
     U32 prev_recursed_depth;
     I32 stopparen;              /* what stopparen do we use */
-    U32 is_top_frame;           /* what flags do we use? */
 
     struct scan_frame *this_prev_frame; /* this previous frame */
     struct scan_frame *prev_frame;      /* previous frame */
@@ -150,7 +149,7 @@ struct RExC_state_t {
     I32                sawback;                /* Did we see \1, ...? */
     U32                seen;
     SSize_t    size;                   /* Code size. */
-    I32                npar;            /* Capture buffer count, (OPEN) plus
+    I32         npar;                   /* Capture buffer count, (OPEN) plus
                                            one. ("par" 0 is the whole
                                            pattern)*/
     I32                nestroot;               /* root parens we are in - used by
@@ -213,6 +212,7 @@ struct RExC_state_t {
     bool        seen_unfolded_sharp_s;
     bool        strict;
     bool        study_started;
+    bool        in_script_run;
 };
 
 #define RExC_flags     (pRExC_state->flags)
@@ -279,6 +279,7 @@ struct RExC_state_t {
 #define RExC_strict (pRExC_state->strict)
 #define RExC_study_started      (pRExC_state->study_started)
 #define RExC_warn_text (pRExC_state->warn_text)
+#define RExC_in_script_run      (pRExC_state->in_script_run)
 
 /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
  * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -628,7 +629,7 @@ static const scan_data_t zero_scan_data = {
     UTF8fARG(UTF,                                                           \
              (xI(xC) > eC) /* Don't run off end */                          \
               ? eC - sC   /* Length before the <--HERE */                   \
-              : xI_offset(xC),                                              \
+              : ( __ASSERT_(xI_offset(xC) >= 0) xI_offset(xC) ),            \
              sC),         /* The input pattern printed up to the <--HERE */ \
     UTF8fARG(UTF,                                                           \
              (xI(xC) > eC) ? 0 : eC - xI(xC), /* Length after <--HERE */    \
@@ -1031,6 +1032,7 @@ S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
         );
 
         if (data->last_found) {
+            int i;
             Perl_re_printf(aTHX_
                 "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf,
                     SvPVX_const(data->last_found),
@@ -1039,22 +1041,17 @@ S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
                     (IV)data->last_start_max
             );
 
-            Perl_re_printf(aTHX_
-                " %sFixed:'%s' @ %" IVdf,
-                data->cur_is_floating == 0 ? "*" : "",
-                SvPVX_const(data->substrs[0].str),
-                (IV)data->substrs[0].min_offset
-            );
-            S_debug_show_study_flags(aTHX_ data->substrs[0].flags," [","]");
-
-            Perl_re_printf(aTHX_
-                " %sFloat: '%s' @ %" IVdf "/%" IVdf,
-                data->cur_is_floating == 1 ? "*" : "",
-                SvPVX_const(data->substrs[1].str),
-                (IV)data->substrs[1].min_offset,
-                (IV)data->substrs[1].max_offset
-            );
-            S_debug_show_study_flags(aTHX_ data->substrs[1].flags," [","]");
+            for (i = 0; i < 2; i++) {
+                Perl_re_printf(aTHX_
+                    " %s%s: '%s' @ %" IVdf "/%" IVdf,
+                    data->cur_is_floating == i ? "*" : "",
+                    i ? "Float" : "Fixed",
+                    SvPVX_const(data->substrs[i].str),
+                    (IV)data->substrs[i].min_offset,
+                    (IV)data->substrs[i].max_offset
+                );
+                S_debug_show_study_flags(aTHX_ data->substrs[i].flags," [","]");
+            }
         }
 
         Perl_re_printf( aTHX_ "\n");
@@ -1122,7 +1119,7 @@ PERL_STATIC_INLINE item*
 push(UV key,item* curr)
 {
     item* head;
-    Newxz(head, 1, item);
+    Newx(head, 1, item);
     head->key = key;
     head->value = 0;
     head->next = curr;
@@ -1192,7 +1189,7 @@ S_edit_distance(const UV* src,
     PERL_ARGS_ASSERT_EDIT_DISTANCE;
 
     /* intialize matrix start values */
-    Newxz(scores, ( (x + 2) * (y + 2)), UV);
+    Newx(scores, ( (x + 2) * (y + 2)), UV);
     scores[0] = score_ceil;
     scores[1 * (y + 2) + 0] = score_ceil;
     scores[0 * (y + 2) + 1] = score_ceil;
@@ -1267,7 +1264,7 @@ S_cntrl_to_mnemonic(const U8 c)
 }
 
 /* Mark that we cannot extend a found fixed substring at this point.
-   Update the longest found anchored substring and the longest found
+   Update the longest found anchored substring or the longest found
    floating substrings if needed. */
 
 STATIC void
@@ -1282,19 +1279,13 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
     PERL_ARGS_ASSERT_SCAN_COMMIT;
 
     if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
+        const U8 i = data->cur_is_floating;
        SvSetMagicSV(longest_sv, data->last_found);
-       if (data->cur_is_floating == 0) { /* fixed */
-           data->substrs[0].min_offset = l ? data->last_start_min : data->pos_min;
+        data->substrs[i].min_offset = l ? data->last_start_min : data->pos_min;
+
+       if (!i) /* fixed */
            data->substrs[0].max_offset = data->substrs[0].min_offset;
-           if (data->flags & SF_BEFORE_EOL)
-               data->substrs[0].flags |= (data->flags & SF_BEFORE_EOL);
-           else
-               data->substrs[0].flags &= ~SF_BEFORE_EOL;
-           data->substrs[0].minlenp = minlenp;
-           data->substrs[0].lookbehind = 0;
-       }
        else { /* float */
-           data->substrs[1].min_offset = l ? data->last_start_min : data->pos_min;
            data->substrs[1].max_offset = (l
                           ? data->last_start_max
                           : (data->pos_delta > SSize_t_MAX - data->pos_min
@@ -1303,15 +1294,16 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
            if (is_inf
                 || (STRLEN)data->substrs[1].max_offset > (STRLEN)SSize_t_MAX)
                data->substrs[1].max_offset = SSize_t_MAX;
+        }
 
-           if (data->flags & SF_BEFORE_EOL)
-               data->substrs[1].flags |= (data->flags & SF_BEFORE_EOL);
-           else
-               data->substrs[1].flags &= ~SF_BEFORE_EOL;
-            data->substrs[1].minlenp = minlenp;
-            data->substrs[1].lookbehind = 0;
-       }
+        if (data->flags & SF_BEFORE_EOL)
+            data->substrs[i].flags |= (data->flags & SF_BEFORE_EOL);
+        else
+            data->substrs[i].flags &= ~SF_BEFORE_EOL;
+        data->substrs[i].minlenp = minlenp;
+        data->substrs[i].lookbehind = 0;
     }
+
     SvCUR_set(data->last_found, 0);
     {
        SV * const sv = data->last_found;
@@ -1711,6 +1703,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
             regnode_charclass_posixl temp;
             int add = 1;    /* To calculate the index of the complement */
 
+            Zero(&temp, 1, regnode_charclass_posixl);
             ANYOF_POSIXL_ZERO(&temp);
             for (i = 0; i < ANYOF_MAX; i++) {
                 assert(i % 2 != 0
@@ -2432,7 +2425,7 @@ is the recommended Unicode-aware way of saying
 } STMT_END
 
 #define TRIE_LIST_NEW(state) STMT_START {                       \
-    Newxz( trie->states[ state ].trans.list,               \
+    Newx( trie->states[ state ].trans.list,                     \
        4, reg_trie_trans_le );                                 \
      TRIE_LIST_CUR( state ) = 1;                                \
      TRIE_LIST_LEN( state ) = 4;                                \
@@ -3627,7 +3620,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
     aho->trie=trie_offset;
     aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) );
     Copy( trie->states, aho->states, numstates, reg_trie_state );
-    Newxz( q, numstates, U32);
+    Newx( q, numstates, U32);
     aho->fail = (U32 *) PerlMemShared_calloc( numstates, sizeof(U32) );
     aho->refcount = 1;
     fail = aho->fail;
@@ -4152,7 +4145,7 @@ S_unwind_scan_frames(pTHX_ const void *p)
     } while (f);
 }
 
-
+/* the return from this sub is the minimum length that could possibly match */
 STATIC SSize_t
 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         SSize_t *minlenp, SSize_t *deltap,
@@ -4188,6 +4181,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
     PERL_ARGS_ASSERT_STUDY_CHUNK;
     RExC_study_started= 1;
 
+    Zero(&data_fake, 1, scan_data_t);
 
     if ( depth == 0 ) {
         while (first_non_open && OP(first_non_open) == OPEN)
@@ -4295,6 +4289,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
             /* we suppose the run is continuous, last=next...
              * NOTE we dont use the return here! */
+            /* DEFINEP study_chunk() recursion */
             (void)study_chunk(pRExC_state, &scan, &minlen,
                               &deltanext, next, &data_fake, stopparen,
                               recursed_depth, NULL, f, depth+1);
@@ -4362,6 +4357,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        f |= SCF_WHILEM_VISITED_POS;
 
                    /* we suppose the run is continuous, last=next...*/
+                    /* recurse study_chunk() for each BRANCH in an alternation */
                    minnext = study_chunk(pRExC_state, &scan, minlenp,
                                       &deltanext, next, &data_fake, stopparen,
                                       recursed_depth, NULL, f,depth+1);
@@ -4901,6 +4897,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
        else if (OP(scan) == EXACT || OP(scan) == EXACTL) {
            SSize_t l = STR_LEN(scan);
            UV uc;
+            assert(l);
            if (UTF) {
                const U8 * const s = (U8*)STRING(scan);
                uc = utf8_to_uvchr_buf(s, s + l, NULL);
@@ -5097,6 +5094,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    f &= ~SCF_WHILEM_VISITED_POS;
 
                /* This will finish on WHILEM, setting scan, or on NULL: */
+                /* recurse study_chunk() on loop bodies */
                minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
                                   last, data, stopparen, recursed_depth, NULL,
                                   (mincount == 0
@@ -5132,7 +5130,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                }
                if (!scan)              /* It was not CURLYX, but CURLY. */
                    scan = next;
-               if (!(flags & SCF_TRIE_DOING_RESTUDY)
+               if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
                    /* ? quantifier ok, except for (?{ ... }) */
                    && (next_is_eval || !(mincount == 0 && maxcount == 1))
                    && (minnext == 0) && (deltanext == 0)
@@ -5259,6 +5257,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        }
 #endif
                        /* Optimize again: */
+                        /* recurse study_chunk() on optimised CURLYX => CURLYM */
                        study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
                                     NULL, stopparen, recursed_depth, NULL, 0,depth+1);
                    }
@@ -5557,20 +5556,25 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     }
                     break;
 
+                case NASCII:
+                    invert = 1;
+                    /* FALLTHROUGH */
+               case ASCII:
+                    my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
+
+                    /* This can be handled as a Posix class */
+                    goto join_posix_and_ascii;
+
                 case NPOSIXA:   /* For these, we always know the exact set of
                                    what's matched */
                     invert = 1;
                     /* FALLTHROUGH */
                case POSIXA:
-                    if (FLAGS(scan) == _CC_ASCII) {
-                        my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
-                    }
-                    else {
-                        _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
-                                              PL_XPosix_ptrs[_CC_ASCII],
-                                              &my_invlist);
-                    }
-                    goto join_posix;
+                    assert(FLAGS(scan) != _CC_ASCII);
+                    _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
+                                          PL_XPosix_ptrs[_CC_ASCII],
+                                          &my_invlist);
+                    goto join_posix_and_ascii;
 
                case NPOSIXD:
                case NPOSIXU:
@@ -5590,7 +5594,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                           &my_invlist);
                     }
 
-                  join_posix:
+                  join_posix_and_ascii:
 
                     if (flags & SCF_DO_STCLASS_AND) {
                         ssc_intersection(data->start_class, my_invlist, invert);
@@ -5649,6 +5653,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     f |= SCF_WHILEM_VISITED_POS;
                 next = regnext(scan);
                 nscan = NEXTOPER(NEXTOPER(scan));
+
+                /* recurse study_chunk() for lookahead body */
                 minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
                                       last, &data_fake, stopparen,
                                       recursed_depth, NULL, f, depth+1);
@@ -5739,6 +5745,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 next = regnext(scan);
                 nscan = NEXTOPER(NEXTOPER(scan));
 
+                /* positive lookahead study_chunk() recursion */
                 *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
                                         &deltanext, last, &data_fake,
                                         stopparen, recursed_depth, NULL,
@@ -5767,29 +5774,29 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                         data->flags |= SF_HAS_EVAL;
                     data->whilem_c = data_fake.whilem_c;
                     if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
+                        int i;
                         if (RExC_rx->minlen<*minnextp)
                             RExC_rx->minlen=*minnextp;
                         scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
                         SvREFCNT_dec_NN(data_fake.last_found);
 
-                        if (data_fake.substrs[0].minlenp != minlenp) {
-                            data->substrs[0].min_offset = data_fake.substrs[0].min_offset;
-                            data->substrs[0].max_offset = data_fake.substrs[0].max_offset;
-                            data->substrs[0].minlenp = data_fake.substrs[0].minlenp;
-                            data->substrs[0].lookbehind += scan->flags;
-                        }
-
-                        if (data_fake.substrs[1].minlenp != minlenp) {
-                            data->substrs[1].minlenp = data_fake.substrs[1].minlenp;
-                            data->substrs[1].min_offset = data_fake.substrs[1].min_offset;
-                            data->substrs[1].max_offset = data_fake.substrs[1].max_offset;
-                            data->substrs[1].lookbehind += scan->flags;
+                        for (i = 0; i < 2; i++) {
+                            if (data_fake.substrs[i].minlenp != minlenp) {
+                                data->substrs[i].min_offset =
+                                            data_fake.substrs[i].min_offset;
+                                data->substrs[i].max_offset =
+                                            data_fake.substrs[i].max_offset;
+                                data->substrs[i].minlenp =
+                                            data_fake.substrs[i].minlenp;
+                                data->substrs[i].lookbehind += scan->flags;
+                            }
                         }
                     }
                 }
            }
 #endif
        }
+
        else if (OP(scan) == OPEN) {
            if (stopparen != (I32)ARG(scan))
                pars++;
@@ -5900,6 +5907,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                         /* We go from the jump point to the branch that follows
                            it. Note this means we need the vestigal unused
                            branches even though they arent otherwise used. */
+                        /* optimise study_chunk() for TRIE */
                         minnext = study_chunk(pRExC_state, &scan, minlenp,
                             &deltanext, (regnode *)nextbranch, &data_fake,
                             stopparen, recursed_depth, NULL, f,depth+1);
@@ -5940,8 +5948,12 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     data->cur_is_floating = 1; /* float */
             }
             min += min1;
-            if (delta != SSize_t_MAX)
-                delta += max1 - min1;
+            if (delta != SSize_t_MAX) {
+                if (SSize_t_MAX - (max1 - min1) >= delta)
+                    delta += max1 - min1;
+                else
+                    delta = SSize_t_MAX;
+            }
             if (flags & SCF_DO_STCLASS_OR) {
                 ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
                 if (min1) {
@@ -6915,7 +6927,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         if (   ! dump_len_string
             || ! grok_atoUV(dump_len_string, (UV *)&PL_dump_re_max_len, NULL))
         {
-            PL_dump_re_max_len = 0;
+            PL_dump_re_max_len = 60;    /* A reasonable default */
         }
 #endif
     }
@@ -7032,6 +7044,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_seen_unfolded_sharp_s = 0;
     RExC_contains_locale = 0;
     RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
+    RExC_in_script_run = 0;
     RExC_study_started = 0;
     pRExC_state->runtime_code_qr = NULL;
     RExC_frame_head= NULL;
@@ -7044,7 +7057,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     });
     DEBUG_COMPILE_r({
             SV *dsv= sv_newmortal();
-            RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, 60);
+            RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, PL_dump_re_max_len);
             Perl_re_printf( aTHX_  "%sCompiling REx%s %s\n",
                           PL_colors[4],PL_colors[5],s);
         });
@@ -7271,8 +7284,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         /* make sure PL_bitcount bounds not exceeded */
         assert(sizeof(STD_PAT_MODS) <= 8);
 
-        Newx(p, wraplen + 1, char); /* +1 for the ending NUL */
-       r->xpv_len_u.xpvlenu_pv = p;
+        p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
+        SvPOK_on(rx);
        if (RExC_utf8)
            SvFLAGS(rx) |= SVf_UTF8;
         *p++='('; *p++='?';
@@ -7381,7 +7394,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        3-units-long substrs field. */
     Newx(r->substrs, 1, struct reg_substr_data);
     if (RExC_recurse_count) {
-        Newxz(RExC_recurse,RExC_recurse_count,regnode *);
+        Newx(RExC_recurse,RExC_recurse_count,regnode *);
         SAVEFREEPV(RExC_recurse);
     }
 
@@ -7432,12 +7445,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)) { /*  Only one top-level choice.
                                                   */
        SSize_t fake;
-       STRLEN longest_float_length, longest_fixed_length;
+       STRLEN longest_length[2];
        regnode_ssc ch_class; /* pointed to by data */
        int stclass_flag;
        SSize_t last_close = 0; /* pointed to by data */
         regnode *first= scan;
         regnode *first_next= regnext(first);
+        int i;
+
        /*
         * Skip introductions and multiplicators >= 1
         * so that we can extract the 'meat' of the pattern that must
@@ -7578,6 +7593,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        data.last_closep = &last_close;
 
         DEBUG_RExC_seen();
+        /*
+         * MAIN ENTRY FOR study_chunk() FOR m/PATTERN/
+         * (NO top level branches)
+         */
        minlen = study_chunk(pRExC_state, &first, &minlen, &fake,
                              scan + RExC_size, /* Up to end */
             &data, -1, 0, NULL,
@@ -7599,54 +7618,41 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         }
        scan_commit(pRExC_state, &data,&minlen,0);
 
-       longest_float_length = CHR_SVLEN(data.substrs[1].str);
 
-        if (! ((SvCUR(data.substrs[0].str)  /* ok to leave SvCUR */
-                   && data.substrs[0].min_offset == data.substrs[1].min_offset
-                   && SvCUR(data.substrs[0].str) == SvCUR(data.substrs[1].str)))
-            && S_setup_longest (aTHX_ pRExC_state,
-                                    &(r->substrs->data[1]),
-                                    &(data.substrs[1]),
-                                    longest_float_length))
-        {
-           r->substrs->data[1].min_offset =
-                    data.substrs[1].min_offset - data.substrs[1].lookbehind;
+        /* XXX this is done in reverse order because that's the way the
+         * code was before it was parameterised. Don't know whether it
+         * actually needs doing in reverse order. DAPM */
+        for (i = 1; i >= 0; i--) {
+            longest_length[i] = CHR_SVLEN(data.substrs[i].str);
 
-           r->substrs->data[1].max_offset = data.substrs[1].max_offset;
-           if (data.substrs[1].max_offset < SSize_t_MAX) /* Don't offset infinity */
-               r->substrs->data[1].max_offset -= data.substrs[1].lookbehind;
-
-           SvREFCNT_inc_simple_void_NN(data.substrs[1].str);
-       }
-       else {
-           r->substrs->data[1].substr      = NULL;
-            r->substrs->data[1].utf8_substr = NULL;
-           longest_float_length = 0;
-       }
-
-       longest_fixed_length = CHR_SVLEN(data.substrs[0].str);
-
-        if (S_setup_longest (aTHX_ pRExC_state,
-                                &(r->substrs->data[0]),
-                                &(data.substrs[0]),
-                                longest_fixed_length))
-        {
-           r->substrs->data[0].min_offset =
-                    data.substrs[0].min_offset - data.substrs[0].lookbehind;
-            /* XXX this calc isn't necessary for anchored, but is done
-             * for consistency with float code path */
-           r->substrs->data[0].max_offset = data.substrs[0].max_offset;
-
-           if (data.substrs[0].max_offset < SSize_t_MAX) /* Don't offset infinity */
-               r->substrs->data[0].max_offset -= data.substrs[0].lookbehind;
+            if (   !(   i
+                     && SvCUR(data.substrs[0].str)  /* ok to leave SvCUR */
+                     &&    data.substrs[0].min_offset
+                        == data.substrs[1].min_offset
+                     &&    SvCUR(data.substrs[0].str)
+                        == SvCUR(data.substrs[1].str)
+                    )
+                && S_setup_longest (aTHX_ pRExC_state,
+                                        &(r->substrs->data[i]),
+                                        &(data.substrs[i]),
+                                        longest_length[i]))
+            {
+                r->substrs->data[i].min_offset =
+                        data.substrs[i].min_offset - data.substrs[i].lookbehind;
+
+                r->substrs->data[i].max_offset = data.substrs[i].max_offset;
+                /* Don't offset infinity */
+                if (data.substrs[i].max_offset < SSize_t_MAX)
+                    r->substrs->data[i].max_offset -= data.substrs[i].lookbehind;
+                SvREFCNT_inc_simple_void_NN(data.substrs[i].str);
+            }
+            else {
+                r->substrs->data[i].substr      = NULL;
+                r->substrs->data[i].utf8_substr = NULL;
+                longest_length[i] = 0;
+            }
+        }
 
-           SvREFCNT_inc_simple_void_NN(data.substrs[0].str);
-       }
-       else {
-            r->substrs->data[0].substr      = NULL;
-            r->substrs->data[0].utf8_substr = NULL;
-           longest_fixed_length = 0;
-       }
        LEAVE_with_name("study_chunk");
 
        if (ri->regstclass
@@ -7677,26 +7683,17 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             data.start_class = NULL;
        }
 
-        /* A temporary algorithm prefers floated substr to fixed one to dig
-         * more info. */
-       if (longest_fixed_length > longest_float_length) {
-           r->substrs->check_ix = 0;
-           r->check_end_shift  = r->substrs->data[0].end_shift;
-           r->check_substr     = r->substrs->data[0].substr;
-           r->check_utf8       = r->substrs->data[0].utf8_substr;
-           r->check_offset_min = r->substrs->data[0].min_offset;
-           r->check_offset_max = r->substrs->data[0].max_offset;
-            if (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS))
-                r->intflags |= PREGf_NOSCAN;
-       }
-       else {
-           r->substrs->check_ix = 1;
-           r->check_end_shift  = r->substrs->data[1].end_shift;
-           r->check_substr     = r->substrs->data[1].substr;
-           r->check_utf8       = r->substrs->data[1].utf8_substr;
-           r->check_offset_min = r->substrs->data[1].min_offset;
-           r->check_offset_max = r->substrs->data[1].max_offset;
-       }
+        /* A temporary algorithm prefers floated substr to fixed one of
+         * same length to dig more info. */
+       i = (longest_length[0] <= longest_length[1]);
+        r->substrs->check_ix = i;
+        r->check_end_shift  = r->substrs->data[i].end_shift;
+        r->check_substr     = r->substrs->data[i].substr;
+        r->check_utf8       = r->substrs->data[i].utf8_substr;
+        r->check_offset_min = r->substrs->data[i].min_offset;
+        r->check_offset_max = r->substrs->data[i].max_offset;
+        if (!i && (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS)))
+            r->intflags |= PREGf_NOSCAN;
 
        if ((r->check_substr || r->check_utf8) ) {
            r->extflags |= RXf_USE_INTUIT;
@@ -7705,10 +7702,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        }
 
        /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
-       if ( (STRLEN)minlen < longest_float_length )
-            minlen= longest_float_length;
-        if ( (STRLEN)minlen < longest_fixed_length )
-            minlen= longest_fixed_length;
+       if ( (STRLEN)minlen < longest_length[1] )
+            minlen= longest_length[1];
+        if ( (STRLEN)minlen < longest_length[0] )
+            minlen= longest_length[0];
         */
     }
     else {
@@ -7725,6 +7722,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        data.last_closep = &last_close;
 
         DEBUG_RExC_seen();
+        /*
+         * MAIN ENTRY FOR study_chunk() FOR m/P1|P2|.../
+         * (patterns WITH top level branches)
+         */
        minlen = study_chunk(pRExC_state,
             &scan, &minlen, &fake, scan + RExC_size, &data, -1, 0, NULL,
             SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
@@ -7783,7 +7784,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     if (RExC_seen & REG_RECURSE_SEEN ) {
         r->intflags |= PREGf_RECURSE_SEEN;
-        Newxz(r->recurse_locinput, r->nparens + 1, char *);
+        Newx(r->recurse_locinput, r->nparens + 1, char *);
     }
     if (RExC_seen & REG_GPOS_SEEN)
         r->intflags |= PREGf_GPOS_SEEN;
@@ -10678,13 +10679,28 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
          * here (if paren ==2).  The forms '(*VERB' and '(?...' disallow such
          * intervening space, as the sequence is a token, and a token should be
          * indivisible */
-        bool has_intervening_patws = paren == 2 && *(RExC_parse - 1) != '(';
+        bool has_intervening_patws = (paren == 2 || paren == 's')
+                                  && *(RExC_parse - 1) != '(';
 
         if (RExC_parse >= RExC_end) {
            vFAIL("Unmatched (");
         }
 
-        if ( *RExC_parse == '*') { /* (*VERB:ARG) */
+        if (paren == 's') {
+
+            /* A nested script run  is a no-op besides clustering */
+            if (RExC_in_script_run) {
+                paren = ':';
+                nextchar(pRExC_state);
+                ret = NULL;
+                goto parse_rest;
+            }
+            RExC_in_script_run = 1;
+
+           ret = reg_node(pRExC_state, SROPEN);
+            is_open = 1;
+        }
+        else if ( *RExC_parse == '*') { /* (*VERB:ARG) */
            char *start_verb = RExC_parse + 1;
            STRLEN verb_len;
            char *start_arg = NULL;
@@ -10708,6 +10724,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 if (RExC_parse >= RExC_end) {
                     goto unterminated_verb_pattern;
                 }
+
                RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
                while ( RExC_parse < RExC_end && *RExC_parse != ')' )
                     RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
@@ -10795,6 +10812,45 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            nextchar(pRExC_state);
            return ret;
         }
+        else if (*RExC_parse == '+') { /* (+...) */
+            RExC_parse++;
+
+            if (has_intervening_patws) {
+                /* XXX Note that a potential gotcha is that outside of /x '( +
+                 * ...)' means to match a space at least once ...   This is a
+                 * problem elsewhere too */
+                vFAIL("In '(+...)', the '(' and '+' must be adjacent");
+            }
+
+            if (! memBEGINPs(RExC_parse, (STRLEN) (RExC_end - RExC_parse),
+                             "script_run:"))
+            {
+                RExC_parse += strcspn(RExC_parse, ":)");
+                vFAIL("Unknown (+ pattern");
+            }
+            else {
+
+                /* This indicates Unicode rules. */
+                REQUIRE_UNI_RULES(flagp, NULL);
+
+                RExC_parse += sizeof("script_run:") - 1;
+
+                if (PASS2) {
+                    Perl_ck_warner_d(aTHX_
+                        packWARN(WARN_EXPERIMENTAL__SCRIPT_RUN),
+                        "The script_run feature is experimental"
+                        REPORT_LOCATION, REPORT_LOCATION_ARGS(RExC_parse));
+                }
+
+                ret = reg(pRExC_state, 's', &flags, depth+1);
+                if (flags & (RESTART_PASS1|NEED_UTF8)) {
+                    *flagp = flags & (RESTART_PASS1|NEED_UTF8);
+                    return NULL;
+                }
+
+                return ret;
+            }
+        }
         else if (*RExC_parse == '?') { /* (?...) */
            bool is_logical = 0;
            const char * const seqstart = RExC_parse;
@@ -11206,8 +11262,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     ret = reganode(pRExC_state,NGROUPP,num);
                     goto insert_if_check_paren;
                }
-               else if (RExC_end - RExC_parse >= DEFINE_len
-                        && strnEQ(RExC_parse, "DEFINE", DEFINE_len))
+               else if (memBEGINs(RExC_parse,
+                                   (STRLEN) (RExC_end - RExC_parse),
+                                   "DEFINE"))
                 {
                    ret = reganode(pRExC_state,DEFINEP,0);
                    RExC_parse += DEFINE_len;
@@ -11342,7 +11399,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 vFAIL("Unknown switch condition (?(...))");
            }
            case '[':           /* (?[ ... ]) */
-                return handle_regex_sets(pRExC_state, NULL, flagp, depth,
+                return handle_regex_sets(pRExC_state, NULL, flagp, depth+1,
                                          oregcomp_parse);
             case 0: /* A NUL */
                RExC_parse--; /* for vFAIL to print correctly */
@@ -11479,6 +11536,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             Set_Node_Offset(ender,RExC_parse+1); /* MJD */
             Set_Node_Length(ender,1); /* MJD */
            break;
+       case 's':
+           ender = reg_node(pRExC_state, SRCLOSE);
+            RExC_in_script_run = 0;
+           break;
        case '<':
        case ',':
        case '=':
@@ -11811,14 +11872,12 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            if ((flags&SIMPLE)) {
                 if (min == 0 && max == REG_INFTY) {
                     reginsert(pRExC_state, STAR, ret, depth+1);
-                    ret->flags = 0;
                     MARK_NAUGHTY(4);
                     RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
                     goto nest_check;
                 }
                 if (min == 1 && max == REG_INFTY) {
                     reginsert(pRExC_state, PLUS, ret, depth+1);
-                    ret->flags = 0;
                     MARK_NAUGHTY(3);
                     RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
                     goto nest_check;
@@ -11931,7 +11990,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         ender = reg_node(pRExC_state, SUCCEED);
         REGTAIL(pRExC_state, ret, ender);
         reginsert(pRExC_state, SUSPEND, ret, depth+1);
-        ret->flags = 0;
         ender = reg_node(pRExC_state, TAIL);
         REGTAIL(pRExC_state, ret, ender);
     }
@@ -12086,14 +12144,15 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
 
     RExC_parse++;      /* Skip past the '{' */
 
-    endbrace = strchr(RExC_parse, '}');
+    endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
     if (! endbrace) { /* no trailing brace */
         vFAIL2("Missing right brace on \\%c{}", 'N');
     }
-    else if(!(endbrace == RExC_parse           /* nothing between the {} */
-              || (endbrace - RExC_parse >= 2   /* U+ (bad hex is checked... */
-                  && strnEQ(RExC_parse, "U+", 2)))) /* ... below for a better
-                                                       error msg) */
+    else if (!(   endbrace == RExC_parse       /* nothing between the {} */
+               || memBEGINs(RExC_parse,   /* U+ (bad hex is checked below
+                                                   for a  better error msg) */
+                                  (STRLEN) (RExC_end - RExC_parse),
+                                 "U+")))
     {
        RExC_parse = endbrace;  /* position msg's '<--HERE' */
        vFAIL("\\N{NAME} must be resolved by the lexer");
@@ -12141,22 +12200,22 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         /* Convert code point from hex */
        length_of_hex = (STRLEN)(endchar - RExC_parse);
        grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
-                           | PERL_SCAN_DISALLOW_PREFIX
+                       | PERL_SCAN_DISALLOW_PREFIX
 
-                             /* No errors in the first pass (See [perl
-                              * #122671].)  We let the code below find the
-                              * errors when there are multiple chars. */
-                           | ((SIZE_ONLY)
-                              ? PERL_SCAN_SILENT_ILLDIGIT
-                              : 0);
+                           /* No errors in the first pass (See [perl
+                            * #122671].)  We let the code below find the
+                            * errors when there are multiple chars. */
+                       | ((SIZE_ONLY)
+                          ? PERL_SCAN_SILENT_ILLDIGIT
+                          : 0);
 
         /* This routine is the one place where both single- and double-quotish
          * \N{U+xxxx} are evaluated.  The value is a Unicode code point which
          * must be converted to native. */
        *code_point_p = UNI_TO_NATIVE(grok_hex(RExC_parse,
-                                         &length_of_hex,
-                                         &grok_hex_flags,
-                                         NULL));
+                                               &length_of_hex,
+                                               &grok_hex_flags,
+                                               NULL));
 
        /* The tokenizer should have guaranteed validity, but it's possible to
          * bypass it by using single quoting, so check.  Don't do the check
@@ -12197,7 +12256,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         }
 
         /* Fail if caller doesn't want to handle a multi-code-point sequence.
-         * But don't backup up the pointer if the caller want to know how many
+         * But don't backup up the pointer if the caller wants to know how many
          * code points there are (they can then handle things) */
         if (! node_p) {
             if (! cp_count) {
@@ -12228,14 +12287,16 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
        }
         sv_catpv(substitute_parse, ")");
 
-        RExC_parse = RExC_start = RExC_adjusted_start = SvPV(substitute_parse,
-                                                             len);
+        len = SvCUR(substitute_parse);
 
        /* Don't allow empty number */
        if (len < (STRLEN) 8) {
             RExC_parse = endbrace;
            vFAIL("Invalid hexadecimal number in \\N{U+...}");
        }
+
+        RExC_parse = RExC_start = RExC_adjusted_start
+                                              = SvPV_nolen(substitute_parse);
        RExC_end = RExC_parse + len;
 
         /* The values are Unicode, and therefore not subject to recoding, but
@@ -12245,17 +12306,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         RExC_recode_x_to_native = 1;
 #endif
 
-        if (node_p) {
-            if (!(*node_p = reg(pRExC_state, 1, &flags, depth+1))) {
-                if (flags & (RESTART_PASS1|NEED_UTF8)) {
-                    *flagp = flags & (RESTART_PASS1|NEED_UTF8);
-                    return FALSE;
-                }
-                FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
-                    (UV) flags);
-            }
-            *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
-        }
+        *node_p = reg(pRExC_state, 1, &flags, depth+1);
 
         /* Restore the saved values */
        RExC_start = RExC_adjusted_start = save_start;
@@ -12264,8 +12315,18 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
 #ifdef EBCDIC
         RExC_recode_x_to_native = 0;
 #endif
-
         SvREFCNT_dec_NN(substitute_parse);
+
+        if (! *node_p) {
+            if (flags & (RESTART_PASS1|NEED_UTF8)) {
+                *flagp = flags & (RESTART_PASS1|NEED_UTF8);
+                return FALSE;
+            }
+            FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf,
+                (UV) flags);
+        }
+        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+
         nextchar(pRExC_state);
 
         return TRUE;
@@ -12800,9 +12861,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             else {
                 STRLEN length;
                 char name = *RExC_parse;
-                char * endbrace;
+                char * endbrace = NULL;
                 RExC_parse += 2;
-                endbrace = strchr(RExC_parse, '}');
+                if (RExC_parse < RExC_end) {
+                    endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
+                }
 
                 if (! endbrace) {
                     vFAIL2("Missing right brace on \\%c{}", name);
@@ -12823,8 +12886,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 }*/
                 switch (*RExC_parse) {
                     case 'g':
-                        if (length != 1
-                            && (length != 3 || strnNE(RExC_parse + 1, "cb", 2)))
+                        if (    length != 1
+                            && (memNEs(RExC_parse + 1, length - 1, "cb")))
                         {
                             goto bad_bound_type;
                         }
@@ -13350,6 +13413,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             goto loopdone;
                         }
                         p = RExC_parse;
+                        RExC_parse = parse_start;
                         if (ender > 0xff) {
                             REQUIRE_UTF8(flagp);
                         }
@@ -13380,6 +13444,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                            const char* error_msg;
 
                            bool valid = grok_bslash_o(&p,
+                                                       RExC_end,
                                                       &result,
                                                       &error_msg,
                                                       PASS2, /* out warnings */
@@ -13406,6 +13471,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                            const char* error_msg;
 
                            bool valid = grok_bslash_x(&p,
+                                                       RExC_end,
                                                       &result,
                                                       &error_msg,
                                                       PASS2, /* out warnings */
@@ -13587,7 +13653,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      * character we are appending, hence we can delay getting
                      * its representation until PASS2. */
                     if (SIZE_ONLY) {
-                        if (UTF) {
+                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                             const STRLEN unilen = UVCHR_SKIP(ender);
                             s += unilen;
 
@@ -13605,7 +13671,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         }
                     } else { /* PASS2 */
                       not_fold_common:
-                        if (UTF) {
+                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
                             U8 * new_s = uvchr_to_utf8((U8*)s, ender);
                             len += (char *) new_s - s - 1;
                             s = (char *) new_s;
@@ -14644,7 +14710,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
          * */
         switch (name_len) {
             case 4:
-                if (memEQ(name_start, "word", 4)) {
+                if (memEQs(name_start, 4, "word")) {
                     /* this is not POSIX, this is the Perl \w */
                     class_number = ANYOF_WORDCHAR;
                 }
@@ -14655,51 +14721,51 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                  * Offset 4 gives the best switch position.  */
                 switch (name_start[4]) {
                     case 'a':
-                        if (memEQ(name_start, "alph", 4)) /* alpha */
+                        if (memBEGINs(name_start, 5, "alph")) /* alpha */
                             class_number = ANYOF_ALPHA;
                         break;
                     case 'e':
-                        if (memEQ(name_start, "spac", 4)) /* space */
+                        if (memBEGINs(name_start, 5, "spac")) /* space */
                             class_number = ANYOF_SPACE;
                         break;
                     case 'h':
-                        if (memEQ(name_start, "grap", 4)) /* graph */
+                        if (memBEGINs(name_start, 5, "grap")) /* graph */
                             class_number = ANYOF_GRAPH;
                         break;
                     case 'i':
-                        if (memEQ(name_start, "asci", 4)) /* ascii */
+                        if (memBEGINs(name_start, 5, "asci")) /* ascii */
                             class_number = ANYOF_ASCII;
                         break;
                     case 'k':
-                        if (memEQ(name_start, "blan", 4)) /* blank */
+                        if (memBEGINs(name_start, 5, "blan")) /* blank */
                             class_number = ANYOF_BLANK;
                         break;
                     case 'l':
-                        if (memEQ(name_start, "cntr", 4)) /* cntrl */
+                        if (memBEGINs(name_start, 5, "cntr")) /* cntrl */
                             class_number = ANYOF_CNTRL;
                         break;
                     case 'm':
-                        if (memEQ(name_start, "alnu", 4)) /* alnum */
+                        if (memBEGINs(name_start, 5, "alnu")) /* alnum */
                             class_number = ANYOF_ALPHANUMERIC;
                         break;
                     case 'r':
-                        if (memEQ(name_start, "lowe", 4)) /* lower */
+                        if (memBEGINs(name_start, 5, "lowe")) /* lower */
                             class_number = (FOLD) ? ANYOF_CASED : ANYOF_LOWER;
-                        else if (memEQ(name_start, "uppe", 4)) /* upper */
+                        else if (memBEGINs(name_start, 5, "uppe")) /* upper */
                             class_number = (FOLD) ? ANYOF_CASED : ANYOF_UPPER;
                         break;
                     case 't':
-                        if (memEQ(name_start, "digi", 4)) /* digit */
+                        if (memBEGINs(name_start, 5, "digi")) /* digit */
                             class_number = ANYOF_DIGIT;
-                        else if (memEQ(name_start, "prin", 4)) /* print */
+                        else if (memBEGINs(name_start, 5, "prin")) /* print */
                             class_number = ANYOF_PRINT;
-                        else if (memEQ(name_start, "punc", 4)) /* punct */
+                        else if (memBEGINs(name_start, 5, "punc")) /* punct */
                             class_number = ANYOF_PUNCT;
                         break;
                 }
                 break;
             case 6:
-                if (memEQ(name_start, "xdigit", 6))
+                if (memEQs(name_start, 6, "xdigit"))
                     class_number = ANYOF_XDIGIT;
                 break;
         }
@@ -14903,9 +14969,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                        'stack' of where the undealt-with left
                                        parens would be if they were actually
                                        put there */
-    /* The 'VOL' (expanding to 'volatile') is a workaround for an optimiser bug
+    /* The 'volatile' is a workaround for an optimiser bug
      * in Solaris Studio 12.3. See RT #127455 */
-    VOL IV fence = 0;               /* Position of where most recent undealt-
+    volatile IV fence = 0;          /* Position of where most recent undealt-
                                        with left paren in stack is; -1 if none.
                                      */
     STRLEN len;                     /* Temporary */
@@ -14920,6 +14986,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
 
     PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
 
+    DEBUG_PARSE("xcls");
+
     if (in_locale) {
         set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
     }
@@ -14937,7 +15005,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      * these things, we need to realize that something preceded by a backslash
      * is escaped, so we have to keep track of backslashes */
     if (SIZE_ONLY) {
-        UV depth = 0; /* how many nested (?[...]) constructs */
+        UV nest_depth = 0; /* how many nested (?[...]) constructs */
 
         while (RExC_parse < RExC_end) {
             SV* current = NULL;
@@ -14946,8 +15014,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                     TRUE /* Force /x */ );
 
             switch (*RExC_parse) {
-                case '?':
-                    if (RExC_parse[1] == '[') depth++, RExC_parse++;
+                case '(':
+                    if (RExC_parse[1] == '?' && RExC_parse[2] == '[')
+                        nest_depth++, RExC_parse+=2;
                     /* FALLTHROUGH */
                 default:
                     break;
@@ -15004,9 +15073,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                 }
 
                 case ']':
-                    if (depth--) break;
-                    RExC_parse++;
-                    if (*RExC_parse == ')') {
+                    if (RExC_parse[1] == ')') {
+                        RExC_parse++;
+                        if (nest_depth--) break;
                         node = reganode(pRExC_state, ANYOF, 0);
                         RExC_size += ANYOF_SKIP;
                         nextchar(pRExC_state);
@@ -15018,20 +15087,25 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
 
                         return node;
                     }
-                    goto no_close;
+                    /* We output the messages even if warnings are off, because we'll fail
+                     * the very next thing, and these give a likely diagnosis for that */
+                    if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
+                        output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
+                    }
+                    RExC_parse++;
+                    vFAIL("Unexpected ']' with no following ')' in (?[...");
             }
 
             RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
         }
 
-      no_close:
         /* We output the messages even if warnings are off, because we'll fail
          * the very next thing, and these give a likely diagnosis for that */
         if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
             output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
         }
 
-        FAIL("Syntax error in (?[...])");
+        vFAIL("Syntax error in (?[...])");
     }
 
     /* Pass 2 only after this. */
@@ -15211,12 +15285,14 @@ redo_curchar:
                      * inversion list, and RExC_parse points to the trailing
                      * ']'; the next character should be the ')' */
                     RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for nested extended charclass");
 
                     /* Then the ')' matching the original '(' handled by this
                      * case: statement */
                     RExC_parse++;
-                    assert(UCHARAT(RExC_parse) == ')');
+                    if (UCHARAT(RExC_parse) != ')')
+                        vFAIL("Expecting close paren for wrapper for nested extended charclass");
 
                     RExC_parse++;
                     RExC_flags = save_flags;
@@ -16216,6 +16292,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         do_posix_warnings ? &posix_warnings : NULL,
                         TRUE /* checking only */);
         }
+        else if (  strict && ! skip_white
+                 && (   _generic_isCC(value, _CC_VERTSPACE)
+                     || is_VERTWS_cp_high(value)))
+        {
+            vFAIL("Literal vertical space in [] is illegal except under /x");
+        }
         else if (value == '\\') {
             /* Is a backslash; get the code point of the char after it */
 
@@ -16336,7 +16418,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                    vFAIL2("Empty \\%c", (U8)value);
                if (*RExC_parse == '{') {
                    const U8 c = (U8)value;
-                   e = strchr(RExC_parse, '}');
+                   e = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
                     if (!e) {
                         RExC_parse++;
                         vFAIL2("Missing right brace on \\%c{}", c);
@@ -16468,7 +16550,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          * referred to outside it. [perl #121777] */
                         if (! has_pkg && curpkg) {
                             char* pkgname = HvNAME(curpkg);
-                            if (strNE(pkgname, "main")) {
+                            if (memNEs(pkgname, HvNAMELEN(curpkg), "main")) {
                                 char* full_name = Perl_form(aTHX_
                                                             "%s::%s",
                                                             pkgname,
@@ -16551,6 +16633,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                {
                    const char* error_msg;
                    bool valid = grok_bslash_o(&RExC_parse,
+                                               RExC_end,
                                               &value,
                                               &error_msg,
                                                PASS2,   /* warnings only in
@@ -16569,6 +16652,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                {
                    const char* error_msg;
                    bool valid = grok_bslash_x(&RExC_parse,
+                                               RExC_end,
                                               &value,
                                               &error_msg,
                                               PASS2, /* Output warnings */
@@ -16994,7 +17078,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                           " be some subset of \"0-9\","
                                           " \"A-Z\", or \"a-z\"");
                     }
-                    else if (prevvalue >= 0x660) { /* ARABIC_INDIC_DIGIT_ZERO */
+                    else if (prevvalue >= FIRST_NON_ASCII_DECIMAL_DIGIT) {
                         SSize_t index_start;
                         SSize_t index_final;
 
@@ -17002,8 +17086,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          * can't do the same checks for above-ASCII ranges,
                          * except in the case of digit ones.  These should
                          * contain only digits from the same group of 10.  The
-                         * ASCII case is handled just above.  0x660 is the
-                         * first digit character beyond ASCII.  Hence here, the
+                         * ASCII case is handled just above.  Hence here, the
                          * range could be a range of digits.  First some
                          * unlikely special cases.  Grandfather in that a range
                          * ending in 19DA (NEW TAI LUE THAM DIGIT ONE) is bad
@@ -17297,14 +17380,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 /* The actual POSIXish node for all the rest depends on the
                  * charset modifier.  The ones in the first set depend only on
                  * ASCII or, if available on this platform, also locale */
+
                 case ANYOF_ASCII:
                 case ANYOF_NASCII:
+
 #ifdef HAS_ISASCII
-                    op = (LOC) ? POSIXL : POSIXA;
-#else
-                    op = POSIXA;
+                    if (LOC) {
+                        op = POSIXL;
+                        goto join_posix;
+                    }
 #endif
-                    goto join_posix;
+                    /* (named_class - ANY_OF_ASCII) is 0 or 1. xor'ing with
+                     * invert converts that to 1 or 0 */
+                    op = ASCII + ((namedclass - ANYOF_ASCII) ^ invert);
+                    break;
 
                 /* The following don't have any matches in the upper Latin1
                  * range, hence /d is equivalent to /u for them.  Making it /u
@@ -17446,6 +17535,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                            TRUE /* downgradable to EXACT */
                                            );
             }
+            else {
+                *flagp |= HASWIDTH|SIMPLE;
+            }
 
             RExC_parse = (char *) cur_parse;
 
@@ -17710,6 +17802,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 if (_invlist_len(only_non_utf8_list) != 0) {
                     ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
                 }
+                SvREFCNT_dec_NN(only_non_utf8_list);
             }
             else {
                 /* Here there were no complemented posix classes.  That means
@@ -18001,25 +18094,43 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             const UV* cp_list_array = invlist_array(cp_list);
 
             /* Here, didn't find an optimization.  See if this matches any of
-             * the POSIX classes.  These run slightly faster for above-Unicode
-             * code points, so don't bother with POSIXA ones nor the 2 that
-             * have no above-Unicode matches.  We can avoid these checks unless
-             * the ANYOF matches at least as high as the lowest POSIX one
-             * (which was manually found to be \v.  The actual code point may
-             * increase in later Unicode releases, if a higher code point is
-             * assigned to be \v, but this code will never break.  It would
-             * just mean we could execute the checks for posix optimizations
-             * unnecessarily) */
-
-            if (cp_list_array[cp_list_len-1] > 0x2029) {
+             * the POSIX classes.  First try ASCII */
+
+            if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 0)) {
+                op = ASCII;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (_invlistEQ(cp_list, PL_XPosix_ptrs[_CC_ASCII], 1)) {
+                op = NASCII;
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (cp_list_array[cp_list_len-1] >= 0x2029) {
+
+                /* Then try the other POSIX classes.  The POSIXA ones are about
+                 * the same speed as ANYOF ops, but the ones that have
+                 * above-Latin1 code point matches are somewhat faster than
+                 * ANYOF.  So optimize those, but don't bother with the POSIXA
+                 * ones nor [:cntrl:] which has no above-Latin1 matches.  If
+                 * this ANYOF node has a lower highest possible matching code
+                 * point than any of the XPosix ones, we know that it can't
+                 * possibly be the same as any of them, so we can avoid
+                 * executing this code.  The 0x2029 above for the lowest max
+                 * was determined by manual inspection of the classes, and
+                 * comes from \v.  Suppose Unicode in a later version adds a
+                 * higher code point to \v.  All that means is that this code
+                 * can be executed unnecessarily.  It will still give the
+                 * correct answer. */
+
                 for (posix_class = 0;
                      posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
                      posix_class++)
                 {
                     int try_inverted;
-                    if (posix_class == _CC_ASCII || posix_class == _CC_CNTRL) {
+
+                    if (posix_class == _CC_CNTRL) {
                         continue;
                     }
+
                     for (try_inverted = 0; try_inverted < 2; try_inverted++) {
 
                         /* Check if matches normal or inverted */
@@ -18666,6 +18777,7 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
 * if (PASS2)
 *     NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
 *
+* ALSO NOTE - operand->flags will be set to 0 as well.
 */
 STATIC void
 S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
@@ -18739,7 +18851,6 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
 #endif
     }
 
-
     place = operand;           /* Op node, where operand used to be. */
 #ifdef RE_TRACK_PATTERN_OFFSETS
     if (RExC_offsets) {         /* MJD */
@@ -18758,6 +18869,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
     }
 #endif
     src = NEXTOPER(place);
+    place->flags = 0;
     FILL_ADVANCE_NODE(place, op);
     Zero(src, offset, regnode);
 }
@@ -18992,6 +19104,7 @@ void
 Perl_regdump(pTHX_ const regexp *r)
 {
 #ifdef DEBUGGING
+    int i;
     SV * const sv = sv_newmortal();
     SV *dsv= sv_newmortal();
     RXi_GET_DECL(r,ri);
@@ -19002,36 +19115,35 @@ Perl_regdump(pTHX_ const regexp *r)
     (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
 
     /* Header fields of interest. */
-    if (r->substrs->data[0].substr) {
-       RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->substrs->data[0].substr),
-           RE_SV_DUMPLEN(r->substrs->data[0].substr), 30);
-        Perl_re_printf( aTHX_
-                     "anchored %s%s at %" IVdf " ",
-                     s, RE_SV_TAIL(r->substrs->data[0].substr),
-                     (IV)r->substrs->data[0].min_offset);
-    } else if (r->substrs->data[0].utf8_substr) {
-       RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->substrs->data[0].utf8_substr),
-           RE_SV_DUMPLEN(r->substrs->data[0].utf8_substr), 30);
-        Perl_re_printf( aTHX_
-                     "anchored utf8 %s%s at %" IVdf " ",
-                     s, RE_SV_TAIL(r->substrs->data[0].utf8_substr),
-                     (IV)r->substrs->data[0].min_offset);
-    }
-    if (r->substrs->data[1].substr) {
-       RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->substrs->data[1].substr),
-           RE_SV_DUMPLEN(r->substrs->data[1].substr), 30);
-        Perl_re_printf( aTHX_
-                     "floating %s%s at %" IVdf "..%" UVuf " ",
-                     s, RE_SV_TAIL(r->substrs->data[1].substr),
-                     (IV)r->substrs->data[1].min_offset, (UV)r->substrs->data[1].max_offset);
-    } else if (r->substrs->data[1].utf8_substr) {
-       RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->substrs->data[1].utf8_substr),
-           RE_SV_DUMPLEN(r->substrs->data[1].utf8_substr), 30);
-        Perl_re_printf( aTHX_
-                     "floating utf8 %s%s at %" IVdf "..%" UVuf " ",
-                     s, RE_SV_TAIL(r->substrs->data[1].utf8_substr),
-                     (IV)r->substrs->data[1].min_offset, (UV)r->substrs->data[1].max_offset);
+    for (i = 0; i < 2; i++) {
+        if (r->substrs->data[i].substr) {
+            RE_PV_QUOTED_DECL(s, 0, dsv,
+                            SvPVX_const(r->substrs->data[i].substr),
+                            RE_SV_DUMPLEN(r->substrs->data[i].substr),
+                            PL_dump_re_max_len);
+            Perl_re_printf( aTHX_
+                          "%s %s%s at %" IVdf "..%" UVuf " ",
+                          i ? "floating" : "anchored",
+                          s,
+                          RE_SV_TAIL(r->substrs->data[i].substr),
+                          (IV)r->substrs->data[i].min_offset,
+                          (UV)r->substrs->data[i].max_offset);
+        }
+        else if (r->substrs->data[i].utf8_substr) {
+            RE_PV_QUOTED_DECL(s, 1, dsv,
+                            SvPVX_const(r->substrs->data[i].utf8_substr),
+                            RE_SV_DUMPLEN(r->substrs->data[i].utf8_substr),
+                            30);
+            Perl_re_printf( aTHX_
+                          "%s utf8 %s%s at %" IVdf "..%" UVuf " ",
+                          i ? "floating" : "anchored",
+                          s,
+                          RE_SV_TAIL(r->substrs->data[i].utf8_substr),
+                          (IV)r->substrs->data[i].min_offset,
+                          (UV)r->substrs->data[i].max_offset);
+        }
     }
+
     if (r->check_substr || r->check_utf8)
         Perl_re_printf( aTHX_
                      (const char *)
@@ -19158,7 +19270,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         * is a crude hack but it may be the best for now since
         * we have no flag "this EXACTish node was UTF-8"
         * --jhi */
-       pv_pretty(sv, STRING(o), STR_LEN(o), 60, PL_colors[0], PL_colors[1],
+       pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len,
+                  PL_colors[0], PL_colors[1],
                  PERL_PV_ESCAPE_UNI_DETECT |
                  PERL_PV_ESCAPE_NONASCII   |
                  PERL_PV_PRETTY_ELLIPSES   |
@@ -19382,7 +19495,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
             SV* contents;
 
             /* See if truncation size is overridden */
-            const STRLEN dump_len = (PL_dump_re_max_len)
+            const STRLEN dump_len = (PL_dump_re_max_len > 256)
                                     ? PL_dump_re_max_len
                                     : 256;
 
@@ -19469,8 +19582,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 
     /* add on the verb argument if there is one */
     if ( ( k == VERB || OP(o) == ACCEPT || OP(o) == OPFAIL ) && o->flags) {
-        Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
+        if ( ARG(o) )
+            Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
                        SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
+        else
+            sv_catpvs(sv, ":NULL");
     }
 #else
     PERL_UNUSED_CONTEXT;
@@ -19506,7 +19622,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
                      PL_colors[5],PL_colors[0],
                      s,
                      PL_colors[1],
-                     (strlen(s) > 60 ? "..." : ""));
+                     (strlen(s) > PL_dump_re_max_len ? "..." : ""));
        } );
 
     /* use UTF8 check substring if regexp pattern itself is in UTF8 */
@@ -19545,13 +19661,13 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
     } else {
         CALLREGFREE_PVT(rx); /* free the private data */
         SvREFCNT_dec(RXp_PAREN_NAMES(r));
-       Safefree(r->xpv_len_u.xpvlenu_pv);
     }
     if (r->substrs) {
-        SvREFCNT_dec(r->substrs->data[0].substr);
-        SvREFCNT_dec(r->substrs->data[0].utf8_substr);
-        SvREFCNT_dec(r->substrs->data[1].substr);
-        SvREFCNT_dec(r->substrs->data[1].utf8_substr);
+        int i;
+        for (i = 0; i < 2; i++) {
+            SvREFCNT_dec(r->substrs->data[i].substr);
+            SvREFCNT_dec(r->substrs->data[i].utf8_substr);
+        }
        Safefree(r->substrs);
     }
     RX_MATCH_COPY_FREE(rx);
@@ -19562,12 +19678,19 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
     SvREFCNT_dec(r->qr_anoncv);
     if (r->recurse_locinput)
         Safefree(r->recurse_locinput);
-    rx->sv_u.svu_rx = 0;
 }
 
+
 /*  reg_temp_copy()
 
-    This is a hacky workaround to the structural issue of match results
+    Copy ssv to dsv, both of which should of type SVt_REGEXP or SVt_PVLV,
+    except that dsv will be created if NULL.
+
+    This function is used in two main ways. First to implement
+        $r = qr/....; $s = $$r;
+
+    Secondly, it is used as a hacky workaround to the structural issue of
+    match results
     being stored in the regexp structure which is in turn stored in
     PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern
     could be PL_curpm in multiple contexts, and could require multiple
@@ -19583,75 +19706,80 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
 
 
 REGEXP *
-Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
+Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
 {
-    struct regexp *ret;
-    struct regexp *const r = ReANY(rx);
-    const bool islv = ret_x && SvTYPE(ret_x) == SVt_PVLV;
+    struct regexp *drx;
+    struct regexp *const srx = ReANY(ssv);
+    const bool islv = dsv && SvTYPE(dsv) == SVt_PVLV;
 
     PERL_ARGS_ASSERT_REG_TEMP_COPY;
 
-    if (!ret_x)
-       ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
+    if (!dsv)
+       dsv = (REGEXP*) newSV_type(SVt_REGEXP);
     else {
-       SvOK_off((SV *)ret_x);
+       SvOK_off((SV *)dsv);
        if (islv) {
-           /* For PVLVs, SvANY points to the xpvlv body while sv_u points
-              to the regexp.  (For SVt_REGEXPs, sv_upgrade has already
-              made both spots point to the same regexp body.) */
+           /* For PVLVs, the head (sv_any) points to an XPVLV, while
+             * the LV's xpvlenu_rx will point to a regexp body, which
+             * we allocate here */
            REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
-           assert(!SvPVX(ret_x));
-           ret_x->sv_u.svu_rx = temp->sv_any;
+           assert(!SvPVX(dsv));
+            ((XPV*)SvANY(dsv))->xpv_len_u.xpvlenu_rx = temp->sv_any;
            temp->sv_any = NULL;
            SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
            SvREFCNT_dec_NN(temp);
            /* SvCUR still resides in the xpvlv struct, so the regexp copy-
               ing below will not set it. */
-           SvCUR_set(ret_x, SvCUR(rx));
+           SvCUR_set(dsv, SvCUR(ssv));
        }
     }
     /* This ensures that SvTHINKFIRST(sv) is true, and hence that
        sv_force_normal(sv) is called.  */
-    SvFAKE_on(ret_x);
-    ret = ReANY(ret_x);
+    SvFAKE_on(dsv);
+    drx = ReANY(dsv);
 
-    SvFLAGS(ret_x) |= SvUTF8(rx);
+    SvFLAGS(dsv) |= SvFLAGS(ssv) & (SVf_POK|SVp_POK|SVf_UTF8);
+    SvPV_set(dsv, RX_WRAPPED(ssv));
     /* We share the same string buffer as the original regexp, on which we
        hold a reference count, incremented when mother_re is set below.
        The string pointer is copied here, being part of the regexp struct.
      */
-    memcpy(&(ret->xpv_cur), &(r->xpv_cur),
+    memcpy(&(drx->xpv_cur), &(srx->xpv_cur),
           sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
-    if (r->offs) {
-        const I32 npar = r->nparens+1;
-        Newx(ret->offs, npar, regexp_paren_pair);
-        Copy(r->offs, ret->offs, npar, regexp_paren_pair);
-    }
-    if (r->substrs) {
-        Newx(ret->substrs, 1, struct reg_substr_data);
-       StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
+    if (!islv)
+        SvLEN_set(dsv, 0);
+    if (srx->offs) {
+        const I32 npar = srx->nparens+1;
+        Newx(drx->offs, npar, regexp_paren_pair);
+        Copy(srx->offs, drx->offs, npar, regexp_paren_pair);
+    }
+    if (srx->substrs) {
+        int i;
+        Newx(drx->substrs, 1, struct reg_substr_data);
+       StructCopy(srx->substrs, drx->substrs, struct reg_substr_data);
 
-       SvREFCNT_inc_void(ret->substrs->data[0].substr);
-       SvREFCNT_inc_void(ret->substrs->data[0].utf8_substr);
-       SvREFCNT_inc_void(ret->substrs->data[1].substr);
-       SvREFCNT_inc_void(ret->substrs->data[1].utf8_substr);
+        for (i = 0; i < 2; i++) {
+            SvREFCNT_inc_void(drx->substrs->data[i].substr);
+            SvREFCNT_inc_void(drx->substrs->data[i].utf8_substr);
+        }
 
        /* check_substr and check_utf8, if non-NULL, point to either their
           anchored or float namesakes, and don't hold a second reference.  */
     }
-    RX_MATCH_COPIED_off(ret_x);
+    RX_MATCH_COPIED_off(dsv);
 #ifdef PERL_ANY_COW
-    ret->saved_copy = NULL;
+    drx->saved_copy = NULL;
 #endif
-    ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
-    SvREFCNT_inc_void(ret->qr_anoncv);
-    if (r->recurse_locinput)
-        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
+    drx->mother_re = ReREFCNT_inc(srx->mother_re ? srx->mother_re : ssv);
+    SvREFCNT_inc_void(drx->qr_anoncv);
+    if (srx->recurse_locinput)
+        Newx(drx->recurse_locinput,srx->nparens + 1,char *);
 
-    return ret_x;
+    return dsv;
 }
 #endif
 
+
 /* regfree_internal()
 
    Free the private data in a regexp. This is overloadable by
@@ -19679,7 +19807,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
        {
            SV *dsv= sv_newmortal();
             RE_PV_QUOTED_DECL(s, RX_UTF8(rx),
-                dsv, RX_PRECOMP(rx), RX_PRELEN(rx), 60);
+                dsv, RX_PRECOMP(rx), RX_PRELEN(rx), PL_dump_re_max_len);
             Perl_re_printf( aTHX_ "%sFreeing REx:%s %s\n",
                 PL_colors[4],PL_colors[5],s);
         }
@@ -19813,16 +19941,19 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
        /* Do it this way to avoid reading from *r after the StructCopy().
           That way, if any of the sv_dup_inc()s dislodge *r from the L1
           cache, it doesn't matter.  */
+        int i;
        const bool anchored = r->check_substr
            ? r->check_substr == r->substrs->data[0].substr
            : r->check_utf8   == r->substrs->data[0].utf8_substr;
         Newx(ret->substrs, 1, struct reg_substr_data);
        StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
 
-       ret->substrs->data[0].substr      = sv_dup_inc(ret->substrs->data[0].substr, param);
-       ret->substrs->data[0].utf8_substr = sv_dup_inc(ret->substrs->data[0].utf8_substr, param);
-       ret->substrs->data[1].substr      = sv_dup_inc(ret->substrs->data[1].substr, param);
-       ret->substrs->data[1].utf8_substr = sv_dup_inc(ret->substrs->data[1].utf8_substr, param);
+        for (i = 0; i < 2; i++) {
+            ret->substrs->data[i].substr =
+                        sv_dup_inc(ret->substrs->data[i].substr, param);
+            ret->substrs->data[i].utf8_substr =
+                        sv_dup_inc(ret->substrs->data[i].utf8_substr, param);
+        }
 
        /* check_substr and check_utf8, if non-NULL, point to either their
           anchored or float namesakes, and don't hold a second reference.  */
@@ -19852,7 +19983,7 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
     RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
     ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param));
     if (r->recurse_locinput)
-        Newxz(ret->recurse_locinput,r->nparens + 1,char *);
+        Newx(ret->recurse_locinput,r->nparens + 1,char *);
 
     if (ret->pprivate)
        RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
@@ -19871,7 +20002,7 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
               1: a buffer in a different thread
               2: something we no longer hold a reference on
               so we need to copy it locally.  */
-    RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED(sstr), SvCUR(sstr)+1);
+    RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED_const(sstr), SvCUR(sstr)+1);
     ret->mother_re   = NULL;
 }
 #endif /* PERL_IN_XSUB_RE */
@@ -20305,9 +20436,9 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
 #else
         format = "\\x%02" UVXf "-\\x%02" UVXf;
 #endif
-        GCC_DIAG_IGNORE(-Wformat-nonliteral);
+        GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
         Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
-        GCC_DIAG_RESTORE;
+        GCC_DIAG_RESTORE_STMT;
         break;
     }
 }
@@ -20740,7 +20871,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
        /* While that wasn't END last time... */
        NODE_ALIGN(node);
        op = OP(node);
-       if (op == CLOSE || op == WHILEM)
+       if (op == CLOSE || op == SRCLOSE || op == WHILEM)
            indent--;
        next = regnext((regnode *)node);
 
@@ -20807,7 +20938,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                     indent+3,
                     elem_ptr
                     ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
-                                SvCUR(*elem_ptr), 60,
+                                SvCUR(*elem_ptr), PL_dump_re_max_len,
                                 PL_colors[0], PL_colors[1],
                                 (SvUTF8(*elem_ptr)
                                  ? PERL_PV_ESCAPE_UNI
@@ -20864,7 +20995,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
            node = NEXTOPER(node);
            node += regarglen[(U8)op];
        }
-       if (op == CURLYX || op == OPEN)
+       if (op == CURLYX || op == OPEN || op == SROPEN)
            indent++;
     }
     CLEAR_OPTSTART;