This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
preserve code blocks in interpolated qr//s
[perl5.git] / regcomp.c
index 6bcd8b7..802c722 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
 #define        STATIC  static
 #endif
 
+
 typedef struct RExC_state_t {
     U32                flags;                  /* are we folding, multilining? */
     char       *precomp;               /* uncompiled string. */
@@ -149,6 +150,10 @@ typedef struct RExC_state_t {
     I32                in_lookbehind;
     I32                contains_locale;
     I32                override_recoding;
+    struct reg_code_block *code_blocks;        /* positions of literal (?{})
+                                           within pattern */
+    int                num_code_blocks;        /* size of code_blocks[] */
+    int                code_index;             /* next code_blocks[] slot */
 #if ADD_TO_REGEXEC
     char       *starttry;              /* -Dr: where regtry was called. */
 #define RExC_starttry  (pRExC_state->starttry)
@@ -4887,23 +4892,12 @@ Perl_reginitcolors(pTHX)
 #endif        
 
 /*
- pregcomp - compile a regular expression into internal code
* pregcomp - compile a regular expression into internal code
  *
- * We can't allocate space until we know how big the compiled form will be,
- * but we can't compile it (and thus know how big it is) until we've got a
- * place to put the code.  So we cheat:  we compile it twice, once with code
- * generation turned off and size counting turned on, and once "for real".
- * This also means that we don't allocate space until we are sure that the
- * thing really will compile successfully, and we never have to move the
- * code and thus invalidate pointers into it.  (Note that it has to be in
- * one piece because free() must be able to free it all.) [NB: not true in perl]
- *
- * Beware that the optimization-preparation code in here knows about some
- * of the structure of the compiled regexp.  [I'll say.]
+ * Decides which engine's compiler to call based on the hint currently in
+ * scope
  */
 
-
-
 #ifndef PERL_IN_XSUB_RE
 #define RE_ENGINE_PTR &PL_core_reg_engine
 #else
@@ -4912,46 +4906,125 @@ extern const struct regexp_engine my_reg_engine;
 #endif
 
 #ifndef PERL_IN_XSUB_RE 
+
+/* return the currently in-scope regex engine (or NULL if none)  */
+
+regexp_engine *
+Perl_current_re_engine(pTHX)
+{
+    dVAR;
+
+    if (IN_PERL_COMPILETIME) {
+       HV * const table = GvHV(PL_hintgv);
+       SV **ptr;
+
+       if (!table)
+           return NULL;
+       ptr = hv_fetchs(table, "regcomp", FALSE);
+       if ( !(ptr && SvIOK(*ptr) && SvIV(*ptr)))
+           return NULL;
+       return INT2PTR(regexp_engine*,SvIV(*ptr));
+    }
+    else {
+       SV *ptr;
+       if (!PL_curcop->cop_hints_hash)
+           return NULL;
+       ptr = cop_hints_fetch_pvs(PL_curcop, "regcomp", 0);
+       if ( !(ptr && SvIOK(ptr) && SvIV(ptr)))
+           return NULL;
+       return INT2PTR(regexp_engine*,SvIV(ptr));
+    }
+}
+
+
 REGEXP *
 Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
 {
     dVAR;
-    HV * const table = GvHV(PL_hintgv);
+    regexp_engine *eng = current_re_engine();
 
     PERL_ARGS_ASSERT_PREGCOMP;
 
-    /* Dispatch a request to compile a regexp to correct 
-       regexp engine. */
-    if (table) {
-        SV **ptr= hv_fetchs(table, "regcomp", FALSE);
+    /* Dispatch a request to compile a regexp to correct regexp engine. */
+    if (eng) {
         GET_RE_DEBUG_FLAGS_DECL;
-        if (ptr && SvIOK(*ptr) && SvIV(*ptr)) {
-            const regexp_engine *eng=INT2PTR(regexp_engine*,SvIV(*ptr));
-            DEBUG_COMPILE_r({
-                PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
-                    SvIV(*ptr));
-            });            
-            return CALLREGCOMP_ENG(eng, pattern, flags);
-        } 
+       DEBUG_COMPILE_r({
+           PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
+                           PTR2UV(eng));
+       });
+       return CALLREGCOMP_ENG(eng, pattern, flags);
     }
     return Perl_re_compile(aTHX_ pattern, flags);
 }
 #endif
 
+/* public(ish) wrapper for Perl_op_re_compile that only takes an SV
+ * pattern rather than a list of OPs */
+
 REGEXP *
 Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 {
+    SV *pat = pattern; /* defeat constness! */
+    PERL_ARGS_ASSERT_RE_COMPILE;
+    return Perl_re_op_compile(aTHX_ &pat, 1, NULL,
+                                   NULL, NULL, NULL, orig_pm_flags);
+}
+
+
+/*
+ * Perl_op_re_compile - the perl internal RE engine's function to compile a
+ * regular expression into internal code.
+ * The pattern may be passed either as:
+ *    a list of SVs (patternp plus pat_count)
+ *    a list of OPs (expr)
+ * If both are passed, the SV list is used, but the OP list indicates
+ * which SVs are actually pre-compiled codeblocks
+ *
+ * The list of SVs have magic and qr overloading applied to them (and
+ * the list may be modified in-place with replacement SVs in the latter
+ * case).
+ *
+ * If the pattern hasn't changed from old_re, then old-re will be
+ * returned.
+ *
+ * If eng is set (and  not equal to PL_core_reg_engine), then
+ * just do the intial concatenation of arguments, then pass on to
+ * the external engine.
+ *
+ * If is_bare_re is not null, set it to a boolean indicating whether
+ * the the arg list reduced (after overloading) to a single bare
+ * regex which has been returned (i.e. /$qr/).
+ *
+ * We can't allocate space until we know how big the compiled form will be,
+ * but we can't compile it (and thus know how big it is) until we've got a
+ * place to put the code.  So we cheat:  we compile it twice, once with code
+ * generation turned off and size counting turned on, and once "for real".
+ * This also means that we don't allocate space until we are sure that the
+ * thing really will compile successfully, and we never have to move the
+ * code and thus invalidate pointers into it.  (Note that it has to be in
+ * one piece because free() must be able to free it all.) [NB: not true in perl]
+ *
+ * Beware that the optimization-preparation code in here knows about some
+ * of the structure of the compiled regexp.  [I'll say.]
+ */
+
+REGEXP *
+Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
+                   OP *expr, const regexp_engine* eng, REGEXP *VOL old_re,
+                    int *is_bare_re, U32 orig_pm_flags)
+{
     dVAR;
     REGEXP *rx;
     struct regexp *r;
     register regexp_internal *ri;
     STRLEN plen;
-    char  *exp;
+    char  * VOL exp;
     char* xend;
     regnode *scan;
     I32 flags;
     I32 minlen = 0;
     U32 pm_flags;
+    SV * VOL pat;
 
     /* these are all flags - maybe they should be turned
      * into a single int with different bit masks */
@@ -4960,6 +5033,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     I32 sawopen = 0;
     bool used_setjump = FALSE;
     regex_charset initial_charset = get_regex_charset(orig_pm_flags);
+    bool code_is_utf8 = 0;
 
     U8 jump_ret = 0;
     dJMPENV;
@@ -4972,8 +5046,6 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 #endif    
     GET_RE_DEBUG_FLAGS_DECL;
 
-    PERL_ARGS_ASSERT_RE_COMPILE;
-
     DEBUG_r(if (!PL_colorset) reginitcolors());
 
 #ifndef PERL_IN_XSUB_RE
@@ -5034,7 +5106,238 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     }
 #endif
 
-    RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
+    pRExC_state->code_blocks = NULL;
+    pRExC_state->num_code_blocks = 0;
+
+    if (is_bare_re)
+       *is_bare_re = 0;
+
+    if (expr && (expr->op_type == OP_LIST ||
+               (expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) {
+
+       /* is the source UTF8, and how many code blocks are there? */
+       OP *o;
+       int ncode = 0;
+
+       for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+           if (o->op_type == OP_CONST && SvUTF8(cSVOPo_sv))
+               code_is_utf8 = 1;
+           else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
+               /* count of DO blocks */
+               ncode++;
+       }
+       if (ncode) {
+           pRExC_state->num_code_blocks = ncode;
+           Newx(pRExC_state->code_blocks, ncode, struct reg_code_block);
+       }
+    }
+
+    if (pat_count) {
+       /* handle a list of SVs */
+
+       SV **svp;
+
+       /* apply magic and RE overloading to each arg */
+       for (svp = patternp; svp < patternp + pat_count; svp++) {
+           SV *rx = *svp;
+           SvGETMAGIC(rx);
+           if (SvROK(rx) && SvAMAGIC(rx)) {
+               SV *sv = AMG_CALLunary(rx, regexp_amg);
+               if (sv) {
+                   if (SvROK(sv))
+                       sv = SvRV(sv);
+                   if (SvTYPE(sv) != SVt_REGEXP)
+                       Perl_croak(aTHX_ "Overloaded qr did not return a REGEXP");
+                   *svp = sv;
+               }
+           }
+       }
+
+       if (pat_count > 1) {
+           /* concat multiple args and find any code block indexes */
+
+           OP *o = NULL;
+           int n = 0;
+           bool utf8 = 0;
+
+           if (pRExC_state->num_code_blocks) {
+               o = cLISTOPx(expr)->op_first;
+               assert(o->op_type == OP_PUSHMARK);
+               o = o->op_sibling;
+           }
+
+           pat = newSVpvn("", 0);
+           SAVEFREESV(pat);
+
+           /* determine if the pattern is going to be utf8 (needed
+            * in advance to align code block indices correctly).
+            * XXX This could fail to be detected for an arg with
+            * overloading but not concat overloading; but the main effect
+            * in this obscure case is to need a 'use re eval' for a
+            * literal code block */
+           for (svp = patternp; svp < patternp + pat_count; svp++) {
+               if (SvUTF8(*svp))
+                   utf8 = 1;
+           }
+           if (utf8)
+               SvUTF8_on(pat);
+
+           for (svp = patternp; svp < patternp + pat_count; svp++) {
+               SV *sv, *msv = *svp;
+               SV *rx;
+               bool code = 0;
+               if (o) {
+                   if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+                       assert(n < pRExC_state->num_code_blocks);
+                       pRExC_state->code_blocks[n].start = SvCUR(pat);
+                       pRExC_state->code_blocks[n].block = o;
+                       pRExC_state->code_blocks[n].src_regex = NULL;
+                       n++;
+                       code = 1;
+                       o = o->op_sibling; /* skip CONST */
+                       assert(o);
+                   }
+                   o = o->op_sibling;;
+               }
+
+               /* extract any code blocks within any embedded qr//'s */
+               rx = msv;
+               if (SvROK(rx))
+                   rx = SvRV(rx);
+               if (SvTYPE(rx) == SVt_REGEXP
+                   && RX_ENGINE((REGEXP*)rx) == RE_ENGINE_PTR)
+               {
+
+                   RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri);
+                   if (ri->num_code_blocks) {
+                       int i;
+                       Renew(pRExC_state->code_blocks,
+                           pRExC_state->num_code_blocks + ri->num_code_blocks,
+                           struct reg_code_block);
+                       pRExC_state->num_code_blocks += ri->num_code_blocks;
+                       for (i=0; i < ri->num_code_blocks; i++) {
+                           struct reg_code_block *src, *dst;
+                           STRLEN offset =  SvCUR(pat)
+                               + ((struct regexp *)SvANY(rx))->pre_prefix;
+                           assert(n < pRExC_state->num_code_blocks);
+                           src = &ri->code_blocks[i];
+                           dst = &pRExC_state->code_blocks[n];
+                           dst->start      = src->start + offset;
+                           dst->end        = src->end   + offset;
+                           dst->block      = src->block;
+                           dst->src_regex  = (REGEXP*) SvREFCNT_inc( (SV*)
+                                                   src->src_regex
+                                                       ? src->src_regex
+                                                       : (REGEXP*)rx);
+                           n++;
+                       }
+                   }
+               }
+
+               if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
+                       (sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
+               {
+                   sv_setsv(pat, sv);
+                   /* overloading involved: all bets are off over literal
+                    * code. Pretend we haven't seen it */
+                   pRExC_state->num_code_blocks -= n;
+                   n = 0;
+
+               }
+               else {
+                   sv_catsv_nomg(pat, msv);
+                   if (code)
+                       pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
+               }
+           }
+           SvSETMAGIC(pat);
+       }
+       else
+           pat = *patternp;
+
+       /* handle bare regex: foo =~ $re */
+       {
+           SV *re = pat;
+           if (SvROK(re))
+               re = SvRV(re);
+           if (SvTYPE(re) == SVt_REGEXP) {
+               if (is_bare_re)
+                   *is_bare_re = 1;
+               SvREFCNT_inc(re);
+               Safefree(pRExC_state->code_blocks);
+               return (REGEXP*)re;
+           }
+       }
+    }
+    else {
+       /* not a list of SVs, so must be a list of OPs */
+       assert(expr);
+       if (expr->op_type == OP_LIST) {
+           int i = -1;
+           bool is_code = 0;
+           OP *o;
+
+           pat = newSVpvn("", 0);
+           SAVEFREESV(pat);
+           if (code_is_utf8)
+               SvUTF8_on(pat);
+
+           /* given a list of CONSTs and DO blocks in expr, append all
+            * the CONSTs to pat, and record the start and end of each
+            * code block in code_blocks[] (each DO{} op is followed by an
+            * OP_CONST containing the corresponding literal '(?{...})
+            * text)
+            */
+           for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+               if (o->op_type == OP_CONST) {
+                   sv_catsv(pat, cSVOPo_sv);
+                   if (is_code) {
+                       pRExC_state->code_blocks[i].end = SvCUR(pat)-1;
+                       is_code = 0;
+                   }
+               }
+               else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+                   assert(i+1 < pRExC_state->num_code_blocks);
+                   pRExC_state->code_blocks[++i].start = SvCUR(pat);
+                   pRExC_state->code_blocks[i].block = o;
+                   pRExC_state->code_blocks[i].src_regex = NULL;
+                   is_code = 1;
+               }
+           }
+       }
+       else {
+           assert(expr->op_type == OP_CONST);
+           pat = cSVOPx_sv(expr);
+       }
+    }
+
+    exp = SvPV_nomg(pat, plen);
+
+    if (eng && eng != RE_ENGINE_PTR) {
+       if ((SvUTF8(pat) && IN_BYTES)
+               || SvGMAGICAL(pat) || SvAMAGIC(pat))
+       {
+           /* make a temporary copy; either to convert to bytes,
+            * or to avoid repeating get-magic / overloaded stringify */
+           pat = newSVpvn_flags(exp, plen, SVs_TEMP |
+                                       (IN_BYTES ? 0 : SvUTF8(pat)));
+       }
+       Safefree(pRExC_state->code_blocks);
+       return CALLREGCOMP_ENG(eng, pat, orig_pm_flags);
+    }
+
+    if (   old_re
+       && !!RX_UTF8(old_re) == !!SvUTF8(pat)
+       && RX_PRECOMP(old_re) && RX_PRELEN(old_re) == plen
+       && memEQ(RX_PRECOMP(old_re), exp, plen))
+    {
+       ReREFCNT_inc(old_re);
+       Safefree(pRExC_state->code_blocks);
+       return old_re;
+    }
+
+    /* ignore the utf8ness if the pattern is 0 length */
+    RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
     RExC_uni_semantics = 0;
     RExC_contains_locale = 0;
 
@@ -5046,12 +5349,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     }
 
     if (jump_ret == 0) {    /* First time through */
-       exp = SvPV(pattern, plen);
        xend = exp + plen;
-       /* ignore the utf8ness if the pattern is 0 length */
-       if (plen == 0) {
-           RExC_utf8 = RExC_orig_utf8 = 0;
-       }
 
         DEBUG_COMPILE_r({
             SV *dsv= sv_newmortal();
@@ -5062,7 +5360,10 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
         });
     }
     else {  /* longjumped back */
-        STRLEN len = plen;
+       U8 *src, *dst;
+       int n=0;
+       STRLEN s = 0, d = 0;
+       bool do_end = 0;
 
         /* If the cause for the longjmp was other than changing to utf8, pop
          * our own setjmp, and longjmp to the correct handler */
@@ -5083,10 +5384,60 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
         -- dmq */
         DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
            "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
-        exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)SvPV(pattern, plen), &len);
-        xend = exp + len;
-        RExC_orig_utf8 = RExC_utf8 = 1;
-        SAVEFREEPV(exp);
+
+       /* upgrade pattern to UTF8, and if there are code blocks,
+        * recalculate the indices.
+        * This is essentially an unrolled Perl_bytes_to_utf8() */
+
+       src = (U8*)SvPV_nomg(pat, plen);
+       Newx(dst, plen * 2 + 1, U8);
+
+       while (s < plen) {
+           const UV uv = NATIVE_TO_ASCII(src[s]);
+           if (UNI_IS_INVARIANT(uv))
+               dst[d]   = (U8)UTF_TO_NATIVE(uv);
+           else {
+               dst[d++] = (U8)UTF8_EIGHT_BIT_HI(uv);
+               dst[d]   = (U8)UTF8_EIGHT_BIT_LO(uv);
+           }
+           if (n < pRExC_state->num_code_blocks) {
+               if (!do_end && pRExC_state->code_blocks[n].start == s) {
+                   pRExC_state->code_blocks[n].start = d;
+                   assert(dst[d] == '(');
+                   do_end = 1;
+               }
+               else if (do_end && pRExC_state->code_blocks[n].end == s) {
+                   pRExC_state->code_blocks[n].end = d;
+                   assert(dst[d] == ')');
+                   do_end = 0;
+                   n++;
+               }
+           }
+           s++;
+           d++;
+       }
+       dst[d] = '\0';
+       plen = d;
+       exp = (char*) dst;
+       xend = exp + plen;
+       SAVEFREEPV(exp);
+       RExC_orig_utf8 = RExC_utf8 = 1;
+
+       /* we've changed the string; check again whether it matches
+        * the old pattern, to avoid recompilation */
+       if (   old_re
+           && RX_UTF8(old_re)
+           && RX_PRECOMP(old_re) && RX_PRELEN(old_re) == plen
+           && memEQ(RX_PRECOMP(old_re), exp, plen))
+       {
+           ReREFCNT_inc(old_re);
+           if (used_setjump) {
+               JMPENV_POP;
+           }
+           Safefree(pRExC_state->code_blocks);
+           return old_re;
+       }
+
     }
 
 #ifdef TRIE_STUDY_OPT
@@ -5135,6 +5486,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 #endif
     RExC_recurse = NULL;
     RExC_recurse_count = 0;
+    pRExC_state->code_index = 0;
 
 #if 0 /* REGC() is (currently) a NOP at the first pass.
        * Clever compilers notice this and complain. --jhi */
@@ -5143,6 +5495,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n"));
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
        RExC_precomp = NULL;
+       Safefree(pRExC_state->code_blocks);
        return(NULL);
     }
 
@@ -5197,6 +5550,13 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     RXi_SET( r, ri );
     r->engine= RE_ENGINE_PTR;
     r->extflags = pm_flags;
+    if (orig_pm_flags & PMf_HAS_CV) {
+       ri->code_blocks = pRExC_state->code_blocks;
+       ri->num_code_blocks = pRExC_state->num_code_blocks;
+    }
+    else
+       SAVEFREEPV(pRExC_state->code_blocks);
+
     {
         bool has_p     = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
         bool has_charset = (get_regex_charset(r->extflags) != REGEX_DEPENDS_CHARSET);
@@ -5228,7 +5588,8 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 
         p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
        SvPOK_on(rx);
-       SvFLAGS(rx) |= SvUTF8(pattern);
+       if (RExC_utf8)
+           SvFLAGS(rx) |= SVf_UTF8;
         *p++='('; *p++='?';
 
         /* If a default, cover it using the caret */
@@ -5296,6 +5657,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     RExC_emit_start = ri->program;
     RExC_emit = ri->program;
     RExC_emit_bound = ri->program + RExC_size + 1;
+    pRExC_state->code_index = 0;
 
     /* Store the count of eval-groups for security checks: */
     RExC_rx->seen_evals = RExC_seen_evals;
@@ -7964,55 +8326,86 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 
                RExC_seen_zerolen++;
                RExC_seen |= REG_SEEN_EVAL;
-               while (count && (c = *RExC_parse)) {
-                   if (c == '\\') {
-                       if (RExC_parse[1])
-                           RExC_parse++;
+
+               if (   pRExC_state->num_code_blocks
+                   && pRExC_state->code_index < pRExC_state->num_code_blocks
+                   && pRExC_state->code_blocks[pRExC_state->code_index].start
+                       == (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
+                           - RExC_start)
+               ) {
+                   /* this is a pre-compiled literal (?{}) */
+                   struct reg_code_block *cb =
+                       &pRExC_state->code_blocks[pRExC_state->code_index];
+                   RExC_parse = RExC_start + cb->end;
+                   if (SIZE_ONLY)
+                       RExC_seen_evals++;
+                   else {
+                       OP *o = cb->block;
+                       if (cb->src_regex) {
+                           n = add_data(pRExC_state, 2, "rl");
+                           RExC_rxi->data->data[n] =
+                               (void*)SvREFCNT_inc((SV*)cb->src_regex);
+                       RExC_rxi->data->data[n+1] = (void*)o->op_next;
+                       }
+                       else {
+                           n = add_data(pRExC_state, 1,
+                                  (RExC_flags & PMf_HAS_CV) ? "L" : "l");
+                           RExC_rxi->data->data[n] = (void*)o->op_next;
+                       }
                    }
-                   else if (c == '{')
-                       count++;
-                   else if (c == '}')
-                       count--;
-                   RExC_parse++;
+                   pRExC_state->code_index++;
                }
-               if (*RExC_parse != ')') {
-                   RExC_parse = s;             
-                   vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
-               }
-               if (!SIZE_ONLY) {
-                   PAD *pad;
-                   OP_4tree *sop, *rop;
-                   SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
-
-                   ENTER;
-                   Perl_save_re_context(aTHX);
-                   rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
-                   sop->op_private |= OPpREFCOUNTED;
-                   /* re_dup will OpREFCNT_inc */
-                   OpREFCNT_set(sop, 1);
-                   LEAVE;
-
-                   n = add_data(pRExC_state, 3, "nop");
-                   RExC_rxi->data->data[n] = (void*)rop;
-                   RExC_rxi->data->data[n+1] = (void*)sop;
-                   RExC_rxi->data->data[n+2] = (void*)pad;
-                   SvREFCNT_dec(sv);
-               }
-               else {                                          /* First pass */
-                   if (PL_reginterp_cnt < ++RExC_seen_evals
-                       && IN_PERL_RUNTIME)
-                       /* No compiled RE interpolated, has runtime
-                          components ===> unsafe.  */
-                       FAIL("Eval-group not allowed at runtime, use re 'eval'");
-                   if (PL_tainting && PL_tainted)
-                       FAIL("Eval-group in insecure regular expression");
-#if PERL_VERSION > 8
-                   if (IN_PERL_COMPILETIME)
-                       PL_cv_has_eval = 1;
-#endif
+               else {
+                   while (count && (c = *RExC_parse)) {
+                       if (c == '\\') {
+                           if (RExC_parse[1])
+                               RExC_parse++;
+                       }
+                       else if (c == '{')
+                           count++;
+                       else if (c == '}')
+                           count--;
+                       RExC_parse++;
+                   }
+                   if (*RExC_parse != ')') {
+                       RExC_parse = s;
+                       vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
+                   }
+                   if (!SIZE_ONLY) {
+                       PAD *pad;
+                       OP_4tree *sop, *rop;
+                       SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
+
+                       ENTER;
+                       Perl_save_re_context(aTHX);
+                       rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
+                       sop->op_private |= OPpREFCOUNTED;
+                       /* re_dup will OpREFCNT_inc */
+                       OpREFCNT_set(sop, 1);
+                       LEAVE;
+
+                       n = add_data(pRExC_state, 3, "nop");
+                       RExC_rxi->data->data[n] = (void*)rop;
+                       RExC_rxi->data->data[n+1] = (void*)sop;
+                       RExC_rxi->data->data[n+2] = (void*)pad;
+                       SvREFCNT_dec(sv);
+                   }
+                   else {                                              /* First pass */
+                       if (PL_reginterp_cnt < ++RExC_seen_evals
+                           && IN_PERL_RUNTIME)
+                           /* No compiled RE interpolated, has runtime
+                              components ===> unsafe.  */
+                           FAIL("Eval-group not allowed at runtime, use re 'eval'");
+                       if (PL_tainting && PL_tainted)
+                           FAIL("Eval-group in insecure regular expression");
+    #if PERL_VERSION > 8
+                       if (IN_PERL_COMPILETIME)
+                           PL_cv_has_eval = 1;
+    #endif
+                   }
                }
-
                nextchar(pRExC_state);
+
                if (is_logical) {
                    ret = reg_node(pRExC_state, LOGICAL);
                    if (!SIZE_ONLY)
@@ -12822,6 +13215,7 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
     SvREFCNT_dec(r->saved_copy);
 #endif
     Safefree(r->offs);
+    SvREFCNT_dec(r->qr_anoncv);
 }
 
 /*  reg_temp_copy()
@@ -12885,6 +13279,7 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
     ret->saved_copy = NULL;
 #endif
     ret->mother_re = rx;
+    SvREFCNT_inc_void(ret->qr_anoncv);
     
     return ret_x;
 }
@@ -12927,6 +13322,13 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
     if (ri->u.offsets)
         Safefree(ri->u.offsets);             /* 20010421 MJD */
 #endif
+    if (ri->code_blocks) {
+       int n;
+       for (n = 0; n < ri->num_code_blocks; n++)
+           SvREFCNT_dec(ri->code_blocks[n].src_regex);
+       Safefree(ri->code_blocks);
+    }
+
     if (ri->data) {
        int n = ri->data->count;
        PAD* new_comppad = NULL;
@@ -12937,6 +13339,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
           /* If you add a ->what type here, update the comment in regcomp.h */
            switch (ri->data->what[n]) {
            case 'a':
+           case 'r':
            case 's':
            case 'S':
            case 'u':
@@ -12965,6 +13368,8 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                SvREFCNT_dec(MUTABLE_SV(new_comppad));
                new_comppad = NULL;
                break;
+           case 'l':
+           case 'L':
            case 'n':
                break;
             case 'T':          
@@ -13093,6 +13498,7 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
     }
 
     RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
+    ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param));
 
     if (ret->pprivate)
        RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
@@ -13153,7 +13559,20 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
     
     Newxc(reti, sizeof(regexp_internal) + len*sizeof(regnode), char, regexp_internal);
     Copy(ri->program, reti->program, len+1, regnode);
-    
+
+    reti->num_code_blocks = ri->num_code_blocks;
+    if (ri->code_blocks) {
+       int n;
+       Newxc(reti->code_blocks, ri->num_code_blocks, struct reg_code_block,
+               struct reg_code_block);
+       Copy(ri->code_blocks, reti->code_blocks, ri->num_code_blocks,
+               struct reg_code_block);
+       for (n = 0; n < ri->num_code_blocks; n++)
+            reti->code_blocks[n].src_regex = (REGEXP*)
+                   sv_dup_inc((SV*)(ri->code_blocks[n].src_regex), param);
+    }
+    else
+       reti->code_blocks = NULL;
 
     reti->regstclass = NULL;
 
@@ -13173,6 +13592,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
                /* legal options are one of: sSfpontTua
                   see also regcomp.h and pregfree() */
            case 'a': /* actually an AV, but the dup function is identical.  */
+           case 'r':
            case 's':
            case 'S':
            case 'p': /* actually an AV, but the dup function is identical.  */
@@ -13205,6 +13625,8 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
                ((reg_trie_data*)ri->data->data[i])->refcount++;
                OP_REFCNT_UNLOCK;
                /* Fall through */
+           case 'l':
+           case 'L':
            case 'n':
                d->data[i] = ri->data->data[i];
                break;