This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Mostly complete fix for literal /(?{..})/ blocks
authorDavid Mitchell <davem@iabyn.com>
Thu, 25 Aug 2011 10:41:49 +0000 (11:41 +0100)
committerDavid Mitchell <davem@iabyn.com>
Wed, 13 Jun 2012 12:25:49 +0000 (13:25 +0100)
Change the way that code blocks in patterns are parsed and executed,
especially as regards lexical and scoping behaviour.

(Note that this fix only applies to literal code blocks appearing within
patterns: run-time patterns, and literals within qr//, are still done the
old broken way for now).

This change means that for literal /(?{..})/ and /(??{..})/:

* the code block is now fully parsed in the same pass as the surrounding
  code, which means that the compiler no longer just does a simplistic
  count of balancing {} to find the limits of the code block;
  i.e. stuff like /(?{  $x = "{" })/ now works (in the same way
  that subscripts in double quoted strings always have: "$a{'{'}" )

* Error and warning messages will now appear to emanate from the main body
  rather than an re_eval; e.g. the output from

    #!/usr/bin/perl
    /(?{ warn "boo" })/

has changed from

    boo at (re_eval 1) line 1.

to

    boo at /tmp/p line 2.

* scope and closures now behave as you might expect; for example

        for my $x (qw(a b c)) { "" =~ /(?{ print $x })/ }

  now prints "abc" rather than ""

* with recursion, it now finds the lexical within the appropriate depth
  of pad: this code now prints "012" rather than "000":

    sub recurse {
        my ($n) = @_;
        return if $n > 2;
        "" =~ /^(?{print $n})/;
        recurse($n+1);
    }
    recurse(0);

* an earlier fix that stopped 'my' declarations within code blocks causing
  crashes, required the accumulating of two SAVECOMPPADs on the stack for
  each iteration of the code block; this is no longer needed;

* UNITCHECK blocks within literal code blocks are now run as part of the
  main body of code (run-time code blocks still trigger an immediate
  call to the UNITCHECK block though)

This is all achieved by building upon the efforts of the commits which led
up to this; those altered the parser to parse literal code blocks
directly, but up until now those code blocks were discarded by
Perl_pmruntime and the block re-compiled using the original re_eval
mechanism. As of this commit, for the non-qr and non-runtime variants,
those code blocks are no longer thrown away. Instead:

* the LISTOP generated by the parser, which contains all the code
  blocks plus OP_CONSTs that collectively make up the literal pattern,
  is now stored in a new field in PMOPs, called op_code_list. For example
  in /A(?{BLOCK})C/, the listop stored in op_code_list looks like

    LIST
        PUSHMARK
        CONST['A']
        NULL/special (aka a DO block)
            BLOCK
        CONST['(?{BLOCK})']
        CONST['B']

* each of the code blocks has its last op set to null and is individually
  run through the peephole optimiser, so each one becomes a little
  self-contained block of code, rather than a list of blocks that run into
  each other;

* then in re_op_compile(), we concatenate the list of CONSTs to produce a
  string to be compiled, but at the same time we note any DO blocks and
  note the start and end positions of the corresponding CONST['(?{BLOCK})'];

* (if the current regex engine isn't the built-in perl one, then we just
  throw away the code blocks and pass the concatenated string to the engine)

* then during regex compilation, whenever we encounter a '(?{', we see if
  it matches the index of one of the pre-compiled blocks, and if so, we
  store a pointer to that block in an 'l' data slot, and use the end index
  to skip over the text of the code body. Conversely, if the index doesn't
  match, then we know that it's a run-time pattern and (for now), compile
  it in the old way.

* During execution, when an EVAL op is encountered, if data->what is 'l',
  then we just use the pad that was in effect when the pattern was called;
  i.e. we use the current pad slot of the currently executing CV that the
  pattern is embedded within.

14 files changed:
dump.c
op.c
op.h
pod/perlmod.pod
regcomp.c
regcomp.h
regexec.c
t/lib/strict/refs
t/lib/strict/subs
t/op/blocks.t
t/re/pat_re_eval.t
t/re/re_tests
t/re/reg_eval_scope.t
t/run/fresh_perl.t

diff --git a/dump.c b/dump.c
index 75ee7e7..8f2fa76 100644 (file)
--- a/dump.c
+++ b/dump.c
@@ -613,6 +613,10 @@ Perl_do_pmop_dump(pTHX_ I32 level, PerlIO *file, const PMOP *pm)
        Perl_dump_indent(aTHX_ level, file, "PMf_REPL = ");
        op_dump(pm->op_pmreplrootu.op_pmreplroot);
     }
+    if (pm->op_code_list) {
+       Perl_dump_indent(aTHX_ level, file, "CODE_LIST =\n");
+       do_op_dump(level, file, pm->op_code_list);
+    }
     if (pm->op_pmflags || (PM_GETRE(pm) && RX_CHECK_SUBSTR(PM_GETRE(pm)))) {
        SV * const tmpsv = pm_description(pm);
        Perl_dump_indent(aTHX_ level, file, "PMFLAGS = (%s)\n", SvCUR(tmpsv) ? SvPVX_const(tmpsv) + 1 : "");
diff --git a/op.c b/op.c
index 5cc9887..4d82c7c 100644 (file)
--- a/op.c
+++ b/op.c
@@ -743,6 +743,8 @@ Perl_op_clear(pTHX_ OP *o)
     case OP_MATCH:
     case OP_QR:
 clear_pmop:
+       op_free(cPMOPo->op_code_list);
+       cPMOPo->op_code_list = NULL;
        forget_pmop(cPMOPo, 1);
        cPMOPo->op_pmreplrootu.op_pmreplroot = NULL;
         /* we use the same protection as the "SAFE" version of the PM_ macros
@@ -4299,45 +4301,83 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg)
                is_compiletime = 0;
        }
     }
-    else { assert(expr->op_type != OP_PUSHMARK); if (expr->op_type != OP_CONST && expr->op_type != OP_PUSHMARK)
+    else if (expr->op_type != OP_CONST)
        is_compiletime = 0;
-    }
 
    /* are we using an external (non-perl) re engine? */
 
    eng = current_re_engine();
    ext_eng = (eng &&  eng != &PL_core_reg_engine);
 
-    /* concatenate adjacent CONSTs, and for non-perl engines, strip out
-     * any DO blocks */
+    /* for perl engine:
+     *   concatenate adjacent CONSTs for non-code case
+     *   pre-process DO blocks;
+     * for non-perl engines:
+     *    concatenate adjacent CONSTs;
+     *    strip out any DO blocks
+     */
 
-    if (expr->op_type == OP_LIST
-       && (!is_compiletime || /* XXX TMP until we handle runtime (?{}) */
-          !has_code || ext_eng))
-    {
-       OP *o, *kid;
-       o = cLISTOPx(expr)->op_first;
-       while (o->op_sibling) {
-           kid = o->op_sibling;
+    if (expr->op_type == OP_LIST) {
+       OP *kid, *okid = NULL;
+       kid = cLISTOPx(expr)->op_first;
+       while (kid) {
            if (kid->op_type == OP_NULL && (kid->op_flags & OPf_SPECIAL)) {
                /* do {...} */
-               o->op_sibling = kid->op_sibling;
-               kid->op_sibling = NULL;
-               op_free(kid);
+               if (ext_eng  || !is_compiletime/*XXX tmp*/
+                       || o->op_type == OP_QR/*XXX tmp*/) {
+                   assert(okid);
+                   okid->op_sibling = kid->op_sibling;
+                   kid->op_sibling = NULL;
+                   op_free(kid);
+                   kid = okid;
+               }
+               else {
+                   /* treat each DO block as a separate little sub */
+                   scalar(kid);
+                   LINKLIST(kid);
+                   if (kLISTOP->op_first->op_type == OP_LEAVE) {
+                       LISTOP *leave = cLISTOPx(kLISTOP->op_first);
+                       /* skip ENTER */
+                       assert(leave->op_first->op_type == OP_ENTER);
+                       assert(leave->op_first->op_sibling);
+                       kid->op_next = leave->op_first->op_sibling;
+                       /* skip LEAVE */
+                       assert(leave->op_flags & OPf_KIDS);
+                       assert(leave->op_last->op_next = (OP*)leave);
+                       leave->op_next = NULL; /* stop on last op */
+                       op_null((OP*)leave);
+                   }
+                   else {
+                       /* skip SCOPE */
+                       OP *scope = kLISTOP->op_first;
+                       assert(scope->op_type == OP_SCOPE);
+                       assert(scope->op_flags & OPf_KIDS);
+                       scope->op_next = NULL; /* stop on last op */
+                       op_null(scope);
+                   }
+                   CALL_PEEP(kid);
+                   finalize_optree(kid);
+               }
            }
-           else if (o->op_type == OP_CONST && kid->op_type == OP_CONST){
-               SV* sv = cSVOPo->op_sv;
+           else if ( (ext_eng || !has_code || !is_compiletime/*XXX tmp*/)
+                             && kid->op_type == OP_CONST
+                             && kid->op_sibling
+                             && kid->op_sibling->op_type == OP_CONST)
+           {
+               OP *o = kid->op_sibling;
+               SV* sv = cSVOPx_sv(kid);
                SvREADONLY_off(sv);
-               sv_catsv(sv, cSVOPx(kid)->op_sv);
+               sv_catsv(sv, cSVOPo_sv);
                SvREADONLY_on(sv);
-               o->op_sibling = kid->op_sibling;
-               kid->op_sibling = NULL;
-               op_free(kid);
+               kid->op_sibling = o->op_sibling;
+               o->op_sibling = NULL;
+               op_free(o);
+               kid = okid;
            }
-           else
-               o = o->op_sibling;
+           okid = kid;
+           kid = kid->op_sibling;
        }
-       cLISTOPx(expr)->op_last = o;
+       cLISTOPx(expr)->op_last = okid;
     }
 
     PL_hints |= HINT_BLOCK_SCOPE;
@@ -4375,15 +4415,16 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg)
            }
 
            PM_SETRE(pm, CALLREGCOMP(pat, pm_flags));
-       }
-       else
-           PM_SETRE(pm, re_op_compile(NULL, expr, pm_flags));
-
 #ifdef PERL_MAD
-       op_getmad(expr,(OP*)pm,'e');
+           op_getmad(expr,(OP*)pm,'e');
 #else
-       op_free(expr);
+           op_free(expr);
 #endif
+       }
+       else {
+           pm->op_code_list = expr;
+           PM_SETRE(pm, re_op_compile(NULL, expr, pm_flags));
+       }
     }
     else {
        bool reglist;
diff --git a/op.h b/op.h
index 6aa16f5..f267da2 100644 (file)
--- a/op.h
+++ b/op.h
@@ -375,6 +375,7 @@ struct pmop {
        HV *    op_pmstash;
 #endif
     }          op_pmstashstartu;
+    OP *       op_code_list;   /* list of (?{}) code blocks */
 };
 
 #ifdef USE_ITHREADS
index 33f098d..9d02c3f 100644 (file)
@@ -307,7 +307,7 @@ the main program.
 
 C<UNITCHECK> blocks are run just after the unit which defined them has
 been compiled.  The main program file and each module it loads are
-compilation units, as are string C<eval>s, code compiled using the
+compilation units, as are string C<eval>s, run-time code compiled using the
 C<(?{ })> construct in a regex, calls to C<do FILE>, C<require FILE>,
 and code after the C<-e> switch on the command line.
 
index 394502b..b45122c 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -149,6 +149,11 @@ typedef struct RExC_state_t {
     I32                in_lookbehind;
     I32                contains_locale;
     I32                override_recoding;
+    int                max_code_index;         /* max index into code_indices */
+    int                code_index;             /* index into code_indices */
+    STRLEN     *code_indices;          /* begin and ends of literal (?{})
+                                           within pattern */
+    OP*                next_code_or_const;     /* iterating the list of DO/OP_CONST */ 
 #if ADD_TO_REGEXEC
     char       *starttry;              /* -Dr: where regtry was called. */
 #define RExC_starttry  (pRExC_state->starttry)
@@ -4963,6 +4968,35 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     return Perl_re_op_compile(aTHX_ pattern, NULL, orig_pm_flags);
 }
 
+/* given a list of CONSTs and DO blocks in expr, append all the CONSTs to
+ * pat, and record the start and end of each code block in code_indices
+ * (each DO{} op is followed by an OP_CONST containing the corresponding
+ * literal '(?{...}) text)
+ */
+
+static void
+S_get_pat_and_code_indices(pTHX_ RExC_state_t *pRExC_state, OP* expr, SV* pat) {
+    int ncode = 0;
+    bool is_code = 0;
+    OP *o;
+    for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+       if (o->op_type == OP_CONST) {
+           sv_catsv(pat, cSVOPo_sv);
+           if (is_code) {
+               pRExC_state->code_indices[ncode++] = SvCUR(pat); /* end pos */
+               is_code = 0;
+           }
+       }
+       else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+           assert(ncode < pRExC_state->max_code_index);
+           pRExC_state->code_indices[ncode++] = SvCUR(pat); /*start pos */
+           is_code = 1;
+       }
+    }
+    pRExC_state->code_index = 0;
+}
+
+
 /*
  * Perl_op_re_compile - the perl internal RE engine's function to compile a
  * regular expression into internal code.
@@ -5077,33 +5111,37 @@ Perl_re_op_compile(pTHX_ SV * const pattern, OP *expr, U32 orig_pm_flags)
     }
 #endif
 
+    pRExC_state->code_indices = NULL;
+    pRExC_state->max_code_index = 0;
     if (expr) {
-       /* XXX tmp get rid of DO blocks, concat CONSTs */
-       OP *o, *kid;
-       o = cLISTOPx(expr)->op_first;
-       while (o->op_sibling) {
-           kid = o->op_sibling;
-           if (kid->op_type == OP_NULL && (kid->op_flags & OPf_SPECIAL)) {
-               /* do {...} */
-               o->op_sibling = kid->op_sibling;
-               kid->op_sibling = NULL;
-               op_free(kid);
+       if (expr->op_type == OP_LIST) {
+           OP *o;
+           bool is_utf8 = 0;
+           int ncode = 0;
+
+           /* are we UTF8, and how many code blocks are there? */
+           for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+               if (o->op_type == OP_CONST && SvUTF8(cSVOPo_sv))
+                   is_utf8 = 1;
+               else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
+                   /* count of DO blocks */
+                   ncode++;
            }
-           else if (o->op_type == OP_CONST && kid->op_type == OP_CONST){
-               SV* sv = cSVOPo->op_sv;
-               SvREADONLY_off(sv);
-               sv_catsv(sv, cSVOPx(kid)->op_sv);
-               SvREADONLY_on(sv);
-               o->op_sibling = kid->op_sibling;
-               kid->op_sibling = NULL;
-               op_free(kid);
+           pRExC_state->max_code_index = ncode*2;
+           if (ncode) {
+               Newx(pRExC_state->code_indices, ncode*2, STRLEN);
+               SAVEFREEPV(pRExC_state->code_indices);
            }
-           else
-               o = o->op_sibling;
+           pat = newSVpvn("", 0);
+           SAVEFREESV(pat);
+           if (is_utf8)
+               SvUTF8_on(pat);
+           S_get_pat_and_code_indices(aTHX_ pRExC_state, expr, pat);
+       }
+       else {
+           assert(expr->op_type == OP_CONST);
+           pat = cSVOPx_sv(expr);
        }
-       cLISTOPx(expr)->op_last = o;
-       pat = ((SVOP*)(expr->op_type == OP_LIST
-               ? cLISTOPx(expr)->op_first->op_sibling : expr))->op_sv;
     }
     else
        pat = pattern;
@@ -5157,10 +5195,20 @@ Perl_re_op_compile(pTHX_ SV * const pattern, OP *expr, U32 orig_pm_flags)
         -- dmq */
         DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
            "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
-        exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)SvPV(pat, plen), &len);
-        xend = exp + len;
-        RExC_orig_utf8 = RExC_utf8 = 1;
-        SAVEFREEPV(exp);
+
+       if (expr && expr->op_type == OP_LIST) {
+           sv_setpvn(pat, "", 0);
+           SvUTF8_on(pat);
+           S_get_pat_and_code_indices(aTHX_ pRExC_state, expr, pat);
+           exp = SvPV(pat, plen);
+           xend = exp + plen;
+       }
+       else {
+           exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)SvPV(pat, plen), &len);
+           xend = exp + len;
+           SAVEFREEPV(exp);
+       }
+       RExC_orig_utf8 = RExC_utf8 = 1;
     }
 
 #ifdef TRIE_STUDY_OPT
@@ -5370,6 +5418,11 @@ Perl_re_op_compile(pTHX_ SV * const pattern, OP *expr, U32 orig_pm_flags)
     RExC_emit_start = ri->program;
     RExC_emit = ri->program;
     RExC_emit_bound = ri->program + RExC_size + 1;
+    pRExC_state->code_index = 0;
+    if (expr && expr->op_type == OP_LIST) {
+       assert(cLISTOPx(expr)->op_first->op_type == OP_PUSHMARK);
+       pRExC_state->next_code_or_const = cLISTOPx(expr)->op_first;
+    }
 
     /* Store the count of eval-groups for security checks: */
     RExC_rx->seen_evals = RExC_seen_evals;
@@ -8038,55 +8091,83 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 
                RExC_seen_zerolen++;
                RExC_seen |= REG_SEEN_EVAL;
-               while (count && (c = *RExC_parse)) {
-                   if (c == '\\') {
-                       if (RExC_parse[1])
-                           RExC_parse++;
+
+               if (pRExC_state->max_code_index
+                   && pRExC_state->code_indices[pRExC_state->code_index] ==
+                      (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
+                           - RExC_start)
+               ) {
+                   /* this is a pre-compiled literal (?{}) */
+                   assert(pRExC_state->code_index
+                             < pRExC_state->max_code_index);
+                   RExC_parse = RExC_start - 1
+                       + pRExC_state->code_indices[++pRExC_state->code_index];
+                   pRExC_state->code_index++;
+                   if (SIZE_ONLY)
+                       RExC_seen_evals++;
+                   else {
+                       OP *o = pRExC_state->next_code_or_const;
+                       while(! (o->op_type == OP_NULL
+                                   && (o->op_flags & OPf_SPECIAL)))
+                       {
+                           o = o->op_sibling;
+                       }
+                       n = add_data(pRExC_state, 1, "l");
+                       RExC_rxi->data->data[n] = (void*)o->op_next;
+                       pRExC_state->next_code_or_const = o->op_sibling;
                    }
-                   else if (c == '{')
-                       count++;
-                   else if (c == '}')
-                       count--;
-                   RExC_parse++;
                }
-               if (*RExC_parse != ')') {
-                   RExC_parse = s;             
-                   vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
-               }
-               if (!SIZE_ONLY) {
-                   PAD *pad;
-                   OP_4tree *sop, *rop;
-                   SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
-
-                   ENTER;
-                   Perl_save_re_context(aTHX);
-                   rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
-                   sop->op_private |= OPpREFCOUNTED;
-                   /* re_dup will OpREFCNT_inc */
-                   OpREFCNT_set(sop, 1);
-                   LEAVE;
-
-                   n = add_data(pRExC_state, 3, "nop");
-                   RExC_rxi->data->data[n] = (void*)rop;
-                   RExC_rxi->data->data[n+1] = (void*)sop;
-                   RExC_rxi->data->data[n+2] = (void*)pad;
-                   SvREFCNT_dec(sv);
-               }
-               else {                                          /* First pass */
-                   if (PL_reginterp_cnt < ++RExC_seen_evals
-                       && IN_PERL_RUNTIME)
-                       /* No compiled RE interpolated, has runtime
-                          components ===> unsafe.  */
-                       FAIL("Eval-group not allowed at runtime, use re 'eval'");
-                   if (PL_tainting && PL_tainted)
-                       FAIL("Eval-group in insecure regular expression");
-#if PERL_VERSION > 8
-                   if (IN_PERL_COMPILETIME)
-                       PL_cv_has_eval = 1;
-#endif
+               else {
+                   while (count && (c = *RExC_parse)) {
+                       if (c == '\\') {
+                           if (RExC_parse[1])
+                               RExC_parse++;
+                       }
+                       else if (c == '{')
+                           count++;
+                       else if (c == '}')
+                           count--;
+                       RExC_parse++;
+                   }
+                   if (*RExC_parse != ')') {
+                       RExC_parse = s;
+                       vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
+                   }
+                   if (!SIZE_ONLY) {
+                       PAD *pad;
+                       OP_4tree *sop, *rop;
+                       SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
+
+                       ENTER;
+                       Perl_save_re_context(aTHX);
+                       rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
+                       sop->op_private |= OPpREFCOUNTED;
+                       /* re_dup will OpREFCNT_inc */
+                       OpREFCNT_set(sop, 1);
+                       LEAVE;
+
+                       n = add_data(pRExC_state, 3, "nop");
+                       RExC_rxi->data->data[n] = (void*)rop;
+                       RExC_rxi->data->data[n+1] = (void*)sop;
+                       RExC_rxi->data->data[n+2] = (void*)pad;
+                       SvREFCNT_dec(sv);
+                   }
+                   else {                                              /* First pass */
+                       if (PL_reginterp_cnt < ++RExC_seen_evals
+                           && IN_PERL_RUNTIME)
+                           /* No compiled RE interpolated, has runtime
+                              components ===> unsafe.  */
+                           FAIL("Eval-group not allowed at runtime, use re 'eval'");
+                       if (PL_tainting && PL_tainted)
+                           FAIL("Eval-group in insecure regular expression");
+    #if PERL_VERSION > 8
+                       if (IN_PERL_COMPILETIME)
+                           PL_cv_has_eval = 1;
+    #endif
+                   }
                }
-
                nextchar(pRExC_state);
+
                if (is_logical) {
                    ret = reg_node(pRExC_state, LOGICAL);
                    if (!SIZE_ONLY)
@@ -13039,6 +13120,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                SvREFCNT_dec(MUTABLE_SV(new_comppad));
                new_comppad = NULL;
                break;
+           case 'l':
            case 'n':
                break;
             case 'T':          
@@ -13279,6 +13361,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
                ((reg_trie_data*)ri->data->data[i])->refcount++;
                OP_REFCNT_UNLOCK;
                /* Fall through */
+           case 'l':
            case 'n':
                d->data[i] = ri->data->data[i];
                break;
index 0fdb005..a9da0c9 100644 (file)
--- a/regcomp.h
+++ b/regcomp.h
@@ -535,6 +535,7 @@ END_EXTERN_C
  * The character describes the function of the corresponding .data item:
  *   a - AV for paren_name_list under DEBUGGING
  *   f - start-class data for regstclass optimization
+ *   l - start op for literal (?{EVAL}) item
  *   n - Root of op tree for (?{EVAL}) item
  *   o - Start op for (?{EVAL}) item
  *   p - Pad for (?{EVAL}) item
index bb845a7..f384c4d 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -3125,6 +3125,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                                false: plain (?=foo)
                                true:  used as a condition: (?(?=foo))
                            */
+    PAD* const initial_pad = PL_comppad;
 #ifdef DEBUGGING
     GET_RE_DEBUG_FLAGS_DECL;
 #endif
@@ -4247,7 +4248,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                SV ** const before = SP;
                OP_4tree * const oop = PL_op;
                COP * const ocurcop = PL_curcop;
-               PAD *old_comppad;
+               PAD *old_comppad, *new_comppad;
                char *saved_regeol = PL_regeol;
                struct re_save_state saved_state;
 
@@ -4268,7 +4269,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
 
                n = ARG(scan);
-               PL_op = (OP_4tree*)rexi->data->data[n];
+               if (rexi->data->what[n] == 'l') { /* literal code */
+                   new_comppad = initial_pad; /* the pad of the current sub */
+                   PL_op = (OP_4tree*)rexi->data->data[n];
+               }
+               else {
+                   PL_op = (OP_4tree*)rexi->data->data[n];
+                   new_comppad = (PAD*)rexi->data->data[n + 2];
+               }
                DEBUG_STATE_r( PerlIO_printf(Perl_debug_log, 
                    "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
                /* wrap the call in two SAVECOMPPADs. This ensures that
@@ -4276,8 +4284,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 * accumulated SAVEt_CLEARSV's will be processed with
                 * interspersed SAVEt_COMPPAD's to ensure that lexicals
                 * are cleared in the right pad */
-               SAVECOMPPAD();
-               PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
+               if (PL_comppad == new_comppad)
+                   old_comppad = new_comppad;
+               else {
+                   SAVECOMPPAD();
+                   PAD_SAVE_LOCAL(old_comppad, new_comppad);
+               }
                PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
 
                 if (sv_yes_mark) {
@@ -4297,8 +4309,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
 
                PL_op = oop;
-               SAVECOMPPAD();
-               PAD_RESTORE_LOCAL(old_comppad);
+               if (old_comppad != PL_comppad) {
+                   SAVECOMPPAD();
+                   PAD_RESTORE_LOCAL(old_comppad);
+               }
                PL_curcop = ocurcop;
                PL_regeol = saved_regeol;
                if (!logical) {
index 21dbfcf..e748512 100644 (file)
@@ -331,7 +331,7 @@ Can't use string ("Fred") as a SCALAR ref while "strict refs" in use at - line 8
 use strict 'refs';
 /(?{${"foo"}++})/;
 EXPECT
-Can't use string ("foo") as a SCALAR ref while "strict refs" in use at (re_eval 1) line 1.
+Can't use string ("foo") as a SCALAR ref while "strict refs" in use at - line 3.
 ########
 # [perl #37886] strict 'refs' doesn't apply inside defined
 use strict 'refs';
index 57327cc..e3e4014 100644 (file)
@@ -451,3 +451,10 @@ sub foo {
 EXPECT
 Bareword "FOO" not allowed while "strict subs" in use at - line 5.
 Execution of - aborted due to compilation errors.
+########
+# make sure checks are done within (?{})
+use strict 'subs';
+/(?{FOO})/
+EXPECT
+Bareword "FOO" not allowed while "strict subs" in use at - line 3.
+Execution of - aborted due to compilation errors.
index 0717699..d07e844 100644 (file)
@@ -13,13 +13,13 @@ b1
 b2
 b3
 b4
-b6
-u5
+b6-c
 b7
 u6
+u5-c
 u1
 c3
-c2
+c2-c
 c1
 i1
 i2
@@ -27,15 +27,13 @@ b5
 u2
 u3
 u4
+b6-r
+u5-r
 e2
 e1
                );
 my $expect = ":" . join(":", @expect);
 
-# XXX tmp while re-evals are being doubly compiled:
-$expect =
- ':b1:b2:b3:b4:b6:b6:u5:b7:u6:u5:u1:c3:c2:c2:c1:i1:i2:b5:u2:u3:u4:e2:e1';
-
 fresh_perl_is(<<'SCRIPT', $expect,{switches => [''], stdin => '', stderr => 1 },'Order of execution of special blocks');
 BEGIN {print ":b1"}
 END {print ":e1"}
@@ -49,9 +47,18 @@ UNITCHECK {print ":u1"}
 eval 'BEGIN {print ":b5"}';
 eval 'UNITCHECK {print ":u2"}';
 eval 'UNITCHECK {print ":u3"; UNITCHECK {print ":u4"}}';
-"a" =~ /(?{UNITCHECK {print ":u5"};
-          CHECK {print ":c2"};
-          BEGIN {print ":b6"}})/x;
+"a" =~ /(?{UNITCHECK {print ":u5-c"};
+          CHECK {print ":c2-c"};
+          BEGIN {print ":b6-c"}})/x;
+{
+    use re 'eval';
+    my $runtime = q{
+    (?{UNITCHECK {print ":u5-r"};
+              CHECK {print ":c2-r"};
+              BEGIN {print ":b6-r"}})/
+    };
+    "a" =~ /$runtime/x;
+}
 eval {BEGIN {print ":b7"}};
 eval {UNITCHECK {print ":u6"}};
 eval {INIT {print ":i2"}};
index 262e6f3..57f2fa2 100644 (file)
@@ -22,7 +22,7 @@ BEGIN {
 }
 
 
-plan tests => 214;  # Update this when adding/deleting tests.
+plan tests => 217;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -378,7 +378,7 @@ sub run_tests {
            # the most basic: literal code should be in same scope
            # as the parent
 
-           tok(1,   "A$x" =~ /^A(??{$x})$/, "[$x] literal code");
+           ok("A$x" =~ /^A(??{$x})$/, "[$x] literal code");
 
            # the "don't recompile if pattern unchanged" mechanism
            # shouldn't apply to code blocks - recompile every time
@@ -477,6 +477,16 @@ sub run_tests {
                                "[$x-$yy] literal qr + r6 +lit, outside");
            }
        }
+
+       # recursive subs should get lexical from the correct pad depth
+
+       sub recurse {
+           my ($n) = @_;
+           return if $n > 2;
+           ok("A$n" =~ /^A(??{$n})$/, "recurse($n)");
+           recurse($n+1);
+       }
+       recurse(0);
     }
 
 } # End of sub run_tests
index 6952f30..60b8a34 100644 (file)
@@ -537,7 +537,8 @@ a(?{})b     cabd    y       $&      ab
 a(?{f()+       -       c       -       Missing right curly or square bracket
 a(?{{1}+       -       c       -       Missing right curly or square bracket
 a(?{}})b       -       c       -       
-a(?{"{"})b     -       c       -       Sequence (?{...}) not terminated or not {}-balanced
+# XXX tmp disable this test - works for // but not qr// yet
+#a(?{"{"})b    ab      y       -       -
 a(?{"\{"})b    cabd    y       $&      ab
 a(?{"{"}})b    -       c       -       Sequence (?{...}) not terminated with ')'
 a(?{$::bl="\{"}).b     caxbd   y       $::bl   {
index 00e7d99..6ba9e77 100644 (file)
@@ -23,8 +23,6 @@ fresh_perl_is <<'CODE', '781745', {}, '(?{}) has its own lexical scope';
  print $x,$a,$b;
 CODE
 
-on;
-
 fresh_perl_is <<'CODE',
  for my $x("a".."c") {
   $y = 1;
@@ -44,8 +42,6 @@ CODE
   {},
  'multiple (?{})s in loop with lexicals';
 
-off;
-
 fresh_perl_is <<'CODE', '781745', {}, 'run-time re-eval has its own scope';
  use re qw(eval);
  my $x = 7;  my $a = 4; my $b = 5;
index e1ffc1b..3e56a09 100644 (file)
@@ -349,9 +349,8 @@ sub foo { local $_ = shift; @_ = split; @_ }
 @x = foo(' x  y  z ');
 print "you die joe!\n" unless "@x" eq 'x y z';
 ########
-/(?{"{"})/     # Check it outside of eval too
+"A" =~ /(?{"{"})/      # Check it outside of eval too
 EXPECT
-Sequence (?{...}) not terminated or not {}-balanced in regex; marked by <-- HERE in m/(?{ <-- HERE "{"})/ at - line 1.
 ########
 /(?{"{"}})/    # Check it outside of eval too
 EXPECT