This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
preserve code blocks in interpolated qr//s
authorDavid Mitchell <davem@iabyn.com>
Wed, 30 Nov 2011 13:40:15 +0000 (13:40 +0000)
committerDavid Mitchell <davem@iabyn.com>
Wed, 13 Jun 2012 12:32:46 +0000 (13:32 +0100)
This now works:

    { my $x = 1; $r = qr/(??{$x})/ }
    my $x = 2;
    print "ok\n" if "1" =~ /^$r$/;

When a qr// is interpolated into another pattern, the pattern is still
recompiled using the stringified qr, but now the pre-compiled code blocks
from the qr are reused rather than being re-compiled, so it behaves like a
closure.

Note that this makes some tests in regexp_qr_embed_thr.t fail, due to a
pre-existing threads bug, which can be summarised as:

    use threads;
    my $s = threads->new(sub { return sub { $::x = 1} })->join;
    $s->();
    print "\$::x=[$::x]\n";

which prints undef, not 1, since the *::x is cloned into the child thread,
then cloned back into the parent as part of the CV (linked from the pad)
being returned in the join. The cloning/join code isn't clever enough
to realise that the back-cloned *::x is the same as the original *::x, so
the main thread ends up with two copies.

This manifests itself in the re tests as

    my $re = threads->new( sub { qr/(?{$::x = 1 })/ })->join();

where, since the returned qr// is now a closure, it suffers from the same
glob duplication in the parent.

So I've disabled 4 re_tests tests under threads for now.

regcomp.c
regcomp.h
regexec.c
regexp.h
t/re/pat_re_eval.t
t/re/re_tests
t/re/reg_eval_scope.t

index 4463c11..802c722 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -5184,13 +5184,15 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
            for (svp = patternp; svp < patternp + pat_count; svp++) {
                SV *sv, *msv = *svp;
+               SV *rx;
                bool code = 0;
                if (o) {
                    if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+                       assert(n < pRExC_state->num_code_blocks);
+                       pRExC_state->code_blocks[n].start = SvCUR(pat);
+                       pRExC_state->code_blocks[n].block = o;
+                       pRExC_state->code_blocks[n].src_regex = NULL;
                        n++;
-                       assert(n <= pRExC_state->num_code_blocks);
-                       pRExC_state->code_blocks[n-1].start = SvCUR(pat);
-                       pRExC_state->code_blocks[n-1].block = o;
                        code = 1;
                        o = o->op_sibling; /* skip CONST */
                        assert(o);
@@ -5198,6 +5200,40 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                    o = o->op_sibling;;
                }
 
+               /* extract any code blocks within any embedded qr//'s */
+               rx = msv;
+               if (SvROK(rx))
+                   rx = SvRV(rx);
+               if (SvTYPE(rx) == SVt_REGEXP
+                   && RX_ENGINE((REGEXP*)rx) == RE_ENGINE_PTR)
+               {
+
+                   RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri);
+                   if (ri->num_code_blocks) {
+                       int i;
+                       Renew(pRExC_state->code_blocks,
+                           pRExC_state->num_code_blocks + ri->num_code_blocks,
+                           struct reg_code_block);
+                       pRExC_state->num_code_blocks += ri->num_code_blocks;
+                       for (i=0; i < ri->num_code_blocks; i++) {
+                           struct reg_code_block *src, *dst;
+                           STRLEN offset =  SvCUR(pat)
+                               + ((struct regexp *)SvANY(rx))->pre_prefix;
+                           assert(n < pRExC_state->num_code_blocks);
+                           src = &ri->code_blocks[i];
+                           dst = &pRExC_state->code_blocks[n];
+                           dst->start      = src->start + offset;
+                           dst->end        = src->end   + offset;
+                           dst->block      = src->block;
+                           dst->src_regex  = (REGEXP*) SvREFCNT_inc( (SV*)
+                                                   src->src_regex
+                                                       ? src->src_regex
+                                                       : (REGEXP*)rx);
+                           n++;
+                       }
+                   }
+               }
+
                if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
                        (sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
                {
@@ -5264,6 +5300,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                    assert(i+1 < pRExC_state->num_code_blocks);
                    pRExC_state->code_blocks[++i].start = SvCUR(pat);
                    pRExC_state->code_blocks[i].block = o;
+                   pRExC_state->code_blocks[i].src_regex = NULL;
                    is_code = 1;
                }
            }
@@ -5276,7 +5313,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     exp = SvPV_nomg(pat, plen);
 
-    if (eng && eng != &PL_core_reg_engine) {
+    if (eng && eng != RE_ENGINE_PTR) {
        if ((SvUTF8(pat) && IN_BYTES)
                || SvGMAGICAL(pat) || SvAMAGIC(pat))
        {
@@ -8297,16 +8334,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                            - RExC_start)
                ) {
                    /* this is a pre-compiled literal (?{}) */
-                   RExC_parse = RExC_start +
-                       pRExC_state->code_blocks[pRExC_state->code_index].end;
+                   struct reg_code_block *cb =
+                       &pRExC_state->code_blocks[pRExC_state->code_index];
+                   RExC_parse = RExC_start + cb->end;
                    if (SIZE_ONLY)
                        RExC_seen_evals++;
                    else {
-                       OP *o =
-                        pRExC_state->code_blocks[pRExC_state->code_index].block;
-                       n = add_data(pRExC_state, 1,
+                       OP *o = cb->block;
+                       if (cb->src_regex) {
+                           n = add_data(pRExC_state, 2, "rl");
+                           RExC_rxi->data->data[n] =
+                               (void*)SvREFCNT_inc((SV*)cb->src_regex);
+                       RExC_rxi->data->data[n+1] = (void*)o->op_next;
+                       }
+                       else {
+                           n = add_data(pRExC_state, 1,
                                   (RExC_flags & PMf_HAS_CV) ? "L" : "l");
-                       RExC_rxi->data->data[n] = (void*)o->op_next;
+                           RExC_rxi->data->data[n] = (void*)o->op_next;
+                       }
                    }
                    pRExC_state->code_index++;
                }
@@ -13277,8 +13322,12 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
     if (ri->u.offsets)
         Safefree(ri->u.offsets);             /* 20010421 MJD */
 #endif
-    if (ri->code_blocks)
+    if (ri->code_blocks) {
+       int n;
+       for (n = 0; n < ri->num_code_blocks; n++)
+           SvREFCNT_dec(ri->code_blocks[n].src_regex);
        Safefree(ri->code_blocks);
+    }
 
     if (ri->data) {
        int n = ri->data->count;
@@ -13290,6 +13339,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
           /* If you add a ->what type here, update the comment in regcomp.h */
            switch (ri->data->what[n]) {
            case 'a':
+           case 'r':
            case 's':
            case 'S':
            case 'u':
@@ -13512,10 +13562,14 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
 
     reti->num_code_blocks = ri->num_code_blocks;
     if (ri->code_blocks) {
+       int n;
        Newxc(reti->code_blocks, ri->num_code_blocks, struct reg_code_block,
                struct reg_code_block);
        Copy(ri->code_blocks, reti->code_blocks, ri->num_code_blocks,
                struct reg_code_block);
+       for (n = 0; n < ri->num_code_blocks; n++)
+            reti->code_blocks[n].src_regex = (REGEXP*)
+                   sv_dup_inc((SV*)(ri->code_blocks[n].src_regex), param);
     }
     else
        reti->code_blocks = NULL;
@@ -13538,6 +13592,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
                /* legal options are one of: sSfpontTua
                   see also regcomp.h and pregfree() */
            case 'a': /* actually an AV, but the dup function is identical.  */
+           case 'r':
            case 's':
            case 'S':
            case 'p': /* actually an AV, but the dup function is identical.  */
index d7261db..f130734 100644 (file)
--- a/regcomp.h
+++ b/regcomp.h
@@ -542,6 +542,7 @@ END_EXTERN_C
  *   n - Root of op tree for (?{EVAL}) item
  *   o - Start op for (?{EVAL}) item
  *   p - Pad for (?{EVAL}) item
+ *   r - pointer to an embedded code-containing qr, e.g. /ab$qr/
  *   s - swash for Unicode-style character class, and the multicharacter
  *       strings resulting from casefolding the single-character entries
  *       in the character class
index 3393d8c..642b48f 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -4269,7 +4269,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
 
                n = ARG(scan);
-               if (rexi->data->what[n] == 'l') { /* literal code */
+               if (rexi->data->what[n] == 'r') { /* code from an external qr */
+                   /* XXX assumes pad depth is 1; this isn't necessarily
+                    * the case with recursive qr//'s */
+                   new_comppad = (PAD*)AvARRAY(CvPADLIST(
+                                           ((struct regexp *)SvANY(
+                                               (REGEXP*)(rexi->data->data[n])
+                                           ))->qr_anoncv
+                                       ))[1];
+                   PL_op = (OP_4tree*)rexi->data->data[n+1];
+               }
+               else if (rexi->data->what[n] == 'l') { /* literal code */
                    new_comppad = initial_pad; /* the pad of the current sub */
                    PL_op = (OP_4tree*)rexi->data->data[n];
                }
index 29f11c0..8b19055 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -70,6 +70,7 @@ struct reg_code_block {
     STRLEN start;
     STRLEN end;
     OP     *block;
+    REGEXP *src_regex;
 };
 
 
index 2210a1b..e53f108 100644 (file)
@@ -22,7 +22,7 @@ BEGIN {
 }
 
 
-plan tests => 242;  # Update this when adding/deleting tests.
+plan tests => 245;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -59,7 +59,8 @@ sub run_tests {
 
         no re "eval";
         undef $@;
-        my $match = eval { /$a$c$a/ };
+        my $d = '(?{1})';
+        my $match = eval { /$a$c$a$d/ };
         ok($@ && $@ =~ /Eval-group not allowed/ && !$match, $message);
         is($b, '14', $message);
 
@@ -528,6 +529,44 @@ sub run_tests {
            ok("bcd" =~ $r, "qr with run-time elements and code block");
        }
 
+       # check that cascaded embedded regexes all see their own lexical
+       # environment
+
+       {
+           my ($r1, $r2, $r3, $r4);
+           my ($x1, $x2, $x3, $x4) = (5,6,7,8);
+           { my $x1 = 1; $r1 = qr/A(??{$x1})/; }
+           { my $x2 = 2; $r2 = qr/$r1(??{$x2})/; }
+           { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; }
+           { my $x4 = 4; $r4 = qr/$r3(??{$x4})/; }
+           ok("A1234" =~ /^$r4$/, "cascaded qr");
+       }
+
+       # and again, but in a loop, with no external references
+       # being maintained to the qr's
+
+       {
+           my $r = 'A';
+           for my $x (1..4) {
+               $r = qr/$r(??{$x})/;
+           }
+           my $x = 5;
+           ok("A1234" =~ /^$r$/, "cascaded qr loop");
+       }
+
+
+       # and again, but compiling the qrs in an eval so there
+       # aren't even refs to the qrs from any ops
+
+       {
+           my $r = 'A';
+           for my $x (1..4) {
+               $r = eval q[ qr/$r(??{$x})/; ];
+           }
+           my $x = 5;
+           ok("A1234" =~ /^$r$/, "cascaded qr loop");
+       }
+
        # forward declared subs should Do The Right Thing with any anon CVs
        # within them (i.e. pad_fixup_inner_anons() should work)
 
index 21d3989..708c9ed 100644 (file)
@@ -537,11 +537,10 @@ a(?{})b   cabd    y       $&      ab
 a(?{f()+       -       c       -       Missing right curly or square bracket
 a(?{{1}+       -       c       -       Missing right curly or square bracket
 a(?{}})b       -       c       -       
-# XXX tmp disable this test - doesn't work for embedded qr// yet
-#a(?{"{"})b    ab      y       -       -
+a(?{"{"})b     ab      y       -       -
 a(?{"\{"})b    cabd    y       $&      ab
 a(?{"{"}})b    -       c       -       Sequence (?{...}) not terminated with ')'
-a(?{$::bl="\{"}).b     caxbd   y       $::bl   {
+a(?{$::bl="\{"}).b     caxbd   t       $::bl   {
 x(~~)*(?:(?:F)?)?      x~~     y       -       -
 ^a(?#xxx){3}c  aaac    y       $&      aaac
 '^a (?#xxx) (?#yyy) {3}c'x     aaac    y       $&      aaac
@@ -606,8 +605,8 @@ $(?<=^(a))  a       y       $1      a
 ([\w:]+::)?(\w+)$      abcd    y       $1-$2   -abcd
 ([\w:]+::)?(\w+)$      xy:z:::abcd     y       $1-$2   xy:z:::-abcd
 ^[^bcd]*(c+)   aexycd  y       $1      c
-(?{$a=2})a*aa(?{local$a=$a+1})k*c(?{$b=$a})    yaaxxaaaacd     y       $b      3
-(?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a})  yaaxxaaaacd     y       $b      4
+(?{$a=2})a*aa(?{local$a=$a+1})k*c(?{$b=$a})    yaaxxaaaacd     t       $b      3
+(?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a})  yaaxxaaaacd     t       $b      4
 (>a+)ab        aaab    n       -       -
 (?>a+)b        aaab    y       -       -
 ([[:]+)        a:[b]:  y       $1      :[
@@ -851,7 +850,7 @@ abb$        b\nca   n       -       -
 'abb$'m        b\nca   n       -       -
 (^|x)(c)       ca      y       $2      c
 a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz   x       n       -       -
-a(?{$a=2;$b=3;($b)=$a})b       yabz    y       $b      2
+a(?{$a=2;$b=3;($b)=$a})b       yabz    t       $b      2
 round\(((?>[^()]+))\)  _I(round(xs * sz),1)    y       $1      xs * sz
 '((?x:.) )'    x       y       $1-     x -
 '((?-x:.) )'x  x       y       $1-     x-
index 6ba9e77..0a813c6 100644 (file)
@@ -88,7 +88,7 @@ on;
 
 fresh_perl_is <<'CODE', '123123', {},
   for my $x(1..3) {
-   push @regexps = qr/(?{ print $x })a/;
+   push @regexps, qr/(?{ print $x })a/;
   }
  "a" =~ $_ for @regexps;
  "ba" =~ /b$_/ for @regexps;
@@ -102,8 +102,7 @@ is $pack, 'foo', 'qr// inherits package';
 "a" =~ do { use re "/x"; qr/(?{ $::re = qr-- })a/ };
 is $re, '(?^x:)', 'qr// inherits pragmata';
 
-on;
-
+$::pack = '';
 "ba" =~ /b${\do { package baz; qr|(?{ $::pack = __PACKAGE__ })a| }}/;
 is $pack, 'baz', '/text$qr/ inherits package';
 "ba" =~ m+b${\do { use re "/i"; qr|(?{ $::re = qr-- })a| }}+;