This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
force recompiling of regex where closures matter
authorDavid Mitchell <davem@iabyn.com>
Mon, 19 Dec 2011 11:33:07 +0000 (11:33 +0000)
committerDavid Mitchell <davem@iabyn.com>
Wed, 13 Jun 2012 12:32:47 +0000 (13:32 +0100)
There are some cases where on the second run of a run-time regex, the
text of the pattern hasn't changed, but we should still recompile to
ensure that closure behaviour is correct.

These cases are:

1) run-time code:

    my $code = '(??{$x})';
    for my $x (1..3) {
        $x =~ /$code/; # recompile to see fresh value of $x
    }

2) embedded regexes with code:

    for my $x (1..3) {
my $r = qr/(??{$x})/;
        "A$x" =~ /A$r/; # recompile to see new $r
    }

With this fix, all the TODO tests in re/pat_re_eval.t now pass. (Note that
a couple of those TODO tests were actually broken and are fixed in this
commit)

regcomp.c
t/re/pat_re_eval.t
t/re/reg_eval_scope.t

index 021e14e..ebff883 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -5032,7 +5032,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     bool used_setjump = FALSE;
     regex_charset initial_charset = get_regex_charset(orig_rx_flags);
     bool code_is_utf8 = 0;
-
+    bool recompile = 0;
     U8 jump_ret = 0;
     dJMPENV;
     scan_data_t data;
@@ -5211,6 +5211,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                    RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri);
                    if (ri->num_code_blocks) {
                        int i;
+                       /* the presence of an embedded qr// with code means
+                        * we should always recompile: the text of the
+                        * qr// may not have changed, but it may be a
+                        * different closure than last time */
+                       recompile = 1;
                        Renew(pRExC_state->code_blocks,
                            pRExC_state->num_code_blocks + ri->num_code_blocks,
                            struct reg_code_block);
@@ -5417,17 +5422,40 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /* return old regex if pattern hasn't changed */
 
     if (   old_re
+        && !recompile
        && !!RX_UTF8(old_re) == !!RExC_utf8
        && RX_PRECOMP(old_re)
        && RX_PRELEN(old_re) == plen
        && memEQ(RX_PRECOMP(old_re), exp, plen))
     {
-       ReREFCNT_inc(old_re);
-       if (used_setjump) {
-           JMPENV_POP;
+       /* see if there are any run-time code blocks */
+       int n = 0;
+       STRLEN s;
+       bool runtime = 0;
+       for (s = 0; s < plen; s++) {
+           if (n < pRExC_state->num_code_blocks
+               && s == pRExC_state->code_blocks[n].start)
+           {
+               s = pRExC_state->code_blocks[n].end;
+               n++;
+               continue;
+           }
+           if (exp[s] == '(' && exp[s+1] == '?' &&
+               (exp[s+2] == '{' || (exp[s+2] == '?' && exp[s+3] == '{')))
+           {
+               runtime = 1;
+               break;
+           }
+       }
+       /* with runtime code, always recompile */
+       if (!runtime) {
+           ReREFCNT_inc(old_re);
+           if (used_setjump) {
+               JMPENV_POP;
+           }
+           Safefree(pRExC_state->code_blocks);
+           return old_re;
        }
-       Safefree(pRExC_state->code_blocks);
-       return old_re;
     }
 
 #ifdef TRIE_STUDY_OPT
index f4f42bd..0290bc1 100644 (file)
@@ -359,15 +359,6 @@ sub run_tests {
        # i.e. when do (?{}) blocks get (re)compiled, and what instances
        # of lexical vars do they close over?
 
-       # XXX remove this when TODOs are fixed
-       # like ok, but 1st arg indicates TODO
-       sub tok($$$) {
-           my $todo = shift;
-           local $::TODO = 're_eval lexical madness' if $todo;
-           ok($_[0], $_[1]);
-       }
-
-
        # if the pattern string gets utf8 upgraded while concatenating,
        # make sure a literal code block is still detected (by still
        # compiling in the absence of use re 'eval')
@@ -398,25 +389,25 @@ sub run_tests {
 
            my $code1  = 'B(??{$x})';
            my $code1u = $c80 . "\x{100}" . '(??{$x})';
-           tok($bc, "AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA");
-           tok($bc, "A$c80\x{100}$x" =~ /^A$code1u$/,
+           ok("AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA");
+           ok("A$c80\x{100}$x" =~ /^A$code1u$/,
                                        "[$x] unvarying runtime code AU");
-           tok($bc, "$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/,
+           ok("$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/,
                                        "[$x] unvarying runtime code UA");
-           tok($bc, "$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/,
+           ok("$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/,
                                        "[$x] unvarying runtime code UU");
 
            # mixed literal and run-time code blocks
 
            my $code2  = 'B(??{$x})';
            my $code2u = $c80 . "\x{100}" . '(??{$x})';
-           tok($bc, "A$x-B$x" =~ /^A(??{$x})-$code2$/,
+           ok("A$x-B$x" =~ /^A(??{$x})-$code2$/,
                                        "[$x] literal+runtime AA");
-           tok($bc, "A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/,
+           ok("A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/,
                                        "[$x] literal+runtime AU");
-           tok($bc, "$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/,
+           ok("$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/,
                                        "[$x] literal+runtime UA");
-           tok($bc, "$c80\x{101}$x-$c80\x{100}$x"
+           ok("$c80\x{101}$x-$c80\x{100}$x"
                                        =~ /^$c80\x{101}(??{$x})-$code2u$/,
                                        "[$x] literal+runtime UU");
 
@@ -443,7 +434,7 @@ sub run_tests {
            use re 'eval';
            $cr4 //= qr/C(??{$x})$/;
            my $code3 = 'A(??{$x})';
-           tok(1,   "A$x-BCa" =~ /^A$code3-B$cr4/,
+           ok("A$x-BCa" =~ /^$code3-B$cr4/,
                            "[$x] literal qr once embedded text + run code");
            no re 'eval';
 
@@ -455,12 +446,12 @@ sub run_tests {
            # literal qr code, embedded with text
 
            my $r2 = qr/B(??{$x})$/;
-           tok($bc, "AB$x" =~ /^A$r2/, "[$x] literal qr embedded text");
+           ok("AB$x" =~ /^A$r2/, "[$x] literal qr embedded text");
 
            # literal qr code, embedded with text + lit code
 
            my $r3 = qr/C(??{$x})$/;
-           tok($bc, "A$x-BC$x" =~ /^A(??{$x})-B$r3/,
+           ok("A$x-BC$x" =~ /^A(??{$x})-B$r3/,
                                "[$x] literal qr embedded text + lit code");
 
            # literal qr code, embedded with text + run code
@@ -468,16 +459,16 @@ sub run_tests {
            no re 'eval';
            my $r4 = qr/C(??{$x})$/;
            my $code4 = '(??{$x})';
-           tok($bc, "A$x-BC$x" =~ /^A$code4-B$r4/,
+           ok("A$x-BC$x" =~ /^A$code4-B$r4/,
                                "[$x] literal qr embedded text + run code");
            use re 'eval';
 
            # nested qr in different scopes
 
            my $code5 = '(??{$x})';
-           my $r5 = qr/C(??{$x})$/;
+           my $r5 = qr/C(??{$x})/;
            use re 'eval';
-           my $r6 = qr/$code5-C(??{$x})$/;
+           my $r6 = qr/$code5-C(??{$x})/;
            no re 'eval';
 
            my @rr5;
@@ -487,12 +478,12 @@ sub run_tests {
 
                my $rr5 = qr/^A(??{"$x$y"})-$r5/;
                push @rr5, $rr5;
-               tok($bc, "A$x$y-C$x" =~ $rr5,
+               ok("A$x$y-C$x" =~ $rr5,
                                "[$x-$y] literal qr + r5");
 
                my $rr6 = qr/^A(??{"$x$y"})-$r6/;
                push @rr6, $rr6;
-               tok($bc, "A$x$y-$x-C$x" =~ $rr6,
+               ok("A$x$y-$x-C$x" =~ $rr6,
                                "[$x-$y] literal qr + r6");
            }
 
@@ -500,16 +491,16 @@ sub run_tests {
                my $y = 'Y';
                my $yy = (qw(d e f))[$i];
                my $rr5 = $rr5[$i];
-               tok($bc, "A$x$yy-C$x" =~ $rr5,
-                               "[$x-$yy] literal qr + r5, outside");
-               tok(1,               "A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})/,
+               ok("A$x$yy-C$x" =~ $rr5, "[$x-$yy] literal qr + r5, outside");
+               ok("A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})$/,
                                "[$x-$yy] literal qr + r5 + lit, outside");
 
+
                my $rr6 = $rr6[$i];
                push @rr6, $rr6;
-               tok($bc, "A$x$yy-$x-C$x" =~ $rr6,
+               ok("A$x$yy-$x-C$x" =~ $rr6,
                                "[$x-$yy] literal qr + r6, outside");
-               tok(1,               "A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/,
+               ok("A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/,
                                "[$x-$yy] literal qr + r6 +lit, outside");
            }
        }
index 0a813c6..5873a3d 100644 (file)
@@ -84,8 +84,6 @@ fresh_perl_is <<'CODE', '178279371047857967101745', {},
 CODE
  'multiple (?{})s in "foo" =~ /$string/x';
 
-on;
-
 fresh_perl_is <<'CODE', '123123', {},
   for my $x(1..3) {
    push @regexps, qr/(?{ print $x })a/;
@@ -95,8 +93,6 @@ fresh_perl_is <<'CODE', '123123', {},
 CODE
  'qr/(?{})/ is a closure';
 
-off;
-
 "a" =~ do { package foo; qr/(?{ $::pack = __PACKAGE__ })a/ };
 is $pack, 'foo', 'qr// inherits package';
 "a" =~ do { use re "/x"; qr/(?{ $::re = qr-- })a/ };