This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
preserve code blocks in interpolated qr//s
[perl5.git] / t / re / pat_re_eval.t
index 5a79942..e53f108 100644 (file)
@@ -22,7 +22,7 @@ BEGIN {
 }
 
 
-plan tests => 127;  # Update this when adding/deleting tests.
+plan tests => 245;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -59,7 +59,8 @@ sub run_tests {
 
         no re "eval";
         undef $@;
-        my $match = eval { /$a$c$a/ };
+        my $d = '(?{1})';
+        my $match = eval { /$a$c$a$d/ };
         ok($@ && $@ =~ /Eval-group not allowed/ && !$match, $message);
         is($b, '14', $message);
 
@@ -353,6 +354,232 @@ sub run_tests {
        ok('ja' =~ /^\L(??{"$B\Ea"})$/,  '^\L(??{"$B\Ea"})$');
     }
 
+    {
+       # Comprehensive (hopefully) tests of closure behaviour:
+       # i.e. when do (?{}) blocks get (re)compiled, and what instances
+       # of lexical vars do they close over?
+
+       # XXX remove this when TODOs are fixed
+       # like ok, but 1st arg indicates TODO
+       sub tok($$$) {
+           my $todo = shift;
+           local $::TODO = 're_eval lexical madness' if $todo;
+           ok($_[0], $_[1]);
+       }
+
+       # XXX remove this when TODOs are fixed
+       no warnings qw(uninitialized closure);
+
+       # if the pattern string gets utf8 upgraded while concatenating,
+       # make sure a literal code block is still detected (by still
+       # compiling in the absence of use re 'eval')
+
+       {
+           my $s1 = "\x{80}";
+           my $s2 = "\x{100}";
+           ok("\x{80}\x{100}" =~ /^$s1(?{1})$s2$/, "utf8 upgrade");
+       }
+
+       my ($cr1, $cr2, $cr3, $cr4);
+
+       use re 'eval';
+       for my $x (qw(a b c)) {
+           my $bc = ($x ne 'a');
+           my $c80 = chr(0x80);
+
+           # the most basic: literal code should be in same scope
+           # as the parent
+
+           ok("A$x"       =~ /^A(??{$x})$/,       "[$x] literal code");
+           ok("\x{100}$x" =~ /^\x{100}(??{$x})$/, "[$x] literal code UTF8");
+
+           # the "don't recompile if pattern unchanged" mechanism
+           # shouldn't apply to code blocks - recompile every time
+           # to pick up new instances of variables
+
+           my $code1  = 'B(??{$x})';
+           my $code1u = $c80 . "\x{100}" . '(??{$x})';
+           tok($bc, "AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA");
+           tok($bc, "A$c80\x{100}$x" =~ /^A$code1u$/,
+                                       "[$x] unvarying runtime code AU");
+           tok($bc, "$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/,
+                                       "[$x] unvarying runtime code UA");
+           tok($bc, "$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/,
+                                       "[$x] unvarying runtime code UU");
+
+           # mixed literal and run-time code blocks
+
+           my $code2  = 'B(??{$x})';
+           my $code2u = $c80 . "\x{100}" . '(??{$x})';
+           tok($bc, "A$x-B$x" =~ /^A(??{$x})-$code2$/,
+                                       "[$x] literal+runtime AA");
+           tok($bc, "A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/,
+                                       "[$x] literal+runtime AU");
+           tok($bc, "$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/,
+                                       "[$x] literal+runtime UA");
+           tok($bc, "$c80\x{101}$x-$c80\x{100}$x"
+                                       =~ /^$c80\x{101}(??{$x})-$code2u$/,
+                                       "[$x] literal+runtime UU");
+
+           # literal qr code only created once, naked
+
+           $cr1 //= qr/^A(??{$x})$/;
+           ok("Aa" =~ $cr1, "[$x] literal qr once naked");
+
+           # literal qr code only created once, embedded with text
+
+           $cr2 //= qr/B(??{$x})$/;
+           ok("ABa" =~ /^A$cr2/, "[$x] literal qr once embedded text");
+
+           # literal qr code only created once, embedded with text + lit code
+
+           $cr3 //= qr/C(??{$x})$/;
+           ok("A$x-BCa" =~ /^A(??{$x})-B$cr3/,
+                           "[$x] literal qr once embedded text + lit code");
+
+           # literal qr code only created once, embedded with text + run code
+
+           $cr4 //= qr/C(??{$x})$/;
+           my $code3 = 'A(??{$x})';
+           tok(1,   "A$x-BCa" =~ /^A$code3-B$cr4/,
+                           "[$x] literal qr once embedded text + run code");
+
+           # literal qr code, naked
+
+           my $r1 = qr/^A(??{$x})$/;
+           ok("A$x" =~ $r1, "[$x] literal qr naked");
+
+           # literal qr code, embedded with text
+
+           my $r2 = qr/B(??{$x})$/;
+           tok($bc, "AB$x" =~ /^A$r2/, "[$x] literal qr embedded text");
+
+           # literal qr code, embedded with text + lit code
+
+           my $r3 = qr/C(??{$x})$/;
+           tok($bc, "A$x-BC$x" =~ /^A(??{$x})-B$r3/,
+                               "[$x] literal qr embedded text + lit code");
+
+           # literal qr code, embedded with text + run code
+
+           my $r4 = qr/C(??{$x})$/;
+           my $code4 = '(??{$x})';
+           tok($bc, "A$x-BC$x" =~ /^A$code4-B$r4/,
+                               "[$x] literal qr embedded text + run code");
+
+           # nested qr in different scopes
+
+           my $code5 = '(??{$x})';
+           my $r5 = qr/C(??{$x})$/;
+           my $r6 = qr/$code5-C(??{$x})$/;
+
+           my @rr5;
+           my @rr6;
+
+           for my $y (qw(d e f)) {
+
+               my $rr5 = qr/^A(??{"$x$y"})-$r5/;
+               push @rr5, $rr5;
+               tok($bc, "A$x$y-C$x" =~ $rr5,
+                               "[$x-$y] literal qr + r5");
+
+               my $rr6 = qr/^A(??{"$x$y"})-$r6/;
+               push @rr6, $rr6;
+               tok($bc, "A$x$y-$x-C$x" =~ $rr6,
+                               "[$x-$y] literal qr + r6");
+           }
+
+           for my $i (0,1,2) {
+               my $y = 'Y';
+               my $yy = (qw(d e f))[$i];
+               my $rr5 = $rr5[$i];
+               tok($bc, "A$x$yy-C$x" =~ $rr5,
+                               "[$x-$yy] literal qr + r5, outside");
+               tok(1,               "A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})/,
+                               "[$x-$yy] literal qr + r5 + lit, outside");
+
+               my $rr6 = $rr6[$i];
+               push @rr6, $rr6;
+               tok($bc, "A$x$yy-$x-C$x" =~ $rr6,
+                               "[$x-$yy] literal qr + r6, outside");
+               tok(1,               "A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/,
+                               "[$x-$yy] literal qr + r6 +lit, outside");
+           }
+       }
+
+       # recursive subs should get lexical from the correct pad depth
+
+       sub recurse {
+           my ($n) = @_;
+           return if $n > 2;
+           ok("A$n" =~ /^A(??{$n})$/, "recurse($n)");
+           recurse($n+1);
+       }
+       recurse(0);
+
+       # for qr// containing run-time elements but with a compile-time
+       # code block, make sure the run-time bits are executed in the same
+       # pad they were compiled in
+       {
+           my $a = 'a'; # ensure outer and inner pads don't align
+           my $b = 'b';
+           my $c = 'c';
+           my $d = 'd';
+           my $r = qr/^$b(??{$c})$d$/;
+           ok("bcd" =~ $r, "qr with run-time elements and code block");
+       }
+
+       # check that cascaded embedded regexes all see their own lexical
+       # environment
+
+       {
+           my ($r1, $r2, $r3, $r4);
+           my ($x1, $x2, $x3, $x4) = (5,6,7,8);
+           { my $x1 = 1; $r1 = qr/A(??{$x1})/; }
+           { my $x2 = 2; $r2 = qr/$r1(??{$x2})/; }
+           { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; }
+           { my $x4 = 4; $r4 = qr/$r3(??{$x4})/; }
+           ok("A1234" =~ /^$r4$/, "cascaded qr");
+       }
+
+       # and again, but in a loop, with no external references
+       # being maintained to the qr's
+
+       {
+           my $r = 'A';
+           for my $x (1..4) {
+               $r = qr/$r(??{$x})/;
+           }
+           my $x = 5;
+           ok("A1234" =~ /^$r$/, "cascaded qr loop");
+       }
+
+
+       # and again, but compiling the qrs in an eval so there
+       # aren't even refs to the qrs from any ops
+
+       {
+           my $r = 'A';
+           for my $x (1..4) {
+               $r = eval q[ qr/$r(??{$x})/; ];
+           }
+           my $x = 5;
+           ok("A1234" =~ /^$r$/, "cascaded qr loop");
+       }
+
+       # forward declared subs should Do The Right Thing with any anon CVs
+       # within them (i.e. pad_fixup_inner_anons() should work)
+
+       sub forward;
+       sub forward {
+           my $x = "a";
+           my $A = "A";
+           ok("Aa" =~ qr/^A(??{$x})$/,  "forward qr compiletime");
+           ok("Aa" =~ qr/^$A(??{$x})$/, "forward qr runtime");
+       }
+       forward;
+    }
+
 } # End of sub run_tests
 
 1;