3 # This is a home for regular expression tests that don't fit into
4 # the format supported by re/regexp.t. If you want to add a test
5 # that does fit that format, add it to re/re_tests, not here.
20 require './test.pl'; require './charset_tools.pl';
26 plan tests => 502; # Update this when adding/deleting tests.
28 run_tests() unless caller;
30 # test that runtime code without 'use re eval' is trapped
33 like($@, qr/Eval-group not allowed at runtime/, @_);
41 my $message = "Call code from qr //";
42 local $_ = 'var="foo"';
45 ok(/$a$a/ && $b eq '9', $message);
48 ok(/$a$a/ && $b eq '11', $message);
52 norun("$message norun 1");
57 norun("$message norun 2");
60 is($b, '14', $message);
66 my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/);
68 is($lex_res, 1, $message);
69 is($lex_a, 44, $message);
70 is($lex_c, 43, $message);
74 my $match = eval { /$a$c$a$d/ };
75 ok($@ && $@ =~ /Eval-group not allowed/ && !$match, $message);
76 is($b, '14', $message);
82 $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/);
84 is($lex_res, 1, $message);
85 is($lex_a, 44, $message);
86 is($lex_c, 43, $message);
91 our $a = bless qr /foo/ => 'Foo';
92 ok 'goodfood' =~ $a, "Reblessed qr // matches";
93 is($a, '(?^:foo)', "Reblessed qr // stringifies");
95 my $z = my $y = byte_utf8a_to_utf8n("\317\276"); # Byte representation
98 ok $x =~ $a, "UTF-8 interpolation in qr //";
99 ok "a$a" =~ $x, "Stringified qr // preserves UTF-8";
100 ok "a$x" =~ /^a$a\z/, "Interpolated qr // preserves UTF-8";
101 ok "a$x" =~ /^a(??{$a})\z/,
102 "Postponed interpolation of qr // preserves UTF-8";
105 is(length qr /##/x, 9, "## in qr // doesn't corrupt memory; Bug 17776");
108 ok "$x$x" =~ /^$x(??{$x})\z/,
109 "Postponed UTF-8 string in UTF-8 re matches UTF-8";
110 ok "$y$x" =~ /^$y(??{$x})\z/,
111 "Postponed UTF-8 string in non-UTF-8 re matches UTF-8";
112 ok "$y$x" !~ /^$y(??{$y})\z/,
113 "Postponed non-UTF-8 string in non-UTF-8 re doesn't match UTF-8";
114 ok "$x$x" !~ /^$x(??{$y})\z/,
115 "Postponed non-UTF-8 string in UTF-8 re doesn't match UTF-8";
116 ok "$y$y" =~ /^$y(??{$y})\z/,
117 "Postponed non-UTF-8 string in non-UTF-8 re matches non-UTF8";
118 ok "$x$y" =~ /^$x(??{$y})\z/,
119 "Postponed non-UTF-8 string in UTF-8 re matches non-UTF8";
121 $y = $z; # Reset $y after upgrade.
122 ok "$x$y" !~ /^$x(??{$x})\z/,
123 "Postponed UTF-8 string in UTF-8 re doesn't match non-UTF-8";
124 ok "$y$y" !~ /^$y(??{$x})\z/,
125 "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8";
131 # Test if $^N and $+ work in (?{})
139 push @ctl_n, (defined $^N ? $^N : "undef");
140 push @plus, (defined $+ ? $+ : "undef");
151 # [ Expected result, Regex, Expected value(s) of $^N, Expected value(s) of $+ ]
152 [ 1, qr#^$nested_tags$#, "bla blubb bla", "a b a" ],
153 [ 1, qr#^($nested_tags)$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ],
154 [ 1, qr#^(|)$nested_tags$#, "bla blubb bla", "a b a" ],
155 [ 1, qr#^(?:|)$nested_tags$#, "bla blubb bla", "a b a" ],
156 [ 1, qr#^<(bl|bla)>$nested_tags<(/\1)>$#, "blubb /bla", "b /bla" ],
157 [ 1, qr#(??{"(|)"})$nested_tags$#, "bla blubb bla", "a b a" ],
158 [ 1, qr#^(??{"(bla|)"})$nested_tags$#, "bla blubb bla", "a b a" ],
159 [ 1, qr#^(??{"(|)"})(??{$nested_tags})$#, "bla blubb undef", "a b undef" ],
160 [ 1, qr#^(??{"(?:|)"})$nested_tags$#, "bla blubb bla", "a b a" ],
161 [ 1, qr#^((??{"(?:bla|)"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ],
162 [ 1, qr#^((??{"(?!)?"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ],
163 [ 1, qr#^((??{"(?:|<(/?bla)>)"}))((??{$nested_tags}))\1$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ],
164 [ 0, qr#^((??{"(?!)"}))?((??{$nested_tags}))(?!)$#, "bla blubb undef", "a b undef" ],
166 ) { #"#silence vim highlighting
170 my $match = (("<bla><blubb></blubb></bla>" =~ $test->[1]) ? 1 : 0);
171 push @ctl_n, (defined $^N ? $^N : "undef");
172 push @plus, (defined $+ ? $+ : "undef");
173 ok($test->[0] == $match, "match $c");
174 if ($test->[0] != $match) {
175 # unset @ctl_n and @plus
178 is("@ctl_n", $test->[2], "ctl_n $c");
179 is("@plus", $test->[3], "plus $c");
187 defined $_[0] ? $_[0] : "undef";
190 like("123", qr/^(\d)(((??{1 + $^N})))+$/, 'Bug 56194');
195 my $re = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})#;
196 my $re2 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})(|a(b)c|def)(??{"$^R"})#;
197 my $re3 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})(|a(b)c|def)(??{"$^R"})#;
199 local $re5 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})#;
200 my $re6 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#;
201 my $re7 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#;
209 # Expected values of $^N
210 # Expected values of $+
211 # Expected values of $1, $2, $3, $4 and $5
215 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(??{$^N})$#,
218 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
222 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$+})$#,
225 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
229 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$+})$#,
232 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
236 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$^N})$#,
239 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
243 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$^N})$#,
246 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
250 qr#^($re)(|a(b)c|def)(??{$^R})$#,
253 "\$1 = 123, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b",
260 "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b",
267 "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b",
271 qr#^(??{$re5})(|abc|def)(??{"$^R"})$#,
274 "\$1 = abc, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef",
278 qr#^(??{$re5})(|a(b)c|def)(??{"$^R"})$#,
281 "\$1 = abc, \$2 = b, \$3 = undef, \$4 = undef, \$5 = undef",
285 qr#^((\d+)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})))$#,
286 "1234 123 12 1 2 3 1234",
287 "1234 123 12 1 2 3 4",
288 "\$1 = 1234, \$2 = 1, \$3 = 2, \$4 = 3, \$5 = 4",
292 qr#^(\d+)($re6)($re6)($re6)$re6(($re6)$re6)$#,
293 "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 56",
294 "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 5",
295 "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 56",
299 qr#^((??{$re8}))($re7)($re7)($re7)$re7($re7)($re7(\2))$#,
300 "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 62",
301 "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 2",
302 "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 5",
309 my $match = $test->[0] =~ $test->[1];
310 my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5));
311 push @ctl_n, $f->($^N);
312 push @plus, $f->($+);
313 ok($match, "match $c; Bug 56194");
315 # unset $str, @ctl_n and @plus
319 is("@ctl_n", $test->[2], "ctl_n $c; Bug 56194");
320 is("@plus", $test->[3], "plus $c; Bug 56194");
321 is($str, $test->[4], "str $c; Bug 56194");
329 local $re4 = qr#(1)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})){2}(?{$^N})(|abc|def)(??{"$^R"})#;
331 my $match = "123abc3" =~ m/^(??{$re4})$/;
332 my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5),'$^R = '.$f->($^R));
333 push @ctl_n, $f->($^N);
334 push @plus, $f->($+);
335 ok($match, 'Bug 56194');
342 is("@ctl_n", "1 2 undef", 'Bug 56194');
343 is("@plus", "1 2 undef", 'Bug 56194');
345 "\$1 = undef, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef, \$^R = 3",
346 'Bug 56194 ($^R tweaked by 121070)');
350 "abcd"=~/(?<Char>.)(?&Char)(?{ 42 })/;
351 is("$^R", 42, 'Bug 121070 - use of (?&Char) should not clobber $^R');
352 "abcd"=~/(?<Char>.)(?&Char)(?{ 42 })(?{ 43 })/;
353 is("$^R", 43, 'related to 121070 - use of (?&Char) should not clobber $^R');
358 # re evals within \U, \Q etc shouldn't be seen by the lexer
361 ok('(?{1})' =~ /^\Q(?{1})\E$/, '\Q(?{1})\E');
362 ok('(?{1})' =~ /^\Q(?{\E1\}\)$/, '\Q(?{\E1\}\)');
363 eval {/^\U(??{"$a\Ea"})$/ }; norun('^\U(??{"$a\Ea"})$ norun');
364 eval {/^\L(??{"$B\Ea"})$/ }; norun('^\L(??{"$B\Ea"})$ norun');
366 ok('Ia' =~ /^\U(??{"$a\Ea"})$/, '^\U(??{"$a\Ea"})$');
367 ok('ja' =~ /^\L(??{"$B\Ea"})$/, '^\L(??{"$B\Ea"})$');
371 # Comprehensive (hopefully) tests of closure behaviour:
372 # i.e. when do (?{}) blocks get (re)compiled, and what instances
373 # of lexical vars do they close over?
375 # if the pattern string gets utf8 upgraded while concatenating,
376 # make sure a literal code block is still detected (by still
377 # compiling in the absence of use re 'eval')
382 ok("\x{80}\x{100}" =~ /^$s1(?{1})$s2$/, "utf8 upgrade");
385 my ($cr1, $cr2, $cr3, $cr4);
387 for my $x (qw(a b c)) {
388 my $bc = ($x ne 'a');
391 # the most basic: literal code should be in same scope
394 ok("A$x" =~ /^A(??{$x})$/, "[$x] literal code");
395 ok("\x{100}$x" =~ /^\x{100}(??{$x})$/, "[$x] literal code UTF8");
397 # the "don't recompile if pattern unchanged" mechanism
398 # shouldn't apply to code blocks - recompile every time
399 # to pick up new instances of variables
401 my $code1 = 'B(??{$x})';
402 my $code1u = $c80 . "\x{100}" . '(??{$x})';
405 norun("[$x] unvarying runtime code AA norun");
407 norun("[$x] unvarying runtime code AU norun");
408 eval {/^$c80\x{100}$code1$/};
409 norun("[$x] unvarying runtime code UA norun");
410 eval {/^$c80\x{101}$code1u$/};
411 norun("[$x] unvarying runtime code UU norun");
415 ok("AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA");
416 ok("A$c80\x{100}$x" =~ /^A$code1u$/,
417 "[$x] unvarying runtime code AU");
418 ok("$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/,
419 "[$x] unvarying runtime code UA");
420 ok("$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/,
421 "[$x] unvarying runtime code UU");
424 # mixed literal and run-time code blocks
426 my $code2 = 'B(??{$x})';
427 my $code2u = $c80 . "\x{100}" . '(??{$x})';
429 eval {/^A(??{$x})-$code2$/};
430 norun("[$x] literal+runtime AA norun");
431 eval {/^A(??{$x})-$code2u$/};
432 norun("[$x] literal+runtime AU norun");
433 eval {/^$c80\x{100}(??{$x})-$code2$/};
434 norun("[$x] literal+runtime UA norun");
435 eval {/^$c80\x{101}(??{$x})-$code2u$/};
436 norun("[$x] literal+runtime UU norun");
440 ok("A$x-B$x" =~ /^A(??{$x})-$code2$/,
441 "[$x] literal+runtime AA");
442 ok("A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/,
443 "[$x] literal+runtime AU");
444 ok("$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/,
445 "[$x] literal+runtime UA");
446 ok("$c80\x{101}$x-$c80\x{100}$x"
447 =~ /^$c80\x{101}(??{$x})-$code2u$/,
448 "[$x] literal+runtime UU");
451 # literal qr code only created once, naked
453 $cr1 //= qr/^A(??{$x})$/;
454 ok("Aa" =~ $cr1, "[$x] literal qr once naked");
456 # literal qr code only created once, embedded with text
458 $cr2 //= qr/B(??{$x})$/;
459 ok("ABa" =~ /^A$cr2/, "[$x] literal qr once embedded text");
461 # literal qr code only created once, embedded with text + lit code
463 $cr3 //= qr/C(??{$x})$/;
464 ok("A$x-BCa" =~ /^A(??{$x})-B$cr3/,
465 "[$x] literal qr once embedded text + lit code");
467 # literal qr code only created once, embedded with text + run code
469 $cr4 //= qr/C(??{$x})$/;
470 my $code3 = 'A(??{$x})';
472 eval {/^$code3-B$cr4/};
473 norun("[$x] literal qr once embedded text + run code norun");
476 ok("A$x-BCa" =~ /^$code3-B$cr4/,
477 "[$x] literal qr once embedded text + run code");
480 # literal qr code, naked
482 my $r1 = qr/^A(??{$x})$/;
483 ok("A$x" =~ $r1, "[$x] literal qr naked");
485 # literal qr code, embedded with text
487 my $r2 = qr/B(??{$x})$/;
488 ok("AB$x" =~ /^A$r2/, "[$x] literal qr embedded text");
490 # literal qr code, embedded with text + lit code
492 my $r3 = qr/C(??{$x})$/;
493 ok("A$x-BC$x" =~ /^A(??{$x})-B$r3/,
494 "[$x] literal qr embedded text + lit code");
496 # literal qr code, embedded with text + run code
498 my $r4 = qr/C(??{$x})$/;
499 my $code4 = '(??{$x})';
501 eval {/^A$code4-B$r4/};
502 norun("[$x] literal qr embedded text + run code");
505 ok("A$x-BC$x" =~ /^A$code4-B$r4/,
506 "[$x] literal qr embedded text + run code");
509 # nested qr in different scopes
511 my $code5 = '(??{$x})';
512 my $r5 = qr/C(??{$x})/;
515 eval {qr/$code5-C(??{$x})/}; norun("r6 norun");
518 $r6 = qr/$code5-C(??{$x})/;
524 for my $y (qw(d e f)) {
526 my $rr5 = qr/^A(??{"$x$y"})-$r5/;
528 ok("A$x$y-C$x" =~ $rr5,
529 "[$x-$y] literal qr + r5");
531 my $rr6 = qr/^A(??{"$x$y"})-$r6/;
533 ok("A$x$y-$x-C$x" =~ $rr6,
534 "[$x-$y] literal qr + r6");
539 my $yy = (qw(d e f))[$i];
541 ok("A$x$yy-C$x" =~ $rr5, "[$x-$yy] literal qr + r5, outside");
542 ok("A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})$/,
543 "[$x-$yy] literal qr + r5 + lit, outside");
548 ok("A$x$yy-$x-C$x" =~ $rr6,
549 "[$x-$yy] literal qr + r6, outside");
550 ok("A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/,
551 "[$x-$yy] literal qr + r6 +lit, outside");
555 # recursive subs should get lexical from the correct pad depth
560 ok("A$n" =~ /^A(??{$n})$/, "recurse($n)");
565 # for qr// containing run-time elements but with a compile-time
566 # code block, make sure the run-time bits are executed in the same
567 # pad they were compiled in
569 my $a = 'a'; # ensure outer and inner pads don't align
573 my $r = qr/^$b(??{$c})$d$/;
574 ok("bcd" =~ $r, "qr with run-time elements and code block");
577 # check that cascaded embedded regexes all see their own lexical
581 my ($r1, $r2, $r3, $r4);
582 my ($x1, $x2, $x3, $x4) = (5,6,7,8);
583 { my $x1 = 1; $r1 = qr/A(??{$x1})/; }
584 { my $x2 = 2; $r2 = qr/$r1(??{$x2})/; }
585 { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; }
586 { my $x4 = 4; $r4 = qr/$r3(??{$x4})/; }
587 ok("A1234" =~ /^$r4$/, "cascaded qr");
590 # and again, but in a loop, with no external references
591 # being maintained to the qr's
599 ok("A1234" =~ /^$r$/, "cascaded qr loop");
603 # and again, but compiling the qrs in an eval so there
604 # aren't even refs to the qrs from any ops
609 $r = eval q[ qr/$r(??{$x})/; ];
612 ok("A1234" =~ /^$r$/, "cascaded qr loop");
615 # have qrs with either literal code blocks or only embedded
616 # code blocks, but not both
619 my ($r1, $r2, $r3, $r4);
620 my ($x1, $x3) = (7,8);
621 { my $x1 = 1; $r1 = qr/A(??{$x1})/; }
622 { $r2 = qr/${r1}2/; }
623 { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; }
624 { $r4 = qr/${r3}4/; }
625 ok("A1234" =~ /^$r4$/, "cascaded qr mix 1");
626 ok("A12345" =~ /^${r4}5$/, "cascaded qr mix 2");
627 ok("A1234" =~ qr/^$r4$/ , "cascaded qr mix 3");
628 ok("A12345" =~ qr/^${r4}5$/, "cascaded qr mix 4");
631 # and make sure things are freed at the right time
633 sub Foo99::DESTROY { $Foo99::d++ }
637 my $x = bless [1], 'Foo99';
638 $r1 = eval 'qr/(??{$x->[0]})/';
640 my $r2 = eval 'qr/a$r1/';
642 ok(eval '"a1" =~ qr/^$r2$/', "match while in scope");
643 # make sure PL_reg_curpm isn't holding on to anything
645 is($Foo99::d, 0, "before scope exit");
647 ::is($Foo99::d, 1, "after scope exit");
649 # forward declared subs should Do The Right Thing with any anon CVs
650 # within them (i.e. pad_fixup_inner_anons() should work)
656 ok("Aa" =~ qr/^A(??{$x})$/, "forward qr compiletime");
657 ok("Aa" =~ qr/^$A(??{$x})$/, "forward qr runtime");
662 # test that run-time embedded code, when re-fed into toker,
663 # does all the right escapes
667 $enc = eval 'use Encode; find_encoding("ascii")' unless $::IS_EBCDIC;
672 # note that most of the strings below are single-quoted, and the
673 # things within them, like '$y', *aren't* intended to interpolate
676 'a\\$y(?# (??{BEGIN{$x=1} "X1"})b(?# \Ux2\E)c\'d\\\\e\\\\Uf\\\\E';
678 ok(q{a$ybc'd\e\Uf\E} =~ /^$s1$/, "reparse");
679 is($x, 0, "reparse no BEGIN");
681 my $s2 = 'g\\$y# (??{{BEGIN{$x=2} "X3"}) \Ux3\E' . "\nh";
683 ok(q{a$ybc'd\\e\\Uf\\Eg$yh} =~ /^$s1$s2$/x, "reparse /x");
684 is($x, 0, "reparse /x no BEGIN");
689 # non-ascii in string as "<0xNNN>"
694 (utf8::native_to_unicode($c)< 32
695 || utf8::native_to_unicode($c) > 127)
696 ? sprintf("<0x%x>", $c) : $1;
700 sub fmt { sprintf "hairy backslashes %s [%s] =~ /^%s/",
701 $_[0], esc_str($_[1]), esc_str($_[2]);
706 [ '', '', 'blank ' ],
707 [ "\x{100}", '\x{100}', 'single' ],
708 [ "\x{100}", "\x{100}", 'double' ])
713 [ "$b$q", "$b$b$b$q" ],
714 [ "$b$b$q", "$b$b$b$b$q" ],
715 [ "$b$b$b$q", "$b$b$b$b$b$b$q" ],
716 [ "$b$b$b$b$q","$b$b$b$b$b$b$b$b$q" ],
718 my ($s, $r) = @$pair;
720 my $ss = "$u->[0]$s";
723 my $cc = "$u->[1]$c";
725 ok($ss =~ /^$cc/, fmt("plain $u->[2]", $ss, $cc));
728 $nine = $nine = "bad";
729 $ss = "$u->[0]\t${q}\x41${b}x42$s" if $::IS_ASCII;
730 $ss = "$u->[0]\t${q}\xC1${b}xC2$s" if $::IS_EBCDIC;
731 for my $use_qr ('', 'qr') {
732 $c = qq[(??{my \$z='{';]
734 ? qq[$use_qr"$b${b}t$b$q$b${b}x41$b$b$b${b}x42"]
735 : qq[$use_qr"$b${b}t$b$q$b${b}xC1$b$b$b${b}xC2"])
737 # (??{ qr/str/ }) goes through one less interpolation
738 # stage than (??{ qq/str/ })
739 $c =~ s{\\\\}{\\}g if ($use_qr eq 'qr');
744 eval {/^$cc/}; norun(fmt("code norun $u->[2]", $ss, $cc));
747 ok($ss =~ /^$cc/, fmt("code $u->[2]", $ss, $cc));
753 my $code1u = "(??{qw(\x{100})})";
754 eval {/^$code1u$/}; norun("reparse embedded unicode norun");
757 ok("\x{100}" =~ /^$code1u$/, "reparse embedded unicode");
761 # a non-pattern literal won't get code blocks parsed at compile time;
762 # but they must get parsed later on if 'use re eval' is in scope
763 # also check that unbalanced {}'s are parsed ok
766 eval q["a{" =~ '^(??{"a{"})$'];
767 norun("non-pattern literal code norun");
768 eval {/^${\'(??{"a{"})'}$/};
769 norun("runtime code with unbalanced {} norun");
772 ok("a{" =~ '^a(??{"{"})$', "non-pattern literal code");
773 ok("a{" =~ /^a${\'(??{"{"})'}$/, "runtime code with unbalanced {}");
776 # make sure warnings come from the right place
781 local $SIG{__WARN__} = sub { $w .= "@_" };
784 my $r = qr/(?{$t=$s+1})/;
786 like($w, qr/pat_re_eval/, "warning main file");
788 # do it in an eval to get predictable line numbers
791 $r = qr/(?{$t=$s+1})/;
795 like($w, qr/ at \(eval \d+\) line 3/, "warning eval A");
800 my $c = '(?{$t=$s+1})';
804 like($w, qr/ at \(eval \d+\) line 1/, "warning eval B");
809 # * mixing all the different types of blocks (literal, qr/literal/,
811 # * backtracking (the Z+ alternation ensures CURLYX and full
812 # scope popping on backtracking)
817 return unless $depth;
819 my $r1 = qr/(??{"$s1-$depth"})/;
822 my $c1 = '(??{"$s2-$depth"})';
824 ok( "<12345-ABC-$depth-123-LMN-$depth-1234-PQR-$depth>"
825 . "<12345-ABC-$depth-123-LMN-$depth-1234-PQR-$depth>"
827 /^<(\d|Z+)+(??{"45-ABC-$depth-"})(\d|Z+)+$r1-\d+$c1>
828 <(\d|Z+)+(??{"45-ABC-$depth-"})(\d|Z+)+$r1-\d+$c1>$/x,
835 # nested (??{}) called from various levels of a recursive function
841 ok("A$n" =~ m{^A(??{ "0123" =~ /((??{$n}))/; $1 })$},
843 ok("A$n" !~ m{^A(??{ "0123" =~ /((??{$n}))/; "X" })$},
844 "recurse3($n) nomatch");
850 # nested (??{}) being invoked recursively via a function
855 my @alpha = qw(A B C D E);
860 my $m = ("$alpha[$n]" . substr("0123", 0, $n+1)) =~
864 "$n-0123" =~ m{^(\d)-(((??{$recurse4->($n+1)})))};
865 $s .= "i1=$1:<=[$2]";
866 $3; # NB - not stringified
871 $s .= $m ? 'M' : '!M';
873 my $ret = '.*?' . ($n-1);
878 my $exp = '(n=0:1=A:(n=1:1=B:(n=2:1=C:(n=3:1=D:(n=4:<=[.*?3])'
879 . 'i1=3:<=[0123]1a=D:M<=[.*?2])i1=2:<=[012]1a=C:M<=[.*?1])'
880 . 'i1=1:<=[01]1a=B:M<=[.*?0])i1=0:<=[0]1a=A:M<=[.*?-1])';
881 is($s, $exp, 'recurse4');
884 # single (??{}) being invoked recursively via a function
889 my @alpha = qw(A B C D E);
894 my $m = ("$alpha[$n]" . substr("0123", 0, $n+1)) =~
903 $s .= $m ? 'M' : '!M';
905 my $ret = '.*?' . ($n-1);
910 my $exp = '(n=0:1=A:(n=1:1=B:(n=2:1=C:(n=3:1=D:(n=4:<=[.*?3])'
911 . '1a=D:2=0123:M<=[.*?2])1a=C:2=012:M<=[.*?1])'
912 . '1a=B:2=01:M<=[.*?0])1a=A:2=0:M<=[.*?-1])';
913 is($s, $exp, 'recurse5');
917 # make sure that errors during compiling run-time code get trapped
922 my $code = '(?{$x=})';
923 eval { "a" =~ /^a$code/ };
924 like($@, qr/syntax error at \(eval \d+\) line \d+/, 'syntax error');
926 $code = '(?{BEGIN{die})';
927 eval { "a" =~ /^a$code/ };
929 qr/BEGIN failed--compilation aborted at \(eval \d+\) line \d+/,
933 $code = '(?{Foo::$bar})';
934 eval { "a" =~ /^a$code/ };
935 like($@, qr/Bad name after Foo:: at \(eval \d+\) line \d+/, 'UTF8 sytax error');
938 # make sure that 'use re eval' is propagated into compiling the
939 # pattern returned by (??{})
943 my $pat = 'B(??{1})C';
945 # compile-time outer code-block
946 ok("AB1CD" =~ /^A(??{$pat})D$/, "re eval propagated compile-time");
947 # run-time outer code-block
948 ok("AB1CD" =~ /^$A(??{$pat})D$/, "re eval propagated run-time");
951 # returning a ref to something that had set magic but wasn't
952 # PERL_MAGIC_qr triggered a false positive assertion failure
953 # The test is not so much concerned with it not matching,
954 # as with not failing the assertion
957 ok("a" !~ /^(a)(??{ \$1 })/, '(??{ ref })');
960 # make sure the uninit warning from returning an undef var
966 local $SIG{__WARN__} = sub { $warn .= $_[0] };
967 $u1 =~ /(??{$u2})/ or die;
968 like($warn, qr/value \$u1 in pattern match.*\n.*value at/, 'uninit');
971 # test that code blocks are called in scalar context
975 ok("" =~ /^(?{@a})$/, '(?{}) in scalar context');
976 is($^R, 1, '(?{}) in scalar context: $^R');
977 ok("1" =~ /^(??{@a})$/, '(??{}) in scalar context');
978 ok("foo" =~ /^(?(?{@a})foo|bar)$/, '(?(?{})|) in scalar context');
981 # BEGIN in compiled blocks shouldn't mess with $1 et al
985 my $code1 = '(B)(??{ BEGIN { "X" =~ /X/ } $1})(C)';
986 ok("ABBCA" =~ /^(.)(??{$code1})\1$/, '(?{}) BEGIN and $1');
987 my $code2 = '(B)(??{ BEGIN { "X" =~ /X/ } $1 =~ /(.)/ ? $1 : ""})(C)';
988 ok("ABBCA" =~ /^(.)(??{$code2})\1$/, '(?{}) BEGIN and $1 mark 2');
991 # check that the optimiser is applied to code blocks: see if aelem has
992 # been converted to aelemfast
998 'q() =~ qr/(?{$a[0]})/',
999 'use re q(eval); q() =~ q{(?{$a[0]})}',
1000 'use re q(eval); $c = q{(?{$a[0]})}; /$c/',
1001 'use re q(eval); $c = q{(?{$a[0]})}; /(?{1;})$c/',
1003 $out = runperl(switches => ["-Dt"], prog => $prog, stderr => 1);
1004 like($out, qr/aelemfast|Recompile perl with -DDEBUGGING/,
1005 "optimise: '$prog'");
1010 # Ensure that ?pat? matches exactly once, even when the run-time
1011 # pattern changes, and even when the presence of run-time (?{}) affects
1012 # how and when patterns are recompiled
1021 is($m, 'a', '?pat? with a,a,a');
1027 is($m, 'a', '?pat? with a,b,c');
1033 my $e = qq[(??{"$_"})];
1036 is($m, 'a', '?pat? with (??{a,a,a})');
1040 my $e = qq[(??{"$_"})];
1043 is($m, 'a', '?pat? with (??{a,b,c})');
1047 # this code won't actually fail, but it used to fail valgrind,
1048 # so it's here just to make sure valgrind doesn't fail again
1049 # While examining the ops of the secret anon sub wrapped around
1050 # the qr//, the pad of the sub was in scope, so cSVOPo_sv
1051 # got the const from the wrong pad. By having lots of $s's
1052 # (aka gvsv(*s)), this forces the targs of the consts which have
1053 # been moved to the pad, to have high indices.
1056 local our $s = "abc";
1057 my $qr = qr/^(?{1})$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s/;
1063 # code blocks in qr objects that are interpolated in arrays need
1064 # handling the same as if they were interpolated from scalar vars
1065 # (before this code would need 'use re "eval"')
1072 my @refs = (0, \@array, 2);
1074 tie @tied, 'Tie::StdArray';
1078 @array = ('A', qr/(??{$bb})/, 'C', qr/(??{$dd})/, 'E');
1084 ok("A B C D E=" =~ /@array/, 'bare interpolated array match');
1085 ok("A B C D E=" =~ qr/@array/, 'qr bare interpolated array match');
1086 ok("A B C D E=" =~ /@global/, 'bare interpolated global array match');
1087 ok("A B C D E=" =~ qr/@global/,
1088 'qr bare interpolated global array match');
1089 ok("A B C D E=" =~ /@{$refs[1]}/, 'bare interpolated ref array match');
1090 ok("A B C D E=" =~ qr/@{$refs[1]}/,
1091 'qr bare interpolated ref array match');
1092 ok("A B C D E=" =~ /@tied/, 'bare interpolated tied array match');
1093 ok("A B C D E=" =~ qr/@tied/, 'qr bare interpolated tied array match');
1094 ok("aA B C D E=" =~ /^a@array=$/, 'interpolated array match');
1095 ok("aA B C D E=" =~ qr/^a@array=$/, 'qr interpolated array match');
1096 ok("aA B C D E=" =~ /^a@global=$/, 'interpolated global array match');
1097 ok("aA B C D E=" =~ qr/^a@global=$/,
1098 'qr interpolated global array match');
1099 ok("aA B C D E=" =~ /^a@{$refs[1]}=$/, 'interpolated ref array match');
1100 ok("aA B C D E=" =~ qr/^a@{$refs[1]}=$/,
1101 'qr interpolated ref array match');
1102 ok("aA B C D E=" =~ /^a@tied=$/, 'interpolated tied array match');
1103 ok("aA B C D E=" =~ qr/^a@tied=$/, 'qr interpolated tied array match');
1107 ok("aA-B-C-D-E=" =~ /^a@{array}=$/,
1108 'interpolated array match with local sep');
1109 ok("aA-B-C-D-E=" =~ qr/^a@{array}=$/,
1110 'qr interpolated array match with local sep');
1111 ok("aA-B-C-D-E=" =~ /^a@{global}=$/,
1112 'interpolated global array match with local sep');
1113 ok("aA-B-C-D-E=" =~ qr/^a@{global}=$/,
1114 'qr interpolated global array match with local sep');
1115 ok("aA-B-C-D-E=" =~ /^a@{tied}=$/,
1116 'interpolated tied array match with local sep');
1117 ok("aA-B-C-D-E=" =~ qr/^a@{tied}=$/,
1118 'qr interpolated tied array match with local sep');
1121 # but don't handle the array ourselves in the presence of \Q etc
1123 @array = ('A', '(?{})');
1126 ok("aA (?{})=" =~ /^a\Q@{array}\E=$/,
1127 'interpolated array match with \Q');
1128 ok("aA (?{})=" =~ qr/^a\Q@{array}\E=$/,
1129 'qr interpolated array match with \Q');
1130 ok("aA (?{})=" =~ /^a\Q@{global}\E=$/,
1131 'interpolated global array match with \Q');
1132 ok("aA (?{})=" =~ qr/^a\Q@{global}\E=$/,
1133 'qr interpolated global array match with \Q');
1134 ok("aA (?{})=" =~ /^a\Q@{$refs[1]}\E=$/,
1135 'interpolated ref array match with \Q');
1136 ok("aA (?{})=" =~ qr/^a\Q@{$refs[1]}\E=$/,
1137 'qr interpolated ref array match with \Q');
1138 ok("aA (?{})=" =~ /^a\Q@{tied}\E=$/,
1139 'interpolated tied array match with \Q');
1140 ok("aA (?{})=" =~ qr/^a\Q@{tied}\E=$/,
1141 'qr interpolated tied array match with \Q');
1143 # and check it works with an empty array
1148 ok("a=" =~ /^a@array=$/, 'empty array match');
1149 ok("a=" =~ qr/^a@array=$/, 'qr empty array match');
1150 ok("a=" =~ /^a@global=$/, 'empty global array match');
1151 ok("a=" =~ qr/^a@global=$/, 'qr empty global array match');
1152 ok("a=" =~ /^a@tied=$/, 'empty tied array match');
1153 ok("a=" =~ qr/^a@tied=$/, 'qr empty tied array match');
1154 ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q');
1155 ok("a=" =~ qr/^a\Q@{array}\E=$/, 'qr empty array match with \Q'); # qr// counterpart of the plain-match test above
1156 ok("a=" =~ qr/^a\Q@{global}\E=$/,
1157 'qr empty global array match with \Q');
1158 ok("a=" =~ /^a\Q@{tied}\E=$/, 'empty tied array match with \Q');
1159 ok("a=" =~ qr/^a\Q@{tied}\E=$/, 'qr empty tied array match with \Q');
1161 # NB: these below are empty patterns, so they happen to use the
1162 # successful match from the line above
1164 ok("a=" =~ /@array/, 'empty array pattern');
1165 ok("a=" =~ qr/@array/, 'qr empty array pattern');
1166 ok("a=" =~ /@global/, 'empty global array pattern');
1167 ok("a=" =~ qr/@global/, 'qr empty global array pattern');
1168 ok("a=" =~ /@tied/, 'empty tied pattern');
1169 ok("a=" =~ qr/@tied/, 'qr empty tied pattern');
1170 ok("a=" =~ /\Q@array\E/, 'empty array pattern with \Q');
1171 ok("a=" =~ qr/\Q@array\E/, 'qr empty array pattern with \Q');
1172 ok("a=" =~ /\Q@global\E/, 'empty global array pattern with \Q');
1173 ok("a=" =~ qr/\Q@global\E/, 'qr empty global array pattern with \Q');
1174 ok("a=" =~ /\Q@tied\E/, 'empty tied pattern with \Q');
1175 ok("a=" =~ qr/\Q@tied\E/, 'qr empty tied pattern with \Q');
1176 ok("a=" =~ //, 'completely empty pattern');
1177 ok("a=" =~ qr//, 'qr completely empty pattern');
1181 { package o; use overload '""'=>sub { "abc" } }
1182 my $x = bless [],"o";
1184 (my $y_addr = "$y") =~ y/()//d; # REF(0x7fcb9c02) -> REF0x7fcb9c02
1185 # $y_addr =~ $y should be true, as should $y_addr =~ /(??{$y})/
1186 "abc$y_addr" =~ /(??{$x})(??{$y})/;
1187 is "$&", "abc$y_addr",
1188 '(??{$x}) does not leak cached qr to (??{\$x}) (match)';
1189 is scalar "abcabc" =~ /(??{$x})(??{$y})/, "",
1190 '(??{$x}) does not leak cached qr to (??{\$x}) (no match)';
1194 sub ReEvalTieTest::TIESCALAR {bless[], "ReEvalTieTest"}
1195 sub ReEvalTieTest::STORE{}
1196 sub ReEvalTieTest::FETCH { "$1" }
1197 tie my $t, "ReEvalTieTest";
1199 "aab" =~ /(a)((??{"b" =~ m|(.)|; $t}))/;
1200 is "[$1 $2]", "[a b]",
1201 '(??{$tied_former_overload}) sees the right $1 in FETCH';
1206 my $ref = bless \my $o, "o";
1207 my $foo = sub { push @matchsticks, scalar "abc" =~ /(??{$ref})/ };
1210 () = "$ref"; # flush AMAGIC flag on main
1212 is "@matchsticks", "1 ", 'qr magic is not cached on refs';
1216 my ($foo, $bar) = ("foo"x1000, "bar"x1000);
1217 "$foo$bar" =~ /(??{".*"})/;
1218 is "$&", "foo"x1000 . "bar"x1000,
1219 'padtmp swiping does not affect "$a$b" =~ /(??{})/'
1224 # this used to cause a double-free of the code_block struct
1225 # when re-running the compilation after spotting utf8.
1226 # This test doesn't catch it, but might panic, or fail under
1230 /$s(?{})\x{100}/ for '', '';
1234 # RT #130650 code blocks could get double-freed during a pattern
1238 # this used to panic or give ASAN errors
1240 like $@, qr/Reference to nonexistent group/, "RT #130650";
1244 # on exit from a pattern with multiple code blocks from different
1245 # CVs, PL_comppad wasn't being restored correctly
1248 # give first few pad slots known values
1249 my ($x1, $x2, $x3, $x4, $x5) = 101..105;
1250 # these vars are in a separate pad
1251 my $r = qr/((?{my ($y1, $y2) = 201..202; 1;})A){2}X/;
1252 # the first alt fails, causing a switch to this anon
1254 "AAA" =~ /$r|(?{my ($z1, $z2) = 301..302; 1;})A/;
1255 is $x1, 101, "RT #129881: x1";
1256 is $x2, 102, "RT #129881: x2";
1257 is $x3, 103, "RT #129881: x3";
1262 # savestack wasn't always being unwound on EVAL failure
1273 $max = $i if $max < $i;
1278 is $max, 2, "RT #126697";
1283 # Ensure that optimisation of OP_CONST into OP_MULTICONCAT doesn't
1284 # leave any freed ops in the execution path. This is associated
1285 # with rpeep() being called before optimize_optree(), which causes
1286 # gv/rv2sv to be prematurely optimised into gvsv, confusing
1287 # S_maybe_multiconcat when it tries to reorganise a concat subtree
1288 # into a multiconcat list
1292 local $b = "b"; # not lexical, so optimised to OP_GVSV
1294 ok /^a(??{ $b."c" })$/, "RT #132772 - compile time";
1295 ok /^$a(??{ $b."c" })$/, "RT #132772 - run time";
1296 my $qr = qr/^a(??{ $b."c" })$/;
1297 ok /$qr/, "RT #132772 - compile time time qr//";
1298 $qr = qr/(??{ $b."c" })$/;
1299 ok /^a$qr$/, "RT #132772 - compile time time qr// compound";
1300 $qr = qr/$a(??{ $b."c" })$/;
1301 ok /^$qr$/, "RT #132772 - run time time qr//";
1305 } # End of sub run_tests