3 # This is a home for regular expression tests that don't fit into
4 # the format supported by re/regexp.t. If you want to add a test
5 # that does fit that format, add it to re/re_tests, not here.
25 plan tests => 527; # Update this when adding/deleting tests.
27 run_tests() unless caller;
29 # test that runtime code without 'use re eval' is trapped
32 like($@, qr/Eval-group not allowed at runtime/, @_);
40 my $message = "Call code from qr //";
41 local $_ = 'var="foo"';
44 ok(/$a$a/ && $b eq '9', $message);
47 ok(/$a$a/ && $b eq '11', $message);
51 norun("$message norun 1");
56 norun("$message norun 2");
59 is($b, '14', $message);
65 my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/);
67 is($lex_res, 1, $message);
68 is($lex_a, 44, $message);
69 is($lex_c, 43, $message);
73 my $match = eval { /$a$c$a$d/ };
74 ok($@ && $@ =~ /Eval-group not allowed/ && !$match, $message);
75 is($b, '14', $message);
81 $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/);
83 is($lex_res, 1, $message);
84 is($lex_a, 44, $message);
85 is($lex_c, 43, $message);
90 our $a = bless qr /foo/ => 'Foo';
91 ok 'goodfood' =~ $a, "Reblessed qr // matches";
92 is($a, '(?^:foo)', "Reblessed qr // stringifies");
94 my $z = my $y = "\317\276"; # Byte representation of $x
96 ok $x =~ $a, "UTF-8 interpolation in qr //";
97 ok "a$a" =~ $x, "Stringified qr // preserves UTF-8";
98 ok "a$x" =~ /^a$a\z/, "Interpolated qr // preserves UTF-8";
99 ok "a$x" =~ /^a(??{$a})\z/,
100 "Postponed interpolation of qr // preserves UTF-8";
103 is(length qr /##/x, 9, "## in qr // doesn't corrupt memory; Bug 17776");
106 ok "$x$x" =~ /^$x(??{$x})\z/,
107 "Postponed UTF-8 string in UTF-8 re matches UTF-8";
108 ok "$y$x" =~ /^$y(??{$x})\z/,
109 "Postponed UTF-8 string in non-UTF-8 re matches UTF-8";
110 ok "$y$x" !~ /^$y(??{$y})\z/,
111 "Postponed non-UTF-8 string in non-UTF-8 re doesn't match UTF-8";
112 ok "$x$x" !~ /^$x(??{$y})\z/,
113 "Postponed non-UTF-8 string in UTF-8 re doesn't match UTF-8";
114 ok "$y$y" =~ /^$y(??{$y})\z/,
115 "Postponed non-UTF-8 string in non-UTF-8 re matches non-UTF8";
116 ok "$x$y" =~ /^$x(??{$y})\z/,
117 "Postponed non-UTF-8 string in UTF-8 re matches non-UTF8";
119 $y = $z; # Reset $y after upgrade.
120 ok "$x$y" !~ /^$x(??{$x})\z/,
121 "Postponed UTF-8 string in UTF-8 re doesn't match non-UTF-8";
122 ok "$y$y" !~ /^$y(??{$x})\z/,
123 "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8";
129 # Test if $^N and $+ work in (?{})
137 push @ctl_n, (defined $^N ? $^N : "undef");
138 push @plus, (defined $+ ? $+ : "undef");
149 # [ Expected result, Regex, Expected value(s) of $^N, Expected value(s) of $+ ]
150 [ 1, qr#^$nested_tags$#, "bla blubb bla", "a b a" ],
151 [ 1, qr#^($nested_tags)$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ],
152 [ 1, qr#^(|)$nested_tags$#, "bla blubb bla", "a b a" ],
153 [ 1, qr#^(?:|)$nested_tags$#, "bla blubb bla", "a b a" ],
154 [ 1, qr#^<(bl|bla)>$nested_tags<(/\1)>$#, "blubb /bla", "b /bla" ],
155 [ 1, qr#(??{"(|)"})$nested_tags$#, "bla blubb bla", "a b a" ],
156 [ 1, qr#^(??{"(bla|)"})$nested_tags$#, "bla blubb bla", "a b a" ],
157 [ 1, qr#^(??{"(|)"})(??{$nested_tags})$#, "bla blubb undef", "a b undef" ],
158 [ 1, qr#^(??{"(?:|)"})$nested_tags$#, "bla blubb bla", "a b a" ],
159 [ 1, qr#^((??{"(?:bla|)"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ],
160 [ 1, qr#^((??{"(?!)?"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ],
161 [ 1, qr#^((??{"(?:|<(/?bla)>)"}))((??{$nested_tags}))\1$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ],
162 [ 0, qr#^((??{"(?!)"}))?((??{$nested_tags}))(?!)$#, "bla blubb undef", "a b undef" ],
164 ) { #"#silence vim highlighting
168 my $match = (("<bla><blubb></blubb></bla>" =~ $test->[1]) ? 1 : 0);
169 push @ctl_n, (defined $^N ? $^N : "undef");
170 push @plus, (defined $+ ? $+ : "undef");
171 ok($test->[0] == $match, "match $c");
172 if ($test->[0] != $match) {
173 # unset @ctl_n and @plus
176 is("@ctl_n", $test->[2], "ctl_n $c");
177 is("@plus", $test->[3], "plus $c");
185 defined $_[0] ? $_[0] : "undef";
188 like("123", qr/^(\d)(((??{1 + $^N})))+$/, 'Bug 56194');
193 my $re = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})#;
194 my $re2 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})(|a(b)c|def)(??{"$^R"})#;
195 my $re3 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})(|a(b)c|def)(??{"$^R"})#;
197 local $re5 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})#;
198 my $re6 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#;
199 my $re7 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#;
207 # Expected values of $^N
208 # Expected values of $+
209 # Expected values of $1, $2, $3, $4 and $5
213 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(??{$^N})$#,
216 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
220 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$+})$#,
223 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
227 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$+})$#,
230 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
234 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$^N})$#,
237 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
241 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$^N})$#,
244 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
248 qr#^($re)(|a(b)c|def)(??{$^R})$#,
251 "\$1 = 123, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b",
258 "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b",
265 "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b",
269 qr#^(??{$re5})(|abc|def)(??{"$^R"})$#,
272 "\$1 = abc, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef",
276 qr#^(??{$re5})(|a(b)c|def)(??{"$^R"})$#,
279 "\$1 = abc, \$2 = b, \$3 = undef, \$4 = undef, \$5 = undef",
283 qr#^((\d+)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})))$#,
284 "1234 123 12 1 2 3 1234",
285 "1234 123 12 1 2 3 4",
286 "\$1 = 1234, \$2 = 1, \$3 = 2, \$4 = 3, \$5 = 4",
290 qr#^(\d+)($re6)($re6)($re6)$re6(($re6)$re6)$#,
291 "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 56",
292 "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 5",
293 "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 56",
297 qr#^((??{$re8}))($re7)($re7)($re7)$re7($re7)($re7(\2))$#,
298 "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 62",
299 "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 2",
300 "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 5",
307 my $match = $test->[0] =~ $test->[1];
308 my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5));
309 push @ctl_n, $f->($^N);
310 push @plus, $f->($+);
311 ok($match, "match $c; Bug 56194");
313 # unset $str, @ctl_n and @plus
317 is("@ctl_n", $test->[2], "ctl_n $c; Bug 56194");
318 is("@plus", $test->[3], "plus $c; Bug 56194");
319 is($str, $test->[4], "str $c; Bug 56194");
327 local $re4 = qr#(1)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})){2}(?{$^N})(|abc|def)(??{"$^R"})#;
329 my $match = "123abc3" =~ m/^(??{$re4})$/;
330 my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5),'$^R = '.$f->($^R));
331 push @ctl_n, $f->($^N);
332 push @plus, $f->($+);
333 ok($match, 'Bug 56194');
340 is("@ctl_n", "1 2 undef", 'Bug 56194');
341 is("@plus", "1 2 undef", 'Bug 56194');
343 "\$1 = undef, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef, \$^R = 3",
344 'Bug 56194 ($^R tweaked by 121070)');
348 "abcd"=~/(?<Char>.)(?&Char)(?{ 42 })/;
349 is("$^R", 42, 'Bug 121070 - use of (?&Char) should not clobber $^R');
350 "abcd"=~/(?<Char>.)(?&Char)(?{ 42 })(?{ 43 })/;
351 is("$^R", 43, 'related to 121070 - use of (?&Char) should not clobber $^R');
356 # re evals within \U, \Q etc shouldn't be seen by the lexer
359 ok('(?{1})' =~ /^\Q(?{1})\E$/, '\Q(?{1})\E');
360 ok('(?{1})' =~ /^\Q(?{\E1\}\)$/, '\Q(?{\E1\}\)');
361 eval {/^\U(??{"$a\Ea"})$/ }; norun('^\U(??{"$a\Ea"})$ norun');
362 eval {/^\L(??{"$B\Ea"})$/ }; norun('^\L(??{"$B\Ea"})$ norun');
364 ok('Ia' =~ /^\U(??{"$a\Ea"})$/, '^\U(??{"$a\Ea"})$');
365 ok('ja' =~ /^\L(??{"$B\Ea"})$/, '^\L(??{"$B\Ea"})$');
369 # Comprehensive (hopefully) tests of closure behaviour:
370 # i.e. when do (?{}) blocks get (re)compiled, and what instances
371 # of lexical vars do they close over?
373 # if the pattern string gets utf8 upgraded while concatenating,
374 # make sure a literal code block is still detected (by still
375 # compiling in the absence of use re 'eval')
380 ok("\x{80}\x{100}" =~ /^$s1(?{1})$s2$/, "utf8 upgrade");
383 my ($cr1, $cr2, $cr3, $cr4);
385 for my $x (qw(a b c)) {
386 my $bc = ($x ne 'a');
389 # the most basic: literal code should be in same scope
392 ok("A$x" =~ /^A(??{$x})$/, "[$x] literal code");
393 ok("\x{100}$x" =~ /^\x{100}(??{$x})$/, "[$x] literal code UTF8");
395 # the "don't recompile if pattern unchanged" mechanism
396 # shouldn't apply to code blocks - recompile every time
397 # to pick up new instances of variables
399 my $code1 = 'B(??{$x})';
400 my $code1u = $c80 . "\x{100}" . '(??{$x})';
403 norun("[$x] unvarying runtime code AA norun");
405 norun("[$x] unvarying runtime code AU norun");
406 eval {/^$c80\x{100}$code1$/};
407 norun("[$x] unvarying runtime code UA norun");
408 eval {/^$c80\x{101}$code1u$/};
409 norun("[$x] unvarying runtime code UU norun");
413 ok("AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA");
414 ok("A$c80\x{100}$x" =~ /^A$code1u$/,
415 "[$x] unvarying runtime code AU");
416 ok("$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/,
417 "[$x] unvarying runtime code UA");
418 ok("$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/,
419 "[$x] unvarying runtime code UU");
422 # mixed literal and run-time code blocks
424 my $code2 = 'B(??{$x})';
425 my $code2u = $c80 . "\x{100}" . '(??{$x})';
427 eval {/^A(??{$x})-$code2$/};
428 norun("[$x] literal+runtime AA norun");
429 eval {/^A(??{$x})-$code2u$/};
430 norun("[$x] literal+runtime AU norun");
431 eval {/^$c80\x{100}(??{$x})-$code2$/};
432 norun("[$x] literal+runtime UA norun");
433 eval {/^$c80\x{101}(??{$x})-$code2u$/};
434 norun("[$x] literal+runtime UU norun");
438 ok("A$x-B$x" =~ /^A(??{$x})-$code2$/,
439 "[$x] literal+runtime AA");
440 ok("A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/,
441 "[$x] literal+runtime AU");
442 ok("$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/,
443 "[$x] literal+runtime UA");
444 ok("$c80\x{101}$x-$c80\x{100}$x"
445 =~ /^$c80\x{101}(??{$x})-$code2u$/,
446 "[$x] literal+runtime UU");
449 # literal qr code only created once, naked
451 $cr1 //= qr/^A(??{$x})$/;
452 ok("Aa" =~ $cr1, "[$x] literal qr once naked");
454 # literal qr code only created once, embedded with text
456 $cr2 //= qr/B(??{$x})$/;
457 ok("ABa" =~ /^A$cr2/, "[$x] literal qr once embedded text");
459 # literal qr code only created once, embedded with text + lit code
461 $cr3 //= qr/C(??{$x})$/;
462 ok("A$x-BCa" =~ /^A(??{$x})-B$cr3/,
463 "[$x] literal qr once embedded text + lit code");
465 # literal qr code only created once, embedded with text + run code
467 $cr4 //= qr/C(??{$x})$/;
468 my $code3 = 'A(??{$x})';
470 eval {/^$code3-B$cr4/};
471 norun("[$x] literal qr once embedded text + run code norun");
474 ok("A$x-BCa" =~ /^$code3-B$cr4/,
475 "[$x] literal qr once embedded text + run code");
478 # literal qr code, naked
480 my $r1 = qr/^A(??{$x})$/;
481 ok("A$x" =~ $r1, "[$x] literal qr naked");
483 # literal qr code, embedded with text
485 my $r2 = qr/B(??{$x})$/;
486 ok("AB$x" =~ /^A$r2/, "[$x] literal qr embedded text");
488 # literal qr code, embedded with text + lit code
490 my $r3 = qr/C(??{$x})$/;
491 ok("A$x-BC$x" =~ /^A(??{$x})-B$r3/,
492 "[$x] literal qr embedded text + lit code");
494 # literal qr code, embedded with text + run code
496 my $r4 = qr/C(??{$x})$/;
497 my $code4 = '(??{$x})';
499 eval {/^A$code4-B$r4/};
500 norun("[$x] literal qr embedded text + run code");
503 ok("A$x-BC$x" =~ /^A$code4-B$r4/,
504 "[$x] literal qr embedded text + run code");
507 # nested qr in different scopes
509 my $code5 = '(??{$x})';
510 my $r5 = qr/C(??{$x})/;
513 eval {qr/$code5-C(??{$x})/}; norun("r6 norun");
516 $r6 = qr/$code5-C(??{$x})/;
522 for my $y (qw(d e f)) {
524 my $rr5 = qr/^A(??{"$x$y"})-$r5/;
526 ok("A$x$y-C$x" =~ $rr5,
527 "[$x-$y] literal qr + r5");
529 my $rr6 = qr/^A(??{"$x$y"})-$r6/;
531 ok("A$x$y-$x-C$x" =~ $rr6,
532 "[$x-$y] literal qr + r6");
537 my $yy = (qw(d e f))[$i];
539 ok("A$x$yy-C$x" =~ $rr5, "[$x-$yy] literal qr + r5, outside");
540 ok("A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})$/,
541 "[$x-$yy] literal qr + r5 + lit, outside");
546 ok("A$x$yy-$x-C$x" =~ $rr6,
547 "[$x-$yy] literal qr + r6, outside");
548 ok("A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/,
549 "[$x-$yy] literal qr + r6 +lit, outside");
553 # recursive subs should get lexical from the correct pad depth
558 ok("A$n" =~ /^A(??{$n})$/, "recurse($n)");
563 # for qr// containing run-time elements but with a compile-time
564 # code block, make sure the run-time bits are executed in the same
565 # pad they were compiled in
567 my $a = 'a'; # ensure outer and inner pads don't align
571 my $r = qr/^$b(??{$c})$d$/;
572 ok("bcd" =~ $r, "qr with run-time elements and code block");
575 # check that cascaded embedded regexes all see their own lexical
579 my ($r1, $r2, $r3, $r4);
580 my ($x1, $x2, $x3, $x4) = (5,6,7,8);
581 { my $x1 = 1; $r1 = qr/A(??{$x1})/; }
582 { my $x2 = 2; $r2 = qr/$r1(??{$x2})/; }
583 { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; }
584 { my $x4 = 4; $r4 = qr/$r3(??{$x4})/; }
585 ok("A1234" =~ /^$r4$/, "cascaded qr");
588 # and again, but in a loop, with no external references
589 # being maintained to the qr's
597 ok("A1234" =~ /^$r$/, "cascaded qr loop");
601 # and again, but compiling the qrs in an eval so there
602 # aren't even refs to the qrs from any ops
607 $r = eval q[ qr/$r(??{$x})/; ];
610 ok("A1234" =~ /^$r$/, "cascaded qr loop");
613 # have qrs with either literal code blocks or only embedded
614 # code blocks, but not both
617 my ($r1, $r2, $r3, $r4);
618 my ($x1, $x3) = (7,8);
619 { my $x1 = 1; $r1 = qr/A(??{$x1})/; }
620 { $r2 = qr/${r1}2/; }
621 { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; }
622 { $r4 = qr/${r3}4/; }
623 ok("A1234" =~ /^$r4$/, "cascaded qr mix 1");
624 ok("A12345" =~ /^${r4}5$/, "cascaded qr mix 2");
625 ok("A1234" =~ qr/^$r4$/ , "cascaded qr mix 3");
626 ok("A12345" =~ qr/^${r4}5$/, "cascaded qr mix 4");
629 # and make sure things are freed at the right time
631 sub Foo99::DESTROY { $Foo99::d++ }
635 my $x = bless [1], 'Foo99';
636 $r1 = eval 'qr/(??{$x->[0]})/';
638 my $r2 = eval 'qr/a$r1/';
640 ok(eval '"a1" =~ qr/^$r2$/', "match while in scope");
641 # make sure PL_reg_curpm isn't holding on to anything
643 is($Foo99::d, 0, "before scope exit");
645 ::is($Foo99::d, 1, "after scope exit");
647 # forward declared subs should Do The Right Thing with any anon CVs
648 # within them (i.e. pad_fixup_inner_anons() should work)
654 ok("Aa" =~ qr/^A(??{$x})$/, "forward qr compiletime");
655 ok("Aa" =~ qr/^$A(??{$x})$/, "forward qr runtime");
660 # test that run-time embedded code, when re-fed into toker,
661 # does all the right escapes
664 my $enc = eval 'use Encode; find_encoding("ascii")';
669 # note that most of the strings below are single-quoted, and the
670 # things within them, like '$y', *aren't* intended to interpolate
673 'a\\$y(?# (??{BEGIN{$x=1} "X1"})b(?# \Ux2\E)c\'d\\\\e\\\\Uf\\\\E';
675 ok(q{a$ybc'd\e\Uf\E} =~ /^$s1$/, "reparse");
676 is($x, 0, "reparse no BEGIN");
678 my $s2 = 'g\\$y# (??{{BEGIN{$x=2} "X3"}) \Ux3\E' . "\nh";
680 ok(q{a$ybc'd\\e\\Uf\\Eg$yh} =~ /^$s1$s2$/x, "reparse /x");
681 is($x, 0, "reparse /x no BEGIN");
686 # non-ascii in string as "<0xNNN>"
691 ($c< 32 || $c > 127) ? sprintf("<0x%x>", $c) : $1;
695 sub fmt { sprintf "hairy backslashes %s [%s] =~ /^%s/",
696 $_[0], esc_str($_[1]), esc_str($_[2]);
701 [ '', '', 'blank ' ],
702 [ "\x{100}", '\x{100}', 'single' ],
703 [ "\x{100}", "\x{100}", 'double' ])
708 [ "$b$q", "$b$b$b$q" ],
709 [ "$b$b$q", "$b$b$b$b$q" ],
710 [ "$b$b$b$q", "$b$b$b$b$b$b$q" ],
711 [ "$b$b$b$b$q","$b$b$b$b$b$b$b$b$q" ],
713 my ($s, $r) = @$pair;
715 my $ss = "$u->[0]$s";
718 my $cc = "$u->[1]$c";
720 ok($ss =~ /^$cc/, fmt("plain $u->[2]", $ss, $cc));
724 $ss = "$u->[0]\t${q}$chr41${b}x42$s";
725 $nine = $nine = "bad";
726 for my $use_qr ('', 'qr') {
727 $c = qq[(??{my \$z='{';]
728 . qq[$use_qr"$b${b}t$b$q$b${b}x41$b$b$b${b}x42"]
730 # (??{ qr/str/ }) goes through one less interpolation
731 # stage than (??{ qq/str/ })
732 $c =~ s{\\\\}{\\}g if ($use_qr eq 'qr');
737 eval {/^$cc/}; norun(fmt("code norun $u->[2]", $ss, $cc));
740 ok($ss =~ /^$cc/, fmt("code $u->[2]", $ss, $cc));
744 # Poor man's "use encoding 'ascii'".
745 # This causes a different code path in S_const_str()
747 no warnings 'deprecated';
748 local ${^ENCODING} = $enc;
749 use warnings 'deprecated';
751 ok($ss =~ /^$cc/, fmt("encode $u->[2]", $ss, $cc));
757 my $code1u = "(??{qw(\x{100})})";
758 eval {/^$code1u$/}; norun("reparse embedded unicode norun");
761 ok("\x{100}" =~ /^$code1u$/, "reparse embedded unicode");
765 # a non-pattern literal won't get code blocks parsed at compile time,
766 # but they must get parsed later on if 'use re eval' is in scope.
767 # Also check that unbalanced {}'s are parsed ok.
770 eval q["a{" =~ '^(??{"a{"})$'];
771 norun("non-pattern literal code norun");
772 eval {/^${\'(??{"a{"})'}$/};
773 norun("runtime code with unbalanced {} norun");
776 ok("a{" =~ '^a(??{"{"})$', "non-pattern literal code");
777 ok("a{" =~ /^a${\'(??{"{"})'}$/, "runtime code with unbalanced {}");
780 # make sure warnings come from the right place
785 local $SIG{__WARN__} = sub { $w .= "@_" };
788 my $r = qr/(?{$t=$s+1})/;
790 like($w, qr/pat_re_eval/, "warning main file");
792 # do it in an eval to get predictable line numbers
795 $r = qr/(?{$t=$s+1})/;
799 like($w, qr/ at \(eval \d+\) line 3/, "warning eval A");
804 my $c = '(?{$t=$s+1})';
808 like($w, qr/ at \(eval \d+\) line 1/, "warning eval B");
813 # * mixing all the different types of blocks (literal, qr/literal/,
815 # * backtracking (the Z+ alternation ensures CURLYX and full
816 # scope popping on backtracking)
821 return unless $depth;
823 my $r1 = qr/(??{"$s1-$depth"})/;
826 my $c1 = '(??{"$s2-$depth"})';
828 ok( "<12345-ABC-$depth-123-LMN-$depth-1234-PQR-$depth>"
829 . "<12345-ABC-$depth-123-LMN-$depth-1234-PQR-$depth>"
831 /^<(\d|Z+)+(??{"45-ABC-$depth-"})(\d|Z+)+$r1-\d+$c1>
832 <(\d|Z+)+(??{"45-ABC-$depth-"})(\d|Z+)+$r1-\d+$c1>$/x,
839 # nested (??{}) called from various levels of a recursive function
845 ok("A$n" =~ m{^A(??{ "0123" =~ /((??{$n}))/; $1 })$},
847 ok("A$n" !~ m{^A(??{ "0123" =~ /((??{$n}))/; "X" })$},
848 "recurse3($n) nomatch");
854 # nested (??{}) being invoked recursively via a function
859 my @alpha = qw(A B C D E);
864 my $m = ("$alpha[$n]" . substr("0123", 0, $n+1)) =~
868 "$n-0123" =~ m{^(\d)-(((??{$recurse4->($n+1)})))};
869 $s .= "i1=$1:<=[$2]";
870 $3; # NB - not stringified
875 $s .= $m ? 'M' : '!M';
877 my $ret = '.*?' . ($n-1);
882 my $exp = '(n=0:1=A:(n=1:1=B:(n=2:1=C:(n=3:1=D:(n=4:<=[.*?3])'
883 . 'i1=3:<=[0123]1a=D:M<=[.*?2])i1=2:<=[012]1a=C:M<=[.*?1])'
884 . 'i1=1:<=[01]1a=B:M<=[.*?0])i1=0:<=[0]1a=A:M<=[.*?-1])';
885 is($s, $exp, 'recurse4');
888 # single (??{}) being invoked recursively via a function
893 my @alpha = qw(A B C D E);
898 my $m = ("$alpha[$n]" . substr("0123", 0, $n+1)) =~
907 $s .= $m ? 'M' : '!M';
909 my $ret = '.*?' . ($n-1);
914 my $exp = '(n=0:1=A:(n=1:1=B:(n=2:1=C:(n=3:1=D:(n=4:<=[.*?3])'
915 . '1a=D:2=0123:M<=[.*?2])1a=C:2=012:M<=[.*?1])'
916 . '1a=B:2=01:M<=[.*?0])1a=A:2=0:M<=[.*?-1])';
917 is($s, $exp, 'recurse5');
921 # make sure that errors during compiling run-time code get trapped
926 my $code = '(?{$x=})';
927 eval { "a" =~ /^a$code/ };
928 like($@, qr/syntax error at \(eval \d+\) line \d+/, 'syntax error');
930 $code = '(?{BEGIN{die})';
931 eval { "a" =~ /^a$code/ };
933 qr/BEGIN failed--compilation aborted at \(eval \d+\) line \d+/,
937 $code = '(?{Foo::$bar})';
938 eval { "a" =~ /^a$code/ };
939 like($@, qr/Bad name after Foo:: at \(eval \d+\) line \d+/, 'UTF8 sytax error');
942 # make sure that 'use re eval' is propagated into compiling the
943 # pattern returned by (??{})
947 my $pat = 'B(??{1})C';
949 # compile-time outer code-block
950 ok("AB1CD" =~ /^A(??{$pat})D$/, "re eval propagated compile-time");
951 # run-time outer code-block
952 ok("AB1CD" =~ /^$A(??{$pat})D$/, "re eval propagated run-time");
955 # returning a ref to something that had set magic but wasn't
956 # PERL_MAGIC_qr triggered a false positive assertion failure
957 # The test is not so much concerned with it not matching,
958 # as with not failing the assertion
961 ok("a" !~ /^(a)(??{ \$1 })/, '(??{ ref })');
964 # make sure the uninit warning from returning an undef var
970 local $SIG{__WARN__} = sub { $warn .= $_[0] };
971 $u1 =~ /(??{$u2})/ or die;
972 like($warn, qr/value \$u1 in pattern match.*\n.*value at/, 'uninit');
975 # test that code blocks are called in scalar context
979 ok("" =~ /^(?{@a})$/, '(?{}) in scalar context');
980 is($^R, 1, '(?{}) in scalar context: $^R');
981 ok("1" =~ /^(??{@a})$/, '(??{}) in scalar context');
982 ok("foo" =~ /^(?(?{@a})foo|bar)$/, '(?(?{})|) in scalar context');
985 # BEGIN in compiled blocks shouldn't mess with $1 et al
989 my $code1 = '(B)(??{ BEGIN { "X" =~ /X/ } $1})(C)';
990 ok("ABBCA" =~ /^(.)(??{$code1})\1$/, '(?{}) BEGIN and $1');
991 my $code2 = '(B)(??{ BEGIN { "X" =~ /X/ } $1 =~ /(.)/ ? $1 : ""})(C)';
992 ok("ABBCA" =~ /^(.)(??{$code2})\1$/, '(?{}) BEGIN and $1 mark 2');
995 # check that the optimiser is applied to code blocks: see if aelem has
996 # been converted to aelemfast
1002 'q() =~ qr/(?{$a[0]})/',
1003 'use re q(eval); q() =~ q{(?{$a[0]})}',
1004 'use re q(eval); $c = q{(?{$a[0]})}; /$c/',
1005 'use re q(eval); $c = q{(?{$a[0]})}; /(?{1;})$c/',
1007 $out = runperl(switches => ["-Dt"], prog => $prog, stderr => 1);
1008 like($out, qr/aelemfast|Recompile perl with -DDEBUGGING/,
1009 "optimise: '$prog'");
1014 # Ensure that ?pat? matches exactly once, even when the run-time
1015 # pattern changes, and even when the presence of run-time (?{}) affects
1016 # how and when patterns are recompiled
1025 is($m, 'a', '?pat? with a,a,a');
1031 is($m, 'a', '?pat? with a,b,c');
1037 my $e = qq[(??{"$_"})];
1040 is($m, 'a', '?pat? with (??{a,a,a})');
1044 my $e = qq[(??{"$_"})];
1047 is($m, 'a', '?pat? with (??{a,b,c})');
1051 # this code won't actually fail, but it used to fail valgrind,
1052 # so it's here just to make sure valgrind doesn't fail again.
1053 # While examining the ops of the secret anon sub wrapped around
1054 # the qr//, the pad of the sub was in scope, so cSVOPo_sv
1055 # got the const from the wrong pad. By having lots of $s's
1056 # (aka gvsv(*s)), this forces the targs of the consts which have
1057 # been moved to the pad to have high indices.
1060 local our $s = "abc";
1061 my $qr = qr/^(?{1})$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s/;
1067 # code blocks in qr objects that are interpolated in arrays need
1068 # handling the same as if they were interpolated from scalar vars
1069 # (previously, this code would have needed 'use re "eval"')
1077 my @refs = (0, \@array, 2);
1079 tie @tied, 'Tie::StdArray';
1083 @array = ('A', qr/(??{$bb})/, 'C', qr/(??{$dd})/, 'E');
1089 ok("A B C D E=" =~ /@array/, 'bare interpolated array match');
1090 ok("A B C D E=" =~ qr/@array/, 'qr bare interpolated array match');
1091 ok("A B C D E=" =~ /@global/, 'bare interpolated global array match');
1092 ok("A B C D E=" =~ qr/@global/,
1093 'qr bare interpolated global array match');
1094 ok("A B C D E=" =~ /@{$refs[1]}/, 'bare interpolated ref array match');
1095 ok("A B C D E=" =~ qr/@{$refs[1]}/,
1096 'qr bare interpolated ref array match');
1097 ok("A B C D E=" =~ /@tied/, 'bare interpolated tied array match');
1098 ok("A B C D E=" =~ qr/@tied/, 'qr bare interpolated tied array match');
1099 ok("aA B C D E=" =~ /^a@array=$/, 'interpolated array match');
1100 ok("aA B C D E=" =~ qr/^a@array=$/, 'qr interpolated array match');
1101 ok("aA B C D E=" =~ /^a@global=$/, 'interpolated global array match');
1102 ok("aA B C D E=" =~ qr/^a@global=$/,
1103 'qr interpolated global array match');
1104 ok("aA B C D E=" =~ /^a@{$refs[1]}=$/, 'interpolated ref array match');
1105 ok("aA B C D E=" =~ qr/^a@{$refs[1]}=$/,
1106 'qr interpolated ref array match');
1107 ok("aA B C D E=" =~ /^a@tied=$/, 'interpolated tied array match');
1108 ok("aA B C D E=" =~ qr/^a@tied=$/, 'qr interpolated tied array match');
1112 ok("aA-B-C-D-E=" =~ /^a@{array}=$/,
1113 'interpolated array match with local sep');
1114 ok("aA-B-C-D-E=" =~ qr/^a@{array}=$/,
1115 'qr interpolated array match with local sep');
1116 ok("aA-B-C-D-E=" =~ /^a@{global}=$/,
1117 'interpolated global array match with local sep');
1118 ok("aA-B-C-D-E=" =~ qr/^a@{global}=$/,
1119 'qr interpolated global array match with local sep');
1120 ok("aA-B-C-D-E=" =~ /^a@{tied}=$/,
1121 'interpolated tied array match with local sep');
1122 ok("aA-B-C-D-E=" =~ qr/^a@{tied}=$/,
1123 'qr interpolated tied array match with local sep');
1126 # but don't handle the array ourselves in the presence of \Q etc
1128 @array = ('A', '(?{})');
1131 ok("aA (?{})=" =~ /^a\Q@{array}\E=$/,
1132 'interpolated array match with \Q');
1133 ok("aA (?{})=" =~ qr/^a\Q@{array}\E=$/,
1134 'qr interpolated array match with \Q');
1135 ok("aA (?{})=" =~ /^a\Q@{global}\E=$/,
1136 'interpolated global array match with \Q');
1137 ok("aA (?{})=" =~ qr/^a\Q@{global}\E=$/,
1138 'qr interpolated global array match with \Q');
1139 ok("aA (?{})=" =~ /^a\Q@{$refs[1]}\E=$/,
1140 'interpolated ref array match with \Q');
1141 ok("aA (?{})=" =~ qr/^a\Q@{$refs[1]}\E=$/,
1142 'qr interpolated ref array match with \Q');
1143 ok("aA (?{})=" =~ /^a\Q@{tied}\E=$/,
1144 'interpolated tied array match with \Q');
1145 ok("aA (?{})=" =~ qr/^a\Q@{tied}\E=$/,
1146 'qr interpolated tied array match with \Q');
1148 # and check it works with an empty array
1153 ok("a=" =~ /^a@array=$/, 'empty array match');
1154 ok("a=" =~ qr/^a@array=$/, 'qr empty array match');
1155 ok("a=" =~ /^a@global=$/, 'empty global array match');
1156 ok("a=" =~ qr/^a@global=$/, 'qr empty global array match');
1157 ok("a=" =~ /^a@tied=$/, 'empty tied array match');
1158 ok("a=" =~ qr/^a@tied=$/, 'qr empty tied array match');
1159 ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q');
1160 ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q');
1161 ok("a=" =~ qr/^a\Q@{global}\E=$/,
1162 'qr empty global array match with \Q');
1163 ok("a=" =~ /^a\Q@{tied}\E=$/, 'empty tied array match with \Q');
1164 ok("a=" =~ qr/^a\Q@{tied}\E=$/, 'qr empty tied array match with \Q');
1166 # NB: these below are empty patterns, so they happen to use the
1167 # successful match from the line above
1169 ok("a=" =~ /@array/, 'empty array pattern');
1170 ok("a=" =~ qr/@array/, 'qr empty array pattern');
1171 ok("a=" =~ /@global/, 'empty global array pattern');
1172 ok("a=" =~ qr/@global/, 'qr empty global array pattern');
1173 ok("a=" =~ /@tied/, 'empty tied pattern');
1174 ok("a=" =~ qr/@tied/, 'qr empty tied pattern');
1175 ok("a=" =~ /\Q@array\E/, 'empty array pattern with \Q');
1176 ok("a=" =~ qr/\Q@array\E/, 'qr empty array pattern with \Q');
1177 ok("a=" =~ /\Q@global\E/, 'empty global array pattern with \Q');
1178 ok("a=" =~ qr/\Q@global\E/, 'qr empty global array pattern with \Q');
1179 ok("a=" =~ /\Q@tied\E/, 'empty tied pattern with \Q');
1180 ok("a=" =~ qr/\Q@tied\E/, 'qr empty tied pattern with \Q');
1181 ok("a=" =~ //, 'completely empty pattern');
1182 ok("a=" =~ qr//, 'qr completely empty pattern');
1186 { package o; use overload '""'=>sub { "abc" } }
1187 my $x = bless [],"o";
1189 (my $y_addr = "$y") =~ y/()//d; # REF(0x7fcb9c02) -> REF0x7fcb9c02
1190 # $y_addr =~ $y should be true, as should $y_addr =~ /(??{$y})/
1191 "abc$y_addr" =~ /(??{$x})(??{$y})/;
1192 is "$&", "abc$y_addr",
1193 '(??{$x}) does not leak cached qr to (??{\$x}) (match)';
1194 is scalar "abcabc" =~ /(??{$x})(??{$y})/, "",
1195 '(??{$x}) does not leak cached qr to (??{\$x}) (no match)';
1199 sub ReEvalTieTest::TIESCALAR {bless[], "ReEvalTieTest"}
1200 sub ReEvalTieTest::STORE{}
1201 sub ReEvalTieTest::FETCH { "$1" }
1202 tie my $t, "ReEvalTieTest";
1204 "aab" =~ /(a)((??{"b" =~ m|(.)|; $t}))/;
1205 is "[$1 $2]", "[a b]",
1206 '(??{$tied_former_overload}) sees the right $1 in FETCH';
1211 my $ref = bless \my $o, "o";
1212 my $foo = sub { push @matchsticks, scalar "abc" =~ /(??{$ref})/ };
1215 () = "$ref"; # flush AMAGIC flag on main
1217 is "@matchsticks", "1 ", 'qr magic is not cached on refs';
1221 my ($foo, $bar) = ("foo"x1000, "bar"x1000);
1222 "$foo$bar" =~ /(??{".*"})/;
1223 is "$&", "foo"x1000 . "bar"x1000,
1224 'padtmp swiping does not affect "$a$b" =~ /(??{})/'
1227 } # End of sub run_tests