This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix a segfault in run-time qr//s with (?{})
[perl5.git] / t / re / pat_re_eval.t
CommitLineData
0f289c68
YO
1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t. If you want to add a test
5# that does fit that format, add it to re/re_tests, not here.
6
7use strict;
8use warnings;
04e69edb 9use Config;
0f289c68
YO
10use 5.010;
11
12
13sub run_tests;
14
15$| = 1;
16
17
18BEGIN {
19 chdir 't' if -d 't';
20 @INC = ('../lib','.');
90b541eb 21 require './test.pl';
0669fd04 22 skip_all_if_miniperl("no dynamic loading on miniperl, no re");
0f289c68
YO
23}
24
25
da1f5b59 26plan tests => 464; # Update this when adding/deleting tests.
0f289c68
YO
27
28run_tests() unless caller;
29
1008bb63
DM
30# test that runtime code without 'use re eval' is trapped
31
32sub norun {
33 like($@, qr/Eval-group not allowed at runtime/, @_);
34}
35
0f289c68
YO
36#
37# Tests start here.
38#
39sub run_tests {
40 {
f245da07 41 my $message = "Call code from qr //";
0f289c68
YO
42 local $_ = 'var="foo"';
43 $a = qr/(?{++$b})/;
44 $b = 7;
f245da07 45 ok(/$a$a/ && $b eq '9', $message);
0f289c68
YO
46
47 my $c="$a";
f245da07 48 ok(/$a$a/ && $b eq '11', $message);
0f289c68
YO
49
50 undef $@;
51 eval {/$c/};
1008bb63 52 norun("$message norun 1");
0f289c68 53
1008bb63
DM
54
55 {
56 eval {/$a$c$a/};
57 norun("$message norun 2");
58 use re "eval";
59 /$a$c$a/;
60 is($b, '14', $message);
61 }
0f289c68
YO
62
63 our $lex_a = 43;
64 our $lex_b = 17;
65 our $lex_c = 27;
66 my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/);
67
f245da07
NC
68 is($lex_res, 1, $message);
69 is($lex_a, 44, $message);
70 is($lex_c, 43, $message);
0f289c68 71
0f289c68 72 undef $@;
b30fcab9
DM
73 my $d = '(?{1})';
74 my $match = eval { /$a$c$a$d/ };
f245da07
NC
75 ok($@ && $@ =~ /Eval-group not allowed/ && !$match, $message);
76 is($b, '14', $message);
0f289c68
YO
77
78 $lex_a = 2;
79 $lex_a = 43;
80 $lex_b = 17;
81 $lex_c = 27;
82 $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/);
83
f245da07
NC
84 is($lex_res, 1, $message);
85 is($lex_a, 44, $message);
86 is($lex_c, 43, $message);
0f289c68
YO
87
88 }
89
90 {
91 our $a = bless qr /foo/ => 'Foo';
92 ok 'goodfood' =~ $a, "Reblessed qr // matches";
de26e0cc 93 is($a, '(?^:foo)', "Reblessed qr // stringifies");
0f289c68
YO
94 my $x = "\x{3fe}";
95 my $z = my $y = "\317\276"; # Byte representation of $x
96 $a = qr /$x/;
97 ok $x =~ $a, "UTF-8 interpolation in qr //";
98 ok "a$a" =~ $x, "Stringified qr // preserves UTF-8";
99 ok "a$x" =~ /^a$a\z/, "Interpolated qr // preserves UTF-8";
100 ok "a$x" =~ /^a(??{$a})\z/,
101 "Postponed interpolation of qr // preserves UTF-8";
bb535cf1
NC
102
103
104 is(length qr /##/x, 9, "## in qr // doesn't corrupt memory; Bug 17776");
105
0f289c68 106 {
0f289c68
YO
107 ok "$x$x" =~ /^$x(??{$x})\z/,
108 "Postponed UTF-8 string in UTF-8 re matches UTF-8";
109 ok "$y$x" =~ /^$y(??{$x})\z/,
110 "Postponed UTF-8 string in non-UTF-8 re matches UTF-8";
111 ok "$y$x" !~ /^$y(??{$y})\z/,
112 "Postponed non-UTF-8 string in non-UTF-8 re doesn't match UTF-8";
113 ok "$x$x" !~ /^$x(??{$y})\z/,
114 "Postponed non-UTF-8 string in UTF-8 re doesn't match UTF-8";
115 ok "$y$y" =~ /^$y(??{$y})\z/,
116 "Postponed non-UTF-8 string in non-UTF-8 re matches non-UTF8";
117 ok "$x$y" =~ /^$x(??{$y})\z/,
118 "Postponed non-UTF-8 string in UTF-8 re matches non-UTF8";
119
120 $y = $z; # Reset $y after upgrade.
121 ok "$x$y" !~ /^$x(??{$x})\z/,
122 "Postponed UTF-8 string in UTF-8 re doesn't match non-UTF-8";
123 ok "$y$y" !~ /^$y(??{$x})\z/,
124 "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8";
125 }
126 }
127
128
129 {
1008bb63 130 # Test if $^N and $+ work in (?{})
0f289c68
YO
131 our @ctl_n = ();
132 our @plus = ();
133 our $nested_tags;
134 $nested_tags = qr{
135 <
136 ((\w)+)
137 (?{
138 push @ctl_n, (defined $^N ? $^N : "undef");
139 push @plus, (defined $+ ? $+ : "undef");
140 })
141 >
142 (??{$nested_tags})*
143 </\s* \w+ \s*>
144 }x;
145
146
147 my $c = 0;
148 for my $test (
149 # Test structure:
150 # [ Expected result, Regex, Expected value(s) of $^N, Expected value(s) of $+ ]
151 [ 1, qr#^$nested_tags$#, "bla blubb bla", "a b a" ],
152 [ 1, qr#^($nested_tags)$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ],
153 [ 1, qr#^(|)$nested_tags$#, "bla blubb bla", "a b a" ],
154 [ 1, qr#^(?:|)$nested_tags$#, "bla blubb bla", "a b a" ],
155 [ 1, qr#^<(bl|bla)>$nested_tags<(/\1)>$#, "blubb /bla", "b /bla" ],
156 [ 1, qr#(??{"(|)"})$nested_tags$#, "bla blubb bla", "a b a" ],
157 [ 1, qr#^(??{"(bla|)"})$nested_tags$#, "bla blubb bla", "a b a" ],
158 [ 1, qr#^(??{"(|)"})(??{$nested_tags})$#, "bla blubb undef", "a b undef" ],
159 [ 1, qr#^(??{"(?:|)"})$nested_tags$#, "bla blubb bla", "a b a" ],
160 [ 1, qr#^((??{"(?:bla|)"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ],
161 [ 1, qr#^((??{"(?!)?"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ],
162 [ 1, qr#^((??{"(?:|<(/?bla)>)"}))((??{$nested_tags}))\1$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ],
163 [ 0, qr#^((??{"(?!)"}))?((??{$nested_tags}))(?!)$#, "bla blubb undef", "a b undef" ],
164
165 ) { #"#silence vim highlighting
166 $c++;
167 @ctl_n = ();
168 @plus = ();
169 my $match = (("<bla><blubb></blubb></bla>" =~ $test->[1]) ? 1 : 0);
170 push @ctl_n, (defined $^N ? $^N : "undef");
171 push @plus, (defined $+ ? $+ : "undef");
172 ok($test->[0] == $match, "match $c");
173 if ($test->[0] != $match) {
174 # unset @ctl_n and @plus
175 @ctl_n = @plus = ();
176 }
de26e0cc
NC
177 is("@ctl_n", $test->[2], "ctl_n $c");
178 is("@plus", $test->[3], "plus $c");
0f289c68
YO
179 }
180 }
181
182 {
0f289c68
YO
183 our $f;
184 local $f;
185 $f = sub {
186 defined $_[0] ? $_[0] : "undef";
187 };
188
4b0d13b9 189 like("123", qr/^(\d)(((??{1 + $^N})))+$/, 'Bug 56194');
0f289c68
YO
190
191 our @ctl_n;
192 our @plus;
193
194 my $re = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})#;
195 my $re2 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})(|a(b)c|def)(??{"$^R"})#;
196 my $re3 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})(|a(b)c|def)(??{"$^R"})#;
197 our $re5;
198 local $re5 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})#;
199 my $re6 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#;
200 my $re7 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#;
201 my $re8 = qr/(\d+)/;
202 my $c = 0;
203 for my $test (
204 # Test structure:
205 # [
206 # String to match
207 # Regex too match
208 # Expected values of $^N
209 # Expected values of $+
210 # Expected values of $1, $2, $3, $4 and $5
211 # ]
212 [
213 "1233",
214 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(??{$^N})$#,
215 "1 2 3 3",
216 "1 2 3 3",
217 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
218 ],
219 [
220 "1233",
221 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$+})$#,
222 "1 2 3 3",
223 "1 2 3 3",
224 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
225 ],
226 [
227 "1233",
228 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$+})$#,
229 "1 2 3 3",
230 "1 2 3 3",
231 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
232 ],
233 [
234 "1233",
235 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$^N})$#,
236 "1 2 3 3",
237 "1 2 3 3",
238 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
239 ],
240 [
241 "1233",
242 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$^N})$#,
243 "1 2 3 3",
244 "1 2 3 3",
245 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef",
246 ],
247 [
248 "123abc3",
249 qr#^($re)(|a(b)c|def)(??{$^R})$#,
250 "1 2 3 abc",
251 "1 2 3 b",
252 "\$1 = 123, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b",
253 ],
254 [
255 "123abc3",
256 qr#^($re2)$#,
257 "1 2 3 123abc3",
258 "1 2 3 b",
259 "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b",
260 ],
261 [
262 "123abc3",
263 qr#^($re3)$#,
264 "1 2 123abc3",
265 "1 2 b",
266 "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b",
267 ],
268 [
269 "123abc3",
270 qr#^(??{$re5})(|abc|def)(??{"$^R"})$#,
271 "1 2 abc",
272 "1 2 abc",
273 "\$1 = abc, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef",
274 ],
275 [
276 "123abc3",
277 qr#^(??{$re5})(|a(b)c|def)(??{"$^R"})$#,
278 "1 2 abc",
279 "1 2 b",
280 "\$1 = abc, \$2 = b, \$3 = undef, \$4 = undef, \$5 = undef",
281 ],
282 [
283 "1234",
284 qr#^((\d+)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})))$#,
285 "1234 123 12 1 2 3 1234",
286 "1234 123 12 1 2 3 4",
287 "\$1 = 1234, \$2 = 1, \$3 = 2, \$4 = 3, \$5 = 4",
288 ],
289 [
290 "1234556",
291 qr#^(\d+)($re6)($re6)($re6)$re6(($re6)$re6)$#,
292 "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 56",
293 "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 5",
294 "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 56",
295 ],
296 [
297 "12345562",
298 qr#^((??{$re8}))($re7)($re7)($re7)$re7($re7)($re7(\2))$#,
299 "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 62",
300 "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 2",
301 "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 5",
302 ],
303 ) {
304 $c++;
305 @ctl_n = ();
306 @plus = ();
307 undef $^R;
308 my $match = $test->[0] =~ $test->[1];
309 my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5));
310 push @ctl_n, $f->($^N);
311 push @plus, $f->($+);
bb535cf1 312 ok($match, "match $c; Bug 56194");
0f289c68
YO
313 if (not $match) {
314 # unset $str, @ctl_n and @plus
315 $str = "";
316 @ctl_n = @plus = ();
317 }
bb535cf1
NC
318 is("@ctl_n", $test->[2], "ctl_n $c; Bug 56194");
319 is("@plus", $test->[3], "plus $c; Bug 56194");
320 is($str, $test->[4], "str $c; Bug 56194");
0f289c68 321 }
0f289c68 322
1008bb63 323 {
0f289c68
YO
324 @ctl_n = ();
325 @plus = ();
326
327 our $re4;
328 local $re4 = qr#(1)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})){2}(?{$^N})(|abc|def)(??{"$^R"})#;
329 undef $^R;
330 my $match = "123abc3" =~ m/^(??{$re4})$/;
331 my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5),'$^R = '.$f->($^R));
332 push @ctl_n, $f->($^N);
333 push @plus, $f->($+);
4b0d13b9 334 ok($match, 'Bug 56194');
0f289c68
YO
335 if (not $match) {
336 # unset $str
337 @ctl_n = ();
338 @plus = ();
339 $str = "";
340 }
4b0d13b9
NC
341 is("@ctl_n", "1 2 undef", 'Bug 56194');
342 is("@plus", "1 2 undef", 'Bug 56194');
343 is($str,
344 "\$1 = undef, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef, \$^R = undef",
345 'Bug 56194');
0f289c68
YO
346 }
347 }
348
d3cec5e5
DM
349 {
350 # re evals within \U, \Q etc shouldn't be seen by the lexer
351 local our $a = "i";
352 local our $B = "J";
353 ok('(?{1})' =~ /^\Q(?{1})\E$/, '\Q(?{1})\E');
354 ok('(?{1})' =~ /^\Q(?{\E1\}\)$/, '\Q(?{\E1\}\)');
1008bb63
DM
355 eval {/^\U(??{"$a\Ea"})$/ }; norun('^\U(??{"$a\Ea"})$ norun');
356 eval {/^\L(??{"$B\Ea"})$/ }; norun('^\L(??{"$B\Ea"})$ norun');
d3cec5e5
DM
357 use re 'eval';
358 ok('Ia' =~ /^\U(??{"$a\Ea"})$/, '^\U(??{"$a\Ea"})$');
359 ok('ja' =~ /^\L(??{"$B\Ea"})$/, '^\L(??{"$B\Ea"})$');
360 }
361
a39c66b9
DM
362 {
363 # Comprehensive (hopefully) tests of closure behaviour:
364 # i.e. when do (?{}) blocks get (re)compiled, and what instances
365 # of lexical vars do they close over?
366
947535e3
DM
367 # if the pattern string gets utf8 upgraded while concatenating,
368 # make sure a literal code block is still detected (by still
369 # compiling in the absence of use re 'eval')
370
371 {
372 my $s1 = "\x{80}";
373 my $s2 = "\x{100}";
374 ok("\x{80}\x{100}" =~ /^$s1(?{1})$s2$/, "utf8 upgrade");
375 }
376
a39c66b9
DM
377 my ($cr1, $cr2, $cr3, $cr4);
378
a39c66b9
DM
379 for my $x (qw(a b c)) {
380 my $bc = ($x ne 'a');
2bd8e0da 381 my $c80 = chr(0x80);
a39c66b9
DM
382
383 # the most basic: literal code should be in same scope
384 # as the parent
385
2bd8e0da
DM
386 ok("A$x" =~ /^A(??{$x})$/, "[$x] literal code");
387 ok("\x{100}$x" =~ /^\x{100}(??{$x})$/, "[$x] literal code UTF8");
a39c66b9
DM
388
389 # the "don't recompile if pattern unchanged" mechanism
390 # shouldn't apply to code blocks - recompile every time
391 # to pick up new instances of variables
392
2bd8e0da
DM
393 my $code1 = 'B(??{$x})';
394 my $code1u = $c80 . "\x{100}" . '(??{$x})';
1008bb63
DM
395
396 eval {/^A$code1$/};
397 norun("[$x] unvarying runtime code AA norun");
398 eval {/^A$code1u$/};
399 norun("[$x] unvarying runtime code AU norun");
400 eval {/^$c80\x{100}$code1$/};
401 norun("[$x] unvarying runtime code UA norun");
402 eval {/^$c80\x{101}$code1u$/};
403 norun("[$x] unvarying runtime code UU norun");
404
405 {
406 use re 'eval';
407 ok("AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA");
408 ok("A$c80\x{100}$x" =~ /^A$code1u$/,
409 "[$x] unvarying runtime code AU");
410 ok("$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/,
411 "[$x] unvarying runtime code UA");
412 ok("$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/,
413 "[$x] unvarying runtime code UU");
414 }
a39c66b9
DM
415
416 # mixed literal and run-time code blocks
417
2bd8e0da
DM
418 my $code2 = 'B(??{$x})';
419 my $code2u = $c80 . "\x{100}" . '(??{$x})';
1008bb63
DM
420
421 eval {/^A(??{$x})-$code2$/};
422 norun("[$x] literal+runtime AA norun");
423 eval {/^A(??{$x})-$code2u$/};
424 norun("[$x] literal+runtime AU norun");
425 eval {/^$c80\x{100}(??{$x})-$code2$/};
426 norun("[$x] literal+runtime UA norun");
427 eval {/^$c80\x{101}(??{$x})-$code2u$/};
428 norun("[$x] literal+runtime UU norun");
429
430 {
431 use re 'eval';
432 ok("A$x-B$x" =~ /^A(??{$x})-$code2$/,
433 "[$x] literal+runtime AA");
434 ok("A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/,
435 "[$x] literal+runtime AU");
436 ok("$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/,
437 "[$x] literal+runtime UA");
438 ok("$c80\x{101}$x-$c80\x{100}$x"
439 =~ /^$c80\x{101}(??{$x})-$code2u$/,
440 "[$x] literal+runtime UU");
441 }
fdfac248 442
a39c66b9
DM
443 # literal qr code only created once, naked
444
445 $cr1 //= qr/^A(??{$x})$/;
d63c20f2 446 ok("Aa" =~ $cr1, "[$x] literal qr once naked");
a39c66b9
DM
447
448 # literal qr code only created once, embedded with text
449
450 $cr2 //= qr/B(??{$x})$/;
346d3070 451 ok("ABa" =~ /^A$cr2/, "[$x] literal qr once embedded text");
a39c66b9
DM
452
453 # literal qr code only created once, embedded with text + lit code
454
455 $cr3 //= qr/C(??{$x})$/;
346d3070 456 ok("A$x-BCa" =~ /^A(??{$x})-B$cr3/,
a39c66b9
DM
457 "[$x] literal qr once embedded text + lit code");
458
459 # literal qr code only created once, embedded with text + run code
460
461 $cr4 //= qr/C(??{$x})$/;
462 my $code3 = 'A(??{$x})';
fbf5ab7b 463
1008bb63
DM
464 eval {/^$code3-B$cr4/};
465 norun("[$x] literal qr once embedded text + run code norun");
466 {
467 use re 'eval';
468 ok("A$x-BCa" =~ /^$code3-B$cr4/,
a39c66b9 469 "[$x] literal qr once embedded text + run code");
1008bb63 470 }
a39c66b9
DM
471
472 # literal qr code, naked
473
474 my $r1 = qr/^A(??{$x})$/;
d63c20f2 475 ok("A$x" =~ $r1, "[$x] literal qr naked");
a39c66b9
DM
476
477 # literal qr code, embedded with text
478
479 my $r2 = qr/B(??{$x})$/;
629cd4f3 480 ok("AB$x" =~ /^A$r2/, "[$x] literal qr embedded text");
a39c66b9
DM
481
482 # literal qr code, embedded with text + lit code
483
484 my $r3 = qr/C(??{$x})$/;
629cd4f3 485 ok("A$x-BC$x" =~ /^A(??{$x})-B$r3/,
a39c66b9
DM
486 "[$x] literal qr embedded text + lit code");
487
488 # literal qr code, embedded with text + run code
489
490 my $r4 = qr/C(??{$x})$/;
491 my $code4 = '(??{$x})';
fbf5ab7b 492
1008bb63
DM
493 eval {/^A$code4-B$r4/};
494 norun("[$x] literal qr embedded text + run code");
fbf5ab7b 495 {
1008bb63
DM
496 use re 'eval';
497 ok("A$x-BC$x" =~ /^A$code4-B$r4/,
498 "[$x] literal qr embedded text + run code");
fbf5ab7b
DM
499 }
500
a39c66b9
DM
501 # nested qr in different scopes
502
503 my $code5 = '(??{$x})';
629cd4f3 504 my $r5 = qr/C(??{$x})/;
fbf5ab7b 505
1008bb63
DM
506 my $r6;
507 eval {qr/$code5-C(??{$x})/}; norun("r6 norun");
508 {
509 use re 'eval';
510 $r6 = qr/$code5-C(??{$x})/;
511 }
a39c66b9
DM
512
513 my @rr5;
514 my @rr6;
515
516 for my $y (qw(d e f)) {
517
518 my $rr5 = qr/^A(??{"$x$y"})-$r5/;
519 push @rr5, $rr5;
629cd4f3 520 ok("A$x$y-C$x" =~ $rr5,
a39c66b9
DM
521 "[$x-$y] literal qr + r5");
522
523 my $rr6 = qr/^A(??{"$x$y"})-$r6/;
524 push @rr6, $rr6;
629cd4f3 525 ok("A$x$y-$x-C$x" =~ $rr6,
a39c66b9
DM
526 "[$x-$y] literal qr + r6");
527 }
528
529 for my $i (0,1,2) {
530 my $y = 'Y';
531 my $yy = (qw(d e f))[$i];
532 my $rr5 = $rr5[$i];
629cd4f3
DM
533 ok("A$x$yy-C$x" =~ $rr5, "[$x-$yy] literal qr + r5, outside");
534 ok("A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})$/,
a39c66b9
DM
535 "[$x-$yy] literal qr + r5 + lit, outside");
536
629cd4f3 537
a39c66b9
DM
538 my $rr6 = $rr6[$i];
539 push @rr6, $rr6;
629cd4f3 540 ok("A$x$yy-$x-C$x" =~ $rr6,
a39c66b9 541 "[$x-$yy] literal qr + r6, outside");
629cd4f3 542 ok("A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/,
a39c66b9
DM
543 "[$x-$yy] literal qr + r6 +lit, outside");
544 }
545 }
68e2671b
DM
546
547 # recursive subs should get lexical from the correct pad depth
548
549 sub recurse {
550 my ($n) = @_;
551 return if $n > 2;
552 ok("A$n" =~ /^A(??{$n})$/, "recurse($n)");
553 recurse($n+1);
554 }
555 recurse(0);
d63c20f2
DM
556
557 # for qr// containing run-time elements but with a compile-time
558 # code block, make sure the run-time bits are executed in the same
559 # pad they were compiled in
560 {
561 my $a = 'a'; # ensure outer and inner pads don't align
562 my $b = 'b';
563 my $c = 'c';
564 my $d = 'd';
565 my $r = qr/^$b(??{$c})$d$/;
566 ok("bcd" =~ $r, "qr with run-time elements and code block");
567 }
568
b30fcab9
DM
569 # check that cascaded embedded regexes all see their own lexical
570 # environment
571
572 {
573 my ($r1, $r2, $r3, $r4);
574 my ($x1, $x2, $x3, $x4) = (5,6,7,8);
575 { my $x1 = 1; $r1 = qr/A(??{$x1})/; }
576 { my $x2 = 2; $r2 = qr/$r1(??{$x2})/; }
577 { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; }
578 { my $x4 = 4; $r4 = qr/$r3(??{$x4})/; }
579 ok("A1234" =~ /^$r4$/, "cascaded qr");
580 }
581
582 # and again, but in a loop, with no external references
583 # being maintained to the qr's
584
585 {
586 my $r = 'A';
587 for my $x (1..4) {
588 $r = qr/$r(??{$x})/;
589 }
590 my $x = 5;
591 ok("A1234" =~ /^$r$/, "cascaded qr loop");
592 }
593
594
595 # and again, but compiling the qrs in an eval so there
596 # aren't even refs to the qrs from any ops
597
598 {
599 my $r = 'A';
600 for my $x (1..4) {
601 $r = eval q[ qr/$r(??{$x})/; ];
602 }
603 my $x = 5;
604 ok("A1234" =~ /^$r$/, "cascaded qr loop");
605 }
606
87c6a48b
DM
607 # have qrs with either literal code blocks or only embedded
608 # code blocks, but not both
609
610 {
611 my ($r1, $r2, $r3, $r4);
612 my ($x1, $x3) = (7,8);
613 { my $x1 = 1; $r1 = qr/A(??{$x1})/; }
614 { $r2 = qr/${r1}2/; }
615 { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; }
616 { $r4 = qr/${r3}4/; }
617 ok("A1234" =~ /^$r4$/, "cascaded qr mix 1");
618 ok("A12345" =~ /^${r4}5$/, "cascaded qr mix 2");
619 ok("A1234" =~ qr/^$r4$/ , "cascaded qr mix 3");
620 ok("A12345" =~ qr/^${r4}5$/, "cascaded qr mix 4");
621 }
622
623 # and make sure things are freed at the right time
624
04e69edb
DM
625 SKIP: {
626 if ($Config{mad}) {
627 skip "MAD doesn't free eval CVs", 3;
628 }
87c6a48b
DM
629
630 {
171982db
DM
631 sub Foo99::DESTROY { $Foo99::d++ }
632 $Foo99::d = 0;
87c6a48b
DM
633 my $r1;
634 {
171982db 635 my $x = bless [1], 'Foo99';
87c6a48b
DM
636 $r1 = eval 'qr/(??{$x->[0]})/';
637 }
638 my $r2 = eval 'qr/a$r1/';
639 my $x = 2;
171982db 640 ok(eval '"a1" =~ qr/^$r2$/', "match while in scope");
87c6a48b
DM
641 # make sure PL_reg_curpm isn't holding on to anything
642 "a" =~ /a(?{1})/;
171982db 643 is($Foo99::d, 0, "before scope exit");
87c6a48b 644 }
171982db 645 ::is($Foo99::d, 1, "after scope exit");
87c6a48b
DM
646 }
647
d63c20f2
DM
648 # forward declared subs should Do The Right Thing with any anon CVs
649 # within them (i.e. pad_fixup_inner_anons() should work)
650
651 sub forward;
652 sub forward {
653 my $x = "a";
654 my $A = "A";
655 ok("Aa" =~ qr/^A(??{$x})$/, "forward qr compiletime");
656 ok("Aa" =~ qr/^$A(??{$x})$/, "forward qr runtime");
657 }
658 forward;
a39c66b9
DM
659 }
660
d24ca0c5
DM
661 # test that run-time embedded code, when re-fed into toker,
662 # does all the right escapes
663
664 {
d24ca0c5
DM
665 my $enc = eval 'use Encode; find_encoding("ascii")';
666
667 my $x = 0;
668 my $y = 'bad';
669
670 # note that most of the strings below are single-quoted, and the
671 # things within them, like '$y', *aren't* intended to interpolate
672
673 my $s1 =
674 'a\\$y(?# (??{BEGIN{$x=1} "X1"})b(?# \Ux2\E)c\'d\\\\e\\\\Uf\\\\E';
675
676 ok(q{a$ybc'd\e\Uf\E} =~ /^$s1$/, "reparse");
677 is($x, 0, "reparse no BEGIN");
678
679 my $s2 = 'g\\$y# (??{{BEGIN{$x=2} "X3"}) \Ux3\E' . "\nh";
680
681 ok(q{a$ybc'd\\e\\Uf\\Eg$yh} =~ /^$s1$s2$/x, "reparse /x");
682 is($x, 0, "reparse /x no BEGIN");
683
684 my $b = '\\';
685 my $q = '\'';
686
687 # non-ascii in string as "<0xNNN>"
688 sub esc_str {
689 my $s = shift;
690 $s =~ s{(.)}{
691 my $c = ord($1);
692 ($c< 32 || $c > 127) ? sprintf("<0x%x>", $c) : $1;
693 }ge;
694 $s;
695 }
696 sub fmt { sprintf "hairy backslashes %s [%s] =~ /^%s/",
697 $_[0], esc_str($_[1]), esc_str($_[2]);
698 }
699
700
701 for my $u (
702 [ '', '', 'blank ' ],
703 [ "\x{100}", '\x{100}', 'single' ],
704 [ "\x{100}", "\x{100}", 'double' ])
705 {
706 for my $pair (
707 [ "$b", "$b$b" ],
708 [ "$q", "$q" ],
709 [ "$b$q", "$b$b$b$q" ],
710 [ "$b$b$q", "$b$b$b$b$q" ],
711 [ "$b$b$b$q", "$b$b$b$b$b$b$q" ],
712 [ "$b$b$b$b$q","$b$b$b$b$b$b$b$b$q" ],
713 ) {
714 my ($s, $r) = @$pair;
715 $s = "9$s";
716 my $ss = "$u->[0]$s";
717
718 my $c = '9' . $r;
719 my $cc = "$u->[1]$c";
1008bb63
DM
720
721 ok($ss =~ /^$cc/, fmt("plain $u->[2]", $ss, $cc));
d24ca0c5
DM
722
723 no strict;
724 my $chr41 = "\x41";
725 $ss = "$u->[0]\t${q}$chr41${b}x42$s";
726 $nine = $nine = "bad";
727 for my $use_qr ('', 'qr') {
728 $c = qq[(??{my \$z='{';]
729 . qq[$use_qr"$b${b}t$b$q$b${b}x41$b$b$b${b}x42"]
730 . qq[. \$nine})];
731 # (??{ qr/str/ }) goes through one less interpolation
732 # stage than (??{ qq/str/ })
733 $c =~ s{\\\\}{\\}g if ($use_qr eq 'qr');
734 $c .= $r;
735 $cc = "$u->[1]$c";
736 my $nine = 9;
1008bb63
DM
737
738 eval {/^$cc/}; norun(fmt("code norun $u->[2]", $ss, $cc));
739 {
740 use re 'eval';
741 ok($ss =~ /^$cc/, fmt("code $u->[2]", $ss, $cc));
742 }
743
d24ca0c5
DM
744 {
745 # Poor man's "use encoding 'ascii'".
746 # This causes a different code path in S_const_str()
747 # to be used
748 local ${^ENCODING} = $enc;
1008bb63
DM
749 use re 'eval';
750 ok($ss =~ /^$cc/, fmt("encode $u->[2]", $ss, $cc));
d24ca0c5
DM
751 }
752 }
753 }
754 }
755
756 my $code1u = "(??{qw(\x{100})})";
1008bb63
DM
757 eval {/^$code1u$/}; norun("reparse embeded unicode norun");
758 {
759 use re 'eval';
760 ok("\x{100}" =~ /^$code1u$/, "reparse embeded unicode");
761 }
d24ca0c5
DM
762 }
763
764 # a non-pattern literal won't get code blocks parsed at compile time;
765 # but they must get parsed later on if 'use re eval' is in scope
766 # also check that unbalanced {}'s are parsed ok
767
768 {
1008bb63
DM
769 eval q["a{" =~ '^(??{"a{"})$'];
770 norun("non-pattern literal code norun");
771 eval {/^${\'(??{"a{"})'}$/};
772 norun("runtime code with unbalanced {} norun");
773
d24ca0c5 774 use re 'eval';
2f71aec6
DM
775 ok("a{" =~ '^a(??{"{"})$', "non-pattern literal code");
776 ok("a{" =~ /^a${\'(??{"{"})'}$/, "runtime code with unbalanced {}");
d24ca0c5
DM
777 }
778
2e2e3f36
DM
779 # make sure warnings come from the right place
780
781 {
782 use warnings;
783 my ($s, $t, $w);
784 local $SIG{__WARN__} = sub { $w .= "@_" };
785
786 $w = ''; $s = 's';
787 my $r = qr/(?{$t=$s+1})/;
788 "a" =~ /a$r/;
789 like($w, qr/pat_re_eval/, "warning main file");
790
791 # do it in an eval to get predictable line numbers
792 eval q[
793
794 $r = qr/(?{$t=$s+1})/;
795 ];
796 $w = ''; $s = 's';
797 "a" =~ /a$r/;
798 like($w, qr/ at \(eval \d+\) line 3/, "warning eval A");
799
800 $w = ''; $s = 's';
801 eval q[
802 use re 'eval';
803 my $c = '(?{$t=$s+1})';
804 "a" =~ /a$c/;
805 1;
806 ];
807 like($w, qr/ at \(eval \d+\) line 1/, "warning eval B");
808 }
809
81ed78b2
DM
810 # jumbo test for:
811 # * recursion;
812 # * mixing all the different types of blocks (literal, qr/literal/,
813 # runtime);
814 # * backtracking (the Z+ alternation ensures CURLYX and full
815 # scope popping on backtracking)
816
817 {
818 sub recurse2 {
819 my ($depth)= @_;
820 return unless $depth;
821 my $s1 = '3-LMN';
822 my $r1 = qr/(??{"$s1-$depth"})/;
823
824 my $s2 = '4-PQR';
825 my $c1 = '(??{"$s2-$depth"})';
826 use re 'eval';
827 ok( "<12345-ABC-$depth-123-LMN-$depth-1234-PQR-$depth>"
828 . "<12345-ABC-$depth-123-LMN-$depth-1234-PQR-$depth>"
829 =~
830 /^<(\d|Z+)+(??{"45-ABC-$depth-"})(\d|Z+)+$r1-\d+$c1>
831 <(\d|Z+)+(??{"45-ABC-$depth-"})(\d|Z+)+$r1-\d+$c1>$/x,
832 "recurse2($depth)");
833 recurse2($depth-1);
834 }
835 recurse2(5);
836 }
837
74088413
DM
838 # nested (??{}) called from various levels of a recursive function
839
840 {
841 sub recurse3 {
842 my ($n) = @_;
843 return if $n > 3;
844 ok("A$n" =~ m{^A(??{ "0123" =~ /((??{$n}))/; $1 })$},
845 "recurse3($n)");
846 ok("A$n" !~ m{^A(??{ "0123" =~ /((??{$n}))/; "X" })$},
847 "recurse3($n) nomatch");
848 recurse3($n+1);
849 }
850 recurse3(0);
851 }
852
853 # nested (??{}) being invoked recursively via a function
854
855 {
856 my $s = '';
857 our $recurse4;
858 my @alpha = qw(A B C D E);
859 $recurse4 = sub {
860 my ($n) = @_;
861 $s .= "(n=$n:";
862 if ($n < 4) {
863 my $m = ("$alpha[$n]" . substr("0123", 0, $n+1)) =~
864 m{^([A-Z])
865 (??{
866 $s .= "1=$1:";
867 "$n-0123" =~ m{^(\d)-(((??{$recurse4->($n+1)})))};
868 $s .= "i1=$1:<=[$2]";
869 $3; # NB - not stringified
870 })
871 $
872 }x;
873 $s .= "1a=$1:";
874 $s .= $m ? 'M' : '!M';
875 }
876 my $ret = '.*?' . ($n-1);
877 $s .= "<=[$ret])";
878 return $ret;
879 };
880 $recurse4->(0);
881 my $exp = '(n=0:1=A:(n=1:1=B:(n=2:1=C:(n=3:1=D:(n=4:<=[.*?3])'
882 . 'i1=3:<=[0123]1a=D:M<=[.*?2])i1=2:<=[012]1a=C:M<=[.*?1])'
883 . 'i1=1:<=[01]1a=B:M<=[.*?0])i1=0:<=[0]1a=A:M<=[.*?-1])';
884 is($s, $exp, 'recurse4');
885 }
886
887 # single (??{}) being invoked recursively via a function
888
889 {
890 my $s = '';
891 our $recurse5;
892 my @alpha = qw(A B C D E);
893 $recurse5 = sub {
894 my ($n) = @_;
895 $s .= "(n=$n:";
896 if ($n < 4) {
897 my $m = ("$alpha[$n]" . substr("0123", 0, $n+1)) =~
898 m{^([A-Z])
899 ((??{
900 $s .= "1=$1:";
901 $recurse5->($n+1);
902 }))
903 $
904 }x;
905 $s .= "1a=$1:2=$2:";
906 $s .= $m ? 'M' : '!M';
907 }
908 my $ret = '.*?' . ($n-1);
909 $s .= "<=[$ret])";
910 return $ret;
911 };
912 $recurse5->(0);
913 my $exp = '(n=0:1=A:(n=1:1=B:(n=2:1=C:(n=3:1=D:(n=4:<=[.*?3])'
914 . '1a=D:2=0123:M<=[.*?2])1a=C:2=012:M<=[.*?1])'
915 . '1a=B:2=01:M<=[.*?0])1a=A:2=0:M<=[.*?-1])';
916 is($s, $exp, 'recurse5');
917 }
918
919
81ed78b2
DM
920 # make sure that errors during compiling run-time code get trapped
921
922 {
923 use re 'eval';
924
925 my $code = '(?{$x=})';
926 eval { "a" =~ /^a$code/ };
927 like($@, qr/syntax error at \(eval \d+\) line \d+/, 'syntax error');
928
929 $code = '(?{BEGIN{die})';
930 eval { "a" =~ /^a$code/ };
931 like($@,
932 qr/BEGIN failed--compilation aborted at \(eval \d+\) line \d+/,
933 'syntax error');
934 }
935
732caac7
DM
936 # make sure that 'use re eval' is propagated into compiling the
937 # pattern returned by (??{})
938
939 {
940 use re 'eval';
941 my $pat = 'B(??{1})C';
942 my $A = 'A';
943 # compile-time outer code-block
944 ok("AB1CD" =~ /^A(??{$pat})D$/, "re eval propagated compile-time");
945 # run-time outer code-block
946 ok("AB1CD" =~ /^$A(??{$pat})D$/, "re eval propagated run-time");
947 }
948
197e8e6e
DM
949 # returning a ref to something that had set magic but wasn't
950 # PERL_MAGIC_qr triggered a false positive assertion failure
951 # The test is not so much concerned with it not matching,
952 # as with not failing the assertion
953
954 {
955 ok("a" !~ /^(a)(??{ \$1 })/, '(??{ ref })');
956 }
957
e4bfbed3
DM
958 # make sure the uninit warning from returning an undef var
959 # sees the right var
960
961 {
962 my ($u1, $u2);
963 my $warn = '';
964 local $SIG{__WARN__} = sub { $warn .= $_[0] };
965 $u1 =~ /(??{$u2})/ or die;
966 like($warn, qr/value \$u1 in pattern match.*\n.*value at/, 'uninit');
967 }
968
86464c5b
DM
969 # test that code blocks are called in scalar context
970
971 {
972 my @a = (0);
973 ok("" =~ /^(?{@a})$/, '(?{}) in scalar context');
974 is($^R, 1, '(?{}) in scalar context: $^R');
975 ok("1" =~ /^(??{@a})$/, '(??{}) in scalar context');
976 ok("foo" =~ /^(?(?{@a})foo|bar)$/, '(?(?{})|) in scalar context');
977 }
978
30e92347
DM
979 # BEGIN in compiled blocks shouldn't mess with $1 et al
980
981 {
982 use re 'eval';
983 my $code1 = '(B)(??{ BEGIN { "X" =~ /X/ } $1})(C)';
984 ok("ABBCA" =~ /^(.)(??{$code1})\1$/, '(?{}) BEGIN and $1');
985 my $code2 = '(B)(??{ BEGIN { "X" =~ /X/ } $1 =~ /(.)/ ? $1 : ""})(C)';
986 ok("ABBCA" =~ /^(.)(??{$code2})\1$/, '(?{}) BEGIN and $1 mark 2');
987 }
e4bfbed3 988
41953474
DM
989 # check that the optimiser is applied to code blocks: see if aelem has
990 # been converted to aelemfast
991
992 {
993 my $out;
994 for my $prog (
995 '/(?{$a[0]})/',
996 'q() =~ qr/(?{$a[0]})/',
997 'use re q(eval); q() =~ q{(?{$a[0]})}',
998 'use re q(eval); $c = q{(?{$a[0]})}; /$c/',
999 'use re q(eval); $c = q{(?{$a[0]})}; /(?{1;})$c/',
1000 ) {
1001 $out = runperl(switches => ["-Dt"], prog => $prog, stderr => 1);
1002 like($out, qr/aelemfast|Recompile perl with -DDEBUGGING/,
1003 "optimise: '$prog'");
1004 }
1005 }
1006
d234778b
DM
1007 # [perl #115080]
1008 # Ensure that ?pat? matches exactly once, even when the run-time
1009 # pattern changes, and even when the presence of run-time (?{}) affects
1010 # how and when patterns are recompiled
41953474 1011
d234778b
DM
1012 {
1013 my $m;
1014
1015 $m = '';
1016 for (qw(a a a)) {
1017 $m .= $_ if m?$_?;
1018 }
1019 is($m, 'a', '?pat? with a,a,a');
1020
1021 $m = '';
1022 for (qw(a b c)) {
1023 $m .= $_ if m?$_?;
1024 }
1025 is($m, 'a', '?pat? with a,b,c');
1026
1027 use re 'eval';
1028
1029 $m = '';
1030 for (qw(a a a)) {
1031 my $e = qq[(??{"$_"})];
1032 $m .= $_ if m?$e?;
1033 }
1034 is($m, 'a', '?pat? with (??{a,a,a})');
1035
1036 $m = '';
1037 for (qw(a b c)) {
1038 my $e = qq[(??{"$_"})];
1039 $m .= $_ if m?$e?;
1040 }
1041 is($m, 'a', '?pat? with (??{a,b,c})');
1042 }
41953474 1043
da1f5b59
DM
1044 {
1045 # this code won't actually fail, but it used to fail valgrind,
1046 # so its here just to make sure valgrind doesn't fail again
1047 # While examining the ops of the secret anon sub wrapped around
1048 # the qr//, the pad of the sub was in scope, so cSVOPo_sv
1049 # got the const from the wrong pad. By having lots of $s's
1050 # (aka gvsv(*s), this forces the targs of the consts which have
1051 # been moved to the pad, to have high indices.
1052
1053 sub {
1054 local our $s = "abc";
1055 my $qr = qr/^(?{1})$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s/;
1056 }->();
1057 pass("cSVOPo_sv");
1058 }
1059
1060
81ed78b2 1061
0f289c68
YO
1062} # End of sub run_tests
1063
10641;