This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Replaced 'unlink' with 'unlink_all' in t/op/magic.t
[perl5.git] / t / re / pat.t
CommitLineData
8d063cd8 1#!./perl
8d37f932
DD
2#
3# This is a home for regular expression tests that don't fit into
67a2b8c6 4# the format supported by re/regexp.t. If you want to add a test
ff3f963a
KW
5# that does fit that format, add it to re/re_tests, not here. Tests for \N
6# should be added here because they are treated as single quoted strings
7# there, which means they avoid the lexer which otherwise would look at them.
8d063cd8 8
84281c31
A
9use strict;
10use warnings;
11use 5.010;
12
13
14sub run_tests;
15
9133bbab 16$| = 1;
3568d838 17
8d37f932 18
e4d48cc9
GS
19BEGIN {
20 chdir 't' if -d 't';
9d45b377
YO
21 @INC = ('../lib','.');
22 do "re/ReTest.pl" or die $@;
e4d48cc9 23}
84281c31 24
84281c31 25
5b6010b3 26plan tests => 410; # Update this when adding/deleting tests.
b7a35066 27
9d45b377 28run_tests() unless caller;
b7a35066 29
84281c31
A
30#
31# Tests start here.
32#
33sub run_tests {
0ef3e39e 34
84281c31 35 {
b485d051 36
84281c31 37 my $x = "abc\ndef\n";
fd291da9 38
84281c31
A
39 ok $x =~ /^abc/, qq ["$x" =~ /^abc/];
40 ok $x !~ /^def/, qq ["$x" !~ /^def/];
fd291da9 41
84281c31
A
42 # used to be a test for $*
43 ok $x =~ /^def/m, qq ["$x" =~ /^def/m];
fd291da9 44
84281c31
A
45 nok $x =~ /^xxx/, qq ["$x" =~ /^xxx/];
46 nok $x !~ /^abc/, qq ["$x" !~ /^abc/];
fd291da9 47
84281c31
A
48 ok $x =~ /def/, qq ["$x" =~ /def/];
49 nok $x !~ /def/, qq ["$x" !~ /def/];
4765795a 50
84281c31
A
51 ok $x !~ /.def/, qq ["$x" !~ /.def/];
52 nok $x =~ /.def/, qq ["$x" =~ /.def/];
4765795a 53
84281c31
A
54 ok $x =~ /\ndef/, qq ["$x" =~ /\ndef/];
55 nok $x !~ /\ndef/, qq ["$x" !~ /\ndef/];
56 }
4765795a 57
84281c31
A
58 {
59 $_ = '123';
60 ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/];
61 }
f9969324 62
84281c31
A
63 {
64 $_ = 'aaabbbccc';
65 ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc',
66 qq [\$_ = '$_'; /(a*b*)(c*)/];
67 ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/];
68 nok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/];
69
70 $_ = 'aaabccc';
71 ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/];
72 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/];
73
74 $_ = 'aaaccc';
75 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/];
76 nok /a*b+c*/, qq [\$_ = '$_'; /a*b+c*/];
77
78 $_ = 'abcdef';
79 ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/];
80 ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/];
81 ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|];
82 ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/];
4765795a 83 }
4765795a 84
84281c31
A
85 {
86 # used to be a test for $*
87 ok "ab\ncd\n" =~ /^cd/m, qq ["ab\ncd\n" =~ /^cd/m];
88 }
4765795a 89
84281c31
A
90 {
91 our %XXX = map {($_ => $_)} 123, 234, 345;
92
93 our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3');
94 while ($_ = shift(@XXX)) {
95 my $f = index ($_, 'not') >= 0 ? \&nok : \&ok;
725a61d7 96 my $r = m?(.*)?;
84281c31
A
97 &$f ($r, "?(.*)?");
98 /not/ && reset;
99 if (/not ok 2/) {
100 if ($^O eq 'VMS') {
101 $_ = shift(@XXX);
102 }
103 else {
104 reset 'X';
105 }
106 }
107 }
4765795a 108
84281c31
A
109 SKIP: {
110 if ($^O eq 'VMS') {
111 skip "Reset 'X'", 1;
112 }
113 ok !keys %XXX, "%XXX is empty";
114 }
4765795a 115
84281c31 116 }
4765795a 117
84281c31
A
118 {
119 local $Message = "Test empty pattern";
120 my $xyz = 'xyz';
121 my $cde = 'cde';
122
123 $cde =~ /[^ab]*/;
124 $xyz =~ //;
125 iseq $&, $xyz;
126
127 my $foo = '[^ab]*';
128 $cde =~ /$foo/;
129 $xyz =~ //;
130 iseq $&, $xyz;
131
132 $cde =~ /$foo/;
133 my $null;
134 no warnings 'uninitialized';
135 $xyz =~ /$null/;
136 iseq $&, $xyz;
137
138 $null = "";
139 $xyz =~ /$null/;
140 iseq $&, $xyz;
141 }
4765795a 142
84281c31
A
143 {
144 local $Message = q !Check $`, $&, $'!;
145 $_ = 'abcdefghi';
0f289c68 146 /def/; # optimized up to cmd
84281c31 147 iseq "$`:$&:$'", 'abc:def:ghi';
4765795a 148
84281c31 149 no warnings 'void';
0f289c68 150 /cde/ + 0; # optimized only to spat
84281c31 151 iseq "$`:$&:$'", 'ab:cde:fghi';
4765795a 152
0f289c68 153 /[d][e][f]/; # not optimized
84281c31
A
154 iseq "$`:$&:$'", 'abc:def:ghi';
155 }
4765795a 156
84281c31
A
157 {
158 $_ = 'now is the {time for all} good men to come to.';
159 / {([^}]*)}/;
160 iseq $1, 'time for all', "Match braces";
161 }
4765795a 162
84281c31
A
163 {
164 local $Message = "{N,M} quantifier";
165 $_ = 'xxx {3,4} yyy zzz';
166 ok /( {3,4})/;
167 iseq $1, ' ';
168 ok !/( {4,})/;
169 ok /( {2,3}.)/;
170 iseq $1, ' y';
171 ok /(y{2,3}.)/;
172 iseq $1, 'yyy ';
173 ok !/x {3,4}/;
174 ok !/^xxx {3,4}/;
175 }
4765795a 176
84281c31
A
177 {
178 local $Message = "Test /g";
179 local $" = ":";
180 $_ = "now is the time for all good men to come to.";
181 my @words = /(\w+)/g;
182 my $exp = "now:is:the:time:for:all:good:men:to:come:to";
4765795a 183
84281c31 184 iseq "@words", $exp;
4765795a 185
84281c31
A
186 @words = ();
187 while (/\w+/g) {
188 push (@words, $&);
189 }
190 iseq "@words", $exp;
4765795a 191
84281c31
A
192 @words = ();
193 pos = 0;
194 while (/to/g) {
195 push(@words, $&);
196 }
197 iseq "@words", "to:to";
4765795a 198
84281c31
A
199 pos $_ = 0;
200 @words = /to/g;
201 iseq "@words", "to:to";
202 }
4765795a 203
84281c31
A
204 {
205 $_ = "abcdefghi";
206
207 my $pat1 = 'def';
208 my $pat2 = '^def';
209 my $pat3 = '.def.';
210 my $pat4 = 'abc';
211 my $pat5 = '^abc';
212 my $pat6 = 'abc$';
213 my $pat7 = 'ghi';
214 my $pat8 = '\w*ghi';
215 my $pat9 = 'ghi$';
216
217 my $t1 = my $t2 = my $t3 = my $t4 = my $t5 =
218 my $t6 = my $t7 = my $t8 = my $t9 = 0;
219
220 for my $iter (1 .. 5) {
221 $t1++ if /$pat1/o;
222 $t2++ if /$pat2/o;
223 $t3++ if /$pat3/o;
224 $t4++ if /$pat4/o;
225 $t5++ if /$pat5/o;
226 $t6++ if /$pat6/o;
227 $t7++ if /$pat7/o;
228 $t8++ if /$pat8/o;
229 $t9++ if /$pat9/o;
230 }
231 my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9";
232 iseq $x, '505550555', "Test /o";
233 }
4765795a 234
4765795a 235
84281c31
A
236 SKIP: {
237 my $xyz = 'xyz';
238 ok "abc" =~ /^abc$|$xyz/, "| after \$";
4765795a 239
84281c31
A
240 # perl 4.009 says "unmatched ()"
241 local $Message = '$ inside ()';
4765795a 242
84281c31
A
243 my $result;
244 eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"';
245 iseq $@, "" or skip "eval failed", 1;
246 iseq $result, "abc:bc";
247 }
4765795a 248
4765795a 249
84281c31
A
250 {
251 local $Message = "Scalar /g";
252 $_ = "abcfooabcbar";
253
254 ok /abc/g && $` eq "";
255 ok /abc/g && $` eq "abcfoo";
256 ok !/abc/g;
257
258 local $Message = "Scalar /gi";
259 pos = 0;
260 ok /ABC/gi && $` eq "";
261 ok /ABC/gi && $` eq "abcfoo";
262 ok !/ABC/gi;
263
264 local $Message = "Scalar /g";
265 pos = 0;
266 ok /abc/g && $' eq "fooabcbar";
267 ok /abc/g && $' eq "bar";
268
269 $_ .= '';
270 my @x = /abc/g;
271 iseq @x, 2, "/g reset after assignment";
4765795a 272 }
4765795a 273
84281c31
A
274 {
275 local $Message = '/g, \G and pos';
276 $_ = "abdc";
277 pos $_ = 2;
278 /\Gc/gc;
279 iseq pos $_, 2;
280 /\Gc/g;
281 ok !defined pos $_;
282 }
4765795a 283
84281c31
A
284 {
285 local $Message = '(?{ })';
286 our $out = 1;
287 'abc' =~ m'a(?{ $out = 2 })b';
288 iseq $out, 2;
289
290 $out = 1;
291 'abc' =~ m'a(?{ $out = 3 })c';
292 iseq $out, 1;
293 }
4765795a 294
4765795a 295
84281c31
A
296 {
297 $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6';
298 my @out = /(?<!foo)bar./g;
299 iseq "@out", 'bar2 barf', "Negative lookbehind";
300 }
4765795a 301
84281c31
A
302 {
303 local $Message = "REG_INFTY tests";
304 # Tests which depend on REG_INFTY
305 $::reg_infty = $Config {reg_infty} // 32767;
306 $::reg_infty_m = $::reg_infty - 1;
307 $::reg_infty_p = $::reg_infty + 1;
308 $::reg_infty_m = $::reg_infty_m; # Suppress warning.
309
310 # As well as failing if the pattern matches do unexpected things, the
311 # next three tests will fail if you should have picked up a lower-than-
312 # default value for $reg_infty from Config.pm, but have not.
313
314 eval_ok q (('aaa' =~ /(a{1,$::reg_infty_m})/)[0] eq 'aaa');
315 eval_ok q (('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/);
316 eval_ok q (('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/);
317 eval "'aaa' =~ /a{1,$::reg_infty}/";
318 ok $@ =~ /^\QQuantifier in {,} bigger than/;
319 eval "'aaa' =~ /a{1,$::reg_infty_p}/";
320 ok $@ =~ /^\QQuantifier in {,} bigger than/;
4765795a 321 }
8269fa76 322
84281c31
A
323 {
324 # Poke a couple more parse failures
325 my $context = 'x' x 256;
326 eval qq("${context}y" =~ /(?<=$context)y/);
327 ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit";
328 }
8269fa76 329
84281c31
A
330 {
331 # Long Monsters
332 local $Message = "Long monster";
333 for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory
334 my $a = 'a' x $l;
335 local $Error = "length = $l";
336 ok "ba$a=" =~ /a$a=/;
337 nok "b$a=" =~ /a$a=/;
338 ok "b$a=" =~ /ba+=/;
339
84281c31
A
340 ok "ba$a=" =~ /b(?:a|b)+=/;
341 }
342 }
8269fa76 343
b8ef571c 344
84281c31
A
345 {
346 # 20000 nodes, each taking 3 words per string, and 1 per branch
347 my $long_constant_len = join '|', 12120 .. 32645;
348 my $long_var_len = join '|', 8120 .. 28645;
349 my %ans = ( 'ax13876y25677lbc' => 1,
350 'ax13876y25677mcb' => 0, # not b.
351 'ax13876y35677nbc' => 0, # Num too big
352 'ax13876y25677y21378obc' => 1,
0f289c68 353 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o]
84281c31
A
354 'ax13876y25677y21378y21378kbc' => 1,
355 'ax13876y25677y21378y21378kcb' => 0, # Not b.
356 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs
357 );
358
359 local $Message = "20000 nodes";
360 for (keys %ans) {
361 local $Error = "const-len '$_'";
362 ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o);
363
364 local $Error = "var-len '$_'";
365 ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o);
366 }
b8ef571c 367 }
209a9bc1 368
84281c31
A
369 {
370 local $Message = "Complicated backtracking";
371 $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e";
372 my $expect = "(bla()) ((l)u((e))) (l(e)e)";
373
374 use vars '$c';
375 sub matchit {
376 m/
377 (
378 \(
0f289c68 379 (?{ $c = 1 }) # Initialize
84281c31
A
380 (?:
381 (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop
382 (?!
0f289c68
YO
383 ) # Fail: will unwind one iteration back
384 )
84281c31 385 (?:
0f289c68 386 [^()]+ # Match a big chunk
84281c31
A
387 (?=
388 [()]
0f289c68 389 ) # Do not try to match subchunks
84281c31
A
390 |
391 \(
392 (?{ ++$c })
393 |
394 \)
395 (?{ --$c })
396 )
0f289c68 397 )+ # This may not match with different subblocks
84281c31
A
398 )
399 (?(?{ $c != 0 })
400 (?!
0f289c68
YO
401 ) # Fail
402 ) # Otherwise the chunk 1 may succeed with $c>0
84281c31
A
403 /xg;
404 }
3568d838 405
84281c31
A
406 my @ans = ();
407 my $res;
408 push @ans, $res while $res = matchit;
409 iseq "@ans", "1 1 1";
3568d838 410
84281c31
A
411 @ans = matchit;
412 iseq "@ans", $expect;
3568d838 413
84281c31
A
414 local $Message = "Recursion with (??{ })";
415 our $matched;
416 $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/;
3568d838 417
84281c31
A
418 @ans = my @ans1 = ();
419 push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g;
3568d838 420
84281c31
A
421 iseq "@ans", "1 1 1";
422 iseq "@ans1", $expect;
3568d838 423
84281c31
A
424 @ans = m/$matched/g;
425 iseq "@ans", $expect;
3568d838 426
84281c31 427 }
3568d838 428
84281c31
A
429 {
430 ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/';
431 }
3568d838 432
84281c31 433 {
0f289c68 434 my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad
84281c31
A
435 iseq "@ans", 'a/ b', "Stack may be bad";
436 }
3568d838 437
84281c31
A
438 {
439 local $Message = "Eval-group not allowed at runtime";
440 my $code = '{$blah = 45}';
441 our $blah = 12;
442 eval { /(?$code)/ };
443 ok $@ && $@ =~ /not allowed at runtime/ && $blah == 12;
444
445 for $code ('{$blah = 45}','=xx') {
446 $blah = 12;
447 my $res = eval { "xx" =~ /(?$code)/o };
448 no warnings 'uninitialized';
449 local $Error = "'$@', '$res', '$blah'";
450 if ($code eq '=xx') {
451 ok !$@ && $res;
452 }
453 else {
454 ok $@ && $@ =~ /not allowed at runtime/ && $blah == 12;
455 }
456 }
3568d838 457
84281c31
A
458 $code = '{$blah = 45}';
459 $blah = 12;
460 eval "/(?$code)/";
461 iseq $blah, 45;
3568d838 462
84281c31
A
463 $blah = 12;
464 /(?{$blah = 45})/;
465 iseq $blah, 45;
466 }
3568d838 467
84281c31
A
468 {
469 local $Message = "Pos checks";
470 my $x = 'banana';
471 $x =~ /.a/g;
472 iseq pos ($x), 2;
3568d838 473
84281c31
A
474 $x =~ /.z/gc;
475 iseq pos ($x), 2;
3568d838 476
84281c31
A
477 sub f {
478 my $p = $_[0];
479 return $p;
480 }
3568d838 481
84281c31
A
482 $x =~ /.a/g;
483 iseq f (pos ($x)), 4;
484 }
3568d838 485
84281c31
A
486 {
487 local $Message = 'Checking $^R';
488 our $x = $^R = 67;
489 'foot' =~ /foo(?{$x = 12; 75})[t]/;
490 iseq $^R, 75;
491
492 $x = $^R = 67;
493 'foot' =~ /foo(?{$x = 12; 75})[xy]/;
494 ok $^R eq '67' && $x eq '12';
495
496 $x = $^R = 67;
497 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/;
498 ok $^R eq '79' && $x eq '12';
499 }
3568d838 500
84281c31 501 {
fb85c044
KW
502 iseq qr/\b\v$/i, '(?^i:\b\v$)', 'qr/\b\v$/i';
503 iseq qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s';
504 iseq qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m';
505 iseq qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x';
9de15fec 506 iseq qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism';
fb85c044 507 iseq qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/';
84281c31 508 }
3568d838 509
9de15fec
KW
{   # Test that charset modifiers work, and are interpolated.
    #
    # Each check stringifies a qr// object and compares it with the expected
    # "(?^FLAGS:...)" form, so the flags shown reflect the pragmas that were in
    # lexical scope when that particular qr// was compiled.
    iseq qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier';
    iseq qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles';
    iseq qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles';
    iseq qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles';
    iseq qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles';

    # Compiled with neither pragma active; the descriptions below refer to
    # this as the "d" meaning.
    my $dual = qr/\b\v$/;
    use locale;
    my $locale = qr/\b\v$/;
    iseq $locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale';
    no locale;

    use feature 'unicode_strings';
    my $unicode = qr/\b\v$/;
    iseq $unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings';
    # Only unicode_strings is in effect here (locale was switched off above),
    # hence the outer (?^u:...) -- the description must say unicode_strings.
    iseq qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under unicode_strings';
    iseq qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings';

    no feature 'unicode_strings';
    iseq qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings';
    iseq qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings';

    use locale;
    iseq qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale';
    iseq qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale';
}
537
3568d838 538
84281c31
A
{
    # Repeated scalar-context /g matches over 'xabcx'; each iteration checks
    # that the match succeeds and that $1 holds the expected capture.
    local $Message = "Look around";
    $_ = 'xabcx';
    foreach my $ans ('', 'c') {
        # The SKIP scope is per iteration: a failed match then skips only the
        # dependent iseq below instead of abandoning the remaining iterations
        # (which would leave the planned test count short).
        # NOTE(review): assumes skip() exits via `last SKIP`, as the other
        # SKIP: blocks in this file imply -- confirm against re/ReTest.pl.
        SKIP: {
            ok /(?<=(?=a)..)((?=c)|.)/g or skip "Match failed", 1;
            iseq $1, $ans;
        }
    }
}
3568d838 548
84281c31
A
{
    # An alternation with empty-width branches, matched three times with /g
    # against 'a'; $& is expected to be '', then 'a', then '' again.
    local $Message = "Empty clause";
    $_ = 'a';
    foreach my $ans ('', 'a', '') {
        # Every other skip() call in this file sits inside a SKIP: label; this
        # one did not, so a failed match had no SKIP block to leave.
        # NOTE(review): assumes skip() exits via `last SKIP` -- confirm
        # against re/ReTest.pl.
        SKIP: {
            ok /^|a|$/g or skip "Match failed", 1;
            iseq $&, $ans;
        }
    }
}
3568d838 557
84281c31
A
558 {
559 local $Message = "Prefixify";
560 sub prefixify {
561 SKIP: {
562 my ($v, $a, $b, $res) = @_;
563 ok $v =~ s/\Q$a\E/$b/ or skip "Match failed", 1;
564 iseq $v, $res;
565 }
566 }
3568d838 567
84281c31
A
568 prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch');
569 prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch');
570 }
3568d838 571
84281c31
A
572 {
573 $_ = 'var="foo"';
574 /(\")/;
575 ok $1 && /$1/, "Capture a quote";
576 }
3568d838 577
84281c31 578 {
84281c31
A
579 no warnings 'closure';
580 local $Message = '(?{ $var } refers to package vars';
581 package aa;
582 our $c = 2;
583 $::c = 3;
584 '' =~ /(?{ $c = 4 })/;
585 main::iseq $c, 4;
586 main::iseq $::c, 3;
587 }
3568d838 588
84281c31
A
589 {
590 must_die 'q(a:[b]:) =~ /[x[:foo:]]/',
591 'POSIX class \[:[^:]+:\] unknown in regex',
592 'POSIX class [: :] must have valid name';
593
594 for my $d (qw [= .]) {
595 must_die "/[[${d}foo${d}]]/",
596 "\QPOSIX syntax [$d $d] is reserved for future extensions",
597 "POSIX syntax [[$d $d]] is an error";
598 }
599 }
3568d838 600
3568d838 601
84281c31
A
602 {
603 # test if failure of patterns returns empty list
604 local $Message = "Failed pattern returns empty list";
605 $_ = 'aaa';
606 @_ = /bbb/;
607 iseq "@_", "";
3568d838 608
84281c31
A
609 @_ = /bbb/g;
610 iseq "@_", "";
a72deede 611
84281c31
A
612 @_ = /(bbb)/;
613 iseq "@_", "";
a72deede 614
84281c31
A
615 @_ = /(bbb)/g;
616 iseq "@_", "";
617 }
a72deede 618
0f289c68 619
84281c31
A
620 {
621 local $Message = '@- and @+ tests';
622
623 /a(?=.$)/;
624 iseq $#+, 0;
625 iseq $#-, 0;
626 iseq $+ [0], 2;
627 iseq $- [0], 1;
628 ok !defined $+ [1] && !defined $- [1] &&
629 !defined $+ [2] && !defined $- [2];
630
631 /a(a)(a)/;
632 iseq $#+, 2;
633 iseq $#-, 2;
634 iseq $+ [0], 3;
635 iseq $- [0], 0;
636 iseq $+ [1], 2;
637 iseq $- [1], 1;
638 iseq $+ [2], 3;
639 iseq $- [2], 2;
640 ok !defined $+ [3] && !defined $- [3] &&
641 !defined $+ [4] && !defined $- [4];
642
643
644 /.(a)(b)?(a)/;
645 iseq $#+, 3;
646 iseq $#-, 3;
647 iseq $+ [1], 2;
648 iseq $- [1], 1;
649 iseq $+ [3], 3;
650 iseq $- [3], 2;
651 ok !defined $+ [2] && !defined $- [2] &&
652 !defined $+ [4] && !defined $- [4];
653
654
655 /.(a)/;
656 iseq $#+, 1;
657 iseq $#-, 1;
658 iseq $+ [0], 2;
659 iseq $- [0], 0;
660 iseq $+ [1], 2;
661 iseq $- [1], 1;
662 ok !defined $+ [2] && !defined $- [2] &&
663 !defined $+ [3] && !defined $- [3];
664
665 /.(a)(ba*)?/;
666 iseq $#+, 2;
667 iseq $#-, 1;
668 }
a72deede 669
a72deede 670
84281c31
A
671 {
672 local $DiePattern = '^Modification of a read-only value attempted';
673 local $Message = 'Elements of @- and @+ are read-only';
674 must_die '$+[0] = 13';
675 must_die '$-[0] = 13';
676 must_die '@+ = (7, 6, 5)';
677 must_die '@- = qw (foo bar)';
678 }
a72deede 679
a72deede 680
84281c31
A
681 {
682 local $Message = '\G testing';
683 $_ = 'aaa';
684 pos = 1;
685 my @a = /\Ga/g;
686 iseq "@a", "a a";
687
688 my $str = 'abcde';
689 pos $str = 2;
690 ok $str !~ /^\G/;
691 ok $str !~ /^.\G/;
692 ok $str =~ /^..\G/;
693 ok $str !~ /^...\G/;
694 ok $str =~ /\G../ && $& eq 'cd';
695
696 local $TODO = $running_as_thread;
697 ok $str =~ /.\G./ && $& eq 'bc';
698 }
a72deede 699
569b5e07 700
84281c31
A
701 {
702 local $Message = 'pos inside (?{ })';
703 my $str = 'abcde';
704 our ($foo, $bar);
705 ok $str =~ /b(?{$foo = $_; $bar = pos})c/;
706 iseq $foo, $str;
707 iseq $bar, 2;
708 ok !defined pos ($str);
709
710 undef $foo;
711 undef $bar;
712 pos $str = undef;
713 ok $str =~ /b(?{$foo = $_; $bar = pos})c/g;
714 iseq $foo, $str;
715 iseq $bar, 2;
716 iseq pos ($str), 3;
717
718 $_ = $str;
719 undef $foo;
720 undef $bar;
721 ok /b(?{$foo = $_; $bar = pos})c/;
722 iseq $foo, $str;
723 iseq $bar, 2;
724
725 undef $foo;
726 undef $bar;
727 ok /b(?{$foo = $_; $bar = pos})c/g;
728 iseq $foo, $str;
729 iseq $bar, 2;
730 iseq pos, 3;
731
732 undef $foo;
733 undef $bar;
734 pos = undef;
735 1 while /b(?{$foo = $_; $bar = pos})c/g;
736 iseq $foo, $str;
737 iseq $bar, 2;
738 ok !defined pos;
739
740 undef $foo;
741 undef $bar;
742 $_ = 'abcde|abcde';
743 ok s/b(?{$foo = $_; $bar = pos})c/x/g;
744 iseq $foo, 'abcde|abcde';
745 iseq $bar, 8;
746 iseq $_, 'axde|axde';
747
748 # List context:
749 $_ = 'abcde|abcde';
750 our @res;
751 () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g;
752 @res = map {defined $_ ? "'$_'" : 'undef'} @res;
753 iseq "@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'";
754
755 @res = ();
756 () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g;
757 @res = map {defined $_ ? "'$_'" : 'undef'} @res;
758 iseq "@res", "'' 'ab' 'cde|abcde' " .
759 "'' 'abc' 'de|abcde' " .
760 "'abcd' 'e|' 'abcde' " .
761 "'abcde|' 'ab' 'cde' " .
762 "'abcde|' 'abc' 'de'" ;
763 }
f33976b4 764
cce850e4 765
84281c31
A
766 {
767 local $Message = '\G anchor checks';
768 my $foo = 'aabbccddeeffgg';
769 pos ($foo) = 1;
770 {
771 local $TODO = $running_as_thread;
772 no warnings 'uninitialized';
773 ok $foo =~ /.\G(..)/g;
774 iseq $1, 'ab';
cce850e4 775
84281c31
A
776 pos ($foo) += 1;
777 ok $foo =~ /.\G(..)/g;
778 iseq $1, 'cc';
cce850e4 779
84281c31
A
780 pos ($foo) += 1;
781 ok $foo =~ /.\G(..)/g;
782 iseq $1, 'de';
cce850e4 783
84281c31
A
784 ok $foo =~ /\Gef/g;
785 }
cce850e4 786
84281c31
A
787 undef pos $foo;
788 ok $foo =~ /\G(..)/g;
789 iseq $1, 'aa';
cce850e4 790
84281c31
A
791 ok $foo =~ /\G(..)/g;
792 iseq $1, 'bb';
cce850e4 793
84281c31
A
794 pos ($foo) = 5;
795 ok $foo =~ /\G(..)/g;
796 iseq $1, 'cd';
797 }
cce850e4 798
cce850e4 799
84281c31
A
800 {
801 $_ = '123x123';
802 my @res = /(\d*|x)/g;
803 local $" = '|';
804 iseq "@res", "123||x|123|", "0 match in alternation";
805 }
cce850e4 806
d9f424b2 807
84281c31
A
808 {
809 local $Message = "Match against temporaries (created via pp_helem())" .
810 " is safe";
811 ok {foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g;
812 iseq $1, "bar";
813 }
75685a94 814
d9f424b2 815
84281c31
A
816 {
817 local $Message = 'package $i inside (?{ }), ' .
818 'saved substrings and changing $_';
819 our @a = qw [foo bar];
820 our @b = ();
821 s/(\w)(?{push @b, $1})/,$1,/g for @a;
822 iseq "@b", "f o o b a r";
823 iseq "@a", ",f,,o,,o, ,b,,a,,r,";
824
825 local $Message = 'lexical $i inside (?{ }), ' .
826 'saved substrings and changing $_';
827 no warnings 'closure';
828 my @c = qw [foo bar];
829 my @d = ();
830 s/(\w)(?{push @d, $1})/,$1,/g for @c;
831 iseq "@d", "f o o b a r";
832 iseq "@c", ",f,,o,,o, ,b,,a,,r,";
d9f424b2
JH
833 }
834
d9f424b2 835
84281c31
A
836 {
837 local $Message = 'Brackets';
838 our $brackets;
839 $brackets = qr {
840 { (?> [^{}]+ | (??{ $brackets }) )* }
841 }x;
842
843 ok "{{}" =~ $brackets;
844 iseq $&, "{}";
845 ok "something { long { and } hairy" =~ $brackets;
846 iseq $&, "{ and }";
847 ok "something { long { and } hairy" =~ m/((??{ $brackets }))/;
848 iseq $&, "{ and }";
849 }
a4c04bdc 850
e2d8ce26 851
84281c31
A
852 {
853 $_ = "a-a\nxbb";
854 pos = 1;
855 nok m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg';
856 }
a4c04bdc 857
a4c04bdc 858
84281c31
A
859 {
860 local $Message = '\G anchor checks';
861 my $text = "aaXbXcc";
862 pos ($text) = 0;
863 ok $text !~ /\GXb*X/g;
864 }
a4c04bdc 865
a4c04bdc 866
84281c31
A
867 {
868 $_ = "xA\n" x 500;
869 nok /^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"';
a4c04bdc 870
84281c31
A
871 my $text = "abc dbf";
872 my @res = ($text =~ /.*?(b).*?\b/g);
873 iseq "@res", "b b", '\b is not special';
987aaf07 874 }
a4c04bdc 875
a4c04bdc 876
84281c31
A
877 {
878 local $Message = '\S, [\S], \s, [\s]';
879 my @a = map chr, 0 .. 255;
9d45b377
YO
880 my @b = grep m/\S/, @a;
881 my @c = grep m/[^\s]/, @a;
84281c31
A
882 iseq "@b", "@c";
883
884 @b = grep /\S/, @a;
885 @c = grep /[\S]/, @a;
886 iseq "@b", "@c";
887
888 @b = grep /\s/, @a;
889 @c = grep /[^\S]/, @a;
890 iseq "@b", "@c";
891
892 @b = grep /\s/, @a;
893 @c = grep /[\s]/, @a;
894 iseq "@b", "@c";
895 }
896 {
897 local $Message = '\D, [\D], \d, [\d]';
898 my @a = map chr, 0 .. 255;
899 my @b = grep /\D/, @a;
900 my @c = grep /[^\d]/, @a;
901 iseq "@b", "@c";
902
903 @b = grep /\D/, @a;
904 @c = grep /[\D]/, @a;
905 iseq "@b", "@c";
906
907 @b = grep /\d/, @a;
908 @c = grep /[^\D]/, @a;
909 iseq "@b", "@c";
910
911 @b = grep /\d/, @a;
912 @c = grep /[\d]/, @a;
913 iseq "@b", "@c";
914 }
915 {
916 local $Message = '\W, [\W], \w, [\w]';
917 my @a = map chr, 0 .. 255;
918 my @b = grep /\W/, @a;
919 my @c = grep /[^\w]/, @a;
920 iseq "@b", "@c";
921
922 @b = grep /\W/, @a;
923 @c = grep /[\W]/, @a;
924 iseq "@b", "@c";
925
926 @b = grep /\w/, @a;
927 @c = grep /[^\W]/, @a;
928 iseq "@b", "@c";
929
930 @b = grep /\w/, @a;
931 @c = grep /[\w]/, @a;
932 iseq "@b", "@c";
933 }
a4c04bdc 934
a4c04bdc 935
84281c31
A
936 {
937 # see if backtracking optimization works correctly
938 local $Message = 'Backtrack optimization';
939 ok "\n\n" =~ /\n $ \n/x;
940 ok "\n\n" =~ /\n* $ \n/x;
941 ok "\n\n" =~ /\n+ $ \n/x;
942 ok "\n\n" =~ /\n? $ \n/x;
943 ok "\n\n" =~ /\n*? $ \n/x;
944 ok "\n\n" =~ /\n+? $ \n/x;
945 ok "\n\n" =~ /\n?? $ \n/x;
946 ok "\n\n" !~ /\n*+ $ \n/x;
947 ok "\n\n" !~ /\n++ $ \n/x;
948 ok "\n\n" =~ /\n?+ $ \n/x;
949 }
a4c04bdc 950
a4c04bdc 951
84281c31
A
952 {
953 package S;
954 use overload '""' => sub {'Object S'};
955 sub new {bless []}
0f289c68 956
9d45b377 957 local $::Message = "Ref stringification";
84281c31
A
958 ::ok do { \my $v} =~ /^SCALAR/, "Scalar ref stringification";
959 ::ok do {\\my $v} =~ /^REF/, "Ref ref stringification";
960 ::ok [] =~ /^ARRAY/, "Array ref stringification";
961 ::ok {} =~ /^HASH/, "Hash ref stringification";
962 ::ok 'S' -> new =~ /^Object S/, "Object stringification";
963 }
a4c04bdc 964
a4c04bdc 965
84281c31
A
966 {
967 local $Message = "Test result of match used as match";
968 ok 'a1b' =~ ('xyz' =~ /y/);
969 iseq $`, 'a';
970 ok 'a1b' =~ ('xyz' =~ /t/);
971 iseq $`, 'a';
972 }
a4c04bdc 973
a4c04bdc 974
84281c31
A
975 {
976 local $Message = '"1" is not \s';
977 may_not_warn sub {ok ("1\n" x 102) !~ /^\s*\n/m};
978 }
a4c04bdc 979
a4c04bdc 980
84281c31
A
981 {
982 local $Message = '\s, [[:space:]] and [[:blank:]]';
983 my %space = (spc => " ",
984 tab => "\t",
985 cr => "\r",
986 lf => "\n",
987 ff => "\f",
988 # There's no \v, but the vertical tabulator miraculously seems to
989 # be 11 in both ASCII and EBCDIC.
990 vt => chr(11),
991 false => "space");
992
993 my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space;
994 my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space;
995 my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space;
996
997 iseq "@space0", "cr ff lf spc tab";
998 iseq "@space1", "cr ff lf spc tab vt";
999 iseq "@space2", "spc tab";
1000 }
a4c04bdc 1001
ff3f963a
KW
1002 {
1003 use charnames ":full";
1004 local $Message = 'Delayed interpolation of \N';
1005 my $r1 = qr/\N{THAI CHARACTER SARA I}/;
1006 my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}";
1007
1008 # Bug #56444
1009 ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/';
1010
1011 # Bug #62056
1012 ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/';
1013
1014 ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"';
1015 ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"';
1016 }
1017
37820adc
KW
1018 {
1019 use charnames ":full";
1020 local $Message = '[perl #74982] Period coming after \N{}';
1021 ok "\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.";
1022 ok "\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.";
1023 }
c9415951
YO
1024 {
1025 my $n= 50;
1026 # this must be a high number and go from 0 to N, as the bug we are looking for doesn't
1027 # seem to be predictable. Slight changes to the test make it fail earlier or later.
1028 foreach my $i (0 .. $n)
1029 {
1030 my $str= "\n" x $i;
1031 ok $str=~/.*\z/, "implict MBOL check string disable does not break things length=$i";
1032 }
1033 }
92f3d482
YO
1034 {
1035 # we are actually testing that we don't die when executing these patterns
1036 use utf8;
1037 my $e = "Böck";
1038 ok(utf8::is_utf8($e),"got a unicode string - rt75680");
1039
1040 ok($e !~ m/.*?[x]$/, "unicode string against /.*?[x]\$/ - rt75680");
1041 ok($e !~ m/.*?\p{Space}$/i, "unicode string against /.*?\\p{space}\$/i - rt75680");
1042 ok($e !~ m/.*?[xyz]$/, "unicode string against /.*?[xyz]\$/ - rt75680");
1043 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/, "unicode string against big pattern - rt75680");
1044 }
1045 {
1046 # we are actually testing that we don't die when executing these patterns
1047 my $e = "B\x{f6}ck";
1048 ok(!utf8::is_utf8($e), "got a latin string - rt75680");
1049
1050 ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680");
1051 ok($e !~ m/.*?\p{Space}$/i, "latin string against /.*?\\p{space}\$/i - rt75680");
1052 ok($e !~ m/.*?[xyz]$/,"latin string against /.*?[xyz]\$/ - rt75680");
1053 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/,"latin string against big pattern - rt75680");
1054 }
c920e018
A
1055
1056 {
1057 #
1058 # Tests for bug 77414.
1059 #
1060
1061 local $Message = '\p property after empty * match';
1062 {
1063 local $TODO = "Bug 77414";
1064 ok "1" =~ /\s*\pN/;
1065 ok "-" =~ /\s*\p{Dash}/;
1066 ok " " =~ /\w*\p{Blank}/;
1067 }
1068
1069 ok "1" =~ /\s*\pN+/;
1070 ok "-" =~ /\s*\p{Dash}{1}/;
1071 ok " " =~ /\w*\p{Blank}{1,4}/;
1072
1073 }
1074
7c17ea2f
KW
SKIP: {   # Some constructs with Latin1 characters cause a utf8 string not
          # to match itself in non-utf8
    if ($IS_EBCDIC) {
        # The block below runs eight tests, so eight must be skipped; the
        # previous count of 6 left the plan two tests short on EBCDIC.
        skip "Needs to be customized to run on EBCDIC", 8;
    }

    # $pattern and $utf8_pattern start out as the same compiled regex; only
    # the latter is then upgraded to utf8. Both stringify identically, which
    # is why the descriptions interpolate $pattern throughout.
    my $c = "\xc0";
    my $pattern = my $utf8_pattern = qr/((\xc0)+,?)/;
    utf8::upgrade($utf8_pattern);

    # Target not utf8.
    ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8";
    ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8";
    ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not";
    ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; pattern utf8, target not";

    # Same four checks with the target upgraded to utf8.
    utf8::upgrade($c);
    ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not";
    ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; target utf8, pattern not";
    ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8";
    ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8";
}
1093
8cc86590
KW
1094 SKIP: { # Make sure can override the formatting
1095 if ($IS_EBCDIC) {
1096 skip "Needs to be customized to run on EBCDIC", 2;
1097 }
1098 use feature 'unicode_strings';
1099 ok "\xc0" =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/';
1100 ok "\xc0" !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/';
1101 }
1102
704f71be
B
{
    # Test that a regex followed by an operator and/or a statement modifier works.
    # These tests use string-eval so that a clean error is reported on failure
    # (without the string eval the test script itself might be unparseable).

    # Note: these tests check behaviour that is currently valid syntax.
    # If a new regex modifier is added and a test fails then there is a
    # backwards-compatibility issue.
    # Note-2: a new deprecation warning was added for this with commit
    # e6897b1a5db0410e387ccbf677e89fc4a1d8c97a, which indicates that this
    # syntax will be removed in 5.16. When this happens the tests can be removed.

    no warnings 'syntax';

    # m// immediately followed by a word operator or statement modifier.
    iseq( eval q#my $r = "a" =~ m/a/lt 2;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by lt");
    iseq( eval q#my $r = "a" =~ m/a/le 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by le");
    iseq( eval q#my $r = "a" =~ m/a/eq 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by eq");
    iseq( eval q#my $r = "a" =~ m/a/ne 0;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by ne");
    iseq( eval q#my $r = "a" =~ m/a/and 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by and");
    iseq( eval q#my $r = "a" =~ m/a/unless 0;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by unless");
    iseq( eval q#my $c = 1; my $r; $r = "a" =~ m/a/while $c--;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by while");
    iseq( eval q#my $c = 0; my $r; $r = "a" =~ m/a/until $c++;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by until");
    iseq( eval q#my $r; $r = "a" =~ m/a/for 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by for");
    iseq( eval q#my $r; $r = "a" =~ m/a/foreach 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by foreach");

    # The same for s///.
    # NOTE(review): there is no "eq" case here, presumably because a leading
    # "e" after s/// would be read as the /e modifier -- confirm before adding one.
    iseq( eval q#my $t = "a"; my $r = $t =~ s/a//lt 2;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by lt");
    iseq( eval q#my $t = "a"; my $r = $t =~ s/a//le 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by le");
    iseq( eval q#my $t = "a"; my $r = $t =~ s/a//ne 0;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by ne");
    iseq( eval q#my $t = "a"; my $r = $t =~ s/a//and 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by and");
    iseq( eval q#my $t = "a"; my $r = $t =~ s/a//unless 0;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by unless");

    iseq( eval q#my $c = 1; my $r; my $t = "a"; $r = $t =~ s/a//while $c--;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by while");
    iseq( eval q#my $c = 0; my $r; my $t = "a"; $r = $t =~ s/a//until $c++;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by until");
    iseq( eval q#my $r; my $t = "a"; $r = $t =~ s/a//for 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by for");
    # Previously this repeated the "for" case verbatim, so "foreach" after
    # s/// was never actually exercised.
    iseq( eval q#my $r; my $t = "a"; $r = $t =~ s/a//foreach 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by foreach");
}
1137
5b6010b3
YO
1138 {
1139 my $str= "\x{100}";
1140 chop $str;
1141 my $qr= qr/$str/;
1142 iseq( "$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212" );
1143 $str= "";
1144 $qr= qr/$str/;
1145 iseq( "$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212" )
1146
1147 }
1148
84281c31
A
1149} # End of sub run_tests
1150
11511;