This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Eliminate the global override $DiePattern from t/re/{ReTest.pl,pat.t}
[perl5.git] / t / re / pat.t
... / ...
CommitLineData
1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t. If you want to add a test
5# that does fit that format, add it to re/re_tests, not here. Tests for \N
6# should be added here because they are treated as single quoted strings
7# there, which means they avoid the lexer which otherwise would look at them.
8
9use strict;
10use warnings;
11use 5.010;
12
13
14sub run_tests;
15
16$| = 1;
17
18
19BEGIN {
20 chdir 't' if -d 't';
21 @INC = ('../lib','.');
22 do "re/ReTest.pl" or die $@;
23}
24
25
26plan tests => 426; # Update this when adding/deleting tests.
27
28run_tests() unless caller;
29
30#
31# Tests start here.
32#
33sub run_tests {
34
35 {
36
37 my $x = "abc\ndef\n";
38
39 ok $x =~ /^abc/, qq ["$x" =~ /^abc/];
40 ok $x !~ /^def/, qq ["$x" !~ /^def/];
41
42 # used to be a test for $*
43 ok $x =~ /^def/m, qq ["$x" =~ /^def/m];
44
45 nok $x =~ /^xxx/, qq ["$x" =~ /^xxx/];
46 nok $x !~ /^abc/, qq ["$x" !~ /^abc/];
47
48 ok $x =~ /def/, qq ["$x" =~ /def/];
49 nok $x !~ /def/, qq ["$x" !~ /def/];
50
51 ok $x !~ /.def/, qq ["$x" !~ /.def/];
52 nok $x =~ /.def/, qq ["$x" =~ /.def/];
53
54 ok $x =~ /\ndef/, qq ["$x" =~ /\ndef/];
55 nok $x !~ /\ndef/, qq ["$x" !~ /\ndef/];
56 }
57
58 {
59 $_ = '123';
60 ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/];
61 }
62
63 {
64 $_ = 'aaabbbccc';
65 ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc',
66 qq [\$_ = '$_'; /(a*b*)(c*)/];
67 ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/];
68 nok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/];
69
70 $_ = 'aaabccc';
71 ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/];
72 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/];
73
74 $_ = 'aaaccc';
75 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/];
76 nok /a*b+c*/, qq [\$_ = '$_'; /a*b+c*/];
77
78 $_ = 'abcdef';
79 ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/];
80 ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/];
81 ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|];
82 ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/];
83 }
84
85 {
86 # used to be a test for $*
87 ok "ab\ncd\n" =~ /^cd/m, qq ["ab\ncd\n" =~ /^cd/m];
88 }
89
90 {
91 our %XXX = map {($_ => $_)} 123, 234, 345;
92
93 our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3');
94 while ($_ = shift(@XXX)) {
95 my $f = index ($_, 'not') >= 0 ? \&nok : \&ok;
96 my $r = m?(.*)?;
97 &$f ($r, "?(.*)?");
98 /not/ && reset;
99 if (/not ok 2/) {
100 if ($^O eq 'VMS') {
101 $_ = shift(@XXX);
102 }
103 else {
104 reset 'X';
105 }
106 }
107 }
108
109 SKIP: {
110 if ($^O eq 'VMS') {
111 skip "Reset 'X'", 1;
112 }
113 ok !keys %XXX, "%XXX is empty";
114 }
115
116 }
117
118 {
119 local $Message = "Test empty pattern";
120 my $xyz = 'xyz';
121 my $cde = 'cde';
122
123 $cde =~ /[^ab]*/;
124 $xyz =~ //;
125 iseq $&, $xyz;
126
127 my $foo = '[^ab]*';
128 $cde =~ /$foo/;
129 $xyz =~ //;
130 iseq $&, $xyz;
131
132 $cde =~ /$foo/;
133 my $null;
134 no warnings 'uninitialized';
135 $xyz =~ /$null/;
136 iseq $&, $xyz;
137
138 $null = "";
139 $xyz =~ /$null/;
140 iseq $&, $xyz;
141 }
142
143 {
144 local $Message = q !Check $`, $&, $'!;
145 $_ = 'abcdefghi';
146 /def/; # optimized up to cmd
147 iseq "$`:$&:$'", 'abc:def:ghi';
148
149 no warnings 'void';
150 /cde/ + 0; # optimized only to spat
151 iseq "$`:$&:$'", 'ab:cde:fghi';
152
153 /[d][e][f]/; # not optimized
154 iseq "$`:$&:$'", 'abc:def:ghi';
155 }
156
157 {
158 $_ = 'now is the {time for all} good men to come to.';
159 / {([^}]*)}/;
160 iseq $1, 'time for all', "Match braces";
161 }
162
163 {
164 local $Message = "{N,M} quantifier";
165 $_ = 'xxx {3,4} yyy zzz';
166 ok /( {3,4})/;
167 iseq $1, ' ';
168 ok !/( {4,})/;
169 ok /( {2,3}.)/;
170 iseq $1, ' y';
171 ok /(y{2,3}.)/;
172 iseq $1, 'yyy ';
173 ok !/x {3,4}/;
174 ok !/^xxx {3,4}/;
175 }
176
177 {
178 local $Message = "Test /g";
179 local $" = ":";
180 $_ = "now is the time for all good men to come to.";
181 my @words = /(\w+)/g;
182 my $exp = "now:is:the:time:for:all:good:men:to:come:to";
183
184 iseq "@words", $exp;
185
186 @words = ();
187 while (/\w+/g) {
188 push (@words, $&);
189 }
190 iseq "@words", $exp;
191
192 @words = ();
193 pos = 0;
194 while (/to/g) {
195 push(@words, $&);
196 }
197 iseq "@words", "to:to";
198
199 pos $_ = 0;
200 @words = /to/g;
201 iseq "@words", "to:to";
202 }
203
204 {
205 $_ = "abcdefghi";
206
207 my $pat1 = 'def';
208 my $pat2 = '^def';
209 my $pat3 = '.def.';
210 my $pat4 = 'abc';
211 my $pat5 = '^abc';
212 my $pat6 = 'abc$';
213 my $pat7 = 'ghi';
214 my $pat8 = '\w*ghi';
215 my $pat9 = 'ghi$';
216
217 my $t1 = my $t2 = my $t3 = my $t4 = my $t5 =
218 my $t6 = my $t7 = my $t8 = my $t9 = 0;
219
220 for my $iter (1 .. 5) {
221 $t1++ if /$pat1/o;
222 $t2++ if /$pat2/o;
223 $t3++ if /$pat3/o;
224 $t4++ if /$pat4/o;
225 $t5++ if /$pat5/o;
226 $t6++ if /$pat6/o;
227 $t7++ if /$pat7/o;
228 $t8++ if /$pat8/o;
229 $t9++ if /$pat9/o;
230 }
231 my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9";
232 iseq $x, '505550555', "Test /o";
233 }
234
235
236 SKIP: {
237 my $xyz = 'xyz';
238 ok "abc" =~ /^abc$|$xyz/, "| after \$";
239
240 # perl 4.009 says "unmatched ()"
241 local $Message = '$ inside ()';
242
243 my $result;
244 eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"';
245 iseq $@, "" or skip "eval failed", 1;
246 iseq $result, "abc:bc";
247 }
248
249
250 {
251 local $Message = "Scalar /g";
252 $_ = "abcfooabcbar";
253
254 ok /abc/g && $` eq "";
255 ok /abc/g && $` eq "abcfoo";
256 ok !/abc/g;
257
258 local $Message = "Scalar /gi";
259 pos = 0;
260 ok /ABC/gi && $` eq "";
261 ok /ABC/gi && $` eq "abcfoo";
262 ok !/ABC/gi;
263
264 local $Message = "Scalar /g";
265 pos = 0;
266 ok /abc/g && $' eq "fooabcbar";
267 ok /abc/g && $' eq "bar";
268
269 $_ .= '';
270 my @x = /abc/g;
271 iseq @x, 2, "/g reset after assignment";
272 }
273
274 {
275 local $Message = '/g, \G and pos';
276 $_ = "abdc";
277 pos $_ = 2;
278 /\Gc/gc;
279 iseq pos $_, 2;
280 /\Gc/g;
281 ok !defined pos $_;
282 }
283
284 {
285 local $Message = '(?{ })';
286 our $out = 1;
287 'abc' =~ m'a(?{ $out = 2 })b';
288 iseq $out, 2;
289
290 $out = 1;
291 'abc' =~ m'a(?{ $out = 3 })c';
292 iseq $out, 1;
293 }
294
295
296 {
297 $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6';
298 my @out = /(?<!foo)bar./g;
299 iseq "@out", 'bar2 barf', "Negative lookbehind";
300 }
301
302 {
303 local $Message = "REG_INFTY tests";
304 # Tests which depend on REG_INFTY
305 $::reg_infty = $Config {reg_infty} // 32767;
306 $::reg_infty_m = $::reg_infty - 1;
307 $::reg_infty_p = $::reg_infty + 1;
308 $::reg_infty_m = $::reg_infty_m; # Suppress warning.
309
310 # As well as failing if the pattern matches do unexpected things, the
311 # next three tests will fail if you should have picked up a lower-than-
312 # default value for $reg_infty from Config.pm, but have not.
313
314 eval_ok q (('aaa' =~ /(a{1,$::reg_infty_m})/)[0] eq 'aaa');
315 eval_ok q (('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/);
316 eval_ok q (('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/);
317 eval "'aaa' =~ /a{1,$::reg_infty}/";
318 ok $@ =~ /^\QQuantifier in {,} bigger than/;
319 eval "'aaa' =~ /a{1,$::reg_infty_p}/";
320 ok $@ =~ /^\QQuantifier in {,} bigger than/;
321 }
322
323 {
324 # Poke a couple more parse failures
325 my $context = 'x' x 256;
326 eval qq("${context}y" =~ /(?<=$context)y/);
327 ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit";
328 }
329
330 {
331 # Long Monsters
332 for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory
333 my $a = 'a' x $l;
334 local $Message = "Long monster, length = $l";
335 ok "ba$a=" =~ /a$a=/;
336 nok "b$a=" =~ /a$a=/;
337 ok "b$a=" =~ /ba+=/;
338
339 ok "ba$a=" =~ /b(?:a|b)+=/;
340 }
341 }
342
343
344 {
345 # 20000 nodes, each taking 3 words per string, and 1 per branch
346 my $long_constant_len = join '|', 12120 .. 32645;
347 my $long_var_len = join '|', 8120 .. 28645;
348 my %ans = ( 'ax13876y25677lbc' => 1,
349 'ax13876y25677mcb' => 0, # not b.
350 'ax13876y35677nbc' => 0, # Num too big
351 'ax13876y25677y21378obc' => 1,
352 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o]
353 'ax13876y25677y21378y21378kbc' => 1,
354 'ax13876y25677y21378y21378kcb' => 0, # Not b.
355 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs
356 );
357
358 for (keys %ans) {
359 local $Message = "20000 nodes, const-len '$_'";
360 ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o);
361
362 $Message = "20000 nodes, var-len '$_'";
363 ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o);
364 }
365 }
366
367 {
368 local $Message = "Complicated backtracking";
369 $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e";
370 my $expect = "(bla()) ((l)u((e))) (l(e)e)";
371
372 use vars '$c';
373 sub matchit {
374 m/
375 (
376 \(
377 (?{ $c = 1 }) # Initialize
378 (?:
379 (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop
380 (?!
381 ) # Fail: will unwind one iteration back
382 )
383 (?:
384 [^()]+ # Match a big chunk
385 (?=
386 [()]
387 ) # Do not try to match subchunks
388 |
389 \(
390 (?{ ++$c })
391 |
392 \)
393 (?{ --$c })
394 )
395 )+ # This may not match with different subblocks
396 )
397 (?(?{ $c != 0 })
398 (?!
399 ) # Fail
400 ) # Otherwise the chunk 1 may succeed with $c>0
401 /xg;
402 }
403
404 my @ans = ();
405 my $res;
406 push @ans, $res while $res = matchit;
407 iseq "@ans", "1 1 1";
408
409 @ans = matchit;
410 iseq "@ans", $expect;
411
412 local $Message = "Recursion with (??{ })";
413 our $matched;
414 $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/;
415
416 @ans = my @ans1 = ();
417 push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g;
418
419 iseq "@ans", "1 1 1";
420 iseq "@ans1", $expect;
421
422 @ans = m/$matched/g;
423 iseq "@ans", $expect;
424
425 }
426
427 {
428 ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/';
429 }
430
431 {
432 my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad
433 iseq "@ans", 'a/ b', "Stack may be bad";
434 }
435
436 {
437 local $Message = "Eval-group not allowed at runtime";
438 my $code = '{$blah = 45}';
439 our $blah = 12;
440 eval { /(?$code)/ };
441 ok $@ && $@ =~ /not allowed at runtime/ && $blah == 12;
442
443 $blah = 12;
444 my $res = eval { "xx" =~ /(?$code)/o };
445 {
446 no warnings 'uninitialized';
447 local $Message = "$Message '$@', '$res', '$blah'";
448 ok $@ && $@ =~ /not allowed at runtime/ && $blah == 12;
449 }
450
451 $code = '=xx';
452 $blah = 12;
453 $res = eval { "xx" =~ /(?$code)/o };
454 {
455 no warnings 'uninitialized';
456 local $Message = "$Message '$@', '$res', '$blah'";
457 ok !$@ && $res;
458 }
459
460 $code = '{$blah = 45}';
461 $blah = 12;
462 eval "/(?$code)/";
463 iseq $blah, 45;
464
465 $blah = 12;
466 /(?{$blah = 45})/;
467 iseq $blah, 45;
468 }
469
470 {
471 local $Message = "Pos checks";
472 my $x = 'banana';
473 $x =~ /.a/g;
474 iseq pos ($x), 2;
475
476 $x =~ /.z/gc;
477 iseq pos ($x), 2;
478
479 sub f {
480 my $p = $_[0];
481 return $p;
482 }
483
484 $x =~ /.a/g;
485 iseq f (pos ($x)), 4;
486 }
487
488 {
489 local $Message = 'Checking $^R';
490 our $x = $^R = 67;
491 'foot' =~ /foo(?{$x = 12; 75})[t]/;
492 iseq $^R, 75;
493
494 $x = $^R = 67;
495 'foot' =~ /foo(?{$x = 12; 75})[xy]/;
496 ok $^R eq '67' && $x eq '12';
497
498 $x = $^R = 67;
499 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/;
500 ok $^R eq '79' && $x eq '12';
501 }
502
503 {
504 iseq qr/\b\v$/i, '(?^i:\b\v$)', 'qr/\b\v$/i';
505 iseq qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s';
506 iseq qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m';
507 iseq qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x';
508 iseq qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism';
509 iseq qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/';
510 }
511
{   # Test that charset modifiers work, and are interpolated.
    #
    # Each check compares the stringified form of a qr// object with the
    # expected "(?^FLAGS:...)" text.  The pragmas below are lexically
    # scoped, so the order of the statements is significant.
    iseq qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier';
    iseq qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles';
    iseq qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles';
    iseq qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles';
    iseq qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles';

    # Compiled with neither pragma in effect.
    my $dual = qr/\b\v$/;
    use locale;
    my $locale = qr/\b\v$/;
    iseq $locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale';
    no locale;

    use feature 'unicode_strings';
    my $unicode = qr/\b\v$/;
    iseq $unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings';
    # The outer pattern here is compiled under unicode_strings (not locale),
    # which is why the expected outer flag is "u"; the description says so.
    iseq qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under unicode_strings';
    iseq qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings';

    no feature 'unicode_strings';
    iseq qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings';
    iseq qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings';

    use locale;
    iseq qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale';
    iseq qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale';
}
539
540
541 {
542 local $Message = "Look around";
543 $_ = 'xabcx';
544 SKIP:
545 foreach my $ans ('', 'c') {
546 ok /(?<=(?=a)..)((?=c)|.)/g or skip "Match failed", 1;
547 iseq $1, $ans;
548 }
549 }
550
{
    # An alternation with empty branches: successive /g matches against
    # 'a' are expected to yield '', 'a' and '' in that order.
    local $Message = "Empty clause";
    $_ = 'a';
    # The loop is labelled SKIP, like the "Look around" loop above, so
    # that the skip() call has a SKIP construct to leave when a match
    # fails.  (skip comes from re/ReTest.pl; presumably it exits via
    # "last SKIP" as Test::More's does -- confirm against ReTest.pl.)
    SKIP:
    foreach my $ans ('', 'a', '') {
        ok /^|a|$/g or skip "Match failed", 1;
        iseq $&, $ans;
    }
}
559
{
    local $Message = "Prefixify";

    # prefixify PATH, OLD, NEW, EXPECTED
    # Substitutes the first literal (\Q...\E quoted) occurrence of OLD in
    # a copy of PATH with NEW, checks that the substitution happened and
    # that the result equals EXPECTED.  The comparison is skipped when
    # the substitution did not match.
    sub prefixify {
        my ($path, $old_prefix, $new_prefix, $expected) = @_;
        SKIP: {
            my $replaced = $path =~ s/\Q$old_prefix\E/$new_prefix/;
            ok $replaced or skip "Match failed", 1;
            iseq $path, $expected;
        }
    }

    prefixify (@$_) for
        ['/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'],
        ['/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'];
}
573
574 {
575 $_ = 'var="foo"';
576 /(\")/;
577 ok $1 && /$1/, "Capture a quote";
578 }
579
580 {
581 no warnings 'closure';
582 local $Message = '(?{ $var } refers to package vars';
583 package aa;
584 our $c = 2;
585 $::c = 3;
586 '' =~ /(?{ $c = 4 })/;
587 main::iseq $c, 4;
588 main::iseq $::c, 3;
589 }
590
591 {
592 must_die 'q(a:[b]:) =~ /[x[:foo:]]/',
593 qr/POSIX class \[:[^:]+:\] unknown in regex/,
594 'POSIX class [: :] must have valid name';
595
596 for my $d (qw [= .]) {
597 must_die "/[[${d}foo${d}]]/",
598 qr/\QPOSIX syntax [$d $d] is reserved for future extensions/,
599 "POSIX syntax [[$d $d]] is an error";
600 }
601 }
602
603
604 {
605 # test if failure of patterns returns empty list
606 local $Message = "Failed pattern returns empty list";
607 $_ = 'aaa';
608 @_ = /bbb/;
609 iseq "@_", "";
610
611 @_ = /bbb/g;
612 iseq "@_", "";
613
614 @_ = /(bbb)/;
615 iseq "@_", "";
616
617 @_ = /(bbb)/g;
618 iseq "@_", "";
619 }
620
621
622 {
623 local $Message = '@- and @+ tests';
624
625 /a(?=.$)/;
626 iseq $#+, 0;
627 iseq $#-, 0;
628 iseq $+ [0], 2;
629 iseq $- [0], 1;
630 ok !defined $+ [1] && !defined $- [1] &&
631 !defined $+ [2] && !defined $- [2];
632
633 /a(a)(a)/;
634 iseq $#+, 2;
635 iseq $#-, 2;
636 iseq $+ [0], 3;
637 iseq $- [0], 0;
638 iseq $+ [1], 2;
639 iseq $- [1], 1;
640 iseq $+ [2], 3;
641 iseq $- [2], 2;
642 ok !defined $+ [3] && !defined $- [3] &&
643 !defined $+ [4] && !defined $- [4];
644
645 # Exists has a special check for @-/@+ - bug 45147
646 ok exists $-[0];
647 ok exists $+[0];
648 ok exists $-[2];
649 ok exists $+[2];
650 ok !exists $-[3];
651 ok !exists $+[3];
652 ok exists $-[-1];
653 ok exists $+[-1];
654 ok exists $-[-3];
655 ok exists $+[-3];
656 ok !exists $-[-4];
657 ok !exists $+[-4];
658
659 /.(a)(b)?(a)/;
660 iseq $#+, 3;
661 iseq $#-, 3;
662 iseq $+ [1], 2;
663 iseq $- [1], 1;
664 iseq $+ [3], 3;
665 iseq $- [3], 2;
666 ok !defined $+ [2] && !defined $- [2] &&
667 !defined $+ [4] && !defined $- [4];
668
669
670 /.(a)/;
671 iseq $#+, 1;
672 iseq $#-, 1;
673 iseq $+ [0], 2;
674 iseq $- [0], 0;
675 iseq $+ [1], 2;
676 iseq $- [1], 1;
677 ok !defined $+ [2] && !defined $- [2] &&
678 !defined $+ [3] && !defined $- [3];
679
680 /.(a)(ba*)?/;
681 iseq $#+, 2;
682 iseq $#-, 1;
683 }
684
685
686 foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)', '@- = qw (foo bar)') {
687 must_die($_, qr/^Modification of a read-only value attempted/,
688 'Elements of @- and @+ are read-only');
689 }
690
691
692 {
693 local $Message = '\G testing';
694 $_ = 'aaa';
695 pos = 1;
696 my @a = /\Ga/g;
697 iseq "@a", "a a";
698
699 my $str = 'abcde';
700 pos $str = 2;
701 ok $str !~ /^\G/;
702 ok $str !~ /^.\G/;
703 ok $str =~ /^..\G/;
704 ok $str !~ /^...\G/;
705 ok $str =~ /\G../ && $& eq 'cd';
706
707 local $TODO = $running_as_thread;
708 ok $str =~ /.\G./ && $& eq 'bc';
709 }
710
711
712 {
713 local $Message = 'pos inside (?{ })';
714 my $str = 'abcde';
715 our ($foo, $bar);
716 ok $str =~ /b(?{$foo = $_; $bar = pos})c/;
717 iseq $foo, $str;
718 iseq $bar, 2;
719 ok !defined pos ($str);
720
721 undef $foo;
722 undef $bar;
723 pos $str = undef;
724 ok $str =~ /b(?{$foo = $_; $bar = pos})c/g;
725 iseq $foo, $str;
726 iseq $bar, 2;
727 iseq pos ($str), 3;
728
729 $_ = $str;
730 undef $foo;
731 undef $bar;
732 ok /b(?{$foo = $_; $bar = pos})c/;
733 iseq $foo, $str;
734 iseq $bar, 2;
735
736 undef $foo;
737 undef $bar;
738 ok /b(?{$foo = $_; $bar = pos})c/g;
739 iseq $foo, $str;
740 iseq $bar, 2;
741 iseq pos, 3;
742
743 undef $foo;
744 undef $bar;
745 pos = undef;
746 1 while /b(?{$foo = $_; $bar = pos})c/g;
747 iseq $foo, $str;
748 iseq $bar, 2;
749 ok !defined pos;
750
751 undef $foo;
752 undef $bar;
753 $_ = 'abcde|abcde';
754 ok s/b(?{$foo = $_; $bar = pos})c/x/g;
755 iseq $foo, 'abcde|abcde';
756 iseq $bar, 8;
757 iseq $_, 'axde|axde';
758
759 # List context:
760 $_ = 'abcde|abcde';
761 our @res;
762 () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g;
763 @res = map {defined $_ ? "'$_'" : 'undef'} @res;
764 iseq "@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'";
765
766 @res = ();
767 () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g;
768 @res = map {defined $_ ? "'$_'" : 'undef'} @res;
769 iseq "@res", "'' 'ab' 'cde|abcde' " .
770 "'' 'abc' 'de|abcde' " .
771 "'abcd' 'e|' 'abcde' " .
772 "'abcde|' 'ab' 'cde' " .
773 "'abcde|' 'abc' 'de'" ;
774 }
775
776
777 {
778 local $Message = '\G anchor checks';
779 my $foo = 'aabbccddeeffgg';
780 pos ($foo) = 1;
781 {
782 local $TODO = $running_as_thread;
783 no warnings 'uninitialized';
784 ok $foo =~ /.\G(..)/g;
785 iseq $1, 'ab';
786
787 pos ($foo) += 1;
788 ok $foo =~ /.\G(..)/g;
789 iseq $1, 'cc';
790
791 pos ($foo) += 1;
792 ok $foo =~ /.\G(..)/g;
793 iseq $1, 'de';
794
795 ok $foo =~ /\Gef/g;
796 }
797
798 undef pos $foo;
799 ok $foo =~ /\G(..)/g;
800 iseq $1, 'aa';
801
802 ok $foo =~ /\G(..)/g;
803 iseq $1, 'bb';
804
805 pos ($foo) = 5;
806 ok $foo =~ /\G(..)/g;
807 iseq $1, 'cd';
808 }
809
810
811 {
812 $_ = '123x123';
813 my @res = /(\d*|x)/g;
814 local $" = '|';
815 iseq "@res", "123||x|123|", "0 match in alternation";
816 }
817
818
819 {
820 local $Message = "Match against temporaries (created via pp_helem())" .
821 " is safe";
822 ok {foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g;
823 iseq $1, "bar";
824 }
825
826
827 {
828 local $Message = 'package $i inside (?{ }), ' .
829 'saved substrings and changing $_';
830 our @a = qw [foo bar];
831 our @b = ();
832 s/(\w)(?{push @b, $1})/,$1,/g for @a;
833 iseq "@b", "f o o b a r";
834 iseq "@a", ",f,,o,,o, ,b,,a,,r,";
835
836 local $Message = 'lexical $i inside (?{ }), ' .
837 'saved substrings and changing $_';
838 no warnings 'closure';
839 my @c = qw [foo bar];
840 my @d = ();
841 s/(\w)(?{push @d, $1})/,$1,/g for @c;
842 iseq "@d", "f o o b a r";
843 iseq "@c", ",f,,o,,o, ,b,,a,,r,";
844 }
845
846
847 {
848 local $Message = 'Brackets';
849 our $brackets;
850 $brackets = qr {
851 { (?> [^{}]+ | (??{ $brackets }) )* }
852 }x;
853
854 ok "{{}" =~ $brackets;
855 iseq $&, "{}";
856 ok "something { long { and } hairy" =~ $brackets;
857 iseq $&, "{ and }";
858 ok "something { long { and } hairy" =~ m/((??{ $brackets }))/;
859 iseq $&, "{ and }";
860 }
861
862
863 {
864 $_ = "a-a\nxbb";
865 pos = 1;
866 nok m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg';
867 }
868
869
870 {
871 local $Message = '\G anchor checks';
872 my $text = "aaXbXcc";
873 pos ($text) = 0;
874 ok $text !~ /\GXb*X/g;
875 }
876
877
878 {
879 $_ = "xA\n" x 500;
880 nok /^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"';
881
882 my $text = "abc dbf";
883 my @res = ($text =~ /.*?(b).*?\b/g);
884 iseq "@res", "b b", '\b is not special';
885 }
886
887
888 {
889 local $Message = '\S, [\S], \s, [\s]';
890 my @a = map chr, 0 .. 255;
891 my @b = grep m/\S/, @a;
892 my @c = grep m/[^\s]/, @a;
893 iseq "@b", "@c";
894
895 @b = grep /\S/, @a;
896 @c = grep /[\S]/, @a;
897 iseq "@b", "@c";
898
899 @b = grep /\s/, @a;
900 @c = grep /[^\S]/, @a;
901 iseq "@b", "@c";
902
903 @b = grep /\s/, @a;
904 @c = grep /[\s]/, @a;
905 iseq "@b", "@c";
906 }
907 {
908 local $Message = '\D, [\D], \d, [\d]';
909 my @a = map chr, 0 .. 255;
910 my @b = grep /\D/, @a;
911 my @c = grep /[^\d]/, @a;
912 iseq "@b", "@c";
913
914 @b = grep /\D/, @a;
915 @c = grep /[\D]/, @a;
916 iseq "@b", "@c";
917
918 @b = grep /\d/, @a;
919 @c = grep /[^\D]/, @a;
920 iseq "@b", "@c";
921
922 @b = grep /\d/, @a;
923 @c = grep /[\d]/, @a;
924 iseq "@b", "@c";
925 }
926 {
927 local $Message = '\W, [\W], \w, [\w]';
928 my @a = map chr, 0 .. 255;
929 my @b = grep /\W/, @a;
930 my @c = grep /[^\w]/, @a;
931 iseq "@b", "@c";
932
933 @b = grep /\W/, @a;
934 @c = grep /[\W]/, @a;
935 iseq "@b", "@c";
936
937 @b = grep /\w/, @a;
938 @c = grep /[^\W]/, @a;
939 iseq "@b", "@c";
940
941 @b = grep /\w/, @a;
942 @c = grep /[\w]/, @a;
943 iseq "@b", "@c";
944 }
945
946
947 {
948 # see if backtracking optimization works correctly
949 local $Message = 'Backtrack optimization';
950 ok "\n\n" =~ /\n $ \n/x;
951 ok "\n\n" =~ /\n* $ \n/x;
952 ok "\n\n" =~ /\n+ $ \n/x;
953 ok "\n\n" =~ /\n? $ \n/x;
954 ok "\n\n" =~ /\n*? $ \n/x;
955 ok "\n\n" =~ /\n+? $ \n/x;
956 ok "\n\n" =~ /\n?? $ \n/x;
957 ok "\n\n" !~ /\n*+ $ \n/x;
958 ok "\n\n" !~ /\n++ $ \n/x;
959 ok "\n\n" =~ /\n?+ $ \n/x;
960 }
961
962
963 {
964 package S;
965 use overload '""' => sub {'Object S'};
966 sub new {bless []}
967
968 local $::Message = "Ref stringification";
969 ::ok do { \my $v} =~ /^SCALAR/, "Scalar ref stringification";
970 ::ok do {\\my $v} =~ /^REF/, "Ref ref stringification";
971 ::ok [] =~ /^ARRAY/, "Array ref stringification";
972 ::ok {} =~ /^HASH/, "Hash ref stringification";
973 ::ok 'S' -> new =~ /^Object S/, "Object stringification";
974 }
975
976
977 {
978 local $Message = "Test result of match used as match";
979 ok 'a1b' =~ ('xyz' =~ /y/);
980 iseq $`, 'a';
981 ok 'a1b' =~ ('xyz' =~ /t/);
982 iseq $`, 'a';
983 }
984
985
{
    # A long run of "1\n" lines must not match /^\s*\n/m: no line is
    # empty or whitespace-only.  may_not_warn comes from re/ReTest.pl;
    # presumably it also checks that the callback emits no warnings.
    local $Message = '"1" is not \s';
    # The whole match expression is wrapped in ok(...).  Written as
    #     ok ("1\n" x 102) !~ /^\s*\n/m
    # Perl treats "ok (...)" as a complete function call, so only the
    # (always true) string was tested and the pattern was applied to
    # ok's return value instead.
    may_not_warn sub {ok(("1\n" x 102) !~ /^\s*\n/m)};
}
990
991
992 {
993 local $Message = '\s, [[:space:]] and [[:blank:]]';
994 my %space = (spc => " ",
995 tab => "\t",
996 cr => "\r",
997 lf => "\n",
998 ff => "\f",
999 # There's no \v but the vertical tabulator seems miraculously
1000 # be 11 both in ASCII and EBCDIC.
1001 vt => chr(11),
1002 false => "space");
1003
1004 my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space;
1005 my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space;
1006 my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space;
1007
1008 iseq "@space0", "cr ff lf spc tab";
1009 iseq "@space1", "cr ff lf spc tab vt";
1010 iseq "@space2", "spc tab";
1011 }
1012
1013 {
1014 use charnames ":full";
1015 local $Message = 'Delayed interpolation of \N';
1016 my $r1 = qr/\N{THAI CHARACTER SARA I}/;
1017 my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}";
1018
1019 # Bug #56444
1020 ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/';
1021
1022 # Bug #62056
1023 ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/';
1024
1025 ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"';
1026 ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"';
1027 }
1028
1029 {
1030 use charnames ":full";
1031 local $Message = '[perl #74982] Period coming after \N{}';
1032 ok "\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.";
1033 ok "\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.";
1034 }
{
    # Match /.*\z/ against strings of 0 .. $max_len newlines.  The upper
    # bound has to be high and the range has to start at 0: the bug being
    # looked for does not seem to be predictable, and slight changes to
    # the test make it fail earlier or later.
    my $max_len = 50;
    for my $len (0 .. $max_len) {
        my $newlines = "\n" x $len;
        ok $newlines =~ /.*\z/, "implicit MBOL check string disable does not break things length=$len";
    }
}
1045 {
1046 # we are actually testing that we dont die when executing these patterns
1047 use utf8;
1048 my $e = "Böck";
1049 ok(utf8::is_utf8($e),"got a unicode string - rt75680");
1050
1051 ok($e !~ m/.*?[x]$/, "unicode string against /.*?[x]\$/ - rt75680");
1052 ok($e !~ m/.*?\p{Space}$/i, "unicode string against /.*?\\p{space}\$/i - rt75680");
1053 ok($e !~ m/.*?[xyz]$/, "unicode string against /.*?[xyz]\$/ - rt75680");
1054 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/, "unicode string against big pattern - rt75680");
1055 }
1056 {
1057 # we are actually testing that we dont die when executing these patterns
1058 my $e = "B\x{f6}ck";
1059 ok(!utf8::is_utf8($e), "got a latin string - rt75680");
1060
1061 ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680");
1062 ok($e !~ m/.*?\p{Space}$/i, "latin string against /.*?\\p{space}\$/i - rt75680");
1063 ok($e !~ m/.*?[xyz]$/,"latin string against /.*?[xyz]\$/ - rt75680");
1064 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/,"latin string against big pattern - rt75680");
1065 }
1066
1067 {
1068 #
1069 # Tests for bug 77414.
1070 #
1071
1072 local $Message = '\p property after empty * match';
1073 {
1074 local $TODO = "Bug 77414";
1075 ok "1" =~ /\s*\pN/;
1076 ok "-" =~ /\s*\p{Dash}/;
1077 ok " " =~ /\w*\p{Blank}/;
1078 }
1079
1080 ok "1" =~ /\s*\pN+/;
1081 ok "-" =~ /\s*\p{Dash}{1}/;
1082 ok " " =~ /\w*\p{Blank}{1,4}/;
1083
1084 }
1085
SKIP: { # Some constructs with Latin1 characters cause a utf8 string not
        # to match itself in non-utf8
    #
    # Matches "\xc0" against the same pattern in every combination of
    # utf8-upgraded / non-upgraded target and pattern, with and without /i.
    if ($IS_EBCDIC) {
        # The skip count must equal the number of ok() calls below (8),
        # otherwise the test plan is wrong whenever this branch is taken.
        skip "Needs to be customized to run on EBCDIC", 8;
    }
    my $c = "\xc0";
    my $pattern = my $utf8_pattern = qr/((\xc0)+,?)/;
    utf8::upgrade($utf8_pattern);
    ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8";
    ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8";
    ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not";
    ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; pattern utf8, target not";
    utf8::upgrade($c);
    ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not";
    ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; target utf8, pattern not";
    ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8";
    ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8";
}
1104
1105 SKIP: { # Make sure can override the formatting
1106 if ($IS_EBCDIC) {
1107 skip "Needs to be customized to run on EBCDIC", 2;
1108 }
1109 use feature 'unicode_strings';
1110 ok "\xc0" =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/';
1111 ok "\xc0" !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/';
1112 }
1113
{
    # Test that a regex followed by an operator and/or a statement modifier works.
    # These tests use string-eval so that a clean error is reported when one fails
    # (without the string eval the test script might be unparseable).

    # Note: these tests check behaviour that currently is valid syntax.
    # If a new regex modifier is added and a test fails then there is a backwards-compatibility issue.
    # Note-2: a new deprecation warning was added for this with commit e6897b1a5db0410e387ccbf677e89fc4a1d8c97a,
    # which indicates that this syntax will be removed in 5.16.
    # When this happens the tests can be removed.

    no warnings 'syntax';

    # m// immediately followed by a word operator or statement modifier.
    iseq( eval q#my $r = "a" =~ m/a/lt 2;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by lt");
    iseq( eval q#my $r = "a" =~ m/a/le 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by le");
    iseq( eval q#my $r = "a" =~ m/a/eq 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by eq");
    iseq( eval q#my $r = "a" =~ m/a/ne 0;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by ne");
    iseq( eval q#my $r = "a" =~ m/a/and 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by and");
    iseq( eval q#my $r = "a" =~ m/a/unless 0;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by unless");
    iseq( eval q#my $c = 1; my $r; $r = "a" =~ m/a/while $c--;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by while");
    iseq( eval q#my $c = 0; my $r; $r = "a" =~ m/a/until $c++;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by until");
    iseq( eval q#my $r; $r = "a" =~ m/a/for 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by for");
    iseq( eval q#my $r; $r = "a" =~ m/a/foreach 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by foreach");

    # s/// immediately followed by a word operator or statement modifier.
    iseq( eval q#my $t = "a"; my $r = $t =~ s/a//lt 2;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by lt");
    iseq( eval q#my $t = "a"; my $r = $t =~ s/a//le 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by le");
    iseq( eval q#my $t = "a"; my $r = $t =~ s/a//ne 0;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by ne");
    iseq( eval q#my $t = "a"; my $r = $t =~ s/a//and 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by and");
    iseq( eval q#my $t = "a"; my $r = $t =~ s/a//unless 0;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by unless");

    iseq( eval q#my $c = 1; my $r; my $t = "a"; $r = $t =~ s/a//while $c--;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by while");
    iseq( eval q#my $c = 0; my $r; my $t = "a"; $r = $t =~ s/a//until $c++;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by until");
    iseq( eval q#my $r; my $t = "a"; $r = $t =~ s/a//for 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by for");
    # This one must really use "foreach", mirroring the m// test above;
    # it used to be a verbatim copy of the "for" line.
    iseq( eval q#my $r; my $t = "a"; $r = $t =~ s/a//foreach 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by foreach");
}
1148
1149 {
1150 my $str= "\x{100}";
1151 chop $str;
1152 my $qr= qr/$str/;
1153 iseq( "$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212" );
1154 $str= "";
1155 $qr= qr/$str/;
1156 iseq( "$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212" )
1157
1158 }
1159
1160 {
1161 local $TODO = "[perl #38133]";
1162
1163 "A" =~ /(((?:A))?)+/;
1164 my $first = $2;
1165
1166 "A" =~ /(((A))?)+/;
1167 my $second = $2;
1168
1169 iseq($first, $second);
1170 }
1171
1172 {
1173 # RT #3516: \G in a m//g expression causes problems
1174 my $count = 0;
1175 while ("abc" =~ m/(\G[ac])?/g) {
1176 last if $count++ > 10;
1177 }
1178 ok($count < 10, 'RT #3516 A');
1179
1180 $count = 0;
1181 while ("abc" =~ m/(\G|.)[ac]/g) {
1182 last if $count++ > 10;
1183 }
1184 ok($count < 10, 'RT #3516 B');
1185
1186 $count = 0;
1187 while ("abc" =~ m/(\G?[ac])?/g) {
1188 last if $count++ > 10;
1189 }
1190 ok($count < 10, 'RT #3516 C');
1191 }
1192
1193
1194
1195} # End of sub run_tests
1196
11971;