3 # This is a home for regular expression tests that don't fit into
4 # the format supported by re/regexp.t. If you want to add a test
5 # that does fit that format, add it to re/re_tests, not here.
19 @INC = ('../lib','.');
20 do "re/ReTest.pl" or die $@;
24 plan tests => 2514; # Update this when adding/deleting tests.
26 run_tests() unless caller;
35 local $BugId = '20000731.001';
36 ok "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/,
37 "Match UTF-8 char in presense of (??{ })";
42 local $BugId = '20001021.005';
43 no warnings 'uninitialized';
44 ok undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV";
48 local $Message = 'bug id 20001008.001';
50 my @x = ("stra\337e 138", "stra\337e 138");
52 ok s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
53 ok my ($latin) = /^(.+)(?:\s+\d)/;
54 iseq $latin, "stra\337e";
55 ok $latin =~ s/stra\337e/straße/;
57 # Previous code follows, but outcommented - there were no tests.
59 # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
60 # use utf8; # needed for the raw UTF-8
61 # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
67 local $BugId = '20001028.003';
69 # Fist half of the bug.
70 local $Message = 'HEBREW ACCENT QADMA matched by .*';
72 ok my ($Y) = $X =~ /(.*)/;
76 # Second half of the bug.
77 $Message = 'HEBREW ACCENT QADMA in replacement';
79 $X =~ s/^/chr(1488)/e;
86 local $BugId = '20001108.001';
87 local $Message = 'Repeated s///';
88 my $X = "Szab\x{f3},Bal\x{e1}zs";
90 $Y =~ s/(B)/$1/ for 0 .. 3;
92 iseq $X, "Szab\x{f3},Bal\x{e1}zs";
97 local $BugId = '20000517.001';
98 local $Message = 's/// on UTF-8 string';
107 local $BugId = '20001230.002';
108 local $Message = '\C and É';
109 ok "École" =~ /^\C\C(.)/ && $1 eq 'c';
110 ok "École" =~ /^\C\C(c)/;
115 # The original bug report had 'no utf8' here but that was irrelevant.
116 local $BugId = '20010306.008';
117 local $Message = "Don't dump core";
119 ok $a =~ m/\w/; # used to core dump.
124 local $BugId = '20010410.006';
125 local $Message = '/g in scalar context';
126 for my $rx ('/(.*?)\{(.*?)\}/csg',
127 '/(.*?)\{(.*?)\}/cg',
128 '/(.*?)\{(.*?)\}/sg',
130 '/(.+?)\{(.+?)\}/csg',) {
132 my $input = "a{b}c{d}";
134 while (eval \$input =~ $rx) {
143 local $BugId = "20010619.003";
144 # Amazingly vertical tabulator is the same in ASCII and EBCDIC.
145 for ("\n", "\t", "\014", "\r") {
146 ok !/[[:print:]]/, "'$_' not in [[:print:]]";
149 ok /[[:print:]]/, "'$_' in [[:print:]]";
156 # [ID 20010814.004] pos() doesn't work when using =~m// in list context
157 local $BugId = '20010814.004';
159 my $a = join ":", /b./gc;
160 my $b = join ":", /a./gc;
162 iseq "$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//";
167 # [ID 20010407.006] matching utf8 return values from
168 # functions does not work
169 local $BugId = '20010407.006';
170 local $Message = 'UTF-8 return values from functions';
171 package ID_20010407_006;
177 ::ok length ($y) == 2 && $y eq $x;
180 ::ok length ($y) == 2 && $y eq $x;
184 # High bit bug -- japhy
186 ok $x =~ /.*?\200/, "High bit fine";
191 local $Message = 'UTF-8 hash keys and /$/';
192 # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters
193 # /2002-01/msg01327.html
196 my $v = substr ($u, 0, 1);
197 my $w = substr ($u, 1, 1);
198 my %u = ($u => $u, $v => $v, $w => $w);
200 my $m1 = /^\w*$/ ? 1 : 0;
201 my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0;
208 local $BugId = "20020124.005";
209 local $PatchId = "14795";
210 local $Message = "s///eg";
212 for my $char ("a", "\x{df}", "\x{100}") {
213 my $x = "$char b $char";
218 iseq substr ($x, 0, 1), substr ($x, -1, 1);
224 local $BugId = "20020412.005";
225 local $Message = "Correct pmop flags checked when empty pattern";
227 # Requires reuse of last successful pattern.
232 ok $match != $_, $Message,
233 sprintf "'match one' %s on %s iteration" =>
234 $match ? 'succeeded' : 'failed',
235 $_ ? 'second' : 'first';
238 my $result = join "" => $num =~ //g;
244 local $BugId = '20020630.002';
245 local $Message = 'UTF-8 regex matches above 32k';
246 for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) {
247 my ($type, $char) = @$_;
248 for my $len (32000, 32768, 33000) {
249 my $s = $char . "f" x $len;
250 my $r = $s =~ /$char([f]*)/gc;
251 ok $r, $Message, "<$type x $len>";
252 ok !$r || pos ($s) == $len + 1, $Message,
253 "<$type x $len>; pos = @{[pos $s]}";
259 local $PatchId = '18179';
260 my $s = "\x{100}" x 5;
261 my $ok = $s =~ /(\x{100}{4})/;
262 my ($ord, $len) = (ord $1, length $1);
263 ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift";
268 local $BugId = '15763';
270 chop $a; # Leaves the UTF-8 flag
271 $a .= "y"; # 1 byte before 'y'.
273 ok $a =~ /^\C/, 'match one \C on 1-byte UTF-8';
274 ok $a =~ /^\C{1}/, 'match \C{1}';
276 ok $a =~ /^\Cy/, 'match \Cy';
277 ok $a =~ /^\C{1}y/, 'match \C{1}y';
279 ok $a !~ /^\C\Cy/, q {don't match two \Cy};
280 ok $a !~ /^\C{2}y/, q {don't match \C{2}y};
282 $a = "\x{100}y"; # 2 bytes before "y"
284 ok $a =~ /^\C/, 'match one \C on 2-byte UTF-8';
285 ok $a =~ /^\C{1}/, 'match \C{1}';
286 ok $a =~ /^\C\C/, 'match two \C';
287 ok $a =~ /^\C{2}/, 'match \C{2}';
289 ok $a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte';
290 ok $a =~ /^\C{3}/, 'match \C{3}';
292 ok $a =~ /^\C\Cy/, 'match two \C';
293 ok $a =~ /^\C{2}y/, 'match \C{2}';
295 ok $a !~ /^\C\C\Cy/, q {don't match three \Cy};
296 ok $a !~ /^\C{2}\Cy/, q {don't match \C{2}\Cy};
297 ok $a !~ /^\C{3}y/, q {don't match \C{3}y};
299 $a = "\x{1000}y"; # 3 bytes before "y"
301 ok $a =~ /^\C/, 'match one \C on three-byte UTF-8';
302 ok $a =~ /^\C{1}/, 'match \C{1}';
303 ok $a =~ /^\C\C/, 'match two \C';
304 ok $a =~ /^\C{2}/, 'match \C{2}';
305 ok $a =~ /^\C\C\C/, 'match three \C';
306 ok $a =~ /^\C{3}/, 'match \C{3}';
308 ok $a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte';
309 ok $a =~ /^\C{4}/, 'match \C{4}';
311 ok $a =~ /^\C\C\Cy/, 'match three \Cy';
312 ok $a =~ /^\C{3}y/, 'match \C{3}y';
314 ok $a !~ /^\C\C\C\Cy/, q {don't match four \Cy};
315 ok $a !~ /^\C{4}y/, q {don't match \C{4}y};
320 local $BugId = '15397';
321 local $Message = 'UTF-8 matching';
322 ok "\x{100}" =~ /\x{100}/;
323 ok "\x{100}" =~ /(\x{100})/;
324 ok "\x{100}" =~ /(\x{100}){1}/;
325 ok "\x{100}\x{100}" =~ /(\x{100}){2}/;
326 ok "\x{100}\x{100}" =~ /(\x{100})(\x{100})/;
331 local $BugId = '7471';
332 local $Message = 'Neither ()* nor ()*? sets $1 when matched 0 times';
334 ok /(AB)*?CD/ && !defined $1;
335 ok /(AB)*CD/ && !defined $1;
340 local $BugId = '3547';
341 local $Message = "Caching shouldn't prevent match";
342 my $pattern = "^(b+?|a){1,2}c";
343 ok "bac" =~ /$pattern/ && $1 eq 'a';
344 ok "bbac" =~ /$pattern/ && $1 eq 'a';
345 ok "bbbac" =~ /$pattern/ && $1 eq 'a';
346 ok "bbbbac" =~ /$pattern/ && $1 eq 'a';
352 local $BugId = '18232';
353 local $Message = '$1 should keep UTF-8 ness';
354 ok "\x{100}" =~ /(.)/;
355 iseq $1, "\x{100}", '$1 is UTF-8';
357 iseq $1, "\x{100}", '$1 is still UTF-8';
358 isneq $1, "\xC4\x80", '$1 is not non-UTF-8';
363 local $BugId = '19767';
364 local $Message = "Optimizer doesn't prematurely reject match";
368 my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
369 my $NormalWord = qr /${NormalChar}+?/;
370 my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/;
373 ok $attr =~ $PredNameHyphen; # Original test.
376 ok "0" =~ /\p{N}+\z/; # Variant.
381 local $BugId = '20683';
382 local $Message = "(??{ }) doesn't return stale values";
384 foreach (1, 2, 3, 4) {
385 $p ++ if /(??{ $p })/
392 sub TIESCALAR {bless []}
396 foreach (1, 2, 3, 4) {
404 # Subject: Odd regexp behavior
405 # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
406 # Date: Wed, 26 Feb 2003 16:53:12 +0000
407 # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
408 # To: perl-unicode@perl.org
410 local $Message = 'Markus Kuhn 2003-02-26';
412 my $x = "\x{2019}\nk";
413 ok $x =~ s/(\S)\n(\S)/$1 $2/sg;
414 ok $x eq "\x{2019} k";
417 ok $x =~ s/(\S)\n(\S)/$1 $2/sg;
420 ok "\x{2019}" =~ /\S/;
425 local $BugId = '21411';
426 local $Message = "(??{ .. }) in split doesn't corrupt its stack";
428 ok '-1-3-5-' eq join '', split /((??{$i++}))/, '-1-3-5-';
429 no warnings 'syntax';
430 @_ = split /(?{'WOW'})/, 'abc';
437 # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it
438 # hasn't been crashing. Disable this test until it is fixed properly.
439 # XXX also check what it returns rather than just doing ok(1,...)
440 # split /(?{ split "" })/, "abc";
441 local $TODO = "Recursive split is still broken";
442 ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0';
447 local $BugId = '17757';
448 $_ = "code: 'x' { '...' }\n"; study;
449 my @x; push @x, $& while m/'[^\']*'/gx;
451 iseq "@x", "'x':'...'", "Parse::RecDescent triggered infinite loop";
456 local $BugId = '22354';
458 ok "a\nb" !~ /^b/, "Propagated modifier; $_[0]";
459 ok "a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m";
462 $_ = "x"; s/x/func "in subst"/e;
463 $_ = "x"; s/x/func "in multiline subst"/em;
464 $_ = "x"; /x(?{func "in regexp"})/;
465 $_ = "x"; /x(?{func "in multiline regexp"})/m;
470 local $BugId = '19049';
473 iseq "abcde", $`, 'Global match sets $`';
478 # [perl #23769] Unicode regex broken on simple example
479 # regrepeat() didn't handle UTF-8 EXACT case right.
480 local $BugId = '23769';
481 my $Mess = 'regrepeat() handles UTF-8 EXACT case right';
482 local $Message = $Mess;
484 my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
488 ok $s =~ /\x{a0}\x{a0}/;
490 $Message = "$Mess (easy variant)";
491 ok "aaa\x{100}" =~ /(a+)/;
494 $Message = "$Mess (easy invariant)";
495 ok "aaa\x{100} " =~ /(a+?)/;
498 $Message = "$Mess (regrepeat variant)";
499 ok "\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/;
502 $Message = "$Mess (regrepeat invariant)";
503 ok "\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/;
504 iseq $1, "\xa0\xa0\xa0";
506 $Message = "$Mess (hard variant)";
507 ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/;
510 $Message = "$Mess (hard invariant)";
511 ok "ababab\x{100} " =~ /((?:ab)+)/;
514 ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/;
515 iseq $1, "\xa0\xa1\xa0\xa1\xa0\xa1";
517 ok "ababab\x{100} " =~ /((?:ab)+?)/;
520 $Message = "Don't match first byte of UTF-8 representation";
521 ok "\xc4\xc4\xc4" !~ /(\x{100}+)/;
522 ok "\xc4\xc4\xc4" !~ /(\x{100}+?)/;
523 ok "\xc4\xc4\xc4" !~ /(\x{100}++)/;
528 # perl panic: pp_match start/end pointers
529 local $BugId = '25269';
530 iseq "a-bc", eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"},
531 'Captures can move backwards in string';
536 local $BugId = '27940'; # \cA not recognized in character classes
537 ok "a\cAb" =~ /\cA/, '\cA in pattern';
538 ok "a\cAb" =~ /[\cA]/, '\cA in character class';
539 ok "a\cAb" =~ /[\cA-\cB]/, '\cA in character class range';
540 ok "abc" =~ /[^\cA-\cB]/, '\cA in negated character class range';
541 ok "a\cBb" =~ /[\cA-\cC]/, '\cB in character class range';
542 ok "a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range';
543 ok "a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern';
544 ok "ab" !~ /a\cIb/x, '\cI in pattern';
549 # perl #28532: optional zero-width match at end of string is ignored
550 local $BugId = '28532';
551 ok "abc" =~ /^abc(\z)?/ && defined($1),
552 'Optional zero-width match at end of string';
553 ok "abc" =~ /^abc(\z)??/ && !defined($1),
554 'Optional zero-width match at end of string';
560 local $BugId = '36207';
561 my $utf8 = "\xe9\x{100}"; chop $utf8;
564 ok $utf8 =~ /\xe9/i, "utf8/latin";
565 ok $utf8 =~ /$latin1/i, "utf8/latin runtime";
566 ok $utf8 =~ /(abc|\xe9)/i, "utf8/latin trie";
567 ok $utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime";
569 ok "\xe9" =~ /$utf8/i, "latin/utf8";
570 ok "\xe9" =~ /(abc|$utf8)/i, "latin/utf8 trie";
571 ok $latin1 =~ /$utf8/i, "latin/utf8 runtime";
572 ok $latin1 =~ /(abc|$utf8)/i, "latin/utf8 trie runtime";
577 local $BugId = '37038';
583 "Assigning to original string does not corrupt match vars";
588 local $PatchId = '26410';
593 my $aeek = bless {} => 'wooosh';
594 eval_ok sub {$aeek -> gloople () =~ /(.)/g},
595 "//g match against return value of sub";
598 eval_ok sub {gloople () =~ /(.)/g},
599 "26410 didn't affect sub calls for some reason";
604 local $TODO = "See changes 26925-26928, which reverted change 26410";
608 sub variable : lvalue {$var}
610 my $o = bless [] => 'lv';
614 $f .= $1 if $o -> variable =~ /(.)/g;
619 iseq $f, "ab", "pos() retained between calls";
623 ok 0, "Code failed: $@";
627 sub variable : lvalue {$var}
631 $g .= $1 if variable =~ /(.)/g;
636 iseq $g, "ab", "pos() retained between calls";
640 ok 0, "Code failed: $@";
647 local $BugId = '37836';
648 skip "In EBCDIC" if $IS_EBCDIC;
650 $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8
652 eval_ok sub {!($ret = s/[\0]+//g)},
653 "Ill-formed UTF-8 doesn't match NUL in class";
658 # chr(65535) should be allowed in regexes
659 local $BugId = '38293';
660 no warnings 'utf8'; # To allow non-characters
665 ok $c eq "", "U+FFFF, parsed as atom";
670 ok $c eq "", "U+FFFF backslashed, parsed as atom";
674 ok $c eq "", "U+FFFF, parsed in class";
679 ok $c eq "", "U+FFFF backslashed, parsed in class";
683 ok $s eq "AB", "U+FFFF, EXACTF";
687 ok $s eq "\x{ffff}", "U+FFFF, BOUND";
691 ok $s eq "\x{ffff}", "U+FFFF, NBOUND";
696 local $BugId = '39583';
698 # The printing characters
699 my @chars = ("A" .. "Z");
701 my $size = 32771 - 4;
704 # Create some random junk. Inefficient, but it works.
705 for (my $i = 0; $i < $size; $ i++) {
706 $str .= $chars [rand @chars];
709 $str .= ($delim x 4);
712 ok $str =~ s/^(.*?)${delim}{4}//s, "Pattern matches";
713 iseq $str, "", "Empty string";
714 ok defined $1 && length ($1) == $size, '$1 is correct size';
719 local $BugId = '27940';
720 ok "\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern';
721 ok "\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern';
722 ok "X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern';
723 ok "X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern';
725 ok "X\0A" =~ /X\c@?A/, '\c@?';
726 ok "X\0A" =~ /X\c@*A/, '\c@*';
727 ok "X\0A" =~ /X\c@(A)/, '\c@(';
728 ok "X\0A" =~ /X(\c@)A/, '\c@)';
729 ok "X\0A" =~ /X\c@|ZA/, '\c@|';
731 ok "X\@A" =~ /X@?A/, '@?';
732 ok "X\@A" =~ /X@*A/, '@*';
733 ok "X\@A" =~ /X@(A)/, '@(';
734 ok "X\@A" =~ /X(@)A/, '@)';
735 ok "X\@A" =~ /X@|ZA/, '@|';
737 local $" = ','; # non-whitespace and non-RE-specific
738 ok 'abc' =~ /(.)(.)(.)/, 'The last successful match is bogus';
739 ok "A@+B" =~ /A@{+}B/, 'Interpolation of @+ in /@{+}/';
740 ok "A@-B" =~ /A@{-}B/, 'Interpolation of @- in /@{-}/';
741 ok "A@+B" =~ /A@{+}B/x, 'Interpolation of @+ in /@{+}/x';
742 ok "A@-B" =~ /A@{-}B/x, 'Interpolation of @- in /@{-}/x';
747 local $BugId = '50496';
748 my $s = 'foo bar baz';
749 my (@k, @v, @fetch, $res);
751 my @names = qw ($+{A} $+{B} $+{C});
752 if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
753 while (my ($k, $v) = each (%+)) {
757 @v = sort values (%+);
767 iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_];
772 iseq $res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/";
773 iseq $count, 3, "Got 3 keys in %+ via each";
774 iseq 0 + @k, 3, 'Got 3 keys in %+ via keys';
775 iseq "@k", "A B C", "Got expected keys";
776 iseq "@v", "bar baz foo", "Got expected values";
778 no warnings "uninitialized";
779 print for $+ {this_key_doesnt_exist};
781 ok !$@, 'lvalue $+ {...} should not throw an exception';
787 # Almost the same as the block above, except that the capture is nested.
789 local $BugId = '50496';
790 my $s = 'foo bar baz';
791 my (@k, @v, @fetch, $res);
793 my @names = qw ($+{A} $+{B} $+{C} $+{D});
794 if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) {
795 while (my ($k,$v) = each(%+)) {
799 @v = sort values (%+);
810 iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_];
815 iseq $res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/";
816 iseq $count, 4, "Got 4 keys in %+ via each";
817 iseq @k, 4, 'Got 4 keys in %+ via keys';
818 iseq "@k", "A B C D", "Got expected keys";
819 iseq "@v", "bar baz foo foo bar baz", "Got expected values";
821 no warnings "uninitialized";
822 print for $+ {this_key_doesnt_exist};
824 ok !$@,'lvalue $+ {...} should not throw an exception';
829 local $BugId = '36046';
834 while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++}
835 iseq $mval, 0, '@- should be empty';
836 iseq $pval, 0, '@+ should be empty';
837 iseq $count, 1, 'Should have matched once only';
844 local $BugId = '40684';
845 local $Message = '/m in precompiled regexp';
847 my $rex = qr'^abc$'m;
854 local $BugId = '36909';
855 local $Message = '(?: ... )? should not lose $^R';
859 ok 'x foofoo y' =~ m {
860 (foo) # $^R correctly set
861 (?{ "last regexp code result" })
863 iseq $^R, 'last regexp code result';
870 ok 'x foofoo y' =~ m {
871 (?:foo|bar)+ # $^R correctly set
872 (?{ "last regexp code result" })
874 iseq $^R, 'last regexp code result';
880 ok 'x foofoo y' =~ m {
881 (foo|bar)\1+ # $^R undefined
882 (?{ "last regexp code result" })
884 iseq $^R, 'last regexp code result';
890 ok 'x foofoo y' =~ m {
891 (foo|bar)\1 # This time without the +
892 (?{"last regexp code result"})
894 iseq $^R, 'last regexp code result';
901 local $BugId = '22395';
902 local $Message = 'Match is linear, not quadratic';
904 for my $l (10, 100, 1000) {
906 ('a' x $l) =~ /(.*)(?{$count++})[bc]/;
907 local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)";
914 local $BugId = '22614';
915 local $Message = '@-/@+ should not have undefined values';
918 /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/;
919 iseq "@len", "2 2 2";
924 local $BugId = '18209';
925 local $Message = '$& set on s///';
926 my $text = ' word1 word2 word3 word4 word5 word6 ';
928 my @words = ('word1', 'word3', 'word5');
930 foreach my $word (@words) {
931 $text =~ s/$word\s//gi; # Leave a space to seperate words
932 # in the resultant str.
933 # The following block is not working.
940 iseq $text, ' word2 word4 word6 ';
946 local $BugId = '6893';
947 local $_ = qq (A\nB\nC\n);
949 while (m#(\G|\n)([^\n]*)\n#gsx) {
953 iseq "@res", "A B C", "/g pattern shouldn't infinite loop";
959 local $BugId = '41010';
960 local $Message = 'No optimizer bug';
961 my @tails = ('', '(?(1))', '(|)', '()?');
962 my @quants = ('*','+');
966 for my $pat (@$pats) {
967 for my $quant (@quants) {
968 for my $tail (@tails) {
969 my $re = "($pat$quant\$)$tail";
970 ok /$re/ && $1 eq $_, "'$_' =~ /$re/";
971 ok /$re/m && $1 eq $_, "'$_' =~ /$re/m";
980 '(1|[23]|4|[56]|[78]|[90])',
981 '(?:1|[23]|4|[56]|[78]|[90])',
982 '(1|2|3|4|5|6|7|8|9|0)',
983 '(?:1|2|3|4|5|6|7|8|9|0)');
984 my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s');
986 my @dstrs = ('12345');
987 $doit -> (\@spats, @sstrs);
988 $doit -> (\@dpats, @dstrs);
994 local $BugId = '45605';
995 # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string
997 my $utf_8 = "\xd6schel";
998 utf8::upgrade ($utf_8);
999 $utf_8 =~ m {(\xd6|Ö)schel};
1000 iseq $1, "\xd6", "Upgrade error";
1004 # Regardless of utf8ness any character matches itself when
1005 # doing a case insensitive match. See also [perl #36207]
1006 local $BugId = '36207';
1007 for my $o (0 .. 255) {
1008 my @ch = (chr ($o), chr ($o));
1009 utf8::upgrade ($ch [1]);
1010 for my $u_str (0, 1) {
1011 for my $u_pat (0, 1) {
1012 ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E/i,
1013 "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat";
1014 ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E|xyz/i,
1015 "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat";
1023 local $BugId = '49190';
1024 local $Message = '$REGMARK in replacement';
1027 ok s/(*:B)A/$REGMARK/;
1030 ok s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g;
1036 local $BugId = '52658';
1037 local $Message = 'Substitution evaluation in list context';
1038 my $reg = '../xxx/';
1039 my @te = ($reg =~ m{^(/?(?:\.\./)*)},
1040 $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++');
1041 iseq $reg, '../bbb/';
1042 iseq $te [0], '../';
1046 local $BugId = '60034';
1047 my $a = "xyzt" x 8192;
1048 ok $a =~ /\A(?>[a-z])*\z/,
1049 '(?>) does not cause wrongness on long string';
1050 my $b = $a . chr 256;
1055 ok $b =~ /\A(?>[a-z])*\z/,
1056 '(?>) does not cause wrongness on long string with UTF-8';
1061 # Keep the following tests last -- they may crash perl
1063 print "# Tests that follow may crash perl\n";
1065 local $BugId = '19049/38869';
1066 local $Message = 'Pattern in a loop, failure should not ' .
1067 'affect previous success';
1069 'ab cdef', # Matches regex
1070 ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it
1075 m/ab(.+)cd/i; # The ignore-case seems to be important
1076 $y = $1; # Use $1, which might not be from the last match!
1077 $x = substr ($list [0], $- [0], $+ [0] - $- [0]);
1085 local $BugId = '24274';
1087 ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker");
1088 ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/,
1089 "Regexp /^(??{'(.)'x 100})/ crashes older perls");
1094 # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache
1095 local $BugId = '45337';
1096 local ${^UTF8CACHE} = -1;
1097 local $Message = "Shouldn't panic";
1103 local $BugId = '57042';
1104 local $Message = "Check if tree logic breaks \$^R";
1105 my $cond_re = qr/\s*
1112 for my $line ("(A)","(B)") {
1113 if ($line =~ m/$cond_re/) {
1114 push @res, $^R ? "#$^R" : "UNDEF";
1117 iseq "@res","#1 #2";
1120 no warnings 'closure';
1121 my $re = qr/A(??{"1"})/;
1122 ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/;
1129 # This only works under -DEBUGGING because it relies on an assert().
1131 local $BugId = '60508';
1132 local $Message = "Check capture offset re-entrancy of utf8 code.";
1134 sub fswash { $_[0] =~ s/([>X])//g; }
1136 my $k1 = "." x 4 . ">>";
1139 my $k2 = "\x{f1}\x{2022}";
1140 $k2 =~ s/([\360-\362])/>/g;
1143 iseq($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks");
1148 local $BugId = 65372; # minimal CURLYM limited to 32767 matches
1150 qr{a(x|y)*b}, # CURLYM
1151 qr{a(x|y)*?b}, # .. with minmod
1152 qr{a([wx]|[yz])*b}, # .. and without tries
1153 qr{a([wx]|[yz])*?b},
1156 my $s = join '', 'a', 'x' x $len, 'b';
1157 for my $pat (@pat) {
1158 ok($s =~ $pat, $pat);
1163 local $TODO = "[perl #38133]";
1165 "A" =~ /(((?:A))?)+/;
1171 iseq($first, $second);
1175 local $BugId = 70998;
1177 = 'utf8 =~ /trie/ where trie matches a continuation octet';
1181 local $SIG{__WARN__} = sub { $w .= shift };
1183 # This bug can be reduced to
1184 qq{\x{30ab}} =~ /\xab|\xa9/;
1185 # but it's nice to have a more 'real-world' test. The original test
1186 # case from the RT ticket follows:
1192 my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')';
1193 $conv_rx = qr{$conv_rx};
1196 = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}}
1197 . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}}
1198 . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}}
1199 . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}}
1200 . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}}
1201 . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}}
1202 . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}}
1203 . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}};
1205 $x =~ s{$conv_rx}{$conv{$1}}eg;
1211 local $BugId = 68564; # minimal CURLYM limited to 32767 matches
1212 local $Message = "stclass optimisation does not break + inside (?=)";
1213 iseq join("-", " abc def " =~ /(?=(\S+))/g),
1214 "abc-bc-c-def-ef-f",
1217 } # End of sub run_tests