+# [perl #37038] Global regular matches generate invalid pointers
+
+{ # after a //g match, overwriting the matched string must leave $1 and $2 usable
+ my $s = "abcd";
+ $s =~ /(..)(..)/g;
+ $s = $1; # overwrite the very string the captures were taken from
+ $s = $2; # $2 is read only after $s has already been reassigned
+ ok($s eq 'cd',
+ "# assigning to original string should not corrupt match vars");
+}
+
+{ # a //g match against the return value of a method call must not die
+ package wooosh;
+ sub gloople {
+ "!";
+ }
+ package main;
+
+ my $aeek = bless {}, 'wooosh';
+ eval {$aeek->gloople() =~ /(.)/g;}; # eval traps any exception so it can be reported via $@
+ ok($@ eq "", "//g match against return value of sub") or print "# $@\n";
+}
+
+{ # a //g match against the return value of a plain sub call must not die
+ sub gloople {
+ "!";
+ }
+ eval {gloople() =~ /(.)/g;}; # eval traps any exception so it can be reported via $@
+ ok($@ eq "", "# 26410 didn't affect sub calls for some reason")
+ or print "# $@\n";
+}
+
+{ # //g against an lvalue method: two successive matches are expected to yield "a" then "b"
+ package lv;
+ $var = "abc"; # undeclared, so this is the package variable $lv::var
+ sub variable : lvalue { $var }
+
+ package main;
+ my $o = bless [], "lv";
+ my $f = "";
+ eval { for (1..2) { $f .= $1 if $o->variable =~ /(.)/g } }; # appends one capture per successful match
+ ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n";
+}
+
+{ # //g against an lvalue sub called as a function: expected to yield "a" then "b"
+ $var = "abc"; # undeclared, so this is a package variable of the current package
+ sub variable : lvalue { $var }
+
+ my $f = "";
+ eval { for (1..2) { $f .= $1 if variable() =~ /(.)/g } }; # appends one capture per successful match
+ ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n";
+}
+
+# [perl #37836] Simple Regex causes SEGV when run on specific data
+if ($ordA == 193) { # $ordA is set outside this chunk; presumably ord("A"), which is 193 on EBCDIC -- confirm
+ print "ok $test # Skip: in EBCDIC\n"; $test++;
+} else {
+ no warnings 'utf8';
+ $_ = pack('U0C2', 0xa2, 0xf8); # ill-formed UTF-8
+ my $ret = 0;
+ eval { $ret = s/[\0]+//g }; # $ret receives the substitution result; eval guards against a die
+ ok($ret == 0, "ill-formed UTF-8 doesn't match NUL in class");
+}
+
+{ # [perl #38293] chr(65535) should be allowed in regexes
+ no warnings 'utf8'; # to allow non-characters
+ my($c, $r, $s);
+
+ $c = chr 0xffff;
+ $c =~ s/$c//g; # the subject string is also interpolated as the pattern
+ ok($c eq "", "U+FFFF, parsed as atom");
+
+ $c = chr 0xffff;
+ $r = "\\$c"; # one backslash followed by the literal U+FFFF character
+ $c =~ s/$r//g;
+ ok($c eq "", "U+FFFF backslashed, parsed as atom");
+
+ $c = chr 0xffff;
+ $c =~ s/[$c]//g; # same character, now inside a bracketed class
+ ok($c eq "", "U+FFFF, parsed in class");
+
+ $c = chr 0xffff;
+ $r = "[\\$c]"; # bracketed class holding a backslashed U+FFFF
+ $c =~ s/$r//g;
+ ok($c eq "", "U+FFFF backslashed, parsed in class");
+
+ $s = "A\x{ffff}B";
+ $s =~ s/\x{ffff}//i; # case-insensitive literal match; presumably what "EXACTF" in the name refers to
+ ok($s eq "AB", "U+FFFF, EXACTF");
+
+ $s = "\x{ffff}A";
+ $s =~ s/\bA//; # needs a word boundary between U+FFFF and "A"
+ ok($s eq "\x{ffff}", "U+FFFF, BOUND");
+
+ $s = "\x{ffff}!";
+ $s =~ s/\B!//; # needs a non-boundary between U+FFFF and "!"
+ ok($s eq "\x{ffff}", "U+FFFF, NBOUND");
+} # non-characters end
+
+{ # non-greedy match across a long random string that ends in four delimiters
+ # https://rt.perl.org/rt3/Ticket/Display.html?id=39583
+
+ # The printing characters
+ my @chars = ("A".."Z");
+ my $delim = ",";
+ my $size = 32771 - 4; # length of the random part; the four delimiters bring the total to 32771
+ my $str = '';
+
+ # create some random junk. Inefficient, but it works.
+ for ($i = 0 ; $i < $size ; $i++) { # note: $i is an undeclared package variable
+ $str .= $chars[int(rand(@chars))];
+ }
+
+ $str .= ($delim x 4);
+ my $res;
+ my $matched;
+ if ($str =~ s/^(.*?)${delim}{4}//s) { # ${delim} is interpolated, then quantified by {4}
+ $res = $1;
+ $matched=1;
+ }
+ ok($matched,'pattern matches');
+ ok(length($str)==0,"Empty string"); # the substitution should have consumed the whole string
+ ok(defined($res) && length($res)==$size,"\$1 is correct size");
+}
+
+{ # related to [perl #27940]: "@" followed by regex metacharacters must stay literal in a pattern
+ ok("\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern'); # \c@ is the NUL character
+ ok("\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern');
+ ok("X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern');
+ ok("X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern');
+
+ ok("X\0A" =~ /X\c@?A/, '\c@?');
+ ok("X\0A" =~ /X\c@*A/, '\c@*');
+ ok("X\0A" =~ /X\c@(A)/, '\c@(');
+ ok("X\0A" =~ /X(\c@)A/, '\c@)');
+ ok("X\0A" =~ /X\c@|ZA/, '\c@|');
+
+ ok("X\@A" =~ /X@?A/, '@?');
+ ok("X\@A" =~ /X@*A/, '@*');
+ ok("X\@A" =~ /X@(A)/, '@(');
+ ok("X\@A" =~ /X(@)A/, '@)');
+ ok("X\@A" =~ /X@|ZA/, '@|');
+
+ local $" = ','; # non-whitespace and non-RE-specific
+ ok('abc' =~ /(.)(.)(.)/, 'the last successful match is bogus'); # populates @- and @+ for the tests below
+ ok("A@+B" =~ /A@{+}B/, 'interpolation of @+ in /@{+}/'); # the explicit @{+} form does interpolate
+ ok("A@-B" =~ /A@{-}B/, 'interpolation of @- in /@{-}/');
+ ok("A@+B" =~ /A@{+}B/x, 'interpolation of @+ in /@{+}/x');
+ ok("A@-B" =~ /A@{-}B/x, 'interpolation of @- in /@{-}/x');
+}
+
+{ # \N{...} handling with the custom Cname translator loaded from lib/
+ use lib 'lib';
+ use Cname;
+
+ ok('fooB'=~/\N{foo}[\N{B}\N{b}]/,"Passthrough charname");
+ $test=1233; my $handle=make_must_warn('Ignoring excess chars from'); # forces the test counter; make_must_warn is defined outside this chunk
+ $handle->('q(xxWxx) =~ /[\N{WARN}]/');
+ {
+ my $code;
+ my $w=""; # accumulates every warning raised inside this inner block
+ local $SIG{__WARN__} = sub { $w.=shift };
+ eval($code=<<'EOFTEST') or die "$@\n$code\n";
+ {
+ use warnings;
+
+ #1234
+ ok("\0" !~ /[\N{EMPTY-STR}XY]/,
+ "Zerolength charname in charclass doesnt match \0");
+ 1;
+ }
+EOFTEST
+ ok($w=~/Ignoring zero length/,
+ "Got expected zero length warning");
+ warn $code; # the local __WARN__ handler is still active here, so this only appends to $w
+
+ }
+ $handle= make_must_warn('Ignoring zero length');
+ $handle->('qq(\\0) =~ /[\N{EMPTY-STR}XY]/');
+ ok('AB'=~/(\N{EVIL})/ && $1 eq 'A',"Charname caching $1"); # $1 is also interpolated into the test name
+ ok('ABC'=~/(\N{EVIL})/,"Charname caching $1");
+ ok('xy'=~/x\N{EMPTY-STR}y/, 'Empty string charname produces NOTHING node');
+ ok(''=~/\N{EMPTY-STR}/, 'Empty string charname produces NOTHING node 2');
+
+}
+{ # \N{...} with full Unicode names: sharp s inside a class, literal PLUS SIGN, and mixing with \N{U+...}
+ print "# MORE LATIN SMALL LETTER SHARP S\n";
+
+ use charnames ':full';
+
+ #see also test #835
+ ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+ "unoptimized named sequence in class 1");
+ ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+ "unoptimized named sequence in class 2");
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/,
+ "unoptimized named sequence in class 3");
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+ "unoptimized named sequence in class 4");
+
+ ok('aabc' !~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against aabc'); # the named "+" must act as a literal, not a quantifier
+ ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc');
+ ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc'); # repeats the previous test; it still counts toward the plan
+
+ ok(' A B'=~/\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/,
+ 'Intermixed named and unicode escapes 1');
+ ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}"=~
+ /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/,
+ 'Intermixed named and unicode escapes 2');
+ ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042} 3"=~
+ /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/,
+ 'Intermixed named and unicode escapes');
+}
+$brackets = qr{
+ { (?> [^{}]+ | (??{ $brackets }) )* }
+ }x; # self-referential: (??{ $brackets }) re-enters this same pattern for each nested brace group
+ok("{b{c}d" !~ m/^((??{ $brackets }))/, "bracket mismatch"); # the subject has an unclosed "{", so it must not match
+
+SKIP:{ # recursion via (?1): push the contents of every <...> group onto @stack as it closes
+ our @stack=();
+ my @expect=qw(
+ stuff1
+ stuff2
+ <stuff1>and<stuff2>
+ right
+ <right>
+ <<right>>
+ <<<right>>>
+ <<stuff1>and<stuff2>><<<<right>>>>
+ );
+
+ local $_='<<<stuff1>and<stuff2>><<<<right>>>>>';
+ ok(/^(<((?:(?>[^<>]+)|(?1))*)>(?{push @stack, $2 }))$/,
+ "Recursion should match");
+ ok(@stack==@expect)
+ or skip("Won't test individual results as count isn't equal",
+ 0+@expect);
+ # Iterate over the indices rather than the elements: using an element as
+ # a subscript numifies to 0, so every pass would compare slot 0 only.
+ foreach my $idx (0..$#expect) {
+ ok($expect[$idx] eq $stack[$idx],
+ "Expecting '$expect[$idx]' at stack pos #$idx");
+ }
+
+}
+{ # named captures with \k backreferences in s///, in both delimiter styles
+ my $s='123453456';
+ $s=~s/(?<digits>\d+)\k<digits>/$+{digits}/; # collapses the repeated "345" into a single copy
+ ok($s eq '123456','Named capture (angle brackets) s///');
+ $s='123453456';
+ $s=~s/(?'digits'\d+)\k'digits'/$+{digits}/; # same substitution using the single-quote syntax
+ ok($s eq '123456','Named capture (single quotes) s///');
+}
+# iseq(GOT, EXPECTED [, NAME])
+# Prints an "ok"/"not ok" line numbered from the file-level $test counter,
+# comparing the two values as strings after quoting them for display (an
+# undefined value is shown, and compared, as the bare word undef). On a
+# mismatch it also prints the caller's line number and both values.
+# Increments $test and returns the result of the comparison.
+sub iseq($$;$) {
+ my ($shown_got, $shown_want) = map { defined($_) ? "'$_'" : "undef" } @_[0, 1];
+ my $label = $_[2];
+
+ my $same = $shown_got eq $shown_want;
+
+ printf "%sok %d - %s\n", ($same ? "" : "not "), $test, $label||'unnamed';
+
+ if (!$same) {
+ printf "# Failed test at line %d\n".
+ "# expected: %s\n".
+ "# result: %s\n",
+ (caller)[2], $shown_want, $shown_got;
+ }
+
+ $test++;
+ return $same;
+}
+{ # %+ must expose named captures through each, keys, values and direct fetch
+ my $s='foo bar baz';
+ my (@k,@v,@fetch,$res);
+ my $count= 0;
+ my @names=qw($+{A} $+{B} $+{C});
+ if ($s=~/(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
+ while (my ($k,$v)=each(%+)) {
+ $count++;
+ }
+ @k=sort keys(%+);
+ @v=sort values(%+);
+ $res=1;
+ # Pair each named fetch with the numbered capture it should equal.
+ push @fetch,
+ [ "$+{A}", "$1" ],
+ [ "$+{B}", "$2" ],
+ [ "$+{C}", "$3" ],
+ ;
+ }
+ # Always emit three results so the test count stays fixed even when the
+ # match above failed and @fetch is empty.
+ foreach (0..2) {
+ if ($fetch[$_]) {
+ iseq($fetch[$_][0],$fetch[$_][1],$names[$_]);
+ } else {
+ ok(0, $names[$_]);
+ }
+ }
+ # Backslashes are doubled so the name shows \s rather than a bare "s",
+ # and the operator is spelled =~ instead of ~=.
+ iseq($res,1,"$s =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/");
+ iseq($count,3,"Got 3 keys in %+ via each");
+ iseq(0+@k, 3, 'Got 3 keys in %+ via keys');
+ iseq("@k","A B C", "Got expected keys");
+ iseq("@v","bar baz foo", "Got expected values");
+ eval'
+ print for $+{this_key_doesnt_exist};
+ ';
+ ok(!$@,'lvalue $+{...} should not throw an exception');
+}
+
+# stress test CURLYX/WHILEM.
+#
+# This test includes varying levels of nesting, and according to
+# profiling done against build 28905, exercises every code line in the
+# CURLYX and WHILEM blocks, except those related to LONGJMP, the
+# super-linear cache and warnings. It executes about 0.5M regexes
+#
+# Each of the twelve nested loops contributes one segment to the subject
+# string. A segment of "x" can never match, so the pattern is expected to
+# match exactly when no "x" is present, and the twelve captures
+# concatenated must then reproduce the whole subject.
+
+if ($ENV{PERL_SKIP_PSYCHO_TEST}){
+ # The "#" makes this a TAP skip directive, as in the EBCDIC skip earlier
+ # in this file, instead of a plain pass whose name starts with "Skip:".
+ printf "ok %d # Skip: No psycho tests\n", $test++;
+} else {
+ my $r = qr/^
+ (?:
+ ( (?:a|z+)+ )
+ (?:
+ ( (?:b|z+){3,}? )
+ (
+ (?:
+ (?:
+ (?:c|z+){1,1}?z
+ )?
+ (?:c|z+){1,1}
+ )*
+ )
+ (?:z*){2,}
+ ( (?:z+|d)+ )
+ (?:
+ ( (?:e|z+)+ )
+ )*
+ ( (?:f|z+)+ )
+ )*
+ ( (?:z+|g)+ )
+ (?:
+ ( (?:h|z+)+ )
+ )*
+ ( (?:i|z+)+ )
+ )+
+ ( (?:j|z+)+ )
+ (?:
+ ( (?:k|z+)+ )
+ )*
+ ( (?:l|z+)+ )
+ $/x;
+
+
+ my $ok = 1;
+ my $msg = "CURLYX stress test";
+ OUTER:
+ for my $a ("x","a","aa") {
+ for my $b ("x","bbb","bbbb") {
+ my $bs = $a.$b;
+ for my $c ("x","c","cc") {
+ my $cs = $bs.$c;
+ for my $d ("x","d","dd") {
+ my $ds = $cs.$d;
+ for my $e ("x","e","ee") {
+ my $es = $ds.$e;
+ for my $f ("x","f","ff") {
+ my $fs = $es.$f;
+ for my $g ("x","g","gg") {
+ my $gs = $fs.$g;
+ for my $h ("x","h","hh") {
+ my $hs = $gs.$h;
+ for my $i ("x","i","ii") {
+ my $is = $hs.$i;
+ for my $j ("x","j","jj") {
+ my $js = $is.$j;
+ for my $k ("x","k","kk") {
+ my $ks = $js.$k;
+ for my $l ("x","l","ll") {
+ my $ls = $ks.$l;
+ if ($ls =~ $r) {
+ if ($ls =~ /x/) {
+ $msg .= ": unexpected match for [$ls]";
+ $ok = 0;
+ last OUTER;
+ }
+ my $cap = "$1$2$3$4$5$6$7$8$9$10$11$12";
+ unless ($ls eq $cap) {
+ $msg .= ": capture: [$ls], got [$cap]";
+ $ok = 0;
+ last OUTER;
+ }
+ }
+ else {
+ unless ($ls =~ /x/) {
+ # Append like the two failure paths above, so the
+ # "CURLYX stress test" prefix is not lost.
+ $msg .= ": failed for [$ls]";
+ $ok = 0;
+ last OUTER;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ ok($ok, $msg);
+}
+
+# \, breaks {3,4}
+# (An escaped comma means the braces are no longer a quantifier, so the
+# pattern must match the literal text "a{3,4}".)
+# Test names double the backslash: in a double-quoted string "\," is just ",".
+ok("xaaay" !~ /xa{3\,4}y/, "\\, in a pattern");
+ok("xa{3,4}y" =~ /xa{3\,4}y/, "\\, in a pattern");
+
+# \c\ followed by _
+ok("x\c_y" !~ /x\c\_y/, "\\_ in a pattern");
+ok("x\c\_y" =~ /x\c\_y/, "\\_ in a pattern");
+
+# \c\ followed by other characters
+for my $c ("z", "\0", "!", chr(254), chr(256)) {
+ my $targ = "a\034$c"; # \034 is the control character that \c\ denotes
+ my $reg = "a\\c\\$c";
+ ok(eval("qq/$targ/ =~ /$reg/"), "\\c\\ in pattern");
+}
+
+{ # [RT#36046] last indices of @- and @+ and the match count for a //g loop without capture groups
+ my $str='abc';
+ my $count=0;
+ my $mval=0;
+ my $pval=0;
+ while ($str=~/b/g) { $mval=$#-; $pval=$#+; $count++ } # $#- and $#+ are the last indices of @- and @+
+ iseq($mval,0,"\@- should be empty [RT#36046]"); # last index 0: only the whole-match slot is present
+ iseq($pval,0,"\@+ should be empty [RT#36046]");
+ iseq($count,1,"should have matched once only [RT#36046]");
+}
+
+{ # Test the (*PRUNE) pattern; (?{$count++}) counts how often the engine gets past the verb before (*FAIL) forces a retry
+ our $count = 0;
+ 'aaab'=~/a+b?(?{$count++})(*FAIL)/; # baseline without any verb
+ iseq($count,9,"expect 9 for no (*PRUNE)");
+ $count = 0;
+ 'aaab'=~/a+b?(*PRUNE)(?{$count++})(*FAIL)/;
+ iseq($count,3,"expect 3 with (*PRUNE)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(*PRUNE)(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*PRUNE)/");
+ $count = 0;
+ 'aaab'=~/a+b?(??{'(*PRUNE)'})(?{$count++})(*FAIL)/; # same verb, supplied through a postponed subpattern
+ iseq($count,3,"expect 3 with (*PRUNE)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*PRUNE)/");
+}
+{ # Test the (*SKIP) pattern; (?{$count++}) counts how often the engine gets past the verb
+ our $count = 0;
+ 'aaab'=~/a+b?(*SKIP)(?{$count++})(*FAIL)/;
+ iseq($count,1,"expect 1 with (*SKIP)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(*SKIP)(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*SKIP)/");
+ $_='aaabaaab';
+ $count=0;
+ our @res=(); # records $1 each time the code block runs
+ 1 while /(a+b?)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,2,"Expect 2 with (*SKIP)" );
+ iseq("@res","aaab aaab","adjacent (*SKIP) works as expected" );
+}
+{ # Test the (*SKIP) pattern when a (*MARK:foo) precedes it; the expected counts match the unmarked case
+ our $count = 0;
+ 'aaab'=~/a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/;
+ iseq($count,1,"expect 1 with (*SKIP)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*SKIP)/");
+ $_='aaabaaab';
+ $count=0;
+ our @res=(); # records $1 each time the code block runs
+ 1 while /(a+b?)(*MARK:foo)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,2,"Expect 2 with (*SKIP)" );
+ iseq("@res","aaab aaab","adjacent (*SKIP) works as expected" );
+}
+{ # Test the (*SKIP) pattern in its named form: (*SKIP:a) refers back to the earlier (*MARK:a)
+ our $count = 0;
+ 'aaab'=~/a*(*MARK:a)b?(*MARK:b)(*SKIP:a)(?{$count++})(*FAIL)/;
+ iseq($count,3,"expect 3 with *MARK:a)b?(*MARK:b)(*SKIP:a)");
+ local $_='aaabaaab';
+ $count=0;
+ our @res=(); # records $1 each time the code block runs
+ 1 while /(a*(*MARK:a)b?)(*MARK:x)(*SKIP:a)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,5,"Expect 5 with (*MARK:a)b?)(*MARK:x)(*SKIP:a)" );
+ iseq("@res","aaab b aaab b ","adjacent (*MARK:a)b?)(*MARK:x)(*SKIP:a) works as expected" ); # the trailing space comes from a final empty capture
+}
+{ # Test the (*COMMIT) pattern; every case expects the code block after the verb to run exactly once
+ our $count = 0;
+ 'aaabaaab'=~/a+b?(*COMMIT)(?{$count++})(*FAIL)/;
+ iseq($count,1,"expect 1 with (*COMMIT)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(*COMMIT)(?{$count++})(*FAIL)/g;
+ iseq($count,1,"/.(*COMMIT)/");
+ $_='aaabaaab';
+ $count=0;
+ our @res=(); # records $1 each time the code block runs
+ 1 while /(a+b?)(*COMMIT)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,1,"Expect 1 with (*COMMIT)" );
+ iseq("@res","aaab","adjacent (*COMMIT) works as expected" );
+}
+{ # for each verb, named and unnamed, check $REGERROR after a forced failure and after a success
+ # Test named commits and the $REGERROR var
+ our $REGERROR;
+ for my $name ('',':foo')
+ {
+ for my $pat ("(*PRUNE$name)",
+ ($name? "(*MARK$name)" : "")
+ . "(*SKIP$name)",
+ "(*COMMIT$name)")
+ {
+ for my $suffix ('(*FAIL)','')
+ {
+ 'aaaab'=~/a+b$pat$suffix/; # with the (*FAIL) suffix the match cannot succeed
+ iseq(
+ $REGERROR,
+ ($suffix ? ($name ? 'foo' : "1") : ""), # on failure: the verb's name, or 1 if unnamed; on success: empty
+ "Test $pat and \$REGERROR $suffix"
+ );
+ }
+ }
+ }
+}
+{ # the same $REGERROR checks, run with a non-main package in effect
+ # Test named commits and the $REGERROR var
+ package Fnorble;
+ our $REGERROR; # declared while package Fnorble is current
+ for my $name ('',':foo')
+ {
+ for my $pat ("(*PRUNE$name)",
+ ($name? "(*MARK$name)" : "")
+ . "(*SKIP$name)",
+ "(*COMMIT$name)")
+ {
+ for my $suffix ('(*FAIL)','')
+ {
+ 'aaaab'=~/a+b$pat$suffix/; # with the (*FAIL) suffix the match cannot succeed
+ ::iseq( # iseq lives in main, so it needs the :: prefix here
+ $REGERROR,
+ ($suffix ? ($name ? 'foo' : "1") : ""),
+ "Test $pat and \$REGERROR $suffix"
+ );
+ }
+ }
+ }
+}
+{ # $REGERROR must name the (*COMMIT:...) of the alternation branch that was taken
+ # Test named commits and the $REGERROR var
+ our $REGERROR;
+ for $word (qw(bar baz bop)) { # note: $word is an undeclared package variable
+ $REGERROR="";
+ "aaaaa$word"=~/a+(?:bar(*COMMIT:bar)|baz(*COMMIT:baz)|bop(*COMMIT:bop))(*FAIL)/;
+ iseq($REGERROR,$word);
+ }
+}
+{ #Regression test for perlbug 40684: /m given to a single-quote-delimited qr must survive interpolation
+ my $s = "abc\ndef";
+ my $rex = qr'^abc$'m; # single-quote delimiters: the pattern is not interpolated
+ ok($s =~ m/$rex/);
+ ok($s =~ m/^abc$/m); # the same pattern written inline, for comparison
+}
+{ # two subjects, each of which is expected to match the (*THEN) alternation
+ #Mindnumbingly simple test of (*THEN)
+ for ("ABC","BAX") {
+ ok(/A (*THEN) X | B (*THEN) C/x,"Simple (*THEN) test"); # matches against $_, the current loop item
+ }
+}
+
+{ # balanced parentheses via relative recursion, with the same qr// embedded twice in one pattern
+ my $parens=qr/(\((?:[^()]++|(?-1))*+\))/; # (?-1) recurses into the nearest preceding capture group
+ local $_='foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))';
+ my ($all,$one,$two)=('','','');
+ if (/foo $parens \s* \+ \s* bar $parens/x) {
+ $all=$&;
+ $one=$1;
+ $two=$2;
+ }
+ iseq($one, '((2*3)+4-3)');
+ iseq($two, '(2*(3+4)-1*(2-3))');
+ iseq($all, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))');
+ iseq($all, $_); # the match must span the entire subject
+}
+
+#-------------------------------------------------------------------
+
+# Keep the following tests last -- they may crash perl
+
+ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274") # the subject is 32758 "a" characters
+ or print "# Unexpected outcome: should pass or crash perl\n";
+
+ok((q(a)x 100) =~ /^(??{'(.)'x 100})/, # the postponed pattern expands to 100 capture groups
+ "Regexp /^(??{'(.)'x 100})/ crashes older perls")
+ or print "# Unexpected outcome: should pass or crash perl\n";
+
+{ # s/// with zero-width lookaround assertions, with and without /g
+ $_="ns1ns1ns1";
+ s/ns(?=\d)/ns_/g; # the lookahead digit is not consumed, so it stays after the inserted "_"
+ iseq($_,"ns_1ns_1ns_1");
+ $_="ns1";
+ s/ns(?=\d)/ns_/;
+ iseq($_,"ns_1");
+ $_="123";
+ s/(?=\d+)|(?<=\d)/!Bang!/g; # every alternative is zero-width, so text is only inserted
+ iseq($_,"!Bang!1!Bang!2!Bang!3!Bang!");
+}
+
+# Put new tests above the line, not here.
+
+# Don't forget to update this!
+BEGIN { print "1..1345\n" }; # BEGIN runs at compile time, so the plan is printed before any test output