X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/28d8d7f41ab202dd5f7611033d27ecad44cadd60..e3faa678eb30e1e08116ca1bd086624974e5e5aa:/t/op/pat.t diff --git a/t/op/pat.t b/t/op/pat.t index 71ddbe9..2ccc07c 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -16,6 +16,10 @@ our $Message = "Noname test"; eval 'use Config'; # Defaults assumed if this fails +run_tests() unless caller; + +sub run_tests { + $x = "abc\ndef\n"; if ($x =~ /^abc/) {print "ok 1\n";} else {print "not ok 1\n";} @@ -533,25 +537,32 @@ print "not " unless $1 and /$1/; print "ok $test\n"; $test++; +if ($::running_as_thread) { + print "not ok $test # TODO & SKIP: croaks in 5.10 when threaded\n"; + $test++; +} else { $a=qr/(?{++$b})/; $b = 7; /$a$a/; print "not " unless $b eq '9'; print "ok $test\n"; $test++; +} -$c="$a"; -/$a$a/; -print "not " unless $b eq '11'; -print "ok $test\n"; -$test++; +{ + local $TODO = $::running_as_thread; + $c="$a"; + /$a$a/; + iseq($b, '11'); +} { use re "eval"; /$a$c$a/; - print "not " unless $b eq '14'; - print "ok $test\n"; - $test++; + { + local $TODO = $::running_as_thread; + iseq($b, '14'); + } local $lex_a = 2; my $lex_a = 43; @@ -571,10 +582,10 @@ $test++; no re "eval"; $match = eval { /$a$c$a/ }; - print "not " - unless $b eq '14' and $@ =~ /Eval-group not allowed/ and not $match; - print "ok $test\n"; - $test++; + # FIXME - split this one. That would require removing a lot of hard coded + # test numbers. + local $TODO = $::running_as_thread; + ok($b eq '14' and $@ =~ /Eval-group not allowed/ and not $match); } { @@ -789,9 +800,10 @@ print "not " if $str =~ /^...\G/; print "ok $test\n"; $test++; -print "not " unless $str =~ /.\G./ and $& eq 'bc'; -print "ok $test\n"; -$test++; +{ + local $TODO = $::running_as_thread; + ok($str =~ /.\G./ and $& eq 'bc'); +} print "not " unless $str =~ /\G../ and $& eq 'cd'; print "ok $test\n"; @@ -875,23 +887,29 @@ $foo='aabbccddeeffgg'; pos($foo)=1; $foo=~/.\G(..)/g; -iseq($1,'ab'); +{ + local $TODO = $::running_as_thread; + iseq($1,'ab'); +} pos($foo) += 1; $foo=~/.\G(..)/g; -print "not " unless($1 eq 'cc'); -print "ok $test\n"; -$test++; +{ + local $TODO = $::running_as_thread; + iseq($1, 'cc'); +} pos($foo) += 1; $foo=~/.\G(..)/g; -print "not " unless($1 eq 'de'); -print "ok $test\n"; -$test++; +{ + local $TODO = $::running_as_thread; + iseq($1, 'de'); +} -print "not " unless $foo =~ /\Gef/g; -print "ok $test\n"; -$test++; +{ + local $TODO = $::running_as_thread; + ok($foo =~ /\Gef/g); +} undef pos $foo; @@ -1279,7 +1297,10 @@ print "ok 246\n"; print "not " unless "\x{abcd}" =~ /\x{abcd}/; print "ok 247\n"; -{ +if ($::running_as_thread) { + print "not ok 248 # TODO & SKIP: SEGVs in 5.10 when threaded\n"; + print "not ok 249 # TODO & SKIP: SEGVs in 5.10 when threaded\n"; +} else { # bug id 20001008.001 $test = 248; @@ -2033,9 +2054,10 @@ $test = 687; # Force scalar context on the patern match sub ok ($;$) { my($ok, $name) = @_; + my $todo = $TODO ? " # TODO $TODO" : ''; printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, - ($name||$Message)."\tLine ".((caller)[2]); + ($name||$Message)."$todo\tLine ".((caller)[2]); printf "# Failed test at line %d\n", (caller)[2] unless $ok; @@ -3074,12 +3096,15 @@ ok("A" =~ /\p{AsciiHexAndDash}/, "'A' is AsciiHexAndDash"); ok($a !~ /^\C{4}y/, q{don't match \C{4}y}); } -$_ = 'aaaaaaaaaa'; -utf8::upgrade($_); chop $_; $\="\n"; -ok(/[^\s]+/, "m/[^\s]/ utf8"); -ok(/[^\d]+/, "m/[^\d]/ utf8"); -ok(($a = $_, $_ =~ s/[^\s]+/./g), "s/[^\s]/ utf8"); -ok(($a = $_, $a =~ s/[^\d]+/./g), "s/[^\s]/ utf8"); +{ + local $\; + $_ = 'aaaaaaaaaa'; + utf8::upgrade($_); chop $_; $\="\n"; + ok(/[^\s]+/, "m/[^\s]/ utf8"); + ok(/[^\d]+/, "m/[^\d]/ utf8"); + ok(($a = $_, $_ =~ s/[^\s]+/./g), "s/[^\s]/ utf8"); + ok(($a = $_, $a =~ s/[^\d]+/./g), "s/[^\s]/ utf8"); +} ok("\x{100}" =~ /\x{100}/, "[perl #15397]"); ok("\x{100}" =~ /(\x{100})/, "[perl #15397]"); @@ -3134,13 +3159,13 @@ ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); foreach (1,2,3,4) { $p++ if /(??{ $p })/ } - ok ($p == 5, "[perl #20683] (??{ }) returns stale values"); + iseq ($p, 5, "[perl #20683] (??{ }) returns stale values"); { package P; $a=1; sub TIESCALAR { bless[] } sub FETCH { $a++ } } tie $p, P; foreach (1,2,3,4) { /(??{ $p })/ } - ok ( $p == 5, "(??{ }) returns stale values"); + iseq ( $p, 5, "(??{ }) returns stale values"); } { @@ -3386,7 +3411,7 @@ ok(("foba ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i) } - +print "# set PERL_SKIP_PSYCHO_TEST to skip this test\n"; if (!$ENV{PERL_SKIP_PSYCHO_TEST}){ my @normal=qw(these are some normal words); my $psycho=join "|",@normal,map chr $_,255..20000; @@ -3406,9 +3431,9 @@ if (!$ENV{PERL_SKIP_PSYCHO_TEST}){ ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie"); ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime"); - ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8"); + ok("\xe9" =~ /$utf8/i, "# latin/utf8"); ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie"); - ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime"); + ok($latin1 =~ /$utf8/i, "# latin/utf8 runtime"); ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime"); } @@ -3445,6 +3470,7 @@ if (!$ENV{PERL_SKIP_PSYCHO_TEST}){ } { + local $TODO = "See changes 26925-26928, which reverted change 26410"; package lv; $var = "abc"; sub variable : lvalue { $var } @@ -3453,16 +3479,17 @@ if (!$ENV{PERL_SKIP_PSYCHO_TEST}){ my $o = bless [], "lv"; my $f = ""; eval { for (1..2) { $f .= $1 if $o->variable =~ /(.)/g } }; - ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n"; + ok($f eq "ab", "pos retained between calls") or print "# $@\n"; } { + local $TODO = "See changes 26925-26928, which reverted change 26410"; $var = "abc"; sub variable : lvalue { $var } my $f = ""; eval { for (1..2) { $f .= $1 if variable() =~ /(.)/g } }; - ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n"; + ok($f eq "ab", "pos retained between calls") or print "# $@\n"; } # [perl #37836] Simple Regex causes SEGV when run on specific data @@ -3692,13 +3719,14 @@ SKIP:{ sub iseq($$;$) { my ( $got, $expect, $name)=@_; + my $todo = $TODO ? " # TODO $TODO" : ''; $_=defined($_) ? "'$_'" : "undef" for $got, $expect; my $ok= $got eq $expect; - printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, + printf "%sok %d - %s$todo\n", ($ok ? "" : "not "), $test, ($name||$Message)."\tLine ".((caller)[2]); printf "# Failed test at line %d\n". @@ -3773,6 +3801,7 @@ sub iseq($$;$) { if ($ENV{PERL_SKIP_PSYCHO_TEST}){ printf "ok %d Skip: No psycho tests\n", $test++; } else { + print "# set PERL_SKIP_PSYCHO_TEST to skip this test\n"; my $r = qr/^ (?: ( (?:a|z+)+ ) @@ -3913,25 +3942,6 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) { 1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g; iseq($count,4,"/.(*PRUNE)/"); } -{ # Test the \v form of the (*PRUNE) pattern - our $count = 0; - 'aaab'=~/a+b?(?{$count++})(*FAIL)/; - iseq($count,9,"expect 9 for no \\v"); - $count = 0; - 'aaab'=~/a+b?\v(?{$count++})(*FAIL)/; - iseq($count,3,"expect 3 with \\v"); - local $_='aaab'; - $count=0; - 1 while /.\v(?{$count++})(*FAIL)/g; - iseq($count,4,"/.\\v/"); - $count = 0; - 'aaab'=~/a+b?(??{'\v'})(?{$count++})(*FAIL)/; - iseq($count,3,"expect 3 with \\v"); - local $_='aaab'; - $count=0; - 1 while /.(??{'\v'})(?{$count++})(*FAIL)/g; - iseq($count,4,"/.\\v/"); -} { # Test the (*SKIP) pattern our $count = 0; 'aaab'=~/a+b?(*SKIP)(?{$count++})(*FAIL)/; @@ -3947,21 +3957,6 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) { iseq($count,2,"Expect 2 with (*SKIP)" ); iseq("@res","aaab aaab","adjacent (*SKIP) works as expected" ); } -{ # Test the \V form of the (*SKIP) pattern - our $count = 0; - 'aaab'=~/a+b?\V(?{$count++})(*FAIL)/; - iseq($count,1,"expect 1 with \\V"); - local $_='aaab'; - $count=0; - 1 while /.\V(?{$count++})(*FAIL)/g; - iseq($count,4,"/.\\V/"); - $_='aaabaaab'; - $count=0; - our @res=(); - 1 while /(a+b?)\V(?{$count++; push @res,$1})(*FAIL)/g; - iseq($count,2,"Expect 2 with \\V" ); - iseq("@res","aaab aaab","adjacent \\V works as expected" ); -} { # Test the (*SKIP) pattern our $count = 0; 'aaab'=~/a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/; @@ -4127,14 +4122,25 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) { iseq($^R,'last regexp code result'); } iseq($^R,'Nothing'); + + { + local $^R = "Bad"; + ok('x foofoo y' =~ m{ + (foo|bar)\1 # this time without the + + (?{"last regexp code result"}) + }x); + iseq($^R,'last regexp code result'); + } + iseq($^R,'Nothing'); } { - local $Message="RT#22395"; + local $Message="RT 22395"; + local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)"; our $count; for my $l (10,100,1000) { $count=0; ('a' x $l) =~ /(.*)(?{$count++})[bc]/; - iseq( $count, $l + 1, "# TODO Should be L+1 not L*(L+3)/2 (L=$l)"); + iseq( $count, $l + 1); } } { @@ -4335,7 +4341,241 @@ sub kt iseq("$1$2",'foooooobaaaaar'); } iseq("$1$2","foobar"); +} +{ + local $Message = "HORIZWS"; + local $_="\t \r\n \n \t".chr(11)."\n"; + s/\H/H/g; + s/\h/h/g; + iseq($_,"hhHHhHhhHH"); + $_="\t \r\n \n \t".chr(11)."\n"; + utf8::upgrade($_); + s/\H/H/g; + s/\h/h/g; + iseq($_,"hhHHhHhhHH"); } +{ + local $Message = "Various whitespace special patterns"; + my @h=map { chr( $_ ) } ( + 0x09, 0x20, 0xa0, 0x1680, 0x180e, 0x2000, 0x2001, 0x2002, + 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, + 0x202f, 0x205f, 0x3000 + ); + my @v=map { chr( $_ ) } ( 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 ); + my @lb=( "\x0D\x0A", + map { chr( $_ ) } ( 0x0A..0x0D,0x85,0x2028,0x2029 )); + foreach my $t ([\@h,qr/\h/,qr/\h+/],[\@v,qr/\v/,qr/\v+/],[\@lb,qr/\R/,qr/\R+/],){ + my $ary=shift @$t; + foreach my $pat (@$t) { + foreach my $str (@$ary) { + ok($str=~/($pat)/,$pat); + iseq($1,$str,$pat); + utf8::upgrade($str); + ok($str=~/($pat)/,"Upgraded string - $pat"); + iseq($1,$str,"Upgraded string - $pat"); + } + } + } +} +{ + local $Message = "Check that \\xDF match properly in its various forms"; + # test that \xDF matches properly. this is pretty hacky stuff, + # but its actually needed. the malarky with '-' is to prevent + # compilation caching from playing any role in the test. + my @df= (chr(0xDF),'-',chr(0xDF)); + utf8::upgrade($df[2]); + my @strs= ('ss','sS','Ss','SS',chr(0xDF)); + my @ss= map { ("$_", "$_") } @strs; + utf8::upgrade($ss[$_*2+1]) for 0..$#strs; + + for my $ssi (0..$#ss) { + for my $dfi (0..$#df) { + my $pat= $df[$dfi]; + my $str= $ss[$ssi]; + my $utf_df= ($dfi > 1) ? 'utf8' : ''; + my $utf_ss= ($ssi % 2) ? 'utf8' : ''; + (my $sstr=$str)=~s/\xDF/\\xDF/; + + if ($utf_df || $utf_ss || length($ss[$ssi])==1) { + my $ret= $str=~/$pat/i; + next if $pat eq '-'; + ok($ret, + "\"$sstr\"=~/\\xDF/i (str is @{[$utf_ss||'latin']}, pat is @{[$utf_df||'latin']})"); + } else { + my $ret= $str !~ /$pat/i; + next if $pat eq '-'; + ok($ret, + "\"$sstr\"!~/\\xDF/i (str is @{[$utf_ss||'latin']}, pat is @{[$utf_df||'latin']})"); + } + } + } +} +{ + local $Message = "BBC(Bleadperl Breaks CPAN) Today: String::Multibyte"; + my $re = qr/(?:[\x00-\xFF]{4})/; + my $hyp = "\0\0\0-"; + my $esc = "\0\0\0\\"; + + my $str = "$esc$hyp$hyp$esc$esc"; + my @a = ($str =~ /\G(?:\Q$esc$esc\E|\Q$esc$hyp\E|$re)/g); + + iseq(0+@a,3); + iseq(join('=', @a),"$esc$hyp=$hyp=$esc$esc"); +} +# test for keys in %+ and %- +{ + my $_ = "abcdef"; + /(?a)|(?b)/; + iseq( (join ",", sort keys %+), "foo" ); + iseq( (join ",", sort keys %-), "foo" ); + iseq( (join ",", sort values %+), "a" ); + iseq( (join ",", sort map "@$_", values %-), "a " ); + /(?a)(?b)(?.)/; + iseq( (join ",", sort keys %+), "bar,quux" ); + iseq( (join ",", sort keys %-), "bar,quux" ); + iseq( (join ",", sort values %+), "a,c" ); # leftmost + iseq( (join ",", sort map "@$_", values %-), "a b,c" ); + /(?a)(?c)?/; # second buffer won't capture + iseq( (join ",", sort keys %+), "un" ); + iseq( (join ",", sort keys %-), "deux,un" ); + iseq( (join ",", sort values %+), "a" ); + iseq( (join ",", sort map "@$_", values %-), ",a" ); +} + +# length() on captures, the numbered ones end up in Perl_magic_len +{ + my $_ = "aoeu \xe6var ook"; + /^ \w+ \s (?\S+)/x; + + iseq( length($`), 0, 'length $`' ); + iseq( length($'), 4, q[length $'] ); + iseq( length($&), 9, 'length $&' ); + iseq( length($1), 4, 'length $1' ); + iseq( length($+{eek}), 4, 'length $+{eek} == length $1' ); +} + +{ + my $ok=-1; + + $ok=exists($-{x}) ? 1 : 0 + if 'bar'=~/(?foo)|bar/; + iseq($ok,1,'$-{x} exists after "bar"=~/(?foo)|bar/'); + iseq(scalar(%+), 0, 'scalar %+ == 0 after "bar"=~/(?foo)|bar/'); + iseq(scalar(%-), 1, 'scalar %- == 1 after "bar"=~/(?foo)|bar/'); + + $ok=-1; + $ok=exists($+{x}) ? 1 : 0 + if 'bar'=~/(?foo)|bar/; + iseq($ok,0,'$+{x} not exists after "bar"=~/(?foo)|bar/'); + iseq(scalar(%+), 0, 'scalar %+ == 0 after "bar"=~/(?foo)|bar/'); + iseq(scalar(%-), 1, 'scalar %- == 1 after "bar"=~/(?foo)|bar/'); + + $ok=-1; + $ok=exists($-{x}) ? 1 : 0 + if 'foo'=~/(?foo)|bar/; + iseq($ok,1,'$-{x} exists after "foo"=~/(?foo)|bar/'); + iseq(scalar(%+), 1, 'scalar %+ == 1 after "foo"=~/(?foo)|bar/'); + iseq(scalar(%-), 1, 'scalar %- == 1 after "foo"=~/(?foo)|bar/'); + + $ok=-1; + $ok=exists($+{x}) ? 1 : 0 + if 'foo'=~/(?foo)|bar/; + iseq($ok,1,'$+{x} exists after "foo"=~/(?foo)|bar/'); +} +{ + local $_; + ($_ = 'abc')=~/(abc)/g; + $_ = '123'; + iseq("$1",'abc',"/g leads to unsafe match vars: $1"); +} +{ + local $Message="Message-ID: <20070818091501.7eff4831@r2d2>"; + my $str= ""; + for(0..5){ + my @x; + $str .= "@x"; # this should ALWAYS be the empty string + 'a'=~/(a|)/; + push @x,1; + } + iseq(length($str),"0","Trie scope error, string should be empty"); + $str=""; + my @foo = ('a')x5; + for (@foo) { + my @bar; + $str .= "@bar"; + s/a|/push @bar, 1/e; + } + iseq(length($str),"0","Trie scope error, string should be empty"); +} +{ +# [perl #45605] Regexp failure with utf8-flagged and byte-flagged string + + my $utf_8 = "\xd6schel"; + utf8::upgrade($utf_8); + $utf_8 =~ m{(\xd6|Ö)schel}; + iseq($1,"\xd6","#45605"); +} + +{ + # Regardless of utf8ness any character matches itself when + # doing a case insensitive match. See also [perl #36207] + for my $o (0..255) { + my @ch=(chr($o),chr($o)); + utf8::upgrade($ch[1]); + for my $u_str (0,1) { + for my $u_pat (0,1) { + ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E/i, + "\$c=~/\$c/i : chr($o) : u_str=$u_str u_pat=$u_pat"); + ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E|xyz/i, + "# \$c=~/\$c|xyz/i : chr($o) : u_str=$u_str u_pat=$u_pat"); + } + } + } +} +{ + my $a = 3; "" =~ /(??{ $a })/; + my $b = $a; + iseq($b, $a, "copy of scalar used for postponed subexpression"); +} +{ + local $Message = "\$REGMARK in replacement -- Bug #49190"; + my $_ = "A"; + s/(*:B)A/$REGMARK/; + iseq $_, "B"; + $_ = "CCCCBAA"; + s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g; + iseq $_, "ZYX"; +} +if ($::running_as_thread) { + for (1..3) { + print "not ok $test # TODO & SKIP: croaks when threaded\n"; + $test++; + } +} else { + our @ctl_n=(); + our @plus=(); + our $nested_tags; + $nested_tags = qr{ + < + (\w+) + (?{ + push @ctl_n,$^N; + push @plus,$+; + }) + > + (??{$nested_tags})* + + }x; + + my $match= '' =~ m/^$nested_tags$/; + ok($match,'nested construct matches'); + iseq("@ctl_n","bla blubb",'$^N inside of (?{}) works as expected'); + iseq("@plus","bla blubb",'$+ inside of (?{}) works as expected'); +} + + + + # Test counter is at bottom of file. Put new tests above here. #------------------------------------------------------------------- # Keep the following tests last -- they may crash perl @@ -4381,31 +4621,24 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/); iseq($_,"!Bang!1!Bang!2!Bang!3!Bang!"); } -# test for keys in %+ and %- +# [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache + { - my $_ = "abcdef"; - /(?a)|(?b)/; - iseq( (join ",", sort keys %+), "foo" ); - iseq( (join ",", sort keys %-), "foo" ); - iseq( (join ",", sort values %+), "a" ); - iseq( (join ",", sort map "@$_", values %-), "a " ); - /(?a)(?b)(?.)/; - iseq( (join ",", sort keys %+), "bar,quux" ); - iseq( (join ",", sort keys %-), "bar,quux" ); - iseq( (join ",", sort values %+), "a,c" ); # leftmost - iseq( (join ",", sort map "@$_", values %-), "a b,c" ); - /(?a)(?c)?/; # second buffer won't capture - iseq( (join ",", sort keys %+), "un" ); - iseq( (join ",", sort keys %-), "deux,un" ); - iseq( (join ",", sort values %+), "a" ); - iseq( (join ",", sort map "@$_", values %-), ",a" ); + local ${^UTF8CACHE} = -1; + my $s="[a]a{2}"; + utf8::upgrade $s; + ok("aaa" =~ /$s/, "#45337"); } # Put new tests above the dotted line about a page above this comment iseq(0+$::test,$::TestCount,"Got the right number of tests!"); + +} # end of sub pat_tests + # Don't forget to update this! BEGIN { - $::TestCount = 1655; + $::TestCount = 4019; print "1..$::TestCount\n"; } +"Truth";