X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/c74340f9cdee6010339b6bfd0e8b0dc8bc875344..3914f56082a0817d46ec5706e82e48b4e4e19f9d:/t/op/pat.t diff --git a/t/op/pat.t b/t/op/pat.t index 358fbb0..5512b2b 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -12,6 +12,7 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; } +our $Message = "Noname test"; eval 'use Config'; # Defaults assumed if this fails @@ -874,9 +875,7 @@ $foo='aabbccddeeffgg'; pos($foo)=1; $foo=~/.\G(..)/g; -print "not " unless($1 eq 'ab'); -print "ok $test\n"; -$test++; +iseq($1,'ab'); pos($foo) += 1; $foo=~/.\G(..)/g; @@ -1047,9 +1046,7 @@ $test++; @b = grep(/\w/,@a); @c = grep(/[\w]/,@a); -print "not " if "@b" ne "@c"; -print "ok $test\n"; -$test++; +iseq("@b","@c"); # see if backtracking optimization works correctly "\n\n" =~ /\n $ \n/x or print "not "; @@ -2037,7 +2034,8 @@ $test = 687; sub ok ($;$) { my($ok, $name) = @_; - printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name||'unnamed'; + printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, + ($name||$Message)."\tLine ".((caller)[2]); printf "# Failed test at line %d\n", (caller)[2] unless $ok; @@ -3665,6 +3663,33 @@ SKIP:{ $s=~s/(?'digits'\d+)\k'digits'/$+{digits}/; ok($s eq '123456','Named capture (single quotes) s///'); } + +{ + my @ary = ( + pack('U', 0x00F1), # n-tilde + '_'.pack('U', 0x00F1), # _ + n-tilde + 'c'.pack('U', 0x0327), # c + cedilla + pack('U*', 0x00F1, 0x0327), # n-tilde + cedilla + 'a'.pack('U', 0x00B2), # a + superscript two + pack('U', 0x0391), # ALPHA + pack('U', 0x0391).'2', # ALPHA + 2 + pack('U', 0x0391).'_', # ALPHA + _ + ); + for my $uni (@ary) { + my ($r1, $c1, $r2, $c2) = eval qq{ + use utf8; + scalar("..foo foo.." =~ /(?'${uni}'foo) \\k'${uni}'/), + \$+{${uni}}, + scalar("..bar bar.." =~ /(?<${uni}>bar) \\k<${uni}>/), + \$+{${uni}}; + }; + ok($r1, "Named capture UTF (?'')"); + ok(defined $c1 && $c1 eq 'foo', "Named capture UTF \%+"); + ok($r2, "Named capture UTF (?<>)"); + ok(defined $c2 && $c2 eq 'bar', "Named capture UTF \%+"); + } +} + sub iseq($$;$) { my ( $got, $expect, $name)=@_; @@ -3673,7 +3698,8 @@ sub iseq($$;$) { my $ok= $got eq $expect; - printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name||'unnamed'; + printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, + ($name||$Message)."\tLine ".((caller)[2]); printf "# Failed test at line %d\n". "# expected: %s\n". @@ -3719,7 +3745,24 @@ sub iseq($$;$) { '; ok(!$@,'lvalue $+{...} should not throw an exception'); } - +{ + my $s='foo bar baz'; + my @res; + if ('1234'=~/(?1)(?2)(?3)(?4)/) { + foreach my $name (sort keys(%-)) { + my $ary = $-{$name}; + foreach my $idx (0..$#$ary) { + push @res,"$name:$idx:$ary->[$idx]"; + } + } + } + my @expect=qw(A:0:1 A:1:3 B:0:2 B:1:4); + iseq("@res","@expect","Check %-"); + eval' + print for $-{this_key_doesnt_exist}; + '; + ok(!$@,'lvalue $-{...} should not throw an exception'); +} # stress test CURLYX/WHILEM. # # This test includes varying levels of nesting, and according to @@ -3870,6 +3913,25 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) { 1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g; iseq($count,4,"/.(*PRUNE)/"); } +{ # Test the \v form of the (*PRUNE) pattern + our $count = 0; + 'aaab'=~/a+b?(?{$count++})(*FAIL)/; + iseq($count,9,"expect 9 for no \\v"); + $count = 0; + 'aaab'=~/a+b?\v(?{$count++})(*FAIL)/; + iseq($count,3,"expect 3 with \\v"); + local $_='aaab'; + $count=0; + 1 while /.\v(?{$count++})(*FAIL)/g; + iseq($count,4,"/.\\v/"); + $count = 0; + 'aaab'=~/a+b?(??{'\v'})(?{$count++})(*FAIL)/; + iseq($count,3,"expect 3 with \\v"); + local $_='aaab'; + $count=0; + 1 while /.(??{'\v'})(?{$count++})(*FAIL)/g; + iseq($count,4,"/.\\v/"); +} { # Test the (*SKIP) pattern our $count = 0; 'aaab'=~/a+b?(*SKIP)(?{$count++})(*FAIL)/; @@ -3885,6 +3947,21 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) { iseq($count,2,"Expect 2 with (*SKIP)" ); iseq("@res","aaab aaab","adjacent (*SKIP) works as expected" ); } +{ # Test the \V form of the (*SKIP) pattern + our $count = 0; + 'aaab'=~/a+b?\V(?{$count++})(*FAIL)/; + iseq($count,1,"expect 1 with \\V"); + local $_='aaab'; + $count=0; + 1 while /.\V(?{$count++})(*FAIL)/g; + iseq($count,4,"/.\\V/"); + $_='aaabaaab'; + $count=0; + our @res=(); + 1 while /(a+b?)\V(?{$count++; push @res,$1})(*FAIL)/g; + iseq($count,2,"Expect 2 with \\V" ); + iseq("@res","aaab aaab","adjacent \\V works as expected" ); +} { # Test the (*SKIP) pattern our $count = 0; 'aaab'=~/a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/; @@ -3973,6 +4050,7 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) { } { # Test named commits and the $REGERROR var + local $Message = "\$REGERROR"; our $REGERROR; for $word (qw(bar baz bop)) { $REGERROR=""; @@ -3981,6 +4059,7 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) { } } { #Regression test for perlbug 40684 + local $Message = "RT#40684 tests:"; my $s = "abc\ndef"; my $rex = qr'^abc$'m; ok($s =~ m/$rex/); @@ -3994,6 +4073,7 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) { } { + local $Message = "Relative Recursion"; my $parens=qr/(\((?:[^()]++|(?-1))*+\))/; local $_='foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'; my ($all,$one,$two)=('','',''); @@ -4007,9 +4087,267 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) { iseq($all, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'); iseq($all, $_); } +{ + my $spaces=" "; + local $_=join 'bar',$spaces,$spaces; + our $count=0; + s/(?>\s+bar)(?{$count++})//g; + iseq($_,$spaces,"SUSPEND final string"); + iseq($count,1,"Optimiser should have prevented more than one match"); +} +{ + local $Message = "RT#36909 test"; + $^R = 'Nothing'; + { + local $^R = "Bad"; + ok('x foofoo y' =~ m{ + (foo) # $^R correctly set + (?{ "last regexp code result" }) + }x); + iseq($^R,'last regexp code result'); + } + iseq($^R,'Nothing'); + { + local $^R = "Bad"; -#------------------------------------------------------------------- + ok('x foofoo y' =~ m{ + (?:foo|bar)+ # $^R correctly set + (?{"last regexp code result"}) + }x); + iseq($^R,'last regexp code result'); + } + iseq($^R,'Nothing'); + + { + local $^R = "Bad"; + ok('x foofoo y' =~ m{ + (foo|bar)\1+ # $^R undefined + (?{"last regexp code result"}) + }x); + iseq($^R,'last regexp code result'); + } + iseq($^R,'Nothing'); + { + local $^R = "Bad"; + ok('x foofoo y' =~ m{ + (foo|bar)\1 # this time without the + + (?{"last regexp code result"}) + }x); + iseq($^R,'last regexp code result'); + } + iseq($^R,'Nothing'); +} +{ + local $Message="RT#22395"; + our $count; + for my $l (10,100,1000) { + $count=0; + ('a' x $l) =~ /(.*)(?{$count++})[bc]/; + iseq( $count, $l + 1, "# TODO Should be L+1 not L*(L+3)/2 (L=$l)"); + } +} +{ + local $Message = "RT#22614"; + local $_='ab'; + our @len=(); + /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; + iseq("@len","2 2 2"); +} +{ + local $Message = "RT#18209"; + my $text = ' word1 word2 word3 word4 word5 word6 '; + + my @words = ('word1', 'word3', 'word5'); + my $count; + foreach my $word (@words){ + $text =~ s/$word\s//gi; # Leave a space to seperate words in the resultant str. + # The following block is not working. + if($&){ + $count++; + } + # End bad block + } + iseq($count,3); + iseq($text,' word2 word4 word6 '); +} +{ + # RT#6893 + local $_= qq(A\nB\nC\n); + my @res; + while (m#(\G|\n)([^\n]*)\n#gsx) + { + push @res,"$2"; + last if @res>3; + } + iseq("@res","A B C","RT#6893: /g pattern shouldn't infinite loop"); +} + +{ + # From Message-ID: <877ixs6oa6.fsf@k75.linux.bogus> + my $dow_name= "nada"; + my $parser = "(\$dow_name) = \$time_string =~ /(D\x{e9}\\ C\x{e9}adaoin|D\x{e9}\\ Sathairn|\\w+|\x{100})/"; + my $time_string = "D\x{e9} C\x{e9}adaoin"; + eval $parser; + ok(!$@,"Test Eval worked"); + iseq($dow_name,$time_string,"UTF8 trie common prefix extraction"); +} + +{ + my $v; + ($v='bar')=~/(\w+)/g; + $v='foo'; + iseq("$1",'bar','$1 is safe after /g - may fail due to specialized config in pp_hot.c') +} +{ + local $Message = "http://nntp.perl.org/group/perl.perl5.porters/118663"; + my $qr_barR1 = qr/(bar)\g-1/; + ok("foobarbarxyz" =~ $qr_barR1); + ok("foobarbarxyz" =~ qr/foo${qr_barR1}xyz/); + ok("foobarbarxyz" =~ qr/(foo)${qr_barR1}xyz/); + ok("foobarbarxyz" =~ qr/(foo)(bar)\g{-1}xyz/); + ok("foobarbarxyz" =~ qr/(foo${qr_barR1})xyz/); + ok("foobarbarxyz" =~ qr/(foo(bar)\g{-1})xyz/); +} +{ + local $Message = "RT#41010"; + my @tails=('','(?(1))','(|)','()?'); + my @quants=('*','+'); + my $doit=sub { + my $pats= shift; + for (@_) { + for my $pat (@$pats) { + for my $quant (@quants) { + for my $tail (@tails) { + my $re = "($pat$quant\$)$tail"; + ok(/$re/ && $1 eq $_,"'$_'=~/$re/"); + ok(/$re/m && $1 eq $_,"'$_'=~/$re/m"); + } + } + } + } + }; + + my @dpats=( + '\d', + '[1234567890]', + '(1|[23]|4|[56]|[78]|[90])', + '(?:1|[23]|4|[56]|[78]|[90])', + '(1|2|3|4|5|6|7|8|9|0)', + '(?:1|2|3|4|5|6|7|8|9|0)', + ); + my @spats=('[ ]',' ','( |\t)','(?: |\t)','[ \t]','\s'); + my @sstrs=(' '); + my @dstrs=('12345'); + $doit->(\@spats,@sstrs); + $doit->(\@dpats,@dstrs); +} +{ + local $Message = "\$REGMARK"; + our @r=(); + ok('foofoo' =~ /foo (*MARK:foo) (?{push @r,$REGMARK}) /x); + iseq("@r","foo"); + iseq($REGMARK,"foo"); + ok('foofoo' !~ /foo (*MARK:foo) (*FAIL) /x); + ok(!$REGMARK); + iseq($REGERROR,'foo'); +} +{ + my $x; + $x = "abc.def.ghi.jkl"; + $x =~ s/.*\K\..*//; + ok($x eq "abc.def.ghi"); + + $x = "one two three four"; + $x =~ s/o+ \Kthree//g; + ok($x eq "one two four"); + + $x = "abcde"; + $x =~ s/(.)\K/$1/g; + ok($x eq "aabbccddee"); +} +sub kt +{ + return '4' if $_[0] eq '09028623'; +} + +{ # Nested EVAL using PL_curpm (via $1 or friends) + my $re; + our $grabit = qr/ ([0-6][0-9]{7}) (??{ kt $1 }) [890] /x; + $re = qr/^ ( (??{ $grabit }) ) $ /x; + my @res = '0902862349' =~ $re; + iseq(join("-",@res),"0902862349", + 'PL_curpm is set properly on nested eval'); + + our $qr = qr/ (o) (??{ $1 }) /x; + ok( 'boob'=~/( b (??{ $qr }) b )/x && 1, + "PL_curpm, nested eval"); +} + +{ + use charnames ":full"; + ok("\N{ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "I =~ Alphabetic"); + ok("\N{ROMAN NUMERAL ONE}" =~ /\p{Uppercase}/, "I =~ Uppercase"); + ok("\N{ROMAN NUMERAL ONE}" !~ /\p{Lowercase}/, "I !~ Lowercase"); + ok("\N{ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "I =~ ID_Start"); + ok("\N{ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "I =~ ID_Continue"); + ok("\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "i =~ Alphabetic"); + ok("\N{SMALL ROMAN NUMERAL ONE}" !~ /\p{Uppercase}/, "i !~ Uppercase"); + ok("\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Lowercase}/, "i =~ Lowercase"); + ok("\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "i =~ ID_Start"); + ok("\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "i =~ ID_Continue"); +} + +{ +# requirement of Unicode Technical Standard #18, 1.7 Code Points +# cf. http://www.unicode.org/reports/tr18/#Supplementary_Characters + for my $u (0x7FF, 0x800, 0xFFFF, 0x10000) { + no warnings 'utf8'; # oops + my $c = chr $u; + my $x = sprintf '%04X', $u; + ok( "A${c}B" =~ /A[\0-\x{10000}]B/, "unicode range - $x"); + } +} + +{ + my $res=""; + + if ('1' =~ /(?|(?1)|(?2))/) { + $res = "@{$- {digit}}"; + } + iseq($res,"1", + "Check that (?|...) doesnt cause dupe entries in the names array"); + #--- + $res=""; + if ('11' =~ /(?|(?1)|(?2))(?&digit)/) { + $res = "@{$- {digit}}"; + } + iseq($res, "1", + "Check that (?&..) to a buffer inside a (?|...) goes to the leftmost"); +} + +{ + use warnings; + local $Message = "ASCII pattern that really is utf8"; + my @w; + local $SIG{__WARN__}=sub{push @w,"@_"}; + my $c=qq(\x{DF}); + ok($c=~/${c}|\x{100}/); + ok(@w==0); +} +{ + local $Message = "corruption of match results of qr// across scopes"; + my $qr=qr/(fo+)(ba+r)/; + 'foobar'=~/$qr/; + iseq("$1$2","foobar"); + { + 'foooooobaaaaar'=~/$qr/; + iseq("$1$2",'foooooobaaaaar'); + } + iseq("$1$2","foobar"); +} +# Test counter is at bottom of file. Put new tests above here. +#------------------------------------------------------------------- # Keep the following tests last -- they may crash perl { # RT#19049 / RT#38869 @@ -4037,7 +4375,11 @@ ok((q(a)x 100) =~ /^(??{'(.)'x 100})/, "Regexp /^(??{'(.)'x 100})/ crashes older perls") or print "# Unexpected outcome: should pass or crash perl\n"; +eval '/\k/'; +ok($@=~/\QSequence \k... not terminated in regex;\E/); + { + local $Message = "substitution with lookahead (possible segv)"; $_="ns1ns1ns1"; s/ns(?=\d)/ns_/g; iseq($_,"ns_1ns_1ns_1"); @@ -4049,7 +4391,43 @@ ok((q(a)x 100) =~ /^(??{'(.)'x 100})/, iseq($_,"!Bang!1!Bang!2!Bang!3!Bang!"); } -# Put new tests above the line, not here. +# test for keys in %+ and %- +{ + my $_ = "abcdef"; + /(?a)|(?b)/; + iseq( (join ",", sort keys %+), "foo" ); + iseq( (join ",", sort keys %-), "foo" ); + iseq( (join ",", sort values %+), "a" ); + iseq( (join ",", sort map "@$_", values %-), "a " ); + /(?a)(?b)(?.)/; + iseq( (join ",", sort keys %+), "bar,quux" ); + iseq( (join ",", sort keys %-), "bar,quux" ); + iseq( (join ",", sort values %+), "a,c" ); # leftmost + iseq( (join ",", sort map "@$_", values %-), "a b,c" ); + /(?a)(?c)?/; # second buffer won't capture + iseq( (join ",", sort keys %+), "un" ); + iseq( (join ",", sort keys %-), "deux,un" ); + iseq( (join ",", sort values %+), "a" ); + iseq( (join ",", sort map "@$_", values %-), ",a" ); +} + +# length() on captures, these end up in Perl_magic_len +{ + my $_ = "aoeu \xe6var ook"; + /^ \w+ \s (?\S+)/x; + + iseq( length($`), 4, 'length $`' ); + iseq( length($'), 4, q[length $'] ); + iseq( length($&), 9, 'length $&' ); + iseq( length($1), 4, 'length $1' ); + iseq( length($+{eek}), 4, 'length $+{eek} == length $1' ); +} +# Put new tests above the dotted line about a page above this comment +iseq(0+$::test,$::TestCount,"Got the right number of tests!"); # Don't forget to update this! -BEGIN { print "1..1347\n" }; +BEGIN { + $::TestCount = 1663; + print "1..$::TestCount\n"; +} +