X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/5aca436449cd62a9a59230a9a638565b1de6c2fb..bfac009d8686ae692a2b3faa02801b15a15e64ba:/t/op/pat.t diff --git a/t/op/pat.t b/t/op/pat.t index 1e7eaf7..59499b1 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..1007\n"; +# Test counter output is generated by a BEGIN block at bottom of file BEGIN { chdir 't' if -d 't'; @@ -79,12 +79,21 @@ $XXX{345} = 345; while ($_ = shift(@XXX)) { ?(.*)? && (print $1,"\n"); /not/ && reset; - /not ok 26/ && reset 'X'; + if (/not ok 26/) { + if ($^O eq 'VMS') { + $_ = shift(@XXX); + } + else { + reset 'X'; + } + } } -while (($key,$val) = each(%XXX)) { +if ($^O ne 'VMS') { + while (($key,$val) = each(%XXX)) { print "not ok 27\n"; exit; + } } print "ok 27\n"; @@ -1276,7 +1285,7 @@ print "ok 247\n"; { # bug id 20001008.001 - my $test = 248; + $test = 248; my @x = ("stra\337e 138","stra\337e 138"); for (@x) { s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; @@ -1365,10 +1374,10 @@ print "ok 247\n"; print "ok 263\n"; } -{ - my $test = 264; # till 575 +SKIP: { + $test = 264; # till 575 - use charnames ':full'; + use charnames ":full"; # This is far from complete testing, there are dozens of character # classes in Unicode. The mixing of literals and \N{...} is @@ -1689,10 +1698,11 @@ EOT print "not " if $x =~ /[\x{100}]/; print "ok 604\n"; - print "not " unless $x =~ /\p{InLatin1Supplement}/; + # the next two tests must be ignored on EBCDIC + print "not " unless $x =~ /\p{InLatin1Supplement}/ or ord("A") == 193; print "ok 605\n"; - print "not " if $x =~ /\P{InLatin1Supplement}/; + print "not " if $x =~ /\P{InLatin1Supplement}/ and ord("A") != 193; print "ok 606\n"; print "not " if $x =~ /\p{InLatinExtendedA}/; @@ -1907,8 +1917,10 @@ print "ok 663\n"; print "not " unless chr(0xfb4f) =~ /\p{IsHebrew}/; # outside InHebrew print "ok 664\n"; -print "not " unless chr(0xb5) =~ /\p{IsGreek}/; # singleton (not in a range) -print "ok 665\n"; +# # singleton (not in a range, this test must be ignored on EBCDIC) +# print "not " unless chr(0xb5) =~ /\p{IsGreek}/ or ord("A") == 193; +# print "ok 665\n"; +print "ok 665 # 0xb5 moved from Greek to Common with Unicode 4.0.1\n"; print "not " unless chr(0x37a) =~ /\p{IsGreek}/; # singleton print "ok 666\n"; @@ -2019,13 +2031,13 @@ print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; } -my $test = 687; +$test = 687; # Force scalar context on the patern match -sub ok ($$) { +sub ok ($;$) { my($ok, $name) = @_; - printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name; + printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name||'unnamed'; printf "# Failed test at line %d\n", (caller)[2] unless $ok; @@ -2233,10 +2245,11 @@ print "# some Unicode properties\n"; } { - print "not " unless "a" =~ /\p{L&}/; + # L& and LC are the same + print "not " unless "a" =~ /\p{LC}/ and "a" =~ /\p{L&}/; print "ok 743\n"; - print "not " if "1" =~ /\p{L&}/; + print "not " if "1" =~ /\p{LC}/ or "1" =~ /\p{L&}/; print "ok 744\n"; } @@ -2590,42 +2603,32 @@ print "# some Unicode properties\n"; use charnames ':full'; - print "\N{LATIN SMALL LETTER SHARP S}" =~ - /\N{LATIN SMALL LETTER SHARP S}/ ? "ok 835\n" : "not ok 835\n"; - - print "\N{LATIN SMALL LETTER SHARP S}" =~ - /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 836\n" : "not ok 836\n"; - - print "\N{LATIN SMALL LETTER SHARP S}" =~ - /[\N{LATIN SMALL LETTER SHARP S}]/ ? "ok 837\n" : "not ok 837\n"; + $test= 835; - print "\N{LATIN SMALL LETTER SHARP S}" =~ - /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 838\n" : "not ok 838\n"; + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/); + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/i); - print "ss" =~ - /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 839\n" : "not ok 839\n"; + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/); + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i); - print "SS" =~ - /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 840\n" : "not ok 840\n"; + ok("ss" =~ /\N{LATIN SMALL LETTER SHARP S}/i); + ok("SS" =~ /\N{LATIN SMALL LETTER SHARP S}/i); + ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i); + ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i); - print "ss" =~ - /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n"; - - print "SS" =~ - /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n"; - - print "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i ? - "ok 843\n" : "not ok 843\n"; - - print "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i ? - "ok 844\n" : "not ok 844\n"; + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i); + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i); } { print "# more whitespace: U+0085, U+2028, U+2029\n"; # U+0085 needs to be forced to be Unicode, the \x{100} does that. - print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n"; + if ($ordA == 193) { + print "<\x{100}\x{0085}>" =~ /<\x{100}e>/ ? "ok 845\n" : "not ok 845\n"; + } else { + print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n"; + } print "<\x{2028}>" =~ /<\s>/ ? "ok 846\n" : "not ok 846\n"; print "<\x{2029}>" =~ /<\s>/ ? "ok 847\n" : "not ok 847\n"; } @@ -2733,7 +2736,7 @@ print "# some Unicode properties\n"; # check utf8/non-utf8 mixtures # try to force all float/anchored check combinations my $c = "\x{100}"; - my $test = 865; + $test = 865; my $subst; for my $re ( "xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx", @@ -2772,7 +2775,7 @@ print "# some Unicode properties\n"; { print "# qr/.../x\n"; - my $test = 893; + $test = 893; my $R = qr/ A B C # D E/x; @@ -2788,7 +2791,7 @@ print "# some Unicode properties\n"; { print "# illegal Unicode properties\n"; - my $test = 896; + $test = 896; print eval { "a" =~ /\pq / } ? "not ok $test\n" : "ok $test\n"; $test++; @@ -2800,7 +2803,7 @@ print "# some Unicode properties\n"; { print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n"; # requires reuse of last successful pattern - my $test = 898; + $test = 898; $test =~ /\d/; for (0 .. 1) { my $match = ?? + 0; @@ -2980,8 +2983,8 @@ sub IsSyriac1 { END } -print "\x{0712}" =~ /\p{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; -print "\x{072F}" =~ /\P{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; +ok("\x{0712}" =~ /\p{IsSyriac1}/, '\x{0712}, \p{IsSyriac1}'); +ok("\x{072F}" =~ /\P{IsSyriac1}/, '\x{072F}, \P{IsSyriac1}'); sub Syriac1 { return <<'END'; @@ -2990,8 +2993,29 @@ sub Syriac1 { END } -print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; -print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; +ok("\x{0712}" =~ /\p{Syriac1}/, '\x{0712}, \p{Syriac1}'); +ok("\x{072F}" =~ /\P{Syriac1}/, '\x{072F}, \p{Syriac1}'); + +print "# user-defined character properties may lack \\n at the end\n"; +sub InGreekSmall { return "03B1\t03C9" } +sub InGreekCapital { return "0391\t03A9\n-03A2" } + +ok("\x{03C0}" =~ /\p{InGreekSmall}/, "Small pi"); +ok("\x{03C2}" =~ /\p{InGreekSmall}/, "Final sigma"); +ok("\x{03A0}" =~ /\p{InGreekCapital}/, "Capital PI"); +ok("\x{03A2}" =~ /\P{InGreekCapital}/, "Reserved"); + +sub AsciiHexAndDash { + return <<'END'; ++utf8::ASCII_Hex_Digit ++utf8::Dash +END +} + +ok("-" =~ /\p{Dash}/, "'-' is Dash"); +ok("A" =~ /\p{ASCII_Hex_Digit}/, "'A' is ASCII_Hex_Digit"); +ok("-" =~ /\p{AsciiHexAndDash}/, "'-' is AsciiHexAndDash"); +ok("A" =~ /\p{AsciiHexAndDash}/, "'A' is AsciiHexAndDash"); { print "# Change #18179\n"; @@ -3000,7 +3024,7 @@ print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; my $ok = $s =~ /(\x{100}{4})/; my($ord, $len) = (ord $1, length $1); print +($ok && $ord == 0x100 && $len == 4) - ? "ok $test\n" : "not ok $test\t# $ok/$ord/$len\n"; + ? "ok $test\n" : "not ok $test\t# [#18179] $ok/$ord/$len\n"; ++$test; } @@ -3147,7 +3171,10 @@ ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); } { - split /(?{ split "" })/, "abc"; + # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it + # hasn't been crashing. Disable this test until it is fixed properly. + # XXX also check what it returns rather than just doing ok(1,...) + # split /(?{ split "" })/, "abc"; ok(1,'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'); } @@ -3194,4 +3221,524 @@ $_="abcdef\n"; @x = m/./g; ok("abcde" eq "$`", '# TODO #19049 - global match not setting $`'); -# last test 1007 +ok("123\x{100}" =~ /^.*1.*23\x{100}$/, 'uft8 + multiple floating substr'); + +# LATIN SMALL/CAPITAL LETTER A WITH MACRON +ok(" \x{101}" =~ qr/\x{100}/i, + "<20030808193656.5109.1@llama.ni-s.u-net.com>"); + +# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW +ok(" \x{1E01}" =~ qr/\x{1E00}/i, + "<20030808193656.5109.1@llama.ni-s.u-net.com>"); + +# DESERET SMALL/CAPITAL LETTER LONG I +ok(" \x{10428}" =~ qr/\x{10400}/i, + "<20030808193656.5109.1@llama.ni-s.u-net.com>"); + +# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' +ok(" \x{1E01}x" =~ qr/\x{1E00}X/i, + "<20030808193656.5109.1@llama.ni-s.u-net.com>"); + +{ + # [perl #23769] Unicode regex broken on simple example + # regrepeat() didn't handle UTF-8 EXACT case right. + + my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; + + ok($s =~ /\x{a0}/, "[perl #23769]"); + ok($s =~ /\x{a0}+/, "[perl #23769]"); + ok($s =~ /\x{a0}\x{a0}/, "[perl #23769]"); + + ok("aaa\x{100}" =~ /(a+)/, "[perl #23769] easy invariant"); + ok($1 eq "aaa", "[perl #23769]"); + + ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, "[perl #23769] regrepeat invariant"); + ok($1 eq "\xa0\xa0\xa0", "[perl #23769]"); + + ok("ababab\x{100} " =~ /((?:ab)+)/, "[perl #23769] hard invariant"); + ok($1 eq "ababab", "[perl #23769]"); + + ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, "[perl #23769] hard variant"); + ok($1 eq "\xa0\xa1\xa0\xa1\xa0\xa1", "[perl #23769]"); + + ok("aaa\x{100} " =~ /(a+?)/, "[perl #23769] easy invariant"); + ok($1 eq "a", "[perl #23769]"); + + ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, "[perl #23769] regrepeat variant"); + ok($1 eq "\xa0", "[perl #23769]"); + + ok("ababab\x{100} " =~ /((?:ab)+?)/, "[perl #23769] hard invariant"); + ok($1 eq "ab", "[perl #23769]"); + + ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, "[perl #23769] hard variant"); + ok($1 eq "\xa0\xa1", "[perl #23769]"); + + ok("\xc4\xc4\xc4" !~ /(\x{100}+)/, "[perl #23769] don't match first byte of utf8 representation"); + ok("\xc4\xc4\xc4" !~ /(\x{100}+?)/, "[perl #23769] don't match first byte of utf8 representation"); +} + +for (120 .. 130) { + my $head = 'x' x $_; + for my $tail ('\x{0061}', '\x{1234}') { + ok( + eval qq{ "$head$tail" =~ /$head$tail/ }, + '\x{...} misparsed in regexp near 127 char EXACT limit' + ); + } +} + +# perl #25269: panic: pp_match start/end pointers +ok("a-bc" eq eval { + my($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; + "$x-$y"; +}, 'captures can move backwards in string'); + +# perl #27940: \cA not recognized in character classes +ok("a\cAb" =~ /\cA/, '\cA in pattern'); +ok("a\cAb" =~ /[\cA]/, '\cA in character class'); +ok("a\cAb" =~ /[\cA-\cB]/, '\cA in character class range'); +ok("abc" =~ /[^\cA-\cB]/, '\cA in negated character class range'); +ok("a\cBb" =~ /[\cA-\cC]/, '\cB in character class range'); +ok("a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range'); +ok("a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern'); +ok("ab" !~ /a\cIb/x, '\cI in pattern'); + +# perl #28532: optional zero-width match at end of string is ignored +ok(("abc" =~ /^abc(\z)?/) && defined($1), + 'optional zero-width match at end of string'); +ok(("abc" =~ /^abc(\z)??/) && !defined($1), + 'optional zero-width match at end of string'); + + + +{ # TRIE related + my @got=(); + "words"=~/(word|word|word)(?{push @got,$1})s$/; + ok(@got==1,"TRIE optimation is working") or warn "# @got"; + @got=(); + "words"=~/(word|word|word)(?{push @got,$1})s$/i; + ok(@got==1,"TRIEF optimisation is working") or warn "# @got"; + + my @nums=map {int rand 1000} 1..100; + my $re="(".(join "|",@nums).")"; + $re=qr/\b$re\b/; + + foreach (@nums) { + ok($_=~/$re/,"Trie nums"); + } + $_=join " ", @nums; + @got=(); + push @got,$1 while /$re/g; + + my %count; + $count{$_}++ for @got; + my $ok=1; + for (@nums) { + $ok=0 if --$count{$_}<0; + } + ok($ok,"Trie min count matches"); +} + + +# TRIE related +# LATIN SMALL/CAPITAL LETTER A WITH MACRON +ok(("foba \x{101}foo" =~ qr/(foo|\x{100}foo|bar)/i) && $1 eq "\x{101}foo", + "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH MACRON"); + +# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW +ok(("foba \x{1E01}foo" =~ qr/(foo|\x{1E00}foo|bar)/i) && $1 eq "\x{1E01}foo", + "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW"); + +# DESERET SMALL/CAPITAL LETTER LONG I +ok(("foba \x{10428}foo" =~ qr/(foo|\x{10400}foo|bar)/i) && $1 eq "\x{10428}foo", + "TRIEF + DESERET SMALL/CAPITAL LETTER LONG I"); + +# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' +ok(("foba \x{1E01}xfoo" =~ qr/(foo|\x{1E00}Xfoo|bar)/i) && $1 eq "\x{1E01}xfoo", + "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'"); + +{# TRIE related + +use charnames ':full'; + +$s="\N{LATIN SMALL LETTER SHARP S}"; +ok(("foba ba$s" =~ qr/(foo|Ba$s|bar)/i) + && $1 eq "ba$s", + "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"); +ok(("foba ba$s" =~ qr/(Ba$s|foo|bar)/i) + && $1 eq "ba$s", + "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"); +ok(("foba ba$s" =~ qr/(foo|bar|Ba$s)/i) + && $1 eq "ba$s", + "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"); + +ok(("foba ba$s" =~ qr/(foo|Bass|bar)/i) + && $1 eq "ba$s", + "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"); + +ok(("foba ba$s" =~ qr/(foo|BaSS|bar)/i) + && $1 eq "ba$s", + "TRIEF + LATIN SMALL LETTER SHARP S =~ SS"); + +ok(("foba ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i) + && $1 eq "ba${s}pxySS$s$s", + "COMMON PREFIX TRIEF + LATIN SMALL LETTER SHARP S"); + + +} + + + +if (!$ENV{PERL_SKIP_PSYCHO_TEST}){ + my @normal=qw(these are some normal words); + my $psycho=join "|",@normal,map chr $_,255..20000; + ok(('these'=~/($psycho)/) && $1 eq 'these','Pyscho'); +} else { + ok(1,'Skipped Psycho'); +} + +# [perl #36207] mixed utf8 / latin-1 and case folding + +{ + my $utf8 = "\xe9\x{100}"; chop $utf8; + my $latin1 = "\xe9"; + + ok($utf8 =~ /\xe9/i, "utf8/latin"); + ok($utf8 =~ /$latin1/i, "utf8/latin runtime"); + ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie"); + ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime"); + + ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8"); + ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie"); + ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime"); + ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime"); +} + +# [perl #37038] Global regular matches generate invalid pointers + +{ + my $s = "abcd"; + $s =~ /(..)(..)/g; + $s = $1; + $s = $2; + ok($s eq 'cd', + "# assigning to original string should not corrupt match vars"); +} + +{ + package wooosh; + sub gloople { + "!"; + } + package main; + + my $aeek = bless {}, 'wooosh'; + eval {$aeek->gloople() =~ /(.)/g;}; + ok($@ eq "", "//g match against return value of sub") or print "# $@\n"; +} + +{ + sub gloople { + "!"; + } + eval {gloople() =~ /(.)/g;}; + ok($@ eq "", "# 26410 didn't affect sub calls for some reason") + or print "# $@\n"; +} + +{ + package lv; + $var = "abc"; + sub variable : lvalue { $var } + + package main; + my $o = bless [], "lv"; + my $f = ""; + eval { for (1..2) { $f .= $1 if $o->variable =~ /(.)/g } }; + ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n"; +} + +{ + $var = "abc"; + sub variable : lvalue { $var } + + my $f = ""; + eval { for (1..2) { $f .= $1 if variable() =~ /(.)/g } }; + ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n"; +} + +# [perl #37836] Simple Regex causes SEGV when run on specific data +if ($ordA == 193) { + print "ok $test # Skip: in EBCDIC\n"; $test++; +} else { + no warnings 'utf8'; + $_ = pack('U0C2', 0xa2, 0xf8); # ill-formed UTF-8 + my $ret = 0; + eval { $ret = s/[\0]+//g }; + ok($ret == 0, "ill-formed UTF-8 doesn't match NUL in class"); +} + +{ # [perl #38293] chr(65535) should be allowed in regexes + no warnings 'utf8'; # to allow non-characters + my($c, $r, $s); + + $c = chr 0xffff; + $c =~ s/$c//g; + ok($c eq "", "U+FFFF, parsed as atom"); + + $c = chr 0xffff; + $r = "\\$c"; + $c =~ s/$r//g; + ok($c eq "", "U+FFFF backslashed, parsed as atom"); + + $c = chr 0xffff; + $c =~ s/[$c]//g; + ok($c eq "", "U+FFFF, parsed in class"); + + $c = chr 0xffff; + $r = "[\\$c]"; + $c =~ s/$r//g; + ok($c eq "", "U+FFFF backslashed, parsed in class"); + + $s = "A\x{ffff}B"; + $s =~ s/\x{ffff}//i; + ok($s eq "AB", "U+FFFF, EXACTF"); + + $s = "\x{ffff}A"; + $s =~ s/\bA//; + ok($s eq "\x{ffff}", "U+FFFF, BOUND"); + + $s = "\x{ffff}!"; + $s =~ s/\B!//; + ok($s eq "\x{ffff}", "U+FFFF, NBOUND"); +} # non-characters end + +{ + # https://rt.perl.org/rt3/Ticket/Display.html?id=39583 + + # The printing characters + my @chars = ("A".."Z"); + my $delim = ","; + my $size = 32771 - 4; + my $str = ''; + + # create some random junk. Inefficient, but it works. + for ($i = 0 ; $i < $size ; $i++) { + $str .= $chars[int(rand(@chars))]; + } + + $str .= ($delim x 4); + my $res; + my $matched; + if ($str =~ s/^(.*?)${delim}{4}//s) { + $res = $1; + $matched=1; + } + ok($matched,'pattern matches'); + ok(length($str)==0,"Empty string"); + ok(defined($res) && length($res)==$size,"\$1 is correct size"); +} + +{ # related to [perl #27940] + ok("\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern'); + ok("\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern'); + ok("X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern'); + ok("X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern'); + + ok("X\0A" =~ /X\c@?A/, '\c@?'); + ok("X\0A" =~ /X\c@*A/, '\c@*'); + ok("X\0A" =~ /X\c@(A)/, '\c@('); + ok("X\0A" =~ /X(\c@)A/, '\c@)'); + ok("X\0A" =~ /X\c@|ZA/, '\c@|'); + + ok("X\@A" =~ /X@?A/, '@?'); + ok("X\@A" =~ /X@*A/, '@*'); + ok("X\@A" =~ /X@(A)/, '@('); + ok("X\@A" =~ /X(@)A/, '@)'); + ok("X\@A" =~ /X@|ZA/, '@|'); + + local $" = ','; # non-whitespace and non-RE-specific + ok('abc' =~ /(.)(.)(.)/, 'the last successful match is bogus'); + ok("A@+B" =~ /A@{+}B/, 'interpolation of @+ in /@{+}/'); + ok("A@-B" =~ /A@{-}B/, 'interpolation of @- in /@{-}/'); + ok("A@+B" =~ /A@{+}B/x, 'interpolation of @+ in /@{+}/x'); + ok("A@-B" =~ /A@{-}B/x, 'interpolation of @- in /@{-}/x'); +} + +{ + use lib 'lib'; + use Cname; + + ok('fooB'=~/\N{foo}[\N{B}\N{b}]/,"Passthrough charname"); + $test=1233; my $handle=make_must_warn('Ignoring excess chars from'); + $handle->('q(xxWxx) =~ /[\N{WARN}]/'); + { + my $code; + my $w=""; + local $SIG{__WARN__} = sub { $w.=shift }; + eval($code=<<'EOFTEST') or die "$@\n$code\n"; + { + use warnings; + + #1234 + ok("\0" !~ /[\N{EMPTY-STR}XY]/, + "Zerolength charname in charclass doesnt match \0"); + 1; + } +EOFTEST + ok($w=~/Ignoring zero length/, + "Got expected zero length warning"); + warn $code; + + } + $handle= make_must_warn('Ignoring zero length'); + $handle->('qq(\\0) =~ /[\N{EMPTY-STR}XY]/'); + ok('AB'=~/(\N{EVIL})/ && $1 eq 'A',"Charname caching $1"); + ok('ABC'=~/(\N{EVIL})/,"Charname caching $1"); + ok('xy'=~/x\N{EMPTY-STR}y/, 'Empty string charname produces NOTHING node'); + ok(''=~/\N{EMPTY-STR}/, 'Empty string charname produces NOTHING node 2'); + +} +{ + print "# MORE LATIN SMALL LETTER SHARP S\n"; + + use charnames ':full'; + + #see also test #835 + ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i, + "unoptimized named sequence in class 1"); + ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i, + "unoptimized named sequence in class 2"); + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/, + "unoptimized named sequence in class 3"); + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i, + "unoptimized named sequence in class 4"); + + ok('aabc' !~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against aabc'); + ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc'); + ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc'); + + ok(' A B'=~/\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, + 'Intermixed named and unicode escapes 1'); + ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}"=~ + /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, + 'Intermixed named and unicode escapes 2'); + ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042} 3"=~ + /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/, + 'Intermixed named and unicode escapes'); +} +$brackets = qr{ + { (?> [^{}]+ | (??{ $brackets }) )* } + }x; +ok("{b{c}d" !~ m/^((??{ $brackets }))/, "bracket mismatch"); + + +# stress test CURLYX/WHILEM. +# +# This test includes varying levels of nesting, and according to +# profiling done against build 28905, exercises every code line in the +# CURLYX and WHILEM blocks, except those related to LONGJMP, the +# super-linear cache and warnings. It executes about 0.5M regexes + +{ + my $r = qr/^ + (?: + ( (?:a|z+)+ ) + (?: + ( (?:b|z+){3,}? ) + ( + (?: + (?:c|z+){1,1} + )* + ) + (?:z*){2,} + ( (?:z+|d)+ ) + (?: + ( (?:e|z+)+ ) + )* + ( (?:f|z+)+ ) + )* + ( (?:z+|g)+ ) + (?: + ( (?:h|z+)+ ) + )* + ( (?:i|z+)+ ) + )+ + ( (?:j|z+)+ ) + (?: + ( (?:k|z+)+ ) + )* + ( (?:l|z+)+ ) + $/x; + + + my $ok = 1; + my $msg = "CURLYX stress test"; + OUTER: + for my $a ("x","a","aa") { + for my $b ("x","bbb","bbbb") { + my $bs = $a.$b; + for my $c ("x","c","cc") { + my $cs = $bs.$c; + for my $d ("x","d","dd") { + my $ds = $cs.$d; + for my $e ("x","e","ee") { + my $es = $ds.$e; + for my $f ("x","f","ff") { + my $fs = $es.$f; + for my $g ("x","g","gg") { + my $gs = $fs.$g; + for my $h ("x","h","hh") { + my $hs = $gs.$h; + for my $i ("x","i","ii") { + my $is = $hs.$i; + for my $j ("x","j","jj") { + my $js = $is.$j; + for my $k ("x","k","kk") { + my $ks = $js.$k; + for my $l ("x","l","ll") { + my $ls = $ks.$l; + if ($ls =~ $r) { + if ($ls =~ /x/) { + $msg .= ": unexpected match for [$ls]"; + $ok = 0; + last OUTER; + } + my $cap = "$1$2$3$4$5$6$7$8$9$10$11$12"; + unless ($ls eq $cap) { + $msg .= ": capture: [$ls], got [$cap]"; + $ok = 0; + last OUTER; + } + } + else { + unless ($ls =~ /x/) { + $msg = ": failed for [$ls]"; + $ok = 0; + last OUTER; + } + } + } + } + } + } + } + } + } + } + } + } + } + } + ok($ok, $msg); +} + + +# Keep the following test last -- it may crash perl + +ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274") + or print "# Unexpected outcome: should pass or crash perl\n"; + +# Don't forget to update this! +BEGIN{print "1..1253\n"}; +