$| = 1;
-print "1..892\n";
+# Test counter output is generated by a BEGIN block at bottom of file
BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
}
+our $Message = "Line";
eval 'use Config'; # Defaults assumed if this fails
if ($x =~ /^abc/) {print "ok 1\n";} else {print "not ok 1\n";}
if ($x !~ /^def/) {print "ok 2\n";} else {print "not ok 2\n";}
-$* = 1;
-if ($x =~ /^def/) {print "ok 3\n";} else {print "not ok 3\n";}
-$* = 0;
+# used to be a test for $*
+if ($x =~ /^def/m) {print "ok 3\n";} else {print "not ok 3\n";}
$_ = '123';
if (/^([0-9][0-9]*)/) {print "ok 4\n";} else {print "not ok 4\n";}
if (/^$_$/) {print "ok 23\n";} else {print "not ok 23\n";}
-$* = 1; # test 3 only tested the optimized version--this one is for real
-if ("ab\ncd\n" =~ /^cd/) {print "ok 24\n";} else {print "not ok 24\n";}
-$* = 0;
+# used to be a test for $*
+if ("ab\ncd\n" =~ /^cd/m) {print "ok 24\n";} else {print "not ok 24\n";}
$XXX{123} = 123;
$XXX{234} = 234;
while ($_ = shift(@XXX)) {
?(.*)? && (print $1,"\n");
/not/ && reset;
- /not ok 26/ && reset 'X';
+ if (/not ok 26/) {
+ if ($^O eq 'VMS') {
+ $_ = shift(@XXX);
+ }
+ else {
+ reset 'X';
+ }
+ }
}
-while (($key,$val) = each(%XXX)) {
+if ($^O ne 'VMS') {
+ while (($key,$val) = each(%XXX)) {
print "not ok 27\n";
exit;
+ }
}
print "ok 27\n";
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/i eq '(?i-xsm:\bv$)';
+print "not " unless qr/\b\v$/i eq '(?i-xsm:\b\v$)';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/s eq '(?s-xim:\bv$)';
+print "not " unless qr/\b\v$/s eq '(?s-xim:\b\v$)';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/m eq '(?m-xis:\bv$)';
+print "not " unless qr/\b\v$/m eq '(?m-xis:\b\v$)';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/x eq '(?x-ism:\bv$)';
+print "not " unless qr/\b\v$/x eq '(?x-ism:\b\v$)';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/xism eq '(?msix:\bv$)';
+print "not " unless qr/\b\v$/xism eq '(?msix:\b\v$)';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/ eq '(?-xism:\bv$)';
+print "not " unless qr/\b\v$/ eq '(?-xism:\b\v$)';
print "ok $test\n";
$test++;
{
# bug id 20001008.001
- my $test = 248;
+ $test = 248;
my @x = ("stra\337e 138","stra\337e 138");
for (@x) {
s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
print "ok 263\n";
}
-{
- my $test = 264; # till 575
+SKIP: {
+ $test = 264; # till 575
- use charnames ':full';
+ use charnames ":full";
# This is far from complete testing, there are dozens of character
# classes in Unicode. The mixing of literals and \N{...} is
print "not " if $x =~ /[\x{100}]/;
print "ok 604\n";
- print "not " unless $x =~ /\p{InLatin1Supplement}/;
+ # the next two tests must be ignored on EBCDIC
+ print "not " unless $x =~ /\p{InLatin1Supplement}/ or ord("A") == 193;
print "ok 605\n";
- print "not " if $x =~ /\P{InLatin1Supplement}/;
+ print "not " if $x =~ /\P{InLatin1Supplement}/ and ord("A") != 193;
print "ok 606\n";
print "not " if $x =~ /\p{InLatinExtendedA}/;
print "not " unless chr(0xfb4f) =~ /\p{IsHebrew}/; # outside InHebrew
print "ok 664\n";
-print "not " unless chr(0xb5) =~ /\p{IsGreek}/; # singleton (not in a range)
-print "ok 665\n";
+# # singleton (not in a range, this test must be ignored on EBCDIC)
+# print "not " unless chr(0xb5) =~ /\p{IsGreek}/ or ord("A") == 193;
+# print "ok 665\n";
+print "ok 665 # 0xb5 moved from Greek to Common with Unicode 4.0.1\n";
print "not " unless chr(0x37a) =~ /\p{IsGreek}/; # singleton
print "ok 666\n";
}
-my $test = 687;
+$test = 687;
# Force scalar context on the patern match
-sub ok ($$) {
+sub ok ($;$) {
my($ok, $name) = @_;
- printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name;
+ printf "%sok %d - %s\n", ($ok ? "" : "not "), $test,
+ $name||"$Message:".((caller)[2]);
printf "# Failed test at line %d\n", (caller)[2] unless $ok;
}
{
- print "not " unless "a" =~ /\p{L&}/;
+ # L& and LC are the same
+ print "not " unless "a" =~ /\p{LC}/ and "a" =~ /\p{L&}/;
print "ok 743\n";
- print "not " if "1" =~ /\p{L&}/;
+ print "not " if "1" =~ /\p{LC}/ or "1" =~ /\p{L&}/;
print "ok 744\n";
}
use charnames ':full';
- print "\N{LATIN SMALL LETTER SHARP S}" =~
- /\N{LATIN SMALL LETTER SHARP S}/ ? "ok 835\n" : "not ok 835\n";
-
- print "\N{LATIN SMALL LETTER SHARP S}" =~
- /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 836\n" : "not ok 836\n";
-
- print "\N{LATIN SMALL LETTER SHARP S}" =~
- /[\N{LATIN SMALL LETTER SHARP S}]/ ? "ok 837\n" : "not ok 837\n";
-
- print "\N{LATIN SMALL LETTER SHARP S}" =~
- /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 838\n" : "not ok 838\n";
-
- print "ss" =~
- /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 839\n" : "not ok 839\n";
+ $test= 835;
- print "SS" =~
- /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 840\n" : "not ok 840\n";
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/);
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/i);
- print "ss" =~
- /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n";
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/);
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i);
- print "SS" =~
- /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n";
+ ok("ss" =~ /\N{LATIN SMALL LETTER SHARP S}/i);
+ ok("SS" =~ /\N{LATIN SMALL LETTER SHARP S}/i);
+ ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i);
+ ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i);
- print "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i ?
- "ok 843\n" : "not ok 843\n";
-
- print "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i ?
- "ok 844\n" : "not ok 844\n";
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i);
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i);
}
{
print "# more whitespace: U+0085, U+2028, U+2029\n";
# U+0085 needs to be forced to be Unicode, the \x{100} does that.
- print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n";
+ if ($ordA == 193) {
+ print "<\x{100}\x{0085}>" =~ /<\x{100}e>/ ? "ok 845\n" : "not ok 845\n";
+ } else {
+ print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n";
+ }
print "<\x{2028}>" =~ /<\s>/ ? "ok 846\n" : "not ok 846\n";
print "<\x{2029}>" =~ /<\s>/ ? "ok 847\n" : "not ok 847\n";
}
# check utf8/non-utf8 mixtures
# try to force all float/anchored check combinations
my $c = "\x{100}";
- my $test = 865;
+ $test = 865;
my $subst;
for my $re (
"xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx",
++$test;
}
}
+
+{
+ print "# qr/.../x\n";
+ $test = 893;
+
+ my $R = qr/ A B C # D E/x;
+
+ print eval {"ABCDE" =~ $R} ? "ok $test\n" : "not ok $test\n";
+ $test++;
+
+ print eval {"ABCDE" =~ m/$R/} ? "ok $test\n" : "not ok $test\n";
+ $test++;
+
+ print eval {"ABCDE" =~ m/($R)/} ? "ok $test\n" : "not ok $test\n";
+ $test++;
+}
+
+{
+ print "# illegal Unicode properties\n";
+ $test = 896;
+
+ print eval { "a" =~ /\pq / } ? "not ok $test\n" : "ok $test\n";
+ $test++;
+
+ print eval { "a" =~ /\p{qrst} / } ? "not ok $test\n" : "ok $test\n";
+ $test++;
+}
+
+{
+ print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n";
+ # requires reuse of last successful pattern
+ $test = 898;
+ $test =~ /\d/;
+ for (0 .. 1) {
+ my $match = ?? + 0;
+ if ($match != $_) {
+ print "ok $test\n";
+ } else {
+ printf "not ok %s\t# 'match once' %s on %s iteration\n", $test,
+ $match ? 'succeeded' : 'failed', $_ ? 'second' : 'first';
+ }
+ ++$test;
+ }
+ $test =~ /(\d)/;
+ my $result = join '', $test =~ //g;
+ if ($result eq $test) {
+ print "ok $test\n";
+ } else {
+ printf "not ok %s\t# expected '%s', got '%s'\n", $test, $test, $result;
+ }
+ ++$test;
+}
+
+print "# user-defined character properties\n";
+
+sub InKana1 {
+ return <<'END';
+3040 309F
+30A0 30FF
+END
+}
+
+sub InKana2 {
+ return <<'END';
++utf8::InHiragana
++utf8::InKatakana
+END
+}
+
+sub InKana3 {
+ return <<'END';
++utf8::InHiragana
++utf8::InKatakana
+-utf8::IsCn
+END
+}
+
+sub InNotKana {
+ return <<'END';
+!utf8::InHiragana
+-utf8::InKatakana
++utf8::IsCn
+END
+}
+
+$test = 901;
+
+print "\x{3040}" =~ /\p{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{303F}" =~ /\P{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+
+print "\x{3040}" =~ /\p{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{303F}" =~ /\P{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+
+print "\x{3041}" =~ /\p{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{3040}" =~ /\P{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+
+print "\x{3040}" =~ /\p{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{3041}" =~ /\P{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+
+sub InConsonant { # Not EBCDIC-aware.
+ return <<EOF;
+0061 007f
+-0061
+-0065
+-0069
+-006f
+-0075
+EOF
+}
+
+print "d" =~ /\p{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "e" =~ /\P{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+
+{
+ print "# [ID 20020630.002] utf8 regex only matches 32k\n";
+ $test = 911;
+ for ([ 'byte', "\x{ff}" ], [ 'utf8', "\x{1ff}" ]) {
+ my($type, $char) = @$_;
+ for my $len (32000, 32768, 33000) {
+ my $s = $char . "f" x $len;
+ my $r = $s =~ /$char([f]*)/gc;
+ print $r ? "ok $test\n" : "not ok $test\t# <$type x $len> fail\n";
+ ++$test;
+ print +(!$r or pos($s) == $len + 1) ? "ok $test\n"
+ : "not ok $test\t# <$type x $len> pos @{[ pos($s) ]}\n";
+ ++$test;
+ }
+ }
+}
+
+$test = 923;
+
+$a = bless qr/foo/, 'Foo';
+print(('goodfood' =~ $a ? '' : 'not '),
+ "ok $test\t# reblessed qr// matches\n");
+++$test;
+
+print(($a eq '(?-xism:foo)' ? '' : 'not '),
+ "ok $test\t# reblessed qr// stringizes\n");
+++$test;
+
+$x = "\x{3fe}";
+$z=$y = "\317\276"; # $y is byte representation of $x
+
+$a = qr/$x/;
+print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n");
+++$test;
+
+print(("a$a" =~ $x ? '' : 'not '),
+ "ok $test - stringifed qr// preserves utf8\n");
+++$test;
+
+print(("a$x" =~ /^a$a\z/ ? '' : 'not '),
+ "ok $test - interpolated qr// preserves utf8\n");
+++$test;
+
+print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '),
+ "ok $test - postponed interpolation of qr// preserves utf8\n");
+++$test;
+
+print((length(qr/##/x) == 12 ? '' : 'not '),
+ "ok $test - ## in qr// doesn't corrupt memory [perl #17776]\n");
+++$test;
+
+{ use re 'eval';
+
+print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in utf8 re matches utf8\n");
+++$test;
+
+print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in non-utf8 re matches utf8\n");
+++$test;
+
+print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n");
+++$test;
+
+print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n");
+++$test;
+
+print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n");
+++$test;
+
+print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n");
+++$test;
+$y = $z; # reset $y after upgrade
+
+print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n");
+++$test;
+$y = $z; # reset $y after upgrade
+
+print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n");
+++$test;
+
+} # no re 'eval'
+
+print "# more user-defined character properties\n";
+
+sub IsSyriac1 {
+ return <<'END';
+0712 072C
+0730 074A
+END
+}
+
+ok("\x{0712}" =~ /\p{IsSyriac1}/, '\x{0712}, \p{IsSyriac1}');
+ok("\x{072F}" =~ /\P{IsSyriac1}/, '\x{072F}, \P{IsSyriac1}');
+
+sub Syriac1 {
+ return <<'END';
+0712 072C
+0730 074A
+END
+}
+
+ok("\x{0712}" =~ /\p{Syriac1}/, '\x{0712}, \p{Syriac1}');
+ok("\x{072F}" =~ /\P{Syriac1}/, '\x{072F}, \p{Syriac1}');
+
+print "# user-defined character properties may lack \\n at the end\n";
+sub InGreekSmall { return "03B1\t03C9" }
+sub InGreekCapital { return "0391\t03A9\n-03A2" }
+
+ok("\x{03C0}" =~ /\p{InGreekSmall}/, "Small pi");
+ok("\x{03C2}" =~ /\p{InGreekSmall}/, "Final sigma");
+ok("\x{03A0}" =~ /\p{InGreekCapital}/, "Capital PI");
+ok("\x{03A2}" =~ /\P{InGreekCapital}/, "Reserved");
+
+sub AsciiHexAndDash {
+ return <<'END';
++utf8::ASCII_Hex_Digit
++utf8::Dash
+END
+}
+
+ok("-" =~ /\p{Dash}/, "'-' is Dash");
+ok("A" =~ /\p{ASCII_Hex_Digit}/, "'A' is ASCII_Hex_Digit");
+ok("-" =~ /\p{AsciiHexAndDash}/, "'-' is AsciiHexAndDash");
+ok("A" =~ /\p{AsciiHexAndDash}/, "'A' is AsciiHexAndDash");
+
+{
+ print "# Change #18179\n";
+ # previously failed with "panic: end_shift
+ my $s = "\x{100}" x 5;
+ my $ok = $s =~ /(\x{100}{4})/;
+ my($ord, $len) = (ord $1, length $1);
+ print +($ok && $ord == 0x100 && $len == 4)
+ ? "ok $test\n" : "not ok $test\t# [#18179] $ok/$ord/$len\n";
+ ++$test;
+}
+
+{
+ print "# [perl #15763]\n";
+
+ $a = "x\x{100}";
+ chop $a; # but leaves the UTF-8 flag
+ $a .= "y"; # 1 byte before "y"
+
+ ok($a =~ /^\C/, 'match one \C on 1-byte UTF-8');
+ ok($a =~ /^\C{1}/, 'match \C{1}');
+
+ ok($a =~ /^\Cy/, 'match \Cy');
+ ok($a =~ /^\C{1}y/, 'match \C{1}y');
+
+ $a = "\x{100}y"; # 2 bytes before "y"
+
+ ok($a =~ /^\C/, 'match one \C on 2-byte UTF-8');
+ ok($a =~ /^\C{1}/, 'match \C{1}');
+ ok($a =~ /^\C\C/, 'match two \C');
+ ok($a =~ /^\C{2}/, 'match \C{2}');
+
+ ok($a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte');
+ ok($a =~ /^\C{3}/, 'match \C{3}');
+
+ ok($a =~ /^\C\Cy/, 'match two \C');
+ ok($a =~ /^\C{2}y/, 'match \C{2}');
+
+ ok($a !~ /^\C\C\Cy/, q{don't match three \Cy});
+ ok($a !~ /^\C{2}\Cy/, q{don't match \C{3}y});
+
+ $a = "\x{1000}y"; # 3 bytes before "y"
+
+ ok($a =~ /^\C/, 'match one \C on three-byte UTF-8');
+ ok($a =~ /^\C{1}/, 'match \C{1}');
+ ok($a =~ /^\C\C/, 'match two \C');
+ ok($a =~ /^\C{2}/, 'match \C{2}');
+ ok($a =~ /^\C\C\C/, 'match three \C');
+ ok($a =~ /^\C{3}/, 'match \C{3}');
+
+ ok($a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte');
+ ok($a =~ /^\C{4}/, 'match \C{4}');
+
+ ok($a =~ /^\C\C\Cy/, 'match three \Cy');
+ ok($a =~ /^\C{3}y/, 'match \C{3}y');
+
+ ok($a !~ /^\C\C\C\C\y/, q{don't match four \Cy});
+ ok($a !~ /^\C{4}y/, q{don't match \C{4}y});
+}
+
+$_ = 'aaaaaaaaaa';
+utf8::upgrade($_); chop $_; $\="\n";
+ok(/[^\s]+/, "m/[^\s]/ utf8");
+ok(/[^\d]+/, "m/[^\d]/ utf8");
+ok(($a = $_, $_ =~ s/[^\s]+/./g), "s/[^\s]/ utf8");
+ok(($a = $_, $a =~ s/[^\d]+/./g), "s/[^\s]/ utf8");
+
+ok("\x{100}" =~ /\x{100}/, "[perl #15397]");
+ok("\x{100}" =~ /(\x{100})/, "[perl #15397]");
+ok("\x{100}" =~ /(\x{100}){1}/, "[perl #15397]");
+ok("\x{100}\x{100}" =~ /(\x{100}){2}/, "[perl #15397]");
+ok("\x{100}\x{100}" =~ /(\x{100})(\x{100})/, "[perl #15397]");
+
+$x = "CD";
+$x =~ /(AB)*?CD/;
+ok(!defined $1, "[perl #7471]");
+
+$x = "CD";
+$x =~ /(AB)*CD/;
+ok(!defined $1, "[perl #7471]");
+
+$pattern = "^(b+?|a){1,2}c";
+ok("bac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
+ok("bbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
+ok("bbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
+ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
+
+{
+ # [perl #18232]
+ "\x{100}" =~ /(.)/;
+ ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' );
+ { 'a' =~ /./; }
+ ok( $1 eq "\x{100}", '$1 is still utf-8' );
+ ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' );
+}
+
+{
+ use utf8;
+ my $attr = 'Name-1' ;
+
+ my $NormalChar = qr/[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
+ my $NormalWord = qr/${NormalChar}+?/;
+ my $PredNameHyphen = qr/^${NormalWord}(\-${NormalWord})*?$/;
+
+ $attr =~ /^$/;
+ ok( $attr =~ $PredNameHyphen, "[perl #19767] original test" );
+}
+
+{
+ use utf8;
+ "a" =~ m/[b]/;
+ ok ( "0" =~ /\p{N}+\z/, "[perl #19767] variant test" );
+}
+
+{
+
+ $p = 1;
+ foreach (1,2,3,4) {
+ $p++ if /(??{ $p })/
+ }
+ ok ($p == 5, "[perl #20683] (??{ }) returns stale values");
+ { package P; $a=1; sub TIESCALAR { bless[] } sub FETCH { $a++ } }
+ tie $p, P;
+ foreach (1,2,3,4) {
+ /(??{ $p })/
+ }
+ ok ( $p == 5, "(??{ }) returns stale values");
+}
+
+{
+ # Subject: Odd regexp behavior
+ # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
+ # Date: Wed, 26 Feb 2003 16:53:12 +0000
+ # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
+ # To: perl-unicode@perl.org
+
+ $x = "\x{2019}\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg;
+ ok($x eq "\x{2019} k", "Markus Kuhn 2003-02-26");
+
+ $x = "b\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg;
+ ok($x eq "b k", "Markus Kuhn 2003-02-26");
+
+ ok("\x{2019}" =~ /\S/, "Markus Kuhn 2003-02-26");
+}
+
+{
+ my $i;
+ ok('-1-3-5-' eq join('', split /((??{$i++}))/, '-1-3-5-'),
+ "[perl #21411] (??{ .. }) corrupts split's stack");
+ split /(?{'WOW'})/, 'abc';
+ ok('a|b|c' eq join ('|', @_),
+ "[perl #21411] (?{ .. }) version of the above");
+}
+
+{
+ # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it
+ # hasn't been crashing. Disable this test until it is fixed properly.
+ # XXX also check what it returns rather than just doing ok(1,...)
+ # split /(?{ split "" })/, "abc";
+ ok(1,'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0');
+}
+
+{
+ ok("\x{100}\n" =~ /\x{100}\n$/, "UTF8 length cache and fbm_compile");
+}
+
+{
+ package Str;
+ use overload q/""/ => sub { ${$_[0]}; };
+ sub new { my ($c, $v) = @_; bless \$v, $c; }
+
+ package main;
+ $_ = Str->new("a\x{100}/\x{100}b");
+ ok(join(":", /\b(.)\x{100}/g) eq "a:/", "re_intuit_start and PL_bostr");
+}
+
+{
+ $_ = "code: 'x' { '...' }\n"; study;
+ my @x; push @x, $& while m/'[^\']*'/gx;
+ ok(join(":", @x) eq "'x':'...'",
+ "[perl #17757] Parse::RecDescent triggers infinite loop");
+}
+
+{
+ my $re = qq/^([^X]*)X/;
+ utf8::upgrade($re);
+ ok("\x{100}X" =~ /$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED");
+}
+
+# bug #22354
+sub func ($) {
+ ok( "a\nb" !~ /^b/, $_[0] );
+ ok( "a\nb" =~ /^b/m, "$_[0] - with /m" );
+}
+func "standalone";
+$_ = "x"; s/x/func "in subst"/e;
+$_ = "x"; s/x/func "in multiline subst"/em;
+#$_ = "x"; /x(?{func "in regexp"})/;
+#$_ = "x"; /x(?{func "in multiline regexp"})/m;
+
+# bug RT#19049
+$_="abcdef\n";
+@x = m/./g;
+ok("abcde" eq "$`", 'RT#19049 - global match not setting $`');
+
+ok("123\x{100}" =~ /^.*1.*23\x{100}$/, 'uft8 + multiple floating substr');
+
+# LATIN SMALL/CAPITAL LETTER A WITH MACRON
+ok(" \x{101}" =~ qr/\x{100}/i,
+ "<20030808193656.5109.1@llama.ni-s.u-net.com>");
+
+# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW
+ok(" \x{1E01}" =~ qr/\x{1E00}/i,
+ "<20030808193656.5109.1@llama.ni-s.u-net.com>");
+
+# DESERET SMALL/CAPITAL LETTER LONG I
+ok(" \x{10428}" =~ qr/\x{10400}/i,
+ "<20030808193656.5109.1@llama.ni-s.u-net.com>");
+
+# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'
+ok(" \x{1E01}x" =~ qr/\x{1E00}X/i,
+ "<20030808193656.5109.1@llama.ni-s.u-net.com>");
+
+{
+ # [perl #23769] Unicode regex broken on simple example
+ # regrepeat() didn't handle UTF-8 EXACT case right.
+
+ my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
+
+ ok($s =~ /\x{a0}/, "[perl #23769]");
+ ok($s =~ /\x{a0}+/, "[perl #23769]");
+ ok($s =~ /\x{a0}\x{a0}/, "[perl #23769]");
+
+ ok("aaa\x{100}" =~ /(a+)/, "[perl #23769] easy invariant");
+ ok($1 eq "aaa", "[perl #23769]");
+
+ ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, "[perl #23769] regrepeat invariant");
+ ok($1 eq "\xa0\xa0\xa0", "[perl #23769]");
+
+ ok("ababab\x{100} " =~ /((?:ab)+)/, "[perl #23769] hard invariant");
+ ok($1 eq "ababab", "[perl #23769]");
+
+ ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, "[perl #23769] hard variant");
+ ok($1 eq "\xa0\xa1\xa0\xa1\xa0\xa1", "[perl #23769]");
+
+ ok("aaa\x{100} " =~ /(a+?)/, "[perl #23769] easy invariant");
+ ok($1 eq "a", "[perl #23769]");
+
+ ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, "[perl #23769] regrepeat variant");
+ ok($1 eq "\xa0", "[perl #23769]");
+
+ ok("ababab\x{100} " =~ /((?:ab)+?)/, "[perl #23769] hard invariant");
+ ok($1 eq "ab", "[perl #23769]");
+
+ ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, "[perl #23769] hard variant");
+ ok($1 eq "\xa0\xa1", "[perl #23769]");
+
+ ok("\xc4\xc4\xc4" !~ /(\x{100}+)/, "[perl #23769] don't match first byte of utf8 representation");
+ ok("\xc4\xc4\xc4" !~ /(\x{100}+?)/, "[perl #23769] don't match first byte of utf8 representation");
+}
+
+for (120 .. 130) {
+ my $head = 'x' x $_;
+ for my $tail ('\x{0061}', '\x{1234}') {
+ ok(
+ eval qq{ "$head$tail" =~ /$head$tail/ },
+ '\x{...} misparsed in regexp near 127 char EXACT limit'
+ );
+ }
+}
+
+# perl #25269: panic: pp_match start/end pointers
+ok("a-bc" eq eval {
+ my($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/;
+ "$x-$y";
+}, 'captures can move backwards in string');
+
+# perl #27940: \cA not recognized in character classes
+ok("a\cAb" =~ /\cA/, '\cA in pattern');
+ok("a\cAb" =~ /[\cA]/, '\cA in character class');
+ok("a\cAb" =~ /[\cA-\cB]/, '\cA in character class range');
+ok("abc" =~ /[^\cA-\cB]/, '\cA in negated character class range');
+ok("a\cBb" =~ /[\cA-\cC]/, '\cB in character class range');
+ok("a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range');
+ok("a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern');
+ok("ab" !~ /a\cIb/x, '\cI in pattern');
+
+# perl #28532: optional zero-width match at end of string is ignored
+ok(("abc" =~ /^abc(\z)?/) && defined($1),
+ 'optional zero-width match at end of string');
+ok(("abc" =~ /^abc(\z)??/) && !defined($1),
+ 'optional zero-width match at end of string');
+
+
+
+{ # TRIE related
+ my @got=();
+ "words"=~/(word|word|word)(?{push @got,$1})s$/;
+ ok(@got==1,"TRIE optimation is working") or warn "# @got";
+ @got=();
+ "words"=~/(word|word|word)(?{push @got,$1})s$/i;
+ ok(@got==1,"TRIEF optimisation is working") or warn "# @got";
+
+ my @nums=map {int rand 1000} 1..100;
+ my $re="(".(join "|",@nums).")";
+ $re=qr/\b$re\b/;
+
+ foreach (@nums) {
+ ok($_=~/$re/,"Trie nums");
+ }
+ $_=join " ", @nums;
+ @got=();
+ push @got,$1 while /$re/g;
+
+ my %count;
+ $count{$_}++ for @got;
+ my $ok=1;
+ for (@nums) {
+ $ok=0 if --$count{$_}<0;
+ }
+ ok($ok,"Trie min count matches");
+}
+
+
+# TRIE related
+# LATIN SMALL/CAPITAL LETTER A WITH MACRON
+ok(("foba \x{101}foo" =~ qr/(foo|\x{100}foo|bar)/i) && $1 eq "\x{101}foo",
+ "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH MACRON");
+
+# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW
+ok(("foba \x{1E01}foo" =~ qr/(foo|\x{1E00}foo|bar)/i) && $1 eq "\x{1E01}foo",
+ "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW");
+
+# DESERET SMALL/CAPITAL LETTER LONG I
+ok(("foba \x{10428}foo" =~ qr/(foo|\x{10400}foo|bar)/i) && $1 eq "\x{10428}foo",
+ "TRIEF + DESERET SMALL/CAPITAL LETTER LONG I");
+
+# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'
+ok(("foba \x{1E01}xfoo" =~ qr/(foo|\x{1E00}Xfoo|bar)/i) && $1 eq "\x{1E01}xfoo",
+ "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'");
+
+{# TRIE related
+
+use charnames ':full';
+
+$s="\N{LATIN SMALL LETTER SHARP S}";
+ok(("foba ba$s" =~ qr/(foo|Ba$s|bar)/i)
+ && $1 eq "ba$s",
+ "TRIEF + LATIN SMALL LETTER SHARP S =~ ss");
+ok(("foba ba$s" =~ qr/(Ba$s|foo|bar)/i)
+ && $1 eq "ba$s",
+ "TRIEF + LATIN SMALL LETTER SHARP S =~ ss");
+ok(("foba ba$s" =~ qr/(foo|bar|Ba$s)/i)
+ && $1 eq "ba$s",
+ "TRIEF + LATIN SMALL LETTER SHARP S =~ ss");
+
+ok(("foba ba$s" =~ qr/(foo|Bass|bar)/i)
+ && $1 eq "ba$s",
+ "TRIEF + LATIN SMALL LETTER SHARP S =~ ss");
+
+ok(("foba ba$s" =~ qr/(foo|BaSS|bar)/i)
+ && $1 eq "ba$s",
+ "TRIEF + LATIN SMALL LETTER SHARP S =~ SS");
+
+ok(("foba ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i)
+ && $1 eq "ba${s}pxySS$s$s",
+ "COMMON PREFIX TRIEF + LATIN SMALL LETTER SHARP S");
+
+
+}
+
+
+
+if (!$ENV{PERL_SKIP_PSYCHO_TEST}){
+ my @normal=qw(these are some normal words);
+ my $psycho=join "|",@normal,map chr $_,255..20000;
+ ok(('these'=~/($psycho)/) && $1 eq 'these','Pyscho');
+} else {
+ ok(1,'Skipped Psycho');
+}
+
+# [perl #36207] mixed utf8 / latin-1 and case folding
+
+{
+ my $utf8 = "\xe9\x{100}"; chop $utf8;
+ my $latin1 = "\xe9";
+
+ ok($utf8 =~ /\xe9/i, "utf8/latin");
+ ok($utf8 =~ /$latin1/i, "utf8/latin runtime");
+ ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie");
+ ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime");
+
+ ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8");
+ ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie");
+ ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime");
+ ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime");
+}
+
+# [perl #37038] Global regular matches generate invalid pointers
+
+{
+ my $s = "abcd";
+ $s =~ /(..)(..)/g;
+ $s = $1;
+ $s = $2;
+ ok($s eq 'cd',
+ "# assigning to original string should not corrupt match vars");
+}
+
+{
+ package wooosh;
+ sub gloople {
+ "!";
+ }
+ package main;
+
+ my $aeek = bless {}, 'wooosh';
+ eval {$aeek->gloople() =~ /(.)/g;};
+ ok($@ eq "", "//g match against return value of sub") or print "# $@\n";
+}
+
+{
+ sub gloople {
+ "!";
+ }
+ eval {gloople() =~ /(.)/g;};
+ ok($@ eq "", "# 26410 didn't affect sub calls for some reason")
+ or print "# $@\n";
+}
+
+{
+ package lv;
+ $var = "abc";
+ sub variable : lvalue { $var }
+
+ package main;
+ my $o = bless [], "lv";
+ my $f = "";
+ eval { for (1..2) { $f .= $1 if $o->variable =~ /(.)/g } };
+ ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n";
+}
+
+{
+ $var = "abc";
+ sub variable : lvalue { $var }
+
+ my $f = "";
+ eval { for (1..2) { $f .= $1 if variable() =~ /(.)/g } };
+ ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n";
+}
+
+# [perl #37836] Simple Regex causes SEGV when run on specific data
+if ($ordA == 193) {
+ print "ok $test # Skip: in EBCDIC\n"; $test++;
+} else {
+ no warnings 'utf8';
+ $_ = pack('U0C2', 0xa2, 0xf8); # ill-formed UTF-8
+ my $ret = 0;
+ eval { $ret = s/[\0]+//g };
+ ok($ret == 0, "ill-formed UTF-8 doesn't match NUL in class");
+}
+
+{ # [perl #38293] chr(65535) should be allowed in regexes
+ no warnings 'utf8'; # to allow non-characters
+ my($c, $r, $s);
+
+ $c = chr 0xffff;
+ $c =~ s/$c//g;
+ ok($c eq "", "U+FFFF, parsed as atom");
+
+ $c = chr 0xffff;
+ $r = "\\$c";
+ $c =~ s/$r//g;
+ ok($c eq "", "U+FFFF backslashed, parsed as atom");
+
+ $c = chr 0xffff;
+ $c =~ s/[$c]//g;
+ ok($c eq "", "U+FFFF, parsed in class");
+
+ $c = chr 0xffff;
+ $r = "[\\$c]";
+ $c =~ s/$r//g;
+ ok($c eq "", "U+FFFF backslashed, parsed in class");
+
+ $s = "A\x{ffff}B";
+ $s =~ s/\x{ffff}//i;
+ ok($s eq "AB", "U+FFFF, EXACTF");
+
+ $s = "\x{ffff}A";
+ $s =~ s/\bA//;
+ ok($s eq "\x{ffff}", "U+FFFF, BOUND");
+
+ $s = "\x{ffff}!";
+ $s =~ s/\B!//;
+ ok($s eq "\x{ffff}", "U+FFFF, NBOUND");
+} # non-characters end
+
+{
+ # https://rt.perl.org/rt3/Ticket/Display.html?id=39583
+
+ # The printing characters
+ my @chars = ("A".."Z");
+ my $delim = ",";
+ my $size = 32771 - 4;
+ my $str = '';
+
+ # create some random junk. Inefficient, but it works.
+ for ($i = 0 ; $i < $size ; $i++) {
+ $str .= $chars[int(rand(@chars))];
+ }
+
+ $str .= ($delim x 4);
+ my $res;
+ my $matched;
+ if ($str =~ s/^(.*?)${delim}{4}//s) {
+ $res = $1;
+ $matched=1;
+ }
+ ok($matched,'pattern matches');
+ ok(length($str)==0,"Empty string");
+ ok(defined($res) && length($res)==$size,"\$1 is correct size");
+}
+
+{ # related to [perl #27940]
+ ok("\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern');
+ ok("\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern');
+ ok("X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern');
+ ok("X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern');
+
+ ok("X\0A" =~ /X\c@?A/, '\c@?');
+ ok("X\0A" =~ /X\c@*A/, '\c@*');
+ ok("X\0A" =~ /X\c@(A)/, '\c@(');
+ ok("X\0A" =~ /X(\c@)A/, '\c@)');
+ ok("X\0A" =~ /X\c@|ZA/, '\c@|');
+
+ ok("X\@A" =~ /X@?A/, '@?');
+ ok("X\@A" =~ /X@*A/, '@*');
+ ok("X\@A" =~ /X@(A)/, '@(');
+ ok("X\@A" =~ /X(@)A/, '@)');
+ ok("X\@A" =~ /X@|ZA/, '@|');
+
+ local $" = ','; # non-whitespace and non-RE-specific
+ ok('abc' =~ /(.)(.)(.)/, 'the last successful match is bogus');
+ ok("A@+B" =~ /A@{+}B/, 'interpolation of @+ in /@{+}/');
+ ok("A@-B" =~ /A@{-}B/, 'interpolation of @- in /@{-}/');
+ ok("A@+B" =~ /A@{+}B/x, 'interpolation of @+ in /@{+}/x');
+ ok("A@-B" =~ /A@{-}B/x, 'interpolation of @- in /@{-}/x');
+}
+
+{
+ use lib 'lib';
+ use Cname;
+
+ ok('fooB'=~/\N{foo}[\N{B}\N{b}]/,"Passthrough charname");
+ $test=1233; my $handle=make_must_warn('Ignoring excess chars from');
+ $handle->('q(xxWxx) =~ /[\N{WARN}]/');
+ {
+ my $code;
+ my $w="";
+ local $SIG{__WARN__} = sub { $w.=shift };
+ eval($code=<<'EOFTEST') or die "$@\n$code\n";
+ {
+ use warnings;
+
+ #1234
+ ok("\0" !~ /[\N{EMPTY-STR}XY]/,
+ "Zerolength charname in charclass doesnt match \0");
+ 1;
+ }
+EOFTEST
+ ok($w=~/Ignoring zero length/,
+ "Got expected zero length warning");
+ warn $code;
+
+ }
+ $handle= make_must_warn('Ignoring zero length');
+ $handle->('qq(\\0) =~ /[\N{EMPTY-STR}XY]/');
+ ok('AB'=~/(\N{EVIL})/ && $1 eq 'A',"Charname caching $1");
+ ok('ABC'=~/(\N{EVIL})/,"Charname caching $1");
+ ok('xy'=~/x\N{EMPTY-STR}y/, 'Empty string charname produces NOTHING node');
+ ok(''=~/\N{EMPTY-STR}/, 'Empty string charname produces NOTHING node 2');
+
+}
+{
+ print "# MORE LATIN SMALL LETTER SHARP S\n";
+
+ use charnames ':full';
+
+ #see also test #835
+ ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+ "unoptimized named sequence in class 1");
+ ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+ "unoptimized named sequence in class 2");
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/,
+ "unoptimized named sequence in class 3");
+ ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+ "unoptimized named sequence in class 4");
+
+ ok('aabc' !~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against aabc');
+ ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc');
+ ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc');
+
+ ok(' A B'=~/\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/,
+ 'Intermixed named and unicode escapes 1');
+ ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}"=~
+ /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/,
+ 'Intermixed named and unicode escapes 2');
+ ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042} 3"=~
+ /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/,
+ 'Intermixed named and unicode escapes');
+}
+$brackets = qr{
+ { (?> [^{}]+ | (??{ $brackets }) )* }
+ }x;
+ok("{b{c}d" !~ m/^((??{ $brackets }))/, "bracket mismatch");
+
+SKIP:{
+ our @stack=();
+ my @expect=qw(
+ stuff1
+ stuff2
+ <stuff1>and<stuff2>
+ right
+ <right>
+ <<right>>
+ <<<right>>>
+ <<stuff1>and<stuff2>><<<<right>>>>
+ );
+
+ local $_='<<<stuff1>and<stuff2>><<<<right>>>>>';
+ ok(/^(<((?:(?>[^<>]+)|(?1))*)>(?{push @stack, $2 }))$/,
+ "Recursion should match");
+ ok(@stack==@expect)
+ or skip("Won't test individual results as count isn't equal",
+ 0+@expect);
+ foreach my $idx (@expect) {
+ ok($expect[$idx] eq $stack[$idx],
+ "Expecting '$expect' at stack pos #$idx");
+ }
+
+}
+{
+ my $s='123453456';
+ $s=~s/(?<digits>\d+)\k<digits>/$+{digits}/;
+ ok($s eq '123456','Named capture (angle brackets) s///');
+ $s='123453456';
+ $s=~s/(?'digits'\d+)\k'digits'/$+{digits}/;
+ ok($s eq '123456','Named capture (single quotes) s///');
+}
+sub iseq($$;$) {
+ my ( $got, $expect, $name)=@_;
+
+ $_=defined($_) ? "'$_'" : "undef"
+ for $got, $expect;
+
+ my $ok= $got eq $expect;
+
+ printf "%sok %d - %s\n", ($ok ? "" : "not "), $test,
+ $name||"$Message:".((caller)[2]);
+
+ printf "# Failed test at line %d\n".
+ "# expected: %s\n".
+ "# result: %s\n",
+ (caller)[2], $expect, $got
+ unless $ok;
+
+ $test++;
+ return $ok;
+}
+{
+ my $s='foo bar baz';
+ my (@k,@v,@fetch,$res);
+ my $count= 0;
+ my @names=qw($+{A} $+{B} $+{C});
+ if ($s=~/(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
+ while (my ($k,$v)=each(%+)) {
+ $count++;
+ }
+ @k=sort keys(%+);
+ @v=sort values(%+);
+ $res=1;
+ push @fetch,
+ [ "$+{A}", "$1" ],
+ [ "$+{B}", "$2" ],
+ [ "$+{C}", "$3" ],
+ ;
+ }
+ foreach (0..2) {
+ if ($fetch[$_]) {
+ iseq($fetch[$_][0],$fetch[$_][1],$names[$_]);
+ } else {
+ ok(0, $names[$_]);
+ }
+ }
+ iseq($res,1,"$s~=/(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/");
+ iseq($count,3,"Got 3 keys in %+ via each");
+ iseq(0+@k, 3, 'Got 3 keys in %+ via keys');
+ iseq("@k","A B C", "Got expected keys");
+ iseq("@v","bar baz foo", "Got expected values");
+ eval'
+ print for $+{this_key_doesnt_exist};
+ ';
+ ok(!$@,'lvalue $+{...} should not throw an exception');
+}
+
+# stress test CURLYX/WHILEM.
+#
+# This test includes varying levels of nesting, and according to
+# profiling done against build 28905, exercises every code line in the
+# CURLYX and WHILEM blocks, except those related to LONGJMP, the
+# super-linear cache and warnings. It executes about 0.5M regexes
+
+if ($ENV{PERL_SKIP_PSYCHO_TEST}){
+ printf "ok %d Skip: No psycho tests\n", $test++;
+} else {
+ my $r = qr/^
+ (?:
+ ( (?:a|z+)+ )
+ (?:
+ ( (?:b|z+){3,}? )
+ (
+ (?:
+ (?:
+ (?:c|z+){1,1}?z
+ )?
+ (?:c|z+){1,1}
+ )*
+ )
+ (?:z*){2,}
+ ( (?:z+|d)+ )
+ (?:
+ ( (?:e|z+)+ )
+ )*
+ ( (?:f|z+)+ )
+ )*
+ ( (?:z+|g)+ )
+ (?:
+ ( (?:h|z+)+ )
+ )*
+ ( (?:i|z+)+ )
+ )+
+ ( (?:j|z+)+ )
+ (?:
+ ( (?:k|z+)+ )
+ )*
+ ( (?:l|z+)+ )
+ $/x;
+
+
+ my $ok = 1;
+ my $msg = "CURLYX stress test";
+ OUTER:
+ for my $a ("x","a","aa") {
+ for my $b ("x","bbb","bbbb") {
+ my $bs = $a.$b;
+ for my $c ("x","c","cc") {
+ my $cs = $bs.$c;
+ for my $d ("x","d","dd") {
+ my $ds = $cs.$d;
+ for my $e ("x","e","ee") {
+ my $es = $ds.$e;
+ for my $f ("x","f","ff") {
+ my $fs = $es.$f;
+ for my $g ("x","g","gg") {
+ my $gs = $fs.$g;
+ for my $h ("x","h","hh") {
+ my $hs = $gs.$h;
+ for my $i ("x","i","ii") {
+ my $is = $hs.$i;
+ for my $j ("x","j","jj") {
+ my $js = $is.$j;
+ for my $k ("x","k","kk") {
+ my $ks = $js.$k;
+ for my $l ("x","l","ll") {
+ my $ls = $ks.$l;
+ if ($ls =~ $r) {
+ if ($ls =~ /x/) {
+ $msg .= ": unexpected match for [$ls]";
+ $ok = 0;
+ last OUTER;
+ }
+ my $cap = "$1$2$3$4$5$6$7$8$9$10$11$12";
+ unless ($ls eq $cap) {
+ $msg .= ": capture: [$ls], got [$cap]";
+ $ok = 0;
+ last OUTER;
+ }
+ }
+ else {
+ unless ($ls =~ /x/) {
+ $msg = ": failed for [$ls]";
+ $ok = 0;
+ last OUTER;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ ok($ok, $msg);
+}
+
+# \, breaks {3,4}
+ok("xaaay" !~ /xa{3\,4}y/, "\, in a pattern");
+ok("xa{3,4}y" =~ /xa{3\,4}y/, "\, in a pattern");
+
+# \c\ followed by _
+ok("x\c_y" !~ /x\c\_y/, "\_ in a pattern");
+ok("x\c\_y" =~ /x\c\_y/, "\_ in a pattern");
+
+# \c\ followed by other characters
+for my $c ("z", "\0", "!", chr(254), chr(256)) {
+ my $targ = "a\034$c";
+ my $reg = "a\\c\\$c";
+ ok(eval("qq/$targ/ =~ /$reg/"), "\\c\\ in pattern");
+}
+
+{
+ my $str='abc';
+ my $count=0;
+ my $mval=0;
+ my $pval=0;
+ while ($str=~/b/g) { $mval=$#-; $pval=$#+; $count++ }
+ iseq($mval,0,"\@- should be empty [RT#36046]");
+ iseq($pval,0,"\@+ should be empty [RT#36046]");
+ iseq($count,1,"should have matched once only [RT#36046]");
+}
+
+{ # Test the (*PRUNE) pattern
+ our $count = 0;
+ 'aaab'=~/a+b?(?{$count++})(*FAIL)/;
+ iseq($count,9,"expect 9 for no (*PRUNE)");
+ $count = 0;
+ 'aaab'=~/a+b?(*PRUNE)(?{$count++})(*FAIL)/;
+ iseq($count,3,"expect 3 with (*PRUNE)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(*PRUNE)(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*PRUNE)/");
+ $count = 0;
+ 'aaab'=~/a+b?(??{'(*PRUNE)'})(?{$count++})(*FAIL)/;
+ iseq($count,3,"expect 3 with (*PRUNE)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*PRUNE)/");
+}
+{ # Test the (*SKIP) pattern
+ our $count = 0;
+ 'aaab'=~/a+b?(*SKIP)(?{$count++})(*FAIL)/;
+ iseq($count,1,"expect 1 with (*SKIP)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(*SKIP)(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*SKIP)/");
+ $_='aaabaaab';
+ $count=0;
+ our @res=();
+ 1 while /(a+b?)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,2,"Expect 2 with (*SKIP)" );
+ iseq("@res","aaab aaab","adjacent (*SKIP) works as expected" );
+}
+{ # Test the (*SKIP) pattern
+ our $count = 0;
+ 'aaab'=~/a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/;
+ iseq($count,1,"expect 1 with (*SKIP)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*SKIP)/");
+ $_='aaabaaab';
+ $count=0;
+ our @res=();
+ 1 while /(a+b?)(*MARK:foo)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,2,"Expect 2 with (*SKIP)" );
+ iseq("@res","aaab aaab","adjacent (*SKIP) works as expected" );
+}
+{ # Test the (*SKIP) pattern
+ our $count = 0;
+ 'aaab'=~/a*(*MARK:a)b?(*MARK:b)(*SKIP:a)(?{$count++})(*FAIL)/;
+ iseq($count,3,"expect 3 with *MARK:a)b?(*MARK:b)(*SKIP:a)");
+ local $_='aaabaaab';
+ $count=0;
+ our @res=();
+ 1 while /(a*(*MARK:a)b?)(*MARK:x)(*SKIP:a)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,5,"Expect 5 with (*MARK:a)b?)(*MARK:x)(*SKIP:a)" );
+ iseq("@res","aaab b aaab b ","adjacent (*MARK:a)b?)(*MARK:x)(*SKIP:a) works as expected" );
+}
+{ # Test the (*COMMIT) pattern
+ our $count = 0;
+ 'aaabaaab'=~/a+b?(*COMMIT)(?{$count++})(*FAIL)/;
+ iseq($count,1,"expect 1 with (*COMMIT)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(*COMMIT)(?{$count++})(*FAIL)/g;
+ iseq($count,1,"/.(*COMMIT)/");
+ $_='aaabaaab';
+ $count=0;
+ our @res=();
+ 1 while /(a+b?)(*COMMIT)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,1,"Expect 1 with (*COMMIT)" );
+ iseq("@res","aaab","adjacent (*COMMIT) works as expected" );
+}
+{
+ # Test named commits and the $REGERROR var
+ our $REGERROR;
+ for my $name ('',':foo')
+ {
+ for my $pat ("(*PRUNE$name)",
+ ($name? "(*MARK$name)" : "")
+ . "(*SKIP$name)",
+ "(*COMMIT$name)")
+ {
+ for my $suffix ('(*FAIL)','')
+ {
+ 'aaaab'=~/a+b$pat$suffix/;
+ iseq(
+ $REGERROR,
+ ($suffix ? ($name ? 'foo' : "1") : ""),
+ "Test $pat and \$REGERROR $suffix"
+ );
+ }
+ }
+ }
+}
+{
+ # Test named commits and the $REGERROR var
+ package Fnorble;
+ our $REGERROR;
+ for my $name ('',':foo')
+ {
+ for my $pat ("(*PRUNE$name)",
+ ($name? "(*MARK$name)" : "")
+ . "(*SKIP$name)",
+ "(*COMMIT$name)")
+ {
+ for my $suffix ('(*FAIL)','')
+ {
+ 'aaaab'=~/a+b$pat$suffix/;
+ ::iseq(
+ $REGERROR,
+ ($suffix ? ($name ? 'foo' : "1") : ""),
+ "Test $pat and \$REGERROR $suffix"
+ );
+ }
+ }
+ }
+}
+{
+ # Test named commits and the $REGERROR var
+ local $Message = "\$REGERROR";
+ our $REGERROR;
+ for $word (qw(bar baz bop)) {
+ $REGERROR="";
+ "aaaaa$word"=~/a+(?:bar(*COMMIT:bar)|baz(*COMMIT:baz)|bop(*COMMIT:bop))(*FAIL)/;
+ iseq($REGERROR,$word);
+ }
+}
+{ #Regression test for perlbug 40684
+ local $Message = "RT#40684 tests:";
+ my $s = "abc\ndef";
+ my $rex = qr'^abc$'m;
+ ok($s =~ m/$rex/);
+ ok($s =~ m/^abc$/m);
+}
+{
+ #Mindnumbingly simple test of (*THEN)
+ for ("ABC","BAX") {
+ ok(/A (*THEN) X | B (*THEN) C/x,"Simple (*THEN) test");
+ }
+}
+
+{
+ local $Message = "Relative Recursion";
+ my $parens=qr/(\((?:[^()]++|(?-1))*+\))/;
+ local $_='foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))';
+ my ($all,$one,$two)=('','','');
+ if (/foo $parens \s* \+ \s* bar $parens/x) {
+ $all=$&;
+ $one=$1;
+ $two=$2;
+ }
+ iseq($one, '((2*3)+4-3)');
+ iseq($two, '(2*(3+4)-1*(2-3))');
+ iseq($all, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))');
+ iseq($all, $_);
+}
+{
+ my $spaces=" ";
+ local $_=join 'bar',$spaces,$spaces;
+ our $count=0;
+ s/(?>\s+bar)(?{$count++})//g;
+ iseq($_,$spaces,"SUSPEND final string");
+ iseq($count,1,"Optimiser should have prevented more than one match");
+}
+{
+ local $Message = "RT#36909 test";
+ $^R = 'Nothing';
+ {
+ local $^R = "Bad";
+ ok('x foofoo y' =~ m{
+ (foo) # $^R correctly set
+ (?{ "last regexp code result" })
+ }x);
+ iseq($^R,'last regexp code result');
+ }
+ iseq($^R,'Nothing');
+ {
+ local $^R = "Bad";
+
+ ok('x foofoo y' =~ m{
+ (?:foo|bar)+ # $^R correctly set
+ (?{"last regexp code result"})
+ }x);
+ iseq($^R,'last regexp code result');
+ }
+ iseq($^R,'Nothing');
+
+ {
+ local $^R = "Bad";
+ ok('x foofoo y' =~ m{
+ (foo|bar)\1+ # $^R undefined
+ (?{"last regexp code result"})
+ }x);
+ iseq($^R,'last regexp code result');
+ }
+ iseq($^R,'Nothing');
+}
+{
+ local $Message="RT#22395";
+ our $count;
+ for my $l (1,10,100,1000) {
+ $count=0;
+ ('a' x $l) =~ /(.*)(?{$count++})[bc]/;
+ iseq($l+1,$count,"Should be L+1 not L*(L+3)/2 (L=$l)");
+ }
+}
+{
+ local $Message = "RT#22614";
+ local $_='ab';
+ our @len=();
+ /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/;
+ iseq("@len","2 2 2");
+}
+{
+ local $Message = "RT#18209";
+ my $text = ' word1 word2 word3 word4 word5 word6 ';
+
+ my @words = ('word1', 'word3', 'word5');
+ my $count;
+ foreach my $word (@words){
+ $text =~ s/$word\s//gi; # Leave a space to seperate words in the resultant str.
+ # The following block is not working.
+ if($&){
+ $count++;
+ }
+ # End bad block
+ }
+ iseq($count,3);
+ iseq($text,' word2 word4 word6 ');
+}
+
+# Test counter is at bottom of file. Put new tests above here.
+#-------------------------------------------------------------------
+# Keep the following tests last -- they may crash perl
+{
+ # RT#19049 / RT#38869
+ my @list = (
+ 'ab cdef', # matches regex
+ ( 'e' x 40000 ) .'ab c' # matches not, but 'ab c' matches part of it
+ );
+ my $y;
+ my $x;
+ foreach (@list) {
+ m/ab(.+)cd/i; # the ignore-case seems to be important
+ $y = $1; # use $1, which might not be from the last match!
+ $x = substr($list[0],$-[0],$+[0]-$-[0]);
+ }
+ iseq($y,' ',
+ 'pattern in a loop, failure should not affect previous success');
+ iseq($x,'ab cd',
+ 'pattern in a loop, failure should not affect previous success');
+}
+
+ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274")
+ or print "# Unexpected outcome: should pass or crash perl\n";
+
+ok((q(a)x 100) =~ /^(??{'(.)'x 100})/,
+ "Regexp /^(??{'(.)'x 100})/ crashes older perls")
+ or print "# Unexpected outcome: should pass or crash perl\n";
+
+{
+ local $Message = "substituation with lookahead (possible segv)";
+ $_="ns1ns1ns1";
+ s/ns(?=\d)/ns_/g;
+ iseq($_,"ns_1ns_1ns_1");
+ $_="ns1";
+ s/ns(?=\d)/ns_/;
+ iseq($_,"ns_1");
+ $_="123";
+ s/(?=\d+)|(?<=\d)/!Bang!/g;
+ iseq($_,"!Bang!1!Bang!2!Bang!3!Bang!");
+}
+
+# Put new tests above the dotted line about a page above this comment
+
+# Don't forget to update this!
+BEGIN { print "1..1365\n" };