#
# This is a home for regular expression tests that don't fit into
# the format supported by re/regexp.t. If you want to add a test
-# that does fit that format, add it to re/re_tests, not here. Tests for \N
-# should be added here because they are treated as single quoted strings
-# there, which means they avoid the lexer which otherwise would look at them.
+# that does fit that format, add it to re/re_tests, not here.
use strict;
use warnings;
use 5.010;
-
sub run_tests;
$| = 1;
BEGIN {
chdir 't' if -d 't';
@INC = ('../lib','.');
- do "re/ReTest.pl" or die $@;
+ require Config; import Config;
+ require './test.pl';
}
-
-plan tests => 426; # Update this when adding/deleting tests.
+plan tests => 712; # Update this when adding/deleting tests.
run_tests() unless caller;
sub run_tests {
{
-
my $x = "abc\ndef\n";
+ (my $x_pretty = $x) =~ s/\n/\\n/g;
- ok $x =~ /^abc/, qq ["$x" =~ /^abc/];
- ok $x !~ /^def/, qq ["$x" !~ /^def/];
+ ok $x =~ /^abc/, qq ["$x_pretty" =~ /^abc/];
+ ok $x !~ /^def/, qq ["$x_pretty" !~ /^def/];
# used to be a test for $*
- ok $x =~ /^def/m, qq ["$x" =~ /^def/m];
+ ok $x =~ /^def/m, qq ["$x_pretty" =~ /^def/m];
- nok $x =~ /^xxx/, qq ["$x" =~ /^xxx/];
- nok $x !~ /^abc/, qq ["$x" !~ /^abc/];
+ ok(!($x =~ /^xxx/), qq ["$x_pretty" =~ /^xxx/]);
+ ok(!($x !~ /^abc/), qq ["$x_pretty" !~ /^abc/]);
- ok $x =~ /def/, qq ["$x" =~ /def/];
- nok $x !~ /def/, qq ["$x" !~ /def/];
+ ok $x =~ /def/, qq ["$x_pretty" =~ /def/];
+ ok(!($x !~ /def/), qq ["$x_pretty" !~ /def/]);
- ok $x !~ /.def/, qq ["$x" !~ /.def/];
- nok $x =~ /.def/, qq ["$x" =~ /.def/];
+ ok $x !~ /.def/, qq ["$x_pretty" !~ /.def/];
+ ok(!($x =~ /.def/), qq ["$x_pretty" =~ /.def/]);
- ok $x =~ /\ndef/, qq ["$x" =~ /\ndef/];
- nok $x !~ /\ndef/, qq ["$x" !~ /\ndef/];
+ ok $x =~ /\ndef/, qq ["$x_pretty" =~ /\\ndef/];
+ ok(!($x !~ /\ndef/), qq ["$x_pretty" !~ /\\ndef/]);
}
{
ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc',
qq [\$_ = '$_'; /(a*b*)(c*)/];
ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/];
- nok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/];
+ unlike($_, qr/a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]);
$_ = 'aaabccc';
ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/];
$_ = 'aaaccc';
ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/];
- nok /a*b+c*/, qq [\$_ = '$_'; /a*b+c*/];
+ unlike($_, qr/a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]);
$_ = 'abcdef';
ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/];
{
# used to be a test for $*
- ok "ab\ncd\n" =~ /^cd/m, qq ["ab\ncd\n" =~ /^cd/m];
+ ok "ab\ncd\n" =~ /^cd/m, q ["ab\ncd\n" =~ /^cd/m];
}
{
our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3');
while ($_ = shift(@XXX)) {
- my $f = index ($_, 'not') >= 0 ? \&nok : \&ok;
+ my $e = index ($_, 'not') >= 0 ? '' : 1;
my $r = m?(.*)?;
- &$f ($r, "?(.*)?");
+ is($r, $e, "?(.*)?");
/not/ && reset;
if (/not ok 2/) {
if ($^O eq 'VMS') {
{
$_ = 'now is the {time for all} good men to come to.';
- / {([^}]*)}/;
- iseq $1, 'time for all', "Match braces";
+ / \{([^}]*)}/;
+ is($1, 'time for all', "Match braces");
}
{
$t9++ if /$pat9/o;
}
my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9";
- iseq $x, '505550555', "Test /o";
+ is($x, '505550555', "Test /o");
}
-
{
my $xyz = 'xyz';
ok "abc" =~ /^abc$|$xyz/, "| after \$";
is($result, "abc:bc", $message);
}
-
{
my $message = "Scalar /g";
$_ = "abcfooabcbar";
$_ .= '';
my @x = /abc/g;
- iseq @x, 2, "/g reset after assignment";
+ is(@x, 2, "/g reset after assignment");
}
{
is($out, 1, $message);
}
-
{
$_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6';
my @out = /(?<!foo)bar./g;
- iseq "@out", 'bar2 barf', "Negative lookbehind";
+ is("@out", 'bar2 barf', "Negative lookbehind");
}
{
my $message = "REG_INFTY tests";
# Tests which depend on REG_INFTY
- $::reg_infty = $Config {reg_infty} // 32767;
+
+ # Defaults assumed if this fails
+ eval { require Config; };
+ $::reg_infty = $Config::Config{reg_infty} // 32767;
$::reg_infty_m = $::reg_infty - 1;
$::reg_infty_p = $::reg_infty + 1;
$::reg_infty_m = $::reg_infty_m; # Suppress warning.
# next three tests will fail if you should have picked up a lower-than-
# default value for $reg_infty from Config.pm, but have not.
- eval_ok q (('aaa' =~ /(a{1,$::reg_infty_m})/)[0] eq 'aaa'), $message;
- eval_ok q (('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/), $message;
- eval_ok q (('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/), $message;
+ is(eval q{('aaa' =~ /(a{1,$::reg_infty_m})/)[0]}, 'aaa', $message);
+ is($@, '', $message);
+ is(eval q{('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/}, 1, $message);
+ is($@, '', $message);
+ isnt(q{('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/}, 1, $message);
+ is($@, '', $message);
+
eval "'aaa' =~ /a{1,$::reg_infty}/";
- like($@, /^\QQuantifier in {,} bigger than/, $message);
+ like($@, qr/^\QQuantifier in {,} bigger than/, $message);
eval "'aaa' =~ /a{1,$::reg_infty_p}/";
like($@, qr/^\QQuantifier in {,} bigger than/, $message);
}
unlike("b$a=", qr/a$a=/, $message);
like("b$a=", qr/ba+=/, $message);
- like("ba$a=", /b(?:a|b)+=/, $message);
+ like("ba$a=", qr/b(?:a|b)+=/, $message);
}
}
-
{
# 20000 nodes, each taking 3 words per string, and 1 per branch
my $long_constant_len = join '|', 12120 .. 32645;
{
my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad
- iseq "@ans", 'a/ b', "Stack may be bad";
+ is("@ans", 'a/ b', "Stack may be bad");
}
{
my $res = eval { "xx" =~ /(?$code)/o };
{
no warnings 'uninitialized';
- my $message = "$message '$@', '$res', '$blah'";
+ chomp $@; my $message = "$message '$@', '$res', '$blah'";
ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message);
}
}
{
- iseq qr/\b\v$/i, '(?^i:\b\v$)', 'qr/\b\v$/i';
- iseq qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s';
- iseq qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m';
- iseq qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x';
- iseq qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism';
- iseq qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/';
+ is(qr/\b\v$/i, '(?^i:\b\v$)', 'qr/\b\v$/i');
+ is(qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s');
+ is(qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m');
+ is(qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x');
+ is(qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism');
+ is(qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/');
}
{ # Test that charset modifier work, and are interpolated
- iseq qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier';
- iseq qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles';
- iseq qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles';
- iseq qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles';
- iseq qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles';
+ is(qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier');
+ is(qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles');
+ is(qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles');
+ is(qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles');
+ is(qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles');
my $dual = qr/\b\v$/;
- use locale;
- my $locale = qr/\b\v$/;
- iseq $locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale';
- no locale;
+ my $locale;
+
+ SKIP: {
+ skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale});
+
+ BEGIN {
+ if($Config{d_setlocale}) {
+ require locale; import locale;
+ }
+ }
+ $locale = qr/\b\v$/;
+ is($locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale');
+ no locale;
+ }
use feature 'unicode_strings';
my $unicode = qr/\b\v$/;
- iseq $unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings';
- iseq qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale';
- iseq qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings';
+ is($unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings');
+ is(qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale');
+
+ SKIP: {
+ skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale});
+
+ is(qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings');
+ }
no feature 'unicode_strings';
- iseq qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings';
- iseq qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings';
+ SKIP: {
+ skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale});
- use locale;
- iseq qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale';
- iseq qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale';
- }
+ is(qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings');
+ }
+
+ is(qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings');
+ SKIP: {
+ skip 'No locale testing without d_setlocale', 2 if(!$Config{d_setlocale});
+ BEGIN {
+ if($Config{d_setlocale}) {
+ require locale; import locale;
+ }
+ }
+ is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale');
+ is(qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale');
+ }
+ }
{
my $message = "Look around";
}
{
- must_die 'q(a:[b]:) =~ /[x[:foo:]]/',
- qr/POSIX class \[:[^:]+:\] unknown in regex/,
- 'POSIX class [: :] must have valid name';
+ is(eval 'q(a:[b]:) =~ /[x[:foo:]]/', undef);
+ like ($@, qr/POSIX class \[:[^:]+:\] unknown in regex/,
+ 'POSIX class [: :] must have valid name');
for my $d (qw [= .]) {
- must_die "/[[${d}foo${d}]]/",
- qr/\QPOSIX syntax [$d $d] is reserved for future extensions/,
- "POSIX syntax [[$d $d]] is an error";
+ is(eval "/[[${d}foo${d}]]/", undef);
+ like ($@, qr/\QPOSIX syntax [$d $d] is reserved for future extensions/,
+ "POSIX syntax [[$d $d]] is an error");
}
}
-
{
# test if failure of patterns returns empty list
my $message = "Failed pattern returns empty list";
is("@_", "", $message);
}
-
{
my $message = '@- and @+ tests';
ok(!defined $+ [2] && !defined $- [2] &&
!defined $+ [4] && !defined $- [4], $message);
-
/.(a)/;
is($#+, 1, $message);
is($#-, 1, $message);
/.(a)(ba*)?/;
is($#+, 2, $message);
is($#-, 1, $message);
- }
-
- foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)', '@- = qw (foo bar)') {
- must_die($_, qr/^Modification of a read-only value attempted/,
- 'Elements of @- and @+ are read-only');
+ # Check that values don’t stick
+ " "=~/()()()(.)(..)/;
+ my($m,$p) = (\$-[5], \$+[5]);
+ () = "$$_" for $m, $p; # FETCH (or eqv.)
+ " " =~ /()/;
+ is $$m, undef, 'values do not stick to @- elements';
+ is $$p, undef, 'values do not stick to @+ elements';
}
+ foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)',
+ '@- = qw (foo bar)', '$^N = 42') {
+ is(eval $_, undef);
+ like($@, qr/^Modification of a read-only value attempted/,
+ '$^N, @- and @+ are read-only');
+ }
{
my $message = '\G testing';
like($str, qr/^..\G/, $message);
unlike($str, qr/^...\G/, $message);
ok($str =~ /\G../ && $& eq 'cd', $message);
-
- local $::TODO = $::running_as_thread;
ok($str =~ /.\G./ && $& eq 'bc', $message);
+
+ }
+
+ {
+ my $message = '\G and intuit and anchoring';
+ $_ = "abcdef";
+ pos = 0;
+ ok($_ =~ /\Gabc/, $message);
+ ok($_ =~ /^\Gabc/, $message);
+
+ pos = 3;
+ ok($_ =~ /\Gdef/, $message);
+ pos = 3;
+ ok($_ =~ /\Gdef$/, $message);
+ pos = 3;
+ ok($_ =~ /abc\Gdef$/, $message);
+ pos = 3;
+ ok($_ =~ /^abc\Gdef$/, $message);
+ pos = 3;
+ ok($_ =~ /c\Gd/, $message);
+ }
+
+ {
+ my $s = '123';
+ pos($s) = 1;
+ my @a = $s =~ /(\d)\G/g; # this infinitely looped up till 5.19.1
+ is("@a", "1", '\G looping');
}
"'abcde|' 'abc' 'de'", $message);
}
-
{
my $message = '\G anchor checks';
my $foo = 'aabbccddeeffgg';
pos ($foo) = 1;
- {
- local $::TODO = $::running_as_thread;
- no warnings 'uninitialized';
- ok($foo =~ /.\G(..)/g, $message);
- is($1, 'ab', $message);
- pos ($foo) += 1;
- ok($foo =~ /.\G(..)/g, $message);
- is($1, 'cc', $message);
+ ok($foo =~ /.\G(..)/g, $message);
+ is($1, 'ab', $message);
- pos ($foo) += 1;
- ok($foo =~ /.\G(..)/g, $message);
- is($1, 'de', $message);
+ pos ($foo) += 1;
+ ok($foo =~ /.\G(..)/g, $message);
+ is($1, 'cc', $message);
- ok($foo =~ /\Gef/g, $message);
- }
+ pos ($foo) += 1;
+ ok($foo =~ /.\G(..)/g, $message);
+ is($1, 'de', $message);
+
+ ok($foo =~ /\Gef/g, $message);
undef pos $foo;
ok($foo =~ /\G(..)/g, $message);
is($1, 'cd', $message);
}
+ {
+ my $message = 'basic \G floating checks';
+ my $foo = 'aabbccddeeffgg';
+ pos ($foo) = 1;
+
+ ok($foo =~ /a+\G(..)/g, "$message: a+\\G");
+ is($1, 'ab', "$message: ab");
+
+ pos ($foo) += 1;
+ ok($foo =~ /b+\G(..)/g, "$message: b+\\G");
+ is($1, 'cc', "$message: cc");
+
+ pos ($foo) += 1;
+ ok($foo =~ /d+\G(..)/g, "$message: d+\\G");
+ is($1, 'de', "$message: de");
+
+ ok($foo =~ /\Gef/g, "$message: \\Gef");
+
+ pos ($foo) = 1;
+
+ ok($foo =~ /(?=a+\G)(..)/g, "$message: (?a+\\G)");
+ is($1, 'aa', "$message: aa");
+
+ pos ($foo) = 2;
+
+ ok($foo =~ /a(?=a+\G)(..)/g, "$message: a(?=a+\\G)");
+ is($1, 'ab', "$message: ab");
+
+ }
{
$_ = '123x123';
my @res = /(\d*|x)/g;
local $" = '|';
- iseq "@res", "123||x|123|", "0 match in alternation";
+ is("@res", "123||x|123|", "0 match in alternation");
}
-
{
my $message = "Match against temporaries (created via pp_helem())" .
" is safe";
is($1, "bar", $message);
}
-
{
my $message = 'package $i inside (?{ }), ' .
'saved substrings and changing $_';
is("@c", ",f,,o,,o, ,b,,a,,r,", $message);
}
-
{
my $message = 'Brackets';
our $brackets;
is($&, "{ and }", $message);
}
-
{
$_ = "a-a\nxbb";
pos = 1;
- nok m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg';
+ ok(!m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg');
}
-
{
my $message = '\G anchor checks';
my $text = "aaXbXcc";
ok($text !~ /\GXb*X/g, $message);
}
-
{
$_ = "xA\n" x 500;
- nok /^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"';
+ unlike($_, qr/^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"');
my $text = "abc dbf";
my @res = ($text =~ /.*?(b).*?\b/g);
- iseq "@res", "b b", '\b is not special';
+ is("@res", "b b", '\b is not special');
}
-
{
my $message = '\S, [\S], \s, [\s]';
my @a = map chr, 0 .. 255;
is("@b", "@c", $message);
}
-
{
# see if backtracking optimization works correctly
my $message = 'Backtrack optimization';
like("\n\n", qr/\n?+ $ \n/x, $message);
}
-
{
package S;
use overload '""' => sub {'Object S'};
sub new {bless []}
my $message = "Ref stringification";
- ::ok(do { \my $v} =~ /^SCALAR/, "Scalar ref stringification", $message);
- ::ok(do {\\my $v} =~ /^REF/, "Ref ref stringification", $message);
- ::ok([] =~ /^ARRAY/, "Array ref stringification", $message);
- ::ok({} =~ /^HASH/, "Hash ref stringification", $message);
- ::ok('S' -> new =~ /^Object S/, "Object stringification", $message);
+ ::ok(do { \my $v} =~ /^SCALAR/, "Scalar ref stringification") or diag($message);
+ ::ok(do {\\my $v} =~ /^REF/, "Ref ref stringification") or diag($message);
+ ::ok([] =~ /^ARRAY/, "Array ref stringification") or diag($message);
+ ::ok({} =~ /^HASH/, "Hash ref stringification") or diag($message);
+ ::ok('S' -> new =~ /^Object S/, "Object stringification") or diag($message);
}
-
{
my $message = "Test result of match used as match";
ok('a1b' =~ ('xyz' =~ /y/), $message);
is($`, 'a', $message);
}
-
{
my $message = '"1" is not \s';
- may_not_warn sub {ok ("1\n" x 102 !~ /^\s*\n/m, $message)}, "$message (did not warn)";
+ warning_is(sub {unlike("1\n" x 102, qr/^\s*\n/m, $message)},
+ undef, "$message (did not warn)");
}
-
{
my $message = '\s, [[:space:]] and [[:blank:]]';
my %space = (spc => " ",
my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space;
my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space;
- is("@space0", "cr ff lf spc tab", $message);
+ is("@space0", "cr ff lf spc tab vt", $message);
is("@space1", "cr ff lf spc tab vt", $message);
is("@space2", "spc tab", $message);
}
{
- use charnames ":full";
- # Delayed interpolation of \N'
- my $r1 = qr/\N{THAI CHARACTER SARA I}/;
- my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}";
-
- # Bug #56444
- ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/';
-
- # Bug #62056
- ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/';
-
- ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"';
- ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"';
- }
-
- {
- use charnames ":full";
- my $message = '[perl #74982] Period coming after \N{}';
- ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message);
- ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message);
- }
- {
my $n= 50;
# this must be a high number and go from 0 to N, as the bug we are looking for doesn't
# seem to be predictable. Slight changes to the test make it fail earlier or later.
my $message = '\p property after empty * match';
{
- local $::TODO = "Bug 77414";
like("1", qr/\s*\pN/, $message);
like("-", qr/\s*\p{Dash}/, $message);
like(" ", qr/\w*\p{Blank}/, $message);
SKIP: { # Some constructs with Latin1 characters cause a utf8 string not
# to match itself in non-utf8
- if ($IS_EBCDIC) {
+ if ($::IS_EBCDIC) {
skip "Needs to be customized to run on EBCDIC", 6;
}
my $c = "\xc0";
}
SKIP: { # Make sure can override the formatting
- if ($IS_EBCDIC) {
+ if ($::IS_EBCDIC) {
skip "Needs to be customized to run on EBCDIC", 2;
}
use feature 'unicode_strings';
}
{
- # Test that a regex followed by an operator and/or a statement modifier work
- # These tests use string-eval so that it reports a clean error when it fails
- # (without the string eval the test script might be unparseable)
-
- # Note: these test check the behaviour that currently is valid syntax
- # If a new regex modifier is added and a test fails then there is a backwards-compatibility issue
- # Note-2: a new deprecate warning was added for this with commit e6897b1a5db0410e387ccbf677e89fc4a1d8c97a
- # which indicate that this syntax will be removed in 5.16.
- # When this happens the tests can be removed
-
- no warnings 'syntax';
- iseq( eval q#my $r = "a" =~ m/a/lt 2;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by lt");
- iseq( eval q#my $r = "a" =~ m/a/le 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by le");
- iseq( eval q#my $r = "a" =~ m/a/eq 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by eq");
- iseq( eval q#my $r = "a" =~ m/a/ne 0;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by ne");
- iseq( eval q#my $r = "a" =~ m/a/and 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by and");
- iseq( eval q#my $r = "a" =~ m/a/unless 0;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by unless");
- iseq( eval q#my $c = 1; my $r; $r = "a" =~ m/a/while $c--;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by while");
- iseq( eval q#my $c = 0; my $r; $r = "a" =~ m/a/until $c++;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by until");
- iseq( eval q#my $r; $r = "a" =~ m/a/for 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by for");
- iseq( eval q#my $r; $r = "a" =~ m/a/foreach 1;"eval_ok $r"#, "eval_ok 1", "regex (m//) followed by foreach");
-
- iseq( eval q#my $t = "a"; my $r = $t =~ s/a//lt 2;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by lt");
- iseq( eval q#my $t = "a"; my $r = $t =~ s/a//le 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by le");
- iseq( eval q#my $t = "a"; my $r = $t =~ s/a//ne 0;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by ne");
- iseq( eval q#my $t = "a"; my $r = $t =~ s/a//and 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by and");
- iseq( eval q#my $t = "a"; my $r = $t =~ s/a//unless 0;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by unless");
-
- iseq( eval q#my $c = 1; my $r; my $t = "a"; $r = $t =~ s/a//while $c--;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by while");
- iseq( eval q#my $c = 0; my $r; my $t = "a"; $r = $t =~ s/a//until $c++;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by until");
- iseq( eval q#my $r; my $t = "a"; $r = $t =~ s/a//for 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by for");
- iseq( eval q#my $r; my $t = "a"; $r = $t =~ s/a//for 1;"eval_ok $r"#, "eval_ok 1", "regex (s///) followed by foreach");
- }
-
- {
my $str= "\x{100}";
chop $str;
my $qr= qr/$str/;
- iseq( "$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212" );
+ is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212");
$str= "";
$qr= qr/$str/;
- iseq( "$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212" )
+ is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212");
}
"A" =~ /(((A))?)+/;
my $second = $2;
- iseq($first, $second);
+ is($first, $second);
}
{
}
ok($count < 10, 'RT #3516 C');
}
+ {
+ # RT #84294: Is this a bug in the simple Perl regex?
+ # : Nested buffers and (?{...}) dont play nicely on partial matches
+ our @got= ();
+ ok("ab" =~ /((\w+)(?{ push @got, $2 })){2}/,"RT #84294: Pattern should match");
+ my $want= "'ab', 'a', 'b'";
+ my $got= join(", ", map { defined($_) ? "'$_'" : "undef" } @got);
+ is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state');
+ }
+
+ {
+ # Suppress warnings, as the non-unicode one comes out even if turn off
+ # warnings here (because the execution is done in another scope).
+ local $SIG{__WARN__} = sub {};
+ my $str = "\x{110000}";
+
+ unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{AHEX=True}");
+ like($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\p{AHEX=False}");
+ like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{AHEX=True}");
+ unlike($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{AHEX=FALSE}");
+ }
+
+ {
+ # Test that IDstart works, but because the author (khw) knows
+ # regexes much better than the rest of the core, it is being done here
+ # in the context of a regex which relies on buffer names beginng with
+ # IDStarts.
+ use utf8;
+ my $str = "abc";
+ like($str, qr/(?<a>abc)/, "'a' is legal IDStart");
+ like($str, qr/(?<_>abc)/, "'_' is legal IDStart");
+ like($str, qr/(?<ß>abc)/, "U+00DF is legal IDStart");
+ like($str, qr/(?<ℕ>abc)/, "U+2115' is legal IDStart");
+
+ # This test works on Unicode 6.0 in which U+2118 and U+212E are legal
+ # IDStarts there, but are not Word characters, and therefore Perl
+ # doesn't allow them to be IDStarts. But there is no guarantee that
+ # Unicode won't change things around in the future so that at some
+ # future Unicode revision these tests would need to be revised.
+ foreach my $char ("%", "×", chr(0x2118), chr(0x212E)) {
+ my $prog = <<"EOP";
+use utf8;;
+"abc" =~ qr/(?<$char>abc)/;
+EOP
+ utf8::encode($prog);
+ fresh_perl_like($prog, qr!Group name must start with a non-digit word character!, {},
+ sprintf("'U+%04X not legal IDFirst'", ord($char)));
+ }
+ }
+
+ { # [perl #101710]
+ my $pat = "b";
+ utf8::upgrade($pat);
+ like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string");
+ }
+
+ { # Crash with @a =~ // warning
+ local $SIG{__WARN__} = sub {
+ pass 'no crash for @a =~ // warning'
+ };
+ eval ' sub { my @a =~ // } ';
+ }
+
+ { # Concat overloading and qr// thingies
+ my @refs;
+ my $qr = qr//;
+ package Cat {
+ require overload;
+ overload->import(
+ '""' => sub { ${$_[0]} },
+ '.' => sub {
+ push @refs, ref $_[1] if ref $_[1];
+ bless $_[2] ? \"$_[1]${$_[0]}" : \"${$_[0]}$_[1]"
+ }
+ );
+ }
+ my $s = "foo";
+ my $o = bless \$s, Cat::;
+ /$o$qr/;
+ is "@refs", "Regexp", '/$o$qr/ passes qr ref to cat overload meth';
+ }
+
+ {
+ my $count=0;
+ my $str="\n";
+ $count++ while $str=~/.*/g;
+ is $count, 2, 'test that ANCH_MBOL works properly. We should get 2 from $count++ while "\n"=~/.*/g';
+ my $class_count= 0;
+ $class_count++ while $str=~/[^\n]*/g;
+ is $class_count, $count, 'while "\n"=~/.*/g and while "\n"=~/[^\n]*/g should behave the same';
+ my $anch_count= 0;
+ $anch_count++ while $str=~/^.*/mg;
+ is $anch_count, 1, 'while "\n"=~/^.*/mg should match only once';
+ }
+
+ { # [perl #111174]
+ use re '/u';
+ like "\xe0", qr/(?i:\xc0)/, "(?i: shouldn't lose the passed in /u";
+ use re '/a';
+ unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a";
+ use re '/aa';
+ unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa";
+ }
+
+ {
+ # the test for whether the pattern should be re-compiled should
+ # consider the UTF8ness of the previous and current pattern
+ # string, as well as the physical bytes of the pattern string
+
+ for my $s ("\xc4\x80", "\x{100}") {
+ ok($s =~ /^$s$/, "re-compile check is UTF8-aware");
+ }
+ }
+
+ # #113682 more overloading and qr//
+ # when doing /foo$overloaded/, if $overloaded returns
+ # a qr/(?{})/ via qr or "" overloading, then 'use re 'eval'
+ # shouldn't be required. Via '.', it still is.
+ {
+ package Qr0;
+ use overload 'qr' => sub { qr/(??{50})/ };
+
+ package Qr1;
+ use overload '""' => sub { qr/(??{51})/ };
+
+ package Qr2;
+ use overload '.' => sub { $_[1] . qr/(??{52})/ };
+
+ package Qr3;
+ use overload '""' => sub { qr/(??{7})/ },
+ '.' => sub { $_[1] . qr/(??{53})/ };
+
+ package Qr_indirect;
+ use overload '""' => sub { $_[0][0] };
+
+ package main;
+
+ for my $i (0..3) {
+ my $o = bless [], "Qr$i";
+ if ((0,0,1,1)[$i]) {
+ eval { "A5$i" =~ /^A$o$/ };
+ like($@, qr/Eval-group not allowed/, "Qr$i");
+ eval { "5$i" =~ /$o/ };
+ like($@, ($i == 3 ? qr/^$/ : qr/no method found,/),
+ "Qr$i bare");
+ {
+ use re 'eval';
+ ok("A5$i" =~ /^A$o$/, "Qr$i - with use re eval");
+ eval { "5$i" =~ /$o/ };
+ like($@, ($i == 3 ? qr/^$/ : qr/no method found,/),
+ "Qr$i bare - with use re eval");
+ }
+ }
+ else {
+ ok("A5$i" =~ /^A$o$/, "Qr$i");
+ ok("5$i" =~ /$o/, "Qr$i bare");
+ }
+ }
+
+ my $o = bless [ bless [], "Qr1" ], 'Qr_indirect';
+ ok("A51" =~ /^A$o/, "Qr_indirect");
+ ok("51" =~ /$o/, "Qr_indirect bare");
+ }
+
+ { # Various flags weren't being set when a [] is optimized into an
+ # EXACTish node
+ ;
+ ;
+ ok("\x{017F}\x{017F}" =~ qr/^[\x{00DF}]?$/i, "[] to EXACTish optimization");
+ }
+
+ {
+ for my $char (":", "\x{f7}", "\x{2010}") {
+ my $utf8_char = $char;
+ utf8::upgrade($utf8_char);
+ my $display = $char;
+ $display = display($display);
+ my $utf8_display = "utf8::upgrade(\"$display\")";
+
+ like($char, qr/^$char?$/, "\"$display\" =~ /^$display?\$/");
+ like($char, qr/^$utf8_char?$/, "my \$p = \"$display\"; utf8::upgrade(\$p); \"$display\" =~ /^\$p?\$/");
+ like($utf8_char, qr/^$char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); \"\$c\" =~ /^$display?\$/");
+ like($utf8_char, qr/^$utf8_char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); my \$p = \"$display\"; utf8::upgrade(\$p); \"\$c\" =~ /^\$p?\$/");
+ }
+ }
+
+ {
+ # #116148: Pattern utf8ness sticks around globally
+ # the utf8 in the first match was sticking around for the second
+ # match
+
+ use feature 'unicode_strings';
+
+ my $x = "\x{263a}";
+ $x =~ /$x/;
+
+ my $text = "Perl";
+ ok("Perl" =~ /P.*$/i, '#116148');
+ }
+
+ { # 117327: Sequence (?#...) not recognized in regex
+ # The space between the '(' and '?' is now deprecated; this test should
+ # be removed when the deprecation is made fatal.
+ no warnings;
+ like("ab", qr/a( ?#foo)b/x);
+ }
+
+ { # 118297: Mixing up- and down-graded strings in regex
+ utf8::upgrade(my $u = "\x{e5}");
+ utf8::downgrade(my $d = "\x{e5}");
+ my $warned;
+ local $SIG{__WARN__} = sub { $warned++ if $_[0] =~ /\AMalformed UTF-8/ };
+ my $re = qr/$u$d/;
+ ok(!$warned, "no warnings when interpolating mixed up-/downgraded strings in pattern");
+ my $c = "\x{e5}\x{e5}";
+ utf8::downgrade($c);
+ like($c, $re, "mixed up-/downgraded pattern matches downgraded string");
+ utf8::upgrade($c);
+ like($c, $re, "mixed up-/downgraded pattern matches upgraded string");
+ }
+
+ {
+ # if we have 87 capture buffers defined then \87 should refer to the 87th.
+ # test that this is true for 1..100
+ # Note that this test causes the engine to recurse at runtime, and
+ # hence use a lot of C stack.
+ for my $i (1..100) {
+ my $capture= "a";
+ $capture= "($capture)" for 1 .. $i;
+ for my $mid ("","b") {
+ my $str= "a${mid}a";
+ my $backref= "\\$i";
+ eval {
+ ok($str=~/$capture$mid$backref/,"\\$i works with $i buffers '$str'=~/...$mid$backref/");
+ 1;
+ } or do {
+ is("$@","","\\$i works with $i buffers works with $i buffers '$str'=~/...$mid$backref/");
+ };
+ }
+ }
+ }
+
+ # this mixture of readonly (not COWable) and COWable strings
+ # messed up the capture buffers under COW. The actual test results
+ # are incidental; the issue is was an AddressSanitizer failure
+ {
+ my $c ='AB';
+ my $res = '';
+ for ($c, 'C', $c, 'DE') {
+ ok(/(.)/, "COWable match");
+ $res .= $1;
+ }
+ is($res, "ACAD");
+ }
+
+
+ {
+ # RT #45667
+ # /[#$x]/x didn't interpolate the var $x.
+ my $b = 'cd';
+ my $s = 'abcd$%#&';
+ $s =~ s/[a#$b%]/X/g;
+ is ($s, 'XbXX$XX&', 'RT #45667 without /x');
+ $s = 'abcd$%#&';
+ $s =~ s/[a#$b%]/X/gx;
+ is ($s, 'XbXX$XX&', 'RT #45667 with /x');
+ }
+
+ {
+ no warnings "uninitialized";
+ my @a;
+ $a[1]++;
+ /@a/;
+ pass('no crash with /@a/ when array has nonexistent elems');
+ }
+
+ {
+ is runperl(prog => 'delete $::{qq-\cR-}; //; print qq-ok\n-'),
+ "ok\n",
+ 'deleting *^R does not result in crashes';
+ no warnings 'once';
+ *^R = *caretRglobwithnoscalar;
+ "" =~ /(?{42})/;
+ is $^R, 42, 'assigning to *^R does not result in a crash';
+ is runperl(
+ stderr => 1,
+ prog => 'eval q|'
+ .' q-..- =~ /(??{undef *^R;q--})(?{42})/; '
+ .' print qq-$^R\n-'
+ .'|'
+ ),
+ "42\n",
+ 'undefining *^R within (??{}) does not result in a crash';
+ }
+
+ {
+ # [perl #120446]
+ # this code should be virtually instantaneous. If it takes 10s of
+ # seconds, there a bug in intuit_start.
+ # (this test doesn't actually test for slowness - that involves
+ # too much danger of false positives on loaded machines - but by
+ # putting it here, hopefully someone might notice if it suddenly
+ # runs slowly)
+ my $s = ('a' x 1_000_000) . 'b';
+ my $i = 0;
+ for (1..10_000) {
+ pos($s) = $_;
+ $i++ if $s =~/\Gb/g;
+ }
+ is($i, 0, "RT 120446: mustn't run slowly");
+ }
+
+ # These are based on looking at the code in regcomp.c
+ # We don't look for specific code, just the existence of an SSC
+ foreach my $re (qw( qr/a?c/
+ qr/a?c/i
+ qr/[ab]?c/
+ qr/\R?c/
+ qr/\d?c/d
+ qr/\w?c/l
+ qr/\s?c/a
+ qr/[[:alpha:]]?c/u
+ )) {
+ SKIP: {
+ skip "no re-debug under miniperl" if is_miniperl;
+ my $prog = <<"EOP";
+use re qw(Debug COMPILE);
+$re;
+EOP
+ fresh_perl_like($prog, qr/synthetic stclass/, { stderr=>1 }, "$re generates a synthetic start class");
+ }
+ }
+
+ {
+ like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works";
+ }