use strict;
use warnings;
+no warnings 'experimental::vlb';
use 5.010;
sub run_tests;
BEGIN {
chdir 't' if -d 't';
- @INC = ('../lib','.','../ext/re');
require Config; import Config;
- require './test.pl';
- skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
- skip_all_without_unicode_tables();
+ require './test.pl'; require './charset_tools.pl';
+ require './loc_tools.pl';
+ set_up_inc('../lib', '.', '../ext/re');
}
-plan tests => 770; # Update this when adding/deleting tests.
+skip_all_without_unicode_tables();
+
+plan tests => 1011; # Update this when adding/deleting tests.
run_tests() unless caller;
# Tests start here.
#
sub run_tests {
+ my $sharp_s = uni_to_native("\xdf");
{
my $x = "abc\ndef\n";
$null = "";
$xyz =~ /$null/;
is($&, $xyz, $message);
+
+ # each entry: regexp, match string, $&, //o match success
+ my @tests =
+ (
+ [ "", "xy", "x", 1 ],
+ [ "y", "yz", "y", !1 ],
+ );
+ for my $test (@tests) {
+ my ($re, $str, $matched, $omatch) = @$test;
+ $xyz =~ /x/o;
+ ok($str =~ /$re/, "$str matches /$re/");
+ is($&, $matched, "on $matched");
+ $xyz =~ /x/o;
+ is($str =~ /$re/o, $omatch, "$str matches /$re/o (or not)");
+ }
}
{
# Defaults assumed if this fails
eval { require Config; };
- $::reg_infty = $Config::Config{reg_infty} // 32767;
+ $::reg_infty = $Config::Config{reg_infty} // 65535;
$::reg_infty_m = $::reg_infty - 1;
$::reg_infty_p = $::reg_infty + 1;
$::reg_infty_m = $::reg_infty_m; # Suppress warning.
like($@, qr/^\QQuantifier in {,} bigger than/, $message);
eval "'aaa' =~ /a{1,$::reg_infty_p}/";
like($@, qr/^\QQuantifier in {,} bigger than/, $message);
+
+ # It should be 'a' x 2147483647, but that exhausts memory on
+ # reasonably sized modern machines
+ like('a' x $::reg_infty_p, qr/a{1,}/,
+ "{1,} matches more times than REG_INFTY");
}
{
ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit";
}
- {
- # Long Monsters
+ SKIP:
+ { # Long Monsters
+
+ skip('limited memory', 20) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'};
+
for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory
my $a = 'a' x $l;
my $message = "Long monster, length = $l";
}
}
- {
- # 20000 nodes, each taking 3 words per string, and 1 per branch
+ SKIP:
+ { # 20000 nodes, each taking 3 words per string, and 1 per branch
+
+ skip('limited memory', 20) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'};
+
my $long_constant_len = join '|', 12120 .. 32645;
my $long_var_len = join '|', 8120 .. 28645;
my %ans = ( 'ax13876y25677lbc' => 1,
$_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e";
my $expect = "(bla()) ((l)u((e))) (l(e)e)";
- use vars '$c';
+ our $c;
sub matchit {
m/
(
my $locale;
SKIP: {
- skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale});
+ skip 'Locales not available', 1 unless locales_enabled('LC_CTYPE');
use locale;
$locale = qr/\b\v$/;
is(qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale');
SKIP: {
- skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale});
+ skip 'Locales not available', 1 unless locales_enabled('LC_CTYPE');
is(qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings');
}
no feature 'unicode_strings';
SKIP: {
- skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale});
-
+ skip 'Locales not available', 1 unless locales_enabled('LC_CTYPE');
is(qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings');
}
is(qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings');
SKIP: {
- skip 'No locale testing without d_setlocale', 2 if(!$Config{d_setlocale});
+ skip 'Locales not available', 2 unless locales_enabled('LC_CTYPE');
use locale;
is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale');
}
{
- my $message = '@- and @+ tests';
+ my $message = '@- and @+ and @{^CAPTURE} tests';
- /a(?=.$)/;
+ $_= "ace";
+ /c(?=.$)/;
+ is($#{^CAPTURE}, -1, $message);
is($#+, 0, $message);
is($#-, 0, $message);
is($+ [0], 2, $message);
ok(!defined $+ [1] && !defined $- [1] &&
!defined $+ [2] && !defined $- [2], $message);
- /a(a)(a)/;
+ /a(c)(e)/;
+ is($#{^CAPTURE}, 1, $message); # one less than $#-
is($#+, 2, $message);
is($#-, 2, $message);
is($+ [0], 3, $message);
is($- [0], 0, $message);
+ is(${^CAPTURE}[0], "c", $message);
is($+ [1], 2, $message);
is($- [1], 1, $message);
+ is(${^CAPTURE}[1], "e", $message);
is($+ [2], 3, $message);
is($- [2], 2, $message);
ok(!defined $+ [3] && !defined $- [3] &&
+ !defined ${^CAPTURE}[2] && !defined ${^CAPTURE}[3] &&
!defined $+ [4] && !defined $- [4], $message);
# Exists has a special check for @-/@+ - bug 45147
ok(exists $-[0], $message);
ok(exists $+[0], $message);
+ ok(exists ${^CAPTURE}[0], $message);
+ ok(exists ${^CAPTURE}[1], $message);
ok(exists $-[2], $message);
ok(exists $+[2], $message);
+ ok(!exists ${^CAPTURE}[2], $message);
ok(!exists $-[3], $message);
ok(!exists $+[3], $message);
+ ok(exists ${^CAPTURE}[-1], $message);
+ ok(exists ${^CAPTURE}[-2], $message);
ok(exists $-[-1], $message);
ok(exists $+[-1], $message);
ok(exists $-[-3], $message);
ok(exists $+[-3], $message);
ok(!exists $-[-4], $message);
ok(!exists $+[-4], $message);
+ ok(!exists ${^CAPTURE}[-3], $message);
- /.(a)(b)?(a)/;
+
+ /.(c)(b)?(e)/;
+ is($#{^CAPTURE}, 2, $message); # one less than $#-
is($#+, 3, $message);
is($#-, 3, $message);
+ is(${^CAPTURE}[0], "c", $message);
+ is(${^CAPTURE}[2], "e", $message . "[$1 $3]");
is($+ [1], 2, $message);
is($- [1], 1, $message);
is($+ [3], 3, $message);
is($- [3], 2, $message);
ok(!defined $+ [2] && !defined $- [2] &&
- !defined $+ [4] && !defined $- [4], $message);
+ !defined $+ [4] && !defined $- [4] &&
+ !defined ${^CAPTURE}[1], $message);
- /.(a)/;
+ /.(c)/;
+ is($#{^CAPTURE}, 0, $message); # one less than $#-
is($#+, 1, $message);
is($#-, 1, $message);
+ is(${^CAPTURE}[0], "c", $message);
is($+ [0], 2, $message);
is($- [0], 0, $message);
is($+ [1], 2, $message);
is($- [1], 1, $message);
ok(!defined $+ [2] && !defined $- [2] &&
- !defined $+ [3] && !defined $- [3], $message);
+ !defined $+ [3] && !defined $- [3] &&
+ !defined ${^CAPTURE}[1], $message);
- /.(a)(ba*)?/;
+ /.(c)(ba*)?/;
+ is($#{^CAPTURE}, 0, $message); # one less than $#-
is($#+, 2, $message);
is($#-, 1, $message);
- # Check that values don’t stick
+ # Check that values don't stick
" "=~/()()()(.)(..)/;
- my($m,$p) = (\$-[5], \$+[5]);
- () = "$$_" for $m, $p; # FETCH (or eqv.)
+ my($m,$p,$q) = (\$-[5], \$+[5], \${^CAPTURE}[4]);
+ () = "$$_" for $m, $p, $q; # FETCH (or eqv.)
" " =~ /()/;
is $$m, undef, 'values do not stick to @- elements';
is $$p, undef, 'values do not stick to @+ elements';
+ is $$q, undef, 'values do not stick to @{^CAPTURE} elements';
}
foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)',
+ '${^CAPTURE}[0] = 13',
'@- = qw (foo bar)', '$^N = 42') {
is(eval $_, undef);
like($@, qr/^Modification of a read-only value attempted/,
@b = grep /\s/, @a;
@c = grep /[\s]/, @a;
is("@b", "@c", $message);
+
+ # Test an inverted posix class with a char also in the class.
+ my $nbsp = chr utf8::unicode_to_native(0xA0);
+ my $non_s = chr utf8::unicode_to_native(0xA1);
+ my $pat_string = "[^\\S ]";
+ unlike(" ", qr/$pat_string/, "Verify ' ' !~ /$pat_string/");
+ like("\t", qr/$pat_string/, "Verify '\\t =~ /$pat_string/");
+ unlike($nbsp, qr/$pat_string/, "Verify non-utf8-NBSP !~ /$pat_string/");
+ utf8::upgrade($nbsp);
+ like($nbsp, qr/$pat_string/, "Verify utf8-NBSP =~ /$pat_string/");
+ unlike($non_s, qr/$pat_string/, "Verify non-utf8-inverted-bang !~ /$pat_string/");
+ utf8::upgrade($non_s);
+ unlike($non_s, qr/$pat_string/, "Verify utf8-inverted-bang !~ /$pat_string/");
}
{
my $message = '\D, [\D], \d, [\d]';
}
{
# we are actually testing that we dont die when executing these patterns
- my $e = "B\x{f6}ck";
+ my $e = "B" . uni_to_native("\x{f6}") . "ck";
ok(!utf8::is_utf8($e), "got a latin string - rt75680");
ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680");
}
- SKIP: { # Some constructs with Latin1 characters cause a utf8 string not
- # to match itself in non-utf8
- if ($::IS_EBCDIC) {
- skip "Needs to be customized to run on EBCDIC", 6;
- }
- my $c = "\xc0";
- my $pattern = my $utf8_pattern = qr/((\xc0)+,?)/;
+ { # Some constructs with Latin1 characters cause a utf8 string not
+ # to match itself in non-utf8
+ my $c = uni_to_native("\xc0");
+ my $pattern = my $utf8_pattern = qr/(($c)+,?)/;
utf8::upgrade($utf8_pattern);
ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8";
ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8";
ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8";
}
- SKIP: { # Make sure can override the formatting
- if ($::IS_EBCDIC) {
- skip "Needs to be customized to run on EBCDIC", 2;
- }
+ { # Make sure can override the formatting
use feature 'unicode_strings';
- ok "\xc0" =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/';
- ok "\xc0" !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/';
+ ok uni_to_native("\xc0") =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/';
+ ok uni_to_native("\xc0") !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/';
}
{
{ # [perl #111174]
use re '/u';
- like "\xe0", qr/(?i:\xc0)/, "(?i: shouldn't lose the passed in /u";
+ my $A_grave = uni_to_native("\xc0");
+ like uni_to_native("\xe0"), qr/(?i:$A_grave)/, "(?i: shouldn't lose the passed in /u";
use re '/a';
unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a";
use re '/aa';
unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa";
+ unlike 'k', qr'(?i:\N{KELVIN SIGN})', "(?i: shouldn't lose the passed in /aa";
}
{
# consider the UTF8ness of the previous and current pattern
# string, as well as the physical bytes of the pattern string
- for my $s ("\xc4\x80", "\x{100}") {
+ for my $s (byte_utf8a_to_utf8n("\xc4\x80"), "\x{100}") {
ok($s =~ /^$s$/, "re-compile check is UTF8-aware");
}
}
{ # Various flags weren't being set when a [] is optimized into an
# EXACTish node
- ;
- ;
- ok("\x{017F}\x{017F}" =~ qr/^[\x{00DF}]?$/i, "[] to EXACTish optimization");
+ ok("\x{017F}\x{017F}" =~ qr/^[$sharp_s]?$/i, "[] to EXACTish optimization");
+ }
+
+ { # Test that it avoids spllitting a multi-char fold across nodes.
+ # These all fold to things that are like 'ss', which, if split across
+ # nodes could fail to match a single character that folds to the
+ # combination. 1F0 byte expands when folded;
+ my $utf8_locale = find_utf8_ctype_locale();
+ for my $char('F', $sharp_s, "\x{1F0}", "\x{FB00}") {
+ my $length = 260; # Long enough to overflow an EXACTFish regnode
+ my $p = $char x $length;
+ my $s = ($char eq $sharp_s) ? 'ss'
+ : $char eq "\x{1F0}"
+ ? "j\x{30c}"
+ : 'ff';
+ $s = $s x $length;
+ for my $charset (qw(u d l aa)) {
+ for my $utf8 (0..1) {
+ for my $locale ('C', $utf8_locale) {
+ SKIP:
+ {
+ skip "test skipped for non-C locales", 2
+ if $charset ne 'l'
+ && (! defined $locale || $locale ne 'C');
+ if ($charset eq 'l') {
+ if (! defined $locale) {
+ skip "No UTF-8 locale", 2;
+ }
+ skip "Can't test in miniperl",2
+ if is_miniperl();
+
+ require POSIX;
+ POSIX::setlocale(&LC_CTYPE, $locale);
+ }
+
+ my $pat = $p;
+ utf8::upgrade($pat) if $utf8;
+ my $should_pass =
+ ( $charset eq 'u'
+ || ($charset eq 'd' && $utf8)
+ || ($charset eq 'd' && ( $char =~ /[[:ascii:]]/
+ || ord $char > 255))
+ || ($charset eq 'aa' && $char =~ /[[:ascii:]]/)
+ || ($charset eq 'l' && $locale ne 'C')
+ || ($charset eq 'l' && $char =~ /[[:ascii:]]/)
+ );
+ my $name = "(?i$charset), utf8=$utf8, locale=$locale,"
+ . " char=" . sprintf "%x", ord $char;
+ no warnings 'locale';
+ is (eval " '$s' =~ qr/(?i$charset)$pat/;",
+ $should_pass, $name);
+ fail "$name: $@" if $@;
+ is (eval " 'a$s' =~ qr/(?i$charset)a$pat/;",
+ $should_pass, "extra a, $name");
+ fail "$name: $@" if $@;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ SKIP:
+ {
+ skip "no re debug", 5 if is_miniperl;
+ my $s = ("0123456789" x 26214) x 2; # Should fill 2 LEXACTS, plus
+ # small change
+ my $pattern_prefix = "use utf8; use re qw(Debug COMPILE)";
+ my $pattern = "$pattern_prefix; qr/$s/;";
+ my $result = fresh_perl($pattern);
+ if ($? != 0) { # Re-run so as to display STDERR.
+ fail($pattern);
+ fresh_perl($pattern, { stderr => 0, verbose => 1 });
+ }
+ like($result, qr/Final program[^X]*\bLEXACT\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s,
+ "Check that LEXACT nodes are generated");
+ like($s, qr/$s/, "Check that LEXACT nodes match");
+ like("a$s", qr/a$s/, "Previous test preceded by an 'a'");
+ substr($s, 260000, 1) = "\x{100}";
+ $pattern = "$pattern_prefix; qr/$s/;";
+ $result = fresh_perl($pattern, { 'wide_chars' => 1 } );
+ if ($? != 0) { # Re-run so as to display STDERR.
+ fail($pattern);
+ fresh_perl($pattern, { stderr => 0, verbose => 1 });
+ }
+ like($result, qr/Final program[^X]*\bLEXACT_REQ8\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s,
+ "Check that an LEXACT_ONLY node is generated with a \\x{100}");
+ like($s, qr/$s/, "Check that LEXACT_REQ8 nodes match");
}
{
- for my $char (":", "\x{f7}", "\x{2010}") {
+ for my $char (":", uni_to_native("\x{f7}"), "\x{2010}") {
my $utf8_char = $char;
utf8::upgrade($utf8_char);
my $display = $char;
# test that this is true for 1..100
# Note that this test causes the engine to recurse at runtime, and
# hence use a lot of C stack.
+
+ # Compiling for all 100 nested captures blows the stack under
+ # clang and ASan; reduce.
+ my $max_captures = $Config{ccflags} =~ /sanitize/ ? 20 : 100;
+
for my $i (1..100) {
+ if ($i > $max_captures) {
+ pass("skipping $i buffers under ASan aa");
+ pass("skipping $i buffers under ASan aba");
+ next;
+ }
my $capture= "a";
$capture= "($capture)" for 1 .. $i;
for my $mid ("","b") {
# Need to use eval, because tries to compile on ASCII platforms even
# though the tests are skipped, and fails because 0x89-j is an illegal
# range there.
- like("\x89", eval "qr/[\x{89}-j]/", '"\x89" should match [\x{89}-j]');
- like("\x8A", eval "qr/[\x{89}-j]/", '"\x8A" should match [\x{89}-j]');
- like("\x90", eval "qr/[\x{89}-j]/", '"\x90" should match [\x{89}-j]');
- like("\x91", eval "qr/[\x{89}-j]/", '"\x91" should match [\x{89}-j]');
+ like("\x89", eval 'qr/[\x{89}-j]/', '"\x89" should match [\x{89}-j]');
+ like("\x8A", eval 'qr/[\x{89}-j]/', '"\x8A" should match [\x{89}-j]');
+ like("\x90", eval 'qr/[\x{89}-j]/', '"\x90" should match [\x{89}-j]');
+ like("\x91", eval 'qr/[\x{89}-j]/', '"\x91" should match [\x{89}-j]');
}
# These are based on looking at the code in regcomp.c
like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works";
}
- {
+ SKIP: {
+ skip("Tests are ASCII-centric, some would fail on EBCDIC", 12) if $::IS_EBCDIC;
+
# Verify that the very last Latin-1 U+00FF
# (LATIN SMALL LETTER Y WITH DIAERESIS)
# and its UPPER counterpart (U+0178 which is pure Unicode),
like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case");
}
- { # make sure we get an error when \p{} cannot load Unicode tables
- fresh_perl_like(<<' prog that cannot load uni tables',
- BEGIN {
- @INC = '../lib';
- require utf8; require 'utf8_heavy.pl';
- @INC = ();
- }
- $name = 'A B';
- if ($name =~ /(\p{IsUpper}) (\p{IsUpper})/){
- print "It's good! >$1< >$2<\n";
- } else {
- print "It's not good...\n";
- }
- prog that cannot load uni tables
- qr/^Can't locate unicore\/Heavy\.pl(?x:
- )|^Can't find Unicode property definition/,
- undef,
- '\p{} should not fail silently when uni tables evanesce');
- }
-
{ # Special handling of literal-ended ranges in [...] was breaking this
use utf8;
like("ÿ", qr/[ÿ-ÿ]/, "\"ÿ\" should match [ÿ-ÿ]");
}
}
{
- local $::TODO = "not fixed yet";
-
# Test that we handle qr/\8888888/ and variants without an infinite loop,
# we use a test within a test so we can todo it, and make sure we don't
# infinite loop our tests.
# NOTE - Do not put quotes in the code!
# NOTE - We have to triple escape the backref in the pattern below.
my $code='
- BEGIN{require q(test.pl);}
+ BEGIN{require q(./test.pl);}
watchdog(3);
for my $len (1 .. 20) {
my $eights= q(8) x $len;
fresh_perl_is($code, "No infinite loop here!", {},
"test that we handle things like m/\\888888888/ without infinite loops" );
}
+
+ SKIP:
+ { # Test that we handle some malformed UTF-8 without looping [perl
+ # #123562]
+ skip "no Encode", 1 if is_miniperl;
+ my $code='
+ BEGIN{require q(./test.pl);}
+ use Encode qw(_utf8_on);
+ # \x80 and \x41 are continuation bytes in their respective
+ # character sets
+ my $malformed = (ord("A") == 65) ? "a\x80\n" : "a\x41\n";
+ utf8::downgrade($malformed);
+ _utf8_on($malformed);
+ watchdog(3);
+ $malformed =~ /(\n\r|\r)$/;
+ print q(No infinite loop here!);
+ ';
+ fresh_perl_like($code, qr/Malformed UTF-8 character/, {},
+ "test that we handle some UTF-8 malformations without looping" );
+ }
+
+ {
+ # [perl #123843] hits SEGV trying to compile this pattern
+ my $match;
+ eval q{ ($match) = ("xxyxxyxy" =~ m{(x+(y(?1))*)}) };
+ ok(1, "compiled GOSUB in CURLYM ok");
+ is($match, 'xxyxxyx', "matched GOSUB in CURLYM");
+ }
+
+ {
+ # [perl #123852] doesn't avoid all the capture-related work with
+ # //n, leading to possible memory corruption
+ eval q{ qr{()(?1)}n };
+ my $error = $@;
+ ok(1, "qr{()(?1)}n didn't crash");
+ like($error, qr{Reference to nonexistent group},
+ 'gave appropriate error for qr{()(?1)}n');
+ }
+
+ {
+ # [perl #126406] panic with unmatchable quantifier
+ my $code='
+ no warnings "regexp";
+ "" =~ m/(.0\N{6,0}0\N{6,0}000000000000000000000000000000000)/;
+ ';
+ fresh_perl_is($code, "", {},
+ "perl [#126406] panic");
+ }
+ {
+ my $bug="[perl #126182]"; # test for infinite pattern recursion
+ for my $tuple (
+ [ 'q(a)=~/(.(?2))((?<=(?=(?1)).))/', "died", "look ahead left recursion fails fast" ],
+ [ 'q(aa)=~/(?R)a/', "died", "left-recursion fails fast", ],
+ [ 'q(bbaa)=~/(?&x)(?(DEFINE)(?<x>(?&y)*a)(?<y>(?&x)*b))/',
+ "died", "inter-cyclic optional left recursion dies" ],
+ [ 'q(abc) =~ /a((?1)?)c/', "died", "optional left recursion dies" ],
+ [ 'q(abc) =~ /a((?1)??)c/', "died", "min mod left recursion dies" ],
+ [ 'q(abc) =~ /a((?1)*)c/', "died", "* left recursion dies" ],
+ [ 'q(abc) =~ /a((?1)+)c/', "died", "+ left recursion dies" ],
+ [ 'q(abc) =~ /a((?1){0,3})c/', "died", "{0,3} left recursion fails fast" ],
+
+ [ 'q(aaabbb)=~/a(?R)?b/', "matched", "optional self recursion works" ],
+ [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?0))*+\\\\))/', "matched",
+ "recursion and possessive captures", "((5maa-maa)(maa-3maa))"],
+ [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?1))*+\\\\))/', "matched",
+ "recursion and possessive captures", "((5maa-maa)(maa-3maa))"],
+ [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?0))*\\\\))/', "matched",
+ "recursion and possessive captures", "((5maa-maa)(maa-3maa))"],
+ [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?1))*\\\\))/', "matched",
+ "recursion and possessive captures", "((5maa-maa)(maa-3maa))"],
+ ) {
+ my ($expr, $expect, $test_name, $cap1)= @$tuple;
+ # avoid quotes in this code!
+ my $code='
+ BEGIN{require q(./test.pl);}
+ watchdog(3);
+ my $status= eval(q{ !(' . $expr . ') ? q(failed) : ' .
+ ($cap1 ? '($1 ne q['.$cap1.']) ? qq(badmatch:$1) : ' : '') .
+ ' q(matched) })
+ || ( ( $@ =~ /Infinite recursion/ ) ? qq(died) : q(strange-death) );
+ print $status;
+ ';
+ fresh_perl_is($code, $expect, {}, "$bug - $test_name" );
+ }
+ }
+ {
+ fresh_perl_is('
+ BEGIN{require q(test.pl);}
+ watchdog(3);
+ $SIG{ALRM} = sub {print "Timeout\n"; exit(1)};
+ alarm 1;
+ $_ = "a" x 1000 . "b" x 1000 . "c" x 1000;
+ /.*a.*b.*c.*[de]/;
+ ',"Timeout",{},"Test Perl 73464")
+ }
+
+ { # [perl #128686], crashed the the interpreter
+ my $AE = chr utf8::unicode_to_native(0xC6);
+ my $ae = chr utf8::unicode_to_native(0xE6);
+ my $re = qr/[$ae\s]/i;
+ ok($AE !~ $re, '/[\xE6\s]/i doesn\'t match \xC6 when not in UTF-8');
+ utf8::upgrade $AE;
+ ok($AE =~ $re, '/[\xE6\s]/i matches \xC6 when in UTF-8');
+ }
+
+ {
+ is(0+("\n" =~ m'\n'), 1, q|m'\n' should interpolate escapes|);
+ }
+
+ {
+ my $str = "a\xB6";
+ ok( $str =~ m{^(a|a\x{b6})$}, "fix [perl #129950] - latin1 case" );
+ utf8::upgrade($str);
+ ok( $str =~ m{^(a|a\x{b6})$}, "fix [perl #129950] - utf8 case" );
+ }
+ {
+ my $got= run_perl( switches => [ '-l' ], prog => <<'EOF_CODE' );
+ my $died= !eval {
+ $_=qq(ab);
+ print;
+ my $p=qr/(?{ s!!x! })/;
+ /$p/;
+ print;
+ /a/;
+ /$p/;
+ print;
+ /b/;
+ /$p/;
+ print;
+ //;
+ 1;
+ };
+ $error = $died ? ($@ || qq(Zombie)) : qq(none);
+ print $died ? qq(died) : qq(lived);
+ print qq(Error: $@);
+EOF_CODE
+ my @got= split /\n/, $got;
+ is($got[0],"ab","empty pattern in regex codeblock: got expected start string");
+ is($got[1],"xab",
+ "empty pattern in regex codeblock: first subst with no last-match worked right");
+ is($got[2],"xxb","empty pattern in regex codeblock: second subst worked right");
+ is($got[3],"xxx","empty pattern in regex codeblock: third subst worked right");
+ is($got[4],"died","empty pattern in regex codeblock: died as expected");
+ like($got[5],qr/Error: Infinite recursion via empty pattern/,
+ "empty pattern in regex codeblock: produced the right exception message" );
+ }
+
+ # This test is based on the one directly above, which happened to
+ # leak. Repeat the test, but stripped down to the bare essentials
+ # of the leak, which is to die while executing a regex which is
+ # already the current regex, thus causing the saved outer set of
+ # capture offsets to leak. The test itself doesn't do anything
+ # except sit around hoping not to be triggered by ASan
+ {
+ eval {
+ my $s = "abcd";
+ $s =~ m{([abcd]) (?{ die if $1 eq 'd'; })}gx;
+ $s =~ //g;
+ $s =~ //g;
+ $s =~ //g;
+ };
+ pass("call to current regex doesn't leak");
+ }
+
+
+
+ {
+ # [perl #130495] /x comment skipping stopped a byte short, leading
+ # to assertion failure or 'malformed utf-8 character" warning
+ fresh_perl_is(
+ "use utf8; m{a#\x{124}}x", '', {wide_chars => 1},
+ '[perl #130495] utf-8 character at end of /x comment should not misparse',
+ );
+ }
+ {
+ # [perl #130522] causes out-of-bounds read detected by clang with
+ # address=sanitized when length of the STCLASS string is greater than
+ # length of target string.
+ my $re = qr{(?=\0z)\0?z?$}i;
+ my($yes, $no) = (1, "");
+ for my $test (
+ [ $no, undef, '<undef>' ],
+ [ $no, '', '' ],
+ [ $no, "\0", '\0' ],
+ [ $yes, "\0z", '\0z' ],
+ [ $no, "\0z\0", '\0z\0' ],
+ [ $yes, "\0z\n", '\0z\n' ],
+ ) {
+ my($result, $target, $disp) = @$test;
+ no warnings qw/uninitialized/;
+ is($target =~ $re, $result, "[perl #130522] with target '$disp'");
+ }
+ }
+ {
+ # [perl #129377] backref to an unmatched capture should not cause
+ # reading before start of string.
+ SKIP: {
+ skip "no re-debug under miniperl" if is_miniperl;
+ my $prog = <<'EOP';
+use re qw(Debug EXECUTE);
+"x" =~ m{ () y | () \1 }x;
+EOP
+ fresh_perl_like($prog, qr{
+ \A (?! .* ^ \s+ - )
+ }msx, { stderr => 1 }, "Offsets in debug output are not negative");
+ }
+ }
+ {
+ # buffer overflow
+
+ # This test also used to leak - fixed by the commit which added
+ # this line.
+
+ fresh_perl_is("BEGIN{\$^H=0x200000}\ns/[(?{//xx",
+ "Unmatched [ in regex; marked by <-- HERE in m/[ <-- HERE (?{/ at (eval 1) line 1.\n",
+ {}, "buffer overflow for regexp component");
+ }
+ {
+ # [perl #129281] buffer write overflow, detected by ASAN, valgrind
+ fresh_perl_is('/0(?0)|^*0(?0)|^*(^*())0|/', '', {}, "don't bump whilem_c too much");
+ }
+ {
+ # RT #131893 - fails with ASAN -fsanitize=undefined
+ fresh_perl_is('qr/0(0?(0||00*))|/', '', {}, "integer overflow during compilation");
+ }
+
+ {
+ # RT #131575 intuit skipping back from the end to find the highest
+ # possible start point, was potentially hopping back beyond pos()
+ # and crashing by calling fbm_instr with a negative length
+
+ my $text = "=t=\x{5000}";
+ pos($text) = 3;
+ ok(scalar($text !~ m{(~*=[a-z]=)}g), "RT #131575");
+ }
+ {
+ fresh_perl_is('"AA" =~ m/AA{1,0}/','',{},"handle OPFAIL insert properly");
+ }
+ {
+ fresh_perl_is('$_="0\x{1000000}";/^000?\0000/','',{},"dont throw assert errors trying to fbm past end of string");
+ }
+ { # [perl $132227]
+ fresh_perl_is("('0ba' . ('ss' x 300)) =~ m/0B\\N{U+41}" . $sharp_s x 150 . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui");
+
+ # A variation, but as far as khw knows not part of 132227
+ fresh_perl_is("'0bssa' =~ m/0B" . $sharp_s . "\\N{U+41}" . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui");
+ }
+ { # [perl $132164]
+ fresh_perl_is('m m0*0+\Rm', "",{},"Undefined behavior in address sanitizer");
+ }
+ { # [perl #133642]
+ fresh_perl_is('no warnings "experimental::vlb";
+ m/((?<=(0?)))/', "",{},"Was getting 'Double free'");
+ }
+ { # [perl #133782]
+ # this would panic on DEBUGGING builds
+ fresh_perl_is(<<'CODE', "ok\nok\n",{}, 'Bad length magic was left on $^R');
+while( "\N{U+100}bc" =~ /(..?)(?{$^N})/g ) {
+ print "ok\n" if length($^R)==length("$^R");
+}
+CODE
+ }
+ { # [perl #133871], ASAN/valgrind out-of-bounds access
+ fresh_perl_like('qr/(?|(())|())|//', qr/syntax error/, {}, "[perl #133871]");
+ }
+ SKIP:
+ { # [perl #133921], segfault
+ skip "Not valid for EBCDIC", 5 if $::IS_EBCDIC;
+
+ fresh_perl_is('qr\ 40||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\x\0ÿÿÿþ\0\0\0ù\Q`\Qx`\0\ 1{0c!}\ 1e;\0\0\0ù\ò`\Qm`\x{0c!}\;\;îçÿ \0\7fç\0\0\0!\0F\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ù\0\0\0\7f`x{0c!}\ 1e;\0\0\0ù\Q`\Qx`\x{c!}\;\;îç!}\;îçÿù\Q\87 \x\0ÿÿÿÿ\0\0>=\Qx`\Qx`\0\0ù\ò`\Qx`\x{0c!};\;îçÿ \0F\ 5\0n0t0\0c \0\80\ 1d;t \0\0\0ù \0\7fç\80\0\0!00000000000000000000000m/00000000000000000000\ e00000000000m/\10\10\10\10\x{){} \10\10\10\10)|\10\10\ 4i', "", {}, "[perl #133921]");
+ fresh_perl_is('\ 4|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \x\0ÿÿÿÿ\0\0\0ù\Q`\Qx`\0\ 1{0d ;\0\0\0ù\ò`\Qm`\x{0c!}\;\;îçÿ \0\7fç\0\0\0!\0F\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ù\0\0\0\7f`x{0c!}\ 1;\0\0\0ù\Q`\Qq`\x{c!}\;\;îç!}\;îçÿù\Q\87 \x\0ÿÿÿÿ\0\0>=\Qx`\Qx`\0\0ù\ò`\Qx`\x{0c!};\;îçÿ \00000000F\ 5\0m0t0\0c \0\80\ 1d;t \0\0\0ù \0\7fç\80\0\0!00000000000000000000000m/00000000000000000000\ e00000000000m/\10\10\10\10\x{){} \10\10\10\10)|\10\10\ 4\ 4i', "", {}, "[perl #133921]");
+
+fresh_perl_is('s\ 4|ß+W0ü0f0\Qx0\Qx0x0c0G0xgive0000000000000O0h\8d000x0 \xòÿÿÿ\0\0ù\Q`\Q
+
+\1a
+
+
+ ç
+
+
+
+
+
+
+
+
+
+
+
+
+\ 5
+
+
+x{0c!}\;\;çÿ \0\7fq0/i0/!\0F\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ù\0\0\0\7f`x{0c!}\ 1e;\0\0\0ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q\87\ 1\x\0ÿÿÿÿ\0\0>=\Qx`\Qx`\0\0ù\ò`ÿ\0\0>=\Qx`\Qx`\0\0ù\ò`\Qx`\x{0c!};\;îçÿ \0u00000F\ 5\0000t0\0p \0\80\ 1d? \0\0\0ù \0\7fç\80\0\0!00000000000000000000000m/00000000000000000000\ e00000000000m/0\0\10\10\10\\0\0\ 1\0} \10\10\10\10)|\10\10\ 4\ 4i', "", {}, "[perl #133921]");
+
+ fresh_perl_is('\ 4a aú\0\0úv sWtrt\10\0\ó||ß+W\ eü\16ef\0ù\Qx`\Qx`\x{1c!gGnuc given1111111111111O1111each\8d111\jx` \x\0òÿÿÿ\0\0\0ù\Qx`\Q
+
+
+
+
+
+ ç
+
+
+
+
+
+
+
+
+
+
+
+
+\ 5
+
+
+x{1c!}\;\;îçÿp \0\7fqr/elsif/!\0eF\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ùHQx\0\0\0\7f`Lx{1c!}\ 1e;\0\0\0ù\Qx`\Qx`\x{1c!}\;ÿÿÿÿc!}\;îçÿù\Qx\87\ 1\x\0ÿÿÿÿ\0\0>=\Qx`\Qx`\0\0ù\òx`ÿ\0\0>=\Qx`\Qx`\0\0ù\òx`\Qx`\x{1c!}8;\;îçÿp \0unshifteF\ 5\0normat0\0cmp \0\80\ 1d?not \0\0\0ùp \0\7fç\80\0\0!0000000000000000000000000m/000000000000000000000\ e00000000000m/0R\0\10\10\10\\0\0\ 1\0} \10\10\10\10)|\10\10\ 4\10\10\10\10\10\ 1\aï||K??\8fp\80¿ÿÿfúd{\\ e{\ 4gri\ 4\ 4{\x{1x/}\0 ð¹NuntiÀh', "", {}, "[perl #133921]");
+
+ fresh_perl_is('s\ 4|ß+W0ü0f0\Qx0\Qx0x0c0g0c 000n0000000000000O0h\8d000x0 \xòÿÿÿ\0\0ù\Q`\Q
+
+
+
+
+
+ ç
+
+
+
+
+
+
+
+
+
+
+
+
+\ 5
+
+
+x{0c!}\;\;îçÿ \0\7f/0f/!\0F\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ù\0\0\0\7f`x{0c!}\ 1;\0\0\0ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q\87\ 1\x\0ÿÿÿÿ\0\0>=\Qx`\Qx`\0\0ù\ò`ÿ\0\0>=\Qx`\Qx`\0\0ù\ò`\Qx`\x{0c!};\;îçÿ \0000t0F\ 5\0000t0\0p \0\80\ 1d?n \0\0\0ù \0\7fç\80\0\0!00000000000000000000000m/00000000000000000000\ e00000000000m/\0\10\10\10\\0\0\ 1\0} \10\10\10\10)|\10\10\ 4\ 4i', "", {}, "[perl #133933]");
+ }
+
+ { # perl #133998]
+ fresh_perl_is('print "\x{110000}" =~ qr/(?l)|[^\S\pC\s]/', 1, {},
+ '/[\S\s]/l works');
+ }
+
+ { # perl #133995]
+ use utf8;
+ fresh_perl_is('"έδωσαν ελληνικήვე" =~ m/[^0](?=0)0?/', "",
+ {wide_chars => 1},
+ '[^0] doesnt crash on UTF-8 target string');
+ }
+
+ { # [perl #133992] This is a tokenizer bug of parsing a pattern
+ fresh_perl_is(q:$z = do {
+ use utf8;
+ "q!Ñ\82еÑ\81Ñ\82! =~ m'"
+ };
+ $z .= 'è(?#\84';
+ $z .= "'";
+ eval $z;:, "", {}, 'foo');
+ }
+
+ { # [perl #134325]
+ my $quote="\\Q";
+ my $back="\\\\";
+ my $ff="\xff";
+ my $s = sprintf "/\\1|(|%s)%s%s /i",
+ $quote x 8 . $back x 69,
+ $quote x 5 . $back x 4,
+ $ff x 48;
+ like(runperl(prog => "$s", stderr => 1), qr/Unmatched \(/);
+ }
+
+ { # GitHub #17196, caused assertion failure
+ fresh_perl_like('("0" x 258) =~ /(?l)0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000/',
+ qr/^$/,
+ {},
+ "Assertion failure with /l exact string longer than a single node");
+ }
+
+SKIP:
+ { # [perl #134334], Assertion failure
+ my $utf8_locale = find_utf8_ctype_locale();
+ skip "no UTF-8 locale available" unless $utf8_locale;
+ fresh_perl_like("use POSIX; POSIX::setlocale(&LC_CTYPE, '$utf8_locale'); 'ssss' =~ /\xDF+?sX/il;",
+ qr/^$/,
+ {},
+ "Assertion failure matching /il on single char folding to multi");
+ }
+
+ { # Test ANYOFHs
+ my $pat = qr/[\x{4000001}\x{4000003}\x{4000005}]+/;
+ unlike("\x{4000000}", $pat, "4000000 isn't in pattern");
+ like("\x{4000001}", $pat, "4000001 is in pattern");
+ unlike("\x{4000002}", $pat, "4000002 isn't in pattern");
+ like("\x{4000003}", $pat, "4000003 is in pattern");
+ unlike("\x{4000004}", $pat, "4000004 isn't in pattern");
+ like("\x{4000005}", $pat, "4000005 is in pattern");
+ unlike("\x{4000006}", $pat, "4000006 isn't in pattern");
+
+ # gh #17319
+ $pat = qr/[\N{U+200D}\N{U+2000}]()/;
+ unlike("\x{1FFF}", $pat, "1FFF isn't in pattern");
+ like("\x{2000}", $pat, "2000 is in pattern");
+ unlike("\x{2001}", $pat, "2001 isn't in pattern");
+ unlike("\x{200C}", $pat, "200C isn't in pattern");
+ like("\x{200D}", $pat, "200 is in pattern");
+ unlike("\x{200E}", $pat, "200E isn't in pattern");
+ }
+
} # End of sub run_tests
1;
+
+#
+# ex: set ts=8 sts=4 sw=4 et:
+#