set_up_inc('../lib', '.', '../ext/re');
}
-skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
skip_all_without_unicode_tables();
-plan tests => 967; # Update this when adding/deleting tests.
+plan tests => 1019; # Update this when adding/deleting tests.
run_tests() unless caller;
SKIP:
{ # Long Monsters
- skip('limited memory', 20) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'};
+ my @trials = (125, 140, 250, 270, 300000, 30);
- for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory
+ skip('limited memory', @trials * 4) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'};
+
+ for my $l (@trials) { # Ordered to free memory
my $a = 'a' x $l;
my $message = "Long monster, length = $l";
like("ba$a=", qr/a$a=/, $message);
SKIP:
{ # 20000 nodes, each taking 3 words per string, and 1 per branch
- skip('limited memory', 20) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'};
-
- my $long_constant_len = join '|', 12120 .. 32645;
- my $long_var_len = join '|', 8120 .. 28645;
my %ans = ( 'ax13876y25677lbc' => 1,
'ax13876y25677mcb' => 0, # not b.
'ax13876y35677nbc' => 0, # Num too big
'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs
);
+ skip('limited memory', 2 * scalar keys %ans) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'};
+
+ my $long_constant_len = join '|', 12120 .. 32645;
+ my $long_var_len = join '|', 8120 .. 28645;
+
for (keys %ans) {
my $message = "20000 nodes, const-len '$_'";
ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o), $message;
{ # Test that it avoids spllitting a multi-char fold across nodes.
# These all fold to things that are like 'ss', which, if split across
# nodes could fail to match a single character that folds to the
- # combination.
+ # combination. 1F0 byte expands when folded;
my $utf8_locale = find_utf8_ctype_locale();
- for my $char('F', $sharp_s, "\x{FB00}") {
+ for my $char('F', $sharp_s, "\x{1F0}", "\x{FB00}") {
my $length = 260; # Long enough to overflow an EXACTFish regnode
my $p = $char x $length;
- my $s = ($char eq $sharp_s) ? 'ss' : 'ff';
+ my $s = ($char eq $sharp_s) ? 'ss'
+ : $char eq "\x{1F0}"
+ ? "j\x{30c}"
+ : 'ff';
$s = $s x $length;
for my $charset (qw(u d l aa)) {
for my $utf8 (0..1) {
- SKIP:
for my $locale ('C', $utf8_locale) {
- skip "test skipped for non-C locales", 2
+ SKIP:
+ {
+ skip "test skipped for non-C locales", 2
if $charset ne 'l'
&& (! defined $locale || $locale ne 'C');
- if ($charset eq 'l') {
- if (! defined $locale) {
- skip "No UTF-8 locale", 2;
+ if ($charset eq 'l') {
+ if (! defined $locale) {
+ skip "No UTF-8 locale", 2;
+ }
+ skip "Can't test in miniperl",2
+ if is_miniperl();
+
+ require POSIX;
+ POSIX::setlocale(&LC_CTYPE, $locale);
}
- use POSIX;
- POSIX::setlocale(&LC_CTYPE, $locale);
+ my $pat = $p;
+ utf8::upgrade($pat) if $utf8;
+ my $should_pass =
+ ( $charset eq 'u'
+ || ($charset eq 'd' && $utf8)
+ || ($charset eq 'd' && ( $char =~ /[[:ascii:]]/
+ || ord $char > 255))
+ || ($charset eq 'aa' && $char =~ /[[:ascii:]]/)
+ || ($charset eq 'l' && $locale ne 'C')
+ || ($charset eq 'l' && $char =~ /[[:ascii:]]/)
+ );
+ my $name = "(?i$charset), utf8=$utf8, locale=$locale,"
+ . " char=" . sprintf "%x", ord $char;
+ no warnings 'locale';
+ is (eval " '$s' =~ qr/(?i$charset)$pat/;",
+ $should_pass, $name);
+ fail "$name: $@" if $@;
+ is (eval " 'a$s' =~ qr/(?i$charset)a$pat/;",
+ $should_pass, "extra a, $name");
+ fail "$name: $@" if $@;
}
-
- my $pat = $p;
- utf8::upgrade($pat) if $utf8;
- my $should_pass =
- ( $charset eq 'u'
- || ($charset eq 'd' && $utf8)
- || ($charset eq 'd' && ( $char =~ /[[:ascii:]]/
- || ord $char > 255))
- || ($charset eq 'aa' && $char =~ /[[:ascii:]]/)
- || ($charset eq 'l' && $locale ne 'C')
- || ($charset eq 'l' && $char =~ /[[:ascii:]]/)
- );
- my $name = "(?i$charset), utf8=$utf8, locale=$locale,"
- . " char=" . sprintf "%x", ord $char;
- no warnings 'locale';
- is (eval " '$s' =~ qr/(?i$charset)$pat/;",
- $should_pass, $name);
- fail "$name: $@" if $@;
- is (eval " 'a$s' =~ qr/(?i$charset)a$pat/;",
- $should_pass, "extra a, $name");
- fail "$name: $@" if $@;
}
}
}
}
}
+ SKIP:
{
+ skip "no re debug", 5 if is_miniperl;
my $s = ("0123456789" x 26214) x 2; # Should fill 2 LEXACTS, plus
# small change
my $pattern_prefix = "use utf8; use re qw(Debug COMPILE)";
fail($pattern);
fresh_perl($pattern, { stderr => 0, verbose => 1 });
}
- like($result, qr/Final program[^X]*\bLEXACT_ONLY8\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s,
+ like($result, qr/Final program[^X]*\bLEXACT_REQ8\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s,
"Check that an LEXACT_ONLY node is generated with a \\x{100}");
- like($s, qr/$s/, "Check that LEXACT_ONLY8 nodes match");
+ like($s, qr/$s/, "Check that LEXACT_REQ8 nodes match");
}
{
like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case");
}
-SKIP: { # make sure we get an error when \p{} cannot load Unicode tables
- skip("Unicode tables always now loaded", 1);
- fresh_perl_like(<<' prog that cannot load uni tables',
- BEGIN {
- @INC = '../lib';
- require utf8; require 'utf8_heavy.pl';
- @INC = ();
- }
- $name = 'A B';
- if ($name =~ /(\p{IsUpper}) (\p{IsUpper})/){
- print "It's good! >$1< >$2<\n";
- } else {
- print "It's not good...\n";
- }
- prog that cannot load uni tables
- qr/^Can't locate unicore\/Heavy\.pl(?x:
- )|^Can't find Unicode property definition/,
- undef,
- '\p{} should not fail silently when uni tables evanesce');
- }
-
{ # Special handling of literal-ended ranges in [...] was breaking this
use utf8;
like("ÿ", qr/[ÿ-ÿ]/, "\"ÿ\" should match [ÿ-ÿ]");
"test that we handle things like m/\\888888888/ without infinite loops" );
}
+ SKIP:
{ # Test that we handle some malformed UTF-8 without looping [perl
# #123562]
-
+ skip "no Encode", 1 if is_miniperl;
my $code='
BEGIN{require q(./test.pl);}
use Encode qw(_utf8_on);
{ # [perl #133871], ASAN/valgrind out-of-bounds access
fresh_perl_like('qr/(?|(())|())|//', qr/syntax error/, {}, "[perl #133871]");
}
+ { # [perl #133871], ASAN/valgrind out-of-bounds access
+ fresh_perl_like('qr/\p{nv:NAnq}/', qr/Can't find Unicode property definition/, {}, "GH #17367");
+ }
+ { # GH #17370, ASAN/valgrind out-of-bounds access
+ fresh_perl_like('qr/\p{nv:qnan}/', qr/Can't find Unicode property definition/, {}, "GH #17370");
+ }
+ { # GH #17371, segfault
+ fresh_perl_like('qr/\p{nv=\\\\\}(?0)|\337ss|\337ss//', qr/Unicode property wildcard not terminated/, {}, "GH #17371");
+ }
+ { # GH #17384, ASAN/valgrind out-of-bounds access
+ fresh_perl_like('"q0" =~ /\p{__::Is0}/', qr/Unknown user-defined property name \\p\{__::Is0}/, {}, "GH #17384");
+ }
+
+ SKIP:
{ # [perl #133921], segfault
+ skip "Not valid for EBCDIC", 5 if $::IS_EBCDIC;
+
fresh_perl_is('qr\ 40||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\x\0ÿÿÿþ\0\0\0ù\Q`\Qx`\0\ 1{0c!}\ 1e;\0\0\0ù\ò`\Qm`\x{0c!}\;\;îçÿ \0\7fç\0\0\0!\0F\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ù\0\0\0\7f`x{0c!}\ 1e;\0\0\0ù\Q`\Qx`\x{c!}\;\;îç!}\;îçÿù\Q\87 \x\0ÿÿÿÿ\0\0>=\Qx`\Qx`\0\0ù\ò`\Qx`\x{0c!};\;îçÿ \0F\ 5\0n0t0\0c \0\80\ 1d;t \0\0\0ù \0\7fç\80\0\0!00000000000000000000000m/00000000000000000000\ e00000000000m/\10\10\10\10\x{){} \10\10\10\10)|\10\10\ 4i', "", {}, "[perl #133921]");
fresh_perl_is('\ 4|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \x\0ÿÿÿÿ\0\0\0ù\Q`\Qx`\0\ 1{0d ;\0\0\0ù\ò`\Qm`\x{0c!}\;\;îçÿ \0\7fç\0\0\0!\0F\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ù\0\0\0\7f`x{0c!}\ 1;\0\0\0ù\Q`\Qq`\x{c!}\;\;îç!}\;îçÿù\Q\87 \x\0ÿÿÿÿ\0\0>=\Qx`\Qx`\0\0ù\ò`\Qx`\x{0c!};\;îçÿ \00000000F\ 5\0m0t0\0c \0\80\ 1d;t \0\0\0ù \0\7fç\80\0\0!00000000000000000000000m/00000000000000000000\ e00000000000m/\10\10\10\10\x{){} \10\10\10\10)|\10\10\ 4\ 4i', "", {}, "[perl #133921]");
$quote x 8 . $back x 69,
$quote x 5 . $back x 4,
$ff x 48;
- like(runperl(prog => "$s", stderr => 1), qr/Unmatched \(/);
+ like(fresh_perl("$s", { stderr => 1, }), qr/Unmatched \(/);
}
{ # GitHub #17196, caused assertion failure
"Assertion failure matching /il on single char folding to multi");
}
+ { # Test ANYOFHs
+ my $pat = qr/[\x{4000001}\x{4000003}\x{4000005}]+/;
+ unlike("\x{4000000}", $pat, "4000000 isn't in pattern");
+ like("\x{4000001}", $pat, "4000001 is in pattern");
+ unlike("\x{4000002}", $pat, "4000002 isn't in pattern");
+ like("\x{4000003}", $pat, "4000003 is in pattern");
+ unlike("\x{4000004}", $pat, "4000004 isn't in pattern");
+ like("\x{4000005}", $pat, "4000005 is in pattern");
+ unlike("\x{4000006}", $pat, "4000006 isn't in pattern");
+
+ # gh #17319
+ $pat = qr/[\N{U+200D}\N{U+2000}]()/;
+ unlike("\x{1FFF}", $pat, "1FFF isn't in pattern");
+ like("\x{2000}", $pat, "2000 is in pattern");
+ unlike("\x{2001}", $pat, "2001 isn't in pattern");
+ unlike("\x{200C}", $pat, "200C isn't in pattern");
+ like("\x{200D}", $pat, "200 is in pattern");
+ unlike("\x{200E}", $pat, "200E isn't in pattern");
+ }
+
+ # gh17490: test recursion check
+ {
+ my $eval = '(?{1})';
+ my $re = sprintf '(?&FOO)(?(DEFINE)(?<FOO>%sfoo))', $eval x 20;
+ my $result = eval qq{"foo" =~ /$re/};
+ is($@ // '', '', "many evals did not die");
+ ok($result, "regexp correctly matched");
+ }
+
+ # gh16947: test regexp corruption (GOSUB)
+ {
+ fresh_perl_is(q{
+ 'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok'
+ }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)');
+ }
+ # gh16947: test fix doesn't break SUSPEND
+ {
+ fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' },
+ 'ok', {}, "gh16947: test fix doesn't break SUSPEND");
+ }
+
} # End of sub run_tests
1;