BEGIN {
chdir 't' if -d 't';
- @INC = ('../lib','.');
+ @INC = ('../lib','.','../ext/re');
require Config; import Config;
require './test.pl';
+ skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
+ skip_all_without_unicode_tables();
}
-plan tests => 711; # Update this when adding/deleting tests.
+plan tests => 772; # Update this when adding/deleting tests.
run_tests() unless caller;
SKIP: {
skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale});
- BEGIN {
- if($Config{d_setlocale}) {
- require locale; import locale;
- }
- }
+ use locale;
$locale = qr/\b\v$/;
is($locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale');
no locale;
SKIP: {
skip 'No locale testing without d_setlocale', 2 if(!$Config{d_setlocale});
- BEGIN {
- if($Config{d_setlocale}) {
- require locale; import locale;
- }
- }
+ use locale;
is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale');
is(qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale');
}
ok($_ =~ /^abc\Gdef$/, $message);
pos = 3;
ok($_ =~ /c\Gd/, $message);
+ pos = 3;
+ ok($_ =~ /..\GX?def/, $message);
}
{
ok("Perl" =~ /P.*$/i, '#116148');
}
- { # 117327: Sequence (?#...) not recognized in regex
- # The space between the '(' and '?' is now deprecated; this test should
- # be removed when the deprecation is made fatal.
- no warnings;
- like("ab", qr/a( ?#foo)b/x);
- }
-
{ # 118297: Mixing up- and down-graded strings in regex
utf8::upgrade(my $u = "\x{e5}");
utf8::downgrade(my $d = "\x{e5}");
'undefining *^R within (??{}) does not result in a crash';
}
- {
- # [perl #120446]
- # this code should be virtually instantaneous. If it takes 10s of
- # seconds, there a bug in intuit_start.
- # (this test doesn't actually test for slowness - that involves
- # too much danger of false positives on loaded machines - but by
- # putting it here, hopefully someone might notice if it suddenly
- # runs slowly)
- my $s = ('a' x 1_000_000) . 'b';
- my $i = 0;
- for (1..10_000) {
- pos($s) = $_;
- $i++ if $s =~/\Gb/g;
+ SKIP: { # Test literal range end point special handling
+ unless ($::IS_EBCDIC) {
+ skip "Valid only for EBCDIC", 24;
}
- is($i, 0, "RT 120446: mustn't run slowly");
+
+ like("\x89", qr/[i-j]/, '"\x89" should match [i-j]');
+ unlike("\x8A", qr/[i-j]/, '"\x8A" shouldnt match [i-j]');
+ unlike("\x90", qr/[i-j]/, '"\x90" shouldnt match [i-j]');
+ like("\x91", qr/[i-j]/, '"\x91" should match [i-j]');
+
+ like("\x89", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x89" should match [i-\N{LATIN SMALL LETTER J}]');
+ unlike("\x8A", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x8A" shouldnt match [i-\N{LATIN SMALL LETTER J}]');
+ unlike("\x90", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x90" shouldnt match [i-\N{LATIN SMALL LETTER J}]');
+ like("\x91", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x91" should match [i-\N{LATIN SMALL LETTER J}]');
+
+ like("\x89", qr/[i-\N{U+6A}]/, '"\x89" should match [i-\N{U+6A}]');
+ unlike("\x8A", qr/[i-\N{U+6A}]/, '"\x8A" shouldnt match [i-\N{U+6A}]');
+ unlike("\x90", qr/[i-\N{U+6A}]/, '"\x90" shouldnt match [i-\N{U+6A}]');
+ like("\x91", qr/[i-\N{U+6A}]/, '"\x91" should match [i-\N{U+6A}]');
+
+ like("\x89", qr/[\N{U+69}-\N{U+6A}]/, '"\x89" should match [\N{U+69}-\N{U+6A}]');
+ unlike("\x8A", qr/[\N{U+69}-\N{U+6A}]/, '"\x8A" shouldnt match [\N{U+69}-\N{U+6A}]');
+ unlike("\x90", qr/[\N{U+69}-\N{U+6A}]/, '"\x90" shouldnt match [\N{U+69}-\N{U+6A}]');
+ like("\x91", qr/[\N{U+69}-\N{U+6A}]/, '"\x91" should match [\N{U+69}-\N{U+6A}]');
+
+ like("\x89", qr/[i-\x{91}]/, '"\x89" should match [i-\x{91}]');
+ like("\x8A", qr/[i-\x{91}]/, '"\x8A" should match [i-\x{91}]');
+ like("\x90", qr/[i-\x{91}]/, '"\x90" should match [i-\x{91}]');
+ like("\x91", qr/[i-\x{91}]/, '"\x91" should match [i-\x{91}]');
+
+ # Need to use eval, because tries to compile on ASCII platforms even
+ # though the tests are skipped, and fails because 0x89-j is an illegal
+ # range there.
+ like("\x89", eval "qr/[\x{89}-j]/", '"\x89" should match [\x{89}-j]');
+ like("\x8A", eval "qr/[\x{89}-j]/", '"\x8A" should match [\x{89}-j]');
+ like("\x90", eval "qr/[\x{89}-j]/", '"\x90" should match [\x{89}-j]');
+ like("\x91", eval "qr/[\x{89}-j]/", '"\x91" should match [\x{89}-j]');
}
# These are based on looking at the code in regcomp.c
qr/\d?c/d
qr/\w?c/l
qr/\s?c/a
- qr/[[:alpha:]]?c/u
+ qr/[[:lower:]]?c/u
)) {
SKIP: {
skip "no re-debug under miniperl" if is_miniperl;
}
}
+ {
+ like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works";
+ }
+
+ {
+ # Verify that the very last Latin-1 U+00FF
+ # (LATIN SMALL LETTER Y WITH DIAERESIS)
+ # and its UPPER counterpart (U+0178 which is pure Unicode),
+ # and likewise for the very first pure Unicode
+ # (LATIN CAPITAL LETTER A WITH MACRON) fold-match properly,
+ # and there are no off-by-one logic errors in the transition zone.
+
+ ok("\xFF" =~ /\xFF/i, "Y WITH DIAERESIS l =~ l");
+ ok("\xFF" =~ /\x{178}/i, "Y WITH DIAERESIS l =~ u");
+ ok("\x{178}" =~ /\xFF/i, "Y WITH DIAERESIS u =~ l");
+ ok("\x{178}" =~ /\x{178}/i, "Y WITH DIAERESIS u =~ u");
+
+ # U+00FF with U+05D0 (non-casing Hebrew letter).
+ ok("\xFF\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS l =~ l");
+ ok("\xFF\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS l =~ u");
+ ok("\x{178}\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS u =~ l");
+ ok("\x{178}\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS u =~ u");
+
+ # U+0100.
+ ok("\x{100}" =~ /\x{100}/i, "A WITH MACRON u =~ u");
+ ok("\x{100}" =~ /\x{101}/i, "A WITH MACRON u =~ l");
+ ok("\x{101}" =~ /\x{100}/i, "A WITH MACRON l =~ u");
+ ok("\x{101}" =~ /\x{101}/i, "A WITH MACRON l =~ l");
+ }
+
+ {
+ use utf8;
+ ok("abc" =~ /a\85b\85c/x, "NEL is white-space under /x");
+ }
+
+ {
+ ok('a(b)c' =~ qr(a\(b\)c), "'\\(' is a literal in qr(...)");
+ ok('a[b]c' =~ qr[a\[b\]c], "'\\[' is a literal in qr[...]");
+ ok('a{3}c' =~ qr{a\{3\}c}, # Only failed when { could be a meta
+ "'\\{' is a literal in qr{...}, where it could be a quantifier");
+ # This one is for completeness
+ ok('a<b>c' =~ qr<a\<b\>c>, "'\\<' is a literal in qr<...>)");
+ }
+
+ { # Was getting optimized into EXACT (non-folding node)
+ my $x = qr/[x]/i;
+ utf8::upgrade($x);
+ like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case");
+ }
+
+ { # make sure we get an error when \p{} cannot load Unicode tables
+ fresh_perl_like(<<' prog that cannot load uni tables',
+ BEGIN {
+ @INC = '../lib';
+ require utf8; require 'utf8_heavy.pl';
+ @INC = ();
+ }
+ $name = 'A B';
+ if ($name =~ /(\p{IsUpper}) (\p{IsUpper})/){
+ print "It's good! >$1< >$2<\n";
+ } else {
+ print "It's not good...\n";
+ }
+ prog that cannot load uni tables
+ qr/^Can't locate unicore\/Heavy\.pl(?x:
+ )|^Can't find Unicode property definition/,
+ undef,
+ '\p{} should not fail silently when uni tables evanesce');
+ }
+
+ { # Special handling of literal-ended ranges in [...] was breaking this
+ use utf8;
+ like("ÿ", qr/[ÿ-ÿ]/, "\"ÿ\" should match [ÿ-ÿ]");
+ }
+
+ { # [perl #123539]
+ like("TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, "");
+ like("TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, "");
+ }
+
+ { # [perl #123604]
+ my($s, $x, @x) = ('abc', 'a', 'd');
+ my $long = 'b' x 2000;
+ my $eval = q{$s =~ m{$x[bbb]c} ? 1 : 0};
+ $eval =~ s{bbb}{$long};
+ my $match = eval $eval;
+ ok(1, "did not crash");
+ ok($match, "[bbb...] resolved as character class, not subscript");
+ }
+
+ { # [perl #123755]
+ for my $pat ('(??', '(?P', '(?i-') {
+ eval qq{ qr/$pat/ };
+ ok(1, "qr/$pat/ did not crash");
+ eval qq{ qr/${pat}\x{123}/ };
+ my $e = $@;
+ like($e, qr{\x{123}},
+ "qr/${pat}x/ shows x in error even if it's a wide character");
+ }
+ }
+
+ {
+ # Expect one of these sizes to cause overflow and wrap to negative
+ for my $bits (32, 64) {
+ my $wrapneg = 2 ** ($bits - 2) * 3;
+ for my $sign ('', '-') {
+ my $pat = sprintf "qr/(?%s%u)/", $sign, $wrapneg;
+ eval $pat;
+ ok(1, "big backref $pat did not crash");
+ }
+ }
+ }
+ {
+ # Test that we handle qr/\8888888/ and variants without an infinite loop,
+ # we use a test within a test so we can todo it, and make sure we don't
+ # infinite loop our tests.
+ # NOTE - Do not put quotes in the code!
+ # NOTE - We have to triple escape the backref in the pattern below.
+ my $code='
+ BEGIN{require q(test.pl);}
+ watchdog(3);
+ for my $len (1 .. 20) {
+ my $eights= q(8) x $len;
+ eval qq{ qr/\\\\$eights/ };
+ }
+ print q(No infinite loop here!);
+ ';
+ fresh_perl_is($code, "No infinite loop here!", {},
+ "test that we handle things like m/\\888888888/ without infinite loops" );
+ }
+
+ {
+ # [perl #123843] hits SEGV trying to compile this pattern
+ my $match;
+ eval q{ ($match) = ("xxyxxyxy" =~ m{(x+(y(?1))*)}) };
+ ok(1, "compiled GOSUB in CURLYM ok");
+ is($match, 'xxyxxyx', "matched GOSUB in CURLYM");
+ }
} # End of sub run_tests
1;