X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/92e82afa16f5f1aa1b3e163f6d4656d14c44a4d2..b6e093f25daabf3a19790f257fae436db4adf515:/t/re/pat.t diff --git a/t/re/pat.t b/t/re/pat.t index 4ef9663..770a45a 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -2,9 +2,7 @@ # # This is a home for regular expression tests that don't fit into # the format supported by re/regexp.t. If you want to add a test -# that does fit that format, add it to re/re_tests, not here. Tests for \N -# should be added here because they are treated as single quoted strings -# there, which means they avoid the lexer which otherwise would look at them. +# that does fit that format, add it to re/re_tests, not here. use strict; use warnings; @@ -17,11 +15,12 @@ $| = 1; BEGIN { chdir 't' if -d 't'; - @INC = ('../lib','.'); + @INC = ('../lib','.','../ext/re'); + require Config; import Config; require './test.pl'; } -plan tests => 451; # Update this when adding/deleting tests. +plan tests => 738; # Update this when adding/deleting tests. run_tests() unless caller; @@ -154,7 +153,7 @@ sub run_tests { { $_ = 'now is the {time for all} good men to come to.'; - / {([^}]*)}/; + / \{([^}]*)}/; is($1, 'time for all', "Match braces"); } @@ -518,24 +517,44 @@ sub run_tests { is(qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles'); my $dual = qr/\b\v$/; - use locale; - my $locale = qr/\b\v$/; - is($locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale'); - no locale; + my $locale; + + SKIP: { + skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale}); + + use locale; + $locale = qr/\b\v$/; + is($locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale'); + no locale; + } use feature 'unicode_strings'; my $unicode = qr/\b\v$/; is($unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings'); is(qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); - is(qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings'); + + SKIP: { + skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale}); + + is(qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings'); + } no feature 'unicode_strings'; - is(qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings'); + SKIP: { + skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale}); + + is(qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings'); + } + is(qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings'); - use locale; - is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); - is(qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale'); + SKIP: { + skip 'No locale testing without d_setlocale', 2 if(!$Config{d_setlocale}); + + use locale; + is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); + is(qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale'); + } } { @@ -676,12 +695,21 @@ sub run_tests { /.(a)(ba*)?/; is($#+, 2, $message); is($#-, 1, $message); + + # Check that values don’t stick + " "=~/()()()(.)(..)/; + my($m,$p) = (\$-[5], \$+[5]); + () = "$$_" for $m, $p; # FETCH (or eqv.) + " " =~ /()/; + is $$m, undef, 'values do not stick to @- elements'; + is $$p, undef, 'values do not stick to @+ elements'; } - foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)', '@- = qw (foo bar)') { + foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)', + '@- = qw (foo bar)', '$^N = 42') { is(eval $_, undef); like($@, qr/^Modification of a read-only value attempted/, - 'Elements of @- and @+ are read-only'); + '$^N, @- and @+ are read-only'); } { @@ -698,12 +726,40 @@ sub run_tests { like($str, qr/^..\G/, $message); unlike($str, qr/^...\G/, $message); ok($str =~ /\G../ && $& eq 'cd', $message); - - local $::TODO = $::running_as_thread; ok($str =~ /.\G./ && $& eq 'bc', $message); + + } + + { + my $message = '\G and intuit and anchoring'; + $_ = "abcdef"; + pos = 0; + ok($_ =~ /\Gabc/, $message); + ok($_ =~ /^\Gabc/, $message); + + pos = 3; + ok($_ =~ /\Gdef/, $message); + pos = 3; + ok($_ =~ /\Gdef$/, $message); + pos = 3; + ok($_ =~ /abc\Gdef$/, $message); + pos = 3; + ok($_ =~ /^abc\Gdef$/, $message); + pos = 3; + ok($_ =~ /c\Gd/, $message); + pos = 3; + ok($_ =~ /..\GX?def/, $message); } { + my $s = '123'; + pos($s) = 1; + my @a = $s =~ /(\d)\G/g; # this infinitely looped up till 5.19.1 + is("@a", "1", '\G looping'); + } + + + { my $message = 'pos inside (?{ })'; my $str = 'abcde'; our ($foo, $bar); @@ -771,22 +827,19 @@ sub run_tests { my $message = '\G anchor checks'; my $foo = 'aabbccddeeffgg'; pos ($foo) = 1; - { - local $::TODO = $::running_as_thread; - no warnings 'uninitialized'; - ok($foo =~ /.\G(..)/g, $message); - is($1, 'ab', $message); - pos ($foo) += 1; - ok($foo =~ /.\G(..)/g, $message); - is($1, 'cc', $message); + ok($foo =~ /.\G(..)/g, $message); + is($1, 'ab', $message); - pos ($foo) += 1; - ok($foo =~ /.\G(..)/g, $message); - is($1, 'de', $message); + pos ($foo) += 1; + ok($foo =~ /.\G(..)/g, $message); + is($1, 'cc', $message); - ok($foo =~ /\Gef/g, $message); - } + pos ($foo) += 1; + ok($foo =~ /.\G(..)/g, $message); + is($1, 'de', $message); + + ok($foo =~ /\Gef/g, $message); undef pos $foo; ok($foo =~ /\G(..)/g, $message); @@ -801,6 +854,36 @@ sub run_tests { } { + my $message = 'basic \G floating checks'; + my $foo = 'aabbccddeeffgg'; + pos ($foo) = 1; + + ok($foo =~ /a+\G(..)/g, "$message: a+\\G"); + is($1, 'ab', "$message: ab"); + + pos ($foo) += 1; + ok($foo =~ /b+\G(..)/g, "$message: b+\\G"); + is($1, 'cc', "$message: cc"); + + pos ($foo) += 1; + ok($foo =~ /d+\G(..)/g, "$message: d+\\G"); + is($1, 'de', "$message: de"); + + ok($foo =~ /\Gef/g, "$message: \\Gef"); + + pos ($foo) = 1; + + ok($foo =~ /(?=a+\G)(..)/g, "$message: (?a+\\G)"); + is($1, 'aa', "$message: aa"); + + pos ($foo) = 2; + + ok($foo =~ /a(?=a+\G)(..)/g, "$message: a(?=a+\\G)"); + is($1, 'ab', "$message: ab"); + + } + + { $_ = '123x123'; my @res = /(\d*|x)/g; local $" = '|'; @@ -986,7 +1069,7 @@ sub run_tests { my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; - is("@space0", "cr ff lf spc tab", $message); + is("@space0", "cr ff lf spc tab vt", $message); is("@space1", "cr ff lf spc tab vt", $message); is("@space2", "spc tab", $message); } @@ -1070,51 +1153,6 @@ sub run_tests { } { - # Test that a regex followed by an operator and/or a statement modifier work - # These tests use string-eval so that it reports a clean error when it fails - # (without the string eval the test script might be unparseable) - - # Note: these test check the behaviour that currently is valid syntax - # If a new regex modifier is added and a test fails then there is a backwards-compatibility issue - # Note-2: a new deprecate warning was added for this with commit e6897b1a5db0410e387ccbf677e89fc4a1d8c97a - # which indicate that this syntax will be removed in 5.16. - # When this happens the tests can be removed - - foreach (['my $r = "a" =~ m/a/lt 2', 'm', 'lt'], - ['my $r = "a" =~ m/a/le 1', 'm', 'le'], - ['my $r = "a" =~ m/a/eq 1', 'm', 'eq'], - ['my $r = "a" =~ m/a/ne 0', 'm', 'ne'], - ['my $r = "a" =~ m/a/and 1', 'm', 'and'], - ['my $r = "a" =~ m/a/unless 0', 'm', 'unless'], - ['my $c = 1; my $r; $r = "a" =~ m/a/while $c--', 'm', 'while'], - ['my $c = 0; my $r; $r = "a" =~ m/a/until $c++', 'm', 'until'], - ['my $r; $r = "a" =~ m/a/for 1', 'm', 'for'], - ['my $r; $r = "a" =~ m/a/foreach 1', 'm', 'foreach'], - - ['my $t = "a"; my $r = $t =~ s/a//lt 2', 's', 'lt'], - ['my $t = "a"; my $r = $t =~ s/a//le 1', 's', 'le'], - ['my $t = "a"; my $r = $t =~ s/a//ne 0', 's', 'ne'], - ['my $t = "a"; my $r = $t =~ s/a//and 1', 's', 'and'], - ['my $t = "a"; my $r = $t =~ s/a//unless 0', 's', 'unless'], - - ['my $c = 1; my $r; my $t = "a"; $r = $t =~ s/a//while $c--', 's', 'while'], - ['my $c = 0; my $r; my $t = "a"; $r = $t =~ s/a//until $c++', 's', 'until'], - ['my $r; my $t = "a"; $r = $t =~ s/a//for 1', 's', 'for'], - ['my $r; my $t = "a"; $r = $t =~ s/a//for 1', 's', 'foreach'], - ) { - my $message = sprintf 'regex (%s) followed by $_->[2]', - $_->[1] eq 'm' ? 'm//' : 's///'; - my $code = "$_->[0]; 'eval_ok ' . \$r"; - my $result = do { - no warnings 'syntax'; - eval $code; - }; - is($@, '', $message); - is($result, 'eval_ok 1', $message); - } - } - - { my $str= "\x{100}"; chop $str; my $qr= qr/$str/; @@ -1167,6 +1205,413 @@ sub run_tests { is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state'); } + { + # Suppress warnings, as the non-unicode one comes out even if turn off + # warnings here (because the execution is done in another scope). + local $SIG{__WARN__} = sub {}; + my $str = "\x{110000}"; + + unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{AHEX=True}"); + like($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\p{AHEX=False}"); + like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{AHEX=True}"); + unlike($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{AHEX=FALSE}"); + } + + { + # Test that IDstart works, but because the author (khw) knows + # regexes much better than the rest of the core, it is being done here + # in the context of a regex which relies on buffer names beginng with + # IDStarts. + use utf8; + my $str = "abc"; + like($str, qr/(?abc)/, "'a' is legal IDStart"); + like($str, qr/(?<_>abc)/, "'_' is legal IDStart"); + like($str, qr/(?<ß>abc)/, "U+00DF is legal IDStart"); + like($str, qr/(?<ℕ>abc)/, "U+2115' is legal IDStart"); + + # This test works on Unicode 6.0 in which U+2118 and U+212E are legal + # IDStarts there, but are not Word characters, and therefore Perl + # doesn't allow them to be IDStarts. But there is no guarantee that + # Unicode won't change things around in the future so that at some + # future Unicode revision these tests would need to be revised. + foreach my $char ("%", "×", chr(0x2118), chr(0x212E)) { + my $prog = <<"EOP"; +use utf8;; +"abc" =~ qr/(?<$char>abc)/; +EOP + utf8::encode($prog); + fresh_perl_like($prog, qr!Group name must start with a non-digit word character!, {}, + sprintf("'U+%04X not legal IDFirst'", ord($char))); + } + } + + { # [perl #101710] + my $pat = "b"; + utf8::upgrade($pat); + like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string"); + } + + { # Crash with @a =~ // warning + local $SIG{__WARN__} = sub { + pass 'no crash for @a =~ // warning' + }; + eval ' sub { my @a =~ // } '; + } + + { # Concat overloading and qr// thingies + my @refs; + my $qr = qr//; + package Cat { + require overload; + overload->import( + '""' => sub { ${$_[0]} }, + '.' => sub { + push @refs, ref $_[1] if ref $_[1]; + bless $_[2] ? \"$_[1]${$_[0]}" : \"${$_[0]}$_[1]" + } + ); + } + my $s = "foo"; + my $o = bless \$s, Cat::; + /$o$qr/; + is "@refs", "Regexp", '/$o$qr/ passes qr ref to cat overload meth'; + } + + { + my $count=0; + my $str="\n"; + $count++ while $str=~/.*/g; + is $count, 2, 'test that ANCH_MBOL works properly. We should get 2 from $count++ while "\n"=~/.*/g'; + my $class_count= 0; + $class_count++ while $str=~/[^\n]*/g; + is $class_count, $count, 'while "\n"=~/.*/g and while "\n"=~/[^\n]*/g should behave the same'; + my $anch_count= 0; + $anch_count++ while $str=~/^.*/mg; + is $anch_count, 1, 'while "\n"=~/^.*/mg should match only once'; + } + + { # [perl #111174] + use re '/u'; + like "\xe0", qr/(?i:\xc0)/, "(?i: shouldn't lose the passed in /u"; + use re '/a'; + unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a"; + use re '/aa'; + unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa"; + } + + { + # the test for whether the pattern should be re-compiled should + # consider the UTF8ness of the previous and current pattern + # string, as well as the physical bytes of the pattern string + + for my $s ("\xc4\x80", "\x{100}") { + ok($s =~ /^$s$/, "re-compile check is UTF8-aware"); + } + } + + # #113682 more overloading and qr// + # when doing /foo$overloaded/, if $overloaded returns + # a qr/(?{})/ via qr or "" overloading, then 'use re 'eval' + # shouldn't be required. Via '.', it still is. + { + package Qr0; + use overload 'qr' => sub { qr/(??{50})/ }; + + package Qr1; + use overload '""' => sub { qr/(??{51})/ }; + + package Qr2; + use overload '.' => sub { $_[1] . qr/(??{52})/ }; + + package Qr3; + use overload '""' => sub { qr/(??{7})/ }, + '.' => sub { $_[1] . qr/(??{53})/ }; + + package Qr_indirect; + use overload '""' => sub { $_[0][0] }; + + package main; + + for my $i (0..3) { + my $o = bless [], "Qr$i"; + if ((0,0,1,1)[$i]) { + eval { "A5$i" =~ /^A$o$/ }; + like($@, qr/Eval-group not allowed/, "Qr$i"); + eval { "5$i" =~ /$o/ }; + like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), + "Qr$i bare"); + { + use re 'eval'; + ok("A5$i" =~ /^A$o$/, "Qr$i - with use re eval"); + eval { "5$i" =~ /$o/ }; + like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), + "Qr$i bare - with use re eval"); + } + } + else { + ok("A5$i" =~ /^A$o$/, "Qr$i"); + ok("5$i" =~ /$o/, "Qr$i bare"); + } + } + + my $o = bless [ bless [], "Qr1" ], 'Qr_indirect'; + ok("A51" =~ /^A$o/, "Qr_indirect"); + ok("51" =~ /$o/, "Qr_indirect bare"); + } + + { # Various flags weren't being set when a [] is optimized into an + # EXACTish node + ; + ; + ok("\x{017F}\x{017F}" =~ qr/^[\x{00DF}]?$/i, "[] to EXACTish optimization"); + } + + { + for my $char (":", "\x{f7}", "\x{2010}") { + my $utf8_char = $char; + utf8::upgrade($utf8_char); + my $display = $char; + $display = display($display); + my $utf8_display = "utf8::upgrade(\"$display\")"; + + like($char, qr/^$char?$/, "\"$display\" =~ /^$display?\$/"); + like($char, qr/^$utf8_char?$/, "my \$p = \"$display\"; utf8::upgrade(\$p); \"$display\" =~ /^\$p?\$/"); + like($utf8_char, qr/^$char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); \"\$c\" =~ /^$display?\$/"); + like($utf8_char, qr/^$utf8_char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); my \$p = \"$display\"; utf8::upgrade(\$p); \"\$c\" =~ /^\$p?\$/"); + } + } + + { + # #116148: Pattern utf8ness sticks around globally + # the utf8 in the first match was sticking around for the second + # match + + use feature 'unicode_strings'; + + my $x = "\x{263a}"; + $x =~ /$x/; + + my $text = "Perl"; + ok("Perl" =~ /P.*$/i, '#116148'); + } + + { # 118297: Mixing up- and down-graded strings in regex + utf8::upgrade(my $u = "\x{e5}"); + utf8::downgrade(my $d = "\x{e5}"); + my $warned; + local $SIG{__WARN__} = sub { $warned++ if $_[0] =~ /\AMalformed UTF-8/ }; + my $re = qr/$u$d/; + ok(!$warned, "no warnings when interpolating mixed up-/downgraded strings in pattern"); + my $c = "\x{e5}\x{e5}"; + utf8::downgrade($c); + like($c, $re, "mixed up-/downgraded pattern matches downgraded string"); + utf8::upgrade($c); + like($c, $re, "mixed up-/downgraded pattern matches upgraded string"); + } + + { + # if we have 87 capture buffers defined then \87 should refer to the 87th. + # test that this is true for 1..100 + # Note that this test causes the engine to recurse at runtime, and + # hence use a lot of C stack. + for my $i (1..100) { + my $capture= "a"; + $capture= "($capture)" for 1 .. $i; + for my $mid ("","b") { + my $str= "a${mid}a"; + my $backref= "\\$i"; + eval { + ok($str=~/$capture$mid$backref/,"\\$i works with $i buffers '$str'=~/...$mid$backref/"); + 1; + } or do { + is("$@","","\\$i works with $i buffers works with $i buffers '$str'=~/...$mid$backref/"); + }; + } + } + } + + # this mixture of readonly (not COWable) and COWable strings + # messed up the capture buffers under COW. The actual test results + # are incidental; the issue is was an AddressSanitizer failure + { + my $c ='AB'; + my $res = ''; + for ($c, 'C', $c, 'DE') { + ok(/(.)/, "COWable match"); + $res .= $1; + } + is($res, "ACAD"); + } + + + { + # RT #45667 + # /[#$x]/x didn't interpolate the var $x. + my $b = 'cd'; + my $s = 'abcd$%#&'; + $s =~ s/[a#$b%]/X/g; + is ($s, 'XbXX$XX&', 'RT #45667 without /x'); + $s = 'abcd$%#&'; + $s =~ s/[a#$b%]/X/gx; + is ($s, 'XbXX$XX&', 'RT #45667 with /x'); + } + + { + no warnings "uninitialized"; + my @a; + $a[1]++; + /@a/; + pass('no crash with /@a/ when array has nonexistent elems'); + } + + { + is runperl(prog => 'delete $::{qq-\cR-}; //; print qq-ok\n-'), + "ok\n", + 'deleting *^R does not result in crashes'; + no warnings 'once'; + *^R = *caretRglobwithnoscalar; + "" =~ /(?{42})/; + is $^R, 42, 'assigning to *^R does not result in a crash'; + is runperl( + stderr => 1, + prog => 'eval q|' + .' q-..- =~ /(??{undef *^R;q--})(?{42})/; ' + .' print qq-$^R\n-' + .'|' + ), + "42\n", + 'undefining *^R within (??{}) does not result in a crash'; + } + + { + # [perl #120446] + # this code should be virtually instantaneous. If it takes 10s of + # seconds, there a bug in intuit_start. + # (this test doesn't actually test for slowness - that involves + # too much danger of false positives on loaded machines - but by + # putting it here, hopefully someone might notice if it suddenly + # runs slowly) + my $s = ('a' x 1_000_000) . 'b'; + my $i = 0; + for (1..10_000) { + pos($s) = $_; + $i++ if $s =~/\Gb/g; + } + is($i, 0, "RT 120446: mustn't run slowly"); + } + + { + # [perl #120692] + # these tests should be virtually instantaneous. If they take 10s of + # seconds, there's a bug in intuit_start. + + my $s = 'ab' x 1_000_000; + utf8::upgrade($s); + 1 while $s =~ m/\Ga+ba+b/g; + pass("RT#120692 \\G mustn't run slowly"); + + $s=~ /^a{1,2}x/ for 1..10_000; + pass("RT#120692 a{1,2} mustn't run slowly"); + + $s=~ /ab.{1,2}x/; + pass("RT#120692 ab.{1,2} mustn't run slowly"); + + $s = "-a-bc" x 250_000; + $s .= "1a1bc"; + utf8::upgrade($s); + ok($s =~ /\da\d{0,30000}bc/, "\\d{30000}"); + + $s = "-ab\n" x 250_000; + $s .= "abx"; + ok($s =~ /^ab.*x/m, "distant float with /m"); + + my $r = qr/^abcd/; + $s = "abcd-xyz\n" x 500_000; + $s =~ /$r\d{1,2}xyz/m for 1..200; + pass("BOL within //m mustn't run slowly"); + + $s = "abcdefg" x 1_000_000; + $s =~ /(?-m:^)abcX?fg/m for 1..100; + pass("BOL within //m mustn't skip absolute anchored check"); + + $s = "abcdefg" x 1_000_000; + $s =~ /^XX\d{1,10}cde/ for 1..100; + pass("abs anchored float string should fail quickly"); + + } + + # These are based on looking at the code in regcomp.c + # We don't look for specific code, just the existence of an SSC + foreach my $re (qw( qr/a?c/ + qr/a?c/i + qr/[ab]?c/ + qr/\R?c/ + qr/\d?c/d + qr/\w?c/l + qr/\s?c/a + qr/[[:alpha:]]?c/u + )) { + SKIP: { + skip "no re-debug under miniperl" if is_miniperl; + my $prog = <<"EOP"; +use re qw(Debug COMPILE); +$re; +EOP + fresh_perl_like($prog, qr/synthetic stclass/, { stderr=>1 }, "$re generates a synthetic start class"); + } + } + + { + like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works"; + } + + { + # Verify that the very last Latin-1 U+00FF + # (LATIN SMALL LETTER Y WITH DIAERESIS) + # and its UPPER counterpart (U+0178 which is pure Unicode), + # and likewise for the very first pure Unicode + # (LATIN CAPITAL LETTER A WITH MACRON) fold-match properly, + # and there are no off-by-one logic errors in the transition zone. + + ok("\xFF" =~ /\xFF/i, "Y WITH DIAERESIS l =~ l"); + ok("\xFF" =~ /\x{178}/i, "Y WITH DIAERESIS l =~ u"); + ok("\x{178}" =~ /\xFF/i, "Y WITH DIAERESIS u =~ l"); + ok("\x{178}" =~ /\x{178}/i, "Y WITH DIAERESIS u =~ u"); + + # U+00FF with U+05D0 (non-casing Hebrew letter). + ok("\xFF\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS l =~ l"); + ok("\xFF\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS l =~ u"); + ok("\x{178}\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS u =~ l"); + ok("\x{178}\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS u =~ u"); + + # U+0100. + ok("\x{100}" =~ /\x{100}/i, "A WITH MACRON u =~ u"); + ok("\x{100}" =~ /\x{101}/i, "A WITH MACRON u =~ l"); + ok("\x{101}" =~ /\x{100}/i, "A WITH MACRON l =~ u"); + ok("\x{101}" =~ /\x{101}/i, "A WITH MACRON l =~ l"); + } + + { + use utf8; + ok("abc" =~ /a…b…c/x, "NEL is white-space under /x"); + } + + { + ok('a(b)c' =~ qr(a\(b\)c), "'\\(' is a literal in qr(...)"); + ok('a[b]c' =~ qr[a\[b\]c], "'\\[' is a literal in qr[...]"); + ok('a{3}c' =~ qr{a\{3\}c}, # Only failed when { could be a meta + "'\\{' is a literal in qr{...}, where it could be a quantifier"); + + # This one is for completeness + ok('ac' =~ qrc>, "'\\<' is a literal in qr<...>)"); + } + + { # Was getting optimized into EXACT (non-folding node) + my $x = qr/[x]/i; + utf8::upgrade($x); + like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case"); + } + } # End of sub run_tests 1;