X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/cb55de95c99e46505d72b19ea780c7d46917be32..229196e5c9b94d8a3473e04c9e2907f473bca025:/t/op/pat.t diff --git a/t/op/pat.t b/t/op/pat.t index 6b038a5..9318070 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -4,16 +4,16 @@ # the format supported by op/regexp.t. If you want to add a test # that does fit that format, add it to op/re_tests, not here. -print "1..581\n"; +$| = 1; + +print "1..849\n"; BEGIN { chdir 't' if -d 't'; @INC = '../lib'; } -eval 'use Config'; # Defaults assumed if this fails -# XXX known to leak scalars -$ENV{PERL_DESTRUCT_LEVEL} = 0 unless $ENV{PERL_DESTRUCT_LEVEL} > 3; +eval 'use Config'; # Defaults assumed if this fails $x = "abc\ndef\n"; @@ -1129,6 +1129,8 @@ print "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/; print "ok $test\n"; $test++; +my $ordA = ord('A'); + $_ = "a\x{100}b"; if (/(.)(\C)(\C)(.)/) { print "ok 232\n"; @@ -1137,15 +1139,32 @@ if (/(.)(\C)(\C)(.)/) { } else { print "not ok 233\n"; } - if ($2 eq "\xC4") { - print "ok 234\n"; - } else { - print "not ok 234\n"; - } - if ($3 eq "\x80") { - print "ok 235\n"; + if ($ordA == 65) { # ASCII (or equivalent), should be UTF-8 + if ($2 eq "\xC4") { + print "ok 234\n"; + } else { + print "not ok 234\n"; + } + if ($3 eq "\x80") { + print "ok 235\n"; + } else { + print "not ok 235\n"; + } + } elsif ($ordA == 193) { # EBCDIC (or equivalent), should be UTF-EBCDIC + if ($2 eq "\x8C") { + print "ok 234\n"; + } else { + print "not ok 234\n"; + } + if ($3 eq "\x41") { + print "ok 235\n"; + } else { + print "not ok 235\n"; + } } else { - print "not ok 235\n"; + for (234..235) { + print "not ok $_ # ord('A') == $ordA\n"; + } } if ($4 eq "b") { print "ok 236\n"; @@ -1161,10 +1180,20 @@ $_ = "\x{100}"; if (/(\C)/g) { print "ok 237\n"; # currently \C are still tagged as UTF-8 - if ($1 eq "\xC4") { - print "ok 238\n"; + if ($ordA == 65) { + if ($1 eq "\xC4") { + print "ok 238\n"; + } else { + print "not ok 238\n"; + } + } elsif ($ordA == 193) { + if ($1 eq "\x8C") { + print "ok 238\n"; + } else { + print "not ok 238\n"; + } } else { - print "not ok 238\n"; + print "not ok 238 # ord('A') == $ordA\n"; } } else { for (237..238) { @@ -1174,10 +1203,20 @@ if (/(\C)/g) { if (/(\C)/g) { print "ok 239\n"; # currently \C are still tagged as UTF-8 - if ($1 eq "\x80") { - print "ok 240\n"; + if ($ordA == 65) { + if ($1 eq "\x80") { + print "ok 240\n"; + } else { + print "not ok 240\n"; + } + } elsif ($ordA == 193) { + if ($1 eq "\x41") { + print "ok 240\n"; + } else { + print "not ok 240\n"; + } } else { - print "not ok 240\n"; + print "not ok 240 # ord('A') == $ordA\n"; } } else { for (239..240) { @@ -1248,7 +1287,7 @@ print "ok 247\n"; "#latin[$latin]\nnot ok $test\n"; $test++; $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a - use utf8; + use utf8; # needed for the raw UTF-8 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a } } @@ -1301,6 +1340,7 @@ print "ok 247\n"; { # the second half of 20001028.003 + my $X = ''; $X =~ s/^/chr(1488)/e; print "not " unless length $X == 1 && ord($X) == 1488; print "ok 260\n"; @@ -1352,10 +1392,11 @@ print "ok 247\n"; "\0" => 'Cc', ); - for my $char (keys %s) { + for my $char (map { s/^\S+ //; $_ } + sort map { sprintf("%06x", ord($_))." $_" } keys %s) { my $class = $s{$char}; - my $code = sprintf("%04x", ord($char)); - printf "# 0x$code\n"; + my $code = sprintf("%06x", ord($char)); + printf "#\n# 0x$code\n#\n"; print "# IsAlpha\n"; if ($class =~ /^[LM]/) { print "not " unless $char =~ /\p{IsAlpha}/; @@ -1381,7 +1422,7 @@ print "ok 247\n"; print "ok $test\n"; $test++; } print "# IsASCII\n"; - if ($code <= 127) { + if ($code le '00007f') { print "not " unless $char =~ /\p{IsASCII}/; print "ok $test\n"; $test++; print "not " if $char =~ /\P{IsASCII}/; @@ -1546,3 +1587,1075 @@ print "ok 247\n"; print "ok 581\n"; } + +{ + $test = 582; + + # bugid 20010410.006 + for my $rx ( + '/(.*?)\{(.*?)\}/csg', + '/(.*?)\{(.*?)\}/cg', + '/(.*?)\{(.*?)\}/sg', + '/(.*?)\{(.*?)\}/g', + '/(.+?)\{(.+?)\}/csg', + ) + { + my($input, $i); + + $i = 0; + $input = "a{b}c{d}"; + eval <" =~ /<\x{100}\s>/ ? "ok 841\n" : "not ok 841\n"; + print "<\x{2028}>" =~ /<\s>/ ? "ok 842\n" : "not ok 842\n"; + print "<\x{2029}>" =~ /<\s>/ ? "ok 843\n" : "not ok 843\n"; +} + +{ + print "# . with /s should work on characters, as opposed to bytes\n"; + + my $s = "\x{e4}\x{100}"; + + # This is not expected to match: the point is that + # neither should we get "Malformed UTF-8" warnings. + print $s =~ /\G(.+?)\n/gcs ? + "not ok 844\n" : "ok 844\n"; + + my @c; + + while ($s =~ /\G(.)/gs) { + push @c, $1; + } + + print join("", @c) eq $s ? "ok 845\n" : "not ok 845\n"; + + my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; # test only chars < 256 + my $r1 = ""; + while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { + $r1 .= $1 . $2; + } + + my $t2 = $t1 . "\x{100}"; # repeat with a larger char + my $r2 = ""; + while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { + $r2 .= $1 . $2; + } + $r2 =~ s/\x{100}//; + print $r1 eq $r2 ? "ok 846\n" : "not ok 846\n"; +} + +{ + print "# Unicode lookbehind\n"; + + print "A\x{100}B" =~ /(?<=A.)B/ ? "ok 847\n" : "not ok 847\n"; + print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 848\n" : "not ok 848\n"; +}