X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/60425c380d8403607be85706bb016ae8d577acf4..248175870b24dd04bea0abf89f6b13929587b79b:/t/op/pat.t diff --git a/t/op/pat.t b/t/op/pat.t index 8575ca8..d112bcc 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -4,16 +4,16 @@ # the format supported by op/regexp.t. If you want to add a test # that does fit that format, add it to op/re_tests, not here. -print "1..581\n"; +$| = 1; + +print "1..854\n"; BEGIN { chdir 't' if -d 't'; @INC = '../lib'; } -eval 'use Config'; # Defaults assumed if this fails -# XXX known to leak scalars -$ENV{PERL_DESTRUCT_LEVEL} = 0 unless $ENV{PERL_DESTRUCT_LEVEL} > 3; +eval 'use Config'; # Defaults assumed if this fails $x = "abc\ndef\n"; @@ -73,24 +73,23 @@ $* = 1; # test 3 only tested the optimized version--this one is for real if ("ab\ncd\n" =~ /^cd/) {print "ok 24\n";} else {print "not ok 24\n";} $* = 0; -#$XXX{123} = 123; -#$XXX{234} = 234; -#$XXX{345} = 345; -# -#@XXX = ('ok 25','not ok 25', 'ok 26','not ok 26','not ok 27'); -#while ($_ = shift(@XXX)) { -# ?(.*)? && (print $1,"\n"); -# /not/ && reset; -# /not ok 26/ && reset 'X'; -#} -# -#while (($key,$val) = each(%XXX)) { -# print "not ok 27\n"; -# exit; -#} -# -#print "ok 27\n"; -for (25..27) { print "ok $_\n" } +$XXX{123} = 123; +$XXX{234} = 234; +$XXX{345} = 345; + +@XXX = ('ok 25','not ok 25', 'ok 26','not ok 26','not ok 27'); +while ($_ = shift(@XXX)) { + ?(.*)? && (print $1,"\n"); + /not/ && reset; + /not ok 26/ && reset 'X'; +} + +while (($key,$val) = each(%XXX)) { + print "not ok 27\n"; + exit; +} + +print "ok 27\n"; 'cde' =~ /[^ab]*/; 'xyz' =~ //; @@ -1130,6 +1129,8 @@ print "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/; print "ok $test\n"; $test++; +my $ordA = ord('A'); + $_ = "a\x{100}b"; if (/(.)(\C)(\C)(.)/) { print "ok 232\n"; @@ -1138,15 +1139,32 @@ if (/(.)(\C)(\C)(.)/) { } else { print "not ok 233\n"; } - if ($2 eq "\xC4") { - print "ok 234\n"; - } else { - print "not ok 234\n"; - } - if ($3 eq "\x80") { - print "ok 235\n"; + if ($ordA == 65) { # ASCII (or equivalent), should be UTF-8 + if ($2 eq "\xC4") { + print "ok 234\n"; + } else { + print "not ok 234\n"; + } + if ($3 eq "\x80") { + print "ok 235\n"; + } else { + print "not ok 235\n"; + } + } elsif ($ordA == 193) { # EBCDIC (or equivalent), should be UTF-EBCDIC + if ($2 eq "\x8C") { + print "ok 234\n"; + } else { + print "not ok 234\n"; + } + if ($3 eq "\x41") { + print "ok 235\n"; + } else { + print "not ok 235\n"; + } } else { - print "not ok 235\n"; + for (234..235) { + print "not ok $_ # ord('A') == $ordA\n"; + } } if ($4 eq "b") { print "ok 236\n"; @@ -1162,10 +1180,20 @@ $_ = "\x{100}"; if (/(\C)/g) { print "ok 237\n"; # currently \C are still tagged as UTF-8 - if ($1 eq "\xC4") { - print "ok 238\n"; + if ($ordA == 65) { + if ($1 eq "\xC4") { + print "ok 238\n"; + } else { + print "not ok 238\n"; + } + } elsif ($ordA == 193) { + if ($1 eq "\x8C") { + print "ok 238\n"; + } else { + print "not ok 238\n"; + } } else { - print "not ok 238\n"; + print "not ok 238 # ord('A') == $ordA\n"; } } else { for (237..238) { @@ -1175,10 +1203,20 @@ if (/(\C)/g) { if (/(\C)/g) { print "ok 239\n"; # currently \C are still tagged as UTF-8 - if ($1 eq "\x80") { - print "ok 240\n"; + if ($ordA == 65) { + if ($1 eq "\x80") { + print "ok 240\n"; + } else { + print "not ok 240\n"; + } + } elsif ($ordA == 193) { + if ($1 eq "\x41") { + print "ok 240\n"; + } else { + print "not ok 240\n"; + } } else { - print "not ok 240\n"; + print "not ok 240 # ord('A') == $ordA\n"; } } else { for (239..240) { @@ -1249,7 +1287,7 @@ print "ok 247\n"; "#latin[$latin]\nnot ok $test\n"; $test++; $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a - use utf8; + use utf8; # needed for the raw UTF-8 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a } } @@ -1302,6 +1340,7 @@ print "ok 247\n"; { # the second half of 20001028.003 + my $X = ''; $X =~ s/^/chr(1488)/e; print "not " unless length $X == 1 && ord($X) == 1488; print "ok 260\n"; @@ -1353,10 +1392,11 @@ print "ok 247\n"; "\0" => 'Cc', ); - for my $char (keys %s) { + for my $char (map { s/^\S+ //; $_ } + sort map { sprintf("%06x", ord($_))." $_" } keys %s) { my $class = $s{$char}; - my $code = sprintf("%04x", ord($char)); - printf "# 0x$code\n"; + my $code = sprintf("%06x", ord($char)); + printf "#\n# 0x$code\n#\n"; print "# IsAlpha\n"; if ($class =~ /^[LM]/) { print "not " unless $char =~ /\p{IsAlpha}/; @@ -1382,16 +1422,21 @@ print "ok 247\n"; print "ok $test\n"; $test++; } print "# IsASCII\n"; - if ($code <= 127) { - print "not " unless $char =~ /\p{IsASCII}/; - print "ok $test\n"; $test++; - print "not " if $char =~ /\P{IsASCII}/; - print "ok $test\n"; $test++; + if (ord("A") == 193) { + print "ok $test # Skip: in EBCDIC\n"; $test++; + print "ok $test # Skip: in EBCDIC\n"; $test++; } else { - print "not " if $char =~ /\p{IsASCII}/; - print "ok $test\n"; $test++; - print "not " unless $char =~ /\P{IsASCII}/; - print "ok $test\n"; $test++; + if ($code le '00007f') { + print "not " unless $char =~ /\p{IsASCII}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsASCII}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsASCII}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsASCII}/; + print "ok $test\n"; $test++; + } } print "# IsCntrl\n"; if ($class =~ /^C/) { @@ -1547,3 +1592,1078 @@ print "ok 247\n"; print "ok 581\n"; } + +{ + $test = 582; + + # bugid 20010410.006 + for my $rx ( + '/(.*?)\{(.*?)\}/csg', + '/(.*?)\{(.*?)\}/cg', + '/(.*?)\{(.*?)\}/sg', + '/(.*?)\{(.*?)\}/g', + '/(.+?)\{(.+?)\}/csg', + ) + { + my($input, $i); + + $i = 0; + $input = "a{b}c{d}"; + eval <" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n"; + print "<\x{2028}>" =~ /<\s>/ ? "ok 846\n" : "not ok 846\n"; + print "<\x{2029}>" =~ /<\s>/ ? "ok 847\n" : "not ok 847\n"; +} + +{ + print "# . with /s should work on characters, as opposed to bytes\n"; + + my $s = "\x{e4}\x{100}"; + + # This is not expected to match: the point is that + # neither should we get "Malformed UTF-8" warnings. + print $s =~ /\G(.+?)\n/gcs ? + "not ok 848\n" : "ok 848\n"; + + my @c; + + while ($s =~ /\G(.)/gs) { + push @c, $1; + } + + print join("", @c) eq $s ? "ok 849\n" : "not ok 849\n"; + + my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; # test only chars < 256 + my $r1 = ""; + while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { + $r1 .= $1 . $2; + } + + my $t2 = $t1 . "\x{100}"; # repeat with a larger char + my $r2 = ""; + while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { + $r2 .= $1 . $2; + } + $r2 =~ s/\x{100}//; + print $r1 eq $r2 ? "ok 850\n" : "not ok 850\n"; +} + +{ + print "# Unicode lookbehind\n"; + + print "A\x{100}B" =~ /(?<=A.)B/ ? "ok 851\n" : "not ok 851\n"; + print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 852\n" : "not ok 852\n"; + print "\x{400}AB" =~ /(?<=\x{400}.)B/ ? "ok 853\n" : "not ok 853\n"; + print "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/ ? "ok 854\n" : "not ok 854\n"; +}