# Grind out a lot of combinatoric tests for folding.
-use charnames ":full";
-
binmode STDOUT, ":utf8";
BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
require './test.pl';
+ skip_all_if_miniperl("no dynamic loading on miniperl, no Encode nor POSIX");
}
+use charnames ":full";
+
my $DEBUG = 0; # Outputs extra information for debugging this .t
use strict;
use warnings;
use Encode;
+use POSIX;
+
+# Special-cased characters in the .c's that we want to make sure get tested.
+my %be_sure_to_test = (
+ "\xDF" => 1, # LATIN_SMALL_LETTER_SHARP_S
+ "\x{1E9E}" => 1, # LATIN_CAPITAL_LETTER_SHARP_S
+ "\x{390}" => 1, # GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA_AND_TONOS
+ "\x{3B0}" => 1, # GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS
+ "\x{1FD3}" => 1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+ "\x{1FE3}" => 1, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+ );
+
# Tests both unicode and not, so make sure not implicitly testing unicode
no feature 'unicode_strings';
# set.
my $skip_apparently_redundant = ! $ENV{PERL_RUN_SLOW_TESTS};
+# Additionally parts of this test run a lot of subtests, outputting the
+# resulting TAP can be expensive so the tests are summarised internally. The
+# PERL_DEBUG_FULL_TEST environment variable can be set to produce the full
+# output for debugging purposes.
+
sub range_type {
- my $ord = shift;
+ my $ord = ord shift;
return $ASCII if $ord < 128;
return $Latin1 if $ord < 256;
return $Unicode;
}
-my %todos;
-map { $todos{$_} = '1' } (
-95557,
-95558,
-95561,
-95562,
-95573,
-95574,
-95605,
-95606,
-95609,
-95610,
-95621,
-95622,
-);
-
sub numerically {
return $a <=> $b
}
-sub format_test($$$) {
- my ($test, $count, $debug) = @_;
+# Significant time is saved by not outputting each test but grouping the
+# output into subtests
+my $okays; # Number of ok's in current subtest
+my $this_iteration; # Number of possible tests in current subtest
+my $count=0; # Number of subtests = number of total tests
- # Create a test entry, with TODO set if it is one of the known problem
- # code points
+sub run_test($$$) {
+ my ($test, $todo, $debug) = @_;
$debug = "" unless $DEBUG;
+ my $res = eval $test;
+
+ if (!$res || $ENV{PERL_DEBUG_FULL_TEST}) {
+ # Failed or debug; output the result
+ $count++;
+ ok($res, "$test; $debug");
+ } else {
+ # Just count the test as passed
+ $okays++;
+ }
+ $this_iteration++;
+}
+
+my %has_test_by_participants; # Makes sure has tests for each range and each
+ # number of characters that fold to the same
+ # thing
+my %has_test_by_byte_count; # Makes sure has tests for each combination of
+ # n bytes folds to m bytes
+
+my %tests; # The set of tests.
+# Each key is a code point that folds to something else.
+# Each value is a list of things that the key folds to. If the 'thing' is a
+# single code point, it is that ordinal. If it is a multi-char fold, it is an
+# ordered list of the code points in that fold. Here's an example for 'S':
+# '83' => [ 115, 383 ]
+#
+# And one for a multi-char fold: \xDF
+# 223 => [
+# [ # 'ss'
+# 83,
+# 83
+# ],
+# [ # 'SS'
+# 115,
+# 115
+# ],
+# [ # LATIN SMALL LETTER LONG S
+# 383,
+# 383
+# ],
+# 7838 # LATIN_CAPITAL_LETTER_SHARP_S
+# ],
+
+my %inverse_folds; # keys are strings of the folded-to;
+ # values are lists of characters that fold to them
+
+sub add_test($@) {
+ my ($to, @from) = @_;
+
+ # Called to cause the input to be tested by adding to %tests. @from is
+ # the list of characters that fold to the string $to. @from should be
+ # sorted so the lowest code point is first....
+ # The input is in string form; %tests uses code points, so have to
+ # convert.
+
+ my $to_chars = length $to;
+ my @test_to; # List of tests for $to
+
+ if ($to_chars == 1) {
+ @test_to = ord $to;
+ }
+ else {
+ push @test_to, [ map { ord $_ } split "", $to ];
+
+ # For multi-char folds, we also test that things that can fold to each
+ # individual character in the fold also work. If we were testing
+ # comprehensively, we would try every combination of upper and lower
+ # case in the fold, but it will have to suffice to avoid running
+ # forever to make sure that each thing that folds to these is tested
+ # at least once. Because of complement matching, we need to do both
+ # the folded, and the folded-from.
+ # We first look at each character in the multi-char fold, and save how
+ # many characters fold to it; and also the maximum number of such
+ # folds
+ my @folds_to_count; # 0th char in fold is index 0 ...
+ my $max_folds_to = 0;
+
+ for (my $i = 0; $i < $to_chars; $i++) {
+ my $to_char = substr($to, $i, 1);
+ if (exists $inverse_folds{$to_char}) {
+ $folds_to_count[$i] = scalar @{$inverse_folds{$to_char}};
+ $max_folds_to = $folds_to_count[$i] if $max_folds_to < $folds_to_count[$i];
+ }
+ else {
+ $folds_to_count[$i] = 0;
+ }
+ }
+
+ # We will need to generate as many tests as the maximum number of
+ # folds, so that each fold will have at least one test.
+ for (my $i = 0; $i < $max_folds_to; $i++) {
+ my @this_test_to; # Assemble a single test
+
+ # For each character in the multi-char fold ...
+ for (my $j = 0; $j < $to_chars; $j++) {
+ my $this_char = substr($to, $j, 1);
+
+ # Use its corresponding inverse fold, if available.
+ if ($i < $folds_to_count[$j]) {
+ push @this_test_to, ord $inverse_folds{$this_char}[$i];
+ }
+ else { # Or else itself.
+ push @this_test_to, ord $this_char;
+ }
+ }
+
+ # Add this test to the list
+ push @test_to, [ @this_test_to ];
+ }
+
+ # Here, have assembled all the tests for the multi-char fold. Sort so
+ # lowest code points are first for consistency and aesthetics in
+ # output. We know there are at least two characters in the fold, but
+ # I haven't bothered to worry about sorting on an optional third
+ # character if the first two are identical.
+ @test_to = sort { ($a->[0] == $b->[0])
+ ? $a->[1] <=> $b->[1]
+ : $a->[0] <=> $b->[0]
+ } @test_to;
+ }
+
+
+ # This test is from n bytes to m bytes. Record that so won't try to add
+ # another test that does the same.
+ use bytes;
+ my $to_bytes = length $to;
+ foreach my $from_map (@from) {
+ $has_test_by_byte_count{length $from_map}{$to_bytes} = $to;
+ }
+ no bytes;
+
+ my $ord_smallest_from = ord shift @from;
+ if (exists $tests{$ord_smallest_from}) {
+ die "There are already tests for $ord_smallest_from"
+ };
- my $todo = (exists $todos{$count}) ? "Known problem" : 0;
+ # Add in the fold tests,
+ push @{$tests{$ord_smallest_from}}, @test_to;
- return qq[TODO: { local \$::TODO = "$todo"; ok(eval '$test', '$test; $debug'); }];
+ # Then any remaining froms in the equivalence class.
+ push @{$tests{$ord_smallest_from}}, map { ord $_ } @from;
}
-my %tests; # The final set of tests. keys are the code points to test
-my %simple_folds;
-my %multi_folds;
+# Read the Unicode rules file and construct inverse mappings from it
-# First, analyze the current Unicode's folding rules
-my %folded_from;
my $file="../lib/unicore/CaseFolding.txt";
open my $fh, "<", $file or die "Failed to read '$file': $!";
+
while (<$fh>) {
chomp;
#0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
my ($line, $comment) = split / \s+ \# \s+ /x, $_;
- next if $line eq "" || substr($line, 0, 1) eq '#';
- my ($hex_from, $fold_type, @folded) = split /[\s;]+/, $line;
-
- my $from = hex $hex_from;
-
- if ($fold_type eq 'F') {
- my $from_range_type = range_type($from);
-
- # If we were testing comprehensively, we would try every combination
- # of upper and lower case in the fold, but it is quite likely that if
- # the code can handle all combinations if it can handle the cases
- # where everything is upper and when everything is lower. Because of
- # complement matching, we need to do both. And we use the
- # reverse-fold instead of uppercase.
- @folded = map { hex $_ } @folded;
- # XXX better to use reverse fold of these instead of uc
- my @uc_folded = map { ord uc chr $_ } @folded;
-
- # Include three code points that are handled internally by the regex
- # engine specially, plus all non-above-255 multi folds (which actually
- # the only one is already included in the three, but this makes sure)
- # And if any member of the fold is not the same range type as the
- # source, add it directly to the tests. It needs to be an array of an
- # array, so that it is distinguished from multiple single folds
- if ($from == 0xDF || $from == 0x390 || $from == 0x3B0
- || $from_range_type != $Unicode
- || grep { range_type($_) != $from_range_type } @folded)
- {
- $tests{$from} = [ [ @folded ], [ @uc_folded ] ];
- }
- else {
-
- # The only multi-char non-utf8 fold is DF, which is handled above,
- # so here chr() must be utf8. Get the number of bytes in each.
- # This is because the optimizer cares about length differences.
- my $from_length = length encode('UTF-8', chr($from));
- my $to_length = length encode('UTF-8', pack 'U*', @folded);
- push @{$multi_folds{$from_length}{$to_length}}, { $from => [ [ @folded ], [ @uc_folded ] ] };
- }
- }
+ next if $line eq "" || $line =~ /^#/;
+ my ($hex_from, $fold_type, @hex_folded) = split /[\s;]+/, $line;
- # Perl only deals with C and F folds
- next if $fold_type ne 'C';
+ next if $fold_type eq 'T'; # Perl doesn't do Turkish folding
+ next if $fold_type eq 'S'; # If Unicode's tables are correct, the F
+ # should be a superset of S
- # C folds are single-char $from to single-char $folded, in chr terms
- # folded_from{'s'} = [ 'S', \N{LATIN SMALL LETTER LONG S} ]
- push @{$folded_from{hex $folded[0]}}, $from;
+ my $folded_str = pack ("U0U*", map { hex $_ } @hex_folded);
+ push @{$inverse_folds{$folded_str}}, chr hex $hex_from;
}
-# Now try to sort the single char folds into equivalence classes that are
-# likely to have identical successes and failures. Any fold that crosses
-# range types is suspect, and is automatically tested. Otherwise, store by
-# the number of characters that participate in a fold. Likely all folds in a
-# range type that fold to each other like B->b->B will have identical success
-# and failure; similarly all folds that have three characters participating
-# are likely to have the same successes and failures, etc.
-foreach my $folded (sort numerically keys %folded_from) {
- my $target_range_type = range_type($folded);
- my $count = @{$folded_from{$folded}};
-
- # Automatically test any fold that crosses range types
- if (grep { range_type($_) != $target_range_type } @{$folded_from{$folded}})
- {
- $tests{$folded} = $folded_from{$folded};
+# Analyze the data and generate tests to get adequate test coverage. We sort
+# things so that smallest code points are done first.
+TO:
+foreach my $to (sort { (length $a == length $b)
+ ? $a cmp $b
+ : length $a <=> length $b
+ } keys %inverse_folds) {
+
+ # Within each fold, sort so that the smallest code points are done first
+ @{$inverse_folds{$to}} = sort { $a cmp $b } @{$inverse_folds{$to}};
+ my @from = @{$inverse_folds{$to}};
+
+ # Just add it to the tests if doing complete coverage
+ if (! $skip_apparently_redundant) {
+ add_test($to, @from);
+ next TO;
}
- else {
- push @{$simple_folds{$target_range_type}{$count}},
- { $folded => $folded_from{$folded} };
+
+ my $to_chars = length $to;
+ my $to_range_type = range_type(substr($to, 0, 1));
+
+ # If this is required to be tested, do so. We check for these first, as
+ # they will take up slots of byte-to-byte combinations that we otherwise
+ # would have to have other tests to get.
+ foreach my $from_map (@from) {
+ if (exists $be_sure_to_test{$from_map}) {
+ add_test($to, @from);
+ next TO;
+ }
}
-}
-foreach my $from_length (keys %multi_folds) {
- foreach my $fold_length (keys %{$multi_folds{$from_length}}) {
- #print __LINE__, ref $multi_folds{$from_length}{$fold_length}, Dumper $multi_folds{$from_length}{$fold_length};
- foreach my $test (@{$multi_folds{$from_length}{$fold_length}}) {
- #print __LINE__, ": $from_length, $fold_length, $test:\n";
- my ($target, $pattern) = each %$test;
- #print __LINE__, ": $target: $pattern\n";
- $tests{$target} = $pattern;
- last if $skip_apparently_redundant;
+ # If the fold contains heterogeneous range types, is suspect and should be
+ # tested.
+ if ($to_chars > 1) {
+ foreach my $char (split "", $to) {
+ if (range_type($char) != $to_range_type) {
+ add_test($to, @from);
+ next TO;
+ }
+ }
+ }
+
+ # If the mapping crosses range types, is suspect and should be tested
+ foreach my $from_map (@from) {
+ if (range_type($from_map) != $to_range_type) {
+ add_test($to, @from);
+ next TO;
+ }
+ }
+
+ # Here, all components of the mapping are in the same range type. For
+ # single character folds, we test one case in each range type that has 2
+ # particpants, 3 particpants, etc.
+ if ($to_chars == 1) {
+ if (! exists $has_test_by_participants{scalar @from}{$to_range_type}) {
+ add_test($to, @from);
+ $has_test_by_participants{scalar @from}{$to_range_type} = $to;
+ next TO;
}
}
-}
-# Add in tests for single character folds. Add tests for each range type,
-# and within those tests for each number of characters participating in a
-# fold. Thus B->b has two characters participating. But K->k and Kelvin
-# Sign->k has three characters participating. So we would make sure that
-# there is a test for 3 chars, 4 chars, ... . (Note that the 'k' example is a
-# bad one because it crosses range types, so is automatically tested. In the
-# Unicode range there are various of these 3 and 4 char classes, but aren't as
-# easily described as the 'k' one.)
-foreach my $type (keys %simple_folds) {
- foreach my $count (keys %{$simple_folds{$type}}) {
- foreach my $test (@{$simple_folds{$type}{$count}}) {
- my ($target, $pattern) = each %$test;
- $tests{$target} = $pattern;
- last if $skip_apparently_redundant;
+ # We also test all combinations of mappings from m to n bytes. This is
+ # because the regex optimizer cares. (Don't bother worrying about that
+ # Latin1 chars will occupy a different number of bytes under utf8, as
+ # there are plenty of other cases that catch these byte numbers.)
+ use bytes;
+ my $to_bytes = length $to;
+ foreach my $from_map (@from) {
+ if (! exists $has_test_by_byte_count{length $from_map}{$to_bytes}) {
+ add_test($to, @from);
+ next TO;
}
}
}
# For each range type, test additionally a character that folds to itself
-$tests{0x3A} = [ 0x3A ];
-$tests{0xF7} = [ 0xF7 ];
-$tests{0x2C7} = [ 0x2C7 ];
-
-my $clump_execs = 10000; # Speed up by building an 'exec' of many tests
-my @eval_tests;
+add_test(chr 0x3A, chr 0x3A);
+add_test(chr 0xF7, chr 0xF7);
+add_test(chr 0x2C7, chr 0x2C7);
# To cut down on the number of tests
my $has_tested_aa_above_latin1;
my $has_tested_latin1_aa;
+my $has_tested_ascii_aa;
+my $has_tested_l_above_latin1;
+my $has_tested_above_latin1_l;
+my $has_tested_ascii_l;
+my $has_tested_above_latin1_d;
+my $has_tested_ascii_d;
+my $has_tested_non_latin1_d;
+my $has_tested_above_latin1_a;
+my $has_tested_ascii_a;
+my $has_tested_non_latin1_a;
# For use by pairs() in generating combinations
sub prefix {
map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_
}
+my @charsets = qw(d u a aa);
+my $current_locale = POSIX::setlocale( &POSIX::LC_ALL, "C") // "";
+if ($current_locale eq 'C') {
+ use locale;
+
+ # Some locale implementations don't have the range 128-255 characters all
+ # mean nothing. Skip the locale tests in that situation.
+ for my $i (128 .. 255) {
+ my $char = chr($i);
+ goto bad_locale if uc($char) ne $char || lc($char) ne $char;
+ }
+ push @charsets, 'l';
+bad_locale:
+}
# Finally ready to do the tests
-my $count=0;
foreach my $test (sort { numerically } keys %tests) {
my $previous_target;
@target = (ref $target) ? @$target : $target;
@pattern = (ref $pattern) ? @$pattern : $pattern;
+ # We are testing just folds to/from a single character. If our pairs
+ # happens to generate multi/multi, skip.
+ next if @target > 1 && @pattern > 1;
+
# Have to convert non-utf8 chars to native char set
@target = map { $_ > 255 ? $_ : ord latin1_to_native(chr($_)) } @target;
@pattern = map { $_ > 255 ? $_ : ord latin1_to_native(chr($_)) } @pattern;
my $pattern_above_latin1 = grep { $_ > 255 } @pattern;
my $target_has_ascii = grep { $_ < 128 } @target;
my $pattern_has_ascii = grep { $_ < 128 } @pattern;
+ my $target_only_ascii = ! grep { $_ > 127 } @target;
+ my $pattern_only_ascii = ! grep { $_ > 127 } @pattern;
+ my $target_has_latin1 = grep { $_ < 256 } @target;
+ my $target_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @target;
+ my $pattern_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @pattern;
+ my $pattern_has_latin1 = grep { $_ < 256 } @pattern;
my $is_self = @target == 1 && @pattern == 1 && $target[0] == $pattern[0];
# We don't test multi-char folding into other multi-chars. We are testing
# code point for diagnostic purposes. (If both are single, choose the
# target string)
my $ord = @target == 1 ? $target[0] : $pattern[0];
- my $progress = sprintf "\"%s\" and /%s/",
+ my $progress = sprintf "%04X: \"%s\" and /%s/",
+ $test,
join("", @x_target),
join("", @x_pattern);
- #print $progress, "\n";
- #diag $progress;
+ #note $progress;
# Now grind out tests, using various combinations.
- foreach my $charset ('d', 'u', 'aa') {
-
- # /aa should only affect things with folds in the ASCII range. But, try
- # it on one pair in the other ranges just to make sure it doesn't break
- # them. Set these flags. They are set to the ord of the character
- # tested so that all pairs of that ord get tested.
- if ($charset eq 'aa') {
- if (! $target_has_ascii && ! $pattern_has_ascii) {
- if ($target_above_latin1 || $pattern_above_latin1) {
- next if defined $has_tested_aa_above_latin1
- && $has_tested_aa_above_latin1 != $ord;
- $has_tested_aa_above_latin1 = $ord;
+ foreach my $charset (@charsets) {
+ $okays = 0;
+ $this_iteration = 0;
+
+ # To cut down somewhat on the enormous quantity of tests this currently
+ # runs, skip some for some of the character sets whose results aren't
+ # likely to differ from others. But run all tests on the code points
+ # that don't fold, plus one other set in each range group.
+ if (! $is_self) {
+
+ # /aa should only affect things with folds in the ASCII range. But, try
+ # it on one set in the other ranges just to make sure it doesn't break
+ # them.
+ if ($charset eq 'aa') {
+ if (! $target_has_ascii && ! $pattern_has_ascii) {
+ if ($target_above_latin1 || $pattern_above_latin1) {
+ next if defined $has_tested_aa_above_latin1
+ && $has_tested_aa_above_latin1 != $test;
+ $has_tested_aa_above_latin1 = $test;
+ }
+ next if defined $has_tested_latin1_aa
+ && $has_tested_latin1_aa != $test;
+ $has_tested_latin1_aa = $test;
+ }
+ elsif ($target_only_ascii && $pattern_only_ascii) {
+
+ # And, except for one set just to make sure, skip tests
+ # where both elements in the pair are ASCII. If one works for
+ # aa, the others are likely too. This skips tests where the
+ # fold is from non-ASCII to ASCII, but this part of the test
+ # is just about the ASCII components.
+ next if defined $has_tested_ascii_l
+ && $has_tested_ascii_l != $test;
+ $has_tested_ascii_l = $test;
+ }
+ }
+ elsif ($charset eq 'l') {
+
+ # For l, don't need to test beyond one set those things that are
+ # all above latin1, because unlikely to have different successes
+ # than /u
+ if (! $target_has_latin1 && ! $pattern_has_latin1) {
+ next if defined $has_tested_above_latin1_l
+ && $has_tested_above_latin1_l != $test;
+ $has_tested_above_latin1_l = $test;
+ }
+ elsif ($target_only_ascii && $pattern_only_ascii) {
+
+ # And, except for one set just to make sure, skip tests
+ # where both elements in the pair are ASCII. This is
+ # essentially the same reasoning as above for /aa.
+ next if defined $has_tested_ascii_l
+ && $has_tested_ascii_l != $test;
+ $has_tested_ascii_l = $test;
+ }
+ }
+ elsif ($charset eq 'd') {
+ # Similarly for d. Beyond one test (besides self) each, we don't
+ # test pairs that are both ascii; or both above latin1, or are
+ # combinations of ascii and above latin1.
+ if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
+ if ($target_has_ascii && $pattern_has_ascii) {
+ next if defined $has_tested_ascii_d
+ && $has_tested_ascii_d != $test;
+ $has_tested_ascii_d = $test
+ }
+ elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
+ next if defined $has_tested_above_latin1_d
+ && $has_tested_above_latin1_d != $test;
+ $has_tested_above_latin1_d = $test;
+ }
+ else {
+ next if defined $has_tested_non_latin1_d
+ && $has_tested_non_latin1_d != $test;
+ $has_tested_non_latin1_d = $test;
+ }
+ }
+ }
+ elsif ($charset eq 'a') {
+ # Similarly for a. This should match identically to /u, so wasn't
+ # tested at all until a bug was found that was thereby missed.
+ # As a compromise, beyond one test (besides self) each, we don't
+ # test pairs that are both ascii; or both above latin1, or are
+ # combinations of ascii and above latin1.
+ if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
+ if ($target_has_ascii && $pattern_has_ascii) {
+ next if defined $has_tested_ascii_a
+ && $has_tested_ascii_a != $test;
+ $has_tested_ascii_a = $test
+ }
+ elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
+ next if defined $has_tested_above_latin1_a
+ && $has_tested_above_latin1_a != $test;
+ $has_tested_above_latin1_a = $test;
+ }
+ else {
+ next if defined $has_tested_non_latin1_a
+ && $has_tested_non_latin1_a != $test;
+ $has_tested_non_latin1_a = $test;
+ }
}
- next if defined $has_tested_latin1_aa && $has_tested_latin1_aa != $ord;
- $has_tested_latin1_aa = $ord;
}
}
foreach my $utf8_pattern (0, 1) {
next if $pattern_above_latin1 && ! $utf8_pattern;
- my $uni_semantics = $utf8_target || $charset eq 'u' || ($charset eq 'd' && $utf8_pattern) || $charset =~ /a/;
+
+ # Our testing of 'l' uses the POSIX locale, which is ASCII-only
+ my $uni_semantics = $charset ne 'l' && ($utf8_target || $charset eq 'u' || ($charset eq 'd' && $utf8_pattern) || $charset =~ /a/);
my $upgrade_pattern = "";
$upgrade_pattern = ' utf8::upgrade($p);' if ! $pattern_above_latin1 && $utf8_pattern;
my $lhs = join "", @x_target;
+ my $lhs_str = eval qq{"$lhs"}; fail($@) if $@;
my @rhs = @x_pattern;
my $rhs = join "", @rhs;
my $should_fail = (! $uni_semantics && $ord >= 128 && $ord < 256 && ! $is_self)
- || ($charset eq 'aa' && $target_has_ascii != $pattern_has_ascii);
+ || ($charset eq 'aa' && $target_has_ascii != $pattern_has_ascii)
+ || ($charset eq 'l' && $target_has_latin1 != $pattern_has_latin1);
# Do simple tests of referencing capture buffers, named and
# numbered.
my $op = '=~';
$op = '!~' if $should_fail;
+ my $todo = 0; # No longer any todo's
my $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
- push @eval_tests, format_test($eval, ++$count, "");
+ run_test($eval, $todo, "");
$eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^(?<grind>$rhs)\\k<grind>\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
- push @eval_tests, format_test($eval, ++$count, "");
+ run_test($eval, $todo, "");
if ($lhs ne $rhs) {
$eval = "my \$c = \"$rhs$lhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
- push @eval_tests, format_test($eval, ++$count, "");
+ run_test($eval, "", "");
$eval = "my \$c = \"$rhs$lhs\"; my \$p = qr/(?$charset:^(?<grind>$rhs)\\k<grind>\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
- push @eval_tests, format_test($eval, ++$count, "");
+ run_test($eval, "", "");
}
+ # See if works on what could be a simple trie.
+ $eval = "my \$c = \"$lhs\"; my \$p = qr/$rhs|xyz/i$charset;$upgrade_target$upgrade_pattern \$c $op \$p";
+ run_test($eval, "", "");
+
foreach my $bracketed (0, 1) { # Put rhs in [...], or not
+ next if $bracketed && @pattern != 1; # bracketed makes these
+ # or's instead of a sequence
foreach my $inverted (0,1) {
next if $inverted && ! $bracketed; # inversion only valid in [^...]
+ next if $inverted && @target != 1; # [perl #89750] multi-char
+ # not valid in [^...]
# In some cases, add an extra character that doesn't fold, and
# looks ok in the output.
: "((${rhs})+,?)";
foreach my $quantifier ("", '?', '*', '+', '{1,3}') {
+ # Perhaps should be TODOs, as are unimplemented, but
+ # maybe will never be implemented
+ next if @pattern != 1 && $quantifier;
+
# A ? or * quantifier normally causes the thing to be
# able to match a null string
my $quantifier_can_match_null = $quantifier eq '?' || $quantifier eq '*';
# fold for /aa and the quantifier isn't sufficient
# to allow it to span to both sides.
$op = 0 if $target_has_ascii && $charset eq 'aa' && $both_sides && ( ! $quantifier || $quantifier eq '?') && $parend < 2;
+
+ # Or for /l
+ $op = 0 if $target_has_latin1 && $charset eq 'l' && $both_sides && ( ! $quantifier || $quantifier eq '?') && $parend < 2;
}
- $op = ($op) ? '=~' : '!~';
-
- my $debug .= " uni_semantics=$uni_semantics, should_fail=$should_fail, bracketed=$bracketed, prepend=$prepend, append=$append, parend=$parend, quantifier=$quantifier, l_anchor=$l_anchor, r_anchor=$r_anchor";
- $debug .= "; pattern_above_latin1=$pattern_above_latin1; utf8_pattern=$utf8_pattern";
- my $eval = "my \$c = \"$prepend$lhs$append\"; my \$p = qr/$quantified/i;$upgrade_target$upgrade_pattern \$c $op \$p";
-
- # XXX Doesn't currently test multi-char folds in pattern
- next if @pattern != 1;
- push @eval_tests, format_test($eval, ++$count, $debug);
-
- # Group tests
- if (@eval_tests >= $clump_execs) {
- #eval "use re qw(Debug COMPILE EXECUTE);" . join ";\n", @eval_tests;
- eval join ";\n", @eval_tests;
- if ($@) {
- fail($@);
- exit 1;
- }
- undef @eval_tests;
+
+ my $desc = "my \$c = \"$prepend$lhs$append\"; "
+ . "my \$p = qr/$quantified/i;"
+ . "$upgrade_target$upgrade_pattern "
+ . "\$c " . ($op ? "=~" : "!~") . " \$p; ";
+ if ($DEBUG) {
+ $desc .= (
+ "; uni_semantics=$uni_semantics, "
+ . "should_fail=$should_fail, "
+ . "bracketed=$bracketed, "
+ . "prepend=$prepend, "
+ . "append=$append, "
+ . "parend=$parend, "
+ . "quantifier=$quantifier, "
+ . "l_anchor=$l_anchor, "
+ . "r_anchor=$r_anchor; "
+ . "pattern_above_latin1=$pattern_above_latin1; "
+ . "utf8_pattern=$utf8_pattern"
+ );
}
+
+ my $c = "$prepend$lhs_str$append";
+ my $p = qr/$quantified/i;
+ utf8::upgrade($c) if length($upgrade_target);
+ utf8::upgrade($p) if length($upgrade_pattern);
+ my $res = $op ? ($c =~ $p): ($c !~ $p);
+
+ if (!$res || $ENV{PERL_DEBUG_FULL_TEST}) {
+ # Failed or debug; output the result
+ $count++;
+ ok($res, $desc);
+ } else {
+ # Just count the test as passed
+ $okays++;
+ }
+ $this_iteration++;
}
}
}
}
}
}
+ unless($ENV{PERL_DEBUG_FULL_TEST}) {
+ $count++;
+ is $okays, $this_iteration, "$okays subtests ok for"
+ . " /$charset,"
+ . ' target="' . join("", @x_target) . '",'
+ . ' pat="' . join("", @x_pattern) . '"';
+ }
}
}
}
-# Finish up any tests not already done
-eval join ";\n", @eval_tests;
-if ($@) {
- fail($@);
- exit 1;
-}
-
plan($count);
1