This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regexec.c: Rmv special code no longer needed
[perl5.git] / t / re / fold_grind.t
CommitLineData
371a505e 1# Grind out a lot of combinatoric tests for folding.
a2d9a01a 2
a2d9a01a
KW
3binmode STDOUT, ":utf8";
4
5BEGIN {
6 chdir 't' if -d 't';
7 @INC = '../lib';
8 require './test.pl';
a59efd0a 9 skip_all_if_miniperl("no dynamic loading on miniperl, no Encode nor POSIX");
a2d9a01a
KW
10}
11
4fd7f5ff
NC
12use charnames ":full";
13
29d01a3e 14my $DEBUG = 0; # Outputs extra information for debugging this .t
abf4d645 15
a2d9a01a
KW
16use strict;
17use warnings;
d08723ac 18use Encode;
a59efd0a 19use POSIX;
a2d9a01a
KW
20
21# Tests both unicode and not, so make sure not implicitly testing unicode
22no feature 'unicode_strings';
23
24# Case-insensitive matching is a large and complicated issue. Perl does not
25# implement it fully, properly. For example, it doesn't include normalization
26# as part of the equation. To test every conceivable combination is clearly
27# impossible; these tests are mostly drawn from visual inspection of the code
28# and experience, trying to exercise all areas.
29
30# There are three basic ranges of characters that Perl may treat differently:
31# 1) Invariants under utf8 which on ASCII-ish machines are ASCII, and are
32# referred to here as ASCII. On EBCDIC machines, the non-ASCII invariants
33# are all controls that fold to themselves.
34my $ASCII = 1;
35
36# 2) Other characters that fit into a byte but are different in utf8 than not;
37# here referred to, taking some liberties, as Latin1.
38my $Latin1 = 2;
39
40# 3) Characters that won't fit in a byte; here referred to as Unicode
41my $Unicode = 3;
42
43# Within these basic groups are equivalence classes that testing any character
44# in is likely to lead to the same results as any other character. This is
45# used to cut down the number of tests needed, unless PERL_RUN_SLOW_TESTS is
46# set.
47my $skip_apparently_redundant = ! $ENV{PERL_RUN_SLOW_TESTS};
48
67fc5dca
DL
49# Additionally parts of this test run a lot of subtests, outputting the
50# resulting TAP can be expensive so the tests are summarised internally. The
51# PERL_DEBUG_FULL_TEST environment variable can be set to produce the full
52# output for debugging purposes.
53
a2d9a01a
KW
54sub range_type {
55 my $ord = shift;
56
57 return $ASCII if $ord < 128;
58 return $Latin1 if $ord < 256;
59 return $Unicode;
60}
61
62sub numerically {
63 return $a <=> $b
64}
65
bb2fbce0 66sub run_test($$$$) {
7a0a13a6 67 my ($test, $count, $todo, $debug) = @_;
abf4d645
KW
68
69 $debug = "" unless $DEBUG;
7a0a13a6 70 $todo = "Known problem" if $todo;
abf4d645 71
bb2fbce0
DM
72 TODO: {
73 local $::TODO = $todo ? "Known problem" : undef;
74 ok(eval $test, "$test; $debug");
75 }
abf4d645
KW
76}
77
29d01a3e 78my %tests; # The final set of tests. keys are the code points to test
a2d9a01a
KW
79my %simple_folds;
80my %multi_folds;
81
82# First, analyze the current Unicode's folding rules
83my %folded_from;
84my $file="../lib/unicore/CaseFolding.txt";
85open my $fh, "<", $file or die "Failed to read '$file': $!";
86while (<$fh>) {
87 chomp;
88
89 # Lines look like (though without the initial '#')
90 #0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
91
92 my ($line, $comment) = split / \s+ \# \s+ /x, $_;
93 next if $line eq "" || substr($line, 0, 1) eq '#';
94 my ($hex_from, $fold_type, @folded) = split /[\s;]+/, $line;
95
96 my $from = hex $hex_from;
97
98 if ($fold_type eq 'F') {
27f6057f
KW
99 my $from_range_type = range_type($from);
100
101 # If we were testing comprehensively, we would try every combination
102 # of upper and lower case in the fold, but it is quite likely that if
103 # the code can handle all combinations if it can handle the cases
104 # where everything is upper and when everything is lower. Because of
105 # complement matching, we need to do both. And we use the
106 # reverse-fold instead of uppercase.
a2d9a01a 107 @folded = map { hex $_ } @folded;
27f6057f
KW
108 # XXX better to use reverse fold of these instead of uc
109 my @uc_folded = map { ord uc chr $_ } @folded;
a2d9a01a
KW
110
111 # Include three code points that are handled internally by the regex
29d01a3e 112 # engine specially, plus all non-above-255 multi folds (which actually
a2d9a01a
KW
113 # the only one is already included in the three, but this makes sure)
114 # And if any member of the fold is not the same range type as the
115 # source, add it directly to the tests. It needs to be an array of an
116 # array, so that it is distinguished from multiple single folds
117 if ($from == 0xDF || $from == 0x390 || $from == 0x3B0
118 || $from_range_type != $Unicode
119 || grep { range_type($_) != $from_range_type } @folded)
120 {
27f6057f 121 $tests{$from} = [ [ @folded ], [ @uc_folded ] ];
a2d9a01a
KW
122 }
123 else {
124
27f6057f
KW
125 # The only multi-char non-utf8 fold is DF, which is handled above,
126 # so here chr() must be utf8. Get the number of bytes in each.
127 # This is because the optimizer cares about length differences.
128 my $from_length = length encode('UTF-8', chr($from));
129 my $to_length = length encode('UTF-8', pack 'U*', @folded);
130 push @{$multi_folds{$from_length}{$to_length}}, { $from => [ [ @folded ], [ @uc_folded ] ] };
a2d9a01a
KW
131 }
132 }
133
134 # Perl only deals with C and F folds
135 next if $fold_type ne 'C';
136
d2025f57
KW
137 # C folds are single-char $from to single-char $folded, in chr terms
138 # folded_from{'s'} = [ 'S', \N{LATIN SMALL LETTER LONG S} ]
a2d9a01a
KW
139 push @{$folded_from{hex $folded[0]}}, $from;
140}
141
d2025f57 142# Now try to sort the single char folds into equivalence classes that are
a2d9a01a
KW
143# likely to have identical successes and failures. Any fold that crosses
144# range types is suspect, and is automatically tested. Otherwise, store by
145# the number of characters that participate in a fold. Likely all folds in a
146# range type that fold to each other like B->b->B will have identical success
147# and failure; similarly all folds that have three characters participating
148# are likely to have the same successes and failures, etc.
149foreach my $folded (sort numerically keys %folded_from) {
150 my $target_range_type = range_type($folded);
151 my $count = @{$folded_from{$folded}};
152
153 # Automatically test any fold that crosses range types
154 if (grep { range_type($_) != $target_range_type } @{$folded_from{$folded}})
155 {
156 $tests{$folded} = $folded_from{$folded};
157 }
158 else {
159 push @{$simple_folds{$target_range_type}{$count}},
160 { $folded => $folded_from{$folded} };
a7caa9e8 161 }
a2d9a01a
KW
162}
163
164foreach my $from_length (keys %multi_folds) {
165 foreach my $fold_length (keys %{$multi_folds{$from_length}}) {
166 #print __LINE__, ref $multi_folds{$from_length}{$fold_length}, Dumper $multi_folds{$from_length}{$fold_length};
167 foreach my $test (@{$multi_folds{$from_length}{$fold_length}}) {
168 #print __LINE__, ": $from_length, $fold_length, $test:\n";
169 my ($target, $pattern) = each %$test;
170 #print __LINE__, ": $target: $pattern\n";
171 $tests{$target} = $pattern;
172 last if $skip_apparently_redundant;
173 }
174 }
175}
176
177# Add in tests for single character folds. Add tests for each range type,
178# and within those tests for each number of characters participating in a
179# fold. Thus B->b has two characters participating. But K->k and Kelvin
180# Sign->k has three characters participating. So we would make sure that
181# there is a test for 3 chars, 4 chars, ... . (Note that the 'k' example is a
182# bad one because it crosses range types, so is automatically tested. In the
183# Unicode range there are various of these 3 and 4 char classes, but aren't as
184# easily described as the 'k' one.)
185foreach my $type (keys %simple_folds) {
186 foreach my $count (keys %{$simple_folds{$type}}) {
187 foreach my $test (@{$simple_folds{$type}{$count}}) {
188 my ($target, $pattern) = each %$test;
189 $tests{$target} = $pattern;
190 last if $skip_apparently_redundant;
191 }
192 }
193}
194
195# For each range type, test additionally a character that folds to itself
196$tests{0x3A} = [ 0x3A ];
197$tests{0xF7} = [ 0xF7 ];
198$tests{0x2C7} = [ 0x2C7 ];
199
a2d9a01a 200
2f7f8cb1
KW
201# To cut down on the number of tests
202my $has_tested_aa_above_latin1;
203my $has_tested_latin1_aa;
6eea66eb 204my $has_tested_ascii_aa;
17580e7a 205my $has_tested_l_above_latin1;
ceb92b9b 206my $has_tested_above_latin1_l;
6eea66eb 207my $has_tested_ascii_l;
963bd580
KW
208my $has_tested_above_latin1_d;
209my $has_tested_ascii_d;
f16e8484 210my $has_tested_non_latin1_d;
419d8974
KW
211my $has_tested_above_latin1_a;
212my $has_tested_ascii_a;
213my $has_tested_non_latin1_a;
2f7f8cb1 214
a2d9a01a
KW
215# For use by pairs() in generating combinations
216sub prefix {
217 my $p = shift;
a7caa9e8 218 map [ $p, $_ ], @_
a2d9a01a
KW
219}
220
221# Returns all ordered combinations of pairs of elements from the input array.
222# It doesn't return pairs like (a, a), (b, b). Change the slice to an array
223# to do that. This was just to have fewer tests.
a7caa9e8 224sub pairs (@) {
a2d9a01a 225 #print __LINE__, ": ", join(" XXX ", @_), "\n";
a7caa9e8 226 map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_
a2d9a01a
KW
227}
228
419d8974 229my @charsets = qw(d u a aa);
a59efd0a
KW
230my $current_locale = POSIX::setlocale( &POSIX::LC_ALL, "C") // "";
231push @charsets, 'l' if $current_locale eq 'C';
a2d9a01a
KW
232
233# Finally ready to do the tests
abf4d645 234my $count=0;
a2d9a01a
KW
235foreach my $test (sort { numerically } keys %tests) {
236
237 my $previous_target;
238 my $previous_pattern;
239 my @pairs = pairs(sort numerically $test, @{$tests{$test}});
240
241 # Each fold can be viewed as a closure of all the characters that
242 # participate in it. Look at each possible pairing from a closure, with the
243 # first member of the pair the target string to match against, and the
244 # second member forming the pattern. Thus each fold member gets tested as
245 # the string, and the pattern with every other member in the opposite role.
246 while (my $pair = shift @pairs) {
247 my ($target, $pattern) = @$pair;
248
249 # When testing a char that doesn't fold, we can get the same
250 # permutation twice; so skip all but the first.
251 next if $previous_target
252 && $previous_target == $target
253 && $previous_pattern == $pattern;
254 ($previous_target, $previous_pattern) = ($target, $pattern);
255
256 # Each side may be either a single char or a string. Extract each into an
257 # array (perhaps of length 1)
258 my @target, my @pattern;
259 @target = (ref $target) ? @$target : $target;
260 @pattern = (ref $pattern) ? @$pattern : $pattern;
261
262 # Have to convert non-utf8 chars to native char set
263 @target = map { $_ > 255 ? $_ : ord latin1_to_native(chr($_)) } @target;
264 @pattern = map { $_ > 255 ? $_ : ord latin1_to_native(chr($_)) } @pattern;
265
266 # Get in hex form.
267 my @x_target = map { sprintf "\\x{%04X}", $_ } @target;
268 my @x_pattern = map { sprintf "\\x{%04X}", $_ } @pattern;
269
270 my $target_above_latin1 = grep { $_ > 255 } @target;
271 my $pattern_above_latin1 = grep { $_ > 255 } @pattern;
2f7f8cb1
KW
272 my $target_has_ascii = grep { $_ < 128 } @target;
273 my $pattern_has_ascii = grep { $_ < 128 } @pattern;
6eea66eb
KW
274 my $target_only_ascii = ! grep { $_ > 127 } @target;
275 my $pattern_only_ascii = ! grep { $_ > 127 } @pattern;
17580e7a 276 my $target_has_latin1 = grep { $_ < 256 } @target;
f16e8484
KW
277 my $target_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @target;
278 my $pattern_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @pattern;
17580e7a 279 my $pattern_has_latin1 = grep { $_ < 256 } @pattern;
a2d9a01a
KW
280 my $is_self = @target == 1 && @pattern == 1 && $target[0] == $pattern[0];
281
282 # We don't test multi-char folding into other multi-chars. We are testing
283 # a code point that folds to or from other characters. Find the single
284 # code point for diagnostic purposes. (If both are single, choose the
285 # target string)
286 my $ord = @target == 1 ? $target[0] : $pattern[0];
7fea222d
KW
287 my $progress = sprintf "%04X: \"%s\" and /%s/",
288 $test,
a2d9a01a
KW
289 join("", @x_target),
290 join("", @x_pattern);
291 #print $progress, "\n";
292 #diag $progress;
293
294 # Now grind out tests, using various combinations.
a59efd0a 295 foreach my $charset (@charsets) {
2f7f8cb1 296
63fb01f9
KW
297 # To cut down somewhat on the enormous quantity of tests this currently
298 # runs, skip some for some of the character sets whose results aren't
299 # likely to differ from others. But run all tests on the code points
300 # that don't fold, plus one other set in each range group.
301 if (! $is_self) {
302
b0d6380c
KW
303 # /aa should only affect things with folds in the ASCII range. But, try
304 # it on one set in the other ranges just to make sure it doesn't break
305 # them.
306 if ($charset eq 'aa') {
307 if (! $target_has_ascii && ! $pattern_has_ascii) {
308 if ($target_above_latin1 || $pattern_above_latin1) {
309 next if defined $has_tested_aa_above_latin1
310 && $has_tested_aa_above_latin1 != $test;
311 $has_tested_aa_above_latin1 = $test;
312 }
313 next if defined $has_tested_latin1_aa
314 && $has_tested_latin1_aa != $test;
315 $has_tested_latin1_aa = $test;
2f7f8cb1 316 }
f02497ec
KW
317 elsif ($target_only_ascii && $pattern_only_ascii) {
318
319 # And, except for one set just to make sure, skip tests
320 # where both elements in the pair are ASCII. If one works for
321 # aa, the others are likely too. This skips tests where the
322 # fold is from non-ASCII to ASCII, but this part of the test
323 # is just about the ASCII components.
324 next if defined $has_tested_ascii_l
325 && $has_tested_ascii_l != $test;
326 $has_tested_ascii_l = $test;
327 }
2f7f8cb1 328 }
b0d6380c 329 elsif ($charset eq 'l') {
6ed220eb
KW
330
331 # For l, don't need to test beyond one set those things that are
7b4853d1
KW
332 # all above latin1, because unlikely to have different successes
333 # than /u
b0d6380c 334 if (! $target_has_latin1 && ! $pattern_has_latin1) {
ceb92b9b
KW
335 next if defined $has_tested_above_latin1_l
336 && $has_tested_above_latin1_l != $test;
337 $has_tested_above_latin1_l = $test;
b0d6380c 338 }
6eea66eb
KW
339 elsif ($target_only_ascii && $pattern_only_ascii) {
340
341 # And, except for one set just to make sure, skip tests
f02497ec
KW
342 # where both elements in the pair are ASCII. This is
343 # essentially the same reasoning as above for /aa.
6eea66eb
KW
344 next if defined $has_tested_ascii_l
345 && $has_tested_ascii_l != $test;
346 $has_tested_ascii_l = $test;
347 }
17580e7a 348 }
f16e8484
KW
349 elsif ($charset eq 'd') {
350 # Similarly for d. Beyond one test (besides self) each, we don't
351 # test pairs that are both ascii; or both above latin1, or are
352 # combinations of ascii and above latin1.
353 if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
354 if ($target_has_ascii && $pattern_has_ascii) {
963bd580
KW
355 next if defined $has_tested_ascii_d
356 && $has_tested_ascii_d != $test;
f16e8484
KW
357 $has_tested_ascii_d = $test
358 }
359 elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
360 next if defined $has_tested_above_latin1_d
361 && $has_tested_above_latin1_d != $test;
362 $has_tested_above_latin1_d = $test;
363 }
364 else {
365 next if defined $has_tested_non_latin1_d
366 && $has_tested_non_latin1_d != $test;
367 $has_tested_non_latin1_d = $test;
368 }
963bd580
KW
369 }
370 }
419d8974
KW
371 elsif ($charset eq 'a') {
372 # Similarly for a. This should match identically to /u, so wasn't
373 # tested at all until a bug was found that was thereby missed.
374 # As a compromise, beyond one test (besides self) each, we don't
375 # test pairs that are both ascii; or both above latin1, or are
376 # combinations of ascii and above latin1.
377 if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
378 if ($target_has_ascii && $pattern_has_ascii) {
379 next if defined $has_tested_ascii_a
380 && $has_tested_ascii_a != $test;
381 $has_tested_ascii_a = $test
382 }
383 elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
384 next if defined $has_tested_above_latin1_a
385 && $has_tested_above_latin1_a != $test;
386 $has_tested_above_latin1_a = $test;
387 }
388 else {
389 next if defined $has_tested_non_latin1_a
390 && $has_tested_non_latin1_a != $test;
391 $has_tested_non_latin1_a = $test;
392 }
393 }
394 }
17580e7a 395 }
2f7f8cb1 396
a2d9a01a
KW
397 foreach my $utf8_target (0, 1) { # Both utf8 and not, for
398 # code points < 256
399 my $upgrade_target = "";
400
401 # These must already be in utf8 because the string to match has
402 # something above latin1. So impossible to test if to not to be in
403 # utf8; and otherwise, no upgrade is needed.
404 next if $target_above_latin1 && ! $utf8_target;
d08723ac 405 $upgrade_target = ' utf8::upgrade($c);' if ! $target_above_latin1 && $utf8_target;
a2d9a01a 406
d08723ac
KW
407 foreach my $utf8_pattern (0, 1) {
408 next if $pattern_above_latin1 && ! $utf8_pattern;
17580e7a
KW
409
410 # Our testing of 'l' uses the POSIX locale, which is ASCII-only
411 my $uni_semantics = $charset ne 'l' && ($utf8_target || $charset eq 'u' || ($charset eq 'd' && $utf8_pattern) || $charset =~ /a/);
a2d9a01a 412 my $upgrade_pattern = "";
d08723ac 413 $upgrade_pattern = ' utf8::upgrade($p);' if ! $pattern_above_latin1 && $utf8_pattern;
a2d9a01a
KW
414
415 my $lhs = join "", @x_target;
aa3ca102 416 my $lhs_str = eval qq{"$lhs"}; fail($@) if $@;
a2d9a01a 417 my @rhs = @x_pattern;
371a505e 418 my $rhs = join "", @rhs;
2f7f8cb1 419 my $should_fail = (! $uni_semantics && $ord >= 128 && $ord < 256 && ! $is_self)
17580e7a
KW
420 || ($charset eq 'aa' && $target_has_ascii != $pattern_has_ascii)
421 || ($charset eq 'l' && $target_has_latin1 != $pattern_has_latin1);
371a505e
KW
422
423 # Do simple tests of referencing capture buffers, named and
424 # numbered.
425 my $op = '=~';
426 $op = '!~' if $should_fail;
d2025f57 427
7a0a13a6
KW
428 # I'm afraid this was derived from trial and error.
429 my $todo = ($test == 0xdf
430 && $lhs =~ /DF/
431 && $uni_semantics
419d8974
KW
432 && ($charset eq 'u' || $charset eq 'a' || $charset eq 'd')
433 && ! (($charset eq 'u' || $charset eq 'a')
434 && (($upgrade_target eq "") != ($upgrade_pattern eq "")))
7a0a13a6
KW
435 && ! ($charset eq 'd' && (! $upgrade_target || ! $upgrade_pattern))
436 );
371a505e 437 my $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
bb2fbce0 438 run_test($eval, ++$count, $todo, "");
abf4d645 439
371a505e 440 $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^(?<grind>$rhs)\\k<grind>\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
bb2fbce0 441 run_test($eval, ++$count, $todo, "");
abf4d645 442
371a505e
KW
443 if ($lhs ne $rhs) {
444 $eval = "my \$c = \"$rhs$lhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
bb2fbce0 445 run_test($eval, ++$count, "", "");
abf4d645 446
371a505e 447 $eval = "my \$c = \"$rhs$lhs\"; my \$p = qr/(?$charset:^(?<grind>$rhs)\\k<grind>\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";
bb2fbce0 448 run_test($eval, ++$count, "", "");
371a505e 449 }
371a505e 450
6f9cf5ec
DM
451 # XXX Doesn't currently test multi-char folds in pattern
452 next if @pattern != 1;
453
67fc5dca
DL
454 my $okays = 0;
455 my $this_iteration = 0;
456
1ef17b72 457 foreach my $bracketed (0, 1) { # Put rhs in [...], or not
a2d9a01a 458 foreach my $inverted (0,1) {
d2025f57 459 next if $inverted && ! $bracketed; # inversion only valid in [^...]
a2d9a01a
KW
460
461 # In some cases, add an extra character that doesn't fold, and
462 # looks ok in the output.
463 my $extra_char = "_";
464 foreach my $prepend ("", $extra_char) {
465 foreach my $append ("", $extra_char) {
a2d9a01a
KW
466
467 # Assemble the rhs. Put each character in a separate
468 # bracketed if using charclasses. This creates a stress on
469 # the code to span a match across multiple elements
470 my $rhs = "";
471 foreach my $rhs_char (@rhs) {
472 $rhs .= '[' if $bracketed;
473 $rhs .= '^' if $inverted;
474 $rhs .= $rhs_char;
475
476 # Add a character to the class, so class doesn't get
477 # optimized out
478 $rhs .= '_]' if $bracketed;
479 }
480
481 # Add one of: no capturing parens
482 # a single set
483 # a nested set
484 # Use quantifiers and extra variable width matches inside
485 # them to keep some optimizations from happening
486 foreach my $parend (0, 1, 2) {
487 my $interior = (! $parend)
488 ? $rhs
489 : ($parend == 1)
490 ? "(${rhs},?)"
491 : "((${rhs})+,?)";
492 foreach my $quantifier ("", '?', '*', '+', '{1,3}') {
493
494 # A ? or * quantifier normally causes the thing to be
495 # able to match a null string
496 my $quantifier_can_match_null = $quantifier eq '?' || $quantifier eq '*';
497
498 # But since we only quantify the last character in a
499 # multiple fold, the other characters will have width,
500 # except if we are quantifying the whole rhs
501 my $can_match_null = $quantifier_can_match_null && (@rhs == 1 || $parend);
502
503 foreach my $l_anchor ("", '^') { # '\A' didn't change result)
504 foreach my $r_anchor ("", '$') { # '\Z', '\z' didn't change result)
505
506 # The folded part can match the null string if it
507 # isn't required to have width, and there's not
508 # something on one or both sides that force it to.
2f7f8cb1
KW
509 my $both_sides = ($l_anchor && $r_anchor) || ($l_anchor && $append) || ($r_anchor && $prepend) || ($prepend && $append);
510 my $must_match = ! $can_match_null || $both_sides;
511 # for performance, but doing this missed many failures
a2d9a01a 512 #next unless $must_match;
d08723ac 513 my $quantified = "(?$charset:$l_anchor$prepend$interior${quantifier}$append$r_anchor)";
a2d9a01a 514 my $op;
d08723ac 515 if ($must_match && $should_fail) {
a2d9a01a
KW
516 $op = 0;
517 } else {
518 $op = 1;
519 }
520 $op = ! $op if $must_match && $inverted;
27f6057f
KW
521
522 if ($inverted && @target > 1) {
523 # When doing an inverted match against a
524 # multi-char target, and there is not something on
525 # the left to anchor the match, if it shouldn't
526 # succeed, skip, as what will happen (when working
527 # correctly) is that it will match the first
528 # position correctly, and then be inverted to not
529 # match; then it will go to the second position
530 # where it won't match, but get inverted to match,
531 # and hence succeeding.
532 next if ! ($l_anchor || $prepend) && ! $op;
533
534 # Can't ever match for latin1 code points non-uni
535 # semantics that have a inverted multi-char fold
536 # when there is something on both sides and the
537 # quantifier isn't such as to span the required
538 # width, which is 2 or 3.
539 $op = 0 if $ord < 255
540 && ! $uni_semantics
541 && $both_sides
542 && ( ! $quantifier || $quantifier eq '?')
543 && $parend < 2;
544
545 # Similarly can't ever match when inverting a multi-char
546 # fold for /aa and the quantifier isn't sufficient
547 # to allow it to span to both sides.
548 $op = 0 if $target_has_ascii && $charset eq 'aa' && $both_sides && ( ! $quantifier || $quantifier eq '?') && $parend < 2;
549
550 # Or for /l
551 $op = 0 if $target_has_latin1 && $charset eq 'l' && $both_sides && ( ! $quantifier || $quantifier eq '?') && $parend < 2;
552 }
553
85882dd7
DM
554
555 my $desc = "my \$c = \"$prepend$lhs$append\"; "
556 . "my \$p = qr/$quantified/i;"
557 . "$upgrade_target$upgrade_pattern "
558 . "\$c " . ($op ? "=~" : "!~") . " \$p; ";
559 if ($DEBUG) {
560 $desc .= (
561 "; uni_semantics=$uni_semantics, "
562 . "should_fail=$should_fail, "
563 . "bracketed=$bracketed, "
564 . "prepend=$prepend, "
565 . "append=$append, "
566 . "parend=$parend, "
567 . "quantifier=$quantifier, "
568 . "l_anchor=$l_anchor, "
569 . "r_anchor=$r_anchor; "
570 . "pattern_above_latin1=$pattern_above_latin1; "
571 . "utf8_pattern=$utf8_pattern"
572 );
573 }
574
aa3ca102
DM
575 my $c = "$prepend$lhs_str$append";
576 my $p = qr/$quantified/i;
577 utf8::upgrade($c) if length($upgrade_target);
578 utf8::upgrade($p) if length($upgrade_pattern);
85882dd7 579 my $res = $op ? ($c =~ $p): ($c !~ $p);
aa3ca102 580
67fc5dca
DL
581 if (!$res || $ENV{PERL_DEBUG_FULL_TEST}) {
582 # Failed or debug; output the result
583 $count++;
584 ok($res, $desc);
585 } else {
586 # Just count the test as passed
587 $okays++;
588 }
589 $this_iteration++;
a2d9a01a
KW
590 }
591 }
592 }
593 }
594 }
595 }
596 }
597 }
67fc5dca
DL
598
599 unless($ENV{PERL_DEBUG_FULL_TEST}) {
600 $count++;
601 is $okays, $this_iteration, "Subtests okay for "
602 . "charset=$charset, utf8_pattern=$utf8_pattern";
603 }
a2d9a01a
KW
604 }
605 }
606 }
607 }
608}
609
abf4d645 610plan($count);
a2d9a01a
KW
611
6121