Commit | Line | Data |
---|---|---|
daf3b8d4 KW |
1 | use strict; |
2 | use warnings; | |
3 | ||
4 | # re/fold_grind.t has more complex tests, but doesn't test every fold | |
5 | ||
9e55ce06 JH |
6 | BEGIN { |
7 | chdir 't' if -d 't'; | |
8 | @INC = '../lib'; | |
daf3b8d4 | 9 | require './test.pl'; |
9e55ce06 JH |
10 | } |
11 | ||
daf3b8d4 KW |
12 | binmode *STDOUT, ":utf8"; |
13 | ||
daf3b8d4 | 14 | our $TODO; |
9e55ce06 | 15 | |
daf3b8d4 KW |
16 | plan("no_plan"); |
17 | ||
18 | # Read in the official case folding definitions. | |
519ecd2c | 19 | my $CF = '../lib/unicore/CaseFolding.txt'; |
9e55ce06 | 20 | |
daf3b8d4 KW |
21 | die qq[$0: failed to open "$CF": $!\n] if ! open(my $fh, "<", $CF); |
22 | ||
23 | my @CF; | |
24 | my %reverse_fold; | |
25 | while (<$fh>) { | |
26 | # Skip S since we are going for 'F'ull case folding. I is obsolete starting | |
27 | # with Unicode 3.2, but leaving it in does no harm, and allows backward | |
28 | # compatibility | |
29 | next unless my ($code, $type, $mapping, $name) = $_ =~ | |
30 | /^([0-9A-F]+); ([CFI]); ((?:[0-9A-F]+)(?: [0-9A-F]+)*); \# (.+)/; | |
31 | ||
32 | # Convert any 0-255 range chars to native. | |
33 | $code = sprintf("%04X", ord_latin1_to_native(hex $code)) if hex $code < 0x100; | |
34 | $mapping = join " ", map { $_ = | |
35 | sprintf("%04X", ord_latin1_to_native(hex $_)) } | |
36 | split / /, $mapping; | |
37 | ||
38 | push @CF, [$code, $mapping, $type, $name]; | |
39 | ||
40 | # Get the inverse fold for single-char mappings. | |
41 | $reverse_fold{pack "U0U*", hex $mapping} = pack "U0U*", hex $code if $type ne 'F'; | |
42 | } | |
80bf4fef | 43 | |
daf3b8d4 | 44 | close($fh) or die "$0 Couldn't close $CF"; |
9e55ce06 | 45 | |
daf3b8d4 KW |
46 | foreach my $test_ref (@CF) { |
47 | my ($code, $mapping, $type, $name) = @$test_ref; | |
48 | my $c = pack("U0U*", hex $code); | |
49 | my $f = pack("U0U*", map { hex } split " ", $mapping); | |
50 | my $f_length = length $f; | |
51 | foreach my $test ( | |
52 | qq[":$c:" =~ /:$c:/], | |
53 | qq[":$c:" =~ /:$c:/i], | |
54 | qq[":$c:" =~ /:[_$c]:/], # Place two chars in [] so doesn't get | |
55 | # optimized to a non-charclass | |
56 | qq[":$c:" =~ /:[_$c]:/i], | |
57 | qq[":$c:" =~ /:$f:/i], | |
58 | qq[":$f:" =~ /:$c:/i], | |
59 | ) { | |
60 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
9e55ce06 JH |
61 | } |
62 | ||
daf3b8d4 KW |
63 | # Certain tests weren't convenient to put in the list above since they are |
64 | # TODO's in multi-character folds. | |
65 | if ($f_length == 1) { | |
66 | ||
67 | # The qq loses the utf8ness of ":$f:". These tests are not about | |
68 | # finding bugs in utf8ness, so make sure it's utf8. | |
69 | my $test = qq[my \$s = ":$f:"; utf8::upgrade(\$s); \$s =~ /:[_$c]:/i]; | |
70 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
71 | $test = qq[":$c:" =~ /:[_$f]:/i]; | |
72 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
73 | } | |
74 | else { | |
75 | ||
03b99f60 | 76 | # There are two classes of multi-char folds that need more work. For |
daf3b8d4 KW |
77 | # example, |
78 | # ":ß:" =~ /:[_s]{2}:/i | |
79 | # ":ss:" =~ /:[_ß]:/i | |
80 | # | |
81 | # Some of the old tests for the second case happened to pass somewhat | |
82 | # coincidentally. But none would pass if changed to this. | |
83 | # ":SS:" =~ /:[_ß]:/i | |
84 | # | |
85 | # As the capital SS doesn't get folded. When those pass, it means | |
86 | # that the code has been changed to take into account folding in the | |
03b99f60 KW |
87 | # string, and all should pass, capitalized or not (this wouldn't be |
88 | # true for [^complemented character classes], for which the fold case | |
89 | # is better, but these aren't used in this .t currently. So, what is | |
90 | # done is to essentially upper-case the string for this class (but use | |
91 | # the reverse fold not uc(), as that is more correct) | |
daf3b8d4 KW |
92 | my $u; |
93 | for my $i (0 .. $f_length - 1) { | |
94 | my $cur_char = substr($f, $i, 1); | |
95 | $u .= $reverse_fold{$cur_char} || $cur_char; | |
96 | } | |
97 | my $test; | |
98 | ||
5a6441ac KW |
99 | # A multi-char fold should not match just one char; |
100 | # e.g., ":ß:" !~ /:[_s]:/i | |
101 | $test = qq[":$c:" !~ /:[_$f]:/i]; | |
102 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
103 | ||
daf3b8d4 | 104 | TODO: { # e.g., ":ß:" =~ /:[_s]{2}:/i |
03b99f60 KW |
105 | local $TODO = 'Multi-char fold in [character class]'; |
106 | ||
daf3b8d4 KW |
107 | $test = qq[":$c:" =~ /:[_$f]{$f_length}:/i]; |
108 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
109 | } | |
03b99f60 KW |
110 | |
111 | # e.g., ":SS:" =~ /:[_ß]:/i now pass, so TODO has been removed, but | |
112 | # since they use '$u', they are left out of the main loop | |
113 | $test = qq[ my \$s = ":$u:"; utf8::upgrade(\$s); \$s =~ /:[_$c]:/i]; | |
114 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
9e55ce06 | 115 | } |
9e55ce06 | 116 | } |
daf3b8d4 KW |
117 | |
118 | my $num_tests = curr_test() - 1; | |
119 | ||
120 | die qq[$0: failed to find casefoldings from "$CF"\n] unless $num_tests > 0; | |
121 | ||
122 | plan($num_tests); |