Commit | Line | Data |
---|---|---|
daf3b8d4 KW |
1 | use strict; |
2 | use warnings; | |
3 | ||
4 | # re/fold_grind.t has more complex tests, but doesn't test every fold | |
5 | ||
9e55ce06 JH |
6 | BEGIN { |
7 | chdir 't' if -d 't'; | |
8 | @INC = '../lib'; | |
daf3b8d4 | 9 | require './test.pl'; |
9e55ce06 JH |
10 | } |
11 | ||
daf3b8d4 KW |
12 | binmode *STDOUT, ":utf8"; |
13 | ||
9e55ce06 | 14 | use File::Spec; |
daf3b8d4 | 15 | our $TODO; |
9e55ce06 | 16 | |
daf3b8d4 KW |
17 | plan("no_plan"); |
18 | ||
19 | # Read in the official case folding definitions. | |
9e55ce06 JH |
20 | my $CF = File::Spec->catfile(File::Spec->catdir(File::Spec->updir, |
21 | "lib", "unicore"), | |
551b6b6f | 22 | "CaseFolding.txt"); |
9e55ce06 | 23 | |
daf3b8d4 KW |
24 | die qq[$0: failed to open "$CF": $!\n] if ! open(my $fh, "<", $CF); |
25 | ||
26 | my @CF; | |
27 | my %reverse_fold; | |
28 | while (<$fh>) { | |
29 | # Skip S since we are going for 'F'ull case folding. I is obsolete starting | |
30 | # with Unicode 3.2, but leaving it in does no harm, and allows backward | |
31 | # compatibility | |
32 | next unless my ($code, $type, $mapping, $name) = $_ =~ | |
33 | /^([0-9A-F]+); ([CFI]); ((?:[0-9A-F]+)(?: [0-9A-F]+)*); \# (.+)/; | |
34 | ||
35 | # Convert any 0-255 range chars to native. | |
36 | $code = sprintf("%04X", ord_latin1_to_native(hex $code)) if hex $code < 0x100; | |
37 | $mapping = join " ", map { $_ = | |
38 | sprintf("%04X", ord_latin1_to_native(hex $_)) } | |
39 | split / /, $mapping; | |
40 | ||
41 | push @CF, [$code, $mapping, $type, $name]; | |
42 | ||
43 | # Get the inverse fold for single-char mappings. | |
44 | $reverse_fold{pack "U0U*", hex $mapping} = pack "U0U*", hex $code if $type ne 'F'; | |
45 | } | |
80bf4fef | 46 | |
daf3b8d4 | 47 | close($fh) or die "$0 Couldn't close $CF"; |
9e55ce06 | 48 | |
daf3b8d4 KW |
49 | foreach my $test_ref (@CF) { |
50 | my ($code, $mapping, $type, $name) = @$test_ref; | |
51 | my $c = pack("U0U*", hex $code); | |
52 | my $f = pack("U0U*", map { hex } split " ", $mapping); | |
53 | my $f_length = length $f; | |
54 | foreach my $test ( | |
55 | qq[":$c:" =~ /:$c:/], | |
56 | qq[":$c:" =~ /:$c:/i], | |
57 | qq[":$c:" =~ /:[_$c]:/], # Place two chars in [] so doesn't get | |
58 | # optimized to a non-charclass | |
59 | qq[":$c:" =~ /:[_$c]:/i], | |
60 | qq[":$c:" =~ /:$f:/i], | |
61 | qq[":$f:" =~ /:$c:/i], | |
62 | ) { | |
63 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
9e55ce06 JH |
64 | } |
65 | ||
daf3b8d4 KW |
66 | # Certain tests weren't convenient to put in the list above since they are |
67 | # TODO's in multi-character folds. | |
68 | if ($f_length == 1) { | |
69 | ||
70 | # The qq loses the utf8ness of ":$f:". These tests are not about | |
71 | # finding bugs in utf8ness, so make sure it's utf8. | |
72 | my $test = qq[my \$s = ":$f:"; utf8::upgrade(\$s); \$s =~ /:[_$c]:/i]; | |
73 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
74 | $test = qq[":$c:" =~ /:[_$f]:/i]; | |
75 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
76 | } | |
77 | else { | |
78 | ||
79 | # There are two classes of multi-char folds that don't pass. For | |
80 | # example, | |
81 | # ":ß:" =~ /:[_s]{2}:/i | |
82 | # ":ss:" =~ /:[_ß]:/i | |
83 | # | |
84 | # Some of the old tests for the second case happened to pass somewhat | |
85 | # coincidentally. But none would pass if changed to this. | |
86 | # ":SS:" =~ /:[_ß]:/i | |
87 | # | |
88 | # As the capital SS doesn't get folded. When those pass, it means | |
89 | # that the code has been changed to take into account folding in the | |
90 | # string, and all should pass, capitalized or not. So, what is done | |
91 | # is to essentially upper-case the string for this class (but use the | |
92 | # reverse fold not uc(), as that is more correct) | |
93 | my $u; | |
94 | for my $i (0 .. $f_length - 1) { | |
95 | my $cur_char = substr($f, $i, 1); | |
96 | $u .= $reverse_fold{$cur_char} || $cur_char; | |
97 | } | |
98 | my $test; | |
99 | ||
100 | local $TODO = 'Multi-char fold in [character class]'; | |
101 | ||
102 | TODO: { # e.g., ":ß:" !~ /:[_s]:/i # A multi-char fold should not | |
103 | # match just one char | |
104 | $test = qq[":$c:" !~ /:[_$f]:/i]; | |
105 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
106 | } | |
107 | TODO: { # e.g., ":ß:" =~ /:[_s]{2}:/i | |
108 | $test = qq[":$c:" =~ /:[_$f]{$f_length}:/i]; | |
109 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
110 | } | |
111 | TODO: { # e.g., ":SS:" =~ /:[_ß]:/i | |
112 | $test = qq[ my \$s = ":$u:"; utf8::upgrade(\$s); \$s =~ /:[_$c]:/i]; | |
113 | ok eval $test, "$code - $name - $mapping - $type - $test"; | |
114 | } | |
9e55ce06 | 115 | } |
9e55ce06 | 116 | } |
daf3b8d4 KW |
117 | |
118 | my $num_tests = curr_test() - 1; | |
119 | ||
120 | die qq[$0: failed to find casefoldings from "$CF"\n] unless $num_tests > 0; | |
121 | ||
122 | plan($num_tests); |