Commit | Line | Data |
---|---|---|
00f254e2 KW |
1 | use warnings; |
2 | use strict; | |
3 | ||
4 | BEGIN { | |
5 | chdir 't' if -d 't'; | |
6 | @INC = '../lib'; | |
7 | require './test.pl'; | |
8 | } | |
9 | ||
a12cf05f | 10 | plan(20736); # Determined by experimentation |
00f254e2 | 11 | |
a12cf05f KW |
12 | # In this section, test the upper/lower/title case mappings for all characters |
13 | # 0-255. | |
00f254e2 KW |
14 | |
15 | # First compute the case mappings without resorting to the functions we're | |
16 | # testing. | |
17 | ||
18 | # Initialize the arrays so each $i maps to itself. | |
19 | my @posix_to_upper; | |
20 | for my $i (0 .. 255) { | |
21 | $posix_to_upper[$i] = chr($i); | |
22 | } | |
23 | my @posix_to_lower | |
24 | = my @posix_to_title | |
25 | = my @latin1_to_upper | |
26 | = my @latin1_to_lower | |
27 | = my @latin1_to_title | |
28 | = @posix_to_upper; | |
29 | ||
4e89eca0 KW |
30 | # Override the elements in the to_lower arrays that have different standard |
31 | # lower case mappings. (standard meaning they are 32 numbers apart) | |
32 | for my $i (0x41 .. 0x5A, 0xC0 .. 0xD6, 0xD8 .. 0xDE) { | |
33 | my $upper_ord = ord_latin1_to_native $i; | |
34 | my $lower_ord = ord_latin1_to_native($i + 32); | |
35 | ||
36 | $latin1_to_lower[$upper_ord] = chr($lower_ord); | |
37 | ||
38 | next if $i > 127; | |
39 | ||
40 | $posix_to_lower[$upper_ord] = chr($lower_ord); | |
00f254e2 KW |
41 | } |
42 | ||
43 | # Same for upper and title | |
4e89eca0 KW |
44 | for my $i (0x61 .. 0x7A, 0xE0 .. 0xF6, 0xF8 .. 0xFE) { |
45 | my $lower_ord = ord_latin1_to_native $i; | |
46 | my $upper_ord = ord_latin1_to_native($i - 32); | |
00f254e2 | 47 | |
4e89eca0 KW |
48 | $latin1_to_upper[$lower_ord] = chr($upper_ord); |
49 | $latin1_to_title[$lower_ord] = chr($upper_ord); | |
50 | ||
51 | next if $i > 127; | |
52 | ||
53 | $posix_to_upper[$lower_ord] = chr($upper_ord); | |
54 | $posix_to_title[$lower_ord] = chr($upper_ord); | |
00f254e2 KW |
55 | } |
56 | ||
57 | # Override the abnormal cases. | |
4e89eca0 KW |
58 | $latin1_to_upper[ord_latin1_to_native 0xB5] = chr(0x39C); |
59 | $latin1_to_title[ord_latin1_to_native 0xB5] = chr(0x39C); | |
60 | $latin1_to_upper[ord_latin1_to_native 0xDF] = 'SS'; | |
61 | $latin1_to_title[ord_latin1_to_native 0xDF] = 'Ss'; | |
62 | $latin1_to_upper[ord_latin1_to_native 0xFF] = chr(0x178); | |
63 | $latin1_to_title[ord_latin1_to_native 0xFF] = chr(0x178); | |
00f254e2 KW |
64 | |
65 | my $repeat = 25; # Length to make strings. | |
66 | ||
67 | # Create hashes of strings in several ranges, both for uc and lc. | |
68 | my %posix; | |
69 | $posix{'uc'} = 'A' x $repeat; | |
70 | $posix{'lc'} = 'a' x $repeat ; | |
71 | ||
72 | my %cyrillic; | |
73 | $cyrillic{'uc'} = chr(0x42F) x $repeat; | |
74 | $cyrillic{'lc'} = chr(0x44F) x $repeat; | |
75 | ||
76 | my %latin1; | |
4e89eca0 KW |
77 | $latin1{'uc'} = chr(ord_latin1_to_native 0xD8) x $repeat; |
78 | $latin1{'lc'} = chr(ord_latin1_to_native 0xF8) x $repeat; | |
00f254e2 KW |
79 | |
80 | my %empty; | |
81 | $empty{'lc'} = $empty{'uc'} = ""; | |
82 | ||
83 | # Loop so prefix each character being tested with nothing, and the various | |
84 | # strings; then loop for suffixes of those strings as well. | |
85 | for my $prefix (\%empty, \%posix, \%cyrillic, \%latin1) { | |
86 | for my $suffix (\%empty, \%posix, \%cyrillic, \%latin1) { | |
87 | for my $i (0 .. 255) { # For each possible posix or latin1 character | |
61fc5122 | 88 | my $cp = sprintf "U+%04X", $i; |
00f254e2 KW |
89 | |
90 | # First try using latin1 (Unicode) semantics. | |
9fab35f2 | 91 | use feature "unicode_strings"; |
00f254e2 | 92 | |
1e9e76cc | 93 | my $phrase = 'in uni8bit'; |
00f254e2 KW |
94 | my $char = chr($i); |
95 | my $pre_lc = $prefix->{'lc'}; | |
96 | my $pre_uc = $prefix->{'uc'}; | |
97 | my $post_lc = $suffix->{'lc'}; | |
98 | my $post_uc = $suffix->{'uc'}; | |
99 | my $to_upper = $pre_lc . $char . $post_lc; | |
100 | my $expected_upper = $pre_uc . $latin1_to_upper[$i] . $post_uc; | |
101 | my $to_lower = $pre_uc . $char . $post_uc; | |
102 | my $expected_lower = $pre_lc . $latin1_to_lower[$i] . $post_lc; | |
103 | ||
104 | is (uc($to_upper), $expected_upper, | |
1e9e76cc | 105 | display("$cp: $phrase: Verify uc($to_upper) eq $expected_upper")); |
00f254e2 | 106 | is (lc($to_lower), $expected_lower, |
1e9e76cc | 107 | display("$cp: $phrase: Verify lc($to_lower) eq $expected_lower")); |
00f254e2 KW |
108 | |
109 | if ($pre_uc eq "") { # Title case if null prefix. | |
110 | my $expected_title = $latin1_to_title[$i] . $post_lc; | |
111 | is (ucfirst($to_upper), $expected_title, | |
1e9e76cc | 112 | display("$cp: $phrase: Verify ucfirst($to_upper) eq $expected_title")); |
00f254e2 KW |
113 | my $expected_lcfirst = $latin1_to_lower[$i] . $post_uc; |
114 | is (lcfirst($to_lower), $expected_lcfirst, | |
1e9e76cc | 115 | display("$cp: $phrase: Verify lcfirst($to_lower) eq $expected_lcfirst")); |
00f254e2 KW |
116 | } |
117 | ||
118 | # Then try with posix semantics. | |
1863b879 | 119 | no feature "unicode_strings"; |
61fc5122 | 120 | $phrase = 'no uni8bit'; |
00f254e2 KW |
121 | |
122 | # These don't contribute anything in this case. | |
123 | next if $suffix == \%cyrillic; | |
124 | next if $suffix == \%latin1; | |
125 | next if $prefix == \%cyrillic; | |
126 | next if $prefix == \%latin1; | |
127 | ||
128 | $expected_upper = $pre_uc . $posix_to_upper[$i] . $post_uc; | |
129 | $expected_lower = $pre_lc . $posix_to_lower[$i] . $post_lc; | |
130 | ||
131 | is (uc($to_upper), $expected_upper, | |
1e9e76cc | 132 | display("$cp: $phrase: Verify uc($to_upper) eq $expected_upper")); |
00f254e2 | 133 | is (lc($to_lower), $expected_lower, |
1e9e76cc | 134 | display("$cp: $phrase: Verify lc($to_lower) eq $expected_lower")); |
00f254e2 KW |
135 | |
136 | if ($pre_uc eq "") { | |
137 | my $expected_title = $posix_to_title[$i] . $post_lc; | |
138 | is (ucfirst($to_upper), $expected_title, | |
1e9e76cc | 139 | display("$cp: $phrase: Verify ucfirst($to_upper) eq $expected_title")); |
00f254e2 KW |
140 | my $expected_lcfirst = $posix_to_lower[$i] . $post_uc; |
141 | is (lcfirst($to_lower), $expected_lcfirst, | |
1e9e76cc | 142 | display("$cp: $phrase: Verify lcfirst($to_lower) eq $expected_lcfirst")); |
00f254e2 KW |
143 | } |
144 | } | |
145 | } | |
146 | } | |
a12cf05f | 147 | |
4e89eca0 KW |
148 | # In this section test that \w, \s, and \b (and complements) work correctly. |
149 | # These are the only character classes affected by this pragma. Above ASCII | |
150 | # range Latin-1 characters are in \w and \s iff the pragma is on. | |
a12cf05f | 151 | |
4e89eca0 KW |
152 | # Construct the expected full Latin1 values without using anything we're |
153 | # testing. All these were determined manually by looking in the manual. | |
154 | # Boolean: is w[$i] a \w character? | |
a12cf05f | 155 | my @w = (0) x 256; |
4e89eca0 KW |
156 | for my $i ( 0x30 .. 0x39, # 0-9 |
157 | 0x41 .. 0x5a, # A-Z | |
158 | 0x61 .. 0x7a, # a-z | |
159 | 0x5F, # _ | |
160 | 0xAA, # FEMININE ORDINAL INDICATOR | |
161 | 0xB5, # MICRO SIGN | |
162 | 0xBA, # MASCULINE ORDINAL INDICATOR | |
163 | 0xC0 .. 0xD6, # various | |
164 | 0xD8 .. 0xF6, # various | |
165 | 0xF8 .. 0xFF, # various | |
166 | ) | |
167 | { | |
168 | $w[ord_latin1_to_native $i] = 1; | |
169 | } | |
170 | ||
171 | # Boolean: is s[$i] a \s character? | |
a12cf05f | 172 | my @s = (0) x 256; |
4e89eca0 KW |
173 | $s[ord_latin1_to_native 0x09] = 1; # Tab |
174 | $s[ord_latin1_to_native 0x0A] = 1; # LF | |
075b9d7d | 175 | $s[ord_latin1_to_native 0x0B] = 1; # VT |
4e89eca0 KW |
176 | $s[ord_latin1_to_native 0x0C] = 1; # FF |
177 | $s[ord_latin1_to_native 0x0D] = 1; # CR | |
178 | $s[ord_latin1_to_native 0x20] = 1; # SPACE | |
179 | $s[ord_latin1_to_native 0x85] = 1; # NEL | |
180 | $s[ord_latin1_to_native 0xA0] = 1; # NO BREAK SPACE | |
a12cf05f KW |
181 | |
182 | for my $i (0 .. 255) { | |
183 | my $char = chr($i); | |
184 | my $hex_i = sprintf "%02X", $i; | |
185 | foreach my $which (\@s, \@w) { | |
186 | my $basic_name; | |
187 | if ($which == \@s) { | |
188 | $basic_name = 's'; | |
189 | } else { | |
190 | $basic_name = 'w' | |
191 | } | |
192 | ||
193 | # Test \w \W \s \S | |
194 | foreach my $complement (0, 1) { | |
195 | my $name = '\\' . (($complement) ? uc($basic_name) : $basic_name); | |
196 | ||
197 | # in and out of [...] | |
198 | foreach my $charclass (0, 1) { | |
199 | ||
200 | # And like [^...] or just plain [...] | |
201 | foreach my $complement_class (0, 1) { | |
202 | next if ! $charclass && $complement_class; | |
203 | ||
204 | # Start with the boolean as to if the character is in the | |
205 | # class, and then complement as needed. | |
206 | my $expect_success = $which->[$i]; | |
207 | $expect_success = ! $expect_success if $complement; | |
208 | $expect_success = ! $expect_success if $complement_class; | |
209 | ||
210 | my $test = $name; | |
211 | $test = "^$test" if $complement_class; | |
212 | $test = "[$test]" if $charclass; | |
213 | $test = "^$test\$"; | |
214 | ||
215 | use feature 'unicode_strings'; | |
216 | my $prefix = "in uni8bit; Verify chr(0x$hex_i)"; | |
217 | if ($expect_success) { | |
218 | like($char, qr/$test/, display("$prefix =~ qr/$test/")); | |
219 | } else { | |
220 | unlike($char, qr/$test/, display("$prefix !~ qr/$test/")); | |
221 | } | |
222 | ||
223 | no feature 'unicode_strings'; | |
224 | $prefix = "no uni8bit; Verify chr(0x$hex_i)"; | |
225 | ||
226 | # With the legacy, nothing above 128 should be in the | |
227 | # class | |
228 | if ($i >= 128) { | |
229 | $expect_success = 0; | |
230 | $expect_success = ! $expect_success if $complement; | |
231 | $expect_success = ! $expect_success if $complement_class; | |
232 | } | |
233 | if ($expect_success) { | |
234 | like($char, qr/$test/, display("$prefix =~ qr/$test/")); | |
235 | } else { | |
236 | unlike($char, qr/$test/, display("$prefix !~ qr/$test/")); | |
237 | } | |
238 | } | |
239 | } | |
240 | } | |
241 | } | |
242 | ||
243 | # Similarly for \b and \B. | |
244 | foreach my $complement (0, 1) { | |
245 | my $name = '\\' . (($complement) ? 'B' : 'b'); | |
246 | my $expect_success = ! $w[$i]; # \b is complement of \w | |
247 | $expect_success = ! $expect_success if $complement; | |
248 | ||
249 | my $string = "a$char"; | |
250 | my $test = "(^a$name\\x{$hex_i}\$)"; | |
251 | ||
252 | use feature 'unicode_strings'; | |
253 | my $prefix = "in uni8bit; Verify $string"; | |
254 | if ($expect_success) { | |
255 | like($string, qr/$test/, display("$prefix =~ qr/$test/")); | |
256 | } else { | |
257 | unlike($string, qr/$test/, display("$prefix !~ qr/$test/")); | |
258 | } | |
259 | ||
260 | no feature 'unicode_strings'; | |
261 | $prefix = "no uni8bit; Verify $string"; | |
262 | if ($i >= 128) { | |
263 | $expect_success = 1; | |
264 | $expect_success = ! $expect_success if $complement; | |
265 | } | |
266 | if ($expect_success) { | |
267 | like($string, qr/$test/, display("$prefix =~ qr/$test/")); | |
268 | like($string, qr/$test/, display("$prefix =~ qr/$test/")); | |
269 | } else { | |
270 | unlike($string, qr/$test/, display("$prefix !~ qr/$test/")); | |
271 | } | |
272 | } | |
273 | } |