10 plan(20736); # Determined by experimentation
12 # In this section, test the upper/lower/title case mappings for all characters
15 # First compute the case mappings without resorting to the functions we're
18 # Initialize the arrays so each $i maps to itself.
20 for my $i (0 .. 255) {
21 $posix_to_upper[$i] = chr($i);
30 # Override the elements in the to_lower arrays that have different standard
31 # lower case mappings. (standard meaning they are 32 numbers apart)
32 for my $i (0x41 .. 0x5A, 0xC0 .. 0xD6, 0xD8 .. 0xDE) {
33 my $upper_ord = utf8::unicode_to_native $i;
34 my $lower_ord = utf8::unicode_to_native($i + 32);
36 $latin1_to_lower[$upper_ord] = chr($lower_ord);
40 $posix_to_lower[$upper_ord] = chr($lower_ord);
43 # Same for upper and title
44 for my $i (0x61 .. 0x7A, 0xE0 .. 0xF6, 0xF8 .. 0xFE) {
45 my $lower_ord = utf8::unicode_to_native $i;
46 my $upper_ord = utf8::unicode_to_native($i - 32);
48 $latin1_to_upper[$lower_ord] = chr($upper_ord);
49 $latin1_to_title[$lower_ord] = chr($upper_ord);
53 $posix_to_upper[$lower_ord] = chr($upper_ord);
54 $posix_to_title[$lower_ord] = chr($upper_ord);
57 # Override the abnormal cases.
58 $latin1_to_upper[utf8::unicode_to_native 0xB5] = chr(0x39C);
59 $latin1_to_title[utf8::unicode_to_native 0xB5] = chr(0x39C);
60 $latin1_to_upper[utf8::unicode_to_native 0xDF] = 'SS';
61 $latin1_to_title[utf8::unicode_to_native 0xDF] = 'Ss';
62 $latin1_to_upper[utf8::unicode_to_native 0xFF] = chr(0x178);
63 $latin1_to_title[utf8::unicode_to_native 0xFF] = chr(0x178);
65 my $repeat = 25; # Length to make strings.
67 # Create hashes of strings in several ranges, both for uc and lc.
69 $posix{'uc'} = 'A' x $repeat;
70 $posix{'lc'} = 'a' x $repeat ;
73 $cyrillic{'uc'} = chr(0x42F) x $repeat;
74 $cyrillic{'lc'} = chr(0x44F) x $repeat;
77 $latin1{'uc'} = chr(utf8::unicode_to_native 0xD8) x $repeat;
78 $latin1{'lc'} = chr(utf8::unicode_to_native 0xF8) x $repeat;
81 $empty{'lc'} = $empty{'uc'} = "";
83 # Loop so prefix each character being tested with nothing, and the various
84 # strings; then loop for suffixes of those strings as well.
85 for my $prefix (\%empty, \%posix, \%cyrillic, \%latin1) {
86 for my $suffix (\%empty, \%posix, \%cyrillic, \%latin1) {
87 for my $i (0 .. 255) { # For each possible posix or latin1 character
88 my $cp = sprintf "U+%04X", $i;
90 # First try using latin1 (Unicode) semantics.
91 use feature "unicode_strings";
93 my $phrase = 'in uni8bit';
95 my $pre_lc = $prefix->{'lc'};
96 my $pre_uc = $prefix->{'uc'};
97 my $post_lc = $suffix->{'lc'};
98 my $post_uc = $suffix->{'uc'};
99 my $to_upper = $pre_lc . $char . $post_lc;
100 my $expected_upper = $pre_uc . $latin1_to_upper[$i] . $post_uc;
101 my $to_lower = $pre_uc . $char . $post_uc;
102 my $expected_lower = $pre_lc . $latin1_to_lower[$i] . $post_lc;
104 is (uc($to_upper), $expected_upper,
105 display("$cp: $phrase: Verify uc($to_upper) eq $expected_upper"));
106 is (lc($to_lower), $expected_lower,
107 display("$cp: $phrase: Verify lc($to_lower) eq $expected_lower"));
109 if ($pre_uc eq "") { # Title case if null prefix.
110 my $expected_title = $latin1_to_title[$i] . $post_lc;
111 is (ucfirst($to_upper), $expected_title,
112 display("$cp: $phrase: Verify ucfirst($to_upper) eq $expected_title"));
113 my $expected_lcfirst = $latin1_to_lower[$i] . $post_uc;
114 is (lcfirst($to_lower), $expected_lcfirst,
115 display("$cp: $phrase: Verify lcfirst($to_lower) eq $expected_lcfirst"));
118 # Then try with posix semantics.
119 no feature "unicode_strings";
120 $phrase = 'no uni8bit';
122 # These don't contribute anything in this case.
123 next if $suffix == \%cyrillic;
124 next if $suffix == \%latin1;
125 next if $prefix == \%cyrillic;
126 next if $prefix == \%latin1;
128 $expected_upper = $pre_uc . $posix_to_upper[$i] . $post_uc;
129 $expected_lower = $pre_lc . $posix_to_lower[$i] . $post_lc;
131 is (uc($to_upper), $expected_upper,
132 display("$cp: $phrase: Verify uc($to_upper) eq $expected_upper"));
133 is (lc($to_lower), $expected_lower,
134 display("$cp: $phrase: Verify lc($to_lower) eq $expected_lower"));
137 my $expected_title = $posix_to_title[$i] . $post_lc;
138 is (ucfirst($to_upper), $expected_title,
139 display("$cp: $phrase: Verify ucfirst($to_upper) eq $expected_title"));
140 my $expected_lcfirst = $posix_to_lower[$i] . $post_uc;
141 is (lcfirst($to_lower), $expected_lcfirst,
142 display("$cp: $phrase: Verify lcfirst($to_lower) eq $expected_lcfirst"));
148 # In this section test that \w, \s, and \b (and complements) work correctly.
149 # These are the only character classes affected by this pragma. Above ASCII
150 # range Latin-1 characters are in \w and \s iff the pragma is on.
152 # Construct the expected full Latin1 values without using anything we're
153 # testing. All these were determined manually by looking in the manual.
154 # Boolean: is w[$i] a \w character?
156 for my $i ( 0x30 .. 0x39, # 0-9
160 0xAA, # FEMININE ORDINAL INDICATOR
162 0xBA, # MASCULINE ORDINAL INDICATOR
163 0xC0 .. 0xD6, # various
164 0xD8 .. 0xF6, # various
165 0xF8 .. 0xFF, # various
168 $w[utf8::unicode_to_native $i] = 1;
171 # Boolean: is s[$i] a \s character?
173 $s[utf8::unicode_to_native 0x09] = 1; # Tab
174 $s[utf8::unicode_to_native 0x0A] = 1; # LF
175 $s[utf8::unicode_to_native 0x0B] = 1; # VT
176 $s[utf8::unicode_to_native 0x0C] = 1; # FF
177 $s[utf8::unicode_to_native 0x0D] = 1; # CR
178 $s[utf8::unicode_to_native 0x20] = 1; # SPACE
179 $s[utf8::unicode_to_native 0x85] = 1; # NEL
180 $s[utf8::unicode_to_native 0xA0] = 1; # NO BREAK SPACE
182 for my $i (0 .. 255) {
184 my $hex_i = sprintf "%02X", $i;
185 foreach my $which (\@s, \@w) {
194 foreach my $complement (0, 1) {
195 my $name = '\\' . (($complement) ? uc($basic_name) : $basic_name);
197 # in and out of [...]
198 foreach my $charclass (0, 1) {
200 # And like [^...] or just plain [...]
201 foreach my $complement_class (0, 1) {
202 next if ! $charclass && $complement_class;
204 # Start with the boolean as to if the character is in the
205 # class, and then complement as needed.
206 my $expect_success = $which->[$i];
207 $expect_success = ! $expect_success if $complement;
208 $expect_success = ! $expect_success if $complement_class;
211 $test = "^$test" if $complement_class;
212 $test = "[$test]" if $charclass;
215 use feature 'unicode_strings';
216 my $prefix = "in uni8bit; Verify chr(0x$hex_i)";
217 if ($expect_success) {
218 like($char, qr/$test/, display("$prefix =~ qr/$test/"));
220 unlike($char, qr/$test/, display("$prefix !~ qr/$test/"));
223 no feature 'unicode_strings';
224 $prefix = "no uni8bit; Verify chr(0x$hex_i)";
226 # With the legacy, nothing above 128 should be in the
228 if (utf8::native_to_unicode($i) >= 128) {
230 $expect_success = ! $expect_success if $complement;
231 $expect_success = ! $expect_success if $complement_class;
233 if ($expect_success) {
234 like($char, qr/$test/, display("$prefix =~ qr/$test/"));
236 unlike($char, qr/$test/, display("$prefix !~ qr/$test/"));
243 # Similarly for \b and \B.
244 foreach my $complement (0, 1) {
245 my $name = '\\' . (($complement) ? 'B' : 'b');
246 my $expect_success = ! $w[$i]; # \b is complement of \w
247 $expect_success = ! $expect_success if $complement;
249 my $string = "a$char";
250 my $test = "(^a$name\\x{$hex_i}\$)";
252 use feature 'unicode_strings';
253 my $prefix = "in uni8bit; Verify $string";
254 if ($expect_success) {
255 like($string, qr/$test/, display("$prefix =~ qr/$test/"));
257 unlike($string, qr/$test/, display("$prefix !~ qr/$test/"));
260 no feature 'unicode_strings';
261 $prefix = "no uni8bit; Verify $string";
262 if (utf8::native_to_unicode($i) >= 128) {
264 $expect_success = ! $expect_success if $complement;
266 if ($expect_success) {
267 like($string, qr/$test/, display("$prefix =~ qr/$test/"));
268 like($string, qr/$test/, display("$prefix =~ qr/$test/"));
270 unlike($string, qr/$test/, display("$prefix !~ qr/$test/"));