This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove MacOS classic support from File::{Copy,DosGlob,Find,Glob,stat}.
[perl5.git] / lib / feature / unicode_strings.t
CommitLineData
00f254e2
KW
1use warnings;
2use strict;
3
4BEGIN {
5 chdir 't' if -d 't';
6 @INC = '../lib';
7 require './test.pl';
8}
9
61fc5122 10plan(13312); # Determined by experimentation
00f254e2 11
61fc5122 12# Test the upper/lower/title case mappings for all characters 0-255.
00f254e2
KW
13
14# First compute the case mappings without resorting to the functions we're
15# testing.
16
17# Initialize the arrays so each $i maps to itself.
18my @posix_to_upper;
19for my $i (0 .. 255) {
20 $posix_to_upper[$i] = chr($i);
21}
22my @posix_to_lower
23= my @posix_to_title
24= my @latin1_to_upper
25= my @latin1_to_lower
26= my @latin1_to_title
27= @posix_to_upper;
28
29# Override the elements in the to_lower arrays that have different lower case
61fc5122 30# mappings
00f254e2
KW
31for my $i (0x41 .. 0x5A) {
32 $posix_to_lower[$i] = chr(ord($posix_to_lower[$i]) + 32);
33 $latin1_to_lower[$i] = chr(ord($latin1_to_lower[$i]) + 32);
34}
35
36# Same for upper and title
37for my $i (0x61 .. 0x7A) {
38 $posix_to_upper[$i] = chr(ord($posix_to_upper[$i]) - 32);
39 $latin1_to_upper[$i] = chr(ord($latin1_to_upper[$i]) - 32);
40 $posix_to_title[$i] = chr(ord($posix_to_title[$i]) - 32);
41 $latin1_to_title[$i] = chr(ord($latin1_to_title[$i]) - 32);
42}
43
44# And the same for those in the latin1 range
45for my $i (0xC0 .. 0xD6, 0xD8 .. 0xDE) {
46 $latin1_to_lower[$i] = chr(ord($latin1_to_lower[$i]) + 32);
47}
48for my $i (0xE0 .. 0xF6, 0xF8 .. 0xFE) {
49 $latin1_to_upper[$i] = chr(ord($latin1_to_upper[$i]) - 32);
50 $latin1_to_title[$i] = chr(ord($latin1_to_title[$i]) - 32);
51}
52
53# Override the abnormal cases.
54$latin1_to_upper[0xB5] = chr(0x39C);
55$latin1_to_title[0xB5] = chr(0x39C);
56$latin1_to_upper[0xDF] = 'SS';
57$latin1_to_title[0xDF] = 'Ss';
58$latin1_to_upper[0xFF] = chr(0x178);
59$latin1_to_title[0xFF] = chr(0x178);
60
61my $repeat = 25; # Length to make strings.
62
63# Create hashes of strings in several ranges, both for uc and lc.
64my %posix;
65$posix{'uc'} = 'A' x $repeat;
66$posix{'lc'} = 'a' x $repeat ;
67
68my %cyrillic;
69$cyrillic{'uc'} = chr(0x42F) x $repeat;
70$cyrillic{'lc'} = chr(0x44F) x $repeat;
71
72my %latin1;
73$latin1{'uc'} = chr(0xD8) x $repeat;
74$latin1{'lc'} = chr(0xF8) x $repeat;
75
76my %empty;
77$empty{'lc'} = $empty{'uc'} = "";
78
79# Loop so prefix each character being tested with nothing, and the various
80# strings; then loop for suffixes of those strings as well.
81for my $prefix (\%empty, \%posix, \%cyrillic, \%latin1) {
82 for my $suffix (\%empty, \%posix, \%cyrillic, \%latin1) {
83 for my $i (0 .. 255) { # For each possible posix or latin1 character
61fc5122 84 my $cp = sprintf "U+%04X", $i;
00f254e2
KW
85
86 # First try using latin1 (Unicode) semantics.
1863b879 87 use feature "unicode_strings";
00f254e2 88
61fc5122 89 my $phrase = 'with uni8bit';
00f254e2
KW
90 my $char = chr($i);
91 my $pre_lc = $prefix->{'lc'};
92 my $pre_uc = $prefix->{'uc'};
93 my $post_lc = $suffix->{'lc'};
94 my $post_uc = $suffix->{'uc'};
95 my $to_upper = $pre_lc . $char . $post_lc;
96 my $expected_upper = $pre_uc . $latin1_to_upper[$i] . $post_uc;
97 my $to_lower = $pre_uc . $char . $post_uc;
98 my $expected_lower = $pre_lc . $latin1_to_lower[$i] . $post_lc;
99
100 is (uc($to_upper), $expected_upper,
61fc5122 101 display("$cp: $phrase: uc($to_upper) eq $expected_upper"));
00f254e2 102 is (lc($to_lower), $expected_lower,
61fc5122 103 display("$cp: $phrase: lc($to_lower) eq $expected_lower"));
00f254e2
KW
104
105 if ($pre_uc eq "") { # Title case if null prefix.
106 my $expected_title = $latin1_to_title[$i] . $post_lc;
107 is (ucfirst($to_upper), $expected_title,
61fc5122 108 display("$cp: $phrase: ucfirst($to_upper) eq $expected_title"));
00f254e2
KW
109 my $expected_lcfirst = $latin1_to_lower[$i] . $post_uc;
110 is (lcfirst($to_lower), $expected_lcfirst,
61fc5122 111 display("$cp: $phrase: lcfirst($to_lower) eq $expected_lcfirst"));
00f254e2
KW
112 }
113
114 # Then try with posix semantics.
1863b879 115 no feature "unicode_strings";
61fc5122 116 $phrase = 'no uni8bit';
00f254e2
KW
117
118 # These don't contribute anything in this case.
119 next if $suffix == \%cyrillic;
120 next if $suffix == \%latin1;
121 next if $prefix == \%cyrillic;
122 next if $prefix == \%latin1;
123
124 $expected_upper = $pre_uc . $posix_to_upper[$i] . $post_uc;
125 $expected_lower = $pre_lc . $posix_to_lower[$i] . $post_lc;
126
127 is (uc($to_upper), $expected_upper,
61fc5122 128 display("$cp: $phrase: uc($to_upper) eq $expected_upper"));
00f254e2 129 is (lc($to_lower), $expected_lower,
61fc5122 130 display("$cp: $phrase: lc($to_lower) eq $expected_lower"));
00f254e2
KW
131
132 if ($pre_uc eq "") {
133 my $expected_title = $posix_to_title[$i] . $post_lc;
134 is (ucfirst($to_upper), $expected_title,
61fc5122 135 display("$cp: $phrase: ucfirst($to_upper) eq $expected_title"));
00f254e2
KW
136 my $expected_lcfirst = $posix_to_lower[$i] . $post_uc;
137 is (lcfirst($to_lower), $expected_lcfirst,
61fc5122 138 display("$cp: $phrase: lcfirst($to_lower) eq $expected_lcfirst"));
00f254e2
KW
139 }
140 }
141 }
142}