Commit | Line | Data |
---|---|---|
b0f2b690 JH |
1 | #!./perl |
2 | ||
3a2263fe RGS |
3 | BEGIN { |
4 | chdir 't'; | |
5 | @INC = '../lib'; | |
6 | require './test.pl'; | |
983ffd37 | 7 | } |
b0f2b690 | 8 | |
aa4af542 | 9 | plan tests => 93; |
c6502f5c RGS |
10 | |
11 | is(lc(undef), "", "lc(undef) is ''"); | |
12 | is(lcfirst(undef), "", "lcfirst(undef) is ''"); | |
13 | is(uc(undef), "", "uc(undef) is ''"); | |
14 | is(ucfirst(undef), "", "ucfirst(undef) is ''"); | |
3a2263fe | 15 | |
b0f2b690 JH |
16 | $a = "HELLO.* world"; |
17 | $b = "hello.* WORLD"; | |
18 | ||
3a2263fe RGS |
19 | is("\Q$a\E." , "HELLO\\.\\*\\ world.", '\Q\E HELLO.* world'); |
20 | is("\u$a" , "HELLO\.\* world", '\u'); | |
21 | is("\l$a" , "hELLO\.\* world", '\l'); | |
22 | is("\U$a" , "HELLO\.\* WORLD", '\U'); | |
23 | is("\L$a" , "hello\.\* world", '\L'); | |
24 | ||
25 | is(quotemeta($a) , "HELLO\\.\\*\\ world", 'quotemeta'); | |
26 | is(ucfirst($a) , "HELLO\.\* world", 'ucfirst'); | |
27 | is(lcfirst($a) , "hELLO\.\* world", 'lcfirst'); | |
28 | is(uc($a) , "HELLO\.\* WORLD", 'uc'); | |
29 | is(lc($a) , "hello\.\* world", 'lc'); | |
30 | ||
31 | is("\Q$b\E." , "hello\\.\\*\\ WORLD.", '\Q\E hello.* WORLD'); | |
32 | is("\u$b" , "Hello\.\* WORLD", '\u'); | |
33 | is("\l$b" , "hello\.\* WORLD", '\l'); | |
34 | is("\U$b" , "HELLO\.\* WORLD", '\U'); | |
35 | is("\L$b" , "hello\.\* world", '\L'); | |
36 | ||
37 | is(quotemeta($b) , "hello\\.\\*\\ WORLD", 'quotemeta'); | |
38 | is(ucfirst($b) , "Hello\.\* WORLD", 'ucfirst'); | |
39 | is(lcfirst($b) , "hello\.\* WORLD", 'lcfirst'); | |
40 | is(uc($b) , "HELLO\.\* WORLD", 'uc'); | |
41 | is(lc($b) , "hello\.\* world", 'lc'); | |
983ffd37 JH |
42 | |
43 | # \x{100} is LATIN CAPITAL LETTER A WITH MACRON; its bijective lowercase is | |
7e965bc5 | 44 | # \x{101}, LATIN SMALL LETTER A WITH MACRON. |
b0f2b690 | 45 | |
2533d950 JH |
46 | $a = "\x{100}\x{101}Aa"; |
47 | $b = "\x{101}\x{100}aA"; | |
b0f2b690 | 48 | |
3a2263fe RGS |
49 | is("\Q$a\E." , "\x{100}\x{101}Aa.", '\Q\E \x{100}\x{101}Aa'); |
50 | is("\u$a" , "\x{100}\x{101}Aa", '\u'); | |
51 | is("\l$a" , "\x{101}\x{101}Aa", '\l'); | |
52 | is("\U$a" , "\x{100}\x{100}AA", '\U'); | |
53 | is("\L$a" , "\x{101}\x{101}aa", '\L'); | |
54 | ||
55 | is(quotemeta($a) , "\x{100}\x{101}Aa", 'quotemeta'); | |
56 | is(ucfirst($a) , "\x{100}\x{101}Aa", 'ucfirst'); | |
57 | is(lcfirst($a) , "\x{101}\x{101}Aa", 'lcfirst'); | |
58 | is(uc($a) , "\x{100}\x{100}AA", 'uc'); | |
59 | is(lc($a) , "\x{101}\x{101}aa", 'lc'); | |
60 | ||
61 | is("\Q$b\E." , "\x{101}\x{100}aA.", '\Q\E \x{101}\x{100}aA'); | |
62 | is("\u$b" , "\x{100}\x{100}aA", '\u'); | |
63 | is("\l$b" , "\x{101}\x{100}aA", '\l'); | |
64 | is("\U$b" , "\x{100}\x{100}AA", '\U'); | |
65 | is("\L$b" , "\x{101}\x{101}aa", '\L'); | |
66 | ||
67 | is(quotemeta($b) , "\x{101}\x{100}aA", 'quotemeta'); | |
68 | is(ucfirst($b) , "\x{100}\x{100}aA", 'ucfirst'); | |
69 | is(lcfirst($b) , "\x{101}\x{100}aA", 'lcfirst'); | |
70 | is(uc($b) , "\x{100}\x{100}AA", 'uc'); | |
71 | is(lc($b) , "\x{101}\x{101}aa", 'lc'); | |
983ffd37 JH |
72 | |
73 | # \x{DF} is LATIN SMALL LETTER SHARP S, its uppercase is SS or \x{53}\x{53}; | |
74 | # \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is | |
75 | # \x{2BC}\x{4E} or MODIFIER LETTER APOSTROPHE and N. | |
76 | ||
8a38a836 | 77 | is(latin1_to_native("\U\x{DF}aB\x{149}cD"), latin1_to_native("SSAB\x{2BC}NCD"), |
c811e616 | 78 | "multicharacter uppercase"); |
983ffd37 JH |
79 | |
80 | # The \x{DF} is its own lowercase, ditto for \x{149}. | |
81 | # Neither has a multi-character lowercase mapping; such mappings are rare (U+0130 is one). | |
b0f2b690 | 82 | |
8a38a836 | 83 | is(latin1_to_native("\L\x{DF}aB\x{149}cD"), latin1_to_native("\x{DF}ab\x{149}cd"), |
c811e616 | 84 | "multicharacter lowercase"); |
b0f2b690 | 85 | |
44bc797b JH |
86 | # titlecase is used for \u / ucfirst. |
87 | ||
88 | # \x{587} is ARMENIAN SMALL LIGATURE ECH YIWN and its titlecase is | |
89 | # \x{535}\x{582} ARMENIAN CAPITAL LETTER ECH + ARMENIAN SMALL LETTER YIWN | |
90 | # while its lowercase is | |
91 | # \x{587} itself | |
92 | # and its uppercase is | |
93 | # \x{535}\x{552} ARMENIAN CAPITAL LETTER ECH + ARMENIAN CAPITAL LETTER YIWN | |
94 | ||
95 | $a = "\x{587}"; | |
96 | ||
3a2263fe RGS |
97 | is("\L\x{587}" , "\x{587}", "ligature lowercase"); |
98 | is("\u\x{587}" , "\x{535}\x{582}", "ligature titlecase"); | |
99 | is("\U\x{587}" , "\x{535}\x{552}", "ligature uppercase"); | |
44bc797b | 100 | |
2e3dedfe | 101 | # mktables had problems where many-to-one case mappings didn't work right. |
89ebb4a3 JH |
102 | # The lib/uni/fold.t should give the fourth folding, "casefolding", a good |
103 | # workout (one cannot directly get that from Perl). | |
83171573 JH |
104 | # \x{01C4} is LATIN CAPITAL LETTER DZ WITH CARON |
105 | # \x{01C5} is LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON | |
106 | # \x{01C6} is LATIN SMALL LETTER DZ WITH CARON | |
107 | # \x{03A3} is GREEK CAPITAL LETTER SIGMA | |
108 | # \x{03C2} is GREEK SMALL LETTER FINAL SIGMA | |
109 | # \x{03C3} is GREEK SMALL LETTER SIGMA | |
2e3dedfe | 110 | |
3a2263fe RGS |
111 | is(lc("\x{1C4}") , "\x{1C6}", "U+01C4 lc is U+01C6"); |
112 | is(lc("\x{1C5}") , "\x{1C6}", "U+01C5 lc is U+01C6, too"); | |
2e3dedfe | 113 | |
3a2263fe RGS |
114 | is(ucfirst("\x{3C2}") , "\x{3A3}", "U+03C2 ucfirst is U+03A3"); |
115 | is(ucfirst("\x{3C3}") , "\x{3A3}", "U+03C3 ucfirst is U+03A3, too"); | |
2e3dedfe | 116 | |
3a2263fe RGS |
117 | is(uc("\x{1C5}") , "\x{1C4}", "U+01C5 uc is U+01C4"); |
118 | is(uc("\x{1C6}") , "\x{1C4}", "U+01C6 uc is U+01C4, too"); | |
2e3dedfe | 119 | |
ada6e8a9 AMS |
120 | # #18107: A host of bugs involving [ul]c{,first}. AMS 20021106 |
121 | $a = "\x{3c3}foo.bar"; # \x{3c3} == GREEK SMALL LETTER SIGMA. | |
122 | $b = "\x{3a3}FOO.BAR"; # \x{3a3} == GREEK CAPITAL LETTER SIGMA. | |
123 | ||
124 | ($c = $b) =~ s/(\w+)/lc($1)/ge; | |
3a2263fe | 125 | is($c , $a, "Using s///e to change case."); |
ada6e8a9 | 126 | |
d1eb3177 | 127 | ($c = $a) =~ s/(\p{IsWord}+)/uc($1)/ge; |
3a2263fe | 128 | is($c , $b, "Using s///e to change case."); |
ada6e8a9 | 129 | |
d1eb3177 | 130 | ($c = $b) =~ s/(\p{IsWord}+)/lcfirst($1)/ge; |
3a2263fe | 131 | is($c , "\x{3c3}FOO.bAR", "Using s///e to change case."); |
ada6e8a9 | 132 | |
d1eb3177 | 133 | ($c = $a) =~ s/(\p{IsWord}+)/ucfirst($1)/ge; |
3a2263fe RGS |
134 | is($c , "\x{3a3}foo.Bar", "Using s///e to change case."); |
135 | ||
136 | # #18931: perl5.8.0 bug in \U..\E processing | |
04d26ece | 137 | # Test case from Nicholas Clark. |
3a2263fe RGS |
138 | for my $a (0,1) { |
139 | $_ = 'abcdefgh'; | |
140 | $_ .= chr 256; | |
141 | chop; | |
142 | /(.*)/; | |
143 | is(uc($1), "ABCDEFGH", "[perl #18931]"); | |
144 | } | |
145 | ||
146 | { | |
147 | foreach (0, 1) { | |
148 | $a = v10.v257; | |
149 | chop $a; | |
150 | $a =~ s/^(\s*)(\w*)/$1\u$2/; | |
151 | is($a, v10, "[perl #18857]"); | |
3a2263fe RGS |
152 | } |
153 | } | |
6818a357 TS |
154 | |
155 | ||
156 | # [perl #38619] Bug in lc and uc (interaction between UTF-8, substr, and lc/uc) | |
157 | ||
158 | for ("a\x{100}", "xyz\x{100}") { | |
159 | is(substr(uc($_), 0), uc($_), "[perl #38619] uc"); | |
160 | } | |
161 | for ("A\x{100}", "XYZ\x{100}") { | |
162 | is(substr(lc($_), 0), lc($_), "[perl #38619] lc"); | |
163 | } | |
164 | for ("a\x{100}", "ßyz\x{100}") { # ß to Ss (different length) | |
165 | is(substr(ucfirst($_), 0), ucfirst($_), "[perl #38619] ucfirst"); | |
166 | } | |
167 | ||
168 | # Related to [perl #38619] | |
169 | # the original report concerns PERL_MAGIC_utf8. | |
170 | # these cases concern PERL_MAGIC_regex_global. | |
171 | ||
172 | for (map { $_ } "a\x{100}", "abc\x{100}", "\x{100}") { | |
173 | chop; # get ("a", "abc", "") in utf8 | |
174 | my $return = uc($_) =~ /\G(.?)/g; | |
175 | my $result = $return ? $1 : "not"; | |
176 | my $expect = (uc($_) =~ /(.?)/g)[0]; | |
177 | is($return, 1, "[perl #38619]"); | |
178 | is($result, $expect, "[perl #38619]"); | |
179 | } | |
180 | ||
181 | for (map { $_ } "A\x{100}", "ABC\x{100}", "\x{100}") { | |
182 | chop; # get ("A", "ABC", "") in utf8 | |
183 | my $return = lc($_) =~ /\G(.?)/g; | |
184 | my $result = $return ? $1 : "not"; | |
185 | my $expect = (lc($_) =~ /(.?)/g)[0]; | |
186 | is($return, 1, "[perl #38619]"); | |
187 | is($result, $expect, "[perl #38619]"); | |
188 | } | |
189 | ||
c2955298 NC |
190 | for (1, 4, 9, 16, 25) { |
191 | is(uc "\x{03B0}" x $_, "\x{3a5}\x{308}\x{301}" x $_, | |
192 | 'uc U+03B0 grows threefold'); | |
193 | ||
194 | is(lc "\x{0130}" x $_, "i\x{307}" x $_, 'lc U+0130 grows'); | |
195 | } | |
17fa0776 RGS |
196 | |
197 | # bug #43207 | |
198 | my $temp = "Hello"; | |
199 | for ("$temp") { | |
200 | lc $_; | |
201 | is($_, "Hello"); | |
202 | } | |
aa4af542 RGS |
203 | |
204 | # new in Unicode 5.1.0 | |
205 | is(lc("\x{1E9E}"), "\x{df}", "lc(LATIN CAPITAL LETTER SHARP S)"); |