Commit | Line | Data |
---|---|---|
25a47338 | 1 | #!perl -w |
8b731da2 JH |
2 | BEGIN { |
3 | if (ord("A") == 193) { | |
4 | print "1..0 # Skip: EBCDIC\n"; | |
5 | exit 0; | |
6 | } | |
a778afa6 AD |
7 | chdir 't' if -d 't'; |
8 | @INC = '../lib'; | |
e69a2255 | 9 | @INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself |
25a47338 NC |
10 | require Config; import Config; |
11 | if ($Config{'extensions'} !~ /\bStorable\b/) { | |
12 | print "1..0 # Skip: Storable was not built; Unicode::UCD uses Storable\n"; | |
13 | exit 0; | |
14 | } | |
8b731da2 JH |
15 | } |
16 | ||
a778afa6 AD |
17 | use strict; |
18 | use Unicode::UCD; | |
f5c9f3db | 19 | use Test::More; |
8b731da2 | 20 | |
c5a29f40 | 21 | BEGIN { plan tests => 179 }; |
561c79ed | 22 | |
55d7b906 | 23 | use Unicode::UCD 'charinfo'; |
561c79ed | 24 | |
b08cd201 JH |
25 | my $charinfo; |
26 | ||
27 | $charinfo = charinfo(0x41); | |
28 | ||
f5c9f3db JH |
29 | is($charinfo->{code}, '0041', 'LATIN CAPITAL LETTER A'); |
30 | is($charinfo->{name}, 'LATIN CAPITAL LETTER A'); | |
31 | is($charinfo->{category}, 'Lu'); | |
32 | is($charinfo->{combining}, '0'); | |
33 | is($charinfo->{bidi}, 'L'); | |
34 | is($charinfo->{decomposition}, ''); | |
35 | is($charinfo->{decimal}, ''); | |
36 | is($charinfo->{digit}, ''); | |
37 | is($charinfo->{numeric}, ''); | |
38 | is($charinfo->{mirrored}, 'N'); | |
39 | is($charinfo->{unicode10}, ''); | |
40 | is($charinfo->{comment}, ''); | |
41 | is($charinfo->{upper}, ''); | |
42 | is($charinfo->{lower}, '0061'); | |
43 | is($charinfo->{title}, ''); | |
44 | is($charinfo->{block}, 'Basic Latin'); | |
45 | is($charinfo->{script}, 'Latin'); | |
b08cd201 JH |
46 | |
47 | $charinfo = charinfo(0x100); | |
48 | ||
f5c9f3db JH |
49 | is($charinfo->{code}, '0100', 'LATIN CAPITAL LETTER A WITH MACRON'); |
50 | is($charinfo->{name}, 'LATIN CAPITAL LETTER A WITH MACRON'); | |
51 | is($charinfo->{category}, 'Lu'); | |
52 | is($charinfo->{combining}, '0'); | |
53 | is($charinfo->{bidi}, 'L'); | |
54 | is($charinfo->{decomposition}, '0041 0304'); | |
55 | is($charinfo->{decimal}, ''); | |
56 | is($charinfo->{digit}, ''); | |
57 | is($charinfo->{numeric}, ''); | |
58 | is($charinfo->{mirrored}, 'N'); | |
59 | is($charinfo->{unicode10}, 'LATIN CAPITAL LETTER A MACRON'); | |
60 | is($charinfo->{comment}, ''); | |
61 | is($charinfo->{upper}, ''); | |
62 | is($charinfo->{lower}, '0101'); | |
63 | is($charinfo->{title}, ''); | |
64 | is($charinfo->{block}, 'Latin Extended-A'); | |
65 | is($charinfo->{script}, 'Latin'); | |
a196fbfd JH |
66 | |
67 | # 0x0590 is in the Hebrew block but unused. | |
561c79ed | 68 | |
b08cd201 JH |
69 | $charinfo = charinfo(0x590); |
70 | ||
f5c9f3db JH |
71 | is($charinfo->{code}, undef, '0x0590 - unused Hebrew'); |
72 | is($charinfo->{name}, undef); | |
73 | is($charinfo->{category}, undef); | |
74 | is($charinfo->{combining}, undef); | |
75 | is($charinfo->{bidi}, undef); | |
76 | is($charinfo->{decomposition}, undef); | |
77 | is($charinfo->{decimal}, undef); | |
78 | is($charinfo->{digit}, undef); | |
79 | is($charinfo->{numeric}, undef); | |
80 | is($charinfo->{mirrored}, undef); | |
81 | is($charinfo->{unicode10}, undef); | |
82 | is($charinfo->{comment}, undef); | |
83 | is($charinfo->{upper}, undef); | |
84 | is($charinfo->{lower}, undef); | |
85 | is($charinfo->{title}, undef); | |
86 | is($charinfo->{block}, undef); | |
87 | is($charinfo->{script}, undef); | |
a196fbfd JH |
88 | |
89 | # 0x05d0 is in the Hebrew block and used. | |
561c79ed | 90 | |
b08cd201 JH |
91 | $charinfo = charinfo(0x5d0); |
92 | ||
f5c9f3db JH |
93 | is($charinfo->{code}, '05D0', '05D0 - used Hebrew'); |
94 | is($charinfo->{name}, 'HEBREW LETTER ALEF'); | |
95 | is($charinfo->{category}, 'Lo'); | |
96 | is($charinfo->{combining}, '0'); | |
97 | is($charinfo->{bidi}, 'R'); | |
98 | is($charinfo->{decomposition}, ''); | |
99 | is($charinfo->{decimal}, ''); | |
100 | is($charinfo->{digit}, ''); | |
101 | is($charinfo->{numeric}, ''); | |
102 | is($charinfo->{mirrored}, 'N'); | |
103 | is($charinfo->{unicode10}, ''); | |
104 | is($charinfo->{comment}, ''); | |
105 | is($charinfo->{upper}, ''); | |
106 | is($charinfo->{lower}, ''); | |
107 | is($charinfo->{title}, ''); | |
108 | is($charinfo->{block}, 'Hebrew'); | |
109 | is($charinfo->{script}, 'Hebrew'); | |
561c79ed | 110 | |
74f8133e | 111 | # An open syllable in Hangul. |
a6fa416b TS |
112 | |
113 | $charinfo = charinfo(0xAC00); | |
114 | ||
f5c9f3db JH |
115 | is($charinfo->{code}, 'AC00', 'HANGUL SYLLABLE-AC00'); |
116 | is($charinfo->{name}, 'HANGUL SYLLABLE-AC00'); | |
117 | is($charinfo->{category}, 'Lo'); | |
118 | is($charinfo->{combining}, '0'); | |
119 | is($charinfo->{bidi}, 'L'); | |
120 | is($charinfo->{decomposition}, undef); | |
121 | is($charinfo->{decimal}, ''); | |
122 | is($charinfo->{digit}, ''); | |
123 | is($charinfo->{numeric}, ''); | |
124 | is($charinfo->{mirrored}, 'N'); | |
125 | is($charinfo->{unicode10}, ''); | |
126 | is($charinfo->{comment}, ''); | |
127 | is($charinfo->{upper}, ''); | |
128 | is($charinfo->{lower}, ''); | |
129 | is($charinfo->{title}, ''); | |
130 | is($charinfo->{block}, 'Hangul Syllables'); | |
131 | is($charinfo->{script}, 'Hangul'); | |
a6fa416b | 132 | |
74f8133e | 133 | # A closed syllable in Hangul. |
a6fa416b TS |
134 | |
135 | $charinfo = charinfo(0xAE00); | |
136 | ||
f5c9f3db JH |
137 | is($charinfo->{code}, 'AE00', 'HANGUL SYLLABLE-AE00'); |
138 | is($charinfo->{name}, 'HANGUL SYLLABLE-AE00'); | |
139 | is($charinfo->{category}, 'Lo'); | |
140 | is($charinfo->{combining}, '0'); | |
141 | is($charinfo->{bidi}, 'L'); | |
142 | is($charinfo->{decomposition}, undef); | |
143 | is($charinfo->{decimal}, ''); | |
144 | is($charinfo->{digit}, ''); | |
145 | is($charinfo->{numeric}, ''); | |
146 | is($charinfo->{mirrored}, 'N'); | |
147 | is($charinfo->{unicode10}, ''); | |
148 | is($charinfo->{comment}, ''); | |
149 | is($charinfo->{upper}, ''); | |
150 | is($charinfo->{lower}, ''); | |
151 | is($charinfo->{title}, ''); | |
152 | is($charinfo->{block}, 'Hangul Syllables'); | |
153 | is($charinfo->{script}, 'Hangul'); | |
a6fa416b TS |
154 | |
155 | $charinfo = charinfo(0x1D400); | |
156 | ||
f5c9f3db JH |
157 | is($charinfo->{code}, '1D400', 'MATHEMATICAL BOLD CAPITAL A'); |
158 | is($charinfo->{name}, 'MATHEMATICAL BOLD CAPITAL A'); | |
159 | is($charinfo->{category}, 'Lu'); | |
160 | is($charinfo->{combining}, '0'); | |
161 | is($charinfo->{bidi}, 'L'); | |
162 | is($charinfo->{decomposition}, '<font> 0041'); | |
163 | is($charinfo->{decimal}, ''); | |
164 | is($charinfo->{digit}, ''); | |
165 | is($charinfo->{numeric}, ''); | |
166 | is($charinfo->{mirrored}, 'N'); | |
167 | is($charinfo->{unicode10}, ''); | |
168 | is($charinfo->{comment}, ''); | |
169 | is($charinfo->{upper}, ''); | |
170 | is($charinfo->{lower}, ''); | |
171 | is($charinfo->{title}, ''); | |
172 | is($charinfo->{block}, 'Mathematical Alphanumeric Symbols'); | |
7be0dac3 | 173 | is($charinfo->{script}, 'Common'); |
a6fa416b | 174 | |
55d7b906 | 175 | use Unicode::UCD qw(charblock charscript); |
a196fbfd JH |
176 | |
177 | # 0x0590 is in the Hebrew block but unused. | |
561c79ed | 178 | |
f5c9f3db JH |
179 | is(charblock(0x590), 'Hebrew', '0x0590 - Hebrew unused charblock'); |
180 | is(charscript(0x590), undef, '0x0590 - Hebrew unused charscript'); | |
561c79ed | 181 | |
b08cd201 JH |
182 | $charinfo = charinfo(0xbe); |
183 | ||
f5c9f3db JH |
184 | is($charinfo->{code}, '00BE', 'VULGAR FRACTION THREE QUARTERS'); |
185 | is($charinfo->{name}, 'VULGAR FRACTION THREE QUARTERS'); | |
186 | is($charinfo->{category}, 'No'); | |
187 | is($charinfo->{combining}, '0'); | |
188 | is($charinfo->{bidi}, 'ON'); | |
189 | is($charinfo->{decomposition}, '<fraction> 0033 2044 0034'); | |
190 | is($charinfo->{decimal}, ''); | |
191 | is($charinfo->{digit}, ''); | |
192 | is($charinfo->{numeric}, '3/4'); | |
193 | is($charinfo->{mirrored}, 'N'); | |
194 | is($charinfo->{unicode10}, 'FRACTION THREE QUARTERS'); | |
195 | is($charinfo->{comment}, ''); | |
196 | is($charinfo->{upper}, ''); | |
197 | is($charinfo->{lower}, ''); | |
198 | is($charinfo->{title}, ''); | |
199 | is($charinfo->{block}, 'Latin-1 Supplement'); | |
7be0dac3 | 200 | is($charinfo->{script}, 'Common'); |
10a6ecd2 | 201 | |
55d7b906 | 202 | use Unicode::UCD qw(charblocks charscripts); |
10a6ecd2 | 203 | |
b08cd201 | 204 | my $charblocks = charblocks(); |
10a6ecd2 | 205 | |
f5c9f3db JH |
206 | ok(exists $charblocks->{Thai}, 'Thai charblock exists'); |
207 | is($charblocks->{Thai}->[0]->[0], hex('0e00')); | |
208 | ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist'); | |
10a6ecd2 | 209 | |
b08cd201 | 210 | my $charscripts = charscripts(); |
10a6ecd2 | 211 | |
f5c9f3db JH |
212 | ok(exists $charscripts->{Armenian}, 'Armenian charscript exists'); |
213 | is($charscripts->{Armenian}->[0]->[0], hex('0531')); | |
214 | ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist'); | |
10a6ecd2 JH |
215 | |
216 | my $charscript; | |
217 | ||
218 | $charscript = charscript("12ab"); | |
f5c9f3db | 219 | is($charscript, 'Ethiopic', 'Ethiopic charscript'); |
10a6ecd2 JH |
220 | |
221 | $charscript = charscript("0x12ab"); | |
f5c9f3db | 222 | is($charscript, 'Ethiopic'); |
10a6ecd2 JH |
223 | |
224 | $charscript = charscript("U+12ab"); | |
f5c9f3db | 225 | is($charscript, 'Ethiopic'); |
10a6ecd2 JH |
226 | |
227 | my $ranges; | |
228 | ||
229 | $ranges = charscript('Ogham'); | |
7be0dac3 NC |
230 | is($ranges->[1]->[0], hex('1681'), 'Ogham charscript'); |
231 | is($ranges->[1]->[1], hex('169a')); | |
10a6ecd2 | 232 | |
55d7b906 | 233 | use Unicode::UCD qw(charinrange); |
10a6ecd2 JH |
234 | |
235 | $ranges = charscript('Cherokee'); | |
f5c9f3db | 236 | ok(!charinrange($ranges, "139f"), 'Cherokee charscript'); |
10a6ecd2 JH |
237 | ok( charinrange($ranges, "13a0")); |
238 | ok( charinrange($ranges, "13f4")); | |
239 | ok(!charinrange($ranges, "13f5")); | |
240 | ||
7be0dac3 | 241 | is(Unicode::UCD::UnicodeVersion, '4.0.1', 'UnicodeVersion'); |
b08cd201 | 242 | |
55d7b906 | 243 | use Unicode::UCD qw(compexcl); |
b08cd201 | 244 | |
f5c9f3db | 245 | ok(!compexcl(0x0100), 'compexcl'); |
b08cd201 JH |
246 | ok( compexcl(0x0958)); |
247 | ||
55d7b906 | 248 | use Unicode::UCD qw(casefold); |
b08cd201 JH |
249 | |
250 | my $casefold; | |
251 | ||
252 | $casefold = casefold(0x41); | |
253 | ||
254 | ok($casefold->{code} eq '0041' && | |
255 | $casefold->{status} eq 'C' && | |
f5c9f3db | 256 | $casefold->{mapping} eq '0061', 'casefold 0x41'); |
b08cd201 JH |
257 | |
258 | $casefold = casefold(0xdf); | |
259 | ||
260 | ok($casefold->{code} eq '00DF' && | |
261 | $casefold->{status} eq 'F' && | |
f5c9f3db | 262 | $casefold->{mapping} eq '0073 0073', 'casefold 0xDF'); |
b08cd201 JH |
263 | |
264 | ok(!casefold(0x20)); | |
265 | ||
55d7b906 | 266 | use Unicode::UCD qw(casespec); |
b08cd201 JH |
267 | |
268 | my $casespec; | |
269 | ||
270 | ok(!casespec(0x41)); | |
271 | ||
272 | $casespec = casespec(0xdf); | |
273 | ||
274 | ok($casespec->{code} eq '00DF' && | |
275 | $casespec->{lower} eq '00DF' && | |
276 | $casespec->{title} eq '0053 0073' && | |
277 | $casespec->{upper} eq '0053 0053' && | |
2d3cf3ee | 278 | !defined $casespec->{condition}, 'casespec 0xDF'); |
b08cd201 JH |
279 | |
280 | $casespec = casespec(0x307); | |
281 | ||
f499c386 | 282 | ok($casespec->{az}->{code} eq '0307' && |
2d3cf3ee | 283 | !defined $casespec->{az}->{lower} && |
f499c386 JH |
284 | $casespec->{az}->{title} eq '0307' && |
285 | $casespec->{az}->{upper} eq '0307' && | |
9c3dc587 | 286 | $casespec->{az}->{condition} eq 'az After_I', |
f5c9f3db | 287 | 'casespec 0x307'); |
6c8d78fb HS |
288 | |
289 | # perl #7305 UnicodeCD::compexcl is weird | |
290 | ||
2d3cf3ee | 291 | for (1) {my $a=compexcl $_} |
6c8d78fb HS |
292 | ok(1, 'compexcl read-only $_: perl #7305'); |
293 | grep {compexcl $_} %{{1=>2}}; | |
294 | ok(1, 'compexcl read-only hash: perl #7305'); | |
295 | ||
d7829152 JH |
296 | is(Unicode::UCD::_getcode('123'), 123, "_getcode(123)"); |
297 | is(Unicode::UCD::_getcode('0123'), 0x123, "_getcode(0123)"); | |
298 | is(Unicode::UCD::_getcode('0x123'), 0x123, "_getcode(0x123)"); | |
299 | is(Unicode::UCD::_getcode('0X123'), 0x123, "_getcode(0X123)"); | |
300 | is(Unicode::UCD::_getcode('U+123'), 0x123, "_getcode(U+123)"); | |
301 | is(Unicode::UCD::_getcode('u+123'), 0x123, "_getcode(u+123)"); | |
302 | is(Unicode::UCD::_getcode('U+1234'), 0x1234, "_getcode(U+1234)"); | |
303 | is(Unicode::UCD::_getcode('U+12345'), 0x12345, "_getcode(U+12345)"); | |
304 | is(Unicode::UCD::_getcode('123x'), undef, "_getcode(123x)"); | |
305 | is(Unicode::UCD::_getcode('x123'), undef, "_getcode(x123)"); | |
306 | is(Unicode::UCD::_getcode('0x123x'), undef, "_getcode(x123)"); | |
307 | is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)"); | |
741297c1 JH |
308 | |
309 | { | |
310 | my $r1 = charscript('Latin'); | |
311 | my $n1 = @$r1; | |
312 | is($n1, 26, "26 ranges in Latin script (Unicode 4.0.0)"); | |
313 | shift @$r1 while @$r1; | |
314 | my $r2 = charscript('Latin'); | |
315 | is(@$r2, $n1, "modifying results should not mess up internal caches"); | |
316 | } | |
317 | ||
c5a29f40 LM |
318 | { |
319 | is(charinfo(0xdeadbeef), undef, "[perl #23273] warnings in Unicode::UCD"); | |
2d3cf3ee | 320 | } |