Commit | Line | Data |
---|---|---|
55d7b906 | 1 | use Unicode::UCD; |
561c79ed JH |
2 | |
3 | use Test; | |
4 | use strict; | |
5 | ||
74f8133e | 6 | BEGIN { plan tests => 162 }; |
561c79ed | 7 | |
55d7b906 | 8 | use Unicode::UCD 'charinfo'; |
561c79ed | 9 | |
b08cd201 JH |
my $charinfo;

# U+0041, LATIN CAPITAL LETTER A: an uppercase Basic Latin letter whose
# only case mapping is to lowercase.
$charinfo = charinfo(0x41);

# Each pair is a charinfo() field and the value expected for it.  The
# pairs are checked in the order listed, one ok() per field.
for my $check (
    [ code          => '0041'                   ],
    [ name          => 'LATIN CAPITAL LETTER A' ],
    [ category      => 'Lu'                     ],
    [ combining     => '0'                      ],
    [ bidi          => 'L'                      ],
    [ decomposition => ''                       ],
    [ decimal       => ''                       ],
    [ digit         => ''                       ],
    [ numeric       => ''                       ],
    [ mirrored      => 'N'                      ],
    [ unicode10     => ''                       ],
    [ comment       => ''                       ],
    [ upper         => ''                       ],
    [ lower         => '0061'                   ],
    [ title         => ''                       ],
    [ block         => 'Basic Latin'            ],
    [ script        => 'Latin'                  ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
31 | ||
# U+0100, LATIN CAPITAL LETTER A WITH MACRON: a letter that has both a
# canonical decomposition and a Unicode 1.0 name.
$charinfo = charinfo(0x100);

# Field/expected-value pairs, checked in the order listed.
for my $check (
    [ code          => '0100'                               ],
    [ name          => 'LATIN CAPITAL LETTER A WITH MACRON' ],
    [ category      => 'Lu'                                 ],
    [ combining     => '0'                                  ],
    [ bidi          => 'L'                                  ],
    [ decomposition => '0041 0304'                          ],
    [ decimal       => ''                                   ],
    [ digit         => ''                                   ],
    [ numeric       => ''                                   ],
    [ mirrored      => 'N'                                  ],
    [ unicode10     => 'LATIN CAPITAL LETTER A MACRON'      ],
    [ comment       => ''                                   ],
    [ upper         => ''                                   ],
    [ lower         => '0101'                               ],
    [ title         => ''                                   ],
    [ block         => 'Latin Extended-A'                   ],
    [ script        => 'Latin'                              ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
a196fbfd JH |
51 | |
# 0x0590 is in the Hebrew block but unused, so every charinfo() field
# is expected to be undefined.

$charinfo = charinfo(0x590);

# One two-argument ok() per field, in the same order as the other
# charinfo() sections of this file.
ok($charinfo->{$_}, undef)
    for qw(code name category combining bidi decomposition decimal digit
           numeric mirrored unicode10 comment upper lower title block script);
a196fbfd JH |
73 | |
# 0x05d0 is in the Hebrew block and used: HEBREW LETTER ALEF, a
# right-to-left letter with no case mappings.

$charinfo = charinfo(0x5d0);

# Field/expected-value pairs, checked in the order listed.
for my $check (
    [ code          => '05D0'               ],
    [ name          => 'HEBREW LETTER ALEF' ],
    [ category      => 'Lo'                 ],
    [ combining     => '0'                  ],
    [ bidi          => 'R'                  ],
    [ decomposition => ''                   ],
    [ decimal       => ''                   ],
    [ digit         => ''                   ],
    [ numeric       => ''                   ],
    [ mirrored      => 'N'                  ],
    [ unicode10     => ''                   ],
    [ comment       => ''                   ],
    [ upper         => ''                   ],
    [ lower         => ''                   ],
    [ title         => ''                   ],
    [ block         => 'Hebrew'             ],
    [ script        => 'Hebrew'             ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
561c79ed | 95 | |
# An open syllable in Hangul: U+AC00 is expected to be named
# HANGUL SYLLABLE GA and to decompose into two jamo.

$charinfo = charinfo(0xAC00);

# Field/expected-value pairs, checked in the order listed.
for my $check (
    [ code          => 'AC00'               ],
    [ name          => 'HANGUL SYLLABLE GA' ],
    [ category      => 'Lo'                 ],
    [ combining     => '0'                  ],
    [ bidi          => 'L'                  ],
    [ decomposition => '1100 1161'          ],
    [ decimal       => ''                   ],
    [ digit         => ''                   ],
    [ numeric       => ''                   ],
    [ mirrored      => 'N'                  ],
    [ unicode10     => ''                   ],
    [ comment       => ''                   ],
    [ upper         => ''                   ],
    [ lower         => ''                   ],
    [ title         => ''                   ],
    [ block         => 'Hangul Syllables'   ],
    [ script        => 'Hangul'             ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
117 | ||
# A closed syllable in Hangul: U+AE00 is expected to be named
# HANGUL SYLLABLE GEUL and to decompose into three jamo.

$charinfo = charinfo(0xAE00);

# Field/expected-value pairs, checked in the order listed.
for my $check (
    [ code          => 'AE00'                 ],
    [ name          => 'HANGUL SYLLABLE GEUL' ],
    [ category      => 'Lo'                   ],
    [ combining     => '0'                    ],
    [ bidi          => 'L'                    ],
    [ decomposition => '1100 1173 11AF'       ],
    [ decimal       => ''                     ],
    [ digit         => ''                     ],
    [ numeric       => ''                     ],
    [ mirrored      => 'N'                    ],
    [ unicode10     => ''                     ],
    [ comment       => ''                     ],
    [ upper         => ''                     ],
    [ lower         => ''                     ],
    [ title         => ''                     ],
    [ block         => 'Hangul Syllables'     ],
    [ script        => 'Hangul'               ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
139 | ||
# U+1D400, MATHEMATICAL BOLD CAPITAL A: a code point above U+FFFF, so
# the expected 'code' has five hex digits.  It has a <font>
# compatibility decomposition and no script is expected for it.
$charinfo = charinfo(0x1D400);

# Field/expected-value pairs, checked in the order listed.
for my $check (
    [ code          => '1D400'                             ],
    [ name          => 'MATHEMATICAL BOLD CAPITAL A'       ],
    [ category      => 'Lu'                                ],
    [ combining     => '0'                                 ],
    [ bidi          => 'L'                                 ],
    [ decomposition => '<font> 0041'                       ],
    [ decimal       => ''                                  ],
    [ digit         => ''                                  ],
    [ numeric       => ''                                  ],
    [ mirrored      => 'N'                                 ],
    [ unicode10     => ''                                  ],
    [ comment       => ''                                  ],
    [ upper         => ''                                  ],
    [ lower         => ''                                  ],
    [ title         => ''                                  ],
    [ block         => 'Mathematical Alphanumeric Symbols' ],
    [ script        => undef                               ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
159 | ||
use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused: the block lookup is still
# expected to answer 'Hebrew', while no script is expected for it.
{
    my $unused_hebrew = 0x590;

    # Both lookups are called directly in ok()'s argument list, exactly
    # as the expected value is compared against what they return there.
    ok(charblock($unused_hebrew), 'Hebrew');
    ok(charscript($unused_hebrew), undef);
}
561c79ed | 166 | |
b08cd201 JH |
# U+00BE, VULGAR FRACTION THREE QUARTERS: the one character in this
# file with a non-empty 'numeric' value ('3/4').  No script is expected.
$charinfo = charinfo(0xbe);

# Field/expected-value pairs, checked in the order listed.
for my $check (
    [ code          => '00BE'                           ],
    [ name          => 'VULGAR FRACTION THREE QUARTERS' ],
    [ category      => 'No'                             ],
    [ combining     => '0'                              ],
    [ bidi          => 'ON'                             ],
    [ decomposition => '<fraction> 0033 2044 0034'      ],
    [ decimal       => ''                               ],
    [ digit         => ''                               ],
    [ numeric       => '3/4'                            ],
    [ mirrored      => 'N'                              ],
    [ unicode10     => 'FRACTION THREE QUARTERS'        ],
    [ comment       => ''                               ],
    [ upper         => ''                               ],
    [ lower         => ''                               ],
    [ title         => ''                               ],
    [ block         => 'Latin-1 Supplement'             ],
    [ script        => undef                            ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected);
}
10a6ecd2 | 186 | |
use Unicode::UCD qw(charblocks charscripts);

# Called without arguments, each function returns a hash reference
# keyed by block or script name.
my $charblocks  = charblocks();
my $charscripts = charscripts();

# For each map: a known name must exist, the first range stored under
# it must start at the expected code point, and the made-up name
# 'PigLatin' must be absent.
for my $probe (
    [ $charblocks,  'Thai',     0x0e00 ],
    [ $charscripts, 'Armenian', 0x0531 ],
) {
    my ($map, $known, $first_code) = @$probe;

    ok(exists $map->{$known});
    ok($map->{$known}[0][0], $first_code);
    ok(!exists $map->{PigLatin});
}
10a6ecd2 JH |
200 | |
my $charscript;

# The same code point written three ways (bare hex, 0x-prefixed and
# U+-prefixed) must resolve to the same script name.
for my $spelling ("12ab", "0x12ab", "U+12ab") {
    $charscript = charscript($spelling);
    ok($charscript, 'Ethiopic');
}

# Given a script name instead of a code point, charscript() returns a
# reference to a list of ranges; check both ends of Ogham's first range.
my $ranges = charscript('Ogham');

ok($ranges->[0][0], 0x1681);
ok($ranges->[0][1], 0x169a);
217 | ||
use Unicode::UCD qw(charinrange);

$ranges = charscript('Cherokee');

# Probe the code points just outside and just inside each end of the
# Cherokee ranges.  The second element says whether the code point is
# expected to be in range.
for my $probe (
    [ "139f", 0 ],
    [ "13a0", 1 ],
    [ "13f4", 1 ],
    [ "13f5", 0 ],
) {
    my ($code, $expect_inside) = @$probe;

    if ($expect_inside) {
        ok( charinrange($ranges, $code));
    }
    else {
        ok(!charinrange($ranges, $code));
    }
}

# The expected values throughout this file are tied to this version of
# the Unicode data.
ok(Unicode::UCD::UnicodeVersion(), '3.1.1');
b08cd201 | 227 | |
use Unicode::UCD qw(compexcl);

# compexcl() must answer false for 0x0100 and true for 0x0958.
{
    my ($not_excluded, $excluded) = (0x0100, 0x0958);

    ok(!compexcl($not_excluded));
    ok( compexcl($excluded));
}
232 | ||
use Unicode::UCD qw(casefold);

my $casefold;

# Each row: code point, then the expected 'code', 'status' and
# 'mapping' of its case folding.  0x41 folds to one character with
# status 'C'; 0xdf folds to two characters with status 'F'.
for my $case (
    [ 0x41, '0041', 'C', '0061'      ],
    [ 0xdf, '00DF', 'F', '0073 0073' ],
) {
    my ($code_point, $code, $status, $mapping) = @$case;

    $casefold = casefold($code_point);

    # One combined check per code point, so each row is a single test.
    ok($casefold->{code}    eq $code   &&
       $casefold->{status}  eq $status &&
       $casefold->{mapping} eq $mapping);
}

# 0x20 is expected to have no case folding at all.
ok(!casefold(0x20));
250 | ||
use Unicode::UCD qw(casespec);

my $casespec;

# 0x41 is expected to have no special casing rules.
ok(!casespec(0x41));

# 0xdf has unconditional special casing: it lowercases to itself and
# titlecases/uppercases to two characters.
$casespec = casespec(0xdf);

# An unconditional mapping is signalled by an undefined 'condition'.
# Test definedness directly: string-comparing against undef would warn
# and would also accept an empty string.
ok($casespec->{code} eq '00DF' &&
   $casespec->{lower} eq '00DF' &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition});

# For 0x307 the rule checked here is stored one level down, under the
# 'az' key.
$casespec = casespec(0x307);

ok($casespec->{az}->{code} eq '0307' &&
   $casespec->{az}->{lower} eq '' &&
   $casespec->{az}->{title} eq '0307' &&
   $casespec->{az}->{upper} eq '0307' &&
   $casespec->{az}->{condition} eq 'az AFTER_i NOT_MORE_ABOVE');