This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
UnicodeCD::charinfo
[perl5.git] / lib / UnicodeCD.t
CommitLineData
1189d1e4 1use UnicodeCD;
561c79ed
JH
2
3use Test;
4use strict;
5
# Planned test count: eight charinfo() groups of 17 field checks each, plus
# 26 individual checks of the other UnicodeCD functions (162 in total).
BEGIN { plan(tests => 8 * 17 + 26) }
561c79ed 7
1189d1e4 8use UnicodeCD 'charinfo';
561c79ed 9
b08cd201
JH
my $charinfo;

# charinfo(0x41): every field of the returned hash is checked against its
# expected value.  The checks are table-driven so that a failure reports
# which field mismatched (third argument to ok() is the diagnostic).
$charinfo = charinfo(0x41);

for my $check (
    [ code          => '0041' ],
    [ name          => 'LATIN CAPITAL LETTER A' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '0061' ],
    [ title         => '' ],
    [ block         => 'Basic Latin' ],
    [ script        => 'Latin' ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected, "charinfo(0x41) field '$field'");
}
31
# charinfo(0x100): a character with a canonical decomposition and a
# Unicode 1.0 name.  Table-driven so that a failure names the field.
$charinfo = charinfo(0x100);

for my $check (
    [ code          => '0100' ],
    [ name          => 'LATIN CAPITAL LETTER A WITH MACRON' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '0041 0304' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => 'LATIN CAPITAL LETTER A MACRON' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '0101' ],
    [ title         => '' ],
    [ block         => 'Latin Extended-A' ],
    [ script        => 'Latin' ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected, "charinfo(0x100) field '$field'");
}
a196fbfd
JH
51
# 0x0590 is in the Hebrew block but unused, so every charinfo() field is
# expected to be undefined.  The field name is passed as the diagnostic so
# that a failure identifies which field was unexpectedly set.

$charinfo = charinfo(0x590);

for my $field (qw(
    code name category combining bidi decomposition
    decimal digit numeric mirrored unicode10 comment
    upper lower title block script
)) {
    ok($charinfo->{$field}, undef, "charinfo(0x590) field '$field'");
}
a196fbfd
JH
73
# 0x05d0 is in the Hebrew block and used.  Table-driven so that a failure
# names the field that mismatched.

$charinfo = charinfo(0x5d0);

for my $check (
    [ code          => '05D0' ],
    [ name          => 'HEBREW LETTER ALEF' ],
    [ category      => 'Lo' ],
    [ combining     => '0' ],
    [ bidi          => 'R' ],
    [ decomposition => '' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Hebrew' ],
    [ script        => 'Hebrew' ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected, "charinfo(0x5d0) field '$field'");
}
561c79ed 95
a6fa416b
TS
# an open syllable in Hangul: the expected decomposition has two parts.
# Table-driven so that a failure names the field that mismatched.

$charinfo = charinfo(0xAC00);

for my $check (
    [ code          => 'AC00' ],
    [ name          => 'HANGUL SYLLABLE GA' ],
    [ category      => 'Lo' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '1100 1161' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Hangul Syllables' ],
    [ script        => 'Hangul' ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected, "charinfo(0xAC00) field '$field'");
}
117
# a close syllable in Hangul: the expected decomposition has three parts.
# Table-driven so that a failure names the field that mismatched.

$charinfo = charinfo(0xAE00);

for my $check (
    [ code          => 'AE00' ],
    [ name          => 'HANGUL SYLLABLE GEUL' ],
    [ category      => 'Lo' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '1100 1173 11AF' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Hangul Syllables' ],
    [ script        => 'Hangul' ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected, "charinfo(0xAE00) field '$field'");
}
139
# charinfo(0x1D400): a code point above 0xFFFF.  It has a <font>
# decomposition and is expected to report no script (undef).  Table-driven
# so that a failure names the field that mismatched.
$charinfo = charinfo(0x1D400);

for my $check (
    [ code          => '1D400' ],
    [ name          => 'MATHEMATICAL BOLD CAPITAL A' ],
    [ category      => 'Lu' ],
    [ combining     => '0' ],
    [ bidi          => 'L' ],
    [ decomposition => '<font> 0041' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '' ],
    [ mirrored      => 'N' ],
    [ unicode10     => '' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Mathematical Alphanumeric Symbols' ],
    [ script        => undef ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected, "charinfo(0x1D400) field '$field'");
}
159
1189d1e4 160use UnicodeCD qw(charblock charscript);
a196fbfd
JH
161
# 0x0590 is in the Hebrew block but unused: charblock() is still expected
# to report the block, while charscript() is expected to return undef.
{
    my $unused_hebrew = 0x590;

    ok(charblock($unused_hebrew), 'Hebrew');
    ok(charscript($unused_hebrew), undef);
}
561c79ed 166
b08cd201
JH
# charinfo(0xbe): a character with a non-empty numeric value ('3/4'), a
# <fraction> decomposition, and no script (undef).  Table-driven so that a
# failure names the field that mismatched.
$charinfo = charinfo(0xbe);

for my $check (
    [ code          => '00BE' ],
    [ name          => 'VULGAR FRACTION THREE QUARTERS' ],
    [ category      => 'No' ],
    [ combining     => '0' ],
    [ bidi          => 'ON' ],
    [ decomposition => '<fraction> 0033 2044 0034' ],
    [ decimal       => '' ],
    [ digit         => '' ],
    [ numeric       => '3/4' ],
    [ mirrored      => 'N' ],
    [ unicode10     => 'FRACTION THREE QUARTERS' ],
    [ comment       => '' ],
    [ upper         => '' ],
    [ lower         => '' ],
    [ title         => '' ],
    [ block         => 'Latin-1 Supplement' ],
    [ script        => undef ],
) {
    my ($field, $expected) = @$check;
    ok($charinfo->{$field}, $expected, "charinfo(0xbe) field '$field'");
}
10a6ecd2 186
1189d1e4 187use UnicodeCD qw(charblocks charscripts);
10a6ecd2 188
# charblocks() returns a hash reference keyed by block name; each value is
# indexed here as a list of ranges whose first element is the range start.
my $charblocks = charblocks();

ok(exists $$charblocks{Thai});
ok($charblocks->{Thai}[0][0], 0x0e00);
ok(exists $$charblocks{PigLatin} ? 0 : 1);   # made-up name must be absent
10a6ecd2 194
# charscripts() returns a hash reference keyed by script name; each value is
# indexed here as a list of ranges whose first element is the range start.
my $charscripts = charscripts();

ok(exists $$charscripts{Armenian});
ok($charscripts->{Armenian}[0][0], 0x0531);
ok(exists $$charscripts{PigLatin} ? 0 : 1);   # made-up name must be absent
10a6ecd2
JH
200
my $charscript;

# The same code point is passed as a string in three spellings (bare hex
# digits, "0x" prefix, "U+" prefix); each must resolve to the same script.
for my $spelling ("12ab", "0x12ab", "U+12ab") {
    $charscript = charscript($spelling);
    ok($charscript, 'Ethiopic');
}
211
my $ranges;

# Given a script name, charscript() returns a reference to a list of
# ranges; the first range's start and end are checked here.
$ranges = charscript('Ogham');
ok($$ranges[0][0], 0x1681);
ok($$ranges[0][1], 0x169a);
217
1189d1e4 218use UnicodeCD qw(charinrange);
10a6ecd2
JH
219
# Probe both edges of the Cherokee script ranges: the code points just
# outside must be rejected and the first/last ones inside accepted.
$ranges = charscript('Cherokee');

for my $probe (
    [ "139f", 0 ],
    [ "13a0", 1 ],
    [ "13f4", 1 ],
    [ "13f5", 0 ],
) {
    my ($code, $inside) = @$probe;
    ok($inside ? charinrange($ranges, $code) : !charinrange($ranges, $code));
}
225
# The version reported by UnicodeCD must match the one this test was
# written against.
my $unicode_version = UnicodeCD::UnicodeVersion();
ok($unicode_version, 3.1);
b08cd201 227
1189d1e4 228use UnicodeCD qw(compexcl);
b08cd201
JH
229
# compexcl() must be false for 0x0100 and true for 0x0958.
my $excluded_0100 = compexcl(0x0100);
my $excluded_0958 = compexcl(0x0958);

ok(!$excluded_0100);
ok($excluded_0958);
232
1189d1e4 233use UnicodeCD qw(casefold);
b08cd201
JH
234
my $casefold;

# Status 'C' entry: 0x41 maps to the single code point 0061.
$casefold = casefold(0x41);

my $fold_0041_matches = $casefold->{code}    eq '0041'
                     && $casefold->{status}  eq 'C'
                     && $casefold->{mapping} eq '0061';
ok($fold_0041_matches);

# Status 'F' entry: 0xdf maps to a two-code-point sequence.
$casefold = casefold(0xdf);

my $fold_00df_matches = $casefold->{code}    eq '00DF'
                     && $casefold->{status}  eq 'F'
                     && $casefold->{mapping} eq '0073 0073';
ok($fold_00df_matches);

# 0x20 is expected to yield a false value from casefold().
my $fold_0020 = casefold(0x20);
ok(!$fold_0020);
250
1189d1e4 251use UnicodeCD qw(casespec);
b08cd201
JH
252
my $casespec;

# 0x41 is expected to yield a false value from casespec().
ok(!casespec(0x41));

# 0xdf has unconditional special mappings, so no condition is expected.
$casespec = casespec(0xdf);

# The absence of a condition is tested with defined(): comparing against
# undef with "eq" stringifies undef to '' (with a warning under -w) and
# would also accept an empty-string condition.
ok($casespec->{code}  eq '00DF' &&
   $casespec->{lower} eq '00DF' &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition});

# 0x307 has a conditional mapping with empty title and upper mappings.
$casespec = casespec(0x307);

ok($casespec->{code}  eq '0307' &&
   $casespec->{lower} eq '0307' &&
   $casespec->{title} eq '' &&
   $casespec->{upper} eq '' &&
   $casespec->{condition} eq 'lt AFTER_i');