This is a live mirror of the Perl 5 development repository, currently hosted at https://github.com/perl/perl5
UnicodeCD::charinfo
[perl5.git] / lib / UnicodeCD.t
# Tests for the UnicodeCD character-database interface.

use UnicodeCD;

use Test;
use strict;

# The plan is declared inside BEGIN so it is in place before any ok() runs.
BEGIN {
    plan(tests => 111 + 17 * 3);
}

use UnicodeCD 'charinfo';

# Receives the result of each charinfo() call made below.
my $charinfo;

# U+0041: an ASCII capital letter whose only case mapping is lowercase.
$charinfo = charinfo(0x41);

{
    # Field name / expected value pairs, verified in the order listed.
    my @expect = (
        code          => '0041',
        name          => 'LATIN CAPITAL LETTER A',
        category      => 'Lu',
        combining     => '0',
        bidi          => 'L',
        decomposition => '',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '0061',
        title         => '',
        block         => 'Basic Latin',
        script        => 'Latin',
    );

    # One ok() per pair, so the number of tests run stays the same.
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        ok($charinfo->{$field}, $want);
    }
}

# U+0100: a letter with a decomposition and a Unicode 1.0 name.
$charinfo = charinfo(0x100);

{
    # Field name / expected value pairs, verified in the order listed.
    my @expect = (
        code          => '0100',
        name          => 'LATIN CAPITAL LETTER A WITH MACRON',
        category      => 'Lu',
        combining     => '0',
        bidi          => 'L',
        decomposition => '0041 0304',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => 'LATIN CAPITAL LETTER A MACRON',
        comment       => '',
        upper         => '',
        lower         => '0101',
        title         => '',
        block         => 'Latin Extended-A',
        script        => 'Latin',
    );

    # One ok() per pair, so the number of tests run stays the same.
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        ok($charinfo->{$field}, $want);
    }
}

# 0x0590 is in the Hebrew block but unused, so every charinfo field is
# expected to be undefined.

$charinfo = charinfo(0x590);

for my $field (qw(code name category combining bidi decomposition
                  decimal digit numeric mirrored unicode10 comment
                  upper lower title block script)) {
    ok($charinfo->{$field}, undef);
}

# 0x05d0 is in the Hebrew block and used.

$charinfo = charinfo(0x5d0);

{
    # Field name / expected value pairs, verified in the order listed.
    my @expect = (
        code          => '05D0',
        name          => 'HEBREW LETTER ALEF',
        category      => 'Lo',
        combining     => '0',
        bidi          => 'R',
        decomposition => '',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Hebrew',
        script        => 'Hebrew',
    );

    # One ok() per pair, so the number of tests run stays the same.
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        ok($charinfo->{$field}, $want);
    }
}

# An open syllable in Hangul: the expected decomposition has two parts.

$charinfo = charinfo(0xAC00);

{
    # Field name / expected value pairs, verified in the order listed.
    my @expect = (
        code          => 'AC00',
        name          => 'HANGUL SYLLABLE GA',
        category      => 'Lo',
        combining     => '0',
        bidi          => 'L',
        decomposition => '1100 1161',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Hangul Syllables',
        script        => 'Hangul',
    );

    # One ok() per pair, so the number of tests run stays the same.
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        ok($charinfo->{$field}, $want);
    }
}

# A closed syllable in Hangul: the expected decomposition has three parts.

$charinfo = charinfo(0xAE00);

{
    # Field name / expected value pairs, verified in the order listed.
    my @expect = (
        code          => 'AE00',
        name          => 'HANGUL SYLLABLE GEUL',
        category      => 'Lo',
        combining     => '0',
        bidi          => 'L',
        decomposition => '1100 1173 11AF',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Hangul Syllables',
        script        => 'Hangul',
    );

    # One ok() per pair, so the number of tests run stays the same.
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        ok($charinfo->{$field}, $want);
    }
}

# U+1D400: a code point above 0xFFFF; it has a block but no script is
# expected for it.
$charinfo = charinfo(0x1D400);

{
    # Field name / expected value pairs, verified in the order listed.
    my @expect = (
        code          => '1D400',
        name          => 'MATHEMATICAL BOLD CAPITAL A',
        category      => 'Lu',
        combining     => '0',
        bidi          => 'L',
        decomposition => '<font> 0041',
        decimal       => '',
        digit         => '',
        numeric       => '',
        mirrored      => 'N',
        unicode10     => '',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Mathematical Alphanumeric Symbols',
        script        => undef,
    );

    # One ok() per pair, so the number of tests run stays the same.
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        ok($charinfo->{$field}, $want);
    }
}

use UnicodeCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused: a block name is still
# expected for it, while its script is expected to be undefined.
{
    my $unused = 0x590;

    ok(charblock($unused),  'Hebrew');
    ok(charscript($unused), undef);
}

# U+00BE: a fraction, the one character here expected to carry a
# numeric value; no script is expected for it.
$charinfo = charinfo(0xbe);

{
    # Field name / expected value pairs, verified in the order listed.
    my @expect = (
        code          => '00BE',
        name          => 'VULGAR FRACTION THREE QUARTERS',
        category      => 'No',
        combining     => '0',
        bidi          => 'ON',
        decomposition => '<fraction> 0033 2044 0034',
        decimal       => '',
        digit         => '',
        numeric       => '3/4',
        mirrored      => 'N',
        unicode10     => 'FRACTION THREE QUARTERS',
        comment       => '',
        upper         => '',
        lower         => '',
        title         => '',
        block         => 'Latin-1 Supplement',
        script        => undef,
    );

    # One ok() per pair, so the number of tests run stays the same.
    while (my ($field, $want) = splice(@expect, 0, 2)) {
        ok($charinfo->{$field}, $want);
    }
}

use UnicodeCD qw(charblocks charscripts);

# charblocks() is indexed by block name; each entry is a list of ranges
# whose first element is the start of the range.
my $charblocks = charblocks();

ok(exists $charblocks->{Thai});
ok($charblocks->{Thai}[0][0], 0x0e00);
ok(!exists $charblocks->{PigLatin});

# charscripts() has the same layout, indexed by script name.
my $charscripts = charscripts();

ok(exists $charscripts->{Armenian});
ok($charscripts->{Armenian}[0][0], 0x0531);
ok(!exists $charscripts->{PigLatin});

my $charscript;

# Three string spellings of the argument, each expected to resolve to
# the Ethiopic script.
for my $spelling ("12ab", "0x12ab", "U+12ab") {
    $charscript = charscript($spelling);
    ok($charscript, 'Ethiopic');
}

my $ranges;

# Called with a script name, charscript() gives back a list of ranges;
# check both ends of the first Ogham range.
$ranges = charscript('Ogham');
ok($ranges->[0][0], 0x1681);
ok($ranges->[0][1], 0x169a);

use UnicodeCD qw(charinrange);

# Probe just outside and just inside each end of the Cherokee ranges.
$ranges = charscript('Cherokee');
for my $probe (["139f", 0], ["13a0", 1], ["13f4", 1], ["13f5", 0]) {
    my ($code, $inside) = @$probe;
    my $hit = charinrange($ranges, $code);
    ok($inside ? $hit : !$hit);
}

# The Unicode version the module reports.
ok(UnicodeCD::UnicodeVersion(), 3.1);

use UnicodeCD qw(compexcl);

# compexcl() is expected to be false for U+0100 and true for U+0958.
{
    my $excluded_0100 = compexcl(0x0100);
    ok(!$excluded_0100);

    my $excluded_0958 = compexcl(0x0958);
    ok($excluded_0958);
}

use UnicodeCD qw(casefold);

my $casefold;

# U+0041: status 'C' with a single-character mapping.  All three fields
# are folded into one boolean so this remains a single test.
$casefold = casefold(0x41);
{
    my $matches = $casefold->{code}    eq '0041'
               && $casefold->{status}  eq 'C'
               && $casefold->{mapping} eq '0061';
    ok($matches);
}

# U+00DF: status 'F' with a two-character mapping.
$casefold = casefold(0xdf);
{
    my $matches = $casefold->{code}    eq '00DF'
               && $casefold->{status}  eq 'F'
               && $casefold->{mapping} eq '0073 0073';
    ok($matches);
}

# U+0020 is expected to have no case folding at all.
ok(!casefold(0x20));

use UnicodeCD qw(casespec);

my $casespec;

# U+0041 is expected to have no special casing entry.
ok(!casespec(0x41));

# U+00DF: special casing with no condition attached.
$casespec = casespec(0xdf);

# 'condition' must be undefined here.  It is checked with defined()
# rather than "eq undef": eq stringifies undef to '', so that form
# would also pass for an empty-string condition and never actually
# tests for undefinedness.
ok($casespec->{code}  eq '00DF'      &&
   $casespec->{lower} eq '00DF'      &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition});

# U+0307: special casing that applies only under a condition.
$casespec = casespec(0x307);

ok($casespec->{code}      eq '0307' &&
   $casespec->{lower}     eq '0307' &&
   $casespec->{title}     eq ''     &&
   $casespec->{upper}     eq ''     &&
   $casespec->{condition} eq 'lt AFTER_i');