This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
OS/2 File::* modules
[perl5.git] / lib / Unicode / UCD.t
# Tests for Unicode::UCD: charinfo, charblock(s), charscript(s), charinrange,
# UnicodeVersion, compexcl, casefold and casespec.

BEGIN {
    # The expectations below are written for ASCII-ordered code points, so
    # the whole file is skipped where ord("A") reports the EBCDIC value.
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    # Run from inside t/ when that directory exists, and load modules
    # only from ../lib.
    chdir 't' if -d 't';
    @INC = '../lib';
}

use strict;
use warnings;
use Unicode::UCD;
use Test::More;

BEGIN { plan tests => 162 };

use Unicode::UCD 'charinfo';

# The charinfo() fields that are checked, in the order the assertions run.
my @CHARINFO_FIELDS = qw(
    code name category combining bidi decomposition decimal digit numeric
    mirrored unicode10 comment upper lower title block script
);

# charinfo_is($code, $label, %expected)
#
# Looks up $code with charinfo() and emits one is() per entry of
# @CHARINFO_FIELDS (17 tests), comparing each field of the result with
# $expected{field}.  An expected value of undef means the field must be
# undefined.  Every field must be listed in %expected; a missing key is
# treated as a mistake in the test file and dies, so that a typo cannot
# silently turn into an "expect undef" check.
sub charinfo_is {
    my ($code, $label, %expected) = @_;

    # Attribute failures to the caller's line rather than to this helper.
    local $Test::Builder::Level = $Test::Builder::Level + 1;

    my $charinfo = charinfo($code);

    for my $field (@CHARINFO_FIELDS) {
        exists $expected{$field}
            or die "charinfo_is: no expectation given for '$field' ($label)\n";
        is($charinfo->{$field}, $expected{$field}, "$label: $field");
    }

    return;
}

charinfo_is(0x41, 'LATIN CAPITAL LETTER A',
    code          => '0041',
    name          => 'LATIN CAPITAL LETTER A',
    category      => 'Lu',
    combining     => '0',
    bidi          => 'L',
    decomposition => '',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '0061',
    title         => '',
    block         => 'Basic Latin',
    script        => 'Latin',
);

charinfo_is(0x100, 'LATIN CAPITAL LETTER A WITH MACRON',
    code          => '0100',
    name          => 'LATIN CAPITAL LETTER A WITH MACRON',
    category      => 'Lu',
    combining     => '0',
    bidi          => 'L',
    decomposition => '0041 0304',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => 'LATIN CAPITAL LETTER A MACRON',
    comment       => '',
    upper         => '',
    lower         => '0101',
    title         => '',
    block         => 'Latin Extended-A',
    script        => 'Latin',
);

# 0x0590 is in the Hebrew block but unused: every field must be undefined.

charinfo_is(0x590, '0x0590 - unused Hebrew',
    map { ($_ => undef) } @CHARINFO_FIELDS);

# 0x05d0 is in the Hebrew block and used.

charinfo_is(0x5d0, '05D0 - used Hebrew',
    code          => '05D0',
    name          => 'HEBREW LETTER ALEF',
    category      => 'Lo',
    combining     => '0',
    bidi          => 'R',
    decomposition => '',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Hebrew',
    script        => 'Hebrew',
);

# An open syllable in Hangul.

charinfo_is(0xAC00, 'HANGUL SYLLABLE-AC00',
    code          => 'AC00',
    name          => 'HANGUL SYLLABLE-AC00',
    category      => 'Lo',
    combining     => '0',
    bidi          => 'L',
    decomposition => undef,
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Hangul Syllables',
    script        => 'Hangul',
);

# A closed syllable in Hangul.

charinfo_is(0xAE00, 'HANGUL SYLLABLE-AE00',
    code          => 'AE00',
    name          => 'HANGUL SYLLABLE-AE00',
    category      => 'Lo',
    combining     => '0',
    bidi          => 'L',
    decomposition => undef,
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Hangul Syllables',
    script        => 'Hangul',
);

charinfo_is(0x1D400, 'MATHEMATICAL BOLD CAPITAL A',
    code          => '1D400',
    name          => 'MATHEMATICAL BOLD CAPITAL A',
    category      => 'Lu',
    combining     => '0',
    bidi          => 'L',
    decomposition => '<font> 0041',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Mathematical Alphanumeric Symbols',
    script        => undef,
);

use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused.

is(charblock(0x590),          'Hebrew', '0x0590 - Hebrew unused charblock');
is(charscript(0x590),         undef,    '0x0590 - Hebrew unused charscript');

charinfo_is(0xbe, 'VULGAR FRACTION THREE QUARTERS',
    code          => '00BE',
    name          => 'VULGAR FRACTION THREE QUARTERS',
    category      => 'No',
    combining     => '0',
    bidi          => 'ON',
    decomposition => '<fraction> 0033 2044 0034',
    decimal       => '',
    digit         => '',
    numeric       => '3/4',
    mirrored      => 'N',
    unicode10     => 'FRACTION THREE QUARTERS',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Latin-1 Supplement',
    script        => undef,
);

use Unicode::UCD qw(charblocks charscripts);

my $charblocks = charblocks();

ok(exists $charblocks->{Thai}, 'Thai charblock exists');
is($charblocks->{Thai}->[0]->[0], hex('0e00'), 'Thai charblock range start');
ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');

my $charscripts = charscripts();

ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
is($charscripts->{Armenian}->[0]->[0], hex('0531'),
   'Armenian charscript range start');
ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');

# charscript() is called with the same code point spelled three ways.

my $charscript;

$charscript = charscript("12ab");
is($charscript, 'Ethiopic', 'Ethiopic charscript');

$charscript = charscript("0x12ab");
is($charscript, 'Ethiopic', 'Ethiopic charscript (0x prefix)');

$charscript = charscript("U+12ab");
is($charscript, 'Ethiopic', 'Ethiopic charscript (U+ prefix)');

# Given a script name, charscript() is expected to return a list of ranges.

my $ranges;

$ranges = charscript('Ogham');
is($ranges->[0]->[0], hex('1681'), 'Ogham charscript');
is($ranges->[0]->[1], hex('169a'), 'Ogham charscript range end');

use Unicode::UCD qw(charinrange);

# Probe just outside and just inside both ends of the Cherokee ranges.

$ranges = charscript('Cherokee');
ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
ok( charinrange($ranges, "13a0"), '13a0 is in the Cherokee ranges');
ok( charinrange($ranges, "13f4"), '13f4 is in the Cherokee ranges');
ok(!charinrange($ranges, "13f5"), '13f5 is not in the Cherokee ranges');

# NOTE(review): this literal presumably has to track the Unicode data files
# shipped alongside the module -- confirm when those are updated.
is(Unicode::UCD::UnicodeVersion, '3.1.1', 'UnicodeVersion');

use Unicode::UCD qw(compexcl);

ok(!compexcl(0x0100), 'compexcl');
ok( compexcl(0x0958), 'compexcl 0x0958');

use Unicode::UCD qw(casefold);

my $casefold;

$casefold = casefold(0x41);

ok($casefold->{code} eq '0041' &&
   $casefold->{status} eq 'C'  &&
   $casefold->{mapping} eq '0061', 'casefold 0x41');

$casefold = casefold(0xdf);

ok($casefold->{code} eq '00DF' &&
   $casefold->{status} eq 'F'  &&
   $casefold->{mapping} eq '0073 0073', 'casefold 0xDF');

ok(!casefold(0x20), 'no casefold for 0x20');

use Unicode::UCD qw(casespec);

my $casespec;

ok(!casespec(0x41), 'no casespec for 0x41');

$casespec = casespec(0xdf);

# The condition must be absent, so test definedness: "eq undef" would
# really compare against the empty string (and warn), and could not tell
# "no condition" from "empty condition".
ok($casespec->{code} eq '00DF' &&
   $casespec->{lower} eq '00DF'  &&
   $casespec->{title} eq '0053 0073'  &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition}, 'casespec 0xDF');

$casespec = casespec(0x307);

ok($casespec->{az}->{code} eq '0307' &&
   $casespec->{az}->{lower} eq ''  &&
   $casespec->{az}->{title} eq '0307'  &&
   $casespec->{az}->{upper} eq '0307' &&
   $casespec->{az}->{condition} eq 'az AFTER_i NOT_MORE_ABOVE',
   'casespec 0x307');