Commit | Line | Data |
---|---|---|
b5d9a953 | 1 | |
06c8fc8f | 2 | BEGIN { |
456a1446 CBW |
3 | if ($ENV{PERL_CORE}) { |
4 | chdir('t') if -d 't'; | |
5 | @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); | |
6 | } | |
06c8fc8f RGS |
7 | } |
8 | ||
06c8fc8f RGS |
9 | use strict; |
10 | use warnings; | |
c28567dd | 11 | BEGIN { $| = 1; print "1..118\n"; } |
cba8842c A |
12 | my $count = 0; |
13 | sub ok ($;$) { | |
14 | my $p = my $r = shift; | |
15 | if (@_) { | |
16 | my $x = shift; | |
17 | $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; | |
18 | } | |
19 | print $p ? "ok" : "not ok", ' ', ++$count, "\n"; | |
20 | } | |
21 | ||
06c8fc8f RGS |
22 | use Unicode::Collate; |
23 | ||
68adb2b0 CBW |
24 | ok(1); |
25 | ||
983d5bee TS |
26 | sub _pack_U { Unicode::Collate::pack_U(@_) } |
27 | sub _unpack_U { Unicode::Collate::unpack_U(@_) } | |
28 | ||
68adb2b0 CBW |
29 | ######################### |
30 | ||
06c8fc8f RGS |
31 | our $kjeEntry = <<'ENTRIES'; |
32 | 0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT | |
33 | 0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY | |
34 | 043A ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA | |
35 | 041A ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA | |
36 | 045C ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE | |
37 | 043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE | |
38 | 040C ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE | |
39 | 041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE | |
40 | ENTRIES | |
41 | ||
42 | our $aaEntry = <<'ENTRIES'; | |
43 | 0304 ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230) | |
44 | 030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230) | |
45 | 0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202) | |
46 | 031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232) | |
47 | 0061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A | |
48 | 0041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A | |
49 | 007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z | |
50 | 005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z | |
51 | 00E5 ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM | |
52 | 00C5 ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM | |
53 | 0061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE | |
54 | 0041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE | |
55 | ENTRIES | |
56 | ||
57 | ######################### | |
58 | ||
06c8fc8f RGS |
59 | my $kjeNoN = Unicode::Collate->new( |
60 | level => 1, | |
61 | table => undef, | |
62 | normalization => undef, | |
63 | entry => $kjeEntry, | |
64 | ); | |
65 | ||
aa7758f7 CBW |
66 | ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); |
67 | ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); | |
68 | ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); | |
69 | ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); | |
06c8fc8f | 70 | |
b5d9a953 CBW |
71 | # 5 |
72 | ||
06c8fc8f RGS |
73 | our %sortkeys; |
74 | ||
aa7758f7 CBW |
75 | $sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}"); |
76 | $sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"); | |
77 | $sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"); | |
06c8fc8f RGS |
78 | |
79 | eval { require Unicode::Normalize }; | |
abd1ec54 | 80 | if (!$@) { |
06c8fc8f RGS |
81 | my $kjeNFD = Unicode::Collate->new( |
82 | level => 1, | |
83 | table => undef, | |
84 | entry => $kjeEntry, | |
85 | ); | |
b5d9a953 | 86 | |
aa7758f7 CBW |
87 | ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}")); |
88 | ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}")); | |
89 | ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}")); | |
90 | ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); | |
b5d9a953 | 91 | # 9 |
06c8fc8f RGS |
92 | |
93 | my $aaNFD = Unicode::Collate->new( | |
94 | level => 1, | |
95 | table => undef, | |
96 | entry => $aaEntry, | |
97 | ); | |
98 | ||
99 | ok($aaNFD->lt("Z", "A\x{30A}\x{304}")); | |
100 | ok($aaNFD->eq("A", "A\x{304}\x{30A}")); | |
983d5bee | 101 | ok($aaNFD->eq(_pack_U(0xE5), "A\x{30A}\x{304}")); |
06c8fc8f RGS |
102 | ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}")); |
103 | ok($aaNFD->lt("Z", "A\x{327}\x{30A}")); | |
104 | ok($aaNFD->lt("Z", "A\x{30A}\x{327}")); | |
105 | ok($aaNFD->lt("Z", "A\x{31A}\x{30A}")); | |
106 | ok($aaNFD->lt("Z", "A\x{30A}\x{31A}")); | |
b5d9a953 | 107 | # 17 |
06c8fc8f RGS |
108 | |
109 | my $aaPre = Unicode::Collate->new( | |
110 | level => 1, | |
111 | normalization => "prenormalized", | |
112 | table => undef, | |
113 | entry => $aaEntry, | |
114 | ); | |
115 | ||
116 | ok($aaPre->lt("Z", "A\x{30A}\x{304}")); | |
117 | ok($aaPre->eq("A", "A\x{304}\x{30A}")); | |
983d5bee | 118 | ok($aaPre->eq(_pack_U(0xE5), "A\x{30A}\x{304}")); |
06c8fc8f RGS |
119 | ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}")); |
120 | ok($aaPre->lt("Z", "A\x{327}\x{30A}")); | |
121 | ok($aaPre->lt("Z", "A\x{30A}\x{327}")); | |
122 | ok($aaPre->lt("Z", "A\x{31A}\x{30A}")); | |
123 | ok($aaPre->lt("Z", "A\x{30A}\x{31A}")); | |
b5d9a953 CBW |
124 | # 25 |
125 | } else { | |
126 | ok(1) for 1..20; | |
06c8fc8f RGS |
127 | } |
128 | ||
129 | # again: loading Unicode::Normalize should not affect $kjeNoN. | |
aa7758f7 CBW |
130 | ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); |
131 | ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); | |
132 | ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); | |
133 | ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); | |
06c8fc8f | 134 | |
aa7758f7 CBW |
135 | ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}")); |
136 | ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}")); | |
137 | ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}")); | |
06c8fc8f | 138 | |
b5d9a953 CBW |
139 | # 32 |
140 | ||
06c8fc8f RGS |
141 | my $aaNoN = Unicode::Collate->new( |
142 | level => 1, | |
143 | table => undef, | |
144 | entry => $aaEntry, | |
145 | normalization => undef, | |
146 | ); | |
147 | ||
148 | ok($aaNoN->lt("Z", "A\x{30A}\x{304}")); | |
149 | ok($aaNoN->eq("A", "A\x{304}\x{30A}")); | |
983d5bee | 150 | ok($aaNoN->eq(_pack_U(0xE5), "A\x{30A}\x{304}")); |
06c8fc8f RGS |
151 | ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}")); |
152 | ok($aaNoN->eq("A", "A\x{327}\x{30A}")); | |
153 | ok($aaNoN->lt("Z", "A\x{30A}\x{327}")); | |
154 | ok($aaNoN->eq("A", "A\x{31A}\x{30A}")); | |
155 | ok($aaNoN->lt("Z", "A\x{30A}\x{31A}")); | |
156 | ||
b5d9a953 CBW |
157 | # 40 |
158 | ||
bd65daab | 159 | # suppress contractions (not affected) |
aa7758f7 CBW |
160 | |
161 | my $kjeSup = Unicode::Collate->new( | |
162 | level => 1, | |
163 | table => undef, | |
164 | normalization => undef, | |
165 | entry => $kjeEntry, | |
166 | suppress => [0x400..0x45F], | |
167 | ); | |
168 | ||
bd65daab CBW |
169 | ok($kjeSup->lt("\x{43A}", "\x{43A}\x{301}")); |
170 | ok($kjeSup->eq("\x{45C}", "\x{43A}\x{301}")); | |
171 | ok($kjeSup->lt("\x{41A}", "\x{41A}\x{301}")); | |
172 | ok($kjeSup->eq("\x{40C}", "\x{41A}\x{301}")); | |
aa7758f7 | 173 | |
b5d9a953 CBW |
174 | # 44 |
175 | ||
176 | our $tibetanEntry = <<'ENTRIES'; | |
f8187d97 SH |
177 | 0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) |
178 | 0FB2 ; [.205B.0020.0002.0FB2] # TIBETAN SUBJOINED LETTER RA | |
179 | 0FB3 ; [.205E.0020.0002.0FB3] # TIBETAN SUBJOINED LETTER LA | |
b5d9a953 CBW |
180 | 0F71 ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA |
181 | 0F72 ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I | |
182 | 0F73 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II | |
183 | 0F71 0F72 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II | |
184 | 0F80 ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I | |
185 | 0F81 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II | |
186 | 0F71 0F80 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II | |
187 | 0F74 ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U | |
188 | 0F75 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU | |
189 | 0F71 0F74 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU | |
190 | 0F76 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R | |
191 | 0FB2 0F80 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R | |
192 | 0F77 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR | |
193 | 0FB2 0F81 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR | |
194 | 0FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR | |
195 | 0F78 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L | |
196 | 0FB3 0F80 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L | |
197 | 0F79 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL | |
198 | 0FB3 0F81 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL | |
199 | 0FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL | |
200 | ENTRIES | |
201 | ||
202 | # ccc(0F71) = 129 | |
203 | # ccc(0F80) = 130 | |
204 | # 0F76 = 0FB2 0F80 | |
205 | # 0F78 = 0FB3 0F80 | |
206 | # 0F81 = 0F71 0F80 | |
207 | # 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71 | |
208 | # 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71 | |
209 | ||
210 | eval { require Unicode::Normalize }; | |
211 | if (!$@) { | |
212 | my $tibNFD = Unicode::Collate->new( | |
213 | table => undef, | |
214 | entry => $tibetanEntry, | |
f8187d97 | 215 | UCA_Version => 24, |
b5d9a953 CBW |
216 | ); |
217 | ||
218 | # VOCALIC RR | |
219 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}")); | |
220 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}")); | |
221 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}")); | |
222 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}")); | |
223 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}")); | |
224 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}")); | |
225 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}")); | |
226 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}")); | |
227 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}")); | |
228 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}")); | |
229 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}")); | |
230 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}")); | |
231 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}")); | |
232 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}")); | |
233 | # 58 | |
234 | ||
235 | # VOCALIC LL | |
236 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}")); | |
237 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}")); | |
238 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}")); | |
239 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}")); | |
240 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}")); | |
241 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}")); | |
242 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}")); | |
243 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}")); | |
244 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}")); | |
245 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}")); | |
246 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}")); | |
247 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}")); | |
248 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}")); | |
249 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}")); | |
250 | # 72 | |
251 | ||
f8187d97 SH |
252 | my $a1 = "\x{FB2}\x{334}\x{F81}"; |
253 | my $b1 = "\x{F77}\0\x{334}"; | |
254 | my $a2 = "\x{FB2}\x{334}\x{F81}"; | |
255 | my $b2 = "\x{FB2}\x{F80}\0\x{334}\x{F71}"; | |
256 | ||
257 | for my $v (qw/20 22 24 26 28/) { | |
258 | my $tib = Unicode::Collate->new( | |
259 | table => undef, | |
260 | entry => $tibetanEntry, | |
261 | UCA_Version => $v, | |
262 | ); | |
263 | my $long = 22 <= $v && $v <= 24; | |
264 | ok($tib->cmp($a1, $b1), $long ? 0 : -1); | |
265 | ok($tib->cmp($a2, $b2), $long ? 1 : 0); | |
266 | ||
267 | $tib->change(long_contraction => 0); | |
268 | ok($tib->cmp($a1, $b1), -1); | |
269 | ok($tib->cmp($a2, $b2), 0); | |
270 | ||
271 | $tib->change(long_contraction => 1); | |
272 | ok($tib->cmp($a1, $b1), 0); | |
273 | ok($tib->cmp($a2, $b2), 1); | |
274 | } | |
275 | # 102 | |
276 | ||
277 | # UCA_Version => 22 | |
278 | ok($tibNFD->cmp($a1, $b1), 0); | |
279 | ok($tibNFD->cmp($a2, $b2), 1); | |
280 | ||
281 | $tibNFD->change(UCA_Version => 26); # not affect long_contraction | |
282 | ok($tibNFD->cmp($a1, $b1), 0); | |
283 | ok($tibNFD->cmp($a2, $b2), 1); | |
284 | # 106 | |
285 | ||
b5d9a953 CBW |
286 | my $discontNFD = Unicode::Collate->new( |
287 | table => undef, | |
f8187d97 | 288 | UCA_Version => 22, |
b5d9a953 CBW |
289 | entry => <<'ENTRIES', |
290 | 0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) | |
291 | 0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT | |
292 | 0300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT | |
293 | 0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA | |
294 | 0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY | |
295 | 0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A | |
296 | 0041 0327 0301 ; [.0102.0020.0008.0041] | |
297 | 0041 0300 ; [.0103.0020.0008.0041] | |
298 | ENTRIES | |
299 | ); | |
300 | ||
c28567dd CBW |
301 | ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); |
302 | ok($discontNFD->eq("A\x{327}\x{300}", "A\x{300}\0\x{327}")); | |
303 | ||
304 | $discontNFD->change(long_contraction => 0); | |
305 | ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); | |
306 | ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\0\x{327}\x{301}\x{334}")); | |
307 | ok($discontNFD->eq("A\x{327}\x{300}", "A\x{300}\0\x{327}")); | |
308 | ||
309 | $discontNFD->change(level => 1); | |
310 | ok($discontNFD->gt("A\x{327}\x{300}", "A\x{327}\0\x{300}")); | |
311 | ||
312 | # discontiguous | |
313 | ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); | |
314 | ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{300}")); | |
315 | ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A")); | |
316 | ||
317 | # contiguous | |
318 | ok($discontNFD->eq("A\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); | |
319 | ok($discontNFD->lt("A\x{327}\x{301}", "A\x{300}")); | |
320 | ok($discontNFD->gt("A\x{327}\x{301}", "A")); | |
b5d9a953 | 321 | } else { |
c28567dd | 322 | ok(1) for 1..74; |
b5d9a953 | 323 | } |
c28567dd | 324 | # 118 |