This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Unicode-Collate: synch with CPAN version 1.31
[perl5.git] / cpan / Unicode-Collate / t / contract.t
CommitLineData
b5d9a953 1
06c8fc8f 2BEGIN {
456a1446
CBW
3 if ($ENV{PERL_CORE}) {
4 chdir('t') if -d 't';
5 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
6 }
06c8fc8f
RGS
7}
8
06c8fc8f
RGS
9use strict;
10use warnings;
c28567dd 11BEGIN { $| = 1; print "1..118\n"; }
cba8842c
A
12my $count = 0;
13sub ok ($;$) {
14 my $p = my $r = shift;
15 if (@_) {
16 my $x = shift;
17 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
18 }
19 print $p ? "ok" : "not ok", ' ', ++$count, "\n";
20}
21
06c8fc8f
RGS
22use Unicode::Collate;
23
68adb2b0
CBW
24ok(1);
25
983d5bee
TS
26sub _pack_U { Unicode::Collate::pack_U(@_) }
27sub _unpack_U { Unicode::Collate::unpack_U(@_) }
28
68adb2b0
CBW
29#########################
30
06c8fc8f
RGS
31our $kjeEntry = <<'ENTRIES';
320301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
330334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
34043A ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA
35041A ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA
36045C ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
37043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
38040C ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
39041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
40ENTRIES
41
42our $aaEntry = <<'ENTRIES';
430304 ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230)
44030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230)
450327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202)
46031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232)
470061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A
480041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A
49007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z
50005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z
5100E5 ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM
5200C5 ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM
530061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE
540041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE
55ENTRIES
56
57#########################
58
06c8fc8f
RGS
59my $kjeNoN = Unicode::Collate->new(
60 level => 1,
61 table => undef,
62 normalization => undef,
63 entry => $kjeEntry,
64);
65
aa7758f7
CBW
66ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
67ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
68ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
69ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
06c8fc8f 70
b5d9a953
CBW
71# 5
72
06c8fc8f
RGS
73our %sortkeys;
74
aa7758f7
CBW
75$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}");
76$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}");
77$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}");
06c8fc8f
RGS
78
79eval { require Unicode::Normalize };
abd1ec54 80if (!$@) {
06c8fc8f
RGS
81 my $kjeNFD = Unicode::Collate->new(
82 level => 1,
83 table => undef,
84 entry => $kjeEntry,
85 );
b5d9a953 86
aa7758f7
CBW
87ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}"));
88ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}"));
89ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}"));
90ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
b5d9a953 91# 9
06c8fc8f
RGS
92
93 my $aaNFD = Unicode::Collate->new(
94 level => 1,
95 table => undef,
96 entry => $aaEntry,
97 );
98
99ok($aaNFD->lt("Z", "A\x{30A}\x{304}"));
100ok($aaNFD->eq("A", "A\x{304}\x{30A}"));
983d5bee 101ok($aaNFD->eq(_pack_U(0xE5), "A\x{30A}\x{304}"));
06c8fc8f
RGS
102ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}"));
103ok($aaNFD->lt("Z", "A\x{327}\x{30A}"));
104ok($aaNFD->lt("Z", "A\x{30A}\x{327}"));
105ok($aaNFD->lt("Z", "A\x{31A}\x{30A}"));
106ok($aaNFD->lt("Z", "A\x{30A}\x{31A}"));
b5d9a953 107# 17
06c8fc8f
RGS
108
109 my $aaPre = Unicode::Collate->new(
110 level => 1,
111 normalization => "prenormalized",
112 table => undef,
113 entry => $aaEntry,
114 );
115
116ok($aaPre->lt("Z", "A\x{30A}\x{304}"));
117ok($aaPre->eq("A", "A\x{304}\x{30A}"));
983d5bee 118ok($aaPre->eq(_pack_U(0xE5), "A\x{30A}\x{304}"));
06c8fc8f
RGS
119ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}"));
120ok($aaPre->lt("Z", "A\x{327}\x{30A}"));
121ok($aaPre->lt("Z", "A\x{30A}\x{327}"));
122ok($aaPre->lt("Z", "A\x{31A}\x{30A}"));
123ok($aaPre->lt("Z", "A\x{30A}\x{31A}"));
b5d9a953
CBW
124# 25
125} else {
126 ok(1) for 1..20;
06c8fc8f
RGS
127}
128
129# again: loading Unicode::Normalize should not affect $kjeNoN.
aa7758f7
CBW
130ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
131ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
132ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
133ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
06c8fc8f 134
aa7758f7
CBW
135ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}"));
136ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"));
137ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"));
06c8fc8f 138
b5d9a953
CBW
139# 32
140
06c8fc8f
RGS
141my $aaNoN = Unicode::Collate->new(
142 level => 1,
143 table => undef,
144 entry => $aaEntry,
145 normalization => undef,
146);
147
148ok($aaNoN->lt("Z", "A\x{30A}\x{304}"));
149ok($aaNoN->eq("A", "A\x{304}\x{30A}"));
983d5bee 150ok($aaNoN->eq(_pack_U(0xE5), "A\x{30A}\x{304}"));
06c8fc8f
RGS
151ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}"));
152ok($aaNoN->eq("A", "A\x{327}\x{30A}"));
153ok($aaNoN->lt("Z", "A\x{30A}\x{327}"));
154ok($aaNoN->eq("A", "A\x{31A}\x{30A}"));
155ok($aaNoN->lt("Z", "A\x{30A}\x{31A}"));
156
b5d9a953
CBW
157# 40
158
bd65daab 159# suppress contractions (not affected)
aa7758f7
CBW
160
161my $kjeSup = Unicode::Collate->new(
162 level => 1,
163 table => undef,
164 normalization => undef,
165 entry => $kjeEntry,
166 suppress => [0x400..0x45F],
167);
168
bd65daab
CBW
169ok($kjeSup->lt("\x{43A}", "\x{43A}\x{301}"));
170ok($kjeSup->eq("\x{45C}", "\x{43A}\x{301}"));
171ok($kjeSup->lt("\x{41A}", "\x{41A}\x{301}"));
172ok($kjeSup->eq("\x{40C}", "\x{41A}\x{301}"));
aa7758f7 173
b5d9a953
CBW
174# 44
175
176our $tibetanEntry = <<'ENTRIES';
f8187d97
SH
1770000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
1780FB2 ; [.205B.0020.0002.0FB2] # TIBETAN SUBJOINED LETTER RA
1790FB3 ; [.205E.0020.0002.0FB3] # TIBETAN SUBJOINED LETTER LA
b5d9a953
CBW
1800F71 ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA
1810F72 ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I
1820F73 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1830F71 0F72 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1840F80 ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I
1850F81 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1860F71 0F80 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1870F74 ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U
1880F75 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1890F71 0F74 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1900F76 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1910FB2 0F80 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1920F77 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1930FB2 0F81 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1940FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1950F78 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
1960FB3 0F80 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
1970F79 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
1980FB3 0F81 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
1990FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
200ENTRIES
201
202# ccc(0F71) = 129
203# ccc(0F80) = 130
204# 0F76 = 0FB2 0F80
205# 0F78 = 0FB3 0F80
206# 0F81 = 0F71 0F80
207# 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71
208# 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71
209
210eval { require Unicode::Normalize };
211if (!$@) {
212 my $tibNFD = Unicode::Collate->new(
213 table => undef,
214 entry => $tibetanEntry,
f8187d97 215 UCA_Version => 24,
b5d9a953
CBW
216 );
217
218 # VOCALIC RR
219 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}"));
220 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}"));
221 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}"));
222 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}"));
223 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}"));
224 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}"));
225 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}"));
226 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}"));
227 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}"));
228 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}"));
229 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}"));
230 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}"));
231 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}"));
232 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}"));
233# 58
234
235 # VOCALIC LL
236 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}"));
237 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}"));
238 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}"));
239 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}"));
240 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}"));
241 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}"));
242 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}"));
243 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}"));
244 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}"));
245 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}"));
246 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}"));
247 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}"));
248 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}"));
249 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}"));
250# 72
251
f8187d97
SH
252 my $a1 = "\x{FB2}\x{334}\x{F81}";
253 my $b1 = "\x{F77}\0\x{334}";
254 my $a2 = "\x{FB2}\x{334}\x{F81}";
255 my $b2 = "\x{FB2}\x{F80}\0\x{334}\x{F71}";
256
257 for my $v (qw/20 22 24 26 28/) {
258 my $tib = Unicode::Collate->new(
259 table => undef,
260 entry => $tibetanEntry,
261 UCA_Version => $v,
262 );
263 my $long = 22 <= $v && $v <= 24;
264 ok($tib->cmp($a1, $b1), $long ? 0 : -1);
265 ok($tib->cmp($a2, $b2), $long ? 1 : 0);
266
267 $tib->change(long_contraction => 0);
268 ok($tib->cmp($a1, $b1), -1);
269 ok($tib->cmp($a2, $b2), 0);
270
271 $tib->change(long_contraction => 1);
272 ok($tib->cmp($a1, $b1), 0);
273 ok($tib->cmp($a2, $b2), 1);
274 }
275# 102
276
277 # UCA_Version => 22
278 ok($tibNFD->cmp($a1, $b1), 0);
279 ok($tibNFD->cmp($a2, $b2), 1);
280
281 $tibNFD->change(UCA_Version => 26); # not affect long_contraction
282 ok($tibNFD->cmp($a1, $b1), 0);
283 ok($tibNFD->cmp($a2, $b2), 1);
284# 106
285
b5d9a953
CBW
286 my $discontNFD = Unicode::Collate->new(
287 table => undef,
f8187d97 288 UCA_Version => 22,
b5d9a953
CBW
289 entry => <<'ENTRIES',
2900000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
2910301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
2920300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT
2930327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA
2940334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
2950041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
2960041 0327 0301 ; [.0102.0020.0008.0041]
2970041 0300 ; [.0103.0020.0008.0041]
298ENTRIES
299 );
300
c28567dd
CBW
301 ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
302 ok($discontNFD->eq("A\x{327}\x{300}", "A\x{300}\0\x{327}"));
303
304 $discontNFD->change(long_contraction => 0);
305 ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
306 ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\0\x{327}\x{301}\x{334}"));
307 ok($discontNFD->eq("A\x{327}\x{300}", "A\x{300}\0\x{327}"));
308
309 $discontNFD->change(level => 1);
310 ok($discontNFD->gt("A\x{327}\x{300}", "A\x{327}\0\x{300}"));
311
312 # discontiguous
313 ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
314 ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{300}"));
315 ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A"));
316
317 # contiguous
318 ok($discontNFD->eq("A\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
319 ok($discontNFD->lt("A\x{327}\x{301}", "A\x{300}"));
320 ok($discontNFD->gt("A\x{327}\x{301}", "A"));
b5d9a953 321} else {
c28567dd 322 ok(1) for 1..74;
b5d9a953 323}
c28567dd 324# 118