This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Refactor die_exit.t to loop over a list, rather than iterate on an hash.
[perl5.git] / cpan / Unicode-Collate / t / contract.t
CommitLineData
b5d9a953 1
06c8fc8f
RGS
2BEGIN {
3 unless ("A" eq pack('U', 0x41)) {
4 print "1..0 # Unicode::Collate " .
5 "cannot stringify a Unicode code point\n";
6 exit 0;
7 }
456a1446
CBW
8 if ($ENV{PERL_CORE}) {
9 chdir('t') if -d 't';
10 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
11 }
06c8fc8f
RGS
12}
13
14use Test;
b5d9a953 15BEGIN { plan tests => 74 };
06c8fc8f
RGS
16
17use strict;
18use warnings;
19use Unicode::Collate;
20
68adb2b0
CBW
21ok(1);
22
23#########################
24
06c8fc8f
RGS
25our $kjeEntry = <<'ENTRIES';
260301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
270334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
28043A ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA
29041A ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA
30045C ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
31043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
32040C ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
33041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
34ENTRIES
35
36our $aaEntry = <<'ENTRIES';
370304 ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230)
38030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230)
390327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202)
40031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232)
410061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A
420041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A
43007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z
44005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z
4500E5 ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM
4600C5 ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM
470061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE
480041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE
49ENTRIES
50
51#########################
52
06c8fc8f
RGS
53my $kjeNoN = Unicode::Collate->new(
54 level => 1,
55 table => undef,
56 normalization => undef,
57 entry => $kjeEntry,
58);
59
aa7758f7
CBW
60ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
61ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
62ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
63ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
06c8fc8f 64
b5d9a953
CBW
65# 5
66
06c8fc8f
RGS
67our %sortkeys;
68
aa7758f7
CBW
69$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}");
70$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}");
71$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}");
06c8fc8f
RGS
72
73eval { require Unicode::Normalize };
abd1ec54 74if (!$@) {
06c8fc8f
RGS
75 my $kjeNFD = Unicode::Collate->new(
76 level => 1,
77 table => undef,
78 entry => $kjeEntry,
79 );
b5d9a953 80
aa7758f7
CBW
81ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}"));
82ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}"));
83ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}"));
84ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
b5d9a953 85# 9
06c8fc8f
RGS
86
87 my $aaNFD = Unicode::Collate->new(
88 level => 1,
89 table => undef,
90 entry => $aaEntry,
91 );
92
93ok($aaNFD->lt("Z", "A\x{30A}\x{304}"));
94ok($aaNFD->eq("A", "A\x{304}\x{30A}"));
95ok($aaNFD->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
96ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}"));
97ok($aaNFD->lt("Z", "A\x{327}\x{30A}"));
98ok($aaNFD->lt("Z", "A\x{30A}\x{327}"));
99ok($aaNFD->lt("Z", "A\x{31A}\x{30A}"));
100ok($aaNFD->lt("Z", "A\x{30A}\x{31A}"));
b5d9a953 101# 17
06c8fc8f
RGS
102
103 my $aaPre = Unicode::Collate->new(
104 level => 1,
105 normalization => "prenormalized",
106 table => undef,
107 entry => $aaEntry,
108 );
109
110ok($aaPre->lt("Z", "A\x{30A}\x{304}"));
111ok($aaPre->eq("A", "A\x{304}\x{30A}"));
112ok($aaPre->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
113ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}"));
114ok($aaPre->lt("Z", "A\x{327}\x{30A}"));
115ok($aaPre->lt("Z", "A\x{30A}\x{327}"));
116ok($aaPre->lt("Z", "A\x{31A}\x{30A}"));
117ok($aaPre->lt("Z", "A\x{30A}\x{31A}"));
b5d9a953
CBW
118# 25
119} else {
120 ok(1) for 1..20;
06c8fc8f
RGS
121}
122
123# again: loading Unicode::Normalize should not affect $kjeNoN.
aa7758f7
CBW
124ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
125ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
126ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
127ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
06c8fc8f 128
aa7758f7
CBW
129ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}"));
130ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"));
131ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"));
06c8fc8f 132
b5d9a953
CBW
133# 32
134
06c8fc8f
RGS
135my $aaNoN = Unicode::Collate->new(
136 level => 1,
137 table => undef,
138 entry => $aaEntry,
139 normalization => undef,
140);
141
142ok($aaNoN->lt("Z", "A\x{30A}\x{304}"));
143ok($aaNoN->eq("A", "A\x{304}\x{30A}"));
144ok($aaNoN->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
145ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}"));
146ok($aaNoN->eq("A", "A\x{327}\x{30A}"));
147ok($aaNoN->lt("Z", "A\x{30A}\x{327}"));
148ok($aaNoN->eq("A", "A\x{31A}\x{30A}"));
149ok($aaNoN->lt("Z", "A\x{30A}\x{31A}"));
150
b5d9a953
CBW
151# 40
152
aa7758f7
CBW
153# suppress contractions
154
155my $kjeSup = Unicode::Collate->new(
156 level => 1,
157 table => undef,
158 normalization => undef,
159 entry => $kjeEntry,
160 suppress => [0x400..0x45F],
161);
162
163ok($kjeSup->eq("\x{43A}", "\x{43A}\x{301}"));
164ok($kjeSup->gt("\x{45C}", "\x{43A}\x{301}"));
165ok($kjeSup->eq("\x{41A}", "\x{41A}\x{301}"));
166ok($kjeSup->gt("\x{40C}", "\x{41A}\x{301}"));
167
b5d9a953
CBW
168# 44
169
170our $tibetanEntry = <<'ENTRIES';
1710000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
1720F71 ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA
1730F72 ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I
1740F73 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1750F71 0F72 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1760F80 ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I
1770F81 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1780F71 0F80 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1790F74 ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U
1800F75 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1810F71 0F74 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1820F76 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1830FB2 0F80 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1840F77 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1850FB2 0F81 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1860FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1870F78 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
1880FB3 0F80 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
1890F79 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
1900FB3 0F81 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
1910FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
192ENTRIES
193
194# ccc(0F71) = 129
195# ccc(0F80) = 130
196# 0F76 = 0FB2 0F80
197# 0F78 = 0FB3 0F80
198# 0F81 = 0F71 0F80
199# 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71
200# 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71
201
202eval { require Unicode::Normalize };
203if (!$@) {
204 my $tibNFD = Unicode::Collate->new(
205 table => undef,
206 entry => $tibetanEntry,
207 );
208
209 # VOCALIC RR
210 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}"));
211 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}"));
212 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}"));
213 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}"));
214 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}"));
215 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}"));
216 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}"));
217 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}"));
218 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}"));
219 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}"));
220 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}"));
221 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}"));
222 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}"));
223 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}"));
224# 58
225
226 # VOCALIC LL
227 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}"));
228 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}"));
229 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}"));
230 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}"));
231 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}"));
232 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}"));
233 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}"));
234 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}"));
235 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}"));
236 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}"));
237 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}"));
238 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}"));
239 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}"));
240 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}"));
241# 72
242
243 my $discontNFD = Unicode::Collate->new(
244 table => undef,
245 entry => <<'ENTRIES',
2460000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
2470301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
2480300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT
2490327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA
2500334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
2510041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
2520041 0327 0301 ; [.0102.0020.0008.0041]
2530041 0300 ; [.0103.0020.0008.0041]
254ENTRIES
255 );
256
257 ok($discontNFD->eq("A\x{327}\x{301}\0\x{334}", "A\x{334}\x{327}\x{301}"));
258 ok($discontNFD->eq("A\x{300}\0\x{327}", "A\x{327}\x{300}"));
259} else {
260 ok(1) for 1..30;
261}
262# 74