# Capability check: when pack('U', 0x41) does not produce "A", print a "1..0"
# plan line that carries the reason.
3 unless ("A" eq pack('U', 0x41)) {
4 print "1..0 # Unicode::Collate " .
5 "cannot stringify a Unicode code point\n";
# Library search path: '::lib' on MacOS, '../lib' on every other platform.
10 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
# Declare, at compile time, that this script reports 74 tests.
15 BEGIN { plan tests => 74 };
23 #########################
# Collation entries for the Cyrillic KA / KJE tests.  The sequences KA + ACUTE
# (043A 0301, 041A 0301) are listed with exactly the same collation element as
# the precomposed KJE letters (045C, 040C); ACUTE and TILDE OVERLAY on their
# own have 0000 in the first weight field.
25 our $kjeEntry = <<'ENTRIES';
26 0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
27 0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
28 043A ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA
29 041A ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA
30 045C ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
31 043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
32 040C ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
33 041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
# Collation entries for the Latin A WITH RING ABOVE tests.  The precomposed
# letters (00E5, 00C5) and the sequences A + RING ABOVE (0061 030A, 0041 030A)
# all carry 0C25 in the first weight field, which is higher than Z (0C13) and
# plain A (0A15).  The four combining marks have 0000 in that field; their
# canonical combining classes are noted in the per-line comments.
36 our $aaEntry = <<'ENTRIES';
37 0304 ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230)
38 030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230)
39 0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202)
40 031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232)
41 0061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A
42 0041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A
43 007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z
44 005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z
45 00E5 ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM
46 00C5 ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM
47 0061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE
48 0041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE
51 #########################
# Collator for the KA / KJE sequences with normalization switched off
# (normalization => undef).
# NOTE(review): the other constructor arguments are not visible in this
# excerpt; the eq() expectations below presumably depend on a comparison level
# at which the combining marks' own weights are ignored -- confirm.
53 my $kjeNoN = Unicode::Collate->new(
56 normalization => undef,
# Expected without normalization: KA+ACUTE sorts after bare KA; with TILDE
# OVERLAY between KA and ACUTE the string equals bare KA and sorts below KJE;
# with the overlay after the acute it equals KJE.
60 ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
61 ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
62 ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
63 ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
# Store the viewSortKey() output for the three KA sequences; the same calls are
# repeated and compared against these values later in the script.
69 $sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}");
70 $sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}");
71 $sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}");
# Attempt to load Unicode::Normalize inside eval so that a failed require does
# not abort the script.
73 eval { require Unicode::Normalize };
# NOTE(review): the constructor arguments of $kjeNFD are not visible in this
# excerpt; the name suggests NFD normalization is in effect -- confirm.
75 my $kjeNFD = Unicode::Collate->new(
# Unlike the $kjeNoN expectations, KA+OVERLAY+ACUTE is expected to equal KJE
# here (and therefore to sort after bare KA).
81 ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}"));
82 ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}"));
83 ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}"));
84 ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
# NOTE(review): constructor arguments of $aaNFD are likewise not visible here.
87 my $aaNFD = Unicode::Collate->new(
# Expectations: A+RING followed by MACRON sorts after Z and equals U+00E5
# followed by nothing else; a MACRON placed before the RING leaves the string
# equal to plain A (and to A+MACRON).  CEDILLA or LEFT ANGLE ABOVE on either
# side of the RING still gives a result that sorts after Z.
93 ok($aaNFD->lt("Z", "A\x{30A}\x{304}"));
94 ok($aaNFD->eq("A", "A\x{304}\x{30A}"));
95 ok($aaNFD->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
96 ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}"));
97 ok($aaNFD->lt("Z", "A\x{327}\x{30A}"));
98 ok($aaNFD->lt("Z", "A\x{30A}\x{327}"));
99 ok($aaNFD->lt("Z", "A\x{31A}\x{30A}"));
100 ok($aaNFD->lt("Z", "A\x{30A}\x{31A}"));
# Collator created with normalization => "prenormalized".
# NOTE(review): the other constructor arguments are not visible in this excerpt.
103 my $aaPre = Unicode::Collate->new(
105 normalization => "prenormalized",
# The same eight comparisons, with the same expected outcomes, as the ones made
# with $aaNFD.
110 ok($aaPre->lt("Z", "A\x{30A}\x{304}"));
111 ok($aaPre->eq("A", "A\x{304}\x{30A}"));
112 ok($aaPre->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
113 ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}"));
114 ok($aaPre->lt("Z", "A\x{327}\x{30A}"));
115 ok($aaPre->lt("Z", "A\x{30A}\x{327}"));
116 ok($aaPre->lt("Z", "A\x{31A}\x{30A}"));
117 ok($aaPre->lt("Z", "A\x{30A}\x{31A}"));
123 # again: loading Unicode::Normalize should not affect $kjeNoN.
# The four comparisons below repeat the earlier $kjeNoN assertions verbatim.
124 ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
125 ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
126 ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
127 ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
# Each stored sort key is passed to ok() together with a freshly computed
# viewSortKey() result for the same string.
129 ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}"));
130 ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"));
131 ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"));
# Collator for the A / RING ABOVE sequences with normalization switched off
# (normalization => undef).
# NOTE(review): the other constructor arguments are not visible in this excerpt.
135 my $aaNoN = Unicode::Collate->new(
139 normalization => undef,
# Differences from the $aaNFD / $aaPre expectations: when CEDILLA or LEFT ANGLE
# ABOVE stands between A and RING, the string is expected to equal plain A
# instead of sorting after Z.  The other six expectations are unchanged.
142 ok($aaNoN->lt("Z", "A\x{30A}\x{304}"));
143 ok($aaNoN->eq("A", "A\x{304}\x{30A}"));
144 ok($aaNoN->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
145 ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}"));
146 ok($aaNoN->eq("A", "A\x{327}\x{30A}"));
147 ok($aaNoN->lt("Z", "A\x{30A}\x{327}"));
148 ok($aaNoN->eq("A", "A\x{31A}\x{30A}"));
149 ok($aaNoN->lt("Z", "A\x{30A}\x{31A}"));
153 # suppress contractions
# The suppress range 0x400..0x45F contains all four Cyrillic letters used in
# the assertions below (0x40C, 0x41A, 0x43A, 0x45C).  Normalization is off.
# NOTE(review): the other constructor arguments are not visible in this excerpt.
155 my $kjeSup = Unicode::Collate->new(
158 normalization => undef,
160 suppress => [0x400..0x45F],
# Expected with suppression: KA+ACUTE equals bare KA and sorts below KJE, for
# the small letters and for the capital letters alike.
163 ok($kjeSup->eq("\x{43A}", "\x{43A}\x{301}"));
164 ok($kjeSup->gt("\x{45C}", "\x{43A}\x{301}"));
165 ok($kjeSup->eq("\x{41A}", "\x{41A}\x{301}"));
166 ok($kjeSup->gt("\x{40C}", "\x{41A}\x{301}"));
# Collation entries for Tibetan vowel signs.  Each composite sign (0F73, 0F75,
# 0F76, 0F77, 0F78, 0F79, 0F81) is followed by one or two multi-code-point
# sequences that are assigned the identical collation element.  U+0000 is
# listed with all-zero weights.
170 our $tibetanEntry = <<'ENTRIES';
171 0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
172 0F71 ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA
173 0F72 ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I
174 0F73 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
175 0F71 0F72 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
176 0F80 ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I
177 0F81 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
178 0F71 0F80 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
179 0F74 ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U
180 0F75 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
181 0F71 0F74 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
182 0F76 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
183 0FB2 0F80 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
184 0F77 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
185 0FB2 0F81 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
186 0FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
187 0F78 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
188 0FB3 0F80 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
189 0F79 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
190 0FB3 0F81 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
191 0FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
199 # 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71
200 # 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71
# Attempt to load Unicode::Normalize inside eval so that a failed require does
# not abort the script.
202 eval { require Unicode::Normalize };
# Collator built from $tibetanEntry.
# NOTE(review): the other constructor arguments are not visible in this
# excerpt; the name suggests NFD normalization is in effect -- confirm.
204 my $tibNFD = Unicode::Collate->new(
206 entry => $tibetanEntry,
# VOCALIC RR: the reference string U+0F77, NUL, TILDE OVERLAY must equal every
# listed spelling built from 0FB2 0F81, 0F76 0F71, 0FB2 0F71 0F80 and
# 0FB2 0F80 0F71, with the overlay (optionally preceded by NUL) inserted at
# each position after the first code point.
210 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}"));
211 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}"));
212 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}"));
213 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}"));
214 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}"));
215 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}"));
216 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}"));
217 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}"));
218 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}"));
219 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}"));
220 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}"));
221 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}"));
222 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}"));
223 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}"));
# VOCALIC LL: the same fourteen patterns with U+0F79, 0FB3 and 0F78 in place of
# U+0F77, 0FB2 and 0F76.
227 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}"));
228 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}"));
229 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}"));
230 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}"));
231 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}"));
232 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}"));
233 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}"));
234 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}"));
235 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}"));
236 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}"));
237 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}"));
238 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}"));
239 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}"));
240 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}"));
# Collator whose inline entry table defines two contractions starting with A:
# A + CEDILLA + ACUTE (0041 0327 0301) and A + GRAVE (0041 0300).  The two
# assertions at the end of this block expect a string in which another
# combining mark precedes the contraction's marks (A+OVERLAY+CEDILLA+ACUTE,
# A+CEDILLA+GRAVE) to equal the contiguous contraction followed by NUL and
# that extra mark.
# NOTE(review): constructor arguments other than entry are not visible in this
# excerpt; the name suggests NFD normalization is in effect -- confirm.
243 my $discontNFD = Unicode::Collate->new(
245 entry => <<'ENTRIES',
246 0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
247 0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
248 0300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT
249 0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA
250 0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
251 0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
252 0041 0327 0301 ; [.0102.0020.0008.0041]
253 0041 0300 ; [.0103.0020.0008.0041]
257 ok($discontNFD->eq("A\x{327}\x{301}\0\x{334}", "A\x{334}\x{327}\x{301}"));
258 ok($discontNFD->eq("A\x{300}\0\x{327}", "A\x{327}\x{300}"));