Commit | Line | Data |
---|---|---|
b5d9a953 | 1 | |
06c8fc8f RGS |
2 | BEGIN { |
3 | unless ("A" eq pack('U', 0x41)) { | |
4 | print "1..0 # Unicode::Collate " . | |
5 | "cannot stringify a Unicode code point\n"; | |
6 | exit 0; | |
7 | } | |
456a1446 CBW |
8 | if ($ENV{PERL_CORE}) { |
9 | chdir('t') if -d 't'; | |
10 | @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); | |
11 | } | |
06c8fc8f RGS |
12 | } |
13 | ||
14 | use Test; | |
b5d9a953 | 15 | BEGIN { plan tests => 74 }; |
06c8fc8f RGS |
16 | |
17 | use strict; | |
18 | use warnings; | |
19 | use Unicode::Collate; | |
20 | ||
68adb2b0 CBW |
21 | ok(1); |
22 | ||
23 | ######################### | |
24 | ||
06c8fc8f RGS |
25 | our $kjeEntry = <<'ENTRIES'; |
26 | 0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT | |
27 | 0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY | |
28 | 043A ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA | |
29 | 041A ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA | |
30 | 045C ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE | |
31 | 043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE | |
32 | 040C ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE | |
33 | 041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE | |
34 | ENTRIES | |
35 | ||
36 | our $aaEntry = <<'ENTRIES'; | |
37 | 0304 ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230) | |
38 | 030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230) | |
39 | 0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202) | |
40 | 031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232) | |
41 | 0061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A | |
42 | 0041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A | |
43 | 007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z | |
44 | 005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z | |
45 | 00E5 ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM | |
46 | 00C5 ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM | |
47 | 0061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE | |
48 | 0041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE | |
49 | ENTRIES | |
50 | ||
51 | ######################### | |
52 | ||
06c8fc8f RGS |
53 | my $kjeNoN = Unicode::Collate->new( |
54 | level => 1, | |
55 | table => undef, | |
56 | normalization => undef, | |
57 | entry => $kjeEntry, | |
58 | ); | |
59 | ||
aa7758f7 CBW |
60 | ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); |
61 | ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); | |
62 | ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); | |
63 | ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); | |
06c8fc8f | 64 | |
b5d9a953 CBW |
65 | # 5 |
66 | ||
06c8fc8f RGS |
67 | our %sortkeys; |
68 | ||
aa7758f7 CBW |
69 | $sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}"); |
70 | $sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"); | |
71 | $sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"); | |
06c8fc8f RGS |
72 | |
73 | eval { require Unicode::Normalize }; | |
abd1ec54 | 74 | if (!$@) { |
06c8fc8f RGS |
75 | my $kjeNFD = Unicode::Collate->new( |
76 | level => 1, | |
77 | table => undef, | |
78 | entry => $kjeEntry, | |
79 | ); | |
b5d9a953 | 80 | |
aa7758f7 CBW |
81 | ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}")); |
82 | ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}")); | |
83 | ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}")); | |
84 | ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); | |
b5d9a953 | 85 | # 9 |
06c8fc8f RGS |
86 | |
87 | my $aaNFD = Unicode::Collate->new( | |
88 | level => 1, | |
89 | table => undef, | |
90 | entry => $aaEntry, | |
91 | ); | |
92 | ||
93 | ok($aaNFD->lt("Z", "A\x{30A}\x{304}")); | |
94 | ok($aaNFD->eq("A", "A\x{304}\x{30A}")); | |
95 | ok($aaNFD->eq(pack('U', 0xE5), "A\x{30A}\x{304}")); | |
96 | ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}")); | |
97 | ok($aaNFD->lt("Z", "A\x{327}\x{30A}")); | |
98 | ok($aaNFD->lt("Z", "A\x{30A}\x{327}")); | |
99 | ok($aaNFD->lt("Z", "A\x{31A}\x{30A}")); | |
100 | ok($aaNFD->lt("Z", "A\x{30A}\x{31A}")); | |
b5d9a953 | 101 | # 17 |
06c8fc8f RGS |
102 | |
103 | my $aaPre = Unicode::Collate->new( | |
104 | level => 1, | |
105 | normalization => "prenormalized", | |
106 | table => undef, | |
107 | entry => $aaEntry, | |
108 | ); | |
109 | ||
110 | ok($aaPre->lt("Z", "A\x{30A}\x{304}")); | |
111 | ok($aaPre->eq("A", "A\x{304}\x{30A}")); | |
112 | ok($aaPre->eq(pack('U', 0xE5), "A\x{30A}\x{304}")); | |
113 | ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}")); | |
114 | ok($aaPre->lt("Z", "A\x{327}\x{30A}")); | |
115 | ok($aaPre->lt("Z", "A\x{30A}\x{327}")); | |
116 | ok($aaPre->lt("Z", "A\x{31A}\x{30A}")); | |
117 | ok($aaPre->lt("Z", "A\x{30A}\x{31A}")); | |
b5d9a953 CBW |
118 | # 25 |
119 | } else { | |
120 | ok(1) for 1..20; | |
06c8fc8f RGS |
121 | } |
122 | ||
123 | # again: loading Unicode::Normalize should not affect $kjeNoN. | |
aa7758f7 CBW |
124 | ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); |
125 | ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); | |
126 | ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); | |
127 | ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); | |
06c8fc8f | 128 | |
aa7758f7 CBW |
129 | ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}")); |
130 | ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}")); | |
131 | ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}")); | |
06c8fc8f | 132 | |
b5d9a953 CBW |
133 | # 32 |
134 | ||
06c8fc8f RGS |
135 | my $aaNoN = Unicode::Collate->new( |
136 | level => 1, | |
137 | table => undef, | |
138 | entry => $aaEntry, | |
139 | normalization => undef, | |
140 | ); | |
141 | ||
142 | ok($aaNoN->lt("Z", "A\x{30A}\x{304}")); | |
143 | ok($aaNoN->eq("A", "A\x{304}\x{30A}")); | |
144 | ok($aaNoN->eq(pack('U', 0xE5), "A\x{30A}\x{304}")); | |
145 | ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}")); | |
146 | ok($aaNoN->eq("A", "A\x{327}\x{30A}")); | |
147 | ok($aaNoN->lt("Z", "A\x{30A}\x{327}")); | |
148 | ok($aaNoN->eq("A", "A\x{31A}\x{30A}")); | |
149 | ok($aaNoN->lt("Z", "A\x{30A}\x{31A}")); | |
150 | ||
b5d9a953 CBW |
151 | # 40 |
152 | ||
aa7758f7 CBW |
153 | # suppress contractions |
154 | ||
155 | my $kjeSup = Unicode::Collate->new( | |
156 | level => 1, | |
157 | table => undef, | |
158 | normalization => undef, | |
159 | entry => $kjeEntry, | |
160 | suppress => [0x400..0x45F], | |
161 | ); | |
162 | ||
163 | ok($kjeSup->eq("\x{43A}", "\x{43A}\x{301}")); | |
164 | ok($kjeSup->gt("\x{45C}", "\x{43A}\x{301}")); | |
165 | ok($kjeSup->eq("\x{41A}", "\x{41A}\x{301}")); | |
166 | ok($kjeSup->gt("\x{40C}", "\x{41A}\x{301}")); | |
167 | ||
b5d9a953 CBW |
168 | # 44 |
169 | ||
170 | our $tibetanEntry = <<'ENTRIES'; | |
171 | 0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) | |
172 | 0F71 ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA | |
173 | 0F72 ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I | |
174 | 0F73 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II | |
175 | 0F71 0F72 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II | |
176 | 0F80 ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I | |
177 | 0F81 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II | |
178 | 0F71 0F80 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II | |
179 | 0F74 ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U | |
180 | 0F75 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU | |
181 | 0F71 0F74 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU | |
182 | 0F76 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R | |
183 | 0FB2 0F80 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R | |
184 | 0F77 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR | |
185 | 0FB2 0F81 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR | |
186 | 0FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR | |
187 | 0F78 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L | |
188 | 0FB3 0F80 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L | |
189 | 0F79 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL | |
190 | 0FB3 0F81 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL | |
191 | 0FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL | |
192 | ENTRIES | |
193 | ||
194 | # ccc(0F71) = 129 | |
195 | # ccc(0F80) = 130 | |
196 | # 0F76 = 0FB2 0F80 | |
197 | # 0F78 = 0FB3 0F80 | |
198 | # 0F81 = 0F71 0F80 | |
199 | # 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71 | |
200 | # 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71 | |
201 | ||
202 | eval { require Unicode::Normalize }; | |
203 | if (!$@) { | |
204 | my $tibNFD = Unicode::Collate->new( | |
205 | table => undef, | |
206 | entry => $tibetanEntry, | |
207 | ); | |
208 | ||
209 | # VOCALIC RR | |
210 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}")); | |
211 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}")); | |
212 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}")); | |
213 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}")); | |
214 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}")); | |
215 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}")); | |
216 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}")); | |
217 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}")); | |
218 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}")); | |
219 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}")); | |
220 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}")); | |
221 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}")); | |
222 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}")); | |
223 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}")); | |
224 | # 58 | |
225 | ||
226 | # VOCALIC LL | |
227 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}")); | |
228 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}")); | |
229 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}")); | |
230 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}")); | |
231 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}")); | |
232 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}")); | |
233 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}")); | |
234 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}")); | |
235 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}")); | |
236 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}")); | |
237 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}")); | |
238 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}")); | |
239 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}")); | |
240 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}")); | |
241 | # 72 | |
242 | ||
243 | my $discontNFD = Unicode::Collate->new( | |
244 | table => undef, | |
245 | entry => <<'ENTRIES', | |
246 | 0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) | |
247 | 0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT | |
248 | 0300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT | |
249 | 0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA | |
250 | 0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY | |
251 | 0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A | |
252 | 0041 0327 0301 ; [.0102.0020.0008.0041] | |
253 | 0041 0300 ; [.0103.0020.0008.0041] | |
254 | ENTRIES | |
255 | ); | |
256 | ||
257 | ok($discontNFD->eq("A\x{327}\x{301}\0\x{334}", "A\x{334}\x{327}\x{301}")); | |
258 | ok($discontNFD->eq("A\x{300}\0\x{327}", "A\x{327}\x{300}")); | |
259 | } else { | |
260 | ok(1) for 1..30; | |
261 | } | |
262 | # 74 |