| 1 | |
| 2 | BEGIN { |
| 3 | if ($ENV{PERL_CORE}) { |
| 4 | chdir('t') if -d 't'; |
| 5 | @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); |
| 6 | } |
| 7 | } |
| 8 | |
| 9 | use strict; |
| 10 | use warnings; |
| 11 | BEGIN { $| = 1; print "1..118\n"; } |
| 12 | my $count = 0; |
| 13 | sub ok ($;$) { |
| 14 | my $p = my $r = shift; |
| 15 | if (@_) { |
| 16 | my $x = shift; |
| 17 | $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; |
| 18 | } |
| 19 | print $p ? "ok" : "not ok", ' ', ++$count, "\n"; |
| 20 | } |
| 21 | |
| 22 | use Unicode::Collate; |
| 23 | |
| 24 | ok(1); |
| 25 | |
| 26 | sub _pack_U { Unicode::Collate::pack_U(@_) } |
| 27 | sub _unpack_U { Unicode::Collate::unpack_U(@_) } |
| 28 | |
| 29 | ######################### |
| 30 | |
| 31 | our $kjeEntry = <<'ENTRIES'; |
| 32 | 0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT |
| 33 | 0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY |
| 34 | 043A ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA |
| 35 | 041A ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA |
| 36 | 045C ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE |
| 37 | 043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE |
| 38 | 040C ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE |
| 39 | 041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE |
| 40 | ENTRIES |
| 41 | |
| 42 | our $aaEntry = <<'ENTRIES'; |
| 43 | 0304 ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230) |
| 44 | 030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230) |
| 45 | 0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202) |
| 46 | 031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232) |
| 47 | 0061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A |
| 48 | 0041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A |
| 49 | 007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z |
| 50 | 005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z |
| 51 | 00E5 ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM |
| 52 | 00C5 ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM |
| 53 | 0061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE |
| 54 | 0041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE |
| 55 | ENTRIES |
| 56 | |
| 57 | ######################### |
| 58 | |
| 59 | my $kjeNoN = Unicode::Collate->new( |
| 60 | level => 1, |
| 61 | table => undef, |
| 62 | normalization => undef, |
| 63 | entry => $kjeEntry, |
| 64 | ); |
| 65 | |
| 66 | ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); |
| 67 | ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); |
| 68 | ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); |
| 69 | ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); |
| 70 | |
| 71 | # 5 |
| 72 | |
| 73 | our %sortkeys; |
| 74 | |
| 75 | $sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}"); |
| 76 | $sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"); |
| 77 | $sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"); |
| 78 | |
| 79 | eval { require Unicode::Normalize }; |
| 80 | if (!$@) { |
| 81 | my $kjeNFD = Unicode::Collate->new( |
| 82 | level => 1, |
| 83 | table => undef, |
| 84 | entry => $kjeEntry, |
| 85 | ); |
| 86 | |
| 87 | ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}")); |
| 88 | ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}")); |
| 89 | ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}")); |
| 90 | ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); |
| 91 | # 9 |
| 92 | |
| 93 | my $aaNFD = Unicode::Collate->new( |
| 94 | level => 1, |
| 95 | table => undef, |
| 96 | entry => $aaEntry, |
| 97 | ); |
| 98 | |
| 99 | ok($aaNFD->lt("Z", "A\x{30A}\x{304}")); |
| 100 | ok($aaNFD->eq("A", "A\x{304}\x{30A}")); |
| 101 | ok($aaNFD->eq(_pack_U(0xE5), "A\x{30A}\x{304}")); |
| 102 | ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}")); |
| 103 | ok($aaNFD->lt("Z", "A\x{327}\x{30A}")); |
| 104 | ok($aaNFD->lt("Z", "A\x{30A}\x{327}")); |
| 105 | ok($aaNFD->lt("Z", "A\x{31A}\x{30A}")); |
| 106 | ok($aaNFD->lt("Z", "A\x{30A}\x{31A}")); |
| 107 | # 17 |
| 108 | |
| 109 | my $aaPre = Unicode::Collate->new( |
| 110 | level => 1, |
| 111 | normalization => "prenormalized", |
| 112 | table => undef, |
| 113 | entry => $aaEntry, |
| 114 | ); |
| 115 | |
| 116 | ok($aaPre->lt("Z", "A\x{30A}\x{304}")); |
| 117 | ok($aaPre->eq("A", "A\x{304}\x{30A}")); |
| 118 | ok($aaPre->eq(_pack_U(0xE5), "A\x{30A}\x{304}")); |
| 119 | ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}")); |
| 120 | ok($aaPre->lt("Z", "A\x{327}\x{30A}")); |
| 121 | ok($aaPre->lt("Z", "A\x{30A}\x{327}")); |
| 122 | ok($aaPre->lt("Z", "A\x{31A}\x{30A}")); |
| 123 | ok($aaPre->lt("Z", "A\x{30A}\x{31A}")); |
| 124 | # 25 |
| 125 | } else { |
| 126 | ok(1) for 1..20; |
| 127 | } |
| 128 | |
| 129 | # again: loading Unicode::Normalize should not affect $kjeNoN. |
| 130 | ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); |
| 131 | ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); |
| 132 | ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); |
| 133 | ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); |
| 134 | |
| 135 | ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}")); |
| 136 | ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}")); |
| 137 | ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}")); |
| 138 | |
| 139 | # 32 |
| 140 | |
| 141 | my $aaNoN = Unicode::Collate->new( |
| 142 | level => 1, |
| 143 | table => undef, |
| 144 | entry => $aaEntry, |
| 145 | normalization => undef, |
| 146 | ); |
| 147 | |
| 148 | ok($aaNoN->lt("Z", "A\x{30A}\x{304}")); |
| 149 | ok($aaNoN->eq("A", "A\x{304}\x{30A}")); |
| 150 | ok($aaNoN->eq(_pack_U(0xE5), "A\x{30A}\x{304}")); |
| 151 | ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}")); |
| 152 | ok($aaNoN->eq("A", "A\x{327}\x{30A}")); |
| 153 | ok($aaNoN->lt("Z", "A\x{30A}\x{327}")); |
| 154 | ok($aaNoN->eq("A", "A\x{31A}\x{30A}")); |
| 155 | ok($aaNoN->lt("Z", "A\x{30A}\x{31A}")); |
| 156 | |
| 157 | # 40 |
| 158 | |
| 159 | # suppress contractions (not affected) |
| 160 | |
| 161 | my $kjeSup = Unicode::Collate->new( |
| 162 | level => 1, |
| 163 | table => undef, |
| 164 | normalization => undef, |
| 165 | entry => $kjeEntry, |
| 166 | suppress => [0x400..0x45F], |
| 167 | ); |
| 168 | |
| 169 | ok($kjeSup->lt("\x{43A}", "\x{43A}\x{301}")); |
| 170 | ok($kjeSup->eq("\x{45C}", "\x{43A}\x{301}")); |
| 171 | ok($kjeSup->lt("\x{41A}", "\x{41A}\x{301}")); |
| 172 | ok($kjeSup->eq("\x{40C}", "\x{41A}\x{301}")); |
| 173 | |
| 174 | # 44 |
| 175 | |
| 176 | our $tibetanEntry = <<'ENTRIES'; |
| 177 | 0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) |
| 178 | 0FB2 ; [.205B.0020.0002.0FB2] # TIBETAN SUBJOINED LETTER RA |
| 179 | 0FB3 ; [.205E.0020.0002.0FB3] # TIBETAN SUBJOINED LETTER LA |
| 180 | 0F71 ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA |
| 181 | 0F72 ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I |
| 182 | 0F73 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II |
| 183 | 0F71 0F72 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II |
| 184 | 0F80 ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I |
| 185 | 0F81 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II |
| 186 | 0F71 0F80 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II |
| 187 | 0F74 ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U |
| 188 | 0F75 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU |
| 189 | 0F71 0F74 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU |
| 190 | 0F76 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R |
| 191 | 0FB2 0F80 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R |
| 192 | 0F77 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR |
| 193 | 0FB2 0F81 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR |
| 194 | 0FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR |
| 195 | 0F78 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L |
| 196 | 0FB3 0F80 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L |
| 197 | 0F79 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL |
| 198 | 0FB3 0F81 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL |
| 199 | 0FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL |
| 200 | ENTRIES |
| 201 | |
| 202 | # ccc(0F71) = 129 |
| 203 | # ccc(0F80) = 130 |
| 204 | # 0F76 = 0FB2 0F80 |
| 205 | # 0F78 = 0FB3 0F80 |
| 206 | # 0F81 = 0F71 0F80 |
| 207 | # 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71 |
| 208 | # 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71 |
| 209 | |
| 210 | eval { require Unicode::Normalize }; |
| 211 | if (!$@) { |
| 212 | my $tibNFD = Unicode::Collate->new( |
| 213 | table => undef, |
| 214 | entry => $tibetanEntry, |
| 215 | UCA_Version => 24, |
| 216 | ); |
| 217 | |
| 218 | # VOCALIC RR |
| 219 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}")); |
| 220 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}")); |
| 221 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}")); |
| 222 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}")); |
| 223 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}")); |
| 224 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}")); |
| 225 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}")); |
| 226 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}")); |
| 227 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}")); |
| 228 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}")); |
| 229 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}")); |
| 230 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}")); |
| 231 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}")); |
| 232 | ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}")); |
| 233 | # 58 |
| 234 | |
| 235 | # VOCALIC LL |
| 236 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}")); |
| 237 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}")); |
| 238 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}")); |
| 239 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}")); |
| 240 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}")); |
| 241 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}")); |
| 242 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}")); |
| 243 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}")); |
| 244 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}")); |
| 245 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}")); |
| 246 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}")); |
| 247 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}")); |
| 248 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}")); |
| 249 | ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}")); |
| 250 | # 72 |
| 251 | |
| 252 | my $a1 = "\x{FB2}\x{334}\x{F81}"; |
| 253 | my $b1 = "\x{F77}\0\x{334}"; |
| 254 | my $a2 = "\x{FB2}\x{334}\x{F81}"; |
| 255 | my $b2 = "\x{FB2}\x{F80}\0\x{334}\x{F71}"; |
| 256 | |
| 257 | for my $v (qw/20 22 24 26 28/) { |
| 258 | my $tib = Unicode::Collate->new( |
| 259 | table => undef, |
| 260 | entry => $tibetanEntry, |
| 261 | UCA_Version => $v, |
| 262 | ); |
| 263 | my $long = 22 <= $v && $v <= 24; |
| 264 | ok($tib->cmp($a1, $b1), $long ? 0 : -1); |
| 265 | ok($tib->cmp($a2, $b2), $long ? 1 : 0); |
| 266 | |
| 267 | $tib->change(long_contraction => 0); |
| 268 | ok($tib->cmp($a1, $b1), -1); |
| 269 | ok($tib->cmp($a2, $b2), 0); |
| 270 | |
| 271 | $tib->change(long_contraction => 1); |
| 272 | ok($tib->cmp($a1, $b1), 0); |
| 273 | ok($tib->cmp($a2, $b2), 1); |
| 274 | } |
| 275 | # 102 |
| 276 | |
| 277 | # UCA_Version => 22 |
| 278 | ok($tibNFD->cmp($a1, $b1), 0); |
| 279 | ok($tibNFD->cmp($a2, $b2), 1); |
| 280 | |
| 281 | $tibNFD->change(UCA_Version => 26); # not affect long_contraction |
| 282 | ok($tibNFD->cmp($a1, $b1), 0); |
| 283 | ok($tibNFD->cmp($a2, $b2), 1); |
| 284 | # 106 |
| 285 | |
| 286 | my $discontNFD = Unicode::Collate->new( |
| 287 | table => undef, |
| 288 | UCA_Version => 22, |
| 289 | entry => <<'ENTRIES', |
| 290 | 0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) |
| 291 | 0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT |
| 292 | 0300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT |
| 293 | 0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA |
| 294 | 0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY |
| 295 | 0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A |
| 296 | 0041 0327 0301 ; [.0102.0020.0008.0041] |
| 297 | 0041 0300 ; [.0103.0020.0008.0041] |
| 298 | ENTRIES |
| 299 | ); |
| 300 | |
| 301 | ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); |
| 302 | ok($discontNFD->eq("A\x{327}\x{300}", "A\x{300}\0\x{327}")); |
| 303 | |
| 304 | $discontNFD->change(long_contraction => 0); |
| 305 | ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); |
| 306 | ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\0\x{327}\x{301}\x{334}")); |
| 307 | ok($discontNFD->eq("A\x{327}\x{300}", "A\x{300}\0\x{327}")); |
| 308 | |
| 309 | $discontNFD->change(level => 1); |
| 310 | ok($discontNFD->gt("A\x{327}\x{300}", "A\x{327}\0\x{300}")); |
| 311 | |
| 312 | # discontiguous |
| 313 | ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); |
| 314 | ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{300}")); |
| 315 | ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A")); |
| 316 | |
| 317 | # contiguous |
| 318 | ok($discontNFD->eq("A\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); |
| 319 | ok($discontNFD->lt("A\x{327}\x{301}", "A\x{300}")); |
| 320 | ok($discontNFD->gt("A\x{327}\x{301}", "A")); |
| 321 | } else { |
| 322 | ok(1) for 1..74; |
| 323 | } |
| 324 | # 118 |