Commit | Line | Data |
---|---|---|
68adb2b0 | 1 | |
1d2654e1 JH |
2 | BEGIN { |
3 | unless ("A" eq pack('U', 0x41)) { | |
4 | print "1..0 # Unicode::Collate " . | |
5 | "cannot stringify a Unicode code point\n"; | |
6 | exit 0; | |
7 | } | |
456a1446 CBW |
8 | if ($ENV{PERL_CORE}) { |
9 | chdir('t') if -d 't'; | |
10 | @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); | |
11 | } | |
1d2654e1 JH |
12 | } |
13 | ||
14 | use Test; | |
91ae00cb | 15 | BEGIN { plan tests => 72 }; |
1d2654e1 JH |
16 | |
17 | use strict; | |
18 | use warnings; | |
19 | use Unicode::Collate; | |
20 | ||
91ae00cb | 21 | ok(1); |
1d2654e1 | 22 | |
68adb2b0 CBW |
23 | ######################### |
24 | ||
1d2654e1 JH |
25 | # a standard collator (3.1.1) |
26 | my $Collator = Unicode::Collate->new( | |
27 | table => 'keys.txt', | |
28 | normalization => undef, | |
29 | ); | |
30 | ||
31 | ||
32 | # a collator for hangul sorting, | |
33 | # cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html | |
e7f779c8 | 34 | # http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf |
1d2654e1 JH |
35 | my $hangul = Unicode::Collate->new( |
36 | level => 3, | |
37 | table => undef, | |
38 | normalization => undef, | |
91ae00cb | 39 | |
1d2654e1 JH |
40 | entry => <<'ENTRIES', |
41 | 0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A | |
42 | 0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A | |
43 | #1161 ; [.1800.0020.0002] # <comment> initial jungseong A | |
44 | #1163 ; [.1801.0020.0002] # <comment> initial jungseong YA | |
45 | 1100 ; [.1831.0020.0002] # choseong KIYEOK | |
46 | 1100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A | |
47 | 1100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA | |
48 | 1101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK | |
49 | 1101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A | |
50 | 1101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA | |
51 | 1102 ; [.1833.0020.0002] # choseong NIEUN | |
52 | 1102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A | |
53 | 1102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA | |
54 | 3042 ; [.1921.0020.000E] # HIRAGANA LETTER A | |
55 | 11A8 ; [.FE10.0020.0002] # jongseong KIYEOK | |
56 | 11A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK | |
57 | 1161 ; [.FE20.0020.0002] # jungseong A <non-initial> | |
58 | 1163 ; [.FE21.0020.0002] # jungseong YA <non-initial> | |
59 | ENTRIES | |
60 | ); | |
61 | ||
62 | ok(ref $hangul, "Unicode::Collate"); | |
63 | ||
91ae00cb NC |
64 | my $trailwt = Unicode::Collate->new( |
65 | level => 3, | |
66 | table => undef, | |
67 | normalization => undef, | |
68 | hangul_terminator => 16, | |
69 | ||
70 | entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong | |
71 | 0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A | |
72 | 0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A | |
73 | 11A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK | |
74 | 11A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK | |
75 | 1161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A | |
76 | 1163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA | |
77 | 1100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK | |
78 | 1101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK | |
79 | 1102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN | |
80 | 3042 ; [.1921.0020.000E] # HIRAGANA LETTER A | |
81 | ENTRIES | |
82 | ); | |
83 | ||
1d2654e1 JH |
84 | ######################### |
85 | ||
86 | # L(simp)L(simp) vs L(comp): /GGA/ | |
87 | ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); | |
88 | ok($hangul ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); | |
91ae00cb | 89 | ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); |
1d2654e1 JH |
90 | |
91 | # L(simp) vs L(simp)L(simp): /GA/ vs /GGA/ | |
92 | ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); | |
93 | ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); | |
91ae00cb | 94 | ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); |
1d2654e1 JH |
95 | |
96 | # T(simp)T(simp) vs T(comp): /AGG/ | |
97 | ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); | |
98 | ok($hangul ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); | |
91ae00cb | 99 | ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); |
1d2654e1 JH |
100 | |
101 | # T(simp) vs T(simp)T(simp): /AG/ vs /AGG/ | |
102 | ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); | |
103 | ok($hangul ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); | |
91ae00cb | 104 | ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); |
1d2654e1 JH |
105 | |
106 | # LV vs LLV: /GA/ vs /GNA/ | |
107 | ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); | |
108 | ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); | |
91ae00cb | 109 | ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); |
1d2654e1 JH |
110 | |
111 | # LVV vs LVX: /GAA/ vs /GA/.latinA | |
112 | ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); | |
113 | ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); | |
91ae00cb | 114 | ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); |
1d2654e1 JH |
115 | |
116 | # LVV vs LVX: /GAA/ vs /GA/.hiraganaA | |
117 | ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); | |
118 | ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); | |
91ae00cb | 119 | ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); |
1d2654e1 JH |
120 | |
121 | # LVV vs LVX: /GAA/ vs /GA/.hanja | |
122 | ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); | |
123 | ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); | |
91ae00cb | 124 | ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); |
1d2654e1 JH |
125 | |
126 | # LVL vs LVT: /GA/./G/ vs /GAG/ | |
127 | ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); | |
128 | ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); | |
91ae00cb | 129 | ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); |
1d2654e1 JH |
130 | |
131 | # LVT vs LVX: /GAG/ vs /GA/.latinA | |
132 | ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); | |
133 | ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); | |
91ae00cb | 134 | ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); |
1d2654e1 JH |
135 | |
136 | # LVT vs LVX: /GAG/ vs /GA/.hiraganaA | |
137 | ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); | |
138 | ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); | |
91ae00cb | 139 | ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); |
1d2654e1 JH |
140 | |
141 | # LVT vs LVX: /GAG/ vs /GA/.hanja | |
142 | ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); | |
143 | ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); | |
91ae00cb | 144 | ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); |
1d2654e1 JH |
145 | |
146 | # LVT vs LVV: /GAG/ vs /GAA/ | |
147 | ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); | |
148 | ok($hangul ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); | |
91ae00cb | 149 | ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); |
1d2654e1 JH |
150 | |
151 | # LVL vs LVV: /GA/./G/ vs /GAA/ | |
152 | ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); | |
153 | ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); | |
91ae00cb | 154 | ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); |
1d2654e1 JH |
155 | |
156 | # LV vs Syl(LV): /GA/ vs /[GA]/ | |
157 | ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); | |
158 | ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}")); | |
91ae00cb | 159 | ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}")); |
1d2654e1 JH |
160 | |
161 | # LVT vs Syl(LV)T: /GAG/ vs /[GA]G/ | |
162 | ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); | |
163 | ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); | |
91ae00cb | 164 | ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); |
1d2654e1 JH |
165 | |
166 | # LVT vs Syl(LVT): /GAG/ vs /[GAG]/ | |
167 | ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); | |
168 | ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); | |
91ae00cb | 169 | ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); |
1d2654e1 JH |
170 | |
171 | # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ | |
172 | ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); | |
173 | ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); | |
91ae00cb | 174 | ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); |
1d2654e1 JH |
175 | |
176 | # LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/ | |
177 | ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); | |
178 | ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); | |
91ae00cb | 179 | ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); |
1d2654e1 JH |
180 | |
181 | # LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/ | |
182 | ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); | |
183 | ok($hangul ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); | |
91ae00cb | 184 | ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); |
1d2654e1 JH |
185 | |
186 | ######################### | |
187 | ||
188 | # checks contraction in LVT: | |
189 | # weights of these contractions may be nonsense. | |
190 | ||
191 | my $hangcont = Unicode::Collate->new( | |
192 | level => 3, | |
193 | table => undef, | |
194 | normalization => undef, | |
195 | entry => <<'ENTRIES', | |
196 | 1100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK | |
197 | 1101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK | |
198 | 1161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A | |
199 | 1162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE | |
200 | 1163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA | |
201 | 11A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK | |
202 | 11A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK | |
203 | 1161 11A9 ; [.0000.0000.0000] # A-GG <contraction> | |
204 | 1100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39 | |
205 | ENTRIES | |
206 | ); | |
207 | ||
208 | # contracted into VT | |
209 | ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); | |
210 | ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); | |
211 | ||
212 | # not contracted into LVT but into VT | |
213 | ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); | |
214 | ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); | |
215 | ||
216 | # contracted into LVT | |
217 | ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); | |
218 | ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); | |
219 | ||
220 | # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ | |
221 | ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); | |
222 | ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); | |
223 | ||
224 | # LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/ | |
225 | ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); | |
226 | ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); | |
227 | ||
228 | 1; | |
229 | __END__ |