This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Refactor die_exit.t to loop over a list, rather than iterate on a hash.
[perl5.git] / cpan / Unicode-Collate / t / hangul.t
CommitLineData
68adb2b0 1
1d2654e1
JH
# Compile-time setup:
#   1. Skip the whole script (TAP "1..0") when pack('U', ...) does not yield
#      the plain character "A" for code point 0x41.
#   2. When run from the Perl core tree (PERL_CORE set), move into t/ if that
#      directory exists and point @INC at the core lib directory.
BEGIN {
    if (pack('U', 0x41) ne "A") {
        my $skip_line = "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        print $skip_line;
        exit 0;
    }

    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }

        # Classic MacOS spells the parent-relative lib path differently.
        if ($^O eq 'MacOS') {
            @INC = qw(::lib);
        }
        else {
            @INC = qw(../lib);
        }
    }
}
13
use Test;

# Announce the plan at compile time; the count equals the number of ok()
# calls made by this script.
BEGIN {
    plan(tests => 72);
}

use strict;
use warnings;
use Unicode::Collate;

# Test 1: unconditional pass, reached only once the modules above loaded.
ok(1);
1d2654e1 22
68adb2b0
CBW
23#########################
24
1d2654e1
JH
# Baseline collator used as the reference in every comparison below: weights
# come from the keys.txt table file and normalization is switched off.
# NOTE(review): the original comment tags this collator "(3.1.1)", presumably
# the version of the key table -- confirm.
my %standard_args = (
    table         => 'keys.txt',
    normalization => undef,
);
my $Collator = Unicode::Collate->new(%standard_args);
30
31
# Collator tailored for Hangul jamo sorting.  It names no table file
# (table => undef), switches normalization off, is built with level => 3,
# and takes all of its weights from the inline ENTRIES heredoc below.
# In that table:
#   - the double jamo U+1101 / U+11A9 receive the same collation elements
#     as two simple jamo (U+1100 U+1100 / U+11A8 U+11A8);
#   - a jungseong contracted with a preceding choseong weighs 1800/1801,
#     whereas a jungseong standing alone weighs FE20/FE21.
32# a collator for hangul sorting,
33# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
e7f779c8 34# http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
1d2654e1
JH
35my $hangul = Unicode::Collate->new(
36 level => 3,
37 table => undef,
38 normalization => undef,
91ae00cb 39
1d2654e1
JH
40 entry => <<'ENTRIES',
410061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A
420041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
43#1161 ; [.1800.0020.0002] # <comment> initial jungseong A
44#1163 ; [.1801.0020.0002] # <comment> initial jungseong YA
451100 ; [.1831.0020.0002] # choseong KIYEOK
461100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
471100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
481101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
491101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
501101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
511102 ; [.1833.0020.0002] # choseong NIEUN
521102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
531102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
543042 ; [.1921.0020.000E] # HIRAGANA LETTER A
5511A8 ; [.FE10.0020.0002] # jongseong KIYEOK
5611A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
571161 ; [.FE20.0020.0002] # jungseong A <non-initial>
581163 ; [.FE21.0020.0002] # jungseong YA <non-initial>
59ENTRIES
60);
61
# Two-argument ok(): ref() of the new object must be the class name.
62ok(ref $hangul, "Unicode::Collate");
63
91ae00cb
NC
# Second tailored collator, built like the one above (level 3, no table
# file, no normalization, inline entries) but with hangul_terminator => 16.
# Primary weights in its table rise from jongseong (1801) through jungseong
# (1831/1832) to choseong (1861/1862), as the note on the entry line says.
# The double jamo U+1101 / U+11A9 again weigh the same as two simple jamo.
64my $trailwt = Unicode::Collate->new(
65 level => 3,
66 table => undef,
67 normalization => undef,
68 hangul_terminator => 16,
69
70 entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong
710061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A
720041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
7311A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
7411A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
751161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A
761163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
771100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
781101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
791102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
803042 ; [.1921.0020.000E] # HIRAGANA LETTER A
81ENTRIES
82);
83
1d2654e1
JH
84#########################
85
# Jamo-sequence comparisons, run against all three collators in turn.
# Row layout: left string, right string, then the comparison method that
# must hold for $Collator, $hangul and $trailwt respectively.
# Key: L = choseong, V = jungseong, T = jongseong, Syl = precomposed
# syllable, X = a non-jamo character.
my @jamo_cases = (
    # L(simp)L(simp) vs L(comp): /GGA/
    [ "\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}", qw(lt eq eq) ],
    # L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
    [ "\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}", qw(gt lt lt) ],
    # T(simp)T(simp) vs T(comp): /AGG/
    [ "\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}", qw(lt eq eq) ],
    # T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
    [ "\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}", qw(lt lt lt) ],
    # LV vs LLV: /GA/ vs /GNA/
    [ "\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}", qw(gt lt lt) ],
    # LVV vs LVX: /GAA/ vs /GA/.latinA
    [ "\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A", qw(gt gt gt) ],
    # LVV vs LVX: /GAA/ vs /GA/.hiraganaA
    [ "\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}", qw(lt gt gt) ],
    # LVV vs LVX: /GAA/ vs /GA/.hanja
    [ "\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}", qw(lt gt gt) ],
    # LVL vs LVT: /GA/./G/ vs /GAG/
    [ "\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}", qw(lt lt lt) ],
    # LVT vs LVX: /GAG/ vs /GA/.latinA
    [ "\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A", qw(gt gt gt) ],
    # LVT vs LVX: /GAG/ vs /GA/.hiraganaA
    [ "\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}", qw(lt gt gt) ],
    # LVT vs LVX: /GAG/ vs /GA/.hanja
    [ "\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}", qw(lt gt gt) ],
    # LVT vs LVV: /GAG/ vs /GAA/
    [ "\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}", qw(gt lt lt) ],
    # LVL vs LVV: /GA/./G/ vs /GAA/
    [ "\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}", qw(lt lt lt) ],
    # LV vs Syl(LV): /GA/ vs /[GA]/
    [ "\x{1100}\x{1161}", "\x{AC00}", qw(eq eq eq) ],
    # LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
    [ "\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}", qw(eq eq eq) ],
    # LVT vs Syl(LVT): /GAG/ vs /[GAG]/
    [ "\x{1100}\x{1161}\x{11A8}", "\x{AC01}", qw(eq eq eq) ],
    # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
    [ "\x{1100}\x{1161}\x{11A9}", "\x{AC02}", qw(eq eq eq) ],
    # LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
    [ "\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}", qw(gt eq eq) ],
    # LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
    [ "\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}", qw(gt eq eq) ],
);

# Three ok() calls per row, in the fixed order $Collator, $hangul, $trailwt,
# so test numbering follows the row order above.
for my $case (@jamo_cases) {
    my ($left, $right, @expected) = @$case;
    my $slot = 0;
    for my $collator ($Collator, $hangul, $trailwt) {
        my $method = $expected[$slot++];
        ok($collator->$method($left, $right));
    }
}
1d2654e1
JH
185
186#########################
187
# Collator for exercising contractions that span jamo inside one syllable.
# Built with level 3, no table file and no normalization; all weights come
# from the inline entries.  Besides the single jamo, the table defines two
# contractions: A-GG (U+1161 U+11A9) with all-zero weights, and G-YA-G
# (U+1100 U+1163 U+11A8) with primary weight 1000, which is lower than the
# 1831 given to choseong KIYEOK on its own.
188# checks contraction in LVT:
189# weights of these contractions may be nonsense.
190
191my $hangcont = Unicode::Collate->new(
192 level => 3,
193 table => undef,
194 normalization => undef,
195 entry => <<'ENTRIES',
1961100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
1971101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1981161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A
1991162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
2001163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
20111A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
20211A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
2031161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
2041100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
205ENTRIES
206);
207
# Contraction comparisons, run against the standard collator and then the
# contraction-tailored one.
# Row layout: left string, right string, then the comparison method that
# must hold for $Collator and for $hangcont.
my @contraction_cases = (
    # contracted into VT
    [ "\x{1101}", "\x{1101}\x{1161}\x{11A9}", qw(lt eq) ],
    # not contracted into LVT but into VT
    [ "\x{1100}", "\x{1100}\x{1161}\x{11A9}", qw(lt eq) ],
    # contracted into LVT
    [ "\x{1100}\x{1163}\x{11A8}", "\x{1100}", qw(gt lt) ],
    # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
    [ "\x{1100}\x{1161}\x{11A9}", "\x{AC02}", qw(eq eq) ],
    # LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
    [ "\x{1100}\x{1163}\x{11A8}", "\x{AC39}", qw(eq eq) ],
);

# Two ok() calls per row: $Collator first, $hangcont second.
for my $case (@contraction_cases) {
    my ($left, $right, $standard_cmp, $tailored_cmp) = @$case;
    ok($Collator->$standard_cmp($left, $right));
    ok($hangcont->$tailored_cmp($left, $right));
}
227
2281;
229__END__