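# Source: live mirror of the Perl 5 development repository, currently hosted
#         at https://github.com/perl/perl5
# Commit: [patch @13687] Unicode::Collate 0.10
# File:   [perl5.git] / lib / Unicode / Collate / t / test.t
# Note:   taken from a blame view whose columns are Commit, Line, Data; the
#         first annotation is commit 45394607 by JH.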
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl t/test.t'.

#########################

use strict;
use warnings;

use Test;
# 54 is the number of ok() calls in this script; the count is the same
# whether or not Unicode::Normalize is available.
BEGIN { plan tests => 54 };
use Unicode::Collate;
ok(1); # If we made it this far, we're ok.

#########################

# Baseline collator shared by most of the checks below: it reads its
# collation elements from 'keys.txt' and has normalization switched off.
my $Collator = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
);

ok(ref $Collator, "Unicode::Collate");

# sort(): the expected order interleaves upper- and lower-case names
# (Carp, CGI, CPAN, ExtUtils, lib, ...) instead of grouping them by case
# as a plain code-point sort would.
ok(
    join(':', $Collator->sort(
        qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN /
    ) ),
    join(':',
        qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings /
    ),
);

my $A_acute = pack('U', 0x00C1);    # precomposed A with acute
my $acute   = pack('U', 0x0301);    # combining acute accent

# With normalization disabled, "A" followed by the combining accent is
# expected to sort before the precomposed character.
ok($Collator->cmp("A$acute", $A_acute), -1);

# Empty strings: equal to each other, and less than any non-empty string.
ok($Collator->cmp("", ""), 0);
ok(! $Collator->ne("", "") );
ok(  $Collator->eq("", "") );

ok($Collator->cmp("", "perl"), -1);

# The checks in this section only run when Unicode::Normalize can be loaded.
# A block eval with require/import does the same job as the string
# eval "use Unicode::Normalize" without compiling code from a string, and
# the trailing 1 gives a reliable success flag instead of inspecting $@.
my $has_normalize = eval {
    require Unicode::Normalize;
    Unicode::Normalize->import;
    1;
};

if ($has_normalize) {
    # This collator does not pass "normalization => undef", unlike the
    # others in this script (presumably so the module's default
    # normalization is applied -- confirm against the module docs).
    # NOTE(review): the 04D3 and "0430 0308" entries appear twice in the
    # table below; they are reproduced unchanged.
    my $NFD = Unicode::Collate->new(
        table => 'keys.txt',
        entry => <<'ENTRIES',
0430 ; [.0B01.0020.0002.0430] # CYRILLIC SMALL LETTER A
0410 ; [.0B01.0020.0008.0410] # CYRILLIC CAPITAL LETTER A
04D3 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
0430 0308 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
04D3 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
0430 0308 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
04D2 ; [.0B09.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
0410 0308 ; [.0B09.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
0430 3099 ; [.0B10.0020.0002.04D3] # A WITH KATAKANA VOICED
0430 3099 0308 ; [.0B11.0020.0002.04D3] # A WITH KATAKANA VOICED, DIAERESIS
ENTRIES
    );

    # Decomposed and precomposed forms compare equal here, whereas the
    # baseline collator (normalization off) is expected to order them.
    ok($NFD->eq("A$acute", $A_acute));
    ok($NFD->eq("\x{4D3}\x{325}", "\x{430}\x{308}\x{325}"));
    ok($NFD->lt("\x{430}\x{308}A", "\x{430}\x{308}B"));
    ok($NFD->lt("\x{430}\x{3099}B", "\x{430}\x{308}\x{3099}A"));
    # The two strings differ only in the order of U+3099 and U+309A.
    ok($NFD->eq("\x{0430}\x{3099}\x{309A}\x{0308}",
                "\x{0430}\x{309A}\x{3099}\x{0308}") );
}
else {
    # Keep the test count at the planned total: one placeholder pass for
    # each of the five checks above.
    ok(1) for 1 .. 5;
}

# Tailored collator: on top of keys.txt it supplies its own entries for
# the sequences "ch" / "Ch" and for U+00DF (which gets two collation
# elements), and passes an ignoreName pattern for several scripts.
my $tr = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    ignoreName    => qr/^(?:HANGUL|HIRAGANA|KATAKANA|BOPOMOFO)$/,
    entry         => <<'ENTRIES',
0063 0068 ; [.0893.0020.0002.0063] # "ch" in traditional Spanish
0043 0068 ; [.0893.0020.0008.0043] # "Ch" in traditional Spanish
00DF ; [.09F3.0154.0004.00DF] [.09F3.0020.0004.00DF] # eszet in Germany
ENTRIES
);

# With the tailoring, "acha" is expected after "acia" and "acka" ...
ok(
    join(':', $tr->sort(
        qw/ acha aca ada acia acka /
    ) ),
    join(':',
        qw/ aca acia acka acha ada /
    ),
);

# ... whereas the untailored collator keeps plain letter-by-letter order.
ok(
    join(':', $Collator->sort(
        qw/ acha aca ada acia acka /
    ) ),
    join(':',
        qw/ aca acha acia acka ada /
    ),
);

# Remember the collator's initial level so later sections can restore it.
my $old_level = $Collator->{level};
my $hiragana  = "\x{3042}\x{3044}";
my $katakana  = "\x{30A2}\x{30A4}";

# At level 2 the checks below expect case differences and the
# hiragana/katakana distinction to be ignored.
$Collator->{level} = 2;

ok( $Collator->cmp("ABC","abc"), 0);
ok( $Collator->eq("ABC","abc") );
ok( $Collator->le("ABC","abc") );
ok( $Collator->cmp($hiragana, $katakana), 0);
ok( $Collator->eq($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );

# hangul: a precomposed syllable is expected to equal its jamo sequence,
# but only when the jamo are adjacent (third check).
ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") );
ok( $Collator->eq("a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b") );
ok( $Collator->gt("a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}") );
ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") );
ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") );
ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < kana (U+30A2 is katakana)

$Collator->{level} = $old_level;

# The next four groups toggle the katakana_before_hiragana and
# upper_before_lower flags directly on the collator object and check the
# resulting order of "abc" vs "ABC" and of hiragana vs katakana.

# katakana first, lower-case first.
$Collator->{katakana_before_hiragana} = 1;

ok( $Collator->cmp("abc", "ABC"), -1);
ok( $Collator->ne("abc", "ABC") );
ok( $Collator->lt("abc", "ABC") );
ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ne($hiragana, $katakana) );
ok( $Collator->gt($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );

# katakana first, upper-case first.
$Collator->{upper_before_lower} = 1;

ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->ge("abc", "ABC"), 1);
ok( $Collator->gt("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ge($hiragana, $katakana), 1);
ok( $Collator->gt($hiragana, $katakana), 1);

# hiragana first, upper-case first.
$Collator->{katakana_before_hiragana} = 0;

ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), -1);

# Both flags off again: hiragana first, lower-case first.
$Collator->{upper_before_lower} = 0;

ok( $Collator->cmp("abc", "ABC"), -1);
ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), -1);
ok( $Collator->lt($hiragana, $katakana) );

# Collator built with an ignoreChar pattern that matches "a" and "e".
my $ign = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    ignoreChar    => qr/^[ae]$/,
);

# Without their a's and e's both words reduce to "lmnt", so they are
# expected to compare equal.
ok( $ign->cmp("element","lament"), 0);

# index() checks. On a match the method returns a (position, length) pair
# that is fed straight to substr(); on failure the list assignment is
# empty, so the "if" body is skipped.

# Level 2: "PERL" is expected to match "Perl", so the replacement happens.
$Collator->{level} = 2;

my $str;

my $orig = "This is a Perl book.";
my $sub  = "PERL";
my $rep  = "camel";
my $ret  = "This is a camel book.";

$str = $orig;
if (my ($pos, $len) = $Collator->index($str, $sub)) {
    substr($str, $pos, $len, $rep);
}

ok($str, $ret);

# Back at the original level no match is expected, so the string must
# come through unchanged.
$Collator->{level} = $old_level;

$str = $orig;
if (my ($pos, $len) = $Collator->index($str, $sub)) {
    substr($str, $pos, $len, $rep);
}

ok($str, $orig);

# Tailored collator at level 1: "m\x{00FC}ss" is expected to match the
# three characters "mu\x{00DF}" of the text.
$tr->{level} = 1;

$str = "Ich mu\x{00DF} studieren.";
$sub = "m\x{00FC}ss";
my $match = undef;
if (my ($pos, $len) = $tr->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, "mu\x{00DF}");

# $old_level was read from $Collator, not from $tr (presumably both start
# at the same default). At that level the same search must fail.
$tr->{level} = $old_level;

$str = "Ich mu\x{00DF} studieren.";
$sub = "m\x{00FC}ss";
$match = undef;
if (my ($pos, $len) = $tr->index($str, $sub)) {
    $match = substr($str, $pos, $len);
}
ok($match, undef);

# Empty pattern in an empty string: a successful, zero-length match.
$match = undef;
if (my ($pos, $len) = $Collator->index("", "")) {
    $match = substr("", $pos, $len);
}
ok($match, "");

# Non-empty pattern in an empty string: no match.
$match = undef;
if (my ($pos, $len) = $Collator->index("", "abc")) {
    $match = substr("", $pos, $len);
}
ok($match, undef);
