Commit | Line | Data |
---|---|---|
45394607 JH |
1 | # Before `make install' is performed this script should be runnable with |
2 | # `make test'. After `make install' it should work as `perl test.pl' | |
3 | ||
4 | ######################### | |
5 | ||
6 | use Test; | |
905aa9f0 | 7 | BEGIN { plan tests => 54 }; |
45394607 JH |
8 | use Unicode::Collate; |
9 | ok(1); # If we made it this far, we're ok. | |
10 | ||
11 | ######################### | |
12 | ||
5398038e | 13 | my $Collator = Unicode::Collate->new( |
45394607 JH |
14 | table => 'keys.txt', |
15 | normalization => undef, | |
16 | ); | |
17 | ||
5398038e | 18 | ok(ref $Collator, "Unicode::Collate"); |
45394607 JH |
19 | |
20 | ok( | |
5398038e | 21 | join(':', $Collator->sort( |
45394607 JH |
22 | qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN / |
23 | ) ), | |
24 | join(':', | |
25 | qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings / | |
26 | ), | |
27 | ); | |
28 | ||
29 | my $A_acute = pack('U', 0x00C1); | |
30 | my $acute = pack('U', 0x0301); | |
31 | ||
5398038e | 32 | ok($Collator->cmp("A$acute", $A_acute), -1); |
45394607 | 33 | |
5398038e TS |
34 | ok($Collator->cmp("", ""), 0); |
35 | ok(! $Collator->ne("", "") ); | |
36 | ok( $Collator->eq("", "") ); | |
37 | ||
38 | ok($Collator->cmp("", "perl"), -1); | |
45394607 JH |
39 | |
40 | eval "use Unicode::Normalize"; | |
41 | ||
42 | if(!$@){ | |
43 | my $NFD = Unicode::Collate->new( | |
44 | table => 'keys.txt', | |
905aa9f0 TS |
45 | entry => <<'ENTRIES', |
46 | 0430 ; [.0B01.0020.0002.0430] # CYRILLIC SMALL LETTER A | |
47 | 0410 ; [.0B01.0020.0008.0410] # CYRILLIC CAPITAL LETTER A | |
48 | 04D3 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS | |
49 | 0430 0308 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS | |
50 | 04D3 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS | |
51 | 0430 0308 ; [.0B09.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS | |
52 | 04D2 ; [.0B09.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS | |
53 | 0410 0308 ; [.0B09.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS | |
54 | 0430 3099 ; [.0B10.0020.0002.04D3] # A WITH KATAKANA VOICED | |
55 | 0430 3099 0308 ; [.0B11.0020.0002.04D3] # A WITH KATAKANA VOICED, DIAERESIS | |
56 | ENTRIES | |
45394607 | 57 | ); |
905aa9f0 TS |
58 | ok($NFD->eq("A$acute", $A_acute)); |
59 | ok($NFD->eq("\x{4D3}\x{325}", "\x{430}\x{308}\x{325}")); | |
60 | ok($NFD->lt("\x{430}\x{308}A", "\x{430}\x{308}B")); | |
61 | ok($NFD->lt("\x{430}\x{3099}B", "\x{430}\x{308}\x{3099}A")); | |
62 | ok($NFD->eq("\x{0430}\x{3099}\x{309A}\x{0308}", | |
63 | "\x{0430}\x{309A}\x{3099}\x{0308}") ); | |
45394607 JH |
64 | } |
65 | else{ | |
d16e9e3d | 66 | ok(1); |
905aa9f0 TS |
67 | ok(1); |
68 | ok(1); | |
69 | ok(1); | |
70 | ok(1); | |
45394607 JH |
71 | } |
72 | ||
73 | my $tr = Unicode::Collate->new( | |
74 | table => 'keys.txt', | |
75 | normalization => undef, | |
76 | ignoreName => qr/^(?:HANGUL|HIRAGANA|KATAKANA|BOPOMOFO)$/, | |
77 | entry => <<'ENTRIES', | |
78 | 0063 0068 ; [.0893.0020.0002.0063] # "ch" in traditional Spanish | |
79 | 0043 0068 ; [.0893.0020.0008.0043] # "Ch" in traditional Spanish | |
5398038e | 80 | 00DF ; [.09F3.0154.0004.00DF] [.09F3.0020.0004.00DF] # eszet in Germany |
45394607 JH |
81 | ENTRIES |
82 | ); | |
83 | ||
84 | ok( | |
85 | join(':', $tr->sort( | |
86 | qw/ acha aca ada acia acka / | |
87 | ) ), | |
88 | join(':', | |
89 | qw/ aca acia acka acha ada / | |
90 | ), | |
91 | ); | |
92 | ||
93 | ok( | |
5398038e | 94 | join(':', $Collator->sort( |
45394607 JH |
95 | qw/ acha aca ada acia acka / |
96 | ) ), | |
97 | join(':', | |
98 | qw/ aca acha acia acka ada / | |
99 | ), | |
100 | ); | |
101 | ||
5398038e | 102 | my $old_level = $Collator->{level}; |
45394607 JH |
103 | my $hiragana = "\x{3042}\x{3044}"; |
104 | my $katakana = "\x{30A2}\x{30A4}"; | |
105 | ||
5398038e TS |
106 | $Collator->{level} = 2; |
107 | ||
108 | ok( $Collator->cmp("ABC","abc"), 0); | |
109 | ok( $Collator->eq("ABC","abc") ); | |
110 | ok( $Collator->le("ABC","abc") ); | |
111 | ok( $Collator->cmp($hiragana, $katakana), 0); | |
112 | ok( $Collator->eq($hiragana, $katakana) ); | |
113 | ok( $Collator->ge($hiragana, $katakana) ); | |
45394607 | 114 | |
5398038e TS |
115 | # hangul |
116 | ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") ); | |
117 | ok( $Collator->eq("a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b") ); | |
118 | ok( $Collator->gt("a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}") ); | |
119 | ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") ); | |
120 | ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") ); | |
121 | ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < hiragana | |
45394607 | 122 | |
5398038e | 123 | $Collator->{level} = $old_level; |
45394607 | 124 | |
5398038e | 125 | $Collator->{katakana_before_hiragana} = 1; |
45394607 | 126 | |
5398038e TS |
127 | ok( $Collator->cmp("abc", "ABC"), -1); |
128 | ok( $Collator->ne("abc", "ABC") ); | |
129 | ok( $Collator->lt("abc", "ABC") ); | |
130 | ok( $Collator->le("abc", "ABC") ); | |
131 | ok( $Collator->cmp($hiragana, $katakana), 1); | |
132 | ok( $Collator->ne($hiragana, $katakana) ); | |
133 | ok( $Collator->gt($hiragana, $katakana) ); | |
134 | ok( $Collator->ge($hiragana, $katakana) ); | |
45394607 | 135 | |
5398038e | 136 | $Collator->{upper_before_lower} = 1; |
45394607 | 137 | |
5398038e TS |
138 | ok( $Collator->cmp("abc", "ABC"), 1); |
139 | ok( $Collator->ge("abc", "ABC"), 1); | |
140 | ok( $Collator->gt("abc", "ABC"), 1); | |
141 | ok( $Collator->cmp($hiragana, $katakana), 1); | |
142 | ok( $Collator->ge($hiragana, $katakana), 1); | |
143 | ok( $Collator->gt($hiragana, $katakana), 1); | |
45394607 | 144 | |
5398038e | 145 | $Collator->{katakana_before_hiragana} = 0; |
45394607 | 146 | |
5398038e TS |
147 | ok( $Collator->cmp("abc", "ABC"), 1); |
148 | ok( $Collator->cmp($hiragana, $katakana), -1); | |
45394607 | 149 | |
5398038e | 150 | $Collator->{upper_before_lower} = 0; |
45394607 | 151 | |
5398038e TS |
152 | ok( $Collator->cmp("abc", "ABC"), -1); |
153 | ok( $Collator->le("abc", "ABC") ); | |
154 | ok( $Collator->cmp($hiragana, $katakana), -1); | |
155 | ok( $Collator->lt($hiragana, $katakana) ); | |
45394607 JH |
156 | |
157 | my $ign = Unicode::Collate->new( | |
158 | table => 'keys.txt', | |
159 | normalization => undef, | |
160 | ignoreChar => qr/^[ae]$/, | |
161 | ); | |
162 | ||
163 | ok( $ign->cmp("element","lament"), 0); | |
164 | ||
5398038e | 165 | $Collator->{level} = 2; |
d16e9e3d | 166 | |
d16e9e3d | 167 | my $str; |
5398038e TS |
168 | |
169 | my $orig = "This is a Perl book."; | |
d16e9e3d JH |
170 | my $sub = "PERL"; |
171 | my $rep = "camel"; | |
172 | my $ret = "This is a camel book."; | |
173 | ||
174 | $str = $orig; | |
5398038e TS |
175 | if(my($pos,$len) = $Collator->index($str, $sub)){ |
176 | substr($str, $pos, $len, $rep); | |
d16e9e3d JH |
177 | } |
178 | ||
179 | ok($str, $ret); | |
180 | ||
5398038e | 181 | $Collator->{level} = $old_level; |
d16e9e3d JH |
182 | |
183 | $str = $orig; | |
5398038e TS |
184 | if(my($pos,$len) = $Collator->index($str, $sub)){ |
185 | substr($str, $pos, $len, $rep); | |
d16e9e3d JH |
186 | } |
187 | ||
188 | ok($str, $orig); | |
189 | ||
5398038e TS |
190 | $tr->{level} = 1; |
191 | ||
192 | $str = "Ich mu\x{00DF} studieren."; | |
193 | $sub = "m\x{00FC}ss"; | |
194 | my $match = undef; | |
195 | if(my($pos, $len) = $tr->index($str, $sub)){ | |
196 | $match = substr($str, $pos, $len); | |
197 | } | |
198 | ok($match, "mu\x{00DF}"); | |
199 | ||
200 | $tr->{level} = $old_level; | |
201 | ||
202 | $str = "Ich mu\x{00DF} studieren."; | |
203 | $sub = "m\x{00FC}ss"; | |
204 | $match = undef; | |
205 | if(my($pos, $len) = $tr->index($str, $sub)){ | |
206 | $match = substr($str, $pos, $len); | |
207 | } | |
208 | ok($match, undef); | |
209 | ||
210 | $match = undef; | |
211 | if(my($pos,$len) = $Collator->index("", "")){ | |
212 | $match = substr("", $pos, $len); | |
213 | } | |
214 | ok($match, ""); | |
215 | ||
216 | $match = undef; | |
217 | if(my($pos,$len) = $Collator->index("", "abc")){ | |
218 | $match = substr("", $pos, $len); | |
219 | } | |
220 | ok($match, undef); | |
221 |