Commit | Line | Data |
---|---|---|
4d36a948 TS |
1 | |
2 | BEGIN { | |
ae6aa562 | 3 | unless ("A" eq pack('U', 0x41)) { |
9f1f04a1 RGS |
4 | print "1..0 # Unicode::Collate " . |
5 | "cannot stringify a Unicode code point\n"; | |
4d36a948 TS |
6 | exit 0; |
7 | } | |
456a1446 CBW |
8 | if ($ENV{PERL_CORE}) { |
9 | chdir('t') if -d 't'; | |
10 | @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); | |
11 | } | |
4d36a948 TS |
12 | } |
13 | ||
14 | use Test; | |
1393fe00 | 15 | BEGIN { plan tests => 71 }; |
4c843366 JH |
16 | |
17 | use strict; | |
18 | use warnings; | |
4d36a948 TS |
19 | use Unicode::Collate; |
20 | ||
68adb2b0 | 21 | ok(1); |
4d36a948 TS |
22 | |
23 | ######################### | |
24 | ||
68adb2b0 | 25 | our $IsEBCDIC = ord("A") != 0x41; |
4d36a948 TS |
26 | |
27 | my $Collator = Unicode::Collate->new( | |
28 | table => 'keys.txt', | |
29 | normalization => undef, | |
30 | ); | |
31 | ||
32 | ############## | |
33 | ||
34 | my %old_level = $Collator->change(level => 2); | |
35 | ||
36 | my $str; | |
37 | ||
38 | my $orig = "This is a Perl book."; | |
39 | my $sub = "PERL"; | |
40 | my $rep = "camel"; | |
41 | my $ret = "This is a camel book."; | |
42 | ||
43 | $str = $orig; | |
44 | if (my($pos,$len) = $Collator->index($str, $sub)) { | |
45 | substr($str, $pos, $len, $rep); | |
46 | } | |
47 | ||
48 | ok($str, $ret); | |
49 | ||
50 | $Collator->change(%old_level); | |
51 | ||
52 | $str = $orig; | |
53 | if (my($pos,$len) = $Collator->index($str, $sub)) { | |
54 | substr($str, $pos, $len, $rep); | |
55 | } | |
56 | ||
57 | ok($str, $orig); | |
58 | ||
59 | ############## | |
60 | ||
61 | my $match; | |
62 | ||
63 | $Collator->change(level => 1); | |
64 | ||
65 | $str = "Pe\x{300}rl"; | |
66 | $sub = "pe"; | |
67 | $ret = "Pe\x{300}"; | |
68 | $match = undef; | |
69 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
70 | $match = substr($str, $pos, $len); | |
71 | } | |
72 | ok($match, $ret); | |
73 | ||
74 | $str = "P\x{300}e\x{300}\x{301}\x{303}rl"; | |
75 | $sub = "pE"; | |
76 | $ret = "P\x{300}e\x{300}\x{301}\x{303}"; | |
77 | $match = undef; | |
78 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
79 | $match = substr($str, $pos, $len); | |
80 | } | |
81 | ok($match, $ret); | |
82 | ||
83 | $Collator->change(level => 2); | |
84 | ||
85 | $str = "Pe\x{300}rl"; | |
86 | $sub = "pe"; | |
87 | $ret = undef; | |
88 | $match = undef; | |
89 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
90 | $match = substr($str, $pos, $len); | |
91 | } | |
92 | ok($match, $ret); | |
93 | ||
94 | $str = "P\x{300}e\x{300}\x{301}\x{303}rl"; | |
95 | $sub = "pE"; | |
96 | $ret = undef; | |
97 | $match = undef; | |
98 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
99 | $match = substr($str, $pos, $len); | |
100 | } | |
101 | ok($match, $ret); | |
102 | ||
103 | $str = "Pe\x{300}rl"; | |
104 | $sub = "pe\x{300}"; | |
105 | $ret = "Pe\x{300}"; | |
106 | $match = undef; | |
107 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
108 | $match = substr($str, $pos, $len); | |
109 | } | |
110 | ok($match, $ret); | |
111 | ||
112 | $str = "P\x{300}e\x{300}\x{301}\x{303}rl"; | |
113 | $sub = "p\x{300}E\x{300}\x{301}\x{303}"; | |
114 | $ret = "P\x{300}e\x{300}\x{301}\x{303}"; | |
115 | $match = undef; | |
116 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
117 | $match = substr($str, $pos, $len); | |
118 | } | |
119 | ok($match, $ret); | |
120 | ||
121 | ############## | |
122 | ||
123 | $Collator->change(level => 1); | |
124 | ||
125 | $str = $IsEBCDIC | |
126 | ? "Ich mu\x{0059} studieren Perl." | |
127 | : "Ich mu\x{00DF} studieren Perl."; | |
128 | $sub = $IsEBCDIC | |
129 | ? "m\x{00DC}ss" | |
130 | : "m\x{00FC}ss"; | |
131 | $ret = $IsEBCDIC | |
132 | ? "mu\x{0059}" | |
133 | : "mu\x{00DF}"; | |
134 | $match = undef; | |
135 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
136 | $match = substr($str, $pos, $len); | |
137 | } | |
138 | ok($match, $ret); | |
139 | ||
140 | $Collator->change(%old_level); | |
141 | ||
142 | $match = undef; | |
143 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
144 | $match = substr($str, $pos, $len); | |
145 | } | |
146 | ok($match, undef); | |
147 | ||
148 | $match = undef; | |
149 | if (my($pos,$len) = $Collator->index("", "")) { | |
150 | $match = substr("", $pos, $len); | |
151 | } | |
152 | ok($match, ""); | |
153 | ||
154 | $match = undef; | |
155 | if (my($pos,$len) = $Collator->index("", "abc")) { | |
156 | $match = substr("", $pos, $len); | |
157 | } | |
158 | ok($match, undef); | |
159 | ||
160 | ############## | |
161 | ||
162 | $Collator->change(level => 1); | |
163 | ||
164 | $str = "\0\cA\0\cAe\0\x{300}\cA\x{301}\cB\x{302}\0 \0\cA"; | |
165 | $sub = "e"; | |
166 | $ret = "e\0\x{300}\cA\x{301}\cB\x{302}\0"; | |
167 | $match = undef; | |
168 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
169 | $match = substr($str, $pos, $len); | |
170 | } | |
171 | ok($match, $ret); | |
172 | ||
173 | $Collator->change(level => 1); | |
174 | ||
175 | $str = "\0\cA\0\cAe\0\cA\x{300}\0\cAe"; | |
176 | $sub = "e"; | |
177 | $ret = "e\0\cA\x{300}\0\cA"; | |
178 | $match = undef; | |
179 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
180 | $match = substr($str, $pos, $len); | |
181 | } | |
182 | ok($match, $ret); | |
183 | ||
184 | ||
185 | $Collator->change(%old_level); | |
186 | ||
187 | $str = "e\x{300}"; | |
188 | $sub = "e"; | |
189 | $ret = undef; | |
190 | $match = undef; | |
191 | if (my($pos, $len) = $Collator->index($str, $sub)) { | |
192 | $match = substr($str, $pos, $len); | |
193 | } | |
194 | ok($match, $ret); | |
195 | ||
196 | ############## | |
197 | ||
198 | $Collator->change(level => 1); | |
199 | ||
200 | $str = "The Perl is a language, and the perl is an interpreter."; | |
201 | $sub = "PERL"; | |
202 | ||
203 | $match = undef; | |
204 | if (my($pos, $len) = $Collator->index($str, $sub, -40)) { | |
205 | $match = substr($str, $pos, $len); | |
206 | } | |
207 | ok($match, "Perl"); | |
208 | ||
209 | $match = undef; | |
210 | if (my($pos, $len) = $Collator->index($str, $sub, 4)) { | |
211 | $match = substr($str, $pos, $len); | |
212 | } | |
213 | ok($match, "Perl"); | |
214 | ||
215 | $match = undef; | |
216 | if (my($pos, $len) = $Collator->index($str, $sub, 5)) { | |
217 | $match = substr($str, $pos, $len); | |
218 | } | |
219 | ok($match, "perl"); | |
220 | ||
221 | $match = undef; | |
222 | if (my($pos, $len) = $Collator->index($str, $sub, 32)) { | |
223 | $match = substr($str, $pos, $len); | |
224 | } | |
225 | ok($match, "perl"); | |
226 | ||
227 | $match = undef; | |
228 | if (my($pos, $len) = $Collator->index($str, $sub, 33)) { | |
229 | $match = substr($str, $pos, $len); | |
230 | } | |
231 | ok($match, undef); | |
232 | ||
233 | $match = undef; | |
234 | if (my($pos, $len) = $Collator->index($str, $sub, 100)) { | |
235 | $match = substr($str, $pos, $len); | |
236 | } | |
237 | ok($match, undef); | |
238 | ||
239 | $Collator->change(%old_level); | |
240 | ||
241 | ############## | |
242 | ||
243 | my @ret; | |
244 | ||
245 | $Collator->change(level => 1); | |
246 | ||
247 | $ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); | |
248 | ok($ret); | |
249 | ok($$ret eq "P\cBe\x{300}\cB"); | |
250 | ||
251 | @ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); | |
252 | ok($ret[0], "P\cBe\x{300}\cB"); | |
253 | ||
254 | $str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}"; | |
255 | $sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss"; | |
256 | ||
257 | ($ret) = $Collator->match($str, $sub); | |
258 | ok($ret, $str); | |
259 | ||
260 | $str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}"; | |
261 | $sub = $IsEBCDIC ? "m\x{00DC}s" : "m\x{00FC}s"; | |
262 | ||
263 | ($ret) = $Collator->match($str, $sub); | |
264 | ok($ret, undef); | |
265 | ||
266 | $ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); | |
267 | ok($ret eq "P\cBe\x{300}\cB:pe:PE"); | |
268 | ||
269 | $ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); | |
270 | ok($ret == 3); | |
271 | ||
272 | $str = "ABCDEF"; | |
273 | $sub = "cde"; | |
274 | $ret = $Collator->match($str, $sub); | |
275 | $str = "01234567"; | |
276 | ok($ret && $$ret, "CDE"); | |
277 | ||
278 | $str = "ABCDEF"; | |
279 | $sub = "cde"; | |
280 | ($ret) = $Collator->match($str, $sub); | |
281 | $str = "01234567"; | |
282 | ok($ret, "CDE"); | |
283 | ||
284 | ||
285 | $Collator->change(level => 3); | |
286 | ||
287 | $ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); | |
288 | ok($ret, undef); | |
289 | ||
290 | @ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); | |
291 | ok(@ret == 0); | |
292 | ||
293 | $ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe"); | |
294 | ok($ret eq ""); | |
295 | ||
296 | $ret = $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe"); | |
297 | ok($ret == 0); | |
298 | ||
299 | $ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); | |
300 | ok($ret eq "pe"); | |
301 | ||
302 | $ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); | |
303 | ok($ret == 1); | |
304 | ||
305 | $str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}"; | |
306 | $sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss"; | |
307 | ||
308 | ($ret) = $Collator->match($str, $sub); | |
309 | ok($ret, undef); | |
310 | ||
311 | $Collator->change(%old_level); | |
312 | ||
313 | ############## | |
314 | ||
315 | $Collator->change(level => 1); | |
316 | ||
317 | sub strreverse { scalar reverse shift } | |
318 | ||
319 | $str = "P\cBe\x{300}\cBrl and PERL."; | |
320 | $ret = $Collator->subst($str, "perl", 'Camel'); | |
321 | ok($ret, 1); | |
322 | ok($str, "Camel and PERL."); | |
323 | ||
324 | $str = "P\cBe\x{300}\cBrl and PERL."; | |
325 | $ret = $Collator->subst($str, "perl", \&strreverse); | |
326 | ok($ret, 1); | |
327 | ok($str, "lr\cB\x{300}e\cBP and PERL."); | |
328 | ||
329 | $str = "P\cBe\x{300}\cBrl and PERL."; | |
330 | $ret = $Collator->gsubst($str, "perl", 'Camel'); | |
331 | ok($ret, 2); | |
332 | ok($str, "Camel and Camel."); | |
333 | ||
334 | $str = "P\cBe\x{300}\cBrl and PERL."; | |
335 | $ret = $Collator->gsubst($str, "perl", \&strreverse); | |
336 | ok($ret, 2); | |
337 | ok($str, "lr\cB\x{300}e\cBP and LREP."); | |
338 | ||
3756e7ca | 339 | $str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L..."; |
4d36a948 | 340 | $Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" }); |
3756e7ca RGS |
341 | ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> " |
342 | . "<b>CAMEL</b> horse <b>cAm\0E\0L</b>..."); | |
4d36a948 | 343 | |
1393fe00 CBW |
344 | # http://www.xray.mpe.mpg.de/mailing-lists/perl-unicode/2010-09/msg00014.html |
345 | # when the substring includes an ignorable element like a space... | |
346 | ||
347 | $str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L..."; | |
348 | $Collator->gsubst($str, "camel horse", sub { "<b>$_[0]</b>" }); | |
349 | ok($str, "Camel donkey zebra came\x{301}l <b>CAMEL horse</b> cAm\0E\0L..."); | |
350 | ||
351 | $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; | |
352 | $Collator->gsubst($str, "camel horse", sub { "=$_[0]=" }); | |
353 | ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); | |
354 | ||
355 | $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; | |
356 | $Collator->gsubst($str, "camel-horse", sub { "=$_[0]=" }); | |
357 | ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); | |
358 | ||
359 | $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; | |
360 | $Collator->gsubst($str, "camelhorse", sub { "=$_[0]=" }); | |
361 | ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); | |
362 | ||
363 | $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; | |
364 | $Collator->gsubst($str, " ca mel hor se ", sub { "=$_[0]=" }); | |
365 | ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); | |
366 | ||
367 | $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; | |
368 | $Collator->gsubst($str, "ca\x{300}melho\x{302}rse", sub { "=$_[0]=" }); | |
369 | ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); | |
370 | ||
4d36a948 TS |
371 | $Collator->change(level => 3); |
372 | ||
373 | $str = "P\cBe\x{300}\cBrl and PERL."; | |
374 | $ret = $Collator->subst($str, "perl", "Camel"); | |
375 | ok(! $ret); | |
376 | ok($str, "P\cBe\x{300}\cBrl and PERL."); | |
377 | ||
378 | $str = "P\cBe\x{300}\cBrl and PERL."; | |
379 | $ret = $Collator->subst($str, "perl", \&strreverse); | |
380 | ok(! $ret); | |
381 | ok($str, "P\cBe\x{300}\cBrl and PERL."); | |
382 | ||
383 | $str = "P\cBe\x{300}\cBrl and PERL."; | |
384 | $ret = $Collator->gsubst($str, "perl", "Camel"); | |
385 | ok($ret, 0); | |
386 | ok($str, "P\cBe\x{300}\cBrl and PERL."); | |
387 | ||
388 | $str = "P\cBe\x{300}\cBrl and PERL."; | |
389 | $ret = $Collator->gsubst($str, "perl", \&strreverse); | |
390 | ok($ret, 0); | |
391 | ok($str, "P\cBe\x{300}\cBrl and PERL."); | |
392 | ||
393 | $Collator->change(%old_level); | |
394 | ||
395 | ############## | |
396 | ||
397 | $str = "Perl and Camel"; | |
398 | $ret = $Collator->gsubst($str, "\cA\cA\0", "AB"); | |
399 | ok($ret, 15); | |
400 | ok($str, "ABPABeABrABlAB ABaABnABdAB ABCABaABmABeABlAB"); | |
401 | ||
402 | $str = ''; | |
403 | $ret = $Collator->subst($str, "", "ABC"); | |
404 | ok($ret, 1); | |
405 | ok($str, "ABC"); | |
406 | ||
407 | $str = ''; | |
408 | $ret = $Collator->gsubst($str, "", "ABC"); | |
409 | ok($ret, 1); | |
410 | ok($str, "ABC"); | |
411 | ||
412 | $str = 'PPPPP'; | |
413 | $ret = $Collator->gsubst($str, 'PP', "ABC"); | |
414 | ok($ret, 2); | |
415 | ok($str, "ABCABCP"); | |
416 | ||
417 | ############## | |
418 | ||
419 | # Shifted; ignorable after variable | |
420 | ||
421 | ($ret) = $Collator->match("A?\x{300}!\x{301}\x{344}B\x{315}", "?!"); | |
422 | ok($ret, "?\x{300}!\x{301}\x{344}"); | |
423 | ||
424 | $Collator->change(alternate => 'Non-ignorable'); | |
425 | ||
426 | ($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!"); | |
427 | ok($ret, undef); | |
3756e7ca | 428 |