3 unless ("A" eq pack('U', 0x41)) {
4 print "1..0 # Unicode::Collate " .
5 "cannot stringify a Unicode code point\n";
10 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
16 BEGIN { $| = 1; print "1..91\n"; }
19 my $p = my $r = shift;
22 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
24 print $p ? "ok" : "not ok", ' ', ++$count, "\n";
31 our $IsEBCDIC = ord("A") != 0x41;
33 my $Collator = Unicode::Collate->new(
35 normalization => undef,
40 my %old_level = $Collator->change(level => 2);
44 my $orig = "This is a Perl book.";
47 my $ret = "This is a camel book.";
50 if (my($pos,$len) = $Collator->index($str, $sub)) {
51 substr($str, $pos, $len, $rep);
56 $Collator->change(%old_level);
59 if (my($pos,$len) = $Collator->index($str, $sub)) {
60 substr($str, $pos, $len, $rep);
69 $Collator->change(level => 1);
75 if (my($pos, $len) = $Collator->index($str, $sub)) {
76 $match = substr($str, $pos, $len);
80 $str = "P\x{300}e\x{300}\x{301}\x{303}rl";
82 $ret = "P\x{300}e\x{300}\x{301}\x{303}";
84 if (my($pos, $len) = $Collator->index($str, $sub)) {
85 $match = substr($str, $pos, $len);
89 $Collator->change(level => 2);
95 if (my($pos, $len) = $Collator->index($str, $sub)) {
96 $match = substr($str, $pos, $len);
100 $str = "P\x{300}e\x{300}\x{301}\x{303}rl";
104 if (my($pos, $len) = $Collator->index($str, $sub)) {
105 $match = substr($str, $pos, $len);
109 $str = "Pe\x{300}rl";
113 if (my($pos, $len) = $Collator->index($str, $sub)) {
114 $match = substr($str, $pos, $len);
118 $str = "P\x{300}e\x{300}\x{301}\x{303}rl";
119 $sub = "p\x{300}E\x{300}\x{301}\x{303}";
120 $ret = "P\x{300}e\x{300}\x{301}\x{303}";
122 if (my($pos, $len) = $Collator->index($str, $sub)) {
123 $match = substr($str, $pos, $len);
129 $Collator->change(level => 1);
132 ? "Ich mu\x{0059} studieren Perl."
133 : "Ich mu\x{00DF} studieren Perl.";
141 if (my($pos, $len) = $Collator->index($str, $sub)) {
142 $match = substr($str, $pos, $len);
146 $Collator->change(%old_level);
149 if (my($pos, $len) = $Collator->index($str, $sub)) {
150 $match = substr($str, $pos, $len);
155 if (my($pos,$len) = $Collator->index("", "")) {
156 $match = substr("", $pos, $len);
161 if (my($pos,$len) = $Collator->index("", "abc")) {
162 $match = substr("", $pos, $len);
168 $Collator->change(level => 1);
170 $str = "\0\cA\0\cAe\0\x{300}\cA\x{301}\cB\x{302}\0 \0\cA";
172 $ret = "e\0\x{300}\cA\x{301}\cB\x{302}\0";
174 if (my($pos, $len) = $Collator->index($str, $sub)) {
175 $match = substr($str, $pos, $len);
179 $Collator->change(level => 1);
181 $str = "\0\cA\0\cAe\0\cA\x{300}\0\cAe";
183 $ret = "e\0\cA\x{300}\0\cA";
185 if (my($pos, $len) = $Collator->index($str, $sub)) {
186 $match = substr($str, $pos, $len);
191 $Collator->change(%old_level);
197 if (my($pos, $len) = $Collator->index($str, $sub)) {
198 $match = substr($str, $pos, $len);
204 $Collator->change(level => 1);
206 $str = "The Perl is a language, and the perl is an interpreter.";
210 if (my($pos, $len) = $Collator->index($str, $sub, -40)) {
211 $match = substr($str, $pos, $len);
216 if (my($pos, $len) = $Collator->index($str, $sub, 4)) {
217 $match = substr($str, $pos, $len);
222 if (my($pos, $len) = $Collator->index($str, $sub, 5)) {
223 $match = substr($str, $pos, $len);
228 if (my($pos, $len) = $Collator->index($str, $sub, 32)) {
229 $match = substr($str, $pos, $len);
234 if (my($pos, $len) = $Collator->index($str, $sub, 33)) {
235 $match = substr($str, $pos, $len);
240 if (my($pos, $len) = $Collator->index($str, $sub, 100)) {
241 $match = substr($str, $pos, $len);
245 $Collator->change(%old_level);
251 $Collator->change(level => 1);
253 $ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
255 ok($$ret eq "P\cBe\x{300}\cB");
257 @ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
258 ok($ret[0], "P\cBe\x{300}\cB");
260 $str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
261 $sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss";
263 ($ret) = $Collator->match($str, $sub);
266 $str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
267 $sub = $IsEBCDIC ? "m\x{00DC}s" : "m\x{00FC}s";
269 ($ret) = $Collator->match($str, $sub);
272 $ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
273 ok($ret eq "P\cBe\x{300}\cB:pe:PE");
275 $ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
280 $ret = $Collator->match($str, $sub);
282 ok($ret && $$ret, "CDE");
286 ($ret) = $Collator->match($str, $sub);
291 $Collator->change(level => 3);
293 $ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
296 @ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
299 $ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe");
302 $ret = $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe");
305 $ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
308 $ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
311 $str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
312 $sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss";
314 ($ret) = $Collator->match($str, $sub);
317 $Collator->change(%old_level);
321 $Collator->change(level => 1);
323 sub strreverse { scalar reverse shift }
325 $str = "P\cBe\x{300}\cBrl and PERL.";
326 $ret = $Collator->subst($str, "perl", 'Camel');
328 ok($str, "Camel and PERL.");
330 $str = "P\cBe\x{300}\cBrl and PERL.";
331 $ret = $Collator->subst($str, "perl", \&strreverse);
333 ok($str, "lr\cB\x{300}e\cBP and PERL.");
335 $str = "P\cBe\x{300}\cBrl and PERL.";
336 $ret = $Collator->gsubst($str, "perl", 'Camel');
338 ok($str, "Camel and Camel.");
340 $str = "P\cBe\x{300}\cBrl and PERL.";
341 $ret = $Collator->gsubst($str, "perl", \&strreverse);
343 ok($str, "lr\cB\x{300}e\cBP and LREP.");
345 $str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
346 $Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
347 ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> "
348 . "<b>CAMEL</b> horse <b>cAm\0E\0L</b>...");
352 # http://www.xray.mpe.mpg.de/mailing-lists/perl-unicode/2010-09/msg00014.html
353 # when the substring includes an ignorable element like a space...
355 $str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
356 $Collator->gsubst($str, "camel horse", sub { "<b>$_[0]</b>" });
357 ok($str, "Camel donkey zebra came\x{301}l <b>CAMEL horse</b> cAm\0E\0L...");
359 $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
360 $Collator->gsubst($str, "camel horse", sub { "=$_[0]=" });
361 ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
363 $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
364 $Collator->gsubst($str, "camel-horse", sub { "=$_[0]=" });
365 ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
367 $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
368 $Collator->gsubst($str, "camelhorse", sub { "=$_[0]=" });
369 ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
371 $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
372 $Collator->gsubst($str, " ca mel hor se ", sub { "=$_[0]=" });
373 ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
375 $str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
376 $Collator->gsubst($str, "ca\x{300}melho\x{302}rse", sub { "=$_[0]=" });
377 ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
381 $Collator->change(level => 3);
383 $str = "P\cBe\x{300}\cBrl and PERL.";
384 $ret = $Collator->subst($str, "perl", "Camel");
386 ok($str, "P\cBe\x{300}\cBrl and PERL.");
388 $str = "P\cBe\x{300}\cBrl and PERL.";
389 $ret = $Collator->subst($str, "perl", \&strreverse);
391 ok($str, "P\cBe\x{300}\cBrl and PERL.");
393 $str = "P\cBe\x{300}\cBrl and PERL.";
394 $ret = $Collator->gsubst($str, "perl", "Camel");
396 ok($str, "P\cBe\x{300}\cBrl and PERL.");
398 $str = "P\cBe\x{300}\cBrl and PERL.";
399 $ret = $Collator->gsubst($str, "perl", \&strreverse);
401 ok($str, "P\cBe\x{300}\cBrl and PERL.");
403 $Collator->change(%old_level);
407 $str = "Perl and Camel";
408 $ret = $Collator->gsubst($str, "\cA\cA\0", "AB");
410 ok($str, "ABPABeABrABlAB ABaABnABdAB ABCABaABmABeABlAB");
413 $ret = $Collator->subst($str, "", "ABC");
418 $ret = $Collator->gsubst($str, "", "ABC");
423 $ret = $Collator->gsubst($str, 'PP', "ABC");
429 # Shifted; ignorable after variable
431 ($ret) = $Collator->match("A?\x{300}!\x{301}\x{344}B\x{315}", "?!");
432 ok($ret, "?\x{300}!\x{301}\x{344}");
434 $Collator->change(alternate => 'Non-ignorable');
436 ($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!");
441 # Now preprocess is defined.
443 $Collator->change(preprocess => sub {''});
445 eval { $Collator->index("", "") };
446 ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
448 eval { $Collator->index("a", "a") };
449 ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
451 eval { $Collator->match("", "") };
452 ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
454 eval { $Collator->match("a", "a") };
455 ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
457 $Collator->change(preprocess => sub { uc shift });
459 eval { $Collator->index("", "") };
460 ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
462 eval { $Collator->index("a", "a") };
463 ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
465 eval { $Collator->match("", "") };
466 ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
468 eval { $Collator->match("a", "a") };
469 ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
473 eval { require Unicode::Normalize };
477 # Now preprocess and normalization are defined.
479 $Collator->change(normalization => 'NFD');
481 eval { $Collator->index("", "") };
482 ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
484 eval { $Collator->index("a", "a") };
485 ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
487 eval { $Collator->match("", "") };
488 ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
490 eval { $Collator->match("a", "a") };
491 ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
496 $Collator->change(preprocess => undef);
499 # Now only normalization is defined.
501 eval { $Collator->index("", "") };
502 ok($@ && $@ =~ /Don't use Normalization with index\(\)/);
504 eval { $Collator->index("a", "a") };
505 ok($@ && $@ =~ /Don't use Normalization with index\(\)/);
507 eval { $Collator->match("", "") };
508 ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/);
510 eval { $Collator->match("a", "a") };
511 ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/);
513 $Collator->change(normalization => undef);
520 # Now preprocess and normalization are undef.
522 eval { $Collator->index("", "") };
525 eval { $Collator->index("a", "a") };
528 eval { $Collator->match("", "") };
531 eval { $Collator->match("a", "a") };