This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to Unicode::Collate 0.28
[perl5.git] / lib / Unicode / Collate / t / test.t
CommitLineData
45394607 1
# Skip the whole file (TAP plan "1..0") when pack('U') cannot turn the
# code point 0x41 into the string "A".
BEGIN {
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Collate " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }
}
9
0116f5dc
JH
# When run as part of the perl core (PERL_CORE set), move into t/ if it
# exists and replace @INC with the in-tree lib directory.
BEGIN {
    if ($ENV{PERL_CORE}) {
        -d 't' and chdir('t');
        if ($^O eq 'MacOS') { @INC = ('::lib') }
        else                { @INC = ('../lib') }
    }
}
16
45394607 17use Test;
06c8fc8f 18BEGIN { plan tests => 200 };
4c843366
JH
19
20use strict;
21use warnings;
45394607 22use Unicode::Collate;
45394607 23
4d36a948
TS
# True when the native code of "A" is not 0x41 (taken here to mean an
# EBCDIC platform).
our $IsEBCDIC = (ord("A") != 0x41);

#########################

ok(1); # Reaching this point means the file compiled and the modules loaded.

my $UCA_Version = "9";

# UCA_Version called as a plain function and as a class method.
ok(Unicode::Collate::UCA_Version, $UCA_Version);
ok(Unicode::Collate->UCA_Version, $UCA_Version);

# Collator shared by many of the sections that follow.
my $Collator = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
);

ok(ref $Collator, "Unicode::Collate");

# ... and as an instance method, without and with parentheses.
ok($Collator->UCA_Version, $UCA_Version);
ok($Collator->UCA_Version(), $UCA_Version);

# sort() must return the words in the expected collation order.
{
    my @shuffled = qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN /;
    my @expected = qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings /;
    ok(join(':', $Collator->sort(@shuffled)), join(':', @expected));
}

# Empty strings: equal to each other, and before any non-empty string.
ok($Collator->cmp("", ""), 0);
ok($Collator->eq("", ""));
ok($Collator->cmp("", "perl"), -1);
57
58##############
59
9f1f04a1
RGS
# Thin wrapper: forwards its arguments (code points) to
# Unicode::Collate::pack_U and returns whatever that returns.
sub _pack_U {
    my @code_points = @_;
    return Unicode::Collate::pack_U(@code_points);
}
# Thin wrapper: forwards its arguments (a string) to
# Unicode::Collate::unpack_U and returns whatever that returns.
sub _unpack_U {
    my @strings = @_;
    return Unicode::Collate::unpack_U(@strings);
}
62
# U+00C1 (precomposed A with acute), U+00E1 (precomposed a with acute)
# and U+0301 (combining acute accent).
my ($A_acute, $a_acute, $acute) = map { _pack_U($_) } 0xC1, 0xE1, 0x0301;

# Default level of the shared collator.
ok($Collator->cmp("A$acute", $A_acute), 0);  # @version 3.1.1 (prev: -1)
ok($Collator->cmp($a_acute, $A_acute), -1);
ok($Collator->eq("A\cA$acute", $A_acute));   # UCA v9: an inserted \cA changes nothing.

# Level 1: the accent no longer matters.  change() hands back the old
# settings, kept in %old_level so later code can restore them.
my %old_level = $Collator->change(level => 1);
for my $base ("A$acute", "A") {
    ok($Collator->eq($base, $A_acute));
}

# Level 2: case is ignored but the accent counts again.  change() returns
# the collator itself in scalar context, so calls can be chained.
ok($Collator->change(level => 2)->eq($a_acute, $A_acute));
ok($Collator->lt("A", $A_acute));

# Back to the original level.
ok($Collator->change(%old_level)->lt("A", $A_acute));
for my $pair ([ "A", $A_acute ], [ "A", $a_acute ], [ $a_acute, $A_acute ]) {
    ok($Collator->lt(@$pair));
}
45394607 82
809c7673
TS
83##############
84
# These three tests need Unicode::Normalize (the collator below is built
# without "normalization => undef") and are not run on EBCDIC platforms.
# The success of the require is taken from the eval's own return value
# rather than from $@, which can be reset while the eval unwinds.
if (eval { require Unicode::Normalize; 1 } && !$IsEBCDIC) {
    my $NFD = Unicode::Collate->new(
        table => 'keys.txt',
        level => 1,
        entry => <<'ENTRIES',
0430 ; [.0CB5.0020.0002.0430] # CYRILLIC SMALL LETTER A
0410 ; [.0CB5.0020.0008.0410] # CYRILLIC CAPITAL LETTER A
04D3 ; [.0CBD.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
0430 0308 ; [.0CBD.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
04D2 ; [.0CBD.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
0410 0308 ; [.0CBD.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
0430 3099 ; [.0CBE.0020.0002.04D3] # A WITH KATAKANA VOICED
0430 3099 0308 ; [.0CBF.0020.0002.04D3] # A WITH KATAKANA VOICED, DIAERESIS
ENTRIES
    );
    ok($NFD->eq("\x{4D3}\x{325}", "\x{430}\x{308}\x{325}"));
    ok($NFD->lt("\x{430}\x{308}A", "\x{430}\x{308}B"));
    ok($NFD->lt("\x{430}\x{3099}B", "\x{430}\x{308}\x{3099}A"));
}
else {
    # Keep the test count stable: one pass per test not run above.
    ok(1);
    ok(1);
    ok(1);
}
111
809c7673
TS
112##############
113
# Collator with two extra entries that treat "ch"/"Ch" as a single element
# (traditional Spanish), and with kana, hangul and bopomofo ignored by name.
my $trad = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    ignoreName    => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
    level         => 3,
    entry         => <<'ENTRIES',
 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
 0043 0068 ; [.0A3F.0020.0008.0043] # "Ch" in traditional Spanish
ENTRIES
);
# For reference:
# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D
# Deutsch sz is included in 'keys.txt';

{
    my @words = qw/ acha aca ada acia acka /;

    # With the "ch" entries, "acha" sorts after "acka" ...
    ok(join(':', $trad->sort(@words)),
       join(':', qw/ aca acia acka acha ada /));

    # ... without them it sorts right after "aca".
    ok(join(':', $Collator->sort(@words)),
       join(':', qw/ aca acha acia acka ada /));
}

ok($trad->eq("ocho", "oc\cAho"));          # UCA v9
ok($trad->eq("ocho", "oc\0\cA\0\cBho"));   # UCA v9
ok($trad->eq("-", ""));                    # also UCA v8
ok($trad->lt("oc-ho", "ocho"));            # also UCA v8

my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";

# ignoreName makes HIRAGANA and KATAKANA ignorable, so both strings
# compare equal to the empty string and to each other.
for my $pair (
    [ $hiragana, ""        ],
    [ "",        $katakana ],
    [ $hiragana, $katakana ],
    [ $katakana, $hiragana ],
) {
    ok($trad->eq(@$pair));
}
150
151##############
152
# At level 2, strings that differ only in case or in kana type compare equal.
$Collator->change(level => 2);

ok($Collator->{level}, 2);

ok($Collator->cmp("ABC", "abc"), 0);
ok($Collator->eq("ABC", "abc"));
ok($Collator->le("ABC", "abc"));
ok($Collator->cmp($hiragana, $katakana), 0);
ok($Collator->eq($hiragana, $katakana));
ok($Collator->ge($hiragana, $katakana));

# Hangul syllables versus their jamo sequences, and relative order.
for my $case (
    [ 'eq', "a\x{AC00}b", "a\x{1100}\x{1161}b" ],
    [ 'eq', "a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b" ],
    [ 'gt', "a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}" ],
    [ 'lt', "a\x{AC00}b", "a\x{AE00}b" ],
    [ 'gt', "a\x{D7A3}b", "a\x{C544}b" ],
    [ 'lt', "a\x{C544}b", "a\x{30A2}b" ],  # hangul < kana (U+30A2, katakana A)
) {
    my ($method, @operands) = @$case;
    ok($Collator->$method(@operands));
}

# Restore the saved level and put katakana before hiragana.
$Collator->change(%old_level, katakana_before_hiragana => 1);

ok($Collator->{level}, 4);

ok($Collator->cmp("abc", "ABC"), -1);
ok($Collator->ne("abc", "ABC"));
ok($Collator->lt("abc", "ABC"));
ok($Collator->le("abc", "ABC"));
ok($Collator->cmp($hiragana, $katakana), 1);
ok($Collator->ne($hiragana, $katakana));
ok($Collator->gt($hiragana, $katakana));
ok($Collator->ge($hiragana, $katakana));

# Additionally put upper case before lower case.
$Collator->change(upper_before_lower => 1);

for my $pair ([ "abc", "ABC" ], [ $hiragana, $katakana ]) {
    ok($Collator->cmp(@$pair), 1);
    ok($Collator->ge(@$pair), 1);
    ok($Collator->gt(@$pair), 1);
}

# Kana order back to the default; case order still reversed.
$Collator->change(katakana_before_hiragana => 0);

ok($Collator->cmp("abc", "ABC"), 1);
ok($Collator->cmp($hiragana, $katakana), -1);

# Case order back to the default as well.
$Collator->change(upper_before_lower => 0);

ok($Collator->cmp("abc", "ABC"), -1);
ok($Collator->le("abc", "ABC"));
ok($Collator->cmp($hiragana, $katakana), -1);
ok($Collator->lt($hiragana, $katakana));
45394607 205
809c7673
TS
206##############
207
# Collator for which the letters a, A, e and E are ignored (ignoreChar).
my $ignoreAE = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    ignoreChar    => qr/^[aAeE]$/,
);

# Each pair is identical once those letters are left out.
for my $pair ([ "element", "lament" ], [ "Perl", "ePrl" ]) {
    ok($ignoreAE->eq(@$pair));
}
216
217##############
218
# Collator whose only entries are the Latin letters A, B and C in both cases.
my $onlyABC = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    entry         => <<'ENTRIES',
0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
ENTRIES
);

{
    my @shuffled = qw/ ABA BAC cc A Ab cAc aB /;
    my @expected = qw/ A aB Ab ABA BAC cAc cc /;
    ok(join(':', $onlyABC->sort(@shuffled)), join(':', @expected));
}
236
237##############
238
# Collator for which a, A, e and E are treated as undefined (undefChar).
my $undefAE = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    undefChar     => qr/^[aAeE]$/,
);

# Each pair is ordered one way by $undefAE and the other way by the
# plain $Collator.
for my $pair ([ "edge", "fog" ], [ "lake", "like" ]) {
    ok($undefAE->gt(@$pair));
    ok($Collator->lt(@$pair));
}
249
250##############
45394607 251
809c7673
TS
# With no table given, no entry is defined at all.

my $undef_table = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    level         => 1,
);

# Ordinary characters then follow the Unicode code point order.
for my $pair ([ '', 'A' ], [ 'ABC', 'B' ]) {
    ok($undef_table->lt(@$pair));
}

# Hangul should be decomposed (even w/o Unicode::Normalize).
#   U+AC00: Hangul GA
#   U+AE00: Hangul GEUL
#   U+3042: Hiragana A
ok($undef_table->lt("Perl", "\x{AC00}"));
ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}"));
ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}"));
ok($undef_table->lt("\x{AE00}", "\x{3042}"));

# Weight for CJK Ideographs is defined, though.
#   U+4E00: Ideograph "ONE"
#   U+4E8C: Ideograph "TWO"
for my $pair (
    [ "",         "\x{4E00}" ],
    [ "\x{4E8C}", "ABC"      ],
    [ "\x{4E00}", "\x{3042}" ],
    [ "\x{4E00}", "\x{4E8C}" ],
) {
    ok($undef_table->lt(@$pair));
}
282
283
284##############
285
# Collator with a handful of entries and no table.  Note that "entry" is
# given before "table" here.
my $few_entries = Unicode::Collate->new(
    entry => <<'ENTRIES',
0050 ; [.0101.0020.0002.0050] # P
0045 ; [.0102.0020.0002.0045] # E
0052 ; [.0103.0020.0002.0052] # R
004C ; [.0104.0020.0002.004C] # L
1100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G
1175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I
5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
ENTRIES
    table         => undef,
    normalization => undef,
);

# Characters with an entry sort before those without one.

my $sortABC = join '',
    $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ ");

# Two-argument ok() so that a failure reports the string actually produced.
ok($sortABC, "PERL ABCDFGHIJKMNOQSTUVWXYZ");

ok($few_entries->lt('E', 'D'));
ok($few_entries->lt("\x{5B57}", "\x{4E00}"));
ok($few_entries->lt("\x{AE30}", "\x{AC00}"));

# Hangul must be decomposed.

ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));
314
315##############
316
# UCA version 8 with no table and no CJK / Hangul overrides.
my $all_undef_8 = Unicode::Collate->new(
    table          => undef,
    normalization  => undef,
    overrideCJK    => undef,
    overrideHangul => undef,
    UCA_Version    => 8,
);

# All in the Unicode code point order.
# No hangul decomposition.

for my $pair (
    [ "\x{3402}", "\x{4E00}" ],
    [ "\x{4DFF}", "\x{4E00}" ],
    [ "\x{4E00}", "\x{AC00}" ],
) {
    ok($all_undef_8->lt(@$pair));
}
for my $lesser ("\x{1100}\x{1161}", "\x{ABFF}") {
    ok($all_undef_8->gt("\x{AC00}", $lesser));
}
333
334##############
335
# Same setup as above, but for UCA version 9.
my $all_undef_9 = Unicode::Collate->new(
    table          => undef,
    normalization  => undef,
    overrideCJK    => undef,
    overrideHangul => undef,
    UCA_Version    => 9,
);

# CJK Ideo. < CJK ext A/B < Others.
# No hangul decomposition.

for my $pair (
    [ "\x{4E00}",  "\x{3402}"  ],
    [ "\x{3402}",  "\x{20000}" ],
    [ "\x{20000}", "\x{AC00}"  ],
) {
    ok($all_undef_9->lt(@$pair));
}
for my $lesser (
    "\x{1100}\x{1161}",
    "\x{ABFF}",            # U+ABFF: not assigned
) {
    ok($all_undef_9->gt("\x{AC00}", $lesser));
}
809c7673
TS
352
353##############
354
# overrideCJK returns an empty list, and U+5B57 is the only entry.
my $ignoreCJK = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    overrideCJK   => sub { return () },
    entry         => <<'ENTRIES',
5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
ENTRIES
);

# All CJK Unified Ideographs except U+5B57 are ignored.

ok($ignoreCJK->eq("\x{4E00}", ""));
ok($ignoreCJK->lt("\x{4E00}", "\0"));
ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl"));  # U+4E00 is a CJK.
ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}"));  # U+4DFF is not CJK.
ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl"));  # 'r' is unassigned.
371
372##############
373
# overrideHangul returns an empty list, and U+AE00 is the only entry.
my $ignoreHangul = Unicode::Collate->new(
    table          => undef,
    normalization  => undef,
    overrideHangul => sub { return () },
    entry          => <<'ENTRIES',
AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL
ENTRIES
);

# All Hangul Syllables except U+AE00 are ignored.

ok($ignoreHangul->eq("\x{AC00}", ""));
for my $greater (
    "\0",
    "\x{AE00}",
    "\x{1100}\x{1161}",    # Jamo are not ignored.
) {
    ok($ignoreHangul->lt("\x{AC00}", $greater));
}
ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl"));  # 'r' is unassigned.
390
391##############
392
# Walk through the four "alternate" settings; each list is a set of
# pairs that must compare as "less than".  The previous setting is saved
# in %origAlter and restored at the end.
my %origAlter = $Collator->change(alternate => 'Blanked');

for my $pair (
    [ "death",   "de luge"        ],
    [ "de luge", "de-luge"        ],
    [ "de-luge", "deluge"         ],
    [ "deluge",  "de\x{2010}luge" ],
    [ "deluge",  "de Luge"        ],
) {
    ok($Collator->lt(@$pair));
}

$Collator->change(alternate => 'Non-ignorable');

for my $pair (
    [ "de luge", "de Luge"        ],
    [ "de Luge", "de-luge"        ],
    [ "de-Luge", "de\x{2010}luge" ],
    [ "de-luge", "death"          ],
    [ "death",   "deluge"         ],
) {
    ok($Collator->lt(@$pair));
}

$Collator->change(alternate => 'Shifted');

for my $pair (
    [ "death",   "de luge" ],
    [ "de luge", "de-luge" ],
    [ "de-luge", "deluge"  ],
    [ "deluge",  "de Luge" ],
    [ "de Luge", "deLuge"  ],
) {
    ok($Collator->lt(@$pair));
}

$Collator->change(alternate => 'Shift-Trimmed');

for my $pair (
    [ "death",   "deluge"  ],
    [ "deluge",  "de luge" ],
    [ "de luge", "de-luge" ],
    [ "de-luge", "deLuge"  ],
    [ "deLuge",  "de Luge" ],
) {
    ok($Collator->lt(@$pair));
}

# Restore the saved setting and check what the object now holds.
$Collator->change(%origAlter);

ok($Collator->{alternate}, 'shifted');
809c7673
TS
428
429##############
430
# overrideCJK supplies a weight array built from 0xFFFF minus the code
# point, so larger code points get smaller weights.  U+4E00 has an
# explicit entry instead.
my $overCJK = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    entry         => <<'ENTRIES',
0061 ; [.0101.0020.0002.0061] # latin a
0041 ; [.0101.0020.0008.0041] # LATIN A
4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
ENTRIES
    overrideCJK   => sub {
        my ($code_point) = @_;
        my $reversed = 0xFFFF - $code_point;
        return [ $reversed, 0x20, 0x2, $reversed ];
    },
);

ok($overCJK->lt("a", "A"));                # diff. at level 3.
ok($overCJK->lt("\x{4E03}", "\x{4E00}"));  # diff. at level 2.
for my $pair (
    [ "A\x{4E03}", "A\x{4E00}" ],
    [ "A\x{4E03}", "a\x{4E00}" ],
    [ "a\x{4E03}", "A\x{4E00}" ],
) {
    ok($overCJK->lt(@$pair));
}
450
451##############
452
0116f5dc
JH
# rearrange : 0x0E40..0x0E44, 0x0EC0..0x0EC4 (default)

# Switch rearrangement off; the previous setting is kept for restoring.
my %old_rearrange = $Collator->change(rearrange => undef);

for my $pair (
    [ "\x{0E41}A",  "\x{0E40}B"  ],
    [ "A\x{0E41}A", "A\x{0E40}B" ],
) {
    ok($Collator->gt(@$pair));
}

# Rearrange U+0061, 'a'.  This is a Unicode value, never a native value.
$Collator->change(rearrange => [ 0x61 ]);

ok($Collator->gt("ab", "AB"));  # as 'ba' > 'AB'

$Collator->change(%old_rearrange);

ok($Collator->lt("ab", "AB"));

# With the restored setting, and with $all_undef_8, all four pairs
# compare as "less than".
{
    my @thai_pairs = (
        [ "\x{0E40}",   "\x{0E41}"   ],
        [ "\x{0E40}A",  "\x{0E41}B"  ],
        [ "\x{0E41}A",  "\x{0E40}B"  ],
        [ "A\x{0E41}A", "A\x{0E40}B" ],
    );
    for my $collator ($Collator, $all_undef_8) {
        for my $pair (@thai_pairs) {
            ok($collator->lt(@$pair));
        }
    }
}
809c7673
TS
477
478##############
479
# Collator created with an empty rearrange list.
my $no_rearrange = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    rearrange     => [],
);

for my $pair (
    [ "A",         "B"         ],
    [ "\x{0E40}",  "\x{0E41}"  ],
    [ "\x{0E40}A", "\x{0E41}B" ],
) {
    ok($no_rearrange->lt(@$pair));
}
for my $pair (
    [ "\x{0E41}A",  "\x{0E40}B"  ],
    [ "A\x{0E41}A", "A\x{0E40}B" ],
) {
    ok($no_rearrange->gt(@$pair));
}
491
492##############
493
809c7673
TS
# Collator created with rearrange explicitly undefined; the expectations
# are the same as for the empty rearrange list.
my $undef_rearrange = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    rearrange     => undef,
);

for my $pair (
    [ "A",         "B"         ],
    [ "\x{0E40}",  "\x{0E41}"  ],
    [ "\x{0E40}A", "\x{0E41}B" ],
) {
    ok($undef_rearrange->lt(@$pair));
}
for my $pair (
    [ "\x{0E41}A",  "\x{0E40}B"  ],
    [ "A\x{0E41}A", "A\x{0E40}B" ],
) {
    ok($undef_rearrange->gt(@$pair));
}
505
506##############
507
# The preprocess hook removes every "a", "an" or "the" (any case) that is
# followed by whitespace before the string is collated.
my $dropArticles = Unicode::Collate->new(
    table         => "keys.txt",
    normalization => undef,
    preprocess    => sub {
        my ($text) = @_;
        $text =~ s/\b(?:an?|the)\s+//ig;
        return $text;
    },
);

ok($dropArticles->eq("camel", "a camel"));
ok($dropArticles->eq("Perl", "The Perl"));
ok($dropArticles->lt("the pen", "a pencil"));

# The plain collator still sees the articles.
ok($Collator->lt("Perl", "The Perl"));
ok($Collator->gt("the pen", "a pencil"));
523
524##############
525
# Collator that compares level 1 backwards.
my $backLevel1 = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    backwards     => [ 1 ],
);

# all strings are reversed at level 1.

for my $pair (
    [ "AB",               "BA"               ],
    [ "\x{3042}\x{3044}", "\x{3044}\x{3042}" ],
) {
    ok($backLevel1->gt(@$pair));
}
536
537##############
538
# Collator that compares level 2 backwards and treats kana, hangul and
# bopomofo as undefined (undefName).
my $backLevel2 = Unicode::Collate->new(
    table         => "keys.txt",
    normalization => undef,
    undefName     => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
    backwards     => 2,
);

# Accent order: $backLevel2 and the plain $Collator disagree on each pair.
{
    my @accent_pairs = (
        [ "Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}" ],
        [ "ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}" ],
    );
    for my $pair (@accent_pairs) {
        ok($backLevel2->gt(@$pair));
    }
    for my $pair (@accent_pairs) {
        ok($Collator->lt(@$pair));
    }
}

# HIRAGANA and KATAKANA are made undefined via undefName.
# So they are after CJK Unified Ideographs.

{
    my @kana_pairs = (
        [ "\x{4E00}", $hiragana ],
        [ "\x{4E03}", $katakana ],
    );
    for my $pair (@kana_pairs) {
        ok($backLevel2->lt(@$pair));
    }
    for my $pair (@kana_pairs) {
        ok($Collator->gt(@$pair));
    }
}
558
559##############
caffd4cf 560
# ignorable after variable

{
    # Pairs that must compare equal under both 'Shifted' and 'blanked'.
    my @equal_pairs = (
        [ "?\x{300}!\x{301}\x{315}", "?!"        ],
        [ "?\x{300}A\x{301}",        "?$A_acute" ],
        [ "?\x{300}",                "?"         ],
        [ "?\x{344}",                "?"         ],  # U+0344 has two CEs.
    );

    # Shifted (the setting in effect when this section starts).
    for my $pair (@equal_pairs) {
        ok($Collator->eq(@$pair));
    }

    $Collator->change(level => 3);
    ok($Collator->eq("\cA", "?"));

    $Collator->change(alternate => 'blanked', level => 4);
    for my $pair (@equal_pairs) {
        ok($Collator->eq(@$pair));
    }

    $Collator->change(level => 3);
    ok($Collator->eq("\cA", "?"));
}

# Non-ignorable: the same kind of strings now compare as different.
$Collator->change(alternate => 'Non-ignorable', level => 4);

ok($Collator->lt("?\x{300}", "?!"));
ok($Collator->gt("?\x{300}A$acute", "?$A_acute"));
ok($Collator->gt("?\x{300}", "?"));
ok($Collator->gt("?\x{344}", "?"));

$Collator->change(level => 3);
ok($Collator->lt("\cA", "?"));

# Leave the shared collator at 'Shifted', level 4.
$Collator->change(alternate => 'Shifted', level => 4);
592
593##############
594
# According to Conformance Test,
# a L3-ignorable is treated as a completely ignorable.

my $L3ignorable = Unicode::Collate->new(
    alternate     => 'Non-ignorable',
    level         => 3,
    table         => undef,
    normalization => undef,
    entry         => <<'ENTRIES',
0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
0001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429)
0591 ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA
1D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM
0021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK
09BE ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA
09C7 ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E
09CB ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
09C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
1D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE
1D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK
1D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA
1D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. MINIMA BLACK
ENTRIES
);

# \cA and U+0591 both sort before "!" and are equal to each other.
ok($L3ignorable->lt("\cA", "!"));
ok($L3ignorable->lt("\x{591}", "!"));
ok($L3ignorable->eq("\cA", "\x{591}"));

# One of these characters placed between U+09C7 and U+09BE does not
# change the result; the pair also equals the single character U+09CB.
for my $variant (
    "\x{09C7}\cA\x{09BE}A",
    "\x{09C7}\x{0591}\x{09BE}A",
    "\x{09C7}\x{1D165}\x{09BE}A",
    "\x{09CB}A",
) {
    ok($L3ignorable->eq("\x{09C7}\x{09BE}A", $variant));
}

# Musical symbols whose second element is the combining stem.
ok($L3ignorable->lt("\x{1D1BB}", "\x{1D1BC}"));
for my $pair (
    [ "\x{1D1BB}", "\x{1D1B9}"           ],
    [ "\x{1D1BC}", "\x{1D1BA}"           ],
    [ "\x{1D1BB}", "\x{1D1B9}\x{1D165}" ],
    [ "\x{1D1BC}", "\x{1D1BA}\x{1D165}" ],
) {
    ok($L3ignorable->eq(@$pair));
}