This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Refactor die_exit.t to loop over a list, rather than iterate on a hash.
[perl5.git] / cpan / Unicode-Collate / t / ignor.t
1
# Runs before any module is loaded: bail out where these tests cannot work,
# and point @INC at the in-tree libraries when run as part of the perl core.
BEGIN {
    # pack('U', 0x41) must yield the character "A"; when it does not, emit a
    # "skip everything" plan (1..0) and leave with a success status.
    if ("A" ne pack('U', 0x41)) {
        print "1..0 # Unicode::Collate cannot stringify a Unicode code point\n";
        exit 0;
    }

    # Under PERL_CORE, move into t/ (when that directory exists) and replace
    # @INC with the relative lib directory (MacOS-style path on MacOS).
    if ($ENV{PERL_CORE}) {
        -d 't' and chdir('t');
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}
13
14 use Test;
15 BEGIN { plan tests => 41 };
16
17 use strict;
18 use warnings;
19 use Unicode::Collate;
20
# Test 1: getting this far means Unicode::Collate was loaded.
ok(1);

#########################

# Collator tailored for traditional Spanish: the digraph "ch" (in each of the
# listed capitalizations) is a single collation element with primary weight
# 0A3F. Characters whose names match ignoreName are made ignorable.
my $trad = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    ignoreName    => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
    level         => 3,
    entry         => <<'ENTRIES',
 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
 0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
 0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
ENTRIES
);
# For comparison, the single letters on either side of that weight:
# 0063  ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
# 0064  ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D

##### 2..3

# Words containing "ch" must sort after every other "c..." word and before
# "d...", for lowercase and uppercase input alike.
for my $words (
    [ [qw(acha aca ada acia acka)], [qw(aca acia acka acha ada)] ],
    [ [qw(ACHA ACA ADA ACIA ACKA)], [qw(ACA ACIA ACKA ACHA ADA)] ],
) {
    my ($unsorted, $expected) = @$words;
    my $got = join ':', $trad->sort(@$unsorted);
    ok($got, join(':', @$expected));
}
50
##### 4..15

# Three rounds of the same four comparisons. Only the expectation for
# control characters placed between "c" and "h" differs per round:
#   tests 4..7   - collator as constructed (UCA v14): "ocho" gt
#   tests 8..11  - after change(UCA_Version => 9):    "ocho" eq
#   tests 12..15 - after change(UCA_Version => 8):    "ocho" gt
# In every round "-" equals the empty string, yet "oc-ho" still sorts
# before "ocho".
for my $round (
    [ undef, 'gt' ],    # 4..7
    [ 9,     'eq' ],    # 8..11
    [ 8,     'gt' ],    # 12..15
) {
    my ($uca_version, $ctrl_cmp) = @$round;

    # The first round keeps whatever version the constructor chose.
    $trad->change(UCA_Version => $uca_version) if defined $uca_version;

    ok($trad->$ctrl_cmp("ocho", "oc\cAho"));
    ok($trad->$ctrl_cmp("ocho", "oc\0\cA\0\cBho"));
    ok($trad->eq("-", ""));
    ok($trad->gt("ocho", "oc-ho"));
}
75
76
##### 16..19

$trad->change(UCA_Version => 9);

# HIRAGANA and KATAKANA are ignorable via ignoreName, so each string must
# equal the empty string and, consequently, the other string as well.
my ($hiragana, $katakana) = ("\x{3042}\x{3044}", "\x{30A2}\x{30A4}");

for my $pair (
    [ $hiragana, ""        ],
    [ "",        $katakana ],
    [ $hiragana, $katakana ],
    [ $katakana, $hiragana ],
) {
    ok($trad->eq($pair->[0], $pair->[1]));
}
89
90
##### 20..31

# According to the Conformance Test (UCA_Version == 9 or 11),
# an L3-ignorable is treated as completely ignorable.

# Self-contained collator: no table file, only the entries given here.
my $L3ignorable = Unicode::Collate->new(
    alternate     => 'Non-ignorable',
    level         => 3,
    table         => undef,
    normalization => undef,
    UCA_Version   => 9,
    entry         => <<'ENTRIES',
0000  ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
0001  ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429)
0591  ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA
1D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM
0021  ; [*024B.0020.0002.0021] # EXCLAMATION MARK
09BE  ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA
09C7  ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E
09CB  ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
09C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
1D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE
1D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK
1D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA
1D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. MINIMA BLACK
ENTRIES
);

{
    # U+09C7 U+09BE has its own entry above, with the same weights as U+09CB.
    my $e_aa = "\x{09C7}\x{09BE}A";

    # Each row: comparison method, left operand, right operand.
    my @checks = (
        # Characters with all-zero primary..tertiary weights sort before "!"
        # and compare equal to one another.
        [ 'lt', "\cA",       "!"       ],
        [ 'lt', "\x{591}",   "!"       ],
        [ 'eq', "\cA",       "\x{591}" ],

        # Such characters inserted between U+09C7 and U+09BE make no
        # difference, and the pair equals the single character U+09CB.
        [ 'eq', $e_aa, "\x{09C7}\cA\x{09BE}A"       ],
        [ 'eq', $e_aa, "\x{09C7}\x{0591}\x{09BE}A"  ],
        [ 'eq', $e_aa, "\x{09C7}\x{1D165}\x{09BE}A" ],
        [ 'eq', $e_aa, "\x{09CB}A"                  ],

        # The MINIMA symbols equal their SEMIBREVIS counterparts, with or
        # without an explicit U+1D165 appended to the latter.
        [ 'lt', "\x{1D1BB}", "\x{1D1BC}"          ],
        [ 'eq', "\x{1D1BB}", "\x{1D1B9}"          ],
        [ 'eq', "\x{1D1BC}", "\x{1D1BA}"          ],
        [ 'eq', "\x{1D1BB}", "\x{1D1B9}\x{1D165}" ],
        [ 'eq', "\x{1D1BC}", "\x{1D1BA}\x{1D165}" ],
    );

    for my $check (@checks) {
        my ($cmp, $left, $right) = @$check;
        ok($L3ignorable->$cmp($left, $right));
    }
}
131
##### 32..41

# Level-1 collator pinned to UCA_Version 14, carrying the traditional
# Spanish "ch" contraction plus an explicit entry for U+034F whose
# primary, secondary and tertiary weights are all zero.
my $c = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    level         => 1,
    UCA_Version   => 14,
    entry         => <<'ENTRIES',
034F  ; [.0000.0000.0000.034F] # COMBINING GRAPHEME JOINER
0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
0043 0068 ; [.0A3F.0020.0007.0043] # "Ch" in traditional Spanish
0043 0048 ; [.0A3F.0020.0008.0043] # "CH" in traditional Spanish
ENTRIES
);
# For comparison, the single letters on either side of the "ch" weight:
# 0063  ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
# 0064  ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D

{
    # "ocho" with something wedged between the "c" and the "h".
    my @interrupted = ("oc\x00\x00ho", "oc\cAho", "oc\x{034F}ho");
    my $with_cgj    = $interrupted[-1];

    # Every interrupted spelling sorts before the contracted "ocho" ...
    for my $word (@interrupted) {
        ok($c->gt("ocho", $word));
    }

    # ... and the U+034F spelling falls between "ocgo"/"oceo" and "ocio".
    ok($c->gt("ocio", $with_cgj));
    ok($c->lt("ocgo", $with_cgj));
    ok($c->lt("oceo", $with_cgj));

    # Sort keys: one element (0A3F) for the contraction, two separate
    # elements (0A3D 0AB9) for each interrupted spelling.
    ok($c->viewSortKey("ocho"), "[0B4B 0A3F 0B4B | | |]");
    for my $word (@interrupted) {
        ok($c->viewSortKey($word), "[0B4B 0A3D 0AB9 0B4B | | |]");
    }
}
160
161