This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add Mike Kelly to AUTHORS
[perl5.git] / cpan / Unicode-Collate / Collate / Locale.pm
CommitLineData
00e00351
CBW
package Unicode::Collate::Locale;

use strict;
use Carp;
use base qw(Unicode::Collate);

our $VERSION = '0.62';

use File::Spec;

# Directory holding the per-locale tailoring files: the path this module
# was loaded from (as recorded in %INC) with its ".pm" suffix removed.
(my $ModPath = $INC{'Unicode/Collate/Locale.pm'}) =~ s/\.pm$//;

# File name passed to Unicode::Collate as its 'table' parameter.
my $KeyPath = File::Spec->catfile('allkeys.txt');

# Extension appended to a %LocaleFile value to form a tailoring file name.
my $PL_EXT = '.pl';

# Maps an accepted locale name to the base name (without extension) of its
# tailoring file.  Most locales use their own name as the file name;
# 'default' maps to the empty string, i.e. it has no tailoring file.
my %LocaleFile = (
    (map { ($_ => $_) } qw(
        af ar az ca cs cy da eo es et fi fil fo fr ha haw
        hr hu hy ig is kl lt lv mt nb nn nso om pl ro
        se sk sl sq sv sw tn to tr uk vi wo yo
    )),
    'default'         => '',
    'de__phonebook'   => 'de_phone',
    'es__traditional' => 'es_trad',
);

# Normalize a user-supplied locale name and resolve it to a key of
# %LocaleFile.
#
# The name is lowercased; '-', ' ' and '.' are replaced by '_'; a trailing
# "_phone" or "_phonebk" is expanded to "_phonebook" and a trailing "_trad"
# to "_traditional".  If the normalized name has no true entry in
# %LocaleFile, it is split on '_' into (up to) three components and these
# candidates are tried in order:
#
#   first_second_third, first_second, first__third, first__second, first
#
# Returns the first candidate with a true entry in %LocaleFile, or the
# string 'default' when nothing matches or when the argument is false.
sub _locale {
    my $locale = shift;
    return 'default' if !$locale;

    $locale = lc $locale;
    $locale =~ tr/\-\ \./_/;
    $locale =~ s/_phone(?:bk)?\z/_phonebook/;
    $locale =~ s/_trad\z/_traditional/;
    return $locale if $LocaleFile{$locale};

    # Two underscores are appended and a negative LIMIT is used so that
    # split always yields at least three fields.  Without the LIMIT,
    # trailing empty fields are dropped and the missing components would
    # be undef, which triggers "uninitialized value" warnings under -w
    # when they are interpolated below.
    my ($lang, $second, $third) = split(/_/, $locale.'__', -1);
    for my $candidate (
        "${lang}_${second}_$third",
        "${lang}_$second",
        "${lang}__$third",
        "${lang}__$second",
        $lang,
    ) {
        return $candidate if $LocaleFile{$candidate};
    }
    return 'default';
}
40
# Accessor: returns the value stored under the 'accepted_locale' key of
# the collator object (set by new()).
sub getlocale {
    my ($self) = @_;
    return $self->{accepted_locale};
}
44
# Load the tailoring file for an accepted locale name.
#
# Looks up the file base name in %LocaleFile; when there is none (e.g. for
# 'default', whose entry is the empty string) returns nothing.  Otherwise
# evaluates "<base name>$PL_EXT" under $ModPath with do() and returns the
# value the file yields.
#
# Croaks when the file does not yield a true value.  A missing file is
# reported as "can't be found"; any other failure is reported with the
# underlying reason instead of being mislabelled as a missing file.
sub _fetchpl {
    my $accepted = shift;
    my $f = $LocaleFile{$accepted};
    return if !$f;
    $f .= $PL_EXT;
    my $path = File::Spec->catfile($ModPath, $f);
    my $h = do $path;
    if (!$h) {
        croak "Unicode/Collate/Locale/$f can't be found" if !-e $path;

        # do FILE leaves a compile/runtime error in $@; when the file
        # could not be read it returns undef and sets $!.
        my $reason = $@          ? $@
                   : !defined $h ? "$!"
                   :               'it did not return a true value';
        $reason =~ s/\s+\z//;    # keep croak's own "at FILE line N" suffix
        croak "Unicode/Collate/Locale/$f can't be loaded: $reason";
    }
    return $h;
}
55
00e00351
CBW
# Constructor.  Takes the same key/value parameters as Unicode::Collate's
# new(), plus 'locale'.
#
# The requested locale is resolved with _locale() and stored under
# 'accepted_locale'.  'table' is always set to $KeyPath, so passing a
# 'table' parameter croaks.  Every key provided by the locale's tailoring
# file is copied into the parameters; supplying one of those keys
# yourself croaks.  The merged parameters are passed to SUPER::new.
sub new {
    my ($class, %hash) = @_;
    $hash{accepted_locale} = _locale($hash{locale});

    croak "your table can't be used with Unicode::Collate::Locale"
        if exists $hash{table};
    $hash{table} = $KeyPath;

    # _fetchpl() returns nothing for a locale without a tailoring file.
    my $tailoring = _fetchpl($hash{accepted_locale}) || {};
    for my $key (keys %$tailoring) {
        croak "$key is reserved by $hash{locale}, can't be overwritten"
            if exists $hash{$key};
        $hash{$key} = $tailoring->{$key};
    }
    return $class->SUPER::new(%hash);
}
75
761;
77__END__
78
79=head1 NAME
80
81Unicode::Collate::Locale - Linguistic tailoring for DUCET via Unicode::Collate
82
83=head1 SYNOPSIS
84
85 use Unicode::Collate::Locale;
86
87 $Collator = Unicode::Collate::Locale->
88 new(locale => $locale_name, %tailoring);
89
90 @sorted = $Collator->sort(@not_sorted);
91
92=head1 DESCRIPTION
93
This module provides linguistic tailoring for DUCET,
taking advantage of C<Unicode::Collate>.
96
97=head2 Constructor
98
99The C<new> method returns a collator object.
100
101A parameter list for the constructor is a hash, which can include
102a special key C<'locale'> and its value (case-insensitive) standing
103for a two-letter language code (ISO-639) like C<'en'> for English.
104For example, C<Unicode::Collate::Locale-E<gt>new(locale =E<gt> 'FR')>
105returns a collator tailored for French.
106
C<$locale_name> may be suffixed with a territory (country)
code or a variant code, which are separated with C<'_'>.
E.g. C<en_US> for English in USA,
C<es_ES_traditional> for Spanish in Spain (Traditional).
111
If tailoring for C<$locale_name> is not available,
fallback is selected in the following order:
114
64dc7822
CBW
    1. language_territory_variant
    2. language_territory
    3. language__variant
    4. language__variant, with the variant given as the second
       component (e.g. es_traditional)
    5. language
    6. default
00e00351
CBW
120
121Tailoring tags provided by C<Unicode::Collate> are allowed
122as long as they are not used for C<'locale'> support.
123Esp. the C<table> tag is always untailorable
124since it is reserved for DUCET.
125
126E.g. a collator for French, which ignores diacritics and case difference
127(i.e. level 1), with reversed case ordering and no normalization.
128
129 Unicode::Collate::Locale->new(
64dc7822
CBW
130 level => 1,
131 locale => 'fr',
132 upper_before_lower => 1,
133 normalization => undef
00e00351
CBW
134 )
135
136=head2 Methods
137
138C<Unicode::Collate::Locale> is a subclass of C<Unicode::Collate>
139and methods other than C<new> are inherited from C<Unicode::Collate>.
140
141Here is a list of additional methods:
142
143=over 4
144
145=item C<$Collator-E<gt>getlocale>
146
Returns the locale name actually accepted and used for collation.
If linguistic tailoring is not provided for a language code you passed
(intentionally for some languages, or due to the incomplete implementation),
this method returns the string C<'default'>, meaning no special tailoring.
151
152=back
153
154=head2 A list of tailorable locales
155
64dc7822
CBW
156 locale name description
157 ----------------------------------------------------------
6484f676 158 af Afrikaans
6709de88 159 ar Arabic
f1a7422f 160 az Azerbaijani (Azeri)
64dc7822 161 ca Catalan
00e00351 162 cs Czech
6484f676
CBW
163 cy Welsh
164 da Danish
1393fe00 165 de__phonebook German (umlaut as 'ae', 'oe', 'ue')
456a1446 166 eo Esperanto
00e00351
CBW
167 es Spanish
168 es__traditional Spanish ('ch' and 'll' as a grapheme)
64dc7822
CBW
169 et Estonian
170 fi Finnish
f1a7422f 171 fil Filipino
6484f676 172 fo Faroese
00e00351 173 fr French
f1a7422f 174 ha Hausa
6484f676 175 haw Hawaiian
c02ee425 176 hr Croatian
6709de88
CBW
177 hu Hungarian
178 hy Armenian
c02ee425 179 ig Igbo
6484f676
CBW
180 is Icelandic
181 kl Kalaallisut
f1a7422f 182 lt Lithuanian
64dc7822 183 lv Latvian
f1a7422f 184 mt Maltese
456a1446 185 nb Norwegian Bokmal
00e00351 186 nn Norwegian Nynorsk
1393fe00
CBW
187 nso Northern Sotho
188 om Oromo
00e00351 189 pl Polish
456a1446 190 ro Romanian
6709de88 191 se Northern Sami
64dc7822
CBW
192 sk Slovak
193 sl Slovenian
c02ee425 194 sq Albanian
456a1446 195 sv Swedish
6484f676 196 sw Swahili
1393fe00 197 tn Tswana
6709de88 198 to Tonga
f1a7422f 199 tr Turkish
6709de88 200 uk Ukrainian
1393fe00 201 vi Vietnamese
f1a7422f
CBW
202 wo Wolof
203 yo Yoruba
204
205=head1 INSTALL
206
207Installation of Unicode::Collate::Locale requires F<Collate/Locale.pm>,
F<Collate/Locale/*.pl> and F<Collate/allkeys.txt>. On building,
209Unicode::Collate::Locale doesn't require F<data/*.txt> and F<mklocale>.
210Tests for Unicode::Collate::Locale are named F<t/loc_*.t>.
00e00351 211
1393fe00
CBW
212=head1 CAVEAT
213
214=over 4
215
216=item tailoring is not maximum
217
218If a certain letter is tailored, its equivalents are not always
219tailored as well as it. For example, even though W is tailored,
220fullwidth W (C<U+FF37>), W with acute (C<U+1E82>), etc. are not
c02ee425
CBW
221tailored. The result may depend on whether source strings are
222normalized or not, and whether decomposed or composed.
Thus C<(normalization =E<gt> undef)> is less preferred.
1393fe00
CBW
224
225=back
226
00e00351
CBW
227=head1 AUTHOR
228
229The Unicode::Collate::Locale module for perl was written
230by SADAHIRO Tomoyuki, <SADAHIRO@cpan.org>.
231This module is Copyright(C) 2004-2010, SADAHIRO Tomoyuki. Japan.
232All rights reserved.
233
234This module is free software; you can redistribute it and/or
235modify it under the same terms as Perl itself.
236
237=head1 SEE ALSO
238
239=over 4
240
241=item Unicode Collation Algorithm - UTS #10
242
243L<http://www.unicode.org/reports/tr10/>
244
245=item The Default Unicode Collation Element Table (DUCET)
246
247L<http://www.unicode.org/Public/UCA/latest/allkeys.txt>
248
249=item CLDR - Unicode Common Locale Data Repository
250
251L<http://cldr.unicode.org/>
252
253=item L<Unicode::Collate>
254
255=item L<Unicode::Normalize>
256
257=back
258
259=cut