This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Avoid defining a full XSLoader::bootstrap_inherit post 5.6, as it's not needed.
[perl5.git] / cpan / Unicode-Collate / Collate / Locale.pm
CommitLineData
00e00351
CBW
1package Unicode::Collate::Locale;
2
3use strict;
4use Carp;
5use base qw(Unicode::Collate);
6
c02ee425 7our $VERSION = '0.61';
00e00351
CBW
8
9use File::Spec;
10
# Directory holding the per-locale tailoring files: the path this module was
# loaded from (as recorded in %INC) with the trailing ".pm" removed.
(my $ModPath = $INC{'Unicode/Collate/Locale.pm'}) =~ s/\.pm$//;

# File name stored as the "table" option before the parent constructor runs.
my $KeyPath = File::Spec->catfile('allkeys.txt');

# Extension appended to a %LocaleFile value to form a tailoring file name.
my $PL_EXT = '.pl';
14
# Maps each accepted locale name to the base name of its tailoring file
# (the file extension is appended when the file is loaded).  A false value,
# as for 'default', means that no tailoring file is loaded.
my %LocaleFile = map { ($_, $_) } qw(
    af az ca cs cy da eo es et fi fil fo fr ha haw hr ig is kl
    lt lv mt nn nso om pl ro sk sl sq sv sw tn tr vi wo yo
);
$LocaleFile{'default'} = '';
$LocaleFile{'de__phonebook'} = 'de_phone';
$LocaleFile{'es__traditional'} = 'es_trad';
$LocaleFile{'nb'} = 'nn';    # shares the 'nn' tailoring file
00e00351
CBW
23
# Resolve a user-supplied locale name to a key of %LocaleFile.
#
# The name is lower-cased; '-', ' ' and '.' are turned into '_'; and the
# short suffixes '_phone' / '_trad' are expanded to '_phonebook' /
# '_traditional'.  If the normalized name has no true entry in %LocaleFile,
# it is split on '_' into language, territory and variant, and progressively
# less specific combinations are tried.
#
# Returns the first candidate with a true %LocaleFile entry, or the string
# 'default' when the argument is false or nothing matches.
sub _locale {
    my $locale = shift;
    return 'default' if !$locale;

    $locale = lc $locale;
    $locale =~ tr/\-\ \./_/;
    $locale =~ s/_phone\z/_phonebook/;
    $locale =~ s/_trad\z/_traditional/;
    return $locale if $LocaleFile{$locale};

    # The '__' padding is meant to guarantee three fields.  A negative LIMIT
    # is required for that: without it split discards trailing empty fields,
    # leaving territory/variant undefined (and warning under -w) for names
    # such as 'fr_fr'.
    my ($lang, $terr, $var) = split(/_/, $locale.'__', -1);
    for my $loc ("${lang}_${terr}_$var", "${lang}_$terr",
                 "${lang}__$var", "${lang}__$terr", $lang) {
        return $loc if $LocaleFile{$loc};
    }
    return 'default';
}
40
# Accessor: returns the value stored under the object's 'accepted_locale'
# key (the key that new() fills in from the resolved locale name).
sub getlocale {
    my $self = shift;
    return $self->{accepted_locale};
}
44
# Load the tailoring file for an accepted locale name.
#
# Looks the name up in %LocaleFile; when the entry is false (e.g. for
# 'default') there is nothing to load and the sub returns empty.  Otherwise
# the file "<entry>$PL_EXT" under $ModPath is evaluated with "do" and its
# result is returned (new() dereferences it as a hash of constructor options).
#
# Croaks if evaluating the file yields a false value.
sub _fetchpl {
    my $accepted = shift;
    my $f = $LocaleFile{$accepted};
    return if !$f;
    $f .= $PL_EXT;
    my $path = File::Spec->catfile($ModPath, $f);
    my $h = do $path;
    croak "Unicode/Collate/Locale/$f can't be found" if !$h;
    return $h;
}
55
00e00351
CBW
# Constructor.  Takes a key/value list; the special key 'locale' selects the
# tailoring, and every remaining key is handed on to the parent constructor.
#
# The resolved locale name is stored under 'accepted_locale'.  The 'table'
# key is reserved: passing it croaks, and it is always set to $KeyPath.
# Each option supplied by the locale's tailoring file is copied into the
# argument hash; if the caller passed the same key, the constructor croaks.
#
# Returns whatever the parent class's new() returns for the merged options.
sub new {
    my $class = shift;
    my %hash = @_;
    $hash{accepted_locale} = _locale($hash{locale});

    if (exists $hash{table}) {
        croak "your table can't be used with Unicode::Collate::Locale";
    }
    $hash{table} = $KeyPath;

    # _fetchpl() returns nothing when the locale has no tailoring file, in
    # which case the loop below simply does not iterate.
    my $href = _fetchpl($hash{accepted_locale});
    while (my($k,$v) = each %$href) {
        if (exists $hash{$k}) {
            croak "$k is reserved by $hash{locale}, can't be overwritten";
        }
        $hash{$k} = $v;
    }
    return $class->SUPER::new(%hash);
}
75
761;
77__END__
78
79=head1 NAME
80
81Unicode::Collate::Locale - Linguistic tailoring for DUCET via Unicode::Collate
82
83=head1 SYNOPSIS
84
85 use Unicode::Collate::Locale;
86
87 $Collator = Unicode::Collate::Locale->
88 new(locale => $locale_name, %tailoring);
89
90 @sorted = $Collator->sort(@not_sorted);
91
92=head1 DESCRIPTION
93
94This module provides linguistic tailoring for the DUCET,
95taking advantage of C<Unicode::Collate>.
96
97=head2 Constructor
98
99The C<new> method returns a collator object.
100
101A parameter list for the constructor is a hash, which can include
102a special key C<'locale'> and its value (case-insensitive) standing
103for a two-letter language code (ISO-639) like C<'en'> for English.
104For example, C<Unicode::Collate::Locale-E<gt>new(locale =E<gt> 'FR')>
105returns a collator tailored for French.
106
107C<$locale_name> may be suffixed with a territory(country)
108code or a variant code, which are separated with C<'_'>.
109E.g. C<en_US> for English in USA,
110C<es_ES_traditional> for Spanish in Spain (Traditional).
111
112If C<$locale_name> is not available,
113fallback is selected in the following order:
114
64dc7822
CBW
115 1. language_territory_variant
116 2. language_territory
117 3. language__variant
118 4. language
119 5. default
00e00351
CBW
120
121Tailoring tags provided by C<Unicode::Collate> are allowed
122as long as they are not used for C<'locale'> support.
123Esp. the C<table> tag is always untailorable
124since it is reserved for DUCET.
125
126E.g. a collator for French, which ignores diacritics and case difference
127(i.e. level 1), with reversed case ordering and no normalization.
128
129 Unicode::Collate::Locale->new(
64dc7822
CBW
130 level => 1,
131 locale => 'fr',
132 upper_before_lower => 1,
133 normalization => undef
00e00351
CBW
134 )
135
136=head2 Methods
137
138C<Unicode::Collate::Locale> is a subclass of C<Unicode::Collate>
139and methods other than C<new> are inherited from C<Unicode::Collate>.
140
141Here is a list of additional methods:
142
143=over 4
144
145=item C<$Collator-E<gt>getlocale>
146
147Returns the language code that was accepted and is actually used for collation.
148If linguistic tailoring is not provided for a language code you passed
149(intentionally for some languages, or due to the incomplete implementation),
150this method returns a string C<'default'> meaning no special tailoring.
151
152=back
153
154=head2 A list of tailorable locales
155
64dc7822
CBW
156 locale name description
157 ----------------------------------------------------------
6484f676 158 af Afrikaans
f1a7422f 159 az Azerbaijani (Azeri)
64dc7822 160 ca Catalan
00e00351 161 cs Czech
6484f676
CBW
162 cy Welsh
163 da Danish
1393fe00 164 de__phonebook German (umlaut as 'ae', 'oe', 'ue')
456a1446 165 eo Esperanto
00e00351
CBW
166 es Spanish
167 es__traditional Spanish ('ch' and 'll' as a grapheme)
64dc7822
CBW
168 et Estonian
169 fi Finnish
f1a7422f 170 fil Filipino
6484f676 171 fo Faroese
00e00351 172 fr French
f1a7422f 173 ha Hausa
6484f676 174 haw Hawaiian
c02ee425
CBW
175 hr Croatian
176 ig Igbo
6484f676
CBW
177 is Icelandic
178 kl Kalaallisut
f1a7422f 179 lt Lithuanian
64dc7822 180 lv Latvian
f1a7422f 181 mt Maltese
456a1446 182 nb Norwegian Bokmal
00e00351 183 nn Norwegian Nynorsk
1393fe00
CBW
184 nso Northern Sotho
185 om Oromo
00e00351 186 pl Polish
456a1446 187 ro Romanian
64dc7822
CBW
188 sk Slovak
189 sl Slovenian
c02ee425 190 sq Albanian
456a1446 191 sv Swedish
6484f676 192 sw Swahili
1393fe00 193 tn Tswana
f1a7422f 194 tr Turkish
1393fe00 195 vi Vietnamese
f1a7422f
CBW
196 wo Wolof
197 yo Yoruba
198
199=head1 INSTALL
200
201Installation of Unicode::Collate::Locale requires F<Collate/Locale.pm>,
202F<Collate/Locale/*.pl> and F<Collate/allkeys.txt>. On building,
203Unicode::Collate::Locale doesn't require F<data/*.txt> and F<mklocale>.
204Tests for Unicode::Collate::Locale are named F<t/loc_*.t>.
00e00351 205
1393fe00
CBW
206=head1 CAVEAT
207
208=over 4
209
210=item tailoring is not maximum
211
212If a certain letter is tailored, its equivalents are not always
213tailored as well as it. For example, even though W is tailored,
214fullwidth W (C<U+FF37>), W with acute (C<U+1E82>), etc. are not
c02ee425
CBW
215tailored. The result may depend on whether source strings are
216normalized or not, and whether decomposed or composed.
217Thus C<(normalization =E<gt> undef)> is less preferred.
1393fe00
CBW
218
219=back
220
00e00351
CBW
221=head1 AUTHOR
222
223The Unicode::Collate::Locale module for perl was written
224by SADAHIRO Tomoyuki, <SADAHIRO@cpan.org>.
225This module is Copyright(C) 2004-2010, SADAHIRO Tomoyuki. Japan.
226All rights reserved.
227
228This module is free software; you can redistribute it and/or
229modify it under the same terms as Perl itself.
230
231=head1 SEE ALSO
232
233=over 4
234
235=item Unicode Collation Algorithm - UTS #10
236
237L<http://www.unicode.org/reports/tr10/>
238
239=item The Default Unicode Collation Element Table (DUCET)
240
241L<http://www.unicode.org/Public/UCA/latest/allkeys.txt>
242
243=item CLDR - Unicode Common Locale Data Repository
244
245L<http://cldr.unicode.org/>
246
247=item L<Unicode::Collate>
248
249=item L<Unicode::Normalize>
250
251=back
252
253=cut