This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove NEXT's README and Changes files
[perl5.git] / cpan / Unicode-Collate / Collate / Locale.pm
CommitLineData
00e00351
CBW
1package Unicode::Collate::Locale;
2
3use strict;
4use Carp;
5use base qw(Unicode::Collate);
6
6484f676 7our $VERSION = '0.58';
00e00351
CBW
8
9use File::Spec;
10
# Directory that holds the per-locale tailoring files: the path this module
# was loaded from, minus its ".pm" suffix.
my $ModPath = $INC{'Unicode/Collate/Locale.pm'};
$ModPath =~ s/\.pm$//;

# Value handed to Unicode::Collate as its 'table' parameter.
my $KeyPath = File::Spec->catfile('allkeys.txt');

# Extension appended to a %LocaleFile value to form the file name.
my $PL_EXT = '.pl';

# Maps an accepted locale name to the base name of its tailoring file.
# Most locales use their own name; the trailing entries are the exceptions.
# An empty value ('default') means "no tailoring file".
my %LocaleFile = (
    (map { ($_, $_) } qw(
        af ca cs cy da eo es et fi fo fr haw is kl lv nn pl ro sk sl sv sw
    )),
    'default'         => '',
    'es__traditional' => 'es_trad',
    'nb'              => 'nn',
);
00e00351
CBW
21
# _locale($name)
#
# Maps a caller-supplied locale name onto a key of %LocaleFile that has a
# tailoring file, or onto 'default' when nothing matches (or when $name is
# false).  The name is lower-cased, '-', ' ' and '.' become '_', and a
# trailing "_trad" is expanded to "_traditional" before lookup.
sub _locale {
    my ($name) = @_;
    return 'default' if !$name;

    (my $norm = lc $name) =~ tr/\-\ \./_/;
    $norm =~ s/_trad\z/_traditional/;
    return $norm if $LocaleFile{$norm};

    # Appending '__' together with a LIMIT of 4 (the limit Perl would apply
    # implicitly for a three-variable list assignment) guarantees that the
    # territory and variant come back as '' rather than undef when absent.
    my ($lang, $terr, $var) = split(/_/, $norm . '__', 4);

    # Candidates are tried from most to least specific.
    my @candidates = (
        "${lang}_${terr}_$var",
        "${lang}_$terr",
        "${lang}__$var",
        "${lang}__$terr",
        $lang,
    );
    foreach my $cand (@candidates) {
        return $cand if $LocaleFile{$cand};
    }

    return 'default';
}
37
# getlocale()
#
# Accessor: returns the value stored under the object's 'accepted_locale'
# key (set by new() from the result of _locale()).
sub getlocale {
    my ($self) = @_;
    return $self->{accepted_locale};
}
41
6484f676
CBW
# _fetch_locale($accepted)
#
# Loads the tailoring file for an accepted locale name (a key of
# %LocaleFile) from the $ModPath directory and returns the value that file
# evaluates to; new() dereferences that value as a hash.
#
# Returns nothing when the locale has no file name registered (the
# 'default' entry, or a name missing from %LocaleFile).
#
# Croaks when the file cannot be read, fails to compile or run, or does not
# yield a true value.
sub _fetch_locale {
    my $accepted = shift;
    my $f = $LocaleFile{$accepted};
    return if !$f;
    $f .= $PL_EXT;
    my $path = File::Spec->catfile($ModPath, $f);
    my $h = do $path;
    if (!$h) {
        # "do FILE" sets $@ on a compile/run-time failure, and returns undef
        # with $! set when the file cannot be read.  Report these apart so a
        # broken locale file is not misreported as a missing one.
        my $name = "Unicode/Collate/Locale/$f";
        croak "$name can't be parsed: $@" if $@;
        croak "$name can't be found: $!"  if !defined $h;
        croak "$name didn't return a true value";
    }
    return $h;
}
52
00e00351
CBW
# new(%args)
#
# Constructor.  Resolves the 'locale' argument to a supported locale
# (stored as 'accepted_locale'), forces 'table' to $KeyPath, merges the
# locale's tailoring parameters into the argument hash and hands the
# result to the parent class constructor.
#
# Croaks if the caller passes a 'table' key, or passes any key that the
# locale's tailoring data also defines.
sub new {
    my ($class, %hash) = @_;

    $hash{accepted_locale} = _locale($hash{locale});

    # The table is fixed for this class; callers may not supply their own.
    croak "your table can't be used with Unicode::Collate::Locale"
        if exists $hash{table};
    $hash{table} = $KeyPath;

    # A locale without a tailoring file (e.g. 'default') contributes nothing.
    my $tailoring = _fetch_locale($hash{accepted_locale}) || {};
    for my $k (keys %$tailoring) {
        croak "$k is reserved by $hash{locale}, can't be overwritten"
            if exists $hash{$k};
        $hash{$k} = $tailoring->{$k};
    }

    return $class->SUPER::new(%hash);
}
72
731;
74__END__
75
76=head1 NAME
77
78Unicode::Collate::Locale - Linguistic tailoring for DUCET via Unicode::Collate
79
80=head1 SYNOPSIS
81
82 use Unicode::Collate::Locale;
83
84 $Collator = Unicode::Collate::Locale->
85 new(locale => $locale_name, %tailoring);
86
87 @sorted = $Collator->sort(@not_sorted);
88
89=head1 DESCRIPTION
90
91This module provides linguistic (locale-specific) tailoring for DUCET,
92taking advantage of C<Unicode::Collate>.
93
94=head2 Constructor
95
96The C<new> method returns a collator object.
97
98A parameter list for the constructor is a hash, which can include
99a special key C<'locale'> and its value (case-insensitive) standing
100for a two- or three-letter language code (ISO-639) like C<'en'> for English.
101For example, C<Unicode::Collate::Locale-E<gt>new(locale =E<gt> 'FR')>
102returns a collator tailored for French.
103
104C<$locale_name> may be suffixed with a territory(country)
105code or a variant code, which are separated with C<'_'>.
106E.g. C<en_US> for English in USA,
107C<es_ES_traditional> for Spanish in Spain (Traditional).
108
109If C<$locale_name> is not available,
110fallback is selected in the following order:
111
64dc7822
CBW
112 1. language_territory_variant
113 2. language_territory
114 3. language__variant
115 4. language
116 5. default
00e00351
CBW
117
118Tailoring tags provided by C<Unicode::Collate> are allowed
119as long as they are not used for C<'locale'> support.
120Esp. the C<table> tag is always untailorable
121since it is reserved for DUCET.
122
123E.g. a collator for French, which ignores diacritics and case difference
124(i.e. level 1), with reversed case ordering and no normalization.
125
126 Unicode::Collate::Locale->new(
64dc7822
CBW
127 level => 1,
128 locale => 'fr',
129 upper_before_lower => 1,
130 normalization => undef
00e00351
CBW
131 )
132
133=head2 Methods
134
135C<Unicode::Collate::Locale> is a subclass of C<Unicode::Collate>
136and methods other than C<new> are inherited from C<Unicode::Collate>.
137
138Here is a list of additional methods:
139
140=over 4
141
142=item C<$Collator-E<gt>getlocale>
143
144Returns the locale name that was accepted and is actually used for collation.
145If linguistic tailoring is not provided for a language code you passed
146(intentionally for some languages, or due to the incomplete implementation),
147this method returns a string C<'default'> meaning no special tailoring.
148
149=back
150
151=head2 A list of tailorable locales
152
64dc7822
CBW
153 locale name description
154 ----------------------------------------------------------
6484f676 155 af Afrikaans
64dc7822 156 ca Catalan
00e00351 157 cs Czech
6484f676
CBW
158 cy Welsh
159 da Danish
456a1446 160 eo Esperanto
00e00351
CBW
161 es Spanish
162 es__traditional Spanish ('ch' and 'll' as a grapheme)
64dc7822
CBW
163 et Estonian
164 fi Finnish
6484f676 165 fo Faroese
00e00351 166 fr French
6484f676
CBW
167 haw Hawaiian
168 is Icelandic
169 kl Kalaallisut
64dc7822 170 lv Latvian
456a1446 171 nb Norwegian Bokmal
00e00351
CBW
172 nn Norwegian Nynorsk
173 pl Polish
456a1446 174 ro Romanian
64dc7822
CBW
175 sk Slovak
176 sl Slovenian
456a1446 177 sv Swedish
6484f676 178 sw Swahili
00e00351
CBW
179
180=head1 AUTHOR
181
182The Unicode::Collate::Locale module for perl was written
183by SADAHIRO Tomoyuki, <SADAHIRO@cpan.org>.
184This module is Copyright(C) 2004-2010, SADAHIRO Tomoyuki. Japan.
185All rights reserved.
186
187This module is free software; you can redistribute it and/or
188modify it under the same terms as Perl itself.
189
190=head1 SEE ALSO
191
192=over 4
193
194=item Unicode Collation Algorithm - UTS #10
195
196L<http://www.unicode.org/reports/tr10/>
197
198=item The Default Unicode Collation Element Table (DUCET)
199
200L<http://www.unicode.org/Public/UCA/latest/allkeys.txt>
201
202=item CLDR - Unicode Common Locale Data Repository
203
204L<http://cldr.unicode.org/>
205
206=item L<Unicode::Collate>
207
208=item L<Unicode::Normalize>
209
210=back
211
212=cut