This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
I sometimes outsmart myself.
[perl5.git] / lib / Locale / Language.pm
CommitLineData
47a334e9
JH
1#-----------------------------------------------------------------------
2
3=head1 NAME
4
5Locale::Language - ISO two letter codes for language identification (ISO 639)
6
7=head1 SYNOPSIS
8
9 use Locale::Language;
88c28ceb 10
47a334e9
JH
11 $lang = code2language('en'); # $lang gets 'English'
12 $code = language2code('French'); # $code gets 'fr'
88c28ceb 13
47a334e9
JH
14 @codes = all_language_codes();
15 @names = all_language_names();
16
17=cut
18
19#-----------------------------------------------------------------------
20
21package Locale::Language;
22use strict;
23require 5.002;
24
25#-----------------------------------------------------------------------
26
27=head1 DESCRIPTION
28
29The C<Locale::Language> module provides access to the ISO two-letter
30codes for identifying languages, as defined in ISO 639. You can either
31access the codes via the L</"CONVERSION ROUTINES"> (described below),
32or with the two functions which return lists of all language codes or
33all language names.
34
35=cut
36
37#-----------------------------------------------------------------------
38
39require Exporter;
40
41#-----------------------------------------------------------------------
42# Public Global Variables
43#-----------------------------------------------------------------------
# Package globals: the module version plus the Exporter configuration.
use vars qw($VERSION @ISA @EXPORT);

# Build "major.minor" from the RCS revision keyword, zero-padding the
# minor part to two digits (revision 1.6 yields "1.06").
$VERSION = sprintf('%d.%02d', q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/);

# All four public functions are exported by default.
@ISA    = ('Exporter');
@EXPORT = qw(
    &code2language
    &language2code
    &all_language_codes
    &all_language_names
);

#-----------------------------------------------------------------------
# Private Global Variables
#-----------------------------------------------------------------------
# %CODES maps language code => language name; %LANGUAGES maps the
# lower-cased language name => language code.  Both are filled from the
# __DATA__ section by the initialisation block at the end of the file.
my (%CODES, %LANGUAGES);
55
56
57#=======================================================================
58
59=head1 CONVERSION ROUTINES
60
61There are two conversion routines: C<code2language()> and C<language2code()>.
62
63=over 8
64
65=item code2language()
66
67This function takes a two letter language code and returns a string
68which contains the name of the language identified. If the code is
69not a valid language code, as defined by ISO 639, then C<undef>
70will be returned.
71
72 $lang = code2language($code);
73
74=item language2code()
75
76This function takes a language name and returns the corresponding
77two letter language code, if such exists.
78If the argument could not be identified as a language name,
79then C<undef> will be returned.
80
81 $code = language2code('French');
82
83The case of the language name is not important.
84See the section L</"KNOWN BUGS AND LIMITATIONS"> below.
85
86=back
87
88=cut
89
90#=======================================================================
# Map a language code to its language name.
#
# Takes one argument, the code; it is lower-cased before the lookup, so
# matching is case-insensitive.  Returns the name stored in %CODES, or
# undef when the argument is undefined or is not a known code.
sub code2language
{
    my $wanted = shift;

    return undef unless defined $wanted;

    my $key = lc $wanted;

    # Explicit undef (not a bare return) so callers always get one value.
    return exists $CODES{$key} ? $CODES{$key} : undef;
}
110
# Map a language name to its language code.
#
# Takes one argument, the name; it is lower-cased before the lookup, so
# matching is case-insensitive.  Returns the code stored in %LANGUAGES,
# or undef when the argument is undefined or is not a known name.
sub language2code
{
    my $wanted = shift;

    return undef unless defined $wanted;

    my $key = lc $wanted;

    # Explicit undef (not a bare return) so callers always get one value.
    return exists $LANGUAGES{$key} ? $LANGUAGES{$key} : undef;
}
130
131#=======================================================================
132
133=head1 QUERY ROUTINES
134
135There are two functions which can be used to obtain a list of all
136language codes, or all language names:
137
138=over 8
139
140=item C<all_language_codes()>
141
142Returns a list of all two-letter language codes.
143The codes are guaranteed to be all lower-case,
144and not in any particular order.
145
146=item C<all_language_names()>
147
148Returns a list of all language names for which there is a corresponding
149two-letter language code. The names are capitalised, and not returned
150in any particular order.
151
152=back
153
154=cut
155
156#=======================================================================
# Return every language code, i.e. the keys of %CODES, in no particular
# order.  In scalar context the number of codes is returned.
sub all_language_codes
{
    my @codes = keys %CODES;

    return @codes;
}
161
# Return every language name, i.e. the values of %CODES, in no
# particular order.  In scalar context the number of names is returned.
sub all_language_names
{
    my @names = values %CODES;

    return @names;
}
166
167#-----------------------------------------------------------------------
168
169=head1 EXAMPLES
170
171The following example illustrates use of the C<code2language()> function.
172The user is prompted for a language code, and then told the corresponding
173language name:
174
175 $| = 1; # turn off buffering
88c28ceb 176
47a334e9
JH
177 print "Enter language code: ";
178 chomp($code = <STDIN>);
179 $lang = code2language($code);
180 if (defined $lang)
181 {
182 print "$code = $lang\n";
183 }
184 else
185 {
186 print "'$code' is not a valid language code!\n";
187 }
188
189=head1 KNOWN BUGS AND LIMITATIONS
190
191=over 4
192
193=item *
194
195In the current implementation, all data is read in when the
196module is loaded, and then held in memory.
197A lazy implementation would be more memory friendly.
198
199=item *
200
201Currently just supports the two letter language codes -
202there are also three-letter codes, and numbers.
203Would these be of any use to anyone?
204
205=back
206
207=head1 SEE ALSO
208
209=over 4
210
211=item Locale::Country
212
213ISO codes for identification of country (ISO 3166).
214Supports 2-letter, 3-letter, and numeric country codes.
215
216=item Locale::Currency
217
218ISO three letter codes for identification of currencies and funds (ISO 4217).
219
220=item ISO 639:1988 (E/F)
221
222Code for the representation of names of languages.
223
224=item http://lcweb.loc.gov/standards/iso639-2/langhome.html
225
226Home page for ISO 639-2
227
228=back
229
230
231=head1 AUTHOR
232
233Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt>
234
235=head1 COPYRIGHT
236
237Copyright (c) 1997-2001 Canon Research Centre Europe (CRE).
238
239This module is free software; you can redistribute it and/or
240modify it under the same terms as Perl itself.
241
242=cut
243
244#-----------------------------------------------------------------------
245
246#=======================================================================
247# initialisation code - stuff the DATA into the CODES hash
248#=======================================================================
# Load the "code:Language name" table from the __DATA__ section into the
# two lookup hashes: %CODES (code => name) and %LANGUAGES (lower-cased
# name => code).  Runs once, when the module is compiled.
{
    no utf8; # __DATA__ contains Latin-1

    # Read into a lexical rather than the implicit global $_, so that
    # loading this module cannot clobber a $_ the caller is using.
    while (defined(my $line = <DATA>))
    {
        # Blank lines only separate groups of codes in the table.
        next unless $line =~ /\S/;

        # chomp removes a trailing newline only if one is present; chop
        # would eat the last letter of a final line with no newline.
        chomp $line;

        # Limit of 2 keeps any further ':' inside the language name.
        my ($code, $language) = split(/:/, $line, 2);

        # A line without a ':' separator carries no name; skip it rather
        # than store an undefined value.
        next unless defined $language;

        $CODES{$code} = $language;
        $LANGUAGES{"\L$language"} = $code;
    }

    # The table has been consumed; release the filehandle.
    close(DATA);
}
265
2661;
267
268__DATA__
269aa:Afar
270ab:Abkhazian
271ae:Avestan
272af:Afrikaans
273am:Amharic
274ar:Arabic
275as:Assamese
276ay:Aymara
277az:Azerbaijani
278
279ba:Bashkir
280be:Belarusian
281bg:Bulgarian
282bh:Bihari
283bi:Bislama
284bn:Bengali
285bo:Tibetan
286br:Breton
287bs:Bosnian
288
289ca:Catalan
290ce:Chechen
291ch:Chamorro
292co:Corsican
293cs:Czech
294cu:Church Slavic
295cv:Chuvash
296cy:Welsh
297
298da:Danish
299de:German
300dz:Dzongkha
301
302el:Greek
303en:English
304eo:Esperanto
305es:Spanish
306et:Estonian
307eu:Basque
308
309fa:Persian
310fi:Finnish
311fj:Fijian
312fo:Faeroese
313fr:French
314fy:Frisian
315
316ga:Irish
317gd:Gaelic (Scots)
318gl:Gallegan
319gn:Guarani
320gu:Gujarati
321gv:Manx
322
323ha:Hausa
324he:Hebrew
325hi:Hindi
326ho:Hiri Motu
327hr:Croatian
328hu:Hungarian
329hy:Armenian
330hz:Herero
331
332ia:Interlingua
333id:Indonesian
334ie:Interlingue
335ik:Inupiaq
336is:Icelandic
337it:Italian
338iu:Inuktitut
339
340ja:Japanese
341jw:Javanese
342
343ka:Georgian
344ki:Kikuyu
345kj:Kuanyama
346kk:Kazakh
347kl:Kalaallisut
348km:Khmer
349kn:Kannada
350ko:Korean
351ks:Kashmiri
352ku:Kurdish
353kv:Komi
354kw:Cornish
355ky:Kirghiz
356
357la:Latin
358lb:Letzeburgesch
359ln:Lingala
360lo:Lao
361lt:Lithuanian
362lv:Latvian
363
364mg:Malagasy
365mh:Marshall
366mi:Maori
367mk:Macedonian
368ml:Malayalam
369mn:Mongolian
370mo:Moldavian
371mr:Marathi
372ms:Malay
373mt:Maltese
374my:Burmese
375
376na:Nauru
377nb:Norwegian Bokmål
378nd:Ndebele, North
379ne:Nepali
380ng:Ndonga
381nl:Dutch
382nn:Norwegian Nynorsk
383no:Norwegian
384nr:Ndebele, South
385nv:Navajo
386ny:Chichewa; Nyanja
387
388oc:Occitan (post 1500)
389om:Oromo
390or:Oriya
391os:Ossetian; Ossetic
392
393pa:Panjabi
394pi:Pali
395pl:Polish
396ps:Pushto
397pt:Portuguese
398
399qu:Quechua
400
401rm:Rhaeto-Romance
402rn:Rundi
403ro:Romanian
404ru:Russian
405rw:Kinyarwanda
406
407sa:Sanskrit
408sc:Sardinian
409sd:Sindhi
410se:Sami
411sg:Sango
412si:Sinhalese
413sk:Slovak
414sl:Slovenian
415sm:Samoan
416sn:Shona
417so:Somali
418sq:Albanian
419sr:Serbian
420ss:Swati
421st:Sotho
422su:Sundanese
423sv:Swedish
424sw:Swahili
425
426ta:Tamil
427te:Telugu
428tg:Tajik
429th:Thai
430ti:Tigrinya
431tk:Turkmen
432tl:Tagalog
433tn:Tswana
434to:Tonga
435tr:Turkish
436ts:Tsonga
437tt:Tatar
438tw:Twi
439
440ug:Uighur
441uk:Ukrainian
442ur:Urdu
443uz:Uzbek
444
445vi:Vietnamese
446vo:Volapük
447
448wo:Wolof
449
450xh:Xhosa
451
452yi:Yiddish
453yo:Yoruba
454
455za:Zhuang
456zh:Chinese
457zu:Zulu