Commit | Line | Data |
---|---|---|
47a334e9 JH |
1 | #----------------------------------------------------------------------- |
2 | ||
3 | =head1 NAME | |
4 | ||
5 | Locale::Language - ISO two letter codes for language identification (ISO 639) | |
6 | ||
7 | =head1 SYNOPSIS | |
8 | ||
9 | use Locale::Language; | |
88c28ceb | 10 | |
47a334e9 JH |
11 | $lang = code2language('en'); # $lang gets 'English' |
12 | $code = language2code('French'); # $code gets 'fr' | |
88c28ceb | 13 | |
47a334e9 JH |
14 | @codes = all_language_codes(); |
15 | @names = all_language_names(); | |
16 | ||
17 | =cut | |
18 | ||
19 | #----------------------------------------------------------------------- | |
20 | ||
21 | package Locale::Language; | |
22 | use strict; | |
23 | require 5.002; | |
24 | ||
25 | #----------------------------------------------------------------------- | |
26 | ||
27 | =head1 DESCRIPTION | |
28 | ||
29 | The C<Locale::Language> module provides access to the ISO two-letter | |
30 | codes for identifying languages, as defined in ISO 639. You can either | |
31 | access the codes via the L<conversion routines> (described below), | |
32 | or with the two functions which return lists of all language codes or | |
33 | all language names. | |
34 | ||
35 | =cut | |
36 | ||
37 | #----------------------------------------------------------------------- | |
38 | ||
39 | require Exporter; | |
40 | ||
41 | #----------------------------------------------------------------------- | |
42 | # Public Global Variables | |
43 | #----------------------------------------------------------------------- | |
44 | use vars qw($VERSION @ISA @EXPORT); | |
45 | $VERSION = sprintf("%d.%02d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/); | |
46 | @ISA = qw(Exporter); | |
47 | @EXPORT = qw(&code2language &language2code | |
48 | &all_language_codes &all_language_names ); | |
49 | ||
50 | #----------------------------------------------------------------------- | |
51 | # Private Global Variables | |
52 | #----------------------------------------------------------------------- | |
53 | my %CODES = (); | |
54 | my %LANGUAGES = (); | |
55 | ||
56 | ||
57 | #======================================================================= | |
58 | ||
59 | =head1 CONVERSION ROUTINES | |
60 | ||
61 | There are two conversion routines: C<code2language()> and C<language2code()>. | |
62 | ||
63 | =over 8 | |
64 | ||
65 | =item code2language() | |
66 | ||
67 | This function takes a two letter language code and returns a string | |
68 | which contains the name of the language identified. If the code is | |
69 | not a valid language code, as defined by ISO 639, then C<undef> | |
70 | will be returned. | |
71 | ||
72 | $lang = code2language($code); | |
73 | ||
74 | =item language2code() | |
75 | ||
76 | This function takes a language name and returns the corresponding | |
77 | two letter language code, if such exists. | |
78 | If the argument could not be identified as a language name, | |
79 | then C<undef> will be returned. | |
80 | ||
81 | $code = language2code('French'); | |
82 | ||
83 | The case of the language name is not important. | |
84 | See the section L<KNOWN BUGS AND LIMITATIONS> below. | |
85 | ||
86 | =back | |
87 | ||
88 | =cut | |
89 | ||
90 | #======================================================================= | |
#-----------------------------------------------------------------------
# code2language( CODE )
#
# Look up a language code in the %CODES table and hand back the
# language name stored for it.  The code is lower-cased before the
# lookup is made.
#
# Returns undef when no code is given, or when the (lower-cased)
# code has no entry in %CODES.
#-----------------------------------------------------------------------
sub code2language
{
    my ($wanted) = @_;

    return undef unless defined $wanted;

    my $key = lc $wanted;

    # exists() is used rather than a plain fetch so that probing for
    # an unknown code never touches the table.
    return exists $CODES{$key} ? $CODES{$key} : undef;
}
110 | ||
#-----------------------------------------------------------------------
# language2code( NAME )
#
# Look up a language name in the %LANGUAGES table and hand back the
# code stored for it.  The name is lower-cased before the lookup is
# made, which is what makes the match independent of case.
#
# Returns undef when no name is given, or when the (lower-cased)
# name has no entry in %LANGUAGES.
#-----------------------------------------------------------------------
sub language2code
{
    my ($name) = @_;

    return undef unless defined $name;

    my $key = lc $name;

    return $LANGUAGES{$key} if exists $LANGUAGES{$key};

    # no such language!
    return undef;
}
130 | ||
131 | #======================================================================= | |
132 | ||
133 | =head1 QUERY ROUTINES | |
134 | ||
135 | There are two functions which can be used to obtain a list of all | |
136 | language codes, or all language names: | |
137 | ||
138 | =over 8 | |
139 | ||
140 | =item C<all_language_codes()> | |
141 | ||
142 | Returns a list of all two-letter language codes. | |
143 | The codes are guaranteed to be all lower-case, | |
144 | and not in any particular order. | |
145 | ||
146 | =item C<all_language_names()> | |
147 | ||
148 | Returns a list of all language names for which there is a corresponding | |
149 | two-letter language code. The names are capitalised, and not returned | |
150 | in any particular order. | |
151 | ||
152 | =back | |
153 | ||
154 | =cut | |
155 | ||
156 | #======================================================================= | |
#-----------------------------------------------------------------------
# all_language_codes()
#
# Returns every key of the %CODES table, i.e. all known language
# codes, in whatever order the hash yields them.
#-----------------------------------------------------------------------
sub all_language_codes
{
    my @codes = keys %CODES;

    return @codes;
}
161 | ||
#-----------------------------------------------------------------------
# all_language_names()
#
# Returns every value of the %CODES table, i.e. the language name
# stored for each known code, in whatever order the hash yields them.
#-----------------------------------------------------------------------
sub all_language_names
{
    my @names = values %CODES;

    return @names;
}
166 | ||
167 | #----------------------------------------------------------------------- | |
168 | ||
169 | =head1 EXAMPLES | |
170 | ||
171 | The following example illustrates use of the C<code2language()> function. | |
172 | The user is prompted for a language code, and then told the corresponding | |
173 | language name: | |
174 | ||
175 | $| = 1; # turn off buffering | |
88c28ceb | 176 | |
47a334e9 JH |
177 | print "Enter language code: "; |
178 | chomp($code = <STDIN>); | |
179 | $lang = code2language($code); | |
180 | if (defined $lang) | |
181 | { | |
182 | print "$code = $lang\n"; | |
183 | } | |
184 | else | |
185 | { | |
186 | print "'$code' is not a valid language code!\n"; | |
187 | } | |
188 | ||
189 | =head1 KNOWN BUGS AND LIMITATIONS | |
190 | ||
191 | =over 4 | |
192 | ||
193 | =item * | |
194 | ||
195 | In the current implementation, all data is read in when the | |
196 | module is loaded, and then held in memory. | |
197 | A lazy implementation would be more memory friendly. | |
198 | ||
199 | =item * | |
200 | ||
201 | Currently just supports the two letter language codes - | |
202 | there are also three-letter codes, and numbers. | |
203 | Would these be of any use to anyone? | |
204 | ||
205 | =back | |
206 | ||
207 | =head1 SEE ALSO | |
208 | ||
209 | =over 4 | |
210 | ||
211 | =item Locale::Country | |
212 | ||
213 | ISO codes for identification of country (ISO 3166). | |
214 | Supports 2-letter, 3-letter, and numeric country codes. | |
215 | ||
216 | =item Locale::Currency | |
217 | ||
218 | ISO three letter codes for identification of currencies and funds (ISO 4217). | |
219 | ||
220 | =item ISO 639:1988 (E/F) | |
221 | ||
222 | Code for the representation of names of languages. | |
223 | ||
224 | =item http://lcweb.loc.gov/standards/iso639-2/langhome.html | |
225 | ||
226 | Home page for ISO 639-2 | |
227 | ||
228 | =back | |
229 | ||
230 | ||
231 | =head1 AUTHOR | |
232 | ||
233 | Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt> | |
234 | ||
235 | =head1 COPYRIGHT | |
236 | ||
237 | Copyright (c) 1997-2001 Canon Research Centre Europe (CRE). | |
238 | ||
239 | This module is free software; you can redistribute it and/or | |
240 | modify it under the same terms as Perl itself. | |
241 | ||
242 | =cut | |
243 | ||
244 | #----------------------------------------------------------------------- | |
245 | ||
246 | #======================================================================= | |
247 | # initialisation code - stuff the DATA into the CODES hash | |
248 | #======================================================================= | |
{
    #-------------------------------------------------------------------
    # Populate the two lookup tables from the "code:Language" records
    # that follow __DATA__:
    #
    #   %CODES      code                  => language name (as written)
    #   %LANGUAGES  lower-cased language  => code
    #
    # This runs once, when the module is loaded.
    #-------------------------------------------------------------------
    no utf8; # __DATA__ contains Latin-1

    while (<DATA>)
    {
        # blank lines only separate groups of records
        next unless /\S/;

        # chomp, not chop: chop removes the last character whatever it
        # is, so a final record without a trailing newline would lose
        # the last letter of its language name.
        chomp;

        # Limit of 2 so that only the first ':' separates code from name.
        my ($code, $language) = split(/:/, $_, 2);

        $CODES{$code} = $language;
        $LANGUAGES{"\L$language"} = $code;
    }

    # The table is fully loaded; release the file handle.
    close(DATA);
}
265 | ||
266 | 1; | |
267 | ||
268 | __DATA__ | |
269 | aa:Afar | |
270 | ab:Abkhazian | |
271 | ae:Avestan | |
272 | af:Afrikaans | |
273 | am:Amharic | |
274 | ar:Arabic | |
275 | as:Assamese | |
276 | ay:Aymara | |
277 | az:Azerbaijani | |
278 | ||
279 | ba:Bashkir | |
280 | be:Belarusian | |
281 | bg:Bulgarian | |
282 | bh:Bihari | |
283 | bi:Bislama | |
284 | bn:Bengali | |
285 | bo:Tibetan | |
286 | br:Breton | |
287 | bs:Bosnian | |
288 | ||
289 | ca:Catalan | |
290 | ce:Chechen | |
291 | ch:Chamorro | |
292 | co:Corsican | |
293 | cs:Czech | |
294 | cu:Church Slavic | |
295 | cv:Chuvash | |
296 | cy:Welsh | |
297 | ||
298 | da:Danish | |
299 | de:German | |
300 | dz:Dzongkha | |
301 | ||
302 | el:Greek | |
303 | en:English | |
304 | eo:Esperanto | |
305 | es:Spanish | |
306 | et:Estonian | |
307 | eu:Basque | |
308 | ||
309 | fa:Persian | |
310 | fi:Finnish | |
311 | fj:Fijian | |
312 | fo:Faeroese | |
313 | fr:French | |
314 | fy:Frisian | |
315 | ||
316 | ga:Irish | |
317 | gd:Gaelic (Scots) | |
318 | gl:Gallegan | |
319 | gn:Guarani | |
320 | gu:Gujarati | |
321 | gv:Manx | |
322 | ||
323 | ha:Hausa | |
324 | he:Hebrew | |
325 | hi:Hindi | |
326 | ho:Hiri Motu | |
327 | hr:Croatian | |
328 | hu:Hungarian | |
329 | hy:Armenian | |
330 | hz:Herero | |
331 | ||
332 | ia:Interlingua | |
333 | id:Indonesian | |
334 | ie:Interlingue | |
335 | ik:Inupiaq | |
336 | is:Icelandic | |
337 | it:Italian | |
338 | iu:Inuktitut | |
339 | ||
340 | ja:Japanese | |
341 | jw:Javanese | |
342 | ||
343 | ka:Georgian | |
344 | ki:Kikuyu | |
345 | kj:Kuanyama | |
346 | kk:Kazakh | |
347 | kl:Kalaallisut | |
348 | km:Khmer | |
349 | kn:Kannada | |
350 | ko:Korean | |
351 | ks:Kashmiri | |
352 | ku:Kurdish | |
353 | kv:Komi | |
354 | kw:Cornish | |
355 | ky:Kirghiz | |
356 | ||
357 | la:Latin | |
358 | lb:Letzeburgesch | |
359 | ln:Lingala | |
360 | lo:Lao | |
361 | lt:Lithuanian | |
362 | lv:Latvian | |
363 | ||
364 | mg:Malagasy | |
365 | mh:Marshall | |
366 | mi:Maori | |
367 | mk:Macedonian | |
368 | ml:Malayalam | |
369 | mn:Mongolian | |
370 | mo:Moldavian | |
371 | mr:Marathi | |
372 | ms:Malay | |
373 | mt:Maltese | |
374 | my:Burmese | |
375 | ||
376 | na:Nauru | |
377 | nb:Norwegian Bokmål | |
378 | nd:Ndebele, North | |
379 | ne:Nepali | |
380 | ng:Ndonga | |
381 | nl:Dutch | |
382 | nn:Norwegian Nynorsk | |
383 | no:Norwegian | |
384 | nr:Ndebele, South | |
385 | nv:Navajo | |
386 | ny:Chichewa; Nyanja | |
387 | ||
388 | oc:Occitan (post 1500) | |
389 | om:Oromo | |
390 | or:Oriya | |
391 | os:Ossetian; Ossetic | |
392 | ||
393 | pa:Panjabi | |
394 | pi:Pali | |
395 | pl:Polish | |
396 | ps:Pushto | |
397 | pt:Portuguese | |
398 | ||
399 | qu:Quechua | |
400 | ||
401 | rm:Rhaeto-Romance | |
402 | rn:Rundi | |
403 | ro:Romanian | |
404 | ru:Russian | |
405 | rw:Kinyarwanda | |
406 | ||
407 | sa:Sanskrit | |
408 | sc:Sardinian | |
409 | sd:Sindhi | |
410 | se:Sami | |
411 | sg:Sango | |
412 | si:Sinhalese | |
413 | sk:Slovak | |
414 | sl:Slovenian | |
415 | sm:Samoan | |
416 | sn:Shona | |
417 | so:Somali | |
418 | sq:Albanian | |
419 | sr:Serbian | |
420 | ss:Swati | |
421 | st:Sotho | |
422 | su:Sundanese | |
423 | sv:Swedish | |
424 | sw:Swahili | |
425 | ||
426 | ta:Tamil | |
427 | te:Telugu | |
428 | tg:Tajik | |
429 | th:Thai | |
430 | ti:Tigrinya | |
431 | tk:Turkmen | |
432 | tl:Tagalog | |
433 | tn:Tswana | |
434 | to:Tonga | |
435 | tr:Turkish | |
436 | ts:Tsonga | |
437 | tt:Tatar | |
438 | tw:Twi | |
439 | ||
440 | ug:Uighur | |
441 | uk:Ukrainian | |
442 | ur:Urdu | |
443 | uz:Uzbek | |
444 | ||
445 | vi:Vietnamese | |
446 | vo:Volapük | |
447 | ||
448 | wo:Wolof | |
449 | ||
450 | xh:Xhosa | |
451 | ||
452 | yi:Yiddish | |
453 | yo:Yoruba | |
454 | ||
455 | za:Zhuang | |
456 | zh:Chinese | |
457 | zu:Zulu |