[perl5.git] / lib / Locale / Language.pm

#-----------------------------------------------------------------------

=head1 NAME

Locale::Language - ISO two letter codes for language identification (ISO 639)

=head1 SYNOPSIS

    use Locale::Language;
    
    $lang = code2language('en');        # $lang gets 'English'
    $code = language2code('French');    # $code gets 'fr'
    
    @codes   = all_language_codes();
    @names   = all_language_names();

=cut

#-----------------------------------------------------------------------

package Locale::Language;
use strict;
require 5.002;

#-----------------------------------------------------------------------

=head1 DESCRIPTION

The C<Locale::Language> module provides access to the ISO two-letter
codes for identifying languages, as defined in ISO 639. You can either
access the codes via the L<conversion routines|/"CONVERSION ROUTINES"> (described below),
or with the two functions which return lists of all language codes or
all language names.

=cut

#-----------------------------------------------------------------------

require Exporter;

#-----------------------------------------------------------------------
#	Public Global Variables
#-----------------------------------------------------------------------
# $VERSION, @ISA and @EXPORT are package globals; "use vars" declares
# them so they can be used unqualified under "use strict".
use vars qw($VERSION @ISA @EXPORT);
# The version number is built from the revision-control keyword string:
# the major and minor parts are captured by the regex and formatted as
# "%d.%02d", so a revision of 1.6 yields "1.06".
$VERSION      = sprintf("%d.%02d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/);
@ISA          = qw(Exporter);
# All four public functions are listed in @EXPORT, i.e. exported by default.
@EXPORT       = qw(&code2language &language2code
                   &all_language_codes &all_language_names );

#-----------------------------------------------------------------------
#	Private Global Variables
#-----------------------------------------------------------------------
# File-scoped lookup tables, filled from the __DATA__ section by the
# initialisation block near the end of the file.
my %CODES     = ();    # language code => language name
my %LANGUAGES = ();    # lower-cased language name => language code


#=======================================================================

=head1 CONVERSION ROUTINES

There are two conversion routines: C<code2language()> and C<language2code()>.

=over 8

=item code2language()

This function takes a two letter language code and returns a string
which contains the name of the language identified. If the code is
not a valid language code, as defined by ISO 639, then C<undef>
will be returned.

    $lang = code2language($code);

=item language2code()

This function takes a language name and returns the corresponding
two letter language code, if such exists.
If the argument could not be identified as a language name,
then C<undef> will be returned.

    $code = language2code('French');

The case of the language name is not important.
See the section L</"KNOWN BUGS AND LIMITATIONS"> below.

=back

=cut

#=======================================================================
#-----------------------------------------------------------------------
# code2language(CODE)
#
# Look up a language code and return the language name stored for it.
# The lookup is case-insensitive on the code. Returns undef when CODE
# is undefined or is not a key of %CODES.
#-----------------------------------------------------------------------
sub code2language
{
    my $wanted = shift @_;

    # An undefined argument can never identify a language.
    return undef unless defined $wanted;

    # Codes are looked up in lower case.
    my $key = lc $wanted;

    return exists($CODES{$key}) ? $CODES{$key} : undef;
}

#-----------------------------------------------------------------------
# language2code(NAME)
#
# Look up a language name and return the language code stored for it.
# The name is lower-cased before the lookup, so its case does not
# matter. Returns undef when NAME is undefined or is not a key of
# %LANGUAGES.
#-----------------------------------------------------------------------
sub language2code
{
    my $name = shift @_;

    if (defined $name)
    {
        # %LANGUAGES is keyed on the lower-cased language name.
        my $key = lc $name;
        return $LANGUAGES{$key} if exists $LANGUAGES{$key};
    }

    # Undefined argument, or no such language.
    return undef;
}

#=======================================================================

=head1 QUERY ROUTINES

There are two functions which can be used to obtain a list of all
language codes, or all language names:

=over 8

=item C<all_language_codes()>

Returns a list of all two-letter language codes.
The codes are guaranteed to be all lower-case,
and not in any particular order.

=item C<all_language_names()>

Returns a list of all language names for which there is a corresponding
two-letter language code. The names are capitalised, and not returned
in any particular order.

=back

=cut

#=======================================================================
#-----------------------------------------------------------------------
# all_language_codes()
#
# Return every language code held in %CODES, in hash order (i.e. no
# particular order). In scalar context the number of codes is returned.
#-----------------------------------------------------------------------
sub all_language_codes
{
    my @codes = keys %CODES;

    return @codes;
}

#-----------------------------------------------------------------------
# all_language_names()
#
# Return every language name held in %CODES, in hash order (i.e. no
# particular order). In scalar context the number of names is returned.
#-----------------------------------------------------------------------
sub all_language_names
{
    my @names = values %CODES;

    return @names;
}

#-----------------------------------------------------------------------

=head1 EXAMPLES

The following example illustrates use of the C<code2language()> function.
The user is prompted for a language code, and then told the corresponding
language name:

    $| = 1;    # turn off buffering
    
    print "Enter language code: ";
    chomp($code = <STDIN>);
    $lang = code2language($code);
    if (defined $lang)
    {
        print "$code = $lang\n";
    }
    else
    {
        print "'$code' is not a valid language code!\n";
    }

=head1 KNOWN BUGS AND LIMITATIONS

=over 4

=item *

In the current implementation, all data is read in when the
module is loaded, and then held in memory.
A lazy implementation would be more memory friendly.

=item *

Currently just supports the two letter language codes -
there are also three-letter codes, and numbers.
Would these be of any use to anyone?

=back

=head1 SEE ALSO

=over 4

=item Locale::Country

ISO codes for identification of country (ISO 3166).
Supports 2-letter, 3-letter, and numeric country codes.

=item Locale::Currency

ISO three letter codes for identification of currencies and funds (ISO 4217).

=item ISO 639:1988 (E/F)

Code for the representation of names of languages.

=item http://lcweb.loc.gov/standards/iso639-2/langhome.html

Home page for ISO 639-2

=back


=head1 AUTHOR

Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt>

=head1 COPYRIGHT

Copyright (c) 1997-2001 Canon Research Centre Europe (CRE).

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

#-----------------------------------------------------------------------

#=======================================================================
# initialisation code - load the "code:name" records from the DATA
# section into the %CODES (code => name) and %LANGUAGES
# (lower-cased name => code) lookup hashes
#=======================================================================
{
    no utf8; # __DATA__ contains Latin-1

    # Read one record per line regardless of what the code loading this
    # module has done to the input record separator (for example a
    # slurping "local $/;" in effect around the require).
    local $/ = "\n";

    # A lexical line variable is used instead of the implicit $_:
    # "while (<DATA>)" assigns to the global $_ without localising it,
    # which would clobber the caller's $_ while the module is loaded.
    my $line;

    while (defined($line = <DATA>))
    {
        # Blank lines merely separate groups of codes in the table.
        next unless $line =~ /\S/;

        # Remove only the line terminator (LF or CRLF). An unconditional
        # chop would eat the last letter of the language name if the
        # final record had no trailing newline, and would leave a stray
        # carriage return in the name on CRLF-terminated data.
        $line =~ s/\r?\n\z//;

        # Split on the first colon only, so a name may itself contain ':'.
        my ($code, $language) = split(/:/, $line, 2);

        $CODES{$code} = $language;
        $LANGUAGES{"\L$language"} = $code;
    }

    # The table is fully loaded; release the file handle.
    close(DATA);
}

1;

__DATA__
aa:Afar
ab:Abkhazian
ae:Avestan
af:Afrikaans
am:Amharic
ar:Arabic
as:Assamese
ay:Aymara
az:Azerbaijani

ba:Bashkir
be:Belarusian
bg:Bulgarian
bh:Bihari
bi:Bislama
bn:Bengali
bo:Tibetan
br:Breton
bs:Bosnian

ca:Catalan
ce:Chechen
ch:Chamorro
co:Corsican
cs:Czech
cu:Church Slavic
cv:Chuvash
cy:Welsh

da:Danish
de:German
dz:Dzongkha

el:Greek
en:English
eo:Esperanto
es:Spanish
et:Estonian
eu:Basque

fa:Persian
fi:Finnish
fj:Fijian
fo:Faeroese
fr:French
fy:Frisian

ga:Irish
gd:Gaelic (Scots)
gl:Gallegan
gn:Guarani
gu:Gujarati
gv:Manx

ha:Hausa
he:Hebrew
hi:Hindi
ho:Hiri Motu
hr:Croatian
hu:Hungarian
hy:Armenian
hz:Herero

ia:Interlingua
id:Indonesian
ie:Interlingue
ik:Inupiaq
is:Icelandic
it:Italian
iu:Inuktitut

ja:Japanese
jw:Javanese

ka:Georgian
ki:Kikuyu
kj:Kuanyama
kk:Kazakh
kl:Kalaallisut
km:Khmer
kn:Kannada
ko:Korean
ks:Kashmiri
ku:Kurdish
kv:Komi
kw:Cornish
ky:Kirghiz

la:Latin
lb:Letzeburgesch
ln:Lingala
lo:Lao
lt:Lithuanian
lv:Latvian

mg:Malagasy
mh:Marshall
mi:Maori
mk:Macedonian
ml:Malayalam
mn:Mongolian
mo:Moldavian
mr:Marathi
ms:Malay
mt:Maltese
my:Burmese

na:Nauru
nb:Norwegian Bokmål
nd:Ndebele, North
ne:Nepali
ng:Ndonga
nl:Dutch
nn:Norwegian Nynorsk
no:Norwegian
nr:Ndebele, South
nv:Navajo
ny:Chichewa; Nyanja

oc:Occitan (post 1500)
om:Oromo
or:Oriya
os:Ossetian; Ossetic

pa:Panjabi
pi:Pali
pl:Polish
ps:Pushto
pt:Portuguese

qu:Quechua

rm:Rhaeto-Romance
rn:Rundi
ro:Romanian
ru:Russian
rw:Kinyarwanda

sa:Sanskrit
sc:Sardinian
sd:Sindhi
se:Sami
sg:Sango
si:Sinhalese
sk:Slovak
sl:Slovenian
sm:Samoan
sn:Shona
so:Somali
sq:Albanian
sr:Serbian
ss:Swati
st:Sotho
su:Sundanese
sv:Swedish
sw:Swahili

ta:Tamil
te:Telugu
tg:Tajik
th:Thai
ti:Tigrinya
tk:Turkmen
tl:Tagalog
tn:Tswana
to:Tonga
tr:Turkish
ts:Tsonga
tt:Tatar
tw:Twi

ug:Uighur
uk:Ukrainian
ur:Urdu
uz:Uzbek

vi:Vietnamese
vo:Volapük

wo:Wolof

xh:Xhosa

yi:Yiddish
yo:Yoruba

za:Zhuang
zh:Chinese
zu:Zulu
Commit	Line	Data
47a334e9 JH	1	#-----------------------------------------------------------------------
	2
	3	=head1 NAME
	4
	5	Locale::Language - ISO two letter codes for language identification (ISO 639)
	6
	7	=head1 SYNOPSIS
	8
	9	use Locale::Language;
88c28ceb	10
47a334e9 JH	11	$lang = code2language('en'); # $lang gets 'English'
47a334e9 JH	12	$code = language2code('French'); # $code gets 'fr'
88c28ceb	13
47a334e9 JH	14	@codes = all_language_codes();
	15	@names = all_language_names();
	16
	17	=cut
	18
	19	#-----------------------------------------------------------------------
	20
	21	package Locale::Language;
	22	use strict;
	23	require 5.002;
	24
	25	#-----------------------------------------------------------------------
	26
	27	=head1 DESCRIPTION
	28
	29	The C<Locale::Language> module provides access to the ISO two-letter
	30	codes for identifying languages, as defined in ISO 639. You can either
	31	access the codes via the L<conversion routines> (described below),
	32	or with the two functions which return lists of all language codes or
	33	all language names.
	34
	35	=cut
	36
	37	#-----------------------------------------------------------------------
	38
	39	require Exporter;
	40
	41	#-----------------------------------------------------------------------
	42	# Public Global Variables
	43	#-----------------------------------------------------------------------
	44	use vars qw($VERSION @ISA @EXPORT);
	45	$VERSION = sprintf("%d.%02d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/);
	46	@ISA = qw(Exporter);
	47	@EXPORT = qw(&code2language &language2code
	48	&all_language_codes &all_language_names );
	49
	50	#-----------------------------------------------------------------------
	51	# Private Global Variables
	52	#-----------------------------------------------------------------------
	53	my %CODES = ();
	54	my %LANGUAGES = ();
	55
	56
	57	#=======================================================================
	58
	59	=head1 CONVERSION ROUTINES
	60
	61	There are two conversion routines: C<code2language()> and C<language2code()>.
	62
	63	=over 8
	64
	65	=item code2language()
	66
	67	This function takes a two letter language code and returns a string
	68	which contains the name of the language identified. If the code is
	69	not a valid language code, as defined by ISO 639, then C<undef>
	70	will be returned.
	71
	72	$lang = code2language($code);
	73
	74	=item language2code()
	75
	76	This function takes a language name and returns the corresponding
	77	two letter language code, if such exists.
78	If the argument could not be identified as a language name,
79	then C<undef> will be returned.
80
81	$code = language2code('French');
82
83	The case of the language name is not important.
84	See the section L<KNOWN BUGS AND LIMITATIONS> below.
85
86	=back
87
88	=cut
89
90	#=======================================================================
91	sub code2language
92	{
93	my $code = shift;
94
95
96	return undef unless defined $code;
97	$code = lc($code);
98	if (exists $CODES{$code})
99	{
100	return $CODES{$code};
101	}
102	else
103	{
104	#---------------------------------------------------------------
105	# no such language code!
106	#---------------------------------------------------------------
107	return undef;
108	}
109	}
110
111	sub language2code
112	{
113	my $lang = shift;
114
115
116	return undef unless defined $lang;
117	$lang = lc($lang);
118	if (exists $LANGUAGES{$lang})
119	{
120	return $LANGUAGES{$lang};
121	}
122	else
123	{
124	#---------------------------------------------------------------
125	# no such language!
126	#---------------------------------------------------------------
127	return undef;
128	}
129	}
130
131	#=======================================================================
132
133	=head1 QUERY ROUTINES
134
135	There are two function which can be used to obtain a list of all
136	language codes, or all language names:
137
138	=over 8
139
140	=item C<all_language_codes()>
141
142	Returns a list of all two-letter language codes.
143	The codes are guaranteed to be all lower-case,
144	and not in any particular order.
145
146	=item C<all_language_names()>
147
148	Returns a list of all language names for which there is a corresponding
149	two-letter language code. The names are capitalised, and not returned
150	in any particular order.
151
152	=back
153
154	=cut
155
156	#=======================================================================
157	sub all_language_codes
158	{
159	return keys %CODES;
160	}
161
162	sub all_language_names
163	{
164	return values %CODES;
165	}
166
167	#-----------------------------------------------------------------------
168
169	=head1 EXAMPLES
170
171	The following example illustrates use of the C<code2language()> function.
172	The user is prompted for a language code, and then told the corresponding
173	language name:
174
175	$\| = 1; # turn off buffering
88c28ceb	176
47a334e9 JH	177	print "Enter language code: ";
	178	chop($code = <STDIN>);
	179	$lang = code2language($code);
	180	if (defined $lang)
	181	{
	182	print "$code = $lang\n";
	183	}
	184	else
	185	{
	186	print "'$code' is not a valid language code!\n";
	187	}
	188
	189	=head1 KNOWN BUGS AND LIMITATIONS
	190
	191	=over 4
	192
	193	=item *
	194
	195	In the current implementation, all data is read in when the
	196	module is loaded, and then held in memory.
	197	A lazy implementation would be more memory friendly.
	198
	199	=item *
	200
	201	Currently just supports the two letter language codes -
	202	there are also three-letter codes, and numbers.
	203	Would these be of any use to anyone?
	204
	205	=back
	206
	207	=head1 SEE ALSO
	208
	209	=over 4
	210
	211	=item Locale::Country
	212
	213	ISO codes for identification of country (ISO 3166).
	214	Supports 2-letter, 3-letter, and numeric country codes.
	215
	216	=item Locale::Currency
	217
	218	ISO three letter codes for identification of currencies and funds (ISO 4217).
	219
	220	=item ISO 639:1988 (E/F)
	221
	222	Code for the representation of names of languages.
	223
	224	=item http://lcweb.loc.gov/standards/iso639-2/langhome.html
	225
	226	Home page for ISO 639-2
	227
	228	=back
	229
	230
	231	=head1 AUTHOR
	232
	233	Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt>
	234
	235	=head1 COPYRIGHT
	236
	237	Copyright (c) 1997-2001 Canon Research Centre Europe (CRE).
	238
	239	This module is free software; you can redistribute it and/or
	240	modify it under the same terms as Perl itself.
241
242	=cut
243
244	#-----------------------------------------------------------------------
245
246	#=======================================================================
247	# initialisation code - stuff the DATA into the CODES hash
248	#=======================================================================
249	{
4c53e876 JH	250	no utf8; # __DATA__ contains Latin-1
4c53e876 JH	251
47a334e9 JH	252	my $code;
	253	my $language;
	254
	255
	256	while (<DATA>)
	257	{
4c53e876	258	next unless /\S/;
47a334e9 JH	259	chop;
	260	($code, $language) = split(/:/, $_, 2);
	261	$CODES{$code} = $language;
	262	$LANGUAGES{"\L$language"} = $code;
	263	}
	264	}
	265
	266	1;
	267
	268	__DATA__
	269	aa:Afar
	270	ab:Abkhazian
	271	ae:Avestan
	272	af:Afrikaans
	273	am:Amharic
	274	ar:Arabic
	275	as:Assamese
	276	ay:Aymara
	277	az:Azerbaijani
	278
	279	ba:Bashkir
	280	be:Belarusian
	281	bg:Bulgarian
	282	bh:Bihari
	283	bi:Bislama
	284	bn:Bengali
	285	bo:Tibetan
	286	br:Breton
	287	bs:Bosnian
	288
	289	ca:Catalan
	290	ce:Chechen
	291	ch:Chamorro
	292	co:Corsican
	293	cs:Czech
	294	cu:Church Slavic
	295	cv:Chuvash
	296	cy:Welsh
	297
	298	da:Danish
	299	de:German
	300	dz:Dzongkha
	301
	302	el:Greek
	303	en:English
	304	eo:Esperanto
	305	es:Spanish
	306	et:Estonian
	307	eu:Basque
	308
	309	fa:Persian
	310	fi:Finnish
	311	fj:Fijian
	312	fo:Faeroese
	313	fr:French
	314	fy:Frisian
	315
	316	ga:Irish
	317	gd:Gaelic (Scots)
	318	gl:Gallegan
	319	gn:Guarani
	320	gu:Gujarati
	321	gv:Manx
	322
323	ha:Hausa
324	he:Hebrew
325	hi:Hindi
326	ho:Hiri Motu
327	hr:Croatian
328	hu:Hungarian
329	hy:Armenian
330	hz:Herero
331
332	ia:Interlingua
333	id:Indonesian
334	ie:Interlingue
335	ik:Inupiaq
336	is:Icelandic
337	it:Italian
338	iu:Inuktitut
339
340	ja:Japanese
341	jw:Javanese
342
343	ka:Georgian
344	ki:Kikuyu
345	kj:Kuanyama
346	kk:Kazakh
347	kl:Kalaallisut
348	km:Khmer
349	kn:Kannada
350	ko:Korean
351	ks:Kashmiri
352	ku:Kurdish
353	kv:Komi
354	kw:Cornish
355	ky:Kirghiz
356
357	la:Latin
358	lb:Letzeburgesch
359	ln:Lingala
360	lo:Lao
361	lt:Lithuanian
362	lv:Latvian
363
364	mg:Malagasy
365	mh:Marshall
366	mi:Maori
367	mk:Macedonian
368	ml:Malayalam
369	mn:Mongolian
370	mo:Moldavian
371	mr:Marathi
372	ms:Malay
373	mt:Maltese
374	my:Burmese
375
376	na:Nauru
377	nb:Norwegian Bokmål
378	nd:Ndebele, North
379	ne:Nepali
380	ng:Ndonga
381	nl:Dutch
382	nn:Norwegian Nynorsk
383	no:Norwegian
384	nr:Ndebele, South
385	nv:Navajo
386	ny:Chichewa; Nyanja
387
388	oc:Occitan (post 1500)
389	om:Oromo
390	or:Oriya
391	os:Ossetian; Ossetic
392
393	pa:Panjabi
394	pi:Pali
395	pl:Polish
396	ps:Pushto
397	pt:Portuguese
398
399	qu:Quechua
400
401	rm:Rhaeto-Romance
402	rn:Rundi
403	ro:Romanian
404	ru:Russian
405	rw:Kinyarwanda
406
407	sa:Sanskrit
408	sc:Sardinian
409	sd:Sindhi
410	se:Sami
411	sg:Sango
412	si:Sinhalese
413	sk:Slovak
414	sl:Slovenian
415	sm:Samoan
416	sn:Shona
417	so:Somali
418	sq:Albanian
419	sr:Serbian
420	ss:Swati
421	st:Sotho
422	su:Sundanese
423	sv:Swedish
424	sw:Swahili
425
426	ta:Tamil
427	te:Telugu
428	tg:Tajik
429	th:Thai
430	ti:Tigrinya
431	tk:Turkmen
432	tl:Tagalog
433	tn:Tswana
434	to:Tonga
435	tr:Turkish
436	ts:Tsonga
437	tt:Tatar
438	tw:Twi
439
440	ug:Uighur
441	uk:Ukrainian
442	ur:Urdu
443	uz:Uzbek
444
445	vi:Vietnamese
446	vo:Volapük
447
448	wo:Wolof
449
450	xh:Xhosa
451
452	yi:Yiddish
453	yo:Yoruba
454
455	za:Zhuang
456	zh:Chinese
457	zu:Zulu