This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
m2t3: minor doc patch (to obsolete I18N::Collate)
[perl5.git] / lib / I18N / Collate.pm
CommitLineData
a0d0e21e
LW
1package I18N::Collate;
2
f06db76b
AD
3=head1 NAME
4
69b19ea2 5I18N::Collate - compare 8-bit scalar data according to the current locale
f06db76b 6
4413da2b
JH
7 ***
8
9 WARNING: starting from the Perl version 5.003_06
10 the I18N::Collate interface for comparing 8-bit scalar data
11 according to the current locale
12
13 HAS BEEN DEPRECATED
14
15 That is, please do not use it anymore for any new applications
16 and please migrate the old applications away from it because its
17 functionality was integrated into the Perl core language in the
18 release 5.003_06.
19
20 See the perllocale manual page for further information.
21
22 ***
23
f06db76b
AD
24=head1 SYNOPSIS
25
69b19ea2 26 use I18N::Collate;
f06db76b 27 setlocale(LC_COLLATE, 'locale-of-your-choice');
69b19ea2 28 $s1 = new I18N::Collate "scalar_data_1";
29 $s2 = new I18N::Collate "scalar_data_2";
f06db76b
AD
30
31=head1 DESCRIPTION
32
This module provides you with objects that will collate
according to your national character set, provided that the
POSIX setlocale() function is supported on your system.
f06db76b
AD
36
37You can compare $s1 and $s2 above with
38
39 $s1 le $s2
40
To extract the data itself, you'll need a dereference: $$s1
42
6158a1ac
CS
This module uses POSIX::setlocale(). The basic collation conversion is
done by strxfrm(), which, being a decent C routine, terminates at NUL
characters. collate_xfrm() handles embedded NUL characters gracefully.
c2960299 46
6158a1ac
CS
The available locales depend on your operating system; try whether
C<locale -a> shows them, check the man pages for "locale" or "nlsinfo",
or take the direct approach: C<ls /usr/lib/nls/loc>, C<ls /usr/lib/nls>
or C<ls /usr/lib/locale>. Not all the locales that your vendor supports
are necessarily installed: please consult your operating system's
documentation and possibly your local system administrator. The
locale names are probably something like C<xx_XX.(ISO)?8859-N> or
C<xx_XX.(ISO)?8859N>; for example, C<fr_CH.ISO8859-1> is the Swiss (CH)
variant of French (fr), ISO Latin (8859) 1 (-1), which is the Western
European character set.
f06db76b
AD
57
58=cut
59
69b19ea2 60# I18N::Collate.pm
a0d0e21e 61#
5aabfad6 62# Author: Jarkko Hietaniemi <F<jhi@iki.fi>>
a0d0e21e
LW
63# Helsinki University of Technology, Finland
64#
5aabfad6 65# Acks: Guy Decoux <F<decoux@moulon.inra.fr>> understood
a0d0e21e
LW
66# overloading magic much deeper than I and told
67# how to cut the size of this code by more than half.
68# (my first version did overload all of lt gt eq le ge cmp)
69#
70# Purpose: compare 8-bit scalar data according to the current locale
71#
72# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
73#
74# Exports: setlocale 1)
75# collate_xfrm 2)
76#
77# Overloads: cmp # 3)
78#
69b19ea2 79# Usage: use I18N::Collate;
c2960299 80# setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
69b19ea2 81# $s1 = new I18N::Collate "scalar_data_1";
82# $s2 = new I18N::Collate "scalar_data_2";
a0d0e21e
LW
83#
84# now you can compare $s1 and $s2: $s1 le $s2
85# to extract the data itself, you need to deref: $$s1
86#
87# Notes:
88# 1) this uses POSIX::setlocale
#   2) the basic collation conversion is done by strxfrm(), which,
#      being a decent C routine, terminates at NUL characters.
#      collate_xfrm handles embedded NUL characters gracefully.
92# 3) due to cmp and overload magic, lt le eq ge gt work also
93# 4) the available locales depend on your operating system;
c2960299
AD
94# try whether "locale -a" shows them or man pages for
95# "locale" or "nlsinfo" work or the more direct
a0d0e21e 96# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
c2960299
AD
97# Not all the locales that your vendor supports
98# are necessarily installed: please consult your
99# operating system's documentation.
a0d0e21e 100# The locale names are probably something like
c2960299
AD
101# 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
102# for example 'fr_CH.ISO8859-1' is the Swiss (CH)
103# variant of French (fr), ISO Latin (8859) 1 (-1)
104# which is the Western European character set.
a0d0e21e 105#
6b48aaa4 106# Updated: 19961005
a0d0e21e
LW
107#
108# ---
109
# Module setup: POSIX imports, Exporter configuration and operator
# overloading.
#
# Only strxfrm() and the LC_COLLATE constant are imported from POSIX.
# POSIX's setlocale() is deliberately NOT imported because this package
# defines (and exports) its own setlocale() wrapper.
use POSIX qw(strxfrm LC_COLLATE);

require Exporter;

@ISA       = qw(Exporter);
@EXPORT    = qw(collate_xfrm setlocale LC_COLLATE);
@EXPORT_OK = ();

# Only "cmp" is overloaded.  The handler is given as a string, i.e. by
# name, and "fallback => 1" lets lt, le, eq, ge and gt work through cmp.
use overload
    'fallback' => 1,
    'cmp'      => 'collate_cmp';
122
6b48aaa4
JH
# new CLASS, DATA
#
# Constructor.  Stores a copy of DATA in a fresh scalar and returns a
# blessed reference to it, so the caller gets the data back with $$obj.
#
# The object is blessed into the invocant's class, so subclasses and
# $obj->new(...) work; when no class is supplied (plain function call
# with an undefined first argument) it falls back to I18N::Collate.
#
# When warnings are enabled ($^W) a deprecation notice is emitted, but
# only once per process: the package variable
# $please_use_I18N_Collate_even_if_deprecated records that it has been
# shown, and a caller may set it beforehand to silence the notice.
sub new {
    my ($class, $data) = @_;

    $class = ref($class) || $class || 'I18N::Collate';

    if ($^W && $] >= 5.003_06
        && !$please_use_I18N_Collate_even_if_deprecated) {
        warn <<___EOD___;
***

 WARNING: starting from the Perl version 5.003_06
 the I18N::Collate interface for comparing 8-bit scalar data
 according to the current locale

 HAS BEEN DEPRECATED

 That is, please do not use it anymore for any new applications
 and please migrate the old applications away from it because its
 functionality was integrated into the Perl core language in the
 release 5.003_06.

 See the perllocale manual page for further information.

***
___EOD___
        $please_use_I18N_Collate_even_if_deprecated++;
    }

    return bless \$data, $class;
}
a0d0e21e
LW
152
# setlocale [CATEGORY [, LOCALE]]
#
# Wrapper around POSIX::setlocale().  When CATEGORY is defined, the call
# is forwarded to POSIX::setlocale(CATEGORY, LOCALE); when it is not,
# the process locale is left untouched.
#
# In both cases the package variable $LOCALE is updated and returned.
# It is the explicit LOCALE argument if that is true, otherwise the
# first non-empty value of the environment variables LC_ALL, LC_COLLATE
# and LANG (POSIX precedence: LC_ALL overrides LC_COLLATE, which
# overrides LANG), otherwise the empty string.
sub setlocale {
    my ($category, $locale) = @_;

    POSIX::setlocale($category, $locale) if defined $category;

    $LOCALE = $locale
           || $ENV{'LC_ALL'}
           || $ENV{'LC_COLLATE'}
           || $ENV{'LANG'}
           || '';

    return $LOCALE;
}
160
# C OPERAND
#
# Returns the collation key for OPERAND, computing it with
# collate_xfrm() on first use and caching it in $C->{$LOCALE}{data}, so
# each distinct string is transformed at most once per locale name.
#
# OPERAND is normally an I18N::Collate object (a reference to a scalar)
# and is dereferenced to get at the data.  A plain, non-reference
# operand -- as handed over when an object is compared against an
# ordinary string -- is used as the data itself instead of being
# (symbolically) dereferenced.
sub C {
    my $s = ref($_[0]) ? ${ $_[0] } : $_[0];

    my $cache = $C->{$LOCALE} ||= {};

    $cache->{$s} = collate_xfrm($s)
        unless defined $cache->{$s};    # cache when met

    return $cache->{$s};
}
169
# collate_xfrm STRING
#
# Returns STRING transformed for collation.  STRING is split on runs of
# NUL characters; each NUL-free piece is passed to strxfrm() with a NUL
# appended, and each run of NULs is copied through unchanged, so
# embedded NULs do not cut the transformation short.
#
# An undefined STRING yields the empty string.
sub collate_xfrm {
    my $s = $_[0];

    return '' unless defined $s;

    # The capturing group makes split() return the NUL runs as well.
    return join '',
           map { /^\000/ ? $_ : strxfrm("$_\000") }
           split(/(\000+)/, $s);
}
180
# collate_cmp LEFT, RIGHT [, SWAPPED]
#
# Handler for the overloaded "cmp" operator: compares the collation
# keys (see C()) of the two operands and returns -1, 0 or 1.
#
# overload passes a true third argument when the operands were swapped
# to put the object first (e.g. "string" cmp $obj); the result is then
# negated so that it still describes the comparison as written.
sub collate_cmp {
    my ($left, $right, $swapped) = @_;

    my $order = C($left) cmp C($right);

    return $swapped ? -$order : $order;
}
184
# Initialise $LOCALE at load time.  Called without arguments,
# setlocale() leaves the process locale alone and only records the
# locale name taken from the environment.

I18N::Collate::setlocale();

1; # keep require happy