This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: Change 15637: Behaviour (make viacode 0xFFFE to return BYTE ORDER MARK,
[perl5.git] / lib / I18N / Collate.pm
CommitLineData
a0d0e21e
LW
1package I18N::Collate;
2
b75c8c73
MS
3use strict;
4our $VERSION = '1.00';
5
f06db76b
AD
6=head1 NAME
7
69b19ea2 8I18N::Collate - compare 8-bit scalar data according to the current locale
f06db76b 9
91542540
BD
10=head1 SYNOPSIS
11
12 use I18N::Collate;
13 setlocale(LC_COLLATE, 'locale-of-your-choice');
14 $s1 = new I18N::Collate "scalar_data_1";
15 $s2 = new I18N::Collate "scalar_data_2";
16
17=head1 DESCRIPTION
18
4413da2b
JH
19 ***
20
21 WARNING: starting from the Perl version 5.003_06
22 the I18N::Collate interface for comparing 8-bit scalar data
23 according to the current locale
24
25 HAS BEEN DEPRECATED
26
27 That is, please do not use it anymore for any new applications
28 and please migrate the old applications away from it because its
29 functionality was integrated into the Perl core language in the
30 release 5.003_06.
31
32 See the perllocale manual page for further information.
33
34 ***
35
f06db76b 36This module provides you with objects that will collate
69b19ea2 37according to your national character set, provided that the
38POSIX setlocale() function is supported on your system.
f06db76b
AD
39
40You can compare $s1 and $s2 above with
41
42 $s1 le $s2
43
44to extract the data itself, you'll need a dereference: $$s1
45
6158a1ac
CS
46This module uses POSIX::setlocale(). The basic collation conversion is
47done by strxfrm() which terminates at NUL characters being a decent C
48routine. collate_xfrm() handles embedded NUL characters gracefully.
c2960299 49
6158a1ac
CS
50The available locales depend on your operating system; try whether
51C<locale -a> shows them or man pages for "locale" or "nlsinfo" or the
52direct approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or
53C<ls /usr/lib/locale>. Not all the locales that your vendor supports
54are necessarily installed: please consult your operating system's
55documentation and possibly your local system administration. The
56locale names are probably something like C<xx_XX.(ISO)?8859-N> or
57C<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH)
58variant of French (fr), ISO Latin (8859) 1 (-1) which is the Western
59European character set.
f06db76b
AD
60
61=cut
62
69b19ea2 63# I18N::Collate.pm
a0d0e21e 64#
5aabfad6 65# Author: Jarkko Hietaniemi <F<jhi@iki.fi>>
a0d0e21e
LW
66# Helsinki University of Technology, Finland
67#
5aabfad6 68# Acks: Guy Decoux <F<decoux@moulon.inra.fr>> understood
a0d0e21e
LW
69# overloading magic much deeper than I and told
70# how to cut the size of this code by more than half.
71# (my first version did overload all of lt gt eq le ge cmp)
72#
73# Purpose: compare 8-bit scalar data according to the current locale
74#
75# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
76#
77# Exports: setlocale 1)
78# collate_xfrm 2)
79#
80# Overloads: cmp # 3)
81#
69b19ea2 82# Usage: use I18N::Collate;
c2960299 83# setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
69b19ea2 84# $s1 = new I18N::Collate "scalar_data_1";
85# $s2 = new I18N::Collate "scalar_data_2";
a0d0e21e
LW
86#
87# now you can compare $s1 and $s2: $s1 le $s2
88# to extract the data itself, you need to deref: $$s1
89#
90# Notes:
91# 1) this uses POSIX::setlocale
92# 2) the basic collation conversion is done by strxfrm() which
93# terminates at NUL characters being a decent C routine.
94# collate_xfrm handles embedded NUL characters gracefully.
95# 3) due to cmp and overload magic, lt le eq ge gt work also
96# 4) the available locales depend on your operating system;
c2960299
AD
97# try whether "locale -a" shows them or man pages for
98# "locale" or "nlsinfo" work or the more direct
a0d0e21e 99# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
c2960299
AD
100# Not all the locales that your vendor supports
101# are necessarily installed: please consult your
102# operating system's documentation.
a0d0e21e 103# The locale names are probably something like
c2960299
AD
104# 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
105# for example 'fr_CH.ISO8859-1' is the Swiss (CH)
106# variant of French (fr), ISO Latin (8859) 1 (-1)
107# which is the Western European character set.
a0d0e21e 108#
6b48aaa4 109# Updated: 19961005
a0d0e21e
LW
110#
111# ---
112
113use POSIX qw(strxfrm LC_COLLATE);
d3a7d8c7 114use warnings::register;
a0d0e21e
LW
115
116require Exporter;
117
b75c8c73
MS
118our @ISA = qw(Exporter);
119our @EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
120our @EXPORT_OK = qw();
a0d0e21e 121
a5f75d66 122use overload qw(
a0d0e21e
LW
123fallback 1
124cmp collate_cmp
125);
126
b75c8c73
MS
127our($LOCALE, $C);
128
129our $please_use_I18N_Collate_even_if_deprecated = 0;
6b48aaa4
JH
130sub new {
131 my $new = $_[1];
132
d3a7d8c7 133 if (warnings::enabled() && $] >= 5.003_06) {
6b48aaa4 134 unless ($please_use_I18N_Collate_even_if_deprecated) {
d3a7d8c7 135 warnings::warn <<___EOD___;
6b48aaa4
JH
136***
137
4413da2b
JH
138 WARNING: starting from the Perl version 5.003_06
139 the I18N::Collate interface for comparing 8-bit scalar data
140 according to the current locale
6b48aaa4
JH
141
142 HAS BEEN DEPRECATED
143
4413da2b
JH
144 That is, please do not use it anymore for any new applications
145 and please migrate the old applications away from it because its
146 functionality was integrated into the Perl core language in the
147 release 5.003_06.
6b48aaa4 148
4413da2b 149 See the perllocale manual page for further information.
6b48aaa4
JH
150
151***
152___EOD___
153 $please_use_I18N_Collate_even_if_deprecated++;
154 }
155 }
156
157 bless \$new;
158}
a0d0e21e
LW
159
160sub setlocale {
161 my ($category, $locale) = @_[0,1];
162
163 POSIX::setlocale($category, $locale) if (defined $category);
164 # the current $LOCALE
165 $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
166}
167
168sub C {
169 my $s = ${$_[0]};
170
171 $C->{$LOCALE}->{$s} = collate_xfrm($s)
172 unless (defined $C->{$LOCALE}->{$s}); # cache when met
173
174 $C->{$LOCALE}->{$s};
175}
176
177sub collate_xfrm {
178 my $s = $_[0];
179 my $x = '';
180
181 for (split(/(\000+)/, $s)) {
182 $x .= (/^\000/) ? $_ : strxfrm("$_\000");
183 }
184
185 $x;
186}
187
188sub collate_cmp {
189 &C($_[0]) cmp &C($_[1]);
190}
191
192# init $LOCALE
193
194&I18N::Collate::setlocale();
195
1961; # keep require happy