Commit | Line | Data |
---|---|---|
a0d0e21e LW |
1 | package I18N::Collate; |
2 | ||
b75c8c73 MS |
3 | use strict; |
4 | our $VERSION = '1.00'; | |
5 | ||
f06db76b AD |
6 | =head1 NAME |
7 | ||
69b19ea2 | 8 | I18N::Collate - compare 8-bit scalar data according to the current locale |
f06db76b | 9 | |
91542540 BD |
10 | =head1 SYNOPSIS |
11 | ||
12 | use I18N::Collate; | |
13 | setlocale(LC_COLLATE, 'locale-of-your-choice'); | |
14 | $s1 = new I18N::Collate "scalar_data_1"; | |
15 | $s2 = new I18N::Collate "scalar_data_2"; | |
16 | ||
17 | =head1 DESCRIPTION | |
18 | ||
4413da2b JH |
19 | *** |
20 | ||
21 | WARNING: starting from the Perl version 5.003_06 | |
22 | the I18N::Collate interface for comparing 8-bit scalar data | |
23 | according to the current locale | |
24 | ||
25 | HAS BEEN DEPRECATED | |
26 | ||
27 | That is, please do not use it anymore for any new applications | |
28 | and please migrate the old applications away from it because its | |
29 | functionality was integrated into the Perl core language in the | |
30 | release 5.003_06. | |
31 | ||
32 | See the perllocale manual page for further information. | |
33 | ||
34 | *** | |
35 | ||
f06db76b | 36 | This module provides you with objects that will collate |
69b19ea2 | 37 | according to your national character set, provided that the |
38 | POSIX setlocale() function is supported on your system. | |
f06db76b AD |
39 | |
40 | You can compare $s1 and $s2 above with | |
41 | ||
42 | $s1 le $s2 | |
43 | ||
44 | to extract the data itself, you'll need a dereference: $$s1 | |
45 | ||
6158a1ac CS |
46 | This module uses POSIX::setlocale(). The basic collation conversion is |
47 | done by strxfrm() which terminates at NUL characters being a decent C | |
48 | routine. collate_xfrm() handles embedded NUL characters gracefully. | |
c2960299 | 49 | |
6158a1ac CS |
50 | The available locales depend on your operating system; try whether |
51 | C<locale -a> shows them or man pages for "locale" or "nlsinfo" or the | |
52 | direct approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or | |
53 | C<ls /usr/lib/locale>. Not all the locales that your vendor supports | |
54 | are necessarily installed: please consult your operating system's | |
55 | documentation and possibly your local system administration. The | |
56 | locale names are probably something like C<xx_XX.(ISO)?8859-N> or | |
57 | C<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH) | |
58 | variant of French (fr), ISO Latin (8859) 1 (-1) which is the Western | |
59 | European character set. | |
f06db76b AD |
60 | |
61 | =cut | |
62 | ||
69b19ea2 | 63 | # I18N::Collate.pm |
a0d0e21e | 64 | # |
5aabfad6 | 65 | # Author: Jarkko Hietaniemi <F<jhi@iki.fi>> |
a0d0e21e LW |
66 | # Helsinki University of Technology, Finland |
67 | # | |
5aabfad6 | 68 | # Acks: Guy Decoux <F<decoux@moulon.inra.fr>> understood |
a0d0e21e LW |
69 | # overloading magic much deeper than I and told |
70 | # how to cut the size of this code by more than half. | |
71 | # (my first version did overload all of lt gt eq le ge cmp) | |
72 | # | |
73 | # Purpose: compare 8-bit scalar data according to the current locale | |
74 | # | |
75 | # Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm() | |
76 | # | |
77 | # Exports: setlocale 1) | |
78 | # collate_xfrm 2) | |
79 | # | |
80 | # Overloads: cmp # 3) | |
81 | # | |
69b19ea2 | 82 | # Usage: use I18N::Collate; |
c2960299 | 83 | # setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4) |
69b19ea2 | 84 | # $s1 = new I18N::Collate "scalar_data_1"; |
85 | # $s2 = new I18N::Collate "scalar_data_2"; | |
a0d0e21e LW |
86 | # |
87 | # now you can compare $s1 and $s2: $s1 le $s2 | |
88 | # to extract the data itself, you need to deref: $$s1 | |
89 | # | |
90 | # Notes: | |
91 | # 1) this uses POSIX::setlocale | |
92 | # 2) the basic collation conversion is done by strxfrm() which | |
93 | # terminates at NUL characters being a decent C routine. | |
94 | # collate_xfrm handles embedded NUL characters gracefully. | |
95 | # 3) due to cmp and overload magic, lt le eq ge gt work also | |
96 | # 4) the available locales depend on your operating system; | |
c2960299 AD |
97 | # try whether "locale -a" shows them or man pages for |
98 | # "locale" or "nlsinfo" work or the more direct | |
a0d0e21e | 99 | # approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls". |
c2960299 AD |
100 | # Not all the locales that your vendor supports |
101 | # are necessarily installed: please consult your | |
102 | # operating system's documentation. | |
a0d0e21e | 103 | # The locale names are probably something like |
c2960299 AD |
104 | # 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N', |
105 | # for example 'fr_CH.ISO8859-1' is the Swiss (CH) | |
106 | # variant of French (fr), ISO Latin (8859) 1 (-1) | |
107 | # which is the Western European character set. | |
a0d0e21e | 108 | # |
6b48aaa4 | 109 | # Updated: 19961005 |
a0d0e21e LW |
110 | # |
111 | # --- | |
112 | ||
113 | use POSIX qw(strxfrm LC_COLLATE); | |
d3a7d8c7 | 114 | use warnings::register; |
a0d0e21e LW |
115 | |
116 | require Exporter; | |
117 | ||
b75c8c73 MS |
118 | our @ISA = qw(Exporter); |
119 | our @EXPORT = qw(collate_xfrm setlocale LC_COLLATE); | |
120 | our @EXPORT_OK = qw(); | |
a0d0e21e | 121 | |
a5f75d66 | 122 | use overload qw( |
a0d0e21e LW |
123 | fallback 1 |
124 | cmp collate_cmp | |
125 | ); | |
126 | ||
b75c8c73 MS |
127 | our($LOCALE, $C); |
128 | ||
129 | our $please_use_I18N_Collate_even_if_deprecated = 0; | |
6b48aaa4 JH |
130 | sub new { |
131 | my $new = $_[1]; | |
132 | ||
d3a7d8c7 | 133 | if (warnings::enabled() && $] >= 5.003_06) { |
6b48aaa4 | 134 | unless ($please_use_I18N_Collate_even_if_deprecated) { |
d3a7d8c7 | 135 | warnings::warn <<___EOD___; |
6b48aaa4 JH |
136 | *** |
137 | ||
4413da2b JH |
138 | WARNING: starting from the Perl version 5.003_06 |
139 | the I18N::Collate interface for comparing 8-bit scalar data | |
140 | according to the current locale | |
6b48aaa4 JH |
141 | |
142 | HAS BEEN DEPRECATED | |
143 | ||
4413da2b JH |
144 | That is, please do not use it anymore for any new applications |
145 | and please migrate the old applications away from it because its | |
146 | functionality was integrated into the Perl core language in the | |
147 | release 5.003_06. | |
6b48aaa4 | 148 | |
4413da2b | 149 | See the perllocale manual page for further information. |
6b48aaa4 JH |
150 | |
151 | *** | |
152 | ___EOD___ | |
153 | $please_use_I18N_Collate_even_if_deprecated++; | |
154 | } | |
155 | } | |
156 | ||
157 | bless \$new; | |
158 | } | |
a0d0e21e LW |
159 | |
160 | sub setlocale { | |
161 | my ($category, $locale) = @_[0,1]; | |
162 | ||
163 | POSIX::setlocale($category, $locale) if (defined $category); | |
164 | # the current $LOCALE | |
165 | $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || ''; | |
166 | } | |
167 | ||
168 | sub C { | |
169 | my $s = ${$_[0]}; | |
170 | ||
171 | $C->{$LOCALE}->{$s} = collate_xfrm($s) | |
172 | unless (defined $C->{$LOCALE}->{$s}); # cache when met | |
173 | ||
174 | $C->{$LOCALE}->{$s}; | |
175 | } | |
176 | ||
177 | sub collate_xfrm { | |
178 | my $s = $_[0]; | |
179 | my $x = ''; | |
180 | ||
181 | for (split(/(\000+)/, $s)) { | |
182 | $x .= (/^\000/) ? $_ : strxfrm("$_\000"); | |
183 | } | |
184 | ||
185 | $x; | |
186 | } | |
187 | ||
188 | sub collate_cmp { | |
189 | &C($_[0]) cmp &C($_[1]); | |
190 | } | |
191 | ||
192 | # init $LOCALE | |
193 | ||
194 | &I18N::Collate::setlocale(); | |
195 | ||
196 | 1; # keep require happy |