Commit | Line | Data |
---|---|---|
bbce6d69 | 1 | package locale; |
2 | ||
fc82b82e | 3 | our $VERSION = '1.09'; |
569f7fc5 | 4 | use Config; |
b75c8c73 | 5 | |
66cbab2c KW |
6 | $Carp::Internal{ (__PACKAGE__) } = 1; |
7 | ||
bbce6d69 | 8 | =head1 NAME |
9 | ||
f36a3959 | 10 | locale - Perl pragma to use or avoid POSIX locales for built-in operations |
bbce6d69 | 11 | |
fc82b82e KW |
12 | =head1 WARNING |
13 | ||
14 | DO NOT USE this pragma in scripts that have multiple | |
15 | L<threads|threads> active. The locale is not local to a single thread. | |
16 | Another thread may change the locale at any time, which could cause at a | |
17 | minimum that a given thread is operating in a locale it isn't expecting | |
18 | to be in. On some platforms, segfaults can also occur. The locale | |
19 | change need not be explicit; some operations cause perl to change the | |
20 | locale itself. You are vulnerable simply by having done a C<"use | |
21 | locale">. | |
22 | ||
bbce6d69 | 23 | =head1 SYNOPSIS |
24 | ||
465d77f9 | 25 | @x = sort @y; # Native-platform/Unicode code point sort order |
bbce6d69 | 26 | { |
27 | use locale; | |
465d77f9 | 28 | @x = sort @y; # Locale-defined sort order |
bbce6d69 | 29 | } |
465d77f9 KW |
30 | @x = sort @y; # Native-platform/Unicode code point sort order |
31 | # again | |
bbce6d69 | 32 | |
33 | =head1 DESCRIPTION | |
34 | ||
35 | This pragma tells the compiler to enable (or disable) the use of POSIX | |
f36a3959 KW |
36 | locales for built-in operations (for example, LC_CTYPE for regular |
37 | expressions, LC_COLLATE for string comparison, and LC_NUMERIC for number | |
38 | formatting). Each "use locale" or "no locale" | |
bbce6d69 | 39 | affects statements to the end of the enclosing BLOCK. |
40 | ||
b8bc843f A |
41 | See L<perllocale> for more detailed information on how Perl supports |
42 | locales. | |
43 | ||
a02ae656 KW |
44 | On systems that don't have locales, this pragma will cause your operations |
45 | to behave as if in the "C" locale; attempts to change the locale will fail. | |
569f7fc5 | 46 | |
bbce6d69 | 47 | =cut |
48 | ||
d6ded950 KW |
49 | # A separate bit is used for each of the two forms of the pragma, to save |
50 | # having to look at %^H for the normal case of a plain 'use locale' without an | |
51 | # argument. | |
66cbab2c | 52 | |
2de3dbcc | 53 | $locale::hint_bits = 0x4; |
d6ded950 KW |
54 | $locale::partial_hint_bits = 0x10; # If pragma has an argument |
55 | ||
56 | # The pseudo-category :characters consists of 2 real ones; but it also is | |
57 | # given its own number, -1, because in the complement form it also has the | |
58 | # side effect of "use feature 'unicode_strings'" | |
d5448623 | 59 | |
bbce6d69 | 60 | sub import { |
66cbab2c | 61 | shift; # should be 'locale'; not checked |
569f7fc5 | 62 | |
d6ded950 KW |
63 | $^H{locale} = 0 unless defined $^H{locale}; |
64 | if (! @_) { # If no parameter, use the plain form that changes all categories | |
65 | $^H |= $locale::hint_bits; | |
66 | ||
67 | } | |
68 | else { | |
69 | my @categories = ( qw(:ctype :collate :messages | |
70 | :numeric :monetary :time) ); | |
71 | for (my $i = 0; $i < @_; $i++) { | |
72 | my $arg = $_[$i]; | |
73 | $complement = $arg =~ s/ : ( ! | not_ ) /:/x; | |
74 | if (! grep { $arg eq $_ } @categories, ":characters") { | |
75 | require Carp; | |
76 | Carp::croak("Unknown parameter '$_[$i]' to 'use locale'"); | |
77 | } | |
78 | ||
79 | if ($complement) { | |
80 | if ($i != 0 || $i < @_ - 1) { | |
81 | require Carp; | |
82 | Carp::croak("Only one argument to 'use locale' allowed" | |
83 | . "if is $complement"); | |
84 | } | |
85 | ||
86 | if ($arg eq ':characters') { | |
87 | push @_, grep { $_ ne ':ctype' && $_ ne ':collate' } | |
88 | @categories; | |
89 | # We add 1 to the category number; This category number | |
90 | # is -1 | |
91 | $^H{locale} |= (1 << 0); | |
92 | } | |
93 | else { | |
94 | push @_, grep { $_ ne $arg } @categories; | |
95 | } | |
96 | next; | |
97 | } | |
98 | elsif ($arg eq ':characters') { | |
99 | push @_, ':ctype', ':collate'; | |
100 | next; | |
101 | } | |
102 | ||
103 | $^H |= $locale::partial_hint_bits; | |
66cbab2c KW |
104 | |
105 | # This form of the pragma overrides the other | |
106 | $^H &= ~$locale::hint_bits; | |
d6ded950 KW |
107 | |
108 | $arg =~ s/^://; | |
109 | ||
395e4985 | 110 | eval { require POSIX; import POSIX 'locale_h'; }; |
395e4985 | 111 | |
d6ded950 | 112 | # Map our names to the ones defined by POSIX |
51cdbd7c | 113 | my $LC = "LC_" . uc($arg); |
395e4985 | 114 | |
51cdbd7c | 115 | my $bit = eval "&POSIX::$LC"; |
467a667a KW |
116 | if (defined $bit) { # XXX Should we warn that this category isn't |
117 | # supported on this platform, or make it | |
118 | # always be the C locale? | |
51cdbd7c KW |
119 | |
120 | # Verify our assumption. | |
121 | if (! ($bit >= 0 && $bit < 31)) { | |
122 | require Carp; | |
123 | Carp::croak("Cannot have ':$arg' parameter to 'use locale'" | |
124 | . " on this platform. Use the 'perlbug' utility" | |
125 | . " to report this problem, or send email to" | |
126 | . " 'perlbug\@perl.org'. $LC=$bit"); | |
127 | } | |
128 | ||
d6ded950 KW |
129 | # 1 is added so that the pseudo-category :characters, which is |
130 | # -1, comes out 0. | |
131 | $^H{locale} |= 1 << ($bit + 1); | |
132 | } | |
66cbab2c KW |
133 | } |
134 | } | |
135 | ||
bbce6d69 | 136 | } |
137 | ||
138 | sub unimport { | |
d6ded950 KW |
139 | $^H &= ~($locale::hint_bits|$locale::partial_hint_bits); |
140 | $^H{locale} = 0; | |
bbce6d69 | 141 | } |
142 | ||
143 | 1; |