This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Surely a typo.
[perl5.git] / lib / open.pm
... / ...
CommitLineData
1package open;
2use Carp;
3$open::hint_bits = 0x20000;
4
5our $VERSION = '1.01';
6
7my $locale_encoding;
8
9sub in_locale { $^H & ($locale::hint_bits || 0)}
10
11sub _get_locale_encoding {
12 unless (defined $locale_encoding) {
13 # I18N::Langinfo isn't available everywhere
14 eval {
15 require I18N::Langinfo;
16 I18N::Langinfo->import(qw(langinfo CODESET));
17 $locale_encoding = langinfo(CODESET());
18 };
19 my $country_language;
20
21 no warnings 'uninitialized';
22
23 if (not $locale_encoding && in_locale()) {
24 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
25 ($country_language, $locale_encoding) = ($1, $2);
26 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
27 ($country_language, $locale_encoding) = ($1, $2);
28 }
29 } elsif (not $locale_encoding) {
30 if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
31 $ENV{LANG} =~ /\butf-?8\b/i) {
32 $locale_encoding = 'utf8';
33 }
34 # Could do more heuristics based on the country and language
35 # parts of LC_ALL and LANG (the parts before the dot (if any)),
36 # since we have Locale::Country and Locale::Language available.
37 # TODO: get a database of Language -> Encoding mappings
38 # (the Estonian database at http://www.eki.ee/letter/
39 # would be excellent!) --jhi
40 }
41 if (defined $locale_encoding &&
42 $locale_encoding eq 'euc' &&
43 defined $country_language) {
44 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
45 $locale_encoding = 'euc-jp';
46 } elsif ($country_language =~ /^ko_KR|korean?$/i) {
47 $locale_encoding = 'euc-kr';
48 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
49 $locale_encoding = 'euc-cn';
50 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
51 $locale_encoding = 'big5';
52 }
53 croak "Locale encoding 'euc' too ambiguous"
54 if $locale_encoding eq 'euc';
55 }
56 }
57}
58
59sub import {
60 my ($class,@args) = @_;
61 croak("`use open' needs explicit list of disciplines") unless @args;
62 my $std;
63 $^H |= $open::hint_bits;
64 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
65 while (@args) {
66 my $type = shift(@args);
67 my $dscp;
68 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
69 $type = 'IO';
70 $dscp = ":$1";
71 } elsif ($type eq ':std') {
72 $std = 1;
73 next;
74 } else {
75 $dscp = shift(@args) || '';
76 }
77 my @val;
78 foreach my $layer (split(/\s+/,$dscp)) {
79 $layer =~ s/^://;
80 if ($layer eq 'locale') {
81 use Encode;
82 _get_locale_encoding()
83 unless defined $locale_encoding;
84 (carp("Cannot figure out an encoding to use"), last)
85 unless defined $locale_encoding;
86 if ($locale_encoding =~ /^utf-?8$/i) {
87 $layer = "utf8";
88 } else {
89 $layer = "encoding($locale_encoding)";
90 }
91 $std = 1;
92 } else {
93 unless(PerlIO::Layer::->find($layer)) {
94 carp("Unknown discipline layer '$layer'");
95 }
96 }
97 push(@val,":$layer");
98 if ($layer =~ /^(crlf|raw)$/) {
99 $^H{"open_$type"} = $layer;
100 }
101 }
102 if ($type eq 'IN') {
103 $in = join(' ',@val);
104 }
105 elsif ($type eq 'OUT') {
106 $out = join(' ',@val);
107 }
108 elsif ($type eq 'IO') {
109 $in = $out = join(' ',@val);
110 }
111 else {
112 croak "Unknown discipline class '$type'";
113 }
114 }
115 ${^OPEN} = join("\0",$in,$out) if $in or $out;
116 if ($std) {
117 if ($in) {
118 if ($in =~ /:utf8\b/) {
119 binmode(STDIN, ":utf8");
120 } elsif ($in =~ /(\w+\(.+\))/) {
121 binmode(STDIN, ":$1");
122 }
123 }
124 if ($out) {
125 if ($out =~ /:utf8\b/) {
126 binmode(STDOUT, ":utf8");
127 binmode(STDERR, ":utf8");
128 } elsif ($out =~ /(\w+\(.+\))/) {
129 binmode(STDOUT, ":$1");
130 binmode(STDERR, ":$1");
131 }
132 }
133 }
134}
135
1361;
137__END__
138
139=head1 NAME
140
141open - perl pragma to set default disciplines for input and output
142
143=head1 SYNOPSIS
144
145 use open IN => ":crlf", OUT => ":raw";
146 use open OUT => ':utf8';
147 use open IO => ":encoding(iso-8859-7)";
148
149 use open IO => ':locale';
150
151 use open ':utf8';
152 use open ':locale';
153 use open ':encoding(iso-8859-7)';
154
155 use open ':std';
156
157=head1 DESCRIPTION
158
159Full-fledged support for I/O disciplines is now implemented provided
160Perl is configured to use PerlIO as its IO system (which is now the
161default).
162
163The C<open> pragma serves as one of the interfaces to declare default
164"layers" (aka disciplines) for all I/O.
165
166The C<open> pragma is used to declare one or more default layers for
167I/O operations. Any open(), readpipe() (aka qx//) and similar
168operators found within the lexical scope of this pragma will use the
169declared defaults.
170
171With the C<IN> subpragma you can declare the default layers
172of input streams, and with the C<OUT> subpragma you can declare
173the default layers of output streams. With the C<IO> subpragma
174you can control both input and output streams simultaneously.
175
176If you have a legacy encoding, you can use the C<:encoding(...)> tag.
177
178if you want to set your encoding disciplines based on your
179locale environment variables, you can use the C<:locale> tag.
180For example:
181
182 $ENV{LANG} = 'ru_RU.KOI8-R';
183 # the :locale will probe the locale environment variables like LANG
184 use open OUT => ':locale';
185 open(O, ">koi8");
186 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
187 close O;
188 open(I, "<koi8");
189 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
190 close I;
191
192These are equivalent
193
194 use open ':utf8';
195 use open IO => ':utf8';
196
197as are these
198
199 use open ':locale';
200 use open IO => ':locale';
201
202and these
203
204 use open ':encoding(iso-8859-7)';
205 use open IO => ':encoding(iso-8859-7)';
206
207When open() is given an explicit list of layers they are appended to
208the list declared using this pragma.
209
210The C<:std> subpragma on its own has no effect, but if combined with
211the C<:utf8> or C<:encoding> subpragmas, it converts the standard
212filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
213for input/output handles. For example, if both input and out are
214chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
215STDERR are also in C<:utf8>. On the other hand, if only output is
216chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the
217STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
218implicitly turns on C<:std>.
219
220The logic of C<:locale> is as follows:
221
222=over 4
223
224=item 1.
225
226If the platform supports the langinfo(CODESET) interface, the codeset
227returned is used as the default encoding for the open pragma.
228
229=item 2.
230
231If 1. didn't work but we are under the locale pragma, the environment
232variables LC_ALL and LANG (in that order) are matched for encodings
233(the part after C<.>, if any), and if any found, that is used
234as the default encoding for the open pragma.
235
236=item 3.
237
238If 1. and 2. didn't work, the environment variables LC_ALL and LANG
239(in that order) are matched for anything looking like UTF-8, and if
240any found, C<:utf8> is used as the default encoding for the open
241pragma.
242
243=back
244
245Directory handles may also support disciplines in future.
246
247=head1 NONPERLIO FUNCTIONALITY
248
249If Perl is not built to use PerlIO as its IO system then only the two
250pseudo-disciplines ":raw" and ":crlf" are available.
251
252The ":raw" discipline corresponds to "binary mode" and the ":crlf"
253discipline corresponds to "text mode" on platforms that distinguish
254between the two modes when opening files (which is many DOS-like
255platforms, including Windows). These two disciplines are no-ops on
256platforms where binmode() is a no-op, but perform their functions
257everywhere if PerlIO is enabled.
258
259=head1 IMPLEMENTATION DETAILS
260
261There is a class method in C<PerlIO::Layer> C<find> which is
262implemented as XS code. It is called by C<import> to validate the
263layers:
264
265 PerlIO::Layer::->find("perlio")
266
267The return value (if defined) is a Perl object, of class
268C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
269yet there is nothing useful you can do with the object at the perl
270level.
271
272=head1 SEE ALSO
273
274L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
275L<encoding>
276
277=cut