This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
exec.t tweak
[perl5.git] / lib / open.pm
CommitLineData
d1edabcf 1package open;
ac27b0f5 2use Carp;
16fe6d59
GS
3$open::hint_bits = 0x20000;
4
0c4f7ff0 5our $VERSION = '1.01';
b75c8c73 6
58d53262
JH
7my $locale_encoding;
8
b178108d 9sub in_locale { $^H & ($locale::hint_bits || 0)}
58d53262
JH
10
11sub _get_locale_encoding {
12 unless (defined $locale_encoding) {
276c9210 13 # I18N::Langinfo isn't available everywhere
9615f2ee
BT
14 eval {
15 require I18N::Langinfo;
16 I18N::Langinfo->import(qw(langinfo CODESET));
ba6ce41c 17 $locale_encoding = langinfo(CODESET());
9615f2ee 18 };
11fc5dc3 19 my $country_language;
a4157ebb
JH
20
21 no warnings 'uninitialized';
22
58d53262 23 if (not $locale_encoding && in_locale()) {
11fc5dc3
JH
24 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
25 ($country_language, $locale_encoding) = ($1, $2);
26 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
27 ($country_language, $locale_encoding) = ($1, $2);
58d53262 28 }
1e616cf5
JH
29 } elsif (not $locale_encoding) {
30 if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
31 $ENV{LANG} =~ /\butf-?8\b/i) {
32 $locale_encoding = 'utf8';
33 }
34 # Could do more heuristics based on the country and language
58d53262
JH
35 # parts of LC_ALL and LANG (the parts before the dot (if any)),
36 # since we have Locale::Country and Locale::Language available.
37 # TODO: get a database of Language -> Encoding mappings
421e5dc3
JH
38 # (the Estonian database at http://www.eki.ee/letter/
39 # would be excellent!) --jhi
58d53262 40 }
11fc5dc3
JH
41 if (defined $locale_encoding &&
42 $locale_encoding eq 'euc' &&
43 defined $country_language) {
56fb2e42 44 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
1e616cf5 45 $locale_encoding = 'euc-jp';
5a192dee 46 } elsif ($country_language =~ /^ko_KR|korean?$/i) {
1e616cf5 47 $locale_encoding = 'euc-kr';
a4157ebb
JH
48 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
49 $locale_encoding = 'euc-cn';
56fb2e42 50 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
a4157ebb 51 $locale_encoding = 'big5';
11fc5dc3
JH
52 }
53 croak "Locale encoding 'euc' too ambiguous"
54 if $locale_encoding eq 'euc';
55 }
58d53262
JH
56 }
57}
58
16fe6d59 59sub import {
dfebf958
NIS
60 my ($class,@args) = @_;
61 croak("`use open' needs explicit list of disciplines") unless @args;
b178108d 62 my $std;
16fe6d59 63 $^H |= $open::hint_bits;
ba6ce41c 64 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
dfebf958
NIS
65 while (@args) {
66 my $type = shift(@args);
1e616cf5
JH
67 my $dscp;
68 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
69 $type = 'IO';
70 $dscp = ":$1";
b178108d
JH
71 } elsif ($type eq ':std') {
72 $std = 1;
73 next;
1e616cf5 74 } else {
725d232a 75 $dscp = shift(@args) || '';
1e616cf5 76 }
ac27b0f5 77 my @val;
1e616cf5 78 foreach my $layer (split(/\s+/,$dscp)) {
dfebf958 79 $layer =~ s/^://;
58d53262
JH
80 if ($layer eq 'locale') {
81 use Encode;
82 _get_locale_encoding()
83 unless defined $locale_encoding;
a4157ebb 84 (carp("Cannot figure out an encoding to use"), last)
58d53262 85 unless defined $locale_encoding;
11fc5dc3
JH
86 if ($locale_encoding =~ /^utf-?8$/i) {
87 $layer = "utf8";
88 } else {
738b23dc 89 $layer = "encoding($locale_encoding)";
11fc5dc3 90 }
b178108d 91 $std = 1;
97ed432b
NIS
92 } else {
93 unless(PerlIO::Layer::->find($layer)) {
94 carp("Unknown discipline layer '$layer'");
95 }
ac27b0f5
NIS
96 }
97 push(@val,":$layer");
98 if ($layer =~ /^(crlf|raw)$/) {
99 $^H{"open_$type"} = $layer;
16fe6d59 100 }
ac27b0f5
NIS
101 }
102 if ($type eq 'IN') {
103 $in = join(' ',@val);
104 }
105 elsif ($type eq 'OUT') {
106 $out = join(' ',@val);
16fe6d59 107 }
1e616cf5 108 elsif ($type eq 'IO') {
f3b00462
JH
109 $in = $out = join(' ',@val);
110 }
16fe6d59 111 else {
ac27b0f5 112 croak "Unknown discipline class '$type'";
16fe6d59
GS
113 }
114 }
a4157ebb 115 ${^OPEN} = join("\0",$in,$out) if $in or $out;
b178108d
JH
116 if ($std) {
117 if ($in) {
118 if ($in =~ /:utf8\b/) {
119 binmode(STDIN, ":utf8");
120 } elsif ($in =~ /(\w+\(.+\))/) {
121 binmode(STDIN, ":$1");
122 }
123 }
124 if ($out) {
125 if ($out =~ /:utf8\b/) {
126 binmode(STDOUT, ":utf8");
127 binmode(STDERR, ":utf8");
128 } elsif ($out =~ /(\w+\(.+\))/) {
129 binmode(STDOUT, ":$1");
130 binmode(STDERR, ":$1");
131 }
132 }
133 }
16fe6d59
GS
134}
135
1361;
137__END__
d1edabcf
GS
138
139=head1 NAME
140
141open - perl pragma to set default disciplines for input and output
142
143=head1 SYNOPSIS
144
1e616cf5
JH
145 use open IN => ":crlf", OUT => ":raw";
146 use open OUT => ':utf8';
147 use open IO => ":encoding(iso-8859-7)";
148
149 use open IO => ':locale';
725d232a 150
1e616cf5
JH
151 use open ':utf8';
152 use open ':locale';
153 use open ':encoding(iso-8859-7)';
d1edabcf 154
b178108d
JH
155 use open ':std';
156
d1edabcf
GS
157=head1 DESCRIPTION
158
d151aa0e
JH
159Full-fledged support for I/O disciplines is now implemented provided
160Perl is configured to use PerlIO as its IO system (which is now the
161default).
16fe6d59 162
7d3b96bb
NIS
163The C<open> pragma serves as one of the interfaces to declare default
164"layers" (aka disciplines) for all I/O.
165
166The C<open> pragma is used to declare one or more default layers for
d151aa0e
JH
167I/O operations. Any open(), readpipe() (aka qx//) and similar
168operators found within the lexical scope of this pragma will use the
169declared defaults.
7d3b96bb 170
1e616cf5 171With the C<IN> subpragma you can declare the default layers
d8d29d4f 172of input streams, and with the C<OUT> subpragma you can declare
1e616cf5
JH
173the default layers of output streams. With the C<IO> subpragma
174you can control both input and output streams simultaneously.
175
176If you have a legacy encoding, you can use the C<:encoding(...)> tag.
177
178if you want to set your encoding disciplines based on your
179locale environment variables, you can use the C<:locale> tag.
180For example:
181
182 $ENV{LANG} = 'ru_RU.KOI8-R';
dbd62f41
JH
183 # the :locale will probe the locale environment variables like LANG
184 use open OUT => ':locale';
1e616cf5 185 open(O, ">koi8");
23bcb45a 186 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
1e616cf5
JH
187 close O;
188 open(I, "<koi8");
23bcb45a 189 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
1e616cf5
JH
190 close I;
191
192These are equivalent
193
194 use open ':utf8';
195 use open IO => ':utf8';
196
197as are these
198
199 use open ':locale';
200 use open IO => ':locale';
201
202and these
203
204 use open ':encoding(iso-8859-7)';
205 use open IO => ':encoding(iso-8859-7)';
206
d151aa0e
JH
207When open() is given an explicit list of layers they are appended to
208the list declared using this pragma.
7d3b96bb 209
b178108d
JH
210The C<:std> subpragma on its own has no effect, but if combined with
211the C<:utf8> or C<:encoding> subpragmas, it converts the standard
212filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
213for input/output handles. For example, if both input and out are
214chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
215STDERR are also in C<:utf8>. On the other hand, if only output is
fb80c70c 216chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the
b178108d
JH
217STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
218implicitly turns on C<:std>.
219
ba9a69eb
JH
220The logic of C<:locale> is as follows:
221
222=over 4
223
224=item 1.
225
226If the platform supports the langinfo(CODESET) interface, the codeset
227returned is used as the default encoding for the open pragma.
228
229=item 2.
230
231If 1. didn't work but we are under the locale pragma, the environment
232variables LC_ALL and LANG (in that order) are matched for encodings
233(the part after C<.>, if any), and if any found, that is used
234as the default encoding for the open pragma.
235
236=item 3.
237
238If 1. and 2. didn't work, the environment variables LC_ALL and LANG
239(in that order) are matched for anything looking like UTF-8, and if
240any found, C<:utf8> is used as the default encoding for the open
241pragma.
242
243=back
244
b310b053
JH
245If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG)
246contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
247the default encoding of your STDIN, STDOUT, and STDERR, and of
248B<any subsequent file open>, is UTF-8.
249
7d3b96bb
NIS
250Directory handles may also support disciplines in future.
251
252=head1 NONPERLIO FUNCTIONALITY
253
d151aa0e
JH
254If Perl is not built to use PerlIO as its IO system then only the two
255pseudo-disciplines ":raw" and ":crlf" are available.
16fe6d59
GS
256
257The ":raw" discipline corresponds to "binary mode" and the ":crlf"
258discipline corresponds to "text mode" on platforms that distinguish
259between the two modes when opening files (which is many DOS-like
d151aa0e
JH
260platforms, including Windows). These two disciplines are no-ops on
261platforms where binmode() is a no-op, but perform their functions
262everywhere if PerlIO is enabled.
7d3b96bb
NIS
263
264=head1 IMPLEMENTATION DETAILS
d1edabcf 265
f3b00462
JH
266There is a class method in C<PerlIO::Layer> C<find> which is
267implemented as XS code. It is called by C<import> to validate the
268layers:
0c4f7ff0
NIS
269
270 PerlIO::Layer::->find("perlio")
271
f3b00462
JH
272The return value (if defined) is a Perl object, of class
273C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
274yet there is nothing useful you can do with the object at the perl
275level.
16fe6d59 276
d1edabcf
GS
277=head1 SEE ALSO
278
1768d7eb
JH
279L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
280L<encoding>
d1edabcf
GS
281
282=cut