This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[patch] :utf8 updates
[perl5.git] / lib / open.pm
CommitLineData
d1edabcf 1package open;
99ef548b 2use warnings;
16fe6d59 3
9fe371da 4our $VERSION = '1.05';
b75c8c73 5
7c0e976d 6require 5.008001; # for PerlIO::get_layers()
58d53262 7
b4ebbc94 8my $locale_encoding;
a4157ebb 9
# _get_encname($layer)
#
# Parses a layer specification of the form "encoding(NAME)" or
# ":encoding(NAME)".
#
# Returns a two-element list (NAME, CANONICAL), where NAME is the text
# found inside the parentheses and CANONICAL is whatever
# Encode::resolve_alias() yields for it.  When Encode does not know the
# name, resolve_alias() contributes nothing usable, so callers must test
# the second element with defined().  Returns the empty list when $layer
# is undefined or is not an encoding(...) layer at all.
sub _get_encname {
    my ($layer) = @_;
    return unless defined $layer && $layer =~ /^:?encoding\((.+)\)$/;

    # Copy the capture before loading anything, so that it cannot be
    # disturbed by code run while Encode is being compiled.
    my $name = $1;

    # Load Encode here rather than relying on every caller having done
    # so; require is a no-op once the module is in %INC.
    require Encode;

    return ($name, Encode::resolve_alias($name));
}
a4157ebb 14
8878f897
T
# croak(@message)
#
# Thin wrapper that loads Carp only when an error is actually raised and
# then transfers control to Carp::croak with the same @_.  The goto form
# replaces this sub's own stack frame, so Carp::croak sees the frames it
# would have seen had it been called directly.
sub croak {
    require Carp;
    goto &Carp::croak;
}
18
7c0e976d
JH
# _drop_oldenc($handle, @layers)
#
# $handle is a filehandle and @layers the list of ":layer" strings this
# pragma is about to install.
#
# When the pragma's last layer is an :encoding(xxx) and the handle's
# existing layer stack already ends in
#
#     ... encoding(xxx) utf8
#
# for the same encoding (compared by the canonical names obtained from
# _get_encname), the existing encoding layer is popped so that the new
# one can take its place.  Popping is easier than skipping the push
# because of the way ${^OPEN} works.  A single ":pop" is enough, since
# the trailing utf8 is just a flag on the encoding layer.
#
# Croaks if either encoding name cannot be resolved.
sub _drop_oldenc {
    my ($handle, @layers) = @_;

    # Only relevant when the new stack chunk ends in an encoding layer.
    return unless @layers >= 1 && $layers[-1] =~ /^:encoding\(.+\)$/;

    # The existing stack has to look like: <something> encoding(xxx) utf8
    my @current = PerlIO::get_layers($handle);
    return unless @current >= 3
               && $current[-1] eq 'utf8'
               && $current[-2] =~ /^encoding\(.+\)$/;

    require Encode;

    my ($old_name, $old_canon) = _get_encname($current[-2]);
    croak("open: Unknown encoding '$old_name'")
        unless defined $old_canon;    # Should we trust get_layers()?

    my ($new_name, $new_canon) = _get_encname($layers[-1]);
    croak("open: Unknown encoding '$new_name'")
        unless defined $new_canon;

    if ($old_canon eq $new_canon) {
        binmode($handle, ":pop");     # utf8 is part of the encoding layer
    }
}
54
# import($class, @args)
#
# Runs for "use open ...".  @args is consumed left to right; each step is
# one of:
#
#   :utf8 | :locale | :encoding(...)   (leading colon optional) shorthand
#                                      for  IO => that layer
#   :std                               also apply the result to the
#                                      standard handles (see below)
#   TYPE => "layers"                   TYPE is IN, OUT or IO; any other
#                                      TYPE croaks
#
# The chosen input and output layer strings are stored back into
# ${^OPEN}, joined by a NUL.  Layers named crlf or raw are additionally
# recorded in %^H under "open_TYPE".  "locale" is replaced by
# encoding(<locale encoding>) and implies :std.
sub import {
    my ($class, @args) = @_;
    croak("open: needs explicit list of PerlIO layers") unless @args;

    my $std;

    # ${^OPEN} carries "input-layers\0output-layers"; start from whatever
    # is already in effect.
    my ($in, $out) = split(/\0/, (${^OPEN} || "\0"), -1);

    while (@args) {
        my $type = shift(@args);

        if ($type eq ':std') {
            $std = 1;
            next;
        }

        my $spec;
        if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
            # Bare layer: treat it as  IO => ":layer".
            $spec = ":$1";
            $type = 'IO';
        }
        else {
            $spec = shift(@args) || '';
        }

        my @stack;
        foreach my $name (split(/\s+/, $spec)) {
            $name =~ s/^://;
            if ($name ne 'locale') {
                my $base = $name;               # the layer name itself
                $base =~ s/^(\w+)\(.+\)$/$1/;   # strip parameters

                warnings::warnif("layer", "Unknown PerlIO layer '$base'")
                    unless PerlIO::Layer::->find($base, 1);
            }
            else {
                require Encode;
                require encoding;

                # The locale's encoding is looked up once and cached in
                # $locale_encoding.
                $locale_encoding = encoding::_get_locale_encoding()
                    unless defined $locale_encoding;
                unless (defined $locale_encoding) {
                    warnings::warnif("layer", "Cannot figure out an encoding to use");
                    last;    # give up on the rest of this layer list
                }
                $name = "encoding($locale_encoding)";
                $std  = 1;
            }

            push(@stack, ":$name");
            $^H{"open_$type"} = $name if $name =~ /^(crlf|raw)$/;
        }

        my $joined = join(' ', @stack);
        if ($type eq 'IO') {
            _drop_oldenc(*STDIN,  @stack);
            _drop_oldenc(*STDOUT, @stack);
            $in = $out = $joined;
        }
        elsif ($type eq 'IN') {
            _drop_oldenc(*STDIN, @stack);
            $in = $joined;
        }
        elsif ($type eq 'OUT') {
            _drop_oldenc(*STDOUT, @stack);
            $out = $joined;
        }
        else {
            croak("Unknown PerlIO layer class '$type'");
        }
    }

    ${^OPEN} = join("\0", $in, $out);

    if ($std) {
        # Input layers go to STDIN; output layers go to STDOUT and STDERR.
        foreach my $pair ([$in, \*STDIN], [$out, \*STDOUT, \*STDERR]) {
            my ($layers, @handles) = @$pair;
            next unless $layers;

            # Prefer a plain :utf8; otherwise take the first
            # parameterised layer, e.g. encoding(...).
            my $mode;
            if ($layers =~ /:utf8\b/) {
                $mode = ":utf8";
            }
            elsif ($layers =~ /(\w+\(.+\))/) {
                $mode = ":$1";
            }
            next unless defined $mode;

            foreach my $fh (@handles) {
                binmode($fh, $mode);
            }
        }
    }
}
134
1351;
136__END__
d1edabcf
GS
137
138=head1 NAME
139
e2d9456f 140open - perl pragma to set default PerlIO layers for input and output
d1edabcf
GS
141
142=head1 SYNOPSIS
143
d5563ed7 144 use open IN => ":crlf", OUT => ":bytes";
1e616cf5
JH
145 use open OUT => ':utf8';
146 use open IO => ":encoding(iso-8859-7)";
147
148 use open IO => ':locale';
725d232a 149
740d4bb2 150 use open ':encoding(utf8)';
1e616cf5
JH
151 use open ':locale';
152 use open ':encoding(iso-8859-7)';
d1edabcf 153
b178108d
JH
154 use open ':std';
155
d1edabcf
GS
156=head1 DESCRIPTION
157
e2d9456f 158Full-fledged support for I/O layers is now implemented provided
d151aa0e
JH
159Perl is configured to use PerlIO as its IO system (which is now the
160default).
16fe6d59 161
7d3b96bb 162The C<open> pragma serves as one of the interfaces to declare default
16479489
JH
163"layers" (also known as "disciplines") for all I/O. Any two-argument
164open(), readpipe() (aka qx//) and similar operators found within the
165lexical scope of this pragma will use the declared defaults.
6d5e88a0
ST
166Even three-argument opens may be affected by this pragma
167when they don't specify IO layers in MODE.
7d3b96bb 168
1e616cf5 169With the C<IN> subpragma you can declare the default layers
d8d29d4f 170of input streams, and with the C<OUT> subpragma you can declare
1e616cf5
JH
171the default layers of output streams. With the C<IO> subpragma
172you can control both input and output streams simultaneously.
173
174If you have a legacy encoding, you can use the C<:encoding(...)> tag.
175
6d5e88a0 176If you want to set your encoding layers based on your
1e616cf5
JH
177locale environment variables, you can use the C<:locale> tag.
178For example:
179
180 $ENV{LANG} = 'ru_RU.KOI8-R';
dbd62f41
JH
181 # the :locale will probe the locale environment variables like LANG
182 use open OUT => ':locale';
1e616cf5 183 open(O, ">koi8");
23bcb45a 184 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
1e616cf5
JH
185 close O;
186 open(I, "<koi8");
23bcb45a 187 printf "%#x\n", ord(<I>); # this should print 0xc1
1e616cf5
JH
188 close I;
189
190These are equivalent
191
740d4bb2
JW
192 use open ':encoding(utf8)';
193 use open IO => ':encoding(utf8)';
1e616cf5
JH
194
195as are these
196
197 use open ':locale';
198 use open IO => ':locale';
199
200and these
201
202 use open ':encoding(iso-8859-7)';
203 use open IO => ':encoding(iso-8859-7)';
204
b5d8778e
JH
205The matching of encoding names is loose: case does not matter, and
206many encodings have several aliases. See L<Encode::Supported> for
207details and the list of supported encodings.
208
9fe371da
RGS
209When open() is given an explicit list of layers (with the three-arg
210syntax), they override the list declared using this pragma.
7d3b96bb 211
b178108d
JH
212The C<:std> subpragma on its own has no effect, but if combined with
213the C<:utf8> or C<:encoding> subpragmas, it converts the standard
214filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
215for input/output handles. For example, if both input and output are
740d4bb2
JW
216chosen to be C<:encoding(utf8)>, a C<:std> will mean that STDIN, STDOUT,
217and STDERR are also in C<:encoding(utf8)>. On the other hand, if only
218output is chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause
219only the STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
b178108d
JH
220implicitly turns on C<:std>.
221
9fe371da 222The logic of C<:locale> is described in full in L<encoding>,
7c0e976d
JH
223but in short it is first trying nl_langinfo(CODESET) and then
224guessing from the LC_ALL and LANG locale environment variables.
b310b053 225
e2d9456f 226Directory handles may also support PerlIO layers in the future.
7d3b96bb
NIS
227
228=head1 NONPERLIO FUNCTIONALITY
229
d151aa0e 230If Perl is not built to use PerlIO as its IO system then only the two
e2d9456f 231pseudo-layers C<:bytes> and C<:crlf> are available.
16fe6d59 232
e2d9456f
EM
233The C<:bytes> layer corresponds to "binary mode" and the C<:crlf>
234layer corresponds to "text mode" on platforms that distinguish
16fe6d59 235between the two modes when opening files (which is many DOS-like
e2d9456f 236platforms, including Windows). These two layers are no-ops on
d151aa0e
JH
237platforms where binmode() is a no-op, but perform their functions
238everywhere if PerlIO is enabled.
7d3b96bb
NIS
239
240=head1 IMPLEMENTATION DETAILS
d1edabcf 241
f3b00462
JH
242C<PerlIO::Layer> has a class method, C<find>, which is
243implemented as XS code. It is called by C<import> to validate the
244layers:
0c4f7ff0
NIS
245
246 PerlIO::Layer::->find("perlio")
247
f3b00462
JH
248The return value (if defined) is a Perl object, of class
249C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
250yet there is nothing useful you can do with the object at the perl
251level.
16fe6d59 252
d1edabcf
GS
253=head1 SEE ALSO
254
1768d7eb
JH
255L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
256L<encoding>
d1edabcf
GS
257
258=cut