Commit | Line | Data |
---|---|---|
d1edabcf | 1 | package open; |
99ef548b | 2 | use warnings; |
16fe6d59 | 3 | |
c0fd9d21 | 4 | our $VERSION = '1.10'; |
b75c8c73 | 5 | |
7c0e976d | 6 | require 5.008001; # for PerlIO::get_layers() |
58d53262 | 7 | |
b4ebbc94 | 8 | my $locale_encoding; |
a4157ebb | 9 | |
7c0e976d | 10 | sub _get_encname { |
b4ebbc94 | 11 | return ($1, Encode::resolve_alias($1)) if $_[0] =~ /^:?encoding\((.+)\)$/; |
7c0e976d JH |
12 | return; |
13 | } | |
a4157ebb | 14 | |
8878f897 T |
15 | sub croak { |
16 | require Carp; goto &Carp::croak; | |
17 | } | |
18 | ||
7c0e976d JH |
19 | sub _drop_oldenc { |
20 | # If by the time we arrive here there already is at the top of the | |
21 | # perlio layer stack an encoding identical to what we would like | |
22 | # to push via this open pragma, we will pop away the old encoding | |
23 | # (+utf8) so that we can push ourselves in place (this is easier | |
24 | # than ignoring pushing ourselves because of the way how ${^OPEN} | |
25 | # works). So we are looking for something like | |
26 | # | |
27 | # stdio encoding(xxx) utf8 | |
28 | # | |
29 | # in the existing layer stack, and in the new stack chunk for | |
30 | # | |
31 | # :encoding(xxx) | |
32 | # | |
33 | # If we find a match, we pop the old stack (once, since | |
34 | # the utf8 is just a flag on the encoding layer) | |
35 | my ($h, @new) = @_; | |
36 | return unless @new >= 1 && $new[-1] =~ /^:encoding\(.+\)$/; | |
37 | my @old = PerlIO::get_layers($h); | |
38 | return unless @old >= 3 && | |
00243fce | 39 | $old[-1] eq 'utf8' && |
7c0e976d | 40 | $old[-2] =~ /^encoding\(.+\)$/; |
b4ebbc94 | 41 | require Encode; |
7c0e976d JH |
42 | my ($loname, $lcname) = _get_encname($old[-2]); |
43 | unless (defined $lcname) { # Should we trust get_layers()? | |
8878f897 | 44 | croak("open: Unknown encoding '$loname'"); |
7c0e976d JH |
45 | } |
46 | my ($voname, $vcname) = _get_encname($new[-1]); | |
47 | unless (defined $vcname) { | |
8878f897 | 48 | croak("open: Unknown encoding '$voname'"); |
7c0e976d JH |
49 | } |
50 | if ($lcname eq $vcname) { | |
51 | binmode($h, ":pop"); # utf8 is part of the encoding layer | |
58d53262 JH |
52 | } |
53 | } | |
54 | ||
16fe6d59 | 55 | sub import { |
dfebf958 | 56 | my ($class,@args) = @_; |
7c0e976d | 57 | croak("open: needs explicit list of PerlIO layers") unless @args; |
b178108d | 58 | my $std; |
ba6ce41c | 59 | my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1); |
dfebf958 NIS |
60 | while (@args) { |
61 | my $type = shift(@args); | |
1e616cf5 JH |
62 | my $dscp; |
63 | if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) { | |
64 | $type = 'IO'; | |
65 | $dscp = ":$1"; | |
b178108d JH |
66 | } elsif ($type eq ':std') { |
67 | $std = 1; | |
68 | next; | |
1e616cf5 | 69 | } else { |
725d232a | 70 | $dscp = shift(@args) || ''; |
1e616cf5 | 71 | } |
ac27b0f5 | 72 | my @val; |
1e616cf5 | 73 | foreach my $layer (split(/\s+/,$dscp)) { |
dfebf958 | 74 | $layer =~ s/^://; |
58d53262 | 75 | if ($layer eq 'locale') { |
54cfe943 | 76 | require Encode; |
b4ebbc94 NC |
77 | require encoding; |
78 | $locale_encoding = encoding::_get_locale_encoding() | |
58d53262 | 79 | unless defined $locale_encoding; |
99ef548b | 80 | (warnings::warnif("layer", "Cannot figure out an encoding to use"), last) |
58d53262 | 81 | unless defined $locale_encoding; |
740d4bb2 | 82 | $layer = "encoding($locale_encoding)"; |
b178108d | 83 | $std = 1; |
97ed432b | 84 | } else { |
011f8d22 JH |
85 | my $target = $layer; # the layer name itself |
86 | $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters | |
87 | ||
c7732655 NIS |
88 | unless(PerlIO::Layer::->find($target,1)) { |
89 | warnings::warnif("layer", "Unknown PerlIO layer '$target'"); | |
97ed432b | 90 | } |
ac27b0f5 NIS |
91 | } |
92 | push(@val,":$layer"); | |
93 | if ($layer =~ /^(crlf|raw)$/) { | |
94 | $^H{"open_$type"} = $layer; | |
16fe6d59 | 95 | } |
ac27b0f5 NIS |
96 | } |
97 | if ($type eq 'IN') { | |
73f1eaca | 98 | _drop_oldenc(*STDIN, @val) if $std; |
7c0e976d | 99 | $in = join(' ', @val); |
ac27b0f5 NIS |
100 | } |
101 | elsif ($type eq 'OUT') { | |
73f1eaca FC |
102 | if ($std) { |
103 | _drop_oldenc(*STDOUT, @val); | |
104 | _drop_oldenc(*STDERR, @val); | |
105 | } | |
7c0e976d | 106 | $out = join(' ', @val); |
16fe6d59 | 107 | } |
1e616cf5 | 108 | elsif ($type eq 'IO') { |
73f1eaca FC |
109 | if ($std) { |
110 | _drop_oldenc(*STDIN, @val); | |
111 | _drop_oldenc(*STDOUT, @val); | |
112 | _drop_oldenc(*STDERR, @val); | |
113 | } | |
7c0e976d | 114 | $in = $out = join(' ', @val); |
f3b00462 | 115 | } |
16fe6d59 | 116 | else { |
ab3216d9 | 117 | croak "Unknown PerlIO layer class '$type' (need IN, OUT or IO)"; |
16fe6d59 GS |
118 | } |
119 | } | |
7c0e976d | 120 | ${^OPEN} = join("\0", $in, $out); |
b178108d JH |
121 | if ($std) { |
122 | if ($in) { | |
123 | if ($in =~ /:utf8\b/) { | |
124 | binmode(STDIN, ":utf8"); | |
125 | } elsif ($in =~ /(\w+\(.+\))/) { | |
126 | binmode(STDIN, ":$1"); | |
127 | } | |
128 | } | |
129 | if ($out) { | |
130 | if ($out =~ /:utf8\b/) { | |
131 | binmode(STDOUT, ":utf8"); | |
132 | binmode(STDERR, ":utf8"); | |
133 | } elsif ($out =~ /(\w+\(.+\))/) { | |
134 | binmode(STDOUT, ":$1"); | |
135 | binmode(STDERR, ":$1"); | |
136 | } | |
137 | } | |
138 | } | |
16fe6d59 GS |
139 | } |
140 | ||
141 | 1; | |
142 | __END__ | |
d1edabcf GS |
143 | |
144 | =head1 NAME | |
145 | ||
e2d9456f | 146 | open - perl pragma to set default PerlIO layers for input and output |
d1edabcf GS |
147 | |
148 | =head1 SYNOPSIS | |
149 | ||
d5563ed7 | 150 | use open IN => ":crlf", OUT => ":bytes"; |
1e616cf5 JH |
151 | use open OUT => ':utf8'; |
152 | use open IO => ":encoding(iso-8859-7)"; | |
153 | ||
154 | use open IO => ':locale'; | |
725d232a | 155 | |
740d4bb2 | 156 | use open ':encoding(utf8)'; |
1e616cf5 JH |
157 | use open ':locale'; |
158 | use open ':encoding(iso-8859-7)'; | |
d1edabcf | 159 | |
b178108d JH |
160 | use open ':std'; |
161 | ||
d1edabcf GS |
162 | =head1 DESCRIPTION |
163 | ||
e2d9456f | 164 | Full-fledged support for I/O layers is now implemented provided |
d151aa0e JH |
165 | Perl is configured to use PerlIO as its IO system (which is now the |
166 | default). | |
16fe6d59 | 167 | |
7d3b96bb | 168 | The C<open> pragma serves as one of the interfaces to declare default |
16479489 JH |
169 | "layers" (also known as "disciplines") for all I/O. Any two-argument |
170 | open(), readpipe() (aka qx//) and similar operators found within the | |
171 | lexical scope of this pragma will use the declared defaults. | |
6d5e88a0 TS |
172 | Even three-argument opens may be affected by this pragma |
173 | when they don't specify IO layers in MODE. | |
7d3b96bb | 174 | |
1e616cf5 | 175 | With the C<IN> subpragma you can declare the default layers |
d8d29d4f | 176 | of input streams, and with the C<OUT> subpragma you can declare |
1e616cf5 JH |
177 | the default layers of output streams. With the C<IO> subpragma |
178 | you can control both input and output streams simultaneously. | |
179 | ||
180 | If you have a legacy encoding, you can use the C<:encoding(...)> tag. | |
181 | ||
6d5e88a0 | 182 | If you want to set your encoding layers based on your |
1e616cf5 JH |
183 | locale environment variables, you can use the C<:locale> tag. |
184 | For example: | |
185 | ||
186 | $ENV{LANG} = 'ru_RU.KOI8-R'; | |
dbd62f41 JH |
187 | # the :locale will probe the locale environment variables like LANG |
188 | use open OUT => ':locale'; | |
1e616cf5 | 189 | open(O, ">koi8"); |
23bcb45a | 190 | print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1 |
1e616cf5 JH |
191 | close O; |
192 | open(I, "<koi8"); | |
23bcb45a | 193 | printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1 |
1e616cf5 JH |
194 | close I; |
195 | ||
196 | These are equivalent | |
197 | ||
740d4bb2 JW |
198 | use open ':encoding(utf8)'; |
199 | use open IO => ':encoding(utf8)'; | |
1e616cf5 JH |
200 | |
201 | as are these | |
202 | ||
203 | use open ':locale'; | |
204 | use open IO => ':locale'; | |
205 | ||
206 | and these | |
207 | ||
208 | use open ':encoding(iso-8859-7)'; | |
209 | use open IO => ':encoding(iso-8859-7)'; | |
210 | ||
b5d8778e JH |
211 | The matching of encoding names is loose: case does not matter, and |
212 | many encodings have several aliases. See L<Encode::Supported> for | |
213 | details and the list of supported locales. | |
214 | ||
9fe371da | 215 | When open() is given an explicit list of layers (with the three-arg |
c0fd9d21 FC |
216 | syntax), they override the list declared using this pragma. open() can |
217 | also be given a single colon (:) for a layer name, to override this pragma | |
218 | and use the default (C<:raw> on Unix, C<:crlf> on Windows). | |
7d3b96bb | 219 | |
b178108d JH |
220 | The C<:std> subpragma on its own has no effect, but if combined with |
221 | the C<:utf8> or C<:encoding> subpragmas, it converts the standard | |
222 | filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected | |
223 | for input/output handles. For example, if both input and out are | |
740d4bb2 JW |
224 | chosen to be C<:encoding(utf8)>, a C<:std> will mean that STDIN, STDOUT, |
225 | and STDERR are also in C<:encoding(utf8)>. On the other hand, if only | |
226 | output is chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause | |
227 | only the STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma | |
b178108d JH |
228 | implicitly turns on C<:std>. |
229 | ||
9fe371da | 230 | The logic of C<:locale> is described in full in L<encoding>, |
7c0e976d JH |
231 | but in short it is first trying nl_langinfo(CODESET) and then |
232 | guessing from the LC_ALL and LANG locale environment variables. | |
b310b053 | 233 | |
e2d9456f | 234 | Directory handles may also support PerlIO layers in the future. |
7d3b96bb NIS |
235 | |
236 | =head1 NONPERLIO FUNCTIONALITY | |
237 | ||
d151aa0e | 238 | If Perl is not built to use PerlIO as its IO system then only the two |
e2d9456f | 239 | pseudo-layers C<:bytes> and C<:crlf> are available. |
16fe6d59 | 240 | |
e2d9456f EM |
241 | The C<:bytes> layer corresponds to "binary mode" and the C<:crlf> |
242 | layer corresponds to "text mode" on platforms that distinguish | |
16fe6d59 | 243 | between the two modes when opening files (which is many DOS-like |
e2d9456f | 244 | platforms, including Windows). These two layers are no-ops on |
d151aa0e JH |
245 | platforms where binmode() is a no-op, but perform their functions |
246 | everywhere if PerlIO is enabled. | |
7d3b96bb NIS |
247 | |
248 | =head1 IMPLEMENTATION DETAILS | |
d1edabcf | 249 | |
f3b00462 JH |
250 | There is a class method in C<PerlIO::Layer> C<find> which is |
251 | implemented as XS code. It is called by C<import> to validate the | |
252 | layers: | |
0c4f7ff0 NIS |
253 | |
254 | PerlIO::Layer::->find("perlio") | |
255 | ||
f3b00462 JH |
256 | The return value (if defined) is a Perl object, of class |
257 | C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As | |
258 | yet there is nothing useful you can do with the object at the perl | |
259 | level. | |
16fe6d59 | 260 | |
d1edabcf GS |
261 | =head1 SEE ALSO |
262 | ||
1768d7eb JH |
263 | L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>, |
264 | L<encoding> | |
d1edabcf GS |
265 | |
266 | =cut |