This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
More maneuvering closer to proposed POSIX classes
[perl5.git] / lib / open.pm
CommitLineData
d1edabcf 1package open;
99ef548b 2use warnings;
ac27b0f5 3use Carp;
9cfe5470 4$open::hint_bits = 0x20000; # HINT_LOCALIZE_HH
16fe6d59 5
0c4f7ff0 6our $VERSION = '1.01';
b75c8c73 7
58d53262
JH
8my $locale_encoding;
9
b178108d 10sub in_locale { $^H & ($locale::hint_bits || 0)}
58d53262
JH
11
12sub _get_locale_encoding {
13 unless (defined $locale_encoding) {
276c9210 14 # I18N::Langinfo isn't available everywhere
9615f2ee
BT
15 eval {
16 require I18N::Langinfo;
17 I18N::Langinfo->import(qw(langinfo CODESET));
ba6ce41c 18 $locale_encoding = langinfo(CODESET());
9615f2ee 19 };
11fc5dc3 20 my $country_language;
a4157ebb
JH
21
22 no warnings 'uninitialized';
23
58d53262 24 if (not $locale_encoding && in_locale()) {
11fc5dc3
JH
25 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
26 ($country_language, $locale_encoding) = ($1, $2);
27 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
28 ($country_language, $locale_encoding) = ($1, $2);
58d53262 29 }
61de9fb5 30 # LANGUAGE affects only LC_MESSAGES only on glibc
1e616cf5
JH
31 } elsif (not $locale_encoding) {
32 if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
33 $ENV{LANG} =~ /\butf-?8\b/i) {
34 $locale_encoding = 'utf8';
35 }
36 # Could do more heuristics based on the country and language
58d53262
JH
37 # parts of LC_ALL and LANG (the parts before the dot (if any)),
38 # since we have Locale::Country and Locale::Language available.
39 # TODO: get a database of Language -> Encoding mappings
421e5dc3
JH
40 # (the Estonian database at http://www.eki.ee/letter/
41 # would be excellent!) --jhi
58d53262 42 }
11fc5dc3
JH
43 if (defined $locale_encoding &&
44 $locale_encoding eq 'euc' &&
45 defined $country_language) {
56fb2e42 46 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
1e616cf5 47 $locale_encoding = 'euc-jp';
5a192dee 48 } elsif ($country_language =~ /^ko_KR|korean?$/i) {
1e616cf5 49 $locale_encoding = 'euc-kr';
a4157ebb
JH
50 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
51 $locale_encoding = 'euc-cn';
56fb2e42 52 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
011f8d22 53 $locale_encoding = 'euc-tw';
11fc5dc3
JH
54 }
55 croak "Locale encoding 'euc' too ambiguous"
56 if $locale_encoding eq 'euc';
57 }
58d53262
JH
58 }
59}
60
16fe6d59 61sub import {
dfebf958 62 my ($class,@args) = @_;
e2d9456f 63 croak("`use open' needs explicit list of PerlIO layers") unless @args;
b178108d 64 my $std;
16fe6d59 65 $^H |= $open::hint_bits;
ba6ce41c 66 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
dfebf958
NIS
67 while (@args) {
68 my $type = shift(@args);
1e616cf5
JH
69 my $dscp;
70 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
71 $type = 'IO';
72 $dscp = ":$1";
b178108d
JH
73 } elsif ($type eq ':std') {
74 $std = 1;
75 next;
1e616cf5 76 } else {
725d232a 77 $dscp = shift(@args) || '';
1e616cf5 78 }
ac27b0f5 79 my @val;
1e616cf5 80 foreach my $layer (split(/\s+/,$dscp)) {
dfebf958 81 $layer =~ s/^://;
58d53262
JH
82 if ($layer eq 'locale') {
83 use Encode;
84 _get_locale_encoding()
85 unless defined $locale_encoding;
99ef548b 86 (warnings::warnif("layer", "Cannot figure out an encoding to use"), last)
58d53262 87 unless defined $locale_encoding;
11fc5dc3
JH
88 if ($locale_encoding =~ /^utf-?8$/i) {
89 $layer = "utf8";
90 } else {
738b23dc 91 $layer = "encoding($locale_encoding)";
11fc5dc3 92 }
b178108d 93 $std = 1;
97ed432b 94 } else {
011f8d22
JH
95 my $target = $layer; # the layer name itself
96 $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters
97
c7732655
NIS
98 unless(PerlIO::Layer::->find($target,1)) {
99 warnings::warnif("layer", "Unknown PerlIO layer '$target'");
97ed432b 100 }
ac27b0f5
NIS
101 }
102 push(@val,":$layer");
103 if ($layer =~ /^(crlf|raw)$/) {
104 $^H{"open_$type"} = $layer;
16fe6d59 105 }
ac27b0f5
NIS
106 }
107 if ($type eq 'IN') {
108 $in = join(' ',@val);
109 }
110 elsif ($type eq 'OUT') {
111 $out = join(' ',@val);
16fe6d59 112 }
1e616cf5 113 elsif ($type eq 'IO') {
f3b00462
JH
114 $in = $out = join(' ',@val);
115 }
16fe6d59 116 else {
e2d9456f 117 croak "Unknown PerlIO layer class '$type'";
16fe6d59
GS
118 }
119 }
a4157ebb 120 ${^OPEN} = join("\0",$in,$out) if $in or $out;
b178108d
JH
121 if ($std) {
122 if ($in) {
123 if ($in =~ /:utf8\b/) {
124 binmode(STDIN, ":utf8");
125 } elsif ($in =~ /(\w+\(.+\))/) {
126 binmode(STDIN, ":$1");
127 }
128 }
129 if ($out) {
130 if ($out =~ /:utf8\b/) {
131 binmode(STDOUT, ":utf8");
132 binmode(STDERR, ":utf8");
133 } elsif ($out =~ /(\w+\(.+\))/) {
134 binmode(STDOUT, ":$1");
135 binmode(STDERR, ":$1");
136 }
137 }
138 }
16fe6d59
GS
139}
140
1411;
142__END__
d1edabcf
GS
143
144=head1 NAME
145
e2d9456f 146open - perl pragma to set default PerlIO layers for input and output
d1edabcf
GS
147
148=head1 SYNOPSIS
149
d5563ed7 150 use open IN => ":crlf", OUT => ":bytes";
1e616cf5
JH
151 use open OUT => ':utf8';
152 use open IO => ":encoding(iso-8859-7)";
153
154 use open IO => ':locale';
725d232a 155
1e616cf5
JH
156 use open ':utf8';
157 use open ':locale';
158 use open ':encoding(iso-8859-7)';
d1edabcf 159
b178108d
JH
160 use open ':std';
161
d1edabcf
GS
162=head1 DESCRIPTION
163
e2d9456f 164Full-fledged support for I/O layers is now implemented provided
d151aa0e
JH
165Perl is configured to use PerlIO as its IO system (which is now the
166default).
16fe6d59 167
7d3b96bb 168The C<open> pragma serves as one of the interfaces to declare default
fae2c0fb
RGS
169"layers" (also known as "disciplines") for all I/O. Any open(),
170readpipe() (aka qx//) and similar operators found within the lexical
171scope of this pragma will use the declared defaults.
7d3b96bb 172
1e616cf5 173With the C<IN> subpragma you can declare the default layers
d8d29d4f 174of input streams, and with the C<OUT> subpragma you can declare
1e616cf5
JH
175the default layers of output streams. With the C<IO> subpragma
176you can control both input and output streams simultaneously.
177
178If you have a legacy encoding, you can use the C<:encoding(...)> tag.
179
e2d9456f 180if you want to set your encoding layers based on your
1e616cf5
JH
181locale environment variables, you can use the C<:locale> tag.
182For example:
183
184 $ENV{LANG} = 'ru_RU.KOI8-R';
dbd62f41
JH
185 # the :locale will probe the locale environment variables like LANG
186 use open OUT => ':locale';
1e616cf5 187 open(O, ">koi8");
23bcb45a 188 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
1e616cf5
JH
189 close O;
190 open(I, "<koi8");
23bcb45a 191 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
1e616cf5
JH
192 close I;
193
194These are equivalent
195
196 use open ':utf8';
197 use open IO => ':utf8';
198
199as are these
200
201 use open ':locale';
202 use open IO => ':locale';
203
204and these
205
206 use open ':encoding(iso-8859-7)';
207 use open IO => ':encoding(iso-8859-7)';
208
b5d8778e
JH
209The matching of encoding names is loose: case does not matter, and
210many encodings have several aliases. See L<Encode::Supported> for
211details and the list of supported locales.
212
e2d9456f 213Note that C<:utf8> PerlIO layer must always be specified exactly like
b5d8778e
JH
214that, it is not subject to the loose matching of encoding names.
215
d151aa0e
JH
216When open() is given an explicit list of layers they are appended to
217the list declared using this pragma.
7d3b96bb 218
b178108d
JH
219The C<:std> subpragma on its own has no effect, but if combined with
220the C<:utf8> or C<:encoding> subpragmas, it converts the standard
221filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
222for input/output handles. For example, if both input and out are
223chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
224STDERR are also in C<:utf8>. On the other hand, if only output is
fb80c70c 225chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the
b178108d
JH
226STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
227implicitly turns on C<:std>.
228
ba9a69eb
JH
229The logic of C<:locale> is as follows:
230
231=over 4
232
233=item 1.
234
235If the platform supports the langinfo(CODESET) interface, the codeset
236returned is used as the default encoding for the open pragma.
237
238=item 2.
239
240If 1. didn't work but we are under the locale pragma, the environment
241variables LC_ALL and LANG (in that order) are matched for encodings
242(the part after C<.>, if any), and if any found, that is used
243as the default encoding for the open pragma.
244
245=item 3.
246
247If 1. and 2. didn't work, the environment variables LC_ALL and LANG
248(in that order) are matched for anything looking like UTF-8, and if
249any found, C<:utf8> is used as the default encoding for the open
250pragma.
251
252=back
253
61de9fb5 254If your locale environment variables (LC_ALL, LC_CTYPE, LANG)
b310b053
JH
255contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
256the default encoding of your STDIN, STDOUT, and STDERR, and of
257B<any subsequent file open>, is UTF-8.
258
e2d9456f 259Directory handles may also support PerlIO layers in the future.
7d3b96bb
NIS
260
261=head1 NONPERLIO FUNCTIONALITY
262
d151aa0e 263If Perl is not built to use PerlIO as its IO system then only the two
e2d9456f 264pseudo-layers C<:bytes> and C<:crlf> are available.
16fe6d59 265
e2d9456f
EM
266The C<:bytes> layer corresponds to "binary mode" and the C<:crlf>
267layer corresponds to "text mode" on platforms that distinguish
16fe6d59 268between the two modes when opening files (which is many DOS-like
e2d9456f 269platforms, including Windows). These two layers are no-ops on
d151aa0e
JH
270platforms where binmode() is a no-op, but perform their functions
271everywhere if PerlIO is enabled.
7d3b96bb
NIS
272
273=head1 IMPLEMENTATION DETAILS
d1edabcf 274
f3b00462
JH
275There is a class method in C<PerlIO::Layer> C<find> which is
276implemented as XS code. It is called by C<import> to validate the
277layers:
0c4f7ff0
NIS
278
279 PerlIO::Layer::->find("perlio")
280
f3b00462
JH
281The return value (if defined) is a Perl object, of class
282C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
283yet there is nothing useful you can do with the object at the perl
284level.
16fe6d59 285
d1edabcf
GS
286=head1 SEE ALSO
287
1768d7eb
JH
288L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
289L<encoding>
d1edabcf
GS
290
291=cut