This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make writing user-defined character properties nicer.
[perl5.git] / lib / open.pm
CommitLineData
d1edabcf 1package open;
99ef548b 2use warnings;
ac27b0f5 3use Carp;
16fe6d59
GS
4$open::hint_bits = 0x20000;
5
0c4f7ff0 6our $VERSION = '1.01';
b75c8c73 7
58d53262
JH
8my $locale_encoding;
9
b178108d 10sub in_locale { $^H & ($locale::hint_bits || 0)}
58d53262
JH
11
12sub _get_locale_encoding {
13 unless (defined $locale_encoding) {
276c9210 14 # I18N::Langinfo isn't available everywhere
9615f2ee
BT
15 eval {
16 require I18N::Langinfo;
17 I18N::Langinfo->import(qw(langinfo CODESET));
ba6ce41c 18 $locale_encoding = langinfo(CODESET());
9615f2ee 19 };
11fc5dc3 20 my $country_language;
a4157ebb
JH
21
22 no warnings 'uninitialized';
23
58d53262 24 if (not $locale_encoding && in_locale()) {
11fc5dc3
JH
25 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
26 ($country_language, $locale_encoding) = ($1, $2);
27 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
28 ($country_language, $locale_encoding) = ($1, $2);
58d53262 29 }
1e616cf5
JH
30 } elsif (not $locale_encoding) {
31 if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
32 $ENV{LANG} =~ /\butf-?8\b/i) {
33 $locale_encoding = 'utf8';
34 }
35 # Could do more heuristics based on the country and language
58d53262
JH
36 # parts of LC_ALL and LANG (the parts before the dot (if any)),
37 # since we have Locale::Country and Locale::Language available.
38 # TODO: get a database of Language -> Encoding mappings
421e5dc3
JH
39 # (the Estonian database at http://www.eki.ee/letter/
40 # would be excellent!) --jhi
58d53262 41 }
11fc5dc3
JH
42 if (defined $locale_encoding &&
43 $locale_encoding eq 'euc' &&
44 defined $country_language) {
56fb2e42 45 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
1e616cf5 46 $locale_encoding = 'euc-jp';
5a192dee 47 } elsif ($country_language =~ /^ko_KR|korean?$/i) {
1e616cf5 48 $locale_encoding = 'euc-kr';
a4157ebb
JH
49 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
50 $locale_encoding = 'euc-cn';
56fb2e42 51 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
011f8d22 52 $locale_encoding = 'euc-tw';
11fc5dc3
JH
53 }
54 croak "Locale encoding 'euc' too ambiguous"
55 if $locale_encoding eq 'euc';
56 }
58d53262
JH
57 }
58}
59
16fe6d59 60sub import {
dfebf958
NIS
61 my ($class,@args) = @_;
62 croak("`use open' needs explicit list of disciplines") unless @args;
b178108d 63 my $std;
16fe6d59 64 $^H |= $open::hint_bits;
ba6ce41c 65 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
dfebf958
NIS
66 while (@args) {
67 my $type = shift(@args);
1e616cf5
JH
68 my $dscp;
69 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
70 $type = 'IO';
71 $dscp = ":$1";
b178108d
JH
72 } elsif ($type eq ':std') {
73 $std = 1;
74 next;
1e616cf5 75 } else {
725d232a 76 $dscp = shift(@args) || '';
1e616cf5 77 }
ac27b0f5 78 my @val;
1e616cf5 79 foreach my $layer (split(/\s+/,$dscp)) {
dfebf958 80 $layer =~ s/^://;
58d53262
JH
81 if ($layer eq 'locale') {
82 use Encode;
83 _get_locale_encoding()
84 unless defined $locale_encoding;
99ef548b 85 (warnings::warnif("layer", "Cannot figure out an encoding to use"), last)
58d53262 86 unless defined $locale_encoding;
11fc5dc3
JH
87 if ($locale_encoding =~ /^utf-?8$/i) {
88 $layer = "utf8";
89 } else {
738b23dc 90 $layer = "encoding($locale_encoding)";
11fc5dc3 91 }
b178108d 92 $std = 1;
97ed432b 93 } else {
011f8d22
JH
94 my $target = $layer; # the layer name itself
95 $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters
96
97 unless(PerlIO::Layer::->find($target)) {
99ef548b 98 warnings::warnif("layer", "Unknown discipline layer '$layer'");
97ed432b 99 }
ac27b0f5
NIS
100 }
101 push(@val,":$layer");
102 if ($layer =~ /^(crlf|raw)$/) {
103 $^H{"open_$type"} = $layer;
16fe6d59 104 }
ac27b0f5
NIS
105 }
106 if ($type eq 'IN') {
107 $in = join(' ',@val);
108 }
109 elsif ($type eq 'OUT') {
110 $out = join(' ',@val);
16fe6d59 111 }
1e616cf5 112 elsif ($type eq 'IO') {
f3b00462
JH
113 $in = $out = join(' ',@val);
114 }
16fe6d59 115 else {
ac27b0f5 116 croak "Unknown discipline class '$type'";
16fe6d59
GS
117 }
118 }
a4157ebb 119 ${^OPEN} = join("\0",$in,$out) if $in or $out;
b178108d
JH
120 if ($std) {
121 if ($in) {
122 if ($in =~ /:utf8\b/) {
123 binmode(STDIN, ":utf8");
124 } elsif ($in =~ /(\w+\(.+\))/) {
125 binmode(STDIN, ":$1");
126 }
127 }
128 if ($out) {
129 if ($out =~ /:utf8\b/) {
130 binmode(STDOUT, ":utf8");
131 binmode(STDERR, ":utf8");
132 } elsif ($out =~ /(\w+\(.+\))/) {
133 binmode(STDOUT, ":$1");
134 binmode(STDERR, ":$1");
135 }
136 }
137 }
16fe6d59
GS
138}
139
1401;
141__END__
d1edabcf
GS
142
143=head1 NAME
144
145open - perl pragma to set default disciplines for input and output
146
147=head1 SYNOPSIS
148
1e616cf5
JH
149 use open IN => ":crlf", OUT => ":raw";
150 use open OUT => ':utf8';
151 use open IO => ":encoding(iso-8859-7)";
152
153 use open IO => ':locale';
725d232a 154
1e616cf5
JH
155 use open ':utf8';
156 use open ':locale';
157 use open ':encoding(iso-8859-7)';
d1edabcf 158
b178108d
JH
159 use open ':std';
160
d1edabcf
GS
161=head1 DESCRIPTION
162
d151aa0e
JH
163Full-fledged support for I/O disciplines is now implemented provided
164Perl is configured to use PerlIO as its IO system (which is now the
165default).
16fe6d59 166
7d3b96bb
NIS
167The C<open> pragma serves as one of the interfaces to declare default
168"layers" (aka disciplines) for all I/O.
169
170The C<open> pragma is used to declare one or more default layers for
d151aa0e
JH
171I/O operations. Any open(), readpipe() (aka qx//) and similar
172operators found within the lexical scope of this pragma will use the
173declared defaults.
7d3b96bb 174
1e616cf5 175With the C<IN> subpragma you can declare the default layers
d8d29d4f 176of input streams, and with the C<OUT> subpragma you can declare
1e616cf5
JH
177the default layers of output streams. With the C<IO> subpragma
178you can control both input and output streams simultaneously.
179
180If you have a legacy encoding, you can use the C<:encoding(...)> tag.
181
182if you want to set your encoding disciplines based on your
183locale environment variables, you can use the C<:locale> tag.
184For example:
185
186 $ENV{LANG} = 'ru_RU.KOI8-R';
dbd62f41
JH
187 # the :locale will probe the locale environment variables like LANG
188 use open OUT => ':locale';
1e616cf5 189 open(O, ">koi8");
23bcb45a 190 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
1e616cf5
JH
191 close O;
192 open(I, "<koi8");
23bcb45a 193 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
1e616cf5
JH
194 close I;
195
196These are equivalent
197
198 use open ':utf8';
199 use open IO => ':utf8';
200
201as are these
202
203 use open ':locale';
204 use open IO => ':locale';
205
206and these
207
208 use open ':encoding(iso-8859-7)';
209 use open IO => ':encoding(iso-8859-7)';
210
d151aa0e
JH
211When open() is given an explicit list of layers they are appended to
212the list declared using this pragma.
7d3b96bb 213
b178108d
JH
214The C<:std> subpragma on its own has no effect, but if combined with
215the C<:utf8> or C<:encoding> subpragmas, it converts the standard
216filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
217for input/output handles. For example, if both input and out are
218chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
219STDERR are also in C<:utf8>. On the other hand, if only output is
fb80c70c 220chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the
b178108d
JH
221STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
222implicitly turns on C<:std>.
223
ba9a69eb
JH
224The logic of C<:locale> is as follows:
225
226=over 4
227
228=item 1.
229
230If the platform supports the langinfo(CODESET) interface, the codeset
231returned is used as the default encoding for the open pragma.
232
233=item 2.
234
235If 1. didn't work but we are under the locale pragma, the environment
236variables LC_ALL and LANG (in that order) are matched for encodings
237(the part after C<.>, if any), and if any found, that is used
238as the default encoding for the open pragma.
239
240=item 3.
241
242If 1. and 2. didn't work, the environment variables LC_ALL and LANG
243(in that order) are matched for anything looking like UTF-8, and if
244any found, C<:utf8> is used as the default encoding for the open
245pragma.
246
247=back
248
b310b053
JH
249If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG)
250contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
251the default encoding of your STDIN, STDOUT, and STDERR, and of
252B<any subsequent file open>, is UTF-8.
253
7d3b96bb
NIS
254Directory handles may also support disciplines in future.
255
256=head1 NONPERLIO FUNCTIONALITY
257
d151aa0e
JH
258If Perl is not built to use PerlIO as its IO system then only the two
259pseudo-disciplines ":raw" and ":crlf" are available.
16fe6d59
GS
260
261The ":raw" discipline corresponds to "binary mode" and the ":crlf"
262discipline corresponds to "text mode" on platforms that distinguish
263between the two modes when opening files (which is many DOS-like
d151aa0e
JH
264platforms, including Windows). These two disciplines are no-ops on
265platforms where binmode() is a no-op, but perform their functions
266everywhere if PerlIO is enabled.
7d3b96bb
NIS
267
268=head1 IMPLEMENTATION DETAILS
d1edabcf 269
f3b00462
JH
270There is a class method in C<PerlIO::Layer> C<find> which is
271implemented as XS code. It is called by C<import> to validate the
272layers:
0c4f7ff0
NIS
273
274 PerlIO::Layer::->find("perlio")
275
f3b00462
JH
276The return value (if defined) is a Perl object, of class
277C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
278yet there is nothing useful you can do with the object at the perl
279level.
16fe6d59 280
d1edabcf
GS
281=head1 SEE ALSO
282
1768d7eb
JH
283L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
284L<encoding>
d1edabcf
GS
285
286=cut