| 1 | package open; |
| 2 | use warnings; |
| 3 | |
| 4 | our $VERSION = '1.10'; |
| 5 | |
| 6 | require 5.008001; # for PerlIO::get_layers() |
| 7 | |
| 8 | my $locale_encoding; |
| 9 | |
| 10 | sub _get_encname { |
| 11 | return ($1, Encode::resolve_alias($1)) if $_[0] =~ /^:?encoding\((.+)\)$/; |
| 12 | return; |
| 13 | } |
| 14 | |
| 15 | sub croak { |
| 16 | require Carp; goto &Carp::croak; |
| 17 | } |
| 18 | |
| 19 | sub _drop_oldenc { |
| 20 | # If by the time we arrive here there already is at the top of the |
| 21 | # perlio layer stack an encoding identical to what we would like |
| 22 | # to push via this open pragma, we will pop away the old encoding |
| 23 | # (+utf8) so that we can push ourselves in place (this is easier |
| 24 | # than ignoring pushing ourselves because of the way how ${^OPEN} |
| 25 | # works). So we are looking for something like |
| 26 | # |
| 27 | # stdio encoding(xxx) utf8 |
| 28 | # |
| 29 | # in the existing layer stack, and in the new stack chunk for |
| 30 | # |
| 31 | # :encoding(xxx) |
| 32 | # |
| 33 | # If we find a match, we pop the old stack (once, since |
| 34 | # the utf8 is just a flag on the encoding layer) |
| 35 | my ($h, @new) = @_; |
| 36 | return unless @new >= 1 && $new[-1] =~ /^:encoding\(.+\)$/; |
| 37 | my @old = PerlIO::get_layers($h); |
| 38 | return unless @old >= 3 && |
| 39 | $old[-1] eq 'utf8' && |
| 40 | $old[-2] =~ /^encoding\(.+\)$/; |
| 41 | require Encode; |
| 42 | my ($loname, $lcname) = _get_encname($old[-2]); |
| 43 | unless (defined $lcname) { # Should we trust get_layers()? |
| 44 | croak("open: Unknown encoding '$loname'"); |
| 45 | } |
| 46 | my ($voname, $vcname) = _get_encname($new[-1]); |
| 47 | unless (defined $vcname) { |
| 48 | croak("open: Unknown encoding '$voname'"); |
| 49 | } |
| 50 | if ($lcname eq $vcname) { |
| 51 | binmode($h, ":pop"); # utf8 is part of the encoding layer |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | sub import { |
| 56 | my ($class,@args) = @_; |
| 57 | croak("open: needs explicit list of PerlIO layers") unless @args; |
| 58 | my $std; |
| 59 | my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1); |
| 60 | while (@args) { |
| 61 | my $type = shift(@args); |
| 62 | my $dscp; |
| 63 | if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) { |
| 64 | $type = 'IO'; |
| 65 | $dscp = ":$1"; |
| 66 | } elsif ($type eq ':std') { |
| 67 | $std = 1; |
| 68 | next; |
| 69 | } else { |
| 70 | $dscp = shift(@args) || ''; |
| 71 | } |
| 72 | my @val; |
| 73 | foreach my $layer (split(/\s+/,$dscp)) { |
| 74 | $layer =~ s/^://; |
| 75 | if ($layer eq 'locale') { |
| 76 | require Encode; |
| 77 | require encoding; |
| 78 | $locale_encoding = encoding::_get_locale_encoding() |
| 79 | unless defined $locale_encoding; |
| 80 | (warnings::warnif("layer", "Cannot figure out an encoding to use"), last) |
| 81 | unless defined $locale_encoding; |
| 82 | $layer = "encoding($locale_encoding)"; |
| 83 | $std = 1; |
| 84 | } else { |
| 85 | my $target = $layer; # the layer name itself |
| 86 | $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters |
| 87 | |
| 88 | unless(PerlIO::Layer::->find($target,1)) { |
| 89 | warnings::warnif("layer", "Unknown PerlIO layer '$target'"); |
| 90 | } |
| 91 | } |
| 92 | push(@val,":$layer"); |
| 93 | if ($layer =~ /^(crlf|raw)$/) { |
| 94 | $^H{"open_$type"} = $layer; |
| 95 | } |
| 96 | } |
| 97 | if ($type eq 'IN') { |
| 98 | _drop_oldenc(*STDIN, @val) if $std; |
| 99 | $in = join(' ', @val); |
| 100 | } |
| 101 | elsif ($type eq 'OUT') { |
| 102 | if ($std) { |
| 103 | _drop_oldenc(*STDOUT, @val); |
| 104 | _drop_oldenc(*STDERR, @val); |
| 105 | } |
| 106 | $out = join(' ', @val); |
| 107 | } |
| 108 | elsif ($type eq 'IO') { |
| 109 | if ($std) { |
| 110 | _drop_oldenc(*STDIN, @val); |
| 111 | _drop_oldenc(*STDOUT, @val); |
| 112 | _drop_oldenc(*STDERR, @val); |
| 113 | } |
| 114 | $in = $out = join(' ', @val); |
| 115 | } |
| 116 | else { |
| 117 | croak "Unknown PerlIO layer class '$type' (need IN, OUT or IO)"; |
| 118 | } |
| 119 | } |
| 120 | ${^OPEN} = join("\0", $in, $out); |
| 121 | if ($std) { |
| 122 | if ($in) { |
| 123 | if ($in =~ /:utf8\b/) { |
| 124 | binmode(STDIN, ":utf8"); |
| 125 | } elsif ($in =~ /(\w+\(.+\))/) { |
| 126 | binmode(STDIN, ":$1"); |
| 127 | } |
| 128 | } |
| 129 | if ($out) { |
| 130 | if ($out =~ /:utf8\b/) { |
| 131 | binmode(STDOUT, ":utf8"); |
| 132 | binmode(STDERR, ":utf8"); |
| 133 | } elsif ($out =~ /(\w+\(.+\))/) { |
| 134 | binmode(STDOUT, ":$1"); |
| 135 | binmode(STDERR, ":$1"); |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | 1; |
| 142 | __END__ |
| 143 | |
| 144 | =head1 NAME |
| 145 | |
| 146 | open - perl pragma to set default PerlIO layers for input and output |
| 147 | |
| 148 | =head1 SYNOPSIS |
| 149 | |
| 150 | use open IN => ":crlf", OUT => ":bytes"; |
| 151 | use open OUT => ':utf8'; |
| 152 | use open IO => ":encoding(iso-8859-7)"; |
| 153 | |
| 154 | use open IO => ':locale'; |
| 155 | |
| 156 | use open ':encoding(utf8)'; |
| 157 | use open ':locale'; |
| 158 | use open ':encoding(iso-8859-7)'; |
| 159 | |
| 160 | use open ':std'; |
| 161 | |
| 162 | =head1 DESCRIPTION |
| 163 | |
| 164 | Full-fledged support for I/O layers is now implemented provided |
| 165 | Perl is configured to use PerlIO as its IO system (which is now the |
| 166 | default). |
| 167 | |
| 168 | The C<open> pragma serves as one of the interfaces to declare default |
| 169 | "layers" (also known as "disciplines") for all I/O. Any two-argument |
| 170 | open(), readpipe() (aka qx//) and similar operators found within the |
| 171 | lexical scope of this pragma will use the declared defaults. |
| 172 | Even three-argument opens may be affected by this pragma |
| 173 | when they don't specify IO layers in MODE. |
| 174 | |
| 175 | With the C<IN> subpragma you can declare the default layers |
| 176 | of input streams, and with the C<OUT> subpragma you can declare |
| 177 | the default layers of output streams. With the C<IO> subpragma |
| 178 | you can control both input and output streams simultaneously. |
| 179 | |
| 180 | If you have a legacy encoding, you can use the C<:encoding(...)> tag. |
| 181 | |
| 182 | If you want to set your encoding layers based on your |
| 183 | locale environment variables, you can use the C<:locale> tag. |
| 184 | For example: |
| 185 | |
| 186 | $ENV{LANG} = 'ru_RU.KOI8-R'; |
| 187 | # the :locale will probe the locale environment variables like LANG |
| 188 | use open OUT => ':locale'; |
| 189 | open(O, ">koi8"); |
| 190 | print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1 |
| 191 | close O; |
| 192 | open(I, "<koi8"); |
| 193 | printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1 |
| 194 | close I; |
| 195 | |
| 196 | These are equivalent |
| 197 | |
| 198 | use open ':encoding(utf8)'; |
| 199 | use open IO => ':encoding(utf8)'; |
| 200 | |
| 201 | as are these |
| 202 | |
| 203 | use open ':locale'; |
| 204 | use open IO => ':locale'; |
| 205 | |
| 206 | and these |
| 207 | |
| 208 | use open ':encoding(iso-8859-7)'; |
| 209 | use open IO => ':encoding(iso-8859-7)'; |
| 210 | |
| 211 | The matching of encoding names is loose: case does not matter, and |
| 212 | many encodings have several aliases. See L<Encode::Supported> for |
| 213 | details and the list of supported locales. |
| 214 | |
| 215 | When open() is given an explicit list of layers (with the three-arg |
| 216 | syntax), they override the list declared using this pragma. open() can |
| 217 | also be given a single colon (:) for a layer name, to override this pragma |
| 218 | and use the default (C<:raw> on Unix, C<:crlf> on Windows). |
| 219 | |
| 220 | The C<:std> subpragma on its own has no effect, but if combined with |
| 221 | the C<:utf8> or C<:encoding> subpragmas, it converts the standard |
| 222 | filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected |
| 223 | for input/output handles. For example, if both input and out are |
| 224 | chosen to be C<:encoding(utf8)>, a C<:std> will mean that STDIN, STDOUT, |
| 225 | and STDERR are also in C<:encoding(utf8)>. On the other hand, if only |
| 226 | output is chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause |
| 227 | only the STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma |
| 228 | implicitly turns on C<:std>. |
| 229 | |
| 230 | The logic of C<:locale> is described in full in L<encoding>, |
| 231 | but in short it is first trying nl_langinfo(CODESET) and then |
| 232 | guessing from the LC_ALL and LANG locale environment variables. |
| 233 | |
| 234 | Directory handles may also support PerlIO layers in the future. |
| 235 | |
| 236 | =head1 NONPERLIO FUNCTIONALITY |
| 237 | |
| 238 | If Perl is not built to use PerlIO as its IO system then only the two |
| 239 | pseudo-layers C<:bytes> and C<:crlf> are available. |
| 240 | |
| 241 | The C<:bytes> layer corresponds to "binary mode" and the C<:crlf> |
| 242 | layer corresponds to "text mode" on platforms that distinguish |
| 243 | between the two modes when opening files (which is many DOS-like |
| 244 | platforms, including Windows). These two layers are no-ops on |
| 245 | platforms where binmode() is a no-op, but perform their functions |
| 246 | everywhere if PerlIO is enabled. |
| 247 | |
| 248 | =head1 IMPLEMENTATION DETAILS |
| 249 | |
| 250 | There is a class method in C<PerlIO::Layer> C<find> which is |
| 251 | implemented as XS code. It is called by C<import> to validate the |
| 252 | layers: |
| 253 | |
| 254 | PerlIO::Layer::->find("perlio") |
| 255 | |
| 256 | The return value (if defined) is a Perl object, of class |
| 257 | C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As |
| 258 | yet there is nothing useful you can do with the object at the perl |
| 259 | level. |
| 260 | |
| 261 | =head1 SEE ALSO |
| 262 | |
| 263 | L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>, |
| 264 | L<encoding> |
| 265 | |
| 266 | =cut |