Commit | Line | Data |
---|---|---|
d1edabcf | 1 | package open; |
ac27b0f5 | 2 | use Carp; |
16fe6d59 GS |
3 | $open::hint_bits = 0x20000; |
4 | ||
0c4f7ff0 | 5 | our $VERSION = '1.01'; |
b75c8c73 | 6 | |
58d53262 JH |
7 | my $locale_encoding; |
8 | ||
9 | sub in_locale { $^H & $locale::hint_bits } | |
10 | ||
11 | sub _get_locale_encoding { | |
12 | unless (defined $locale_encoding) { | |
276c9210 | 13 | # I18N::Langinfo isn't available everywhere |
9615f2ee BT |
14 | eval { |
15 | require I18N::Langinfo; | |
16 | I18N::Langinfo->import(qw(langinfo CODESET)); | |
ba6ce41c | 17 | $locale_encoding = langinfo(CODESET()); |
9615f2ee BT |
18 | }; |
19 | unless ($@) { | |
20 | print "# locale_encoding = $locale_encoding\n"; | |
58d53262 | 21 | } |
11fc5dc3 | 22 | my $country_language; |
58d53262 | 23 | if (not $locale_encoding && in_locale()) { |
11fc5dc3 JH |
24 | if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) { |
25 | ($country_language, $locale_encoding) = ($1, $2); | |
26 | } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) { | |
27 | ($country_language, $locale_encoding) = ($1, $2); | |
58d53262 | 28 | } |
1e616cf5 JH |
29 | } elsif (not $locale_encoding) { |
30 | if ($ENV{LC_ALL} =~ /\butf-?8\b/i || | |
31 | $ENV{LANG} =~ /\butf-?8\b/i) { | |
32 | $locale_encoding = 'utf8'; | |
33 | } | |
34 | # Could do more heuristics based on the country and language | |
58d53262 JH |
35 | # parts of LC_ALL and LANG (the parts before the dot (if any)), |
36 | # since we have Locale::Country and Locale::Language available. | |
37 | # TODO: get a database of Language -> Encoding mappings | |
421e5dc3 JH |
38 | # (the Estonian database at http://www.eki.ee/letter/ |
39 | # would be excellent!) --jhi | |
58d53262 | 40 | } |
11fc5dc3 JH |
41 | if (defined $locale_encoding && |
42 | $locale_encoding eq 'euc' && | |
43 | defined $country_language) { | |
56fb2e42 | 44 | if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) { |
1e616cf5 | 45 | $locale_encoding = 'euc-jp'; |
5a192dee | 46 | } elsif ($country_language =~ /^ko_KR|korean?$/i) { |
1e616cf5 | 47 | $locale_encoding = 'euc-kr'; |
56fb2e42 | 48 | } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) { |
1e616cf5 | 49 | $locale_encoding = 'euc-tw'; |
11fc5dc3 JH |
50 | } |
51 | croak "Locale encoding 'euc' too ambiguous" | |
52 | if $locale_encoding eq 'euc'; | |
53 | } | |
58d53262 JH |
54 | } |
55 | } | |
56 | ||
16fe6d59 | 57 | sub import { |
dfebf958 NIS |
58 | my ($class,@args) = @_; |
59 | croak("`use open' needs explicit list of disciplines") unless @args; | |
16fe6d59 | 60 | $^H |= $open::hint_bits; |
ba6ce41c | 61 | my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1); |
dfebf958 NIS |
62 | while (@args) { |
63 | my $type = shift(@args); | |
1e616cf5 JH |
64 | my $dscp; |
65 | if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) { | |
66 | $type = 'IO'; | |
67 | $dscp = ":$1"; | |
68 | } else { | |
725d232a | 69 | $dscp = shift(@args) || ''; |
1e616cf5 | 70 | } |
ac27b0f5 | 71 | my @val; |
1e616cf5 | 72 | foreach my $layer (split(/\s+/,$dscp)) { |
dfebf958 | 73 | $layer =~ s/^://; |
58d53262 JH |
74 | if ($layer eq 'locale') { |
75 | use Encode; | |
76 | _get_locale_encoding() | |
77 | unless defined $locale_encoding; | |
78 | croak "Cannot figure out an encoding to use" | |
79 | unless defined $locale_encoding; | |
11fc5dc3 JH |
80 | if ($locale_encoding =~ /^utf-?8$/i) { |
81 | $layer = "utf8"; | |
82 | } else { | |
738b23dc | 83 | $layer = "encoding($locale_encoding)"; |
11fc5dc3 | 84 | } |
97ed432b NIS |
85 | } else { |
86 | unless(PerlIO::Layer::->find($layer)) { | |
87 | carp("Unknown discipline layer '$layer'"); | |
88 | } | |
ac27b0f5 NIS |
89 | } |
90 | push(@val,":$layer"); | |
91 | if ($layer =~ /^(crlf|raw)$/) { | |
92 | $^H{"open_$type"} = $layer; | |
16fe6d59 | 93 | } |
ac27b0f5 | 94 | } |
738b23dc | 95 | # print "# type = $type, val = @val\n"; |
ac27b0f5 NIS |
96 | if ($type eq 'IN') { |
97 | $in = join(' ',@val); | |
98 | } | |
99 | elsif ($type eq 'OUT') { | |
100 | $out = join(' ',@val); | |
16fe6d59 | 101 | } |
1e616cf5 | 102 | elsif ($type eq 'IO') { |
f3b00462 JH |
103 | $in = $out = join(' ',@val); |
104 | } | |
16fe6d59 | 105 | else { |
ac27b0f5 | 106 | croak "Unknown discipline class '$type'"; |
16fe6d59 GS |
107 | } |
108 | } | |
1e616cf5 | 109 | ${^OPEN} = join("\0",$in,$out); |
16fe6d59 GS |
110 | } |
111 | ||
112 | 1; | |
113 | __END__ | |
d1edabcf GS |
114 | |
115 | =head1 NAME | |
116 | ||
117 | open - perl pragma to set default disciplines for input and output | |
118 | ||
119 | =head1 SYNOPSIS | |
120 | ||
1e616cf5 JH |
121 | use open IN => ":crlf", OUT => ":raw"; |
122 | use open OUT => ':utf8'; | |
123 | use open IO => ":encoding(iso-8859-7)"; | |
124 | ||
125 | use open IO => ':locale'; | |
725d232a | 126 | |
1e616cf5 JH |
127 | use open ':utf8'; |
128 | use open ':locale'; | |
129 | use open ':encoding(iso-8859-7)'; | |
d1edabcf GS |
130 | |
131 | =head1 DESCRIPTION | |
132 | ||
d151aa0e JH |
133 | Full-fledged support for I/O disciplines is now implemented provided |
134 | Perl is configured to use PerlIO as its IO system (which is now the | |
135 | default). | |
16fe6d59 | 136 | |
7d3b96bb NIS |
137 | The C<open> pragma serves as one of the interfaces to declare default |
138 | "layers" (aka disciplines) for all I/O. | |
139 | ||
140 | The C<open> pragma is used to declare one or more default layers for | |
d151aa0e JH |
141 | I/O operations. Any open(), readpipe() (aka qx//) and similar |
142 | operators found within the lexical scope of this pragma will use the | |
143 | declared defaults. | |
7d3b96bb | 144 | |
1e616cf5 | 145 | With the C<IN> subpragma you can declare the default layers |
d8d29d4f | 146 | of input streams, and with the C<OUT> subpragma you can declare |
1e616cf5 JH |
147 | the default layers of output streams. With the C<IO> subpragma |
148 | you can control both input and output streams simultaneously. | |
149 | ||
150 | If you have a legacy encoding, you can use the C<:encoding(...)> tag. | |
151 | ||
152 | if you want to set your encoding disciplines based on your | |
153 | locale environment variables, you can use the C<:locale> tag. | |
154 | For example: | |
155 | ||
156 | $ENV{LANG} = 'ru_RU.KOI8-R'; | |
dbd62f41 JH |
157 | # the :locale will probe the locale environment variables like LANG |
158 | use open OUT => ':locale'; | |
1e616cf5 | 159 | open(O, ">koi8"); |
23bcb45a | 160 | print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1 |
1e616cf5 JH |
161 | close O; |
162 | open(I, "<koi8"); | |
23bcb45a | 163 | printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1 |
1e616cf5 JH |
164 | close I; |
165 | ||
166 | These are equivalent | |
167 | ||
168 | use open ':utf8'; | |
169 | use open IO => ':utf8'; | |
170 | ||
171 | as are these | |
172 | ||
173 | use open ':locale'; | |
174 | use open IO => ':locale'; | |
175 | ||
176 | and these | |
177 | ||
178 | use open ':encoding(iso-8859-7)'; | |
179 | use open IO => ':encoding(iso-8859-7)'; | |
180 | ||
d151aa0e JH |
181 | When open() is given an explicit list of layers they are appended to |
182 | the list declared using this pragma. | |
7d3b96bb NIS |
183 | |
184 | Directory handles may also support disciplines in future. | |
185 | ||
186 | =head1 NONPERLIO FUNCTIONALITY | |
187 | ||
d151aa0e JH |
188 | If Perl is not built to use PerlIO as its IO system then only the two |
189 | pseudo-disciplines ":raw" and ":crlf" are available. | |
16fe6d59 GS |
190 | |
191 | The ":raw" discipline corresponds to "binary mode" and the ":crlf" | |
192 | discipline corresponds to "text mode" on platforms that distinguish | |
193 | between the two modes when opening files (which is many DOS-like | |
d151aa0e JH |
194 | platforms, including Windows). These two disciplines are no-ops on |
195 | platforms where binmode() is a no-op, but perform their functions | |
196 | everywhere if PerlIO is enabled. | |
7d3b96bb NIS |
197 | |
198 | =head1 IMPLEMENTATION DETAILS | |
d1edabcf | 199 | |
f3b00462 JH |
200 | There is a class method in C<PerlIO::Layer> C<find> which is |
201 | implemented as XS code. It is called by C<import> to validate the | |
202 | layers: | |
0c4f7ff0 NIS |
203 | |
204 | PerlIO::Layer::->find("perlio") | |
205 | ||
f3b00462 JH |
206 | The return value (if defined) is a Perl object, of class |
207 | C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As | |
208 | yet there is nothing useful you can do with the object at the perl | |
209 | level. | |
16fe6d59 | 210 | |
d1edabcf GS |
211 | =head1 SEE ALSO |
212 | ||
1768d7eb JH |
213 | L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>, |
214 | L<encoding> | |
d1edabcf GS |
215 | |
216 | =cut |