This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
PerlIO-encoding/t/encoding.t: Generalize for non-ASCII platform
[perl5.git] / ext / PerlIO-encoding / t / encoding.t
CommitLineData
bb7af5ca 1#!./perl -w
9ba8831b
JH
2
3BEGIN {
9ba8831b
JH
4 unless (find PerlIO::Layer 'perlio') {
5 print "1..0 # Skip: not perlio\n";
6 exit 0;
7 }
54cfe943
RB
8 unless (eval { require Encode } ) {
9 print "1..0 # Skip: not Encode\n";
10 exit 0;
11 }
aa0bd1ca 12 require "../../t/charset_tools.pl";
9ba8831b
JH
13}
14
8f79eb5b 15use Test::More tests => 24;
08efa405 16
8e86646e
JH
17my $grk = "grk$$";
18my $utf = "utf$$";
92e410c2
MB
19my $fail1 = "fa$$";
20my $fail2 = "fb$$";
ed53a2bb 21my $russki = "koi8r$$";
19d607df 22my $threebyte = "3byte$$";
8e86646e 23
2b18b92a
JH
24if (open(GRK, ">$grk")) {
25 binmode(GRK, ":bytes");
8e86646e
JH
26 # alpha beta gamma in ISO 8859-7
27 print GRK "\xe1\xe2\xe3";
d1e4d418 28 close GRK or die "Could not close: $!";
8e86646e
JH
29}
30
31{
30029c0d
NC
32 is(open(my $i,'<:encoding(iso-8859-7)',$grk), 1);
33 is(open(my $o,'>:utf8',$utf), 1);
34 is((print $o readline $i), 1);
d1e4d418 35 close($o) or die "Could not close: $!";
8e86646e
JH
36 close($i);
37}
38
2b18b92a
JH
39if (open(UTF, "<$utf")) {
40 binmode(UTF, ":bytes");
aa0bd1ca
KW
41
42 # alpha beta gamma in UTF-8 Unicode (0x3b1 0x3b2 0x3b3)
43 is(scalar <UTF>, byte_utf8a_to_utf8n("\xce\xb1\xce\xb2\xce\xb3"));
206b12d5 44 close UTF;
8e86646e
JH
45}
46
47{
48 use Encode;
30029c0d
NC
49 is (open(my $i,'<:utf8',$utf), 1);
50 is (open(my $o,'>:encoding(iso-8859-7)',$grk), 1);
51 is ((scalar print $o readline $i), 1);
d1e4d418 52 close($o) or die "Could not close: $!";
8e86646e
JH
53 close($i);
54}
55
2b18b92a
JH
56if (open(GRK, "<$grk")) {
57 binmode(GRK, ":bytes");
30029c0d 58 is(scalar <GRK>, "\xe1\xe2\xe3");
206b12d5 59 close GRK;
8e86646e
JH
60}
61
bb7af5ca 62$SIG{__WARN__} = sub {$warn .= $_[0]};
b26b1ab5 63
30029c0d
NC
64is (open(FAIL, ">:encoding(NoneSuch)", $fail1), undef, 'Open should fail');
65like($warn, qr/^Cannot find encoding "NoneSuch" at/);
66
67is(open(RUSSKI, ">$russki"), 1);
68print RUSSKI "\x3c\x3f\x78";
69close RUSSKI or die "Could not close: $!";
70open(RUSSKI, "$russki");
71binmode(RUSSKI, ":raw");
72my $buf1;
73read(RUSSKI, $buf1, 1);
74# eof(RUSSKI);
75binmode(RUSSKI, ":encoding(koi8-r)");
76my $buf2;
77read(RUSSKI, $buf2, 1);
78my $offset = tell(RUSSKI);
79is(ord $buf1, 0x3c);
80is(ord $buf2, (ord('A') == 193) ? 0x6f : 0x3f);
81is($offset, 2);
82close RUSSKI;
ed53a2bb 83
bb7af5ca
NC
84undef $warn;
85
86# Check there is no Use of uninitialized value in concatenation (.) warning
87# due to the way @latin2iso_num was used to make aliases.
30029c0d
NC
88is(open(FAIL, ">:encoding(latin42)", $fail2), undef, 'Open should fail');
89
90like($warn, qr/^Cannot find encoding "latin42" at.*line \d+\.$/);
bb7af5ca 91
19d607df
NIS
92# Create a string of chars that are 3 bytes in UTF-8
93my $str = "\x{1f80}" x 2048;
94
95# Write them to a file
96open(F,'>:utf8',$threebyte) || die "Cannot open $threebyte:$!";
97print F $str;
98close(F);
99
100# Read file back as UTF-8
101open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
102my $dstr = <F>;
103close(F);
30029c0d 104is($dstr, $str);
19d607df 105
fbadb8fa
GA
106# Try decoding some bad stuff
107open(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!";
501f55b9
JH
108if (ord('A') == 193) { # EBCDIC
109 print F "foo\x8c\x80\x80\x80bar\n\x80foo\n";
110} else {
111 print F "foo\xF0\x80\x80\x80bar\n\x80foo\n";
112}
fbadb8fa
GA
113close(F);
114
115open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
116$dstr = join(":", <F>);
117close(F);
501f55b9 118if (ord('A') == 193) { # EBCDIC
30029c0d 119 is($dstr, "foo\\x8C\\x80\\x80\\x80bar\n:\\x80foo\n");
501f55b9 120} else {
30029c0d 121 is($dstr, "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n");
501f55b9 122}
fbadb8fa 123
667763bd
FC
124# Check that PerlIO::encoding can handle custom encodings that do funny
125# things with the buffer.
126use Encode::Encoding;
127package Extensive {
128 @ISA = Encode::Encoding;
129 __PACKAGE__->Define('extensive');
130 sub encode($$;$) {
131 my ($self,$buf,$chk) = @_;
132 my $leftovers = '';
133 if ($buf =~ /(.*\n)(?!\z)/) {
134 $buf = $1;
135 $leftovers = $';
136 }
137 if ($chk) {
e9a8753a
FC
138 undef $_[1];
139 my @x = (' ') x 8000; # reuse the just-freed buffer
140 $_[1] = $leftovers; # SvPVX now points elsewhere and is shorter
141 } # than bufsiz
667763bd
FC
142 $buf;
143 }
144 no warnings 'once';
145 *decode = *encode;
146}
147open my $fh, ">:encoding(extensive)", \$buf;
148$fh->autoflush;
149print $fh "doughnut\n";
150print $fh "quaffee\n";
e9a8753a
FC
151# Print something longer than the buffer that encode() shrunk:
152print $fh "The beech leaves beech leaves on the beach by the beech.\n";
667763bd 153close $fh;
e9a8753a
FC
154is $buf, "doughnut\nquaffee\nThe beech leaves beech leaves on the beach by"
155 ." the beech.\n", 'buffer realloc during encoding';
667763bd
FC
156$buf = "Sheila surely shod Sean\nin shoddy shoes.\n";
157open $fh, "<:encoding(extensive)", \$buf;
158is join("", <$fh>), "Sheila surely shod Sean\nin shoddy shoes.\n",
159 'buffer realloc during decoding';
160
8f79eb5b
FC
161package Cower {
162 @ISA = Encode::Encoding;
163 __PACKAGE__->Define('cower');
164 sub encode($$;$) {
165 my ($self,$buf,$chk) = @_;
166 my $leftovers = '';
167 if ($buf =~ /(.*\n)(?!\z)/) {
168 $buf = $1;
169 $leftovers = $';
170 }
171 if ($chk) {
172 no warnings; # stupid @_[1] warning
173 @_[1] = keys %{{$leftovers=>1}}; # shared hash key (copy-on-write)
174 }
175 $buf;
176 }
177 no warnings 'once';
178 *decode = *encode;
179}
180open $fh, ">:encoding(cower)", \$buf;
181$fh->autoflush;
182print $fh $_ for qw "pumping plum pits";
183close $fh;
184is $buf, "pumpingplumpits", 'cowing buffer during encoding';
185$buf = "pumping\nplum\npits\n";
186open $fh, "<:encoding(cower)", \$buf;
187is join("", <$fh>), "pumping\nplum\npits\n",
188 'cowing buffer during decoding';
189
667763bd
FC
190package Globber {
191 no warnings 'once';
192 @ISA = Encode::Encoding;
193 __PACKAGE__->Define('globber');
194 sub encode($$;$) {
195 my ($self,$buf,$chk) = @_;
196 $_[1] = *foo if $chk;
197 $buf;
198 }
199 *decode = *encode;
200}
201
202# Here we just want to test there is no crash. The actual output is not so
203# important.
204# We need a double eval, as scope unwinding will close the handle,
205# which croaks.
668a8625
FC
206# Under debugging builds with PERL_DESTRUCT_LEVEL set, we have to skip this
207# test, as it triggers bug #115692, resulting in string table warnings.
208require Config;
209SKIP: {
210skip "produces string table warnings", 2
211 if "@{[Config::non_bincompat_options()]}" =~ /\bDEBUGGING\b/
212 && $ENV{PERL_DESTRUCT_LEVEL};
213
667763bd
FC
214eval { eval {
215 open my $fh, ">:encoding(globber)", \$buf;
216 print $fh "Agathopous Goodfoot\n";
217 close $fh;
218}; $e = $@};
219like $@||$e, qr/Close with partial character/,
220 'no crash when assigning glob to buffer in encode';
221$buf = "To hymn him who heard her herd herd\n";
222open $fh, "<:encoding(globber)", \$buf;
223my $x = <$fh>;
224close $fh;
225is $x, "To hymn him who heard her herd herd\n",
226 'no crash when assigning glob to buffer in decode';
227
668a8625
FC
228} # SKIP
229
8e86646e 230END {
98a392ec 231 1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
8e86646e 232}