This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [perl #24926] chop/~ mangles UTF8 [PATCH]
[perl5.git] / ext / Encode / t / CJKT.t
CommitLineData
ef175861
JH
1BEGIN {
2 if ($ENV{'PERL_CORE'}){
3 chdir 't';
4 unshift @INC, '../lib';
5 }
6 require Config; import Config;
7 if ($Config{'extensions'} !~ /\bEncode\b/) {
8 print "1..0 # Skip: Encode was not built\n";
9 exit 0;
10 }
11 if (ord("A") == 193) {
12 print "1..0 # Skip: EBCDIC\n";
13 exit 0;
14 }
17ba578b
JH
15# should work w/o PerlIO now!
16# unless (PerlIO::Layer->find('perlio')){
17# print "1..0 # Skip: PerlIO required\n";
18# exit 0;
19# }
ef175861
JH
20 $| = 1;
21}
22use strict;
8676e7d3 23use Test::More tests => 60;
ef175861
JH
24use Encode;
25use File::Basename;
26use File::Spec;
27use File::Compare qw(compare_text);
b7a5c9de 28our $DEBUG = shift || 0;
ef175861
JH
29
30my %Charset =
31 (
8676e7d3 32 'big5-eten' => [qw(big5-eten)],
ef175861 33 'big5-hkscs' => [qw(big5-hkscs)],
8676e7d3
JH
34 gb2312 => [qw(euc-cn hz)],
35 jisx0201 => [qw(euc-jp shiftjis 7bit-jis)],
36 jisx0208 => [qw(euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1)],
37 jisx0212 => [qw(euc-jp 7bit-jis iso-2022-jp-1)],
38 ksc5601 => [qw(euc-kr iso-2022-kr johab)],
ef175861
JH
39 );
40
8676e7d3 41
ef175861 42my $dir = dirname(__FILE__);
b7a5c9de 43my $seq = 1;
ef175861 44
cb3fb9b7 45for my $charset (sort keys %Charset){
ef175861
JH
46 my ($src, $uni, $dst, $txt);
47
48 my $transcoder = find_encoding($Charset{$charset}[0]) or die;
49
50 my $src_enc = File::Spec->catfile($dir,"$charset.enc");
51 my $src_utf = File::Spec->catfile($dir,"$charset.utf");
52 my $dst_enc = File::Spec->catfile($dir,"$$.enc");
53 my $dst_utf = File::Spec->catfile($dir,"$$.utf");
54
ef175861 55 open $src, "<$src_enc" or die "$src_enc : $!";
9735c3fc
JH
56
57 if (PerlIO::Layer->find('perlio')){
58 binmode($src, ":bytes"); # needed when :utf8 in default open layer
59 }
7e19fb92 60
ef175861
JH
61 $txt = join('',<$src>);
62 close($src);
63
64 eval{ $uni = $transcoder->decode($txt, 1) };
65 $@ and print $@;
b7a5c9de
JH
66 ok(defined($uni), "decode $charset"); $seq++;
67 is(length($txt),0, "decode $charset completely"); $seq++;
ef175861
JH
68
69 open $dst, ">$dst_utf" or die "$dst_utf : $!";
70 if (PerlIO::Layer->find('perlio')){
71 binmode($dst, ":utf8");
72 print $dst $uni;
cb3fb9b7 73 }else{ # ugh!
ef175861
JH
74 binmode($dst);
75 my $raw = $uni; Encode::_utf8_off($raw);
76 print $dst $raw;
77 }
78
79 close($dst);
b7a5c9de
JH
80 is(compare_text($dst_utf, $src_utf), 0, "$dst_utf eq $src_utf")
81 or ($DEBUG and rename $dst_utf, "$dst_utf.$seq");
82 $seq++;
ef175861
JH
83
84 open $src, "<$src_utf" or die "$src_utf : $!";
85 if (PerlIO::Layer->find('perlio')){
86 binmode($src, ":utf8");
87 $uni = join('', <$src>);
cb3fb9b7 88 }else{ # ugh!
ef175861
JH
89 binmode($src);
90 $uni = join('', <$src>);
91 Encode::_utf8_on($uni);
92 }
93 close $src;
94
ab3374e4 95 my $unisave = $uni;
ef175861
JH
96 eval{ $txt = $transcoder->encode($uni,1) };
97 $@ and print $@;
b7a5c9de
JH
98 ok(defined($txt), "encode $charset"); $seq++;
99 is(length($uni), 0, "encode $charset completely"); $seq++;
ab3374e4 100 $uni = $unisave;
ef175861
JH
101
102 open $dst,">$dst_enc" or die "$dst_utf : $!";
103 binmode($dst);
104 print $dst $txt;
105 close($dst);
b7a5c9de
JH
106 is(compare_text($src_enc, $dst_enc), 0 => "$dst_enc eq $src_enc")
107 or ($DEBUG and rename $dst_enc, "$dst_enc.$seq");
108 $seq++;
109
b7a5c9de 110 unlink($dst_utf, $dst_enc);
8676e7d3
JH
111
112 for my $encoding (@{$Charset{$charset}}){
113 my $rt = decode($encoding, encode($encoding, $uni));
114 is ($rt, $uni, "RT $encoding");
115 }
ef175861 116}