This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to Encode-2.27
[perl5.git] / ext / Encode / t / guess.t
CommitLineData
af1f55d9
JH
1BEGIN {
2 if ($ENV{'PERL_CORE'}){
3 chdir 't';
4 unshift @INC, '../lib';
5 }
6 require Config; import Config;
7 if ($Config{'extensions'} !~ /\bEncode\b/) {
8 print "1..0 # Skip: Encode was not built\n";
9 exit 0;
10 }
982a4085 11 if (ord("A") == 193) {
d1256cb1
RGS
12 print "1..0 # Skip: EBCDIC\n";
13 exit 0;
982a4085 14 }
af1f55d9
JH
15 $| = 1;
16}
17
18use strict;
19use File::Basename;
20use File::Spec;
21use Encode qw(decode encode find_encoding _utf8_off);
22
23#use Test::More qw(no_plan);
64bc6d54 24use Test::More tests => 30;
af1f55d9
JH
25use_ok("Encode::Guess");
26{
27 no warnings;
28 $Encode::Guess::DEBUG = shift || 0;
29}
30
31my $ascii = join('' => map {chr($_)}(0x21..0x7e));
32my $latin1 = join('' => map {chr($_)}(0xa1..0xfe));
33my $utf8on = join('' => map {chr($_)}(0x3000..0x30fe));
34my $utf8off = $utf8on; _utf8_off($utf8off);
7e19fb92
JH
35my $utf16 = encode('UTF-16', $utf8on);
36my $utf32 = encode('UTF-32', $utf8on);
af1f55d9 37
64bc6d54 38like(guess_encoding(''), qr/empty string/io, 'empty string');
7e19fb92
JH
39is(guess_encoding($ascii)->name, 'ascii', 'ascii');
40like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii');
41is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1');
42is(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag');
43is(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag');
44is(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16');
45is(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32');
af1f55d9
JH
46
47my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf');
48my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf');
49my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf');
50
51open my $fh, $jisx0208 or die "$jisx0208: $!";
cb3fb9b7 52binmode($fh);
af1f55d9
JH
53$utf8off = join('' => <$fh>);
54close $fh;
55$utf8on = decode('utf8', $utf8off);
7e19fb92 56
af1f55d9
JH
57my @jp = qw(7bit-jis shiftjis euc-jp);
58
7e19fb92 59Encode::Guess->set_suspects(@jp);
af1f55d9
JH
60
61for my $jp (@jp){
62 my $test = encode($jp, $utf8on);
7e19fb92 63 is(guess_encoding($test)->name, $jp, "JP:$jp");
af1f55d9 64}
7e19fb92 65
af1f55d9
JH
66is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')");
67eval{ encode('Guess', $utf8on) };
10c5ecbb 68like($@, qr/not defined/io, "no encode()");
7e19fb92
JH
69
70my %CJKT =
71 (
72 'euc-cn' => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'),
73 'euc-jp' => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'),
74 'euc-kr' => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'),
75 'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'),
76);
77
78Encode::Guess->set_suspects(keys %CJKT);
79
80for my $name (keys %CJKT){
81 open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!";
cb3fb9b7 82 binmode($fh);
7e19fb92
JH
83 $utf8off = join('' => <$fh>);
84 close $fh;
85
86 my $test = encode($name, decode('utf8', $utf8off));
87 is(guess_encoding($test)->name, $name, "CJKT:$name");
88}
89
8676e7d3
JH
90my $ambiguous = "\x{5c0f}\x{98fc}\x{5f3e}";
91my $english = "The quick brown fox jumps over the black lazy dog.";
92for my $utf (qw/UTF-16 UTF-32/){
93 for my $bl (qw/BE LE/){
d1256cb1
RGS
94 my $test = encode("$utf$bl" => $english);
95 is(guess_encoding($test)->name, "$utf$bl", "$utf$bl");
8676e7d3
JH
96 }
97}
98for my $bl (qw/BE LE/){
99 my $test = encode("UTF-16$bl" => $ambiguous);
100 my $result = guess_encoding($test);
101 ok(! ref($result), "UTF-16$bl:$result");
102}
b5ab1f6f
DK
103
104
105
106Encode::Guess->set_suspects();
107for my $jp (@jp){
108 # intentionally set $1 a priori -- see Changes
109 my $test = "English";
110 '$1' =~ m/^(.*)/o;
111 is(guess_encoding($test, ($jp))->name, 'ascii',
112 "ascii vs $jp (\$1 messed)");
113 $test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}");
114 is(guess_encoding($test, ($jp))->name,
115 $jp, "$jp vs ascii (\$1 messed)");
116}
117
af1f55d9 118__END__;