This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
514ecac7b47180cf02d781b2ae0f4c1d9a582f20
[perl5.git] / cpan / Encode / t / guess.t
1 BEGIN {
2     if ( $ENV{'PERL_CORE'} && $] >= 5.011) {
3         chdir 't';
4         unshift @INC, '../lib';
5     }
6     require Config; import Config;
7     if ($Config{'extensions'} !~ /\bEncode\b/) {
8       print "1..0 # Skip: Encode was not built\n";
9       exit 0;
10     }
11     if (ord("A") == 193) {
12     print "1..0 # Skip: EBCDIC\n";
13     exit 0;
14     }
15     $| = 1;
16 }
17
18 use strict;
19 use File::Basename;
20 use File::Spec;
21 use Encode qw(decode encode find_encoding _utf8_off);
22
23 #use Test::More qw(no_plan);
24 use Test::More tests => 30;
25 use_ok("Encode::Guess");
26 {
27     no warnings;
28     $Encode::Guess::DEBUG = shift || 0;
29 }
30
31 my $ascii  = join('' => map {chr($_)}(0x21..0x7e));
32 my $latin1 = join('' => map {chr($_)}(0xa1..0xfe));
33 my $utf8on  = join('' => map {chr($_)}(0x3000..0x30fe));
34 my $utf8off = $utf8on; _utf8_off($utf8off);
35 my $utf16 = encode('UTF-16', $utf8on);
36 my $utf32 = encode('UTF-32', $utf8on);
37
38 like(guess_encoding(''), qr/empty string/io, 'empty string');
39 is(guess_encoding($ascii)->name, 'ascii', 'ascii');
40 like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii');
41 is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1');
42 is(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag');
43 is(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag');
44 is(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16');
45 is(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32');
46
47 my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf');
48 my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf');
49 my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf');
50
51 open my $fh, $jisx0208 or die "$jisx0208: $!";
52 binmode($fh);
53 $utf8off = join('' => <$fh>);
54 close $fh;
55 $utf8on = decode('utf8', $utf8off);
56
57 my @jp = qw(7bit-jis shiftjis euc-jp);
58
59 Encode::Guess->set_suspects(@jp);
60
61 for my $jp (@jp){
62     my $test = encode($jp, $utf8on);
63     is(guess_encoding($test)->name, $jp, "JP:$jp");
64 }
65
66 is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')");
67 eval{ encode('Guess', $utf8on) };
68 like($@, qr/not defined/io, "no encode()");
69
70 my %CJKT = 
71     (
72      'euc-cn'    => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'),
73      'euc-jp'    => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'),
74      'euc-kr'    => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'),
75      'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'),
76 );
77
78 Encode::Guess->set_suspects(keys %CJKT);
79
80 for my $name (keys %CJKT){
81     open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!";
82     binmode($fh);
83     $utf8off = join('' => <$fh>);
84     close $fh;
85
86     my $test = encode($name, decode('utf8', $utf8off));
87     is(guess_encoding($test)->name, $name, "CJKT:$name");
88 }
89
90 my $ambiguous =  "\x{5c0f}\x{98fc}\x{5f3e}";
91 my $english   = "The quick brown fox jumps over the black lazy dog.";
92 for my $utf (qw/UTF-16 UTF-32/){
93     for my $bl (qw/BE LE/){
94     my $test = encode("$utf$bl" => $english);
95     is(guess_encoding($test)->name, "$utf$bl", "$utf$bl");
96     }
97 }
98 for my $bl (qw/BE LE/){
99     my $test = encode("UTF-16$bl" => $ambiguous);
100     my $result = guess_encoding($test);
101     ok(! ref($result), "UTF-16$bl:$result");
102 }
103
104
105
106 Encode::Guess->set_suspects();
107 for my $jp (@jp){
108     # intentionally set $1 a priori -- see Changes
109     my $test = "English";
110     '$1' =~ m/^(.*)/o;
111     is(guess_encoding($test, ($jp))->name, 'ascii', 
112        "ascii vs $jp (\$1 messed)");
113     $test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}");
114     is(guess_encoding($test, ($jp))->name, 
115        $jp, "$jp vs ascii (\$1 messed)");
116 }
117
118 __END__;