Commit | Line | Data |
---|---|---|
af1f55d9 JH |
1 | BEGIN { |
2 | if ($ENV{'PERL_CORE'}){ | |
3 | chdir 't'; | |
4 | unshift @INC, '../lib'; | |
5 | } | |
6 | require Config; import Config; | |
7 | if ($Config{'extensions'} !~ /\bEncode\b/) { | |
8 | print "1..0 # Skip: Encode was not built\n"; | |
9 | exit 0; | |
10 | } | |
982a4085 | 11 | if (ord("A") == 193) { |
d1256cb1 RGS |
12 | print "1..0 # Skip: EBCDIC\n"; |
13 | exit 0; | |
982a4085 | 14 | } |
af1f55d9 JH |
15 | $| = 1; |
16 | } | |
17 | ||
18 | use strict; | |
19 | use File::Basename; | |
20 | use File::Spec; | |
21 | use Encode qw(decode encode find_encoding _utf8_off); | |
22 | ||
23 | #use Test::More qw(no_plan); | |
64bc6d54 | 24 | use Test::More tests => 30; |
af1f55d9 JH |
25 | use_ok("Encode::Guess"); |
26 | { | |
27 | no warnings; | |
28 | $Encode::Guess::DEBUG = shift || 0; | |
29 | } | |
30 | ||
31 | my $ascii = join('' => map {chr($_)}(0x21..0x7e)); | |
32 | my $latin1 = join('' => map {chr($_)}(0xa1..0xfe)); | |
33 | my $utf8on = join('' => map {chr($_)}(0x3000..0x30fe)); | |
34 | my $utf8off = $utf8on; _utf8_off($utf8off); | |
7e19fb92 JH |
35 | my $utf16 = encode('UTF-16', $utf8on); |
36 | my $utf32 = encode('UTF-32', $utf8on); | |
af1f55d9 | 37 | |
64bc6d54 | 38 | like(guess_encoding(''), qr/empty string/io, 'empty string'); |
7e19fb92 JH |
39 | is(guess_encoding($ascii)->name, 'ascii', 'ascii'); |
40 | like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii'); | |
41 | is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1'); | |
42 | is(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag'); | |
43 | is(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag'); | |
44 | is(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16'); | |
45 | is(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32'); | |
af1f55d9 JH |
46 | |
47 | my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf'); | |
48 | my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'); | |
49 | my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf'); | |
50 | ||
51 | open my $fh, $jisx0208 or die "$jisx0208: $!"; | |
cb3fb9b7 | 52 | binmode($fh); |
af1f55d9 JH |
53 | $utf8off = join('' => <$fh>); |
54 | close $fh; | |
55 | $utf8on = decode('utf8', $utf8off); | |
7e19fb92 | 56 | |
af1f55d9 JH |
57 | my @jp = qw(7bit-jis shiftjis euc-jp); |
58 | ||
7e19fb92 | 59 | Encode::Guess->set_suspects(@jp); |
af1f55d9 JH |
60 | |
61 | for my $jp (@jp){ | |
62 | my $test = encode($jp, $utf8on); | |
7e19fb92 | 63 | is(guess_encoding($test)->name, $jp, "JP:$jp"); |
af1f55d9 | 64 | } |
7e19fb92 | 65 | |
af1f55d9 JH |
66 | is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')"); |
67 | eval{ encode('Guess', $utf8on) }; | |
10c5ecbb | 68 | like($@, qr/not defined/io, "no encode()"); |
7e19fb92 JH |
69 | |
70 | my %CJKT = | |
71 | ( | |
72 | 'euc-cn' => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'), | |
73 | 'euc-jp' => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'), | |
74 | 'euc-kr' => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'), | |
75 | 'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'), | |
76 | ); | |
77 | ||
78 | Encode::Guess->set_suspects(keys %CJKT); | |
79 | ||
80 | for my $name (keys %CJKT){ | |
81 | open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!"; | |
cb3fb9b7 | 82 | binmode($fh); |
7e19fb92 JH |
83 | $utf8off = join('' => <$fh>); |
84 | close $fh; | |
85 | ||
86 | my $test = encode($name, decode('utf8', $utf8off)); | |
87 | is(guess_encoding($test)->name, $name, "CJKT:$name"); | |
88 | } | |
89 | ||
8676e7d3 JH |
90 | my $ambiguous = "\x{5c0f}\x{98fc}\x{5f3e}"; |
91 | my $english = "The quick brown fox jumps over the black lazy dog."; | |
92 | for my $utf (qw/UTF-16 UTF-32/){ | |
93 | for my $bl (qw/BE LE/){ | |
d1256cb1 RGS |
94 | my $test = encode("$utf$bl" => $english); |
95 | is(guess_encoding($test)->name, "$utf$bl", "$utf$bl"); | |
8676e7d3 JH |
96 | } |
97 | } | |
98 | for my $bl (qw/BE LE/){ | |
99 | my $test = encode("UTF-16$bl" => $ambiguous); | |
100 | my $result = guess_encoding($test); | |
101 | ok(! ref($result), "UTF-16$bl:$result"); | |
102 | } | |
b5ab1f6f DK |
103 | |
104 | ||
105 | ||
106 | Encode::Guess->set_suspects(); | |
107 | for my $jp (@jp){ | |
108 | # intentionally set $1 a priori -- see Changes | |
109 | my $test = "English"; | |
110 | '$1' =~ m/^(.*)/o; | |
111 | is(guess_encoding($test, ($jp))->name, 'ascii', | |
112 | "ascii vs $jp (\$1 messed)"); | |
113 | $test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}"); | |
114 | is(guess_encoding($test, ($jp))->name, | |
115 | $jp, "$jp vs ascii (\$1 messed)"); | |
116 | } | |
117 | ||
af1f55d9 | 118 | __END__; |