This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: Compress::Zlib, pack "C" and utf-8 [PATCH]
[perl5.git] / ext / Encode / t / encoding.t
CommitLineData
0effba8c 1BEGIN {
aa5485d1
MJD
2 require Config; import Config;
3 if ($Config{'extensions'} !~ /\bEncode\b/) {
4 print "1..0 # Skip: Encode was not built\n";
5 exit 0;
6 }
b9890021 7 unless (find PerlIO::Layer 'perlio') {
d1256cb1
RGS
8 print "1..0 # Skip: PerlIO was not built\n";
9 exit 0;
b9890021 10 }
0effba8c 11 if (ord("A") == 193) {
d1256cb1
RGS
12 print "1..0 # encoding pragma does not support EBCDIC platforms\n";
13 exit(0);
0effba8c
JH
14 }
15}
16
a999c27c 17print "1..31\n";
799ef3cb 18
0a378802 19use encoding "latin1"; # ignored (overwritten by the next line)
f14ed3c6 20use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...)
0a378802 21
0a378802 22# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
f14ed3c6 23# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
0a378802
JH
24# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
25
9f4817db
JH
26$a = "\xDF";
27$b = "\x{100}";
28
29print "not " unless ord($a) == 0x3af;
0a378802
JH
30print "ok 1\n";
31
9f4817db 32print "not " unless ord($b) == 0x100;
0a378802
JH
33print "ok 2\n";
34
9f4817db
JH
35my $c;
36
37$c = $a . $b;
38
39print "not " unless ord($c) == 0x3af;
0a378802
JH
40print "ok 3\n";
41
9f4817db
JH
42print "not " unless length($c) == 2;
43print "ok 4\n";
44
45print "not " unless ord(substr($c, 1, 1)) == 0x100;
46print "ok 5\n";
0a378802 47
121910a4
JH
48print "not " unless ord(chr(0xdf)) == 0x3af; # spooky
49print "ok 6\n";
50
51print "not " unless ord(pack("C", 0xdf)) == 0x3af;
52print "ok 7\n";
53
54# we didn't break pack/unpack, I hope
55
56print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf;
57print "ok 8\n";
58
59# the first octet of UTF-8 encoded 0x3af
1651fc44 60print "not " unless unpack("U0 C", chr(0xdf)) == 0xce;
121910a4 61print "ok 9\n";
bfa383d6 62
3de8ed06
JH
63print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
64print "ok 10\n";
65
66print "not " unless unpack("U", chr(0xdf)) == 0x3af;
67print "ok 11\n";
68
bfa383d6
JH
69# charnames must still work
70use charnames ':full';
71print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
3de8ed06
JH
72print "ok 12\n";
73
74# combine
75
76$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
77
78print "not " unless ord($c) == 0x3af;
79print "ok 13\n";
80
81print "not " unless ord(substr($c, 1, 1)) == 0xdf;
82print "ok 14\n";
83
84print "not " unless ord(substr($c, 2, 1)) == 0x3af;
85print "ok 15\n";
bfa383d6 86
a72c7584
JH
87# regex literals
88
89print "not " unless "\xDF" =~ /\x{3AF}/;
90print "ok 16\n";
91
92print "not " unless "\x{3AF}" =~ /\xDF/;
93print "ok 17\n";
94
95print "not " unless "\xDF" =~ /\xDF/;
96print "ok 18\n";
97
98print "not " unless "\x{3AF}" =~ /\x{3AF}/;
99print "ok 19\n";
100
799ef3cb
JH
101# eq, cmp
102
553e1bcc
AT
103my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = (
104 pack("C*", 0xDF ), # byte
105 pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0
106 pack("U*", 0x3AF), # $U eq $byte
107 pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding
108 pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1)
109 pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
110 pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb)
111);
112
113# all the tests in this section that compare a byte encoded string
114# ato UTF-8 encoded are run in all possible vairants
115# all of the eq, ne, cmp operations tested,
116# $v z $u tested as well as $u z $v
117
118sub alleq($$){
119 my ($a,$b) = (shift, shift);
120 $a eq $b && $b eq $a &&
121 !( $a ne $b ) && !( $b ne $a ) &&
122 ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0;
123}
124
125sub anyeq($$){
126 my ($a,$b) = (shift, shift);
127 $a eq $b || $b eq $a ||
128 !( $a ne $b ) || !( $b ne $a ) ||
129 ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0;
130}
131
132sub allgt($$){
133 my ($a,$b) = (shift, shift);
134 ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
135}
136#match the correct UTF-8 string
137print "not " unless alleq($byte, $U);
138print "ok 20\n";
139
140#do not match a wrong UTF-8 string
141print "not " if anyeq($byte, $Ub);
142print "ok 21\n";
143
144#string ordering
145print "not " unless allgt ( $g1, $byte ) &&
146 allgt ( $g2, $byte ) &&
147 allgt ( $byte, $l ) &&
148 allgt ( $bytes, $U );
149print "ok 22\n";
150
151# upgrade, downgrade
152
153my ($u,$v,$v2);
154$u = $v = $v2 = pack("C*", 0xDF);
155utf8::upgrade($v); #explicit upgrade
156$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade
157
158# implicit upgrade === explicit upgrade
159print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2;
160print "ok 23\n";
161
162# utf8::upgrade is transparent and does not break equality
163print "not " unless alleq( $u, $v );
164print "ok 24\n";
165
166$u = $v = pack("C*", 0xDF);
167utf8::upgrade($v);
168#test for a roundtrip, we should get back from where we left
169eval {utf8::downgrade( $v )};
170print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v;
171print "ok 25\n";
172
173# some more eq, cmp
174
3ef515df 175$byte=pack("C*", 0xDF);
799ef3cb
JH
176
177print "not " unless pack("U*", 0x3AF) eq $byte;
553e1bcc 178print "ok 26\n";
799ef3cb
JH
179
180print "not " if chr(0xDF) cmp $byte;
553e1bcc 181print "ok 27\n";
799ef3cb
JH
182
183print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) &&
184 ((pack("U*", 0x3AE) cmp $byte) == -1) &&
185 ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) &&
d1256cb1 186 ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
553e1bcc 187print "ok 28\n";
799ef3cb 188
3ef515df
JH
189
190{
191 # Used to core dump in 5.7.3
192 no warnings; # so test goes noiselessly
193 print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n";
194}
a999c27c
JH
195
196{
d1256cb1
RGS
197 my %h1;
198 my %h2;
199 $h1{"\xdf"} = 41;
200 $h2{"\x{3af}"} = 42;
201 print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n";
202 print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n";
a999c27c 203}