This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix lib/Time/Local.pm for VOS
[perl5.git] / lib / encoding.t
CommitLineData
0effba8c 1BEGIN {
aa5485d1
MJD
2 require Config; import Config;
3 if ($Config{'extensions'} !~ /\bEncode\b/) {
4 print "1..0 # Skip: Encode was not built\n";
5 exit 0;
6 }
0effba8c
JH
7 if (ord("A") == 193) {
8 print "1..0 # encoding pragma does not support EBCDIC platforms\n";
0f963d18 9 exit(0);
0effba8c
JH
10 }
11}
12
553e1bcc 13print "1..29\n";
799ef3cb 14
0a378802 15use encoding "latin1"; # ignored (overwritten by the next line)
f14ed3c6 16use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...)
0a378802 17
0a378802 18# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
f14ed3c6 19# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
0a378802
JH
20# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
21
9f4817db
JH
22$a = "\xDF";
23$b = "\x{100}";
24
25print "not " unless ord($a) == 0x3af;
0a378802
JH
26print "ok 1\n";
27
9f4817db 28print "not " unless ord($b) == 0x100;
0a378802
JH
29print "ok 2\n";
30
9f4817db
JH
31my $c;
32
33$c = $a . $b;
34
35print "not " unless ord($c) == 0x3af;
0a378802
JH
36print "ok 3\n";
37
9f4817db
JH
38print "not " unless length($c) == 2;
39print "ok 4\n";
40
41print "not " unless ord(substr($c, 1, 1)) == 0x100;
42print "ok 5\n";
0a378802 43
121910a4
JH
44print "not " unless ord(chr(0xdf)) == 0x3af; # spooky
45print "ok 6\n";
46
47print "not " unless ord(pack("C", 0xdf)) == 0x3af;
48print "ok 7\n";
49
50# we didn't break pack/unpack, I hope
51
52print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf;
53print "ok 8\n";
54
55# the first octet of UTF-8 encoded 0x3af
56print "not " unless unpack("C", chr(0xdf)) == 0xce;
57print "ok 9\n";
bfa383d6 58
3de8ed06
JH
59print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
60print "ok 10\n";
61
62print "not " unless unpack("U", chr(0xdf)) == 0x3af;
63print "ok 11\n";
64
bfa383d6
JH
65# charnames must still work
66use charnames ':full';
67print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
3de8ed06
JH
68print "ok 12\n";
69
70# combine
71
72$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
73
74print "not " unless ord($c) == 0x3af;
75print "ok 13\n";
76
77print "not " unless ord(substr($c, 1, 1)) == 0xdf;
78print "ok 14\n";
79
80print "not " unless ord(substr($c, 2, 1)) == 0x3af;
81print "ok 15\n";
bfa383d6 82
a72c7584
JH
83# regex literals
84
85print "not " unless "\xDF" =~ /\x{3AF}/;
86print "ok 16\n";
87
88print "not " unless "\x{3AF}" =~ /\xDF/;
89print "ok 17\n";
90
91print "not " unless "\xDF" =~ /\xDF/;
92print "ok 18\n";
93
94print "not " unless "\x{3AF}" =~ /\x{3AF}/;
95print "ok 19\n";
96
799ef3cb
JH
97# eq, cmp
98
553e1bcc
AT
99my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = (
100 pack("C*", 0xDF ), # byte
101 pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0
102 pack("U*", 0x3AF), # $U eq $byte
103 pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding
104 pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1)
105 pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
106 pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb)
107);
108
109# all the tests in this section that compare a byte encoded string
110# ato UTF-8 encoded are run in all possible vairants
111# all of the eq, ne, cmp operations tested,
112# $v z $u tested as well as $u z $v
113
114sub alleq($$){
115 my ($a,$b) = (shift, shift);
116 $a eq $b && $b eq $a &&
117 !( $a ne $b ) && !( $b ne $a ) &&
118 ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0;
119}
120
121sub anyeq($$){
122 my ($a,$b) = (shift, shift);
123 $a eq $b || $b eq $a ||
124 !( $a ne $b ) || !( $b ne $a ) ||
125 ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0;
126}
127
128sub allgt($$){
129 my ($a,$b) = (shift, shift);
130 ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
131}
132#match the correct UTF-8 string
133print "not " unless alleq($byte, $U);
134print "ok 20\n";
135
136#do not match a wrong UTF-8 string
137print "not " if anyeq($byte, $Ub);
138print "ok 21\n";
139
140#string ordering
141print "not " unless allgt ( $g1, $byte ) &&
142 allgt ( $g2, $byte ) &&
143 allgt ( $byte, $l ) &&
144 allgt ( $bytes, $U );
145print "ok 22\n";
146
147# upgrade, downgrade
148
149my ($u,$v,$v2);
150$u = $v = $v2 = pack("C*", 0xDF);
151utf8::upgrade($v); #explicit upgrade
152$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade
153
154# implicit upgrade === explicit upgrade
155print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2;
156print "ok 23\n";
157
158# utf8::upgrade is transparent and does not break equality
159print "not " unless alleq( $u, $v );
160print "ok 24\n";
161
162$u = $v = pack("C*", 0xDF);
163utf8::upgrade($v);
164#test for a roundtrip, we should get back from where we left
165eval {utf8::downgrade( $v )};
166print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v;
167print "ok 25\n";
168
169# some more eq, cmp
170
799ef3cb
JH
171my $byte=pack("C*", 0xDF);
172
173print "not " unless pack("U*", 0x3AF) eq $byte;
553e1bcc 174print "ok 26\n";
799ef3cb
JH
175
176print "not " if chr(0xDF) cmp $byte;
553e1bcc 177print "ok 27\n";
799ef3cb
JH
178
179print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) &&
180 ((pack("U*", 0x3AE) cmp $byte) == -1) &&
181 ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) &&
182 ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
553e1bcc 183print "ok 28\n";
799ef3cb
JH
184
185# Used to core dump in 5.7.3
553e1bcc 186print ord undef == 0 ? "ok 29\n" : "not ok 29\n";