Commit | Line | Data |
---|---|---|
0effba8c | 1 | BEGIN { |
aa5485d1 MJD |
2 | require Config; import Config; |
3 | if ($Config{'extensions'} !~ /\bEncode\b/) { | |
4 | print "1..0 # Skip: Encode was not built\n"; | |
5 | exit 0; | |
6 | } | |
0effba8c JH |
7 | if (ord("A") == 193) { |
8 | print "1..0 # encoding pragma does not support EBCDIC platforms\n"; | |
0f963d18 | 9 | exit(0); |
0effba8c JH |
10 | } |
11 | } | |
12 | ||
553e1bcc | 13 | print "1..29\n"; |
799ef3cb | 14 | |
0a378802 | 15 | use encoding "latin1"; # ignored (overwritten by the next line) |
f14ed3c6 | 16 | use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) |
0a378802 | 17 | |
0a378802 | 18 | # "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is |
f14ed3c6 | 19 | # \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS), |
0a378802 JH |
20 | # instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S) |
21 | ||
9f4817db JH |
22 | $a = "\xDF"; |
23 | $b = "\x{100}"; | |
24 | ||
25 | print "not " unless ord($a) == 0x3af; | |
0a378802 JH |
26 | print "ok 1\n"; |
27 | ||
9f4817db | 28 | print "not " unless ord($b) == 0x100; |
0a378802 JH |
29 | print "ok 2\n"; |
30 | ||
9f4817db JH |
31 | my $c; |
32 | ||
33 | $c = $a . $b; | |
34 | ||
35 | print "not " unless ord($c) == 0x3af; | |
0a378802 JH |
36 | print "ok 3\n"; |
37 | ||
9f4817db JH |
38 | print "not " unless length($c) == 2; |
39 | print "ok 4\n"; | |
40 | ||
41 | print "not " unless ord(substr($c, 1, 1)) == 0x100; | |
42 | print "ok 5\n"; | |
0a378802 | 43 | |
121910a4 JH |
44 | print "not " unless ord(chr(0xdf)) == 0x3af; # spooky |
45 | print "ok 6\n"; | |
46 | ||
47 | print "not " unless ord(pack("C", 0xdf)) == 0x3af; | |
48 | print "ok 7\n"; | |
49 | ||
50 | # we didn't break pack/unpack, I hope | |
51 | ||
52 | print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf; | |
53 | print "ok 8\n"; | |
54 | ||
55 | # the first octet of UTF-8 encoded 0x3af | |
56 | print "not " unless unpack("C", chr(0xdf)) == 0xce; | |
57 | print "ok 9\n"; | |
bfa383d6 | 58 | |
3de8ed06 JH |
59 | print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf; |
60 | print "ok 10\n"; | |
61 | ||
62 | print "not " unless unpack("U", chr(0xdf)) == 0x3af; | |
63 | print "ok 11\n"; | |
64 | ||
bfa383d6 JH |
65 | # charnames must still work |
66 | use charnames ':full'; | |
67 | print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf; | |
3de8ed06 JH |
68 | print "ok 12\n"; |
69 | ||
70 | # combine | |
71 | ||
72 | $c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf); | |
73 | ||
74 | print "not " unless ord($c) == 0x3af; | |
75 | print "ok 13\n"; | |
76 | ||
77 | print "not " unless ord(substr($c, 1, 1)) == 0xdf; | |
78 | print "ok 14\n"; | |
79 | ||
80 | print "not " unless ord(substr($c, 2, 1)) == 0x3af; | |
81 | print "ok 15\n"; | |
bfa383d6 | 82 | |
a72c7584 JH |
83 | # regex literals |
84 | ||
85 | print "not " unless "\xDF" =~ /\x{3AF}/; | |
86 | print "ok 16\n"; | |
87 | ||
88 | print "not " unless "\x{3AF}" =~ /\xDF/; | |
89 | print "ok 17\n"; | |
90 | ||
91 | print "not " unless "\xDF" =~ /\xDF/; | |
92 | print "ok 18\n"; | |
93 | ||
94 | print "not " unless "\x{3AF}" =~ /\x{3AF}/; | |
95 | print "ok 19\n"; | |
96 | ||
799ef3cb JH |
97 | # eq, cmp |
98 | ||
553e1bcc AT |
99 | my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( |
100 | pack("C*", 0xDF ), # byte | |
101 | pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0 | |
102 | pack("U*", 0x3AF), # $U eq $byte | |
103 | pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding | |
104 | pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1) | |
105 | pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0; | |
106 | pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb) | |
107 | ); | |
108 | ||
109 | # all the tests in this section that compare a byte encoded string | |
110 | # ato UTF-8 encoded are run in all possible vairants | |
111 | # all of the eq, ne, cmp operations tested, | |
112 | # $v z $u tested as well as $u z $v | |
113 | ||
114 | sub alleq($$){ | |
115 | my ($a,$b) = (shift, shift); | |
116 | $a eq $b && $b eq $a && | |
117 | !( $a ne $b ) && !( $b ne $a ) && | |
118 | ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0; | |
119 | } | |
120 | ||
121 | sub anyeq($$){ | |
122 | my ($a,$b) = (shift, shift); | |
123 | $a eq $b || $b eq $a || | |
124 | !( $a ne $b ) || !( $b ne $a ) || | |
125 | ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0; | |
126 | } | |
127 | ||
128 | sub allgt($$){ | |
129 | my ($a,$b) = (shift, shift); | |
130 | ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1; | |
131 | } | |
132 | #match the correct UTF-8 string | |
133 | print "not " unless alleq($byte, $U); | |
134 | print "ok 20\n"; | |
135 | ||
136 | #do not match a wrong UTF-8 string | |
137 | print "not " if anyeq($byte, $Ub); | |
138 | print "ok 21\n"; | |
139 | ||
140 | #string ordering | |
141 | print "not " unless allgt ( $g1, $byte ) && | |
142 | allgt ( $g2, $byte ) && | |
143 | allgt ( $byte, $l ) && | |
144 | allgt ( $bytes, $U ); | |
145 | print "ok 22\n"; | |
146 | ||
147 | # upgrade, downgrade | |
148 | ||
149 | my ($u,$v,$v2); | |
150 | $u = $v = $v2 = pack("C*", 0xDF); | |
151 | utf8::upgrade($v); #explicit upgrade | |
152 | $v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade | |
153 | ||
154 | # implicit upgrade === explicit upgrade | |
155 | print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2; | |
156 | print "ok 23\n"; | |
157 | ||
158 | # utf8::upgrade is transparent and does not break equality | |
159 | print "not " unless alleq( $u, $v ); | |
160 | print "ok 24\n"; | |
161 | ||
162 | $u = $v = pack("C*", 0xDF); | |
163 | utf8::upgrade($v); | |
164 | #test for a roundtrip, we should get back from where we left | |
165 | eval {utf8::downgrade( $v )}; | |
166 | print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v; | |
167 | print "ok 25\n"; | |
168 | ||
169 | # some more eq, cmp | |
170 | ||
799ef3cb JH |
171 | my $byte=pack("C*", 0xDF); |
172 | ||
173 | print "not " unless pack("U*", 0x3AF) eq $byte; | |
553e1bcc | 174 | print "ok 26\n"; |
799ef3cb JH |
175 | |
176 | print "not " if chr(0xDF) cmp $byte; | |
553e1bcc | 177 | print "ok 27\n"; |
799ef3cb JH |
178 | |
179 | print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && | |
180 | ((pack("U*", 0x3AE) cmp $byte) == -1) && | |
181 | ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) && | |
182 | ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); | |
553e1bcc | 183 | print "ok 28\n"; |
799ef3cb JH |
184 | |
185 | # Used to core dump in 5.7.3 | |
553e1bcc | 186 | print ord undef == 0 ? "ok 29\n" : "not ok 29\n"; |