Commit | Line | Data |
---|---|---|
92331800 NC |
1 | #!perl -w |
2 | ||
3 | BEGIN { | |
a817e89d | 4 | chdir 't' if -d 't'; |
569f7fc5 | 5 | require Config; import Config; |
629eeaee KW |
6 | require './test.pl'; |
7 | require './charset_tools.pl'; | |
8 | require './loc_tools.pl'; | |
624c42e2 | 9 | set_up_inc( '../lib' ); |
92331800 NC |
10 | } |
11 | ||
f35ddf90 | 12 | plan(tests => 217); |
92331800 | 13 | |
ec9af7d4 | 14 | package UTF8Toggle; |
92331800 NC |
15 | use strict; |
16 | ||
12abf4f0 | 17 | use overload '""' => 'stringify', fallback => 1; |
92331800 NC |
18 | |
19 | sub new { | |
20 | my $class = shift; | |
676f44e7 NC |
21 | my $value = shift; |
22 | my $state = shift||0; | |
23 | return bless [$value, $state], $class; | |
92331800 NC |
24 | } |
25 | ||
26 | sub stringify { | |
27 | my $self = shift; | |
28 | $self->[1] = ! $self->[1]; | |
29 | if ($self->[1]) { | |
30 | utf8::downgrade($self->[0]); | |
31 | } else { | |
32 | utf8::upgrade($self->[0]); | |
33 | } | |
34 | $self->[0]; | |
35 | } | |
36 | ||
37 | package main; | |
38 | ||
61fc5122 KW |
39 | # These tests are based on characters 128-255 not having latin1, and hence |
40 | # Unicode, semantics | |
1863b879 | 41 | # no feature "unicode_strings"; |
61fc5122 | 42 | |
92331800 | 43 | # Bug 34297 |
f6fca319 | 44 | foreach my $t ("ASCII", "B" . uni_to_native("\366") . "se") { |
92331800 NC |
45 | my $length = length $t; |
46 | ||
ec9af7d4 | 47 | my $u = UTF8Toggle->new($t); |
92331800 NC |
48 | is (length $u, $length, "length of '$t'"); |
49 | is (length $u, $length, "length of '$t'"); | |
50 | is (length $u, $length, "length of '$t'"); | |
51 | is (length $u, $length, "length of '$t'"); | |
52 | } | |
ec9af7d4 | 53 | |
f6fca319 KW |
54 | my $E_acute = uni_to_native("\311"); |
55 | my $e_acute = uni_to_native("\351"); | |
56 | my $u = UTF8Toggle->new($E_acute); | |
6e08b83d NC |
57 | my $lc = lc $u; |
58 | is (length $lc, 1); | |
f6fca319 | 59 | is ($lc, $E_acute, "E acute -> e acute"); |
6e08b83d NC |
60 | $lc = lc $u; |
61 | is (length $lc, 1); | |
f6fca319 | 62 | is ($lc, $e_acute, "E acute -> e acute"); |
6e08b83d NC |
63 | $lc = lc $u; |
64 | is (length $lc, 1); | |
f6fca319 | 65 | is ($lc, $E_acute, "E acute -> e acute"); |
6e08b83d | 66 | |
f6fca319 | 67 | $u = UTF8Toggle->new($e_acute); |
6e08b83d NC |
68 | my $uc = uc $u; |
69 | is (length $uc, 1); | |
f6fca319 | 70 | is ($uc, $e_acute, "e acute -> E acute"); |
6e08b83d NC |
71 | $uc = uc $u; |
72 | is (length $uc, 1); | |
f6fca319 | 73 | is ($uc, $E_acute, "e acute -> E acute"); |
6e08b83d NC |
74 | $uc = uc $u; |
75 | is (length $uc, 1); | |
f6fca319 | 76 | is ($uc, $e_acute, "e acute -> E acute"); |
6e08b83d | 77 | |
f6fca319 | 78 | $u = UTF8Toggle->new($E_acute); |
6e08b83d NC |
79 | $lc = lcfirst $u; |
80 | is (length $lc, 1); | |
f6fca319 | 81 | is ($lc, $E_acute, "E acute -> e acute"); |
6e08b83d NC |
82 | $lc = lcfirst $u; |
83 | is (length $lc, 1); | |
f6fca319 | 84 | is ($lc, $e_acute, "E acute -> e acute"); |
6e08b83d NC |
85 | $lc = lcfirst $u; |
86 | is (length $lc, 1); | |
f6fca319 | 87 | is ($lc, $E_acute, "E acute -> e acute"); |
6e08b83d | 88 | |
f6fca319 | 89 | $u = UTF8Toggle->new($e_acute); |
6e08b83d NC |
90 | $uc = ucfirst $u; |
91 | is (length $uc, 1); | |
f6fca319 | 92 | is ($uc, $e_acute, "e acute -> E acute"); |
6e08b83d NC |
93 | $uc = ucfirst $u; |
94 | is (length $uc, 1); | |
f6fca319 | 95 | is ($uc, $E_acute, "e acute -> E acute"); |
6e08b83d NC |
96 | $uc = ucfirst $u; |
97 | is (length $uc, 1); | |
f6fca319 | 98 | is ($uc, $e_acute, "e acute -> E acute"); |
6e08b83d | 99 | |
629eeaee | 100 | my $have_setlocale = locales_enabled('LC_ALL'); |
ec9af7d4 NC |
101 | |
102 | SKIP: { | |
103 | if (!$have_setlocale) { | |
6e08b83d | 104 | skip "No setlocale", 24; |
ec9af7d4 | 105 | } elsif (!setlocale(&POSIX::LC_ALL, "en_GB.ISO8859-1")) { |
6e08b83d | 106 | skip "Could not setlocale to en_GB.ISO8859-1", 24; |
23ae3dfb | 107 | } elsif ($^O eq 'dec_osf' || $^O eq 'VMS') { |
bce8aa37 | 108 | skip "$^O has broken en_GB.ISO8859-1 locale", 24; |
ec9af7d4 | 109 | } else { |
5f1269ab | 110 | use locale; |
f6fca319 | 111 | my $u = UTF8Toggle->new($E_acute); |
ec9af7d4 NC |
112 | my $lc = lc $u; |
113 | is (length $lc, 1); | |
f6fca319 | 114 | is ($lc, $e_acute, "E acute -> e acute"); |
ec9af7d4 NC |
115 | $lc = lc $u; |
116 | is (length $lc, 1); | |
f6fca319 | 117 | is ($lc, $e_acute, "E acute -> e acute"); |
6e08b83d NC |
118 | $lc = lc $u; |
119 | is (length $lc, 1); | |
f6fca319 | 120 | is ($lc, $e_acute, "E acute -> e acute"); |
67306194 | 121 | |
f6fca319 | 122 | $u = UTF8Toggle->new($e_acute); |
67306194 NC |
123 | my $uc = uc $u; |
124 | is (length $uc, 1); | |
f6fca319 | 125 | is ($uc, $E_acute, "e acute -> E acute"); |
67306194 NC |
126 | $uc = uc $u; |
127 | is (length $uc, 1); | |
f6fca319 | 128 | is ($uc, $E_acute, "e acute -> E acute"); |
6e08b83d NC |
129 | $uc = uc $u; |
130 | is (length $uc, 1); | |
f6fca319 | 131 | is ($uc, $E_acute, "e acute -> E acute"); |
d54190f6 | 132 | |
f6fca319 | 133 | $u = UTF8Toggle->new($E_acute); |
d54190f6 NC |
134 | $lc = lcfirst $u; |
135 | is (length $lc, 1); | |
f6fca319 | 136 | is ($lc, $e_acute, "E acute -> e acute"); |
d54190f6 NC |
137 | $lc = lcfirst $u; |
138 | is (length $lc, 1); | |
f6fca319 | 139 | is ($lc, $e_acute, "E acute -> e acute"); |
6e08b83d NC |
140 | $lc = lcfirst $u; |
141 | is (length $lc, 1); | |
f6fca319 | 142 | is ($lc, $e_acute, "E acute -> e acute"); |
d54190f6 | 143 | |
f6fca319 | 144 | $u = UTF8Toggle->new($e_acute); |
d54190f6 NC |
145 | $uc = ucfirst $u; |
146 | is (length $uc, 1); | |
f6fca319 | 147 | is ($uc, $E_acute, "e acute -> E acute"); |
d54190f6 NC |
148 | $uc = ucfirst $u; |
149 | is (length $uc, 1); | |
f6fca319 | 150 | is ($uc, $E_acute, "e acute -> E acute"); |
6e08b83d NC |
151 | $uc = ucfirst $u; |
152 | is (length $uc, 1); | |
f6fca319 | 153 | is ($uc, $E_acute, "e acute -> E acute"); |
ec9af7d4 NC |
154 | } |
155 | } | |
676f44e7 | 156 | |
6ddfe9e8 | 157 | my $tmpfile = tempfile(); |
676f44e7 | 158 | |
c9cb0f41 NC |
159 | foreach my $operator ('print', 'syswrite', 'syswrite len', 'syswrite off', |
160 | 'syswrite len off') { | |
676f44e7 NC |
161 | foreach my $layer ('', ':utf8') { |
162 | open my $fh, "+>$layer", $tmpfile or die $!; | |
c9cb0f41 NC |
163 | my $pad = $operator =~ /\boff\b/ ? "\243" : ""; |
164 | my $trail = $operator =~ /\blen\b/ ? "!" : ""; | |
f6fca319 KW |
165 | my $u = UTF8Toggle->new("$pad$E_acute\n$trail"); |
166 | my $l = UTF8Toggle->new("$pad$e_acute\n$trail", 1); | |
fb10a8a7 | 167 | no warnings 'deprecated'; |
c9cb0f41 | 168 | if ($operator eq 'print') { |
b3c6e229 | 169 | no warnings 'utf8'; |
c9cb0f41 NC |
170 | print $fh $u; |
171 | print $fh $u; | |
172 | print $fh $u; | |
173 | print $fh $l; | |
174 | print $fh $l; | |
175 | print $fh $l; | |
176 | } elsif ($operator eq 'syswrite') { | |
177 | syswrite $fh, $u; | |
178 | syswrite $fh, $u; | |
179 | syswrite $fh, $u; | |
180 | syswrite $fh, $l; | |
181 | syswrite $fh, $l; | |
182 | syswrite $fh, $l; | |
183 | } elsif ($operator eq 'syswrite len') { | |
184 | syswrite $fh, $u, 2; | |
185 | syswrite $fh, $u, 2; | |
186 | syswrite $fh, $u, 2; | |
187 | syswrite $fh, $l, 2; | |
188 | syswrite $fh, $l, 2; | |
189 | syswrite $fh, $l, 2; | |
190 | } elsif ($operator eq 'syswrite off' | |
191 | || $operator eq 'syswrite len off') { | |
192 | syswrite $fh, $u, 2, 1; | |
193 | syswrite $fh, $u, 2, 1; | |
194 | syswrite $fh, $u, 2, 1; | |
195 | syswrite $fh, $l, 2, 1; | |
196 | syswrite $fh, $l, 2, 1; | |
197 | syswrite $fh, $l, 2, 1; | |
198 | } else { | |
199 | die $operator; | |
200 | } | |
676f44e7 NC |
201 | |
202 | seek $fh, 0, 0 or die $!; | |
203 | my $line; | |
204 | chomp ($line = <$fh>); | |
f6fca319 | 205 | is ($line, $E_acute, "$operator $layer"); |
676f44e7 | 206 | chomp ($line = <$fh>); |
f6fca319 | 207 | is ($line, $E_acute, "$operator $layer"); |
676f44e7 | 208 | chomp ($line = <$fh>); |
f6fca319 | 209 | is ($line, $E_acute, "$operator $layer"); |
676f44e7 | 210 | chomp ($line = <$fh>); |
f6fca319 | 211 | is ($line, $e_acute, "$operator $layer"); |
676f44e7 | 212 | chomp ($line = <$fh>); |
f6fca319 | 213 | is ($line, $e_acute, "$operator $layer"); |
676f44e7 | 214 | chomp ($line = <$fh>); |
f6fca319 | 215 | is ($line, $e_acute, "$operator $layer"); |
676f44e7 NC |
216 | |
217 | close $fh or die $!; | |
676f44e7 NC |
218 | } |
219 | } | |
220 | ||
73ee8be2 NC |
221 | my $little = "\243\243"; |
222 | my $big = " \243 $little ! $little ! $little \243 "; | |
223 | my $right = rindex $big, $little; | |
224 | my $right1 = rindex $big, $little, 11; | |
225 | my $left = index $big, $little; | |
226 | my $left1 = index $big, $little, 4; | |
227 | ||
228 | cmp_ok ($right, ">", $right1, "Sanity check our rindex tests"); | |
229 | cmp_ok ($left, "<", $left1, "Sanity check our index tests"); | |
230 | ||
231 | foreach my $b ($big, UTF8Toggle->new($big)) { | |
232 | foreach my $l ($little, UTF8Toggle->new($little), | |
233 | UTF8Toggle->new($little, 1)) { | |
234 | is (rindex ($b, $l), $right, "rindex"); | |
235 | is (rindex ($b, $l), $right, "rindex"); | |
236 | is (rindex ($b, $l), $right, "rindex"); | |
237 | ||
238 | is (rindex ($b, $l, 11), $right1, "rindex 11"); | |
239 | is (rindex ($b, $l, 11), $right1, "rindex 11"); | |
240 | is (rindex ($b, $l, 11), $right1, "rindex 11"); | |
241 | ||
242 | is (index ($b, $l), $left, "index"); | |
243 | is (index ($b, $l), $left, "index"); | |
244 | is (index ($b, $l), $left, "index"); | |
245 | ||
246 | is (index ($b, $l, 4), $left1, "index 4"); | |
247 | is (index ($b, $l, 4), $left1, "index 4"); | |
248 | is (index ($b, $l, 4), $left1, "index 4"); | |
249 | } | |
250 | } | |
676f44e7 | 251 | |
f6fca319 | 252 | my $bits = $E_acute; |
12abf4f0 NC |
253 | foreach my $pieces ($bits, UTF8Toggle->new($bits)) { |
254 | like ($bits ^ $pieces, qr/\A\0+\z/, "something xor itself is zeros"); | |
255 | like ($bits ^ $pieces, qr/\A\0+\z/, "something xor itself is zeros"); | |
256 | like ($bits ^ $pieces, qr/\A\0+\z/, "something xor itself is zeros"); | |
257 | ||
258 | like ($pieces ^ $bits, qr/\A\0+\z/, "something xor itself is zeros"); | |
259 | like ($pieces ^ $bits, qr/\A\0+\z/, "something xor itself is zeros"); | |
260 | like ($pieces ^ $bits, qr/\A\0+\z/, "something xor itself is zeros"); | |
261 | } | |
262 | ||
ce399ba6 NC |
263 | foreach my $value ("\243", UTF8Toggle->new("\243")) { |
264 | is (pack ("A/A", $value), pack ("A/A", "\243"), | |
265 | "pack copes with overloading"); | |
266 | is (pack ("A/A", $value), pack ("A/A", "\243")); | |
267 | is (pack ("A/A", $value), pack ("A/A", "\243")); | |
268 | } | |
269 | ||
ab8be49d NC |
270 | foreach my $value ("\243", UTF8Toggle->new("\243")) { |
271 | my $v; | |
272 | $v = substr $value, 0, 1; | |
273 | is ($v, "\243"); | |
274 | $v = substr $value, 0, 1; | |
275 | is ($v, "\243"); | |
276 | $v = substr $value, 0, 1; | |
277 | is ($v, "\243"); | |
278 | } | |
279 | ||
280 | { | |
281 | package RT69422; | |
282 | use overload '""' => sub { $_[0]->{data} } | |
283 | } | |
284 | ||
285 | { | |
286 | my $text = bless { data => "\x{3075}" }, 'RT69422'; | |
287 | my $p = substr $text, 0, 1; | |
288 | is ($p, "\x{3075}"); | |
289 | } | |
111da786 DC |
290 | |
291 | TODO: { | |
292 | local $::TODO = 'RT #3054: Recursive operator overloading overflows the C stack'; | |
822e6f87 DM |
293 | # XXX this test is expected to SEGV, and can produce |
294 | # sh: line 1: 5106 Segmentation fault | |
a39d27b5 | 295 | # on STDERR. So just completely disable for now |
822e6f87 | 296 | todo_skip($::TODO); |
111da786 DC |
297 | fresh_perl_is(<<'EOP', "ok\n", {}, 'RT #3054: Recursive operator overloading should not crash the interpreter'); |
298 | use overload '""' => sub { "$_[0]" }; | |
299 | print bless {}, __PACKAGE__; | |
300 | print "ok\n"; | |
301 | EOP | |
302 | } | |
f35ddf90 DC |
303 | |
304 | TODO: { | |
305 | local $::TODO = 'RT #3270: Overloaded operators can not be treated as lvalues'; | |
306 | fresh_perl_is(<<'EOP', '', {stderr => 1}, 'RT #3270: Overloaded operator that returns an lvalue can be used as an lvalue'); | |
307 | use overload '.' => \˙ | |
308 | sub dot : lvalue {my ($obj, $method) = @_; $obj -> {$method};} | |
309 | my $o = bless {} => "main"; | |
310 | $o.foo = "bar"; | |
311 | EOP | |
312 | } |