This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
t/op/bop.t: Verify complement downgrades UTF-8.
[perl5.git] / t / op / bop.t
CommitLineData
ddb9d9dc
PP
1#!./perl
2
3#
55497cff 4# test the bit operators '&', '|', '^', '~', '<<', and '>>'
ddb9d9dc
PP
5#
6
760c7c2f
KW
7use warnings;
8no warnings 'deprecated';
9
d1f8c7a4
CS
10BEGIN {
11 chdir 't' if -d 't';
624c42e2
N
12 require "./test.pl";
13 set_up_inc('../lib');
14 require "./charset_tools.pl";
784fea9c 15 require Config;
d1f8c7a4
CS
16}
17
add36b05
NC
18# Tests don't have names yet.
19# If you find tests are failing, please try adding names to tests to track
20# down where the failure is, and supply your new names as a patch.
21# (Just-in-time test naming)
55951dd7 22plan tests => 341;
ddb9d9dc
PP
23
24# numerics
add36b05
NC
25ok ((0xdead & 0xbeef) == 0x9ead);
26ok ((0xdead | 0xbeef) == 0xfeef);
27ok ((0xdead ^ 0xbeef) == 0x6042);
28ok ((~0xdead & 0xbeef) == 0x2042);
55497cff
PP
29
30# shifts
add36b05
NC
31ok ((257 << 7) == 32896);
32ok ((33023 >> 7) == 257);
55497cff
PP
33
34# signed vs. unsigned
add36b05 35ok ((~0 > 0 && do { use integer; ~0 } == -1));
d1f8c7a4
CS
36
37my $bits = 0;
38for (my $i = ~0; $i; $i >>= 1) { ++$bits; }
39my $cusp = 1 << ($bits - 1);
40
add36b05
NC
41
42ok (($cusp & -1) > 0 && do { use integer; $cusp & -1 } < 0);
43ok (($cusp | 1) > 0 && do { use integer; $cusp | 1 } < 0);
44ok (($cusp ^ 1) > 0 && do { use integer; $cusp ^ 1 } < 0);
45ok ((1 << ($bits - 1)) == $cusp &&
46 do { use integer; 1 << ($bits - 1) } == -$cusp);
47ok (($cusp >> 1) == ($cusp / 2) &&
48 do { use integer; abs($cusp >> 1) } == ($cusp / 2));
ddb9d9dc 49
9d116dd7
JH
50$Aaz = chr(ord("A") & ord("z"));
51$Aoz = chr(ord("A") | ord("z"));
52$Axz = chr(ord("A") ^ ord("z"));
53
ddb9d9dc 54# short strings
add36b05
NC
55is (("AAAAA" & "zzzzz"), ($Aaz x 5));
56is (("AAAAA" | "zzzzz"), ($Aoz x 5));
57is (("AAAAA" ^ "zzzzz"), ($Axz x 5));
ddb9d9dc
PP
58
59# long strings
60$foo = "A" x 150;
61$bar = "z" x 75;
9d116dd7
JH
62$zap = "A" x 75;
63# & truncates
add36b05 64is (($foo & $bar), ($Aaz x 75 ));
9d116dd7 65# | does not truncate
add36b05 66is (($foo | $bar), ($Aoz x 75 . $zap));
9d116dd7 67# ^ does not truncate
add36b05 68is (($foo ^ $bar), ($Axz x 75 . $zap));
9d116dd7 69
b35338b6
KW
70# string constants. These tests expect the bit patterns of these strings in
71# ASCII, so convert to that.
72sub _and($) { $_[0] & native_to_uni("+0") } # bitwise AND of the single argument with the constant +0 string, which is first passed through native_to_uni
73sub _oar($) { $_[0] | native_to_uni("+0") } # bitwise OR of the single argument with the same converted constant
74sub _xor($) { $_[0] ^ native_to_uni("+0") } # bitwise XOR of the single argument with the same converted constant
75is _and native_to_uni("waf"), native_to_uni('# '), 'str var & const str'; # [perl #20661]
76is _and native_to_uni("waf"), native_to_uni('# '), 'str var & const str again'; # [perl #20661]
77is _oar native_to_uni("yit"), native_to_uni('{yt'), 'str var | const str';
78is _oar native_to_uni("yit"), native_to_uni('{yt'), 'str var | const str again';
79is _xor native_to_uni("yit"), native_to_uni('RYt'), 'str var ^ const str';
80is _xor native_to_uni("yit"), native_to_uni('RYt'), 'str var ^ const str again';
81
82SKIP: {
83 skip "Converting a numeric doesn't work with EBCDIC unlike the above tests",
84 3 if $::IS_EBCDIC;
85 is _and 0, '0', 'num var & const str'; # [perl #20661]
86 is _oar 0, '0', 'num var | const str';
87 is _xor 0, '0', 'num var ^ const str';
88}
b20c4ee1 89
5ee80e13
FC
90# But don’t mistake a COW for a constant when assigning to it
91%h=(150=>1);
92$i=(keys %h)[0];
93$i |= 105;
94is $i, 255, '[perl #108480] $cow |= number';
95$i=(keys %h)[0];
96$i &= 105;
97is $i, 0, '[perl #108480] $cow &= number';
98$i=(keys %h)[0];
99$i ^= 105;
100is $i, 255, '[perl #108480] $cow ^= number';
101
0c57e439 102#
add36b05
NC
103is ("ok \xFF\xFF\n" & "ok 19\n", "ok 19\n");
104is ("ok 20\n" | "ok \0\0\n", "ok 20\n");
105is ("o\000 \0001\000" ^ "\000k\0002\000\n", "ok 21\n");
0c57e439
GS
106
107#
add36b05
NC
108is ("ok \x{FF}\x{FF}\n" & "ok 22\n", "ok 22\n");
109is ("ok 23\n" | "ok \x{0}\x{0}\n", "ok 23\n");
110is ("o\x{0} \x{0}4\x{0}" ^ "\x{0}k\x{0}2\x{0}\n", "ok 24\n");
0c57e439 111
299b089d 112# More variations on 19 and 22.
add36b05
NC
113is ("ok \xFF\x{FF}\n" & "ok 41\n", "ok 41\n");
114is ("ok \x{FF}\xFF\n" & "ok 42\n", "ok 42\n");
66a74c25
JO
115
116# Tests to see if you really can cast negative floats to unsigned properly
117$neg1 = -1.0;
add36b05 118ok (~ $neg1 == 0);
66a74c25 119$neg7 = -7.0;
add36b05 120ok (~ $neg7 == 6);
891f9566 121
891f9566
YST
122
123# double magic tests
124
125sub TIESCALAR { bless { value => $_[1], orig => $_[1] } } # tie constructor: keeps the initial value both as the current value and as the original to restore later
126sub STORE { $_[0]{store}++; $_[0]{value} = $_[1] } # counts each write in the store slot, then records the new value
127sub FETCH { $_[0]{fetch}++; $_[0]{value} } # counts each read in the fetch slot, then returns the current value
128sub stores { tied($_[0])->{value} = tied($_[0])->{orig}; # first put the tied variable back to its original value
129 delete(tied($_[0])->{store}) || 0 } # then return the STORE count and clear it (0 when nothing was stored)
130sub fetches { delete(tied($_[0])->{fetch}) || 0 } # return the FETCH count and clear it (0 when nothing was fetched)
131
132# numeric double magic tests
133
134tie $x, "main", 1;
135tie $y, "main", 3;
136
137is(($x | $y), 3);
138is(fetches($x), 1);
139is(fetches($y), 1);
140is(stores($x), 0);
141is(stores($y), 0);
142
143is(($x & $y), 1);
144is(fetches($x), 1);
145is(fetches($y), 1);
146is(stores($x), 0);
147is(stores($y), 0);
148
149is(($x ^ $y), 2);
150is(fetches($x), 1);
151is(fetches($y), 1);
152is(stores($x), 0);
153is(stores($y), 0);
154
155is(($x |= $y), 3);
156is(fetches($x), 2);
157is(fetches($y), 1);
158is(stores($x), 1);
159is(stores($y), 0);
160
161is(($x &= $y), 1);
162is(fetches($x), 2);
163is(fetches($y), 1);
164is(stores($x), 1);
165is(stores($y), 0);
166
167is(($x ^= $y), 2);
168is(fetches($x), 2);
169is(fetches($y), 1);
170is(stores($x), 1);
171is(stores($y), 0);
172
173is(~~$y, 3);
174is(fetches($y), 1);
175is(stores($y), 0);
176
177{ use integer;
178
179is(($x | $y), 3);
180is(fetches($x), 1);
181is(fetches($y), 1);
182is(stores($x), 0);
183is(stores($y), 0);
184
185is(($x & $y), 1);
186is(fetches($x), 1);
187is(fetches($y), 1);
188is(stores($x), 0);
189is(stores($y), 0);
190
191is(($x ^ $y), 2);
192is(fetches($x), 1);
193is(fetches($y), 1);
194is(stores($x), 0);
195is(stores($y), 0);
196
197is(($x |= $y), 3);
198is(fetches($x), 2);
199is(fetches($y), 1);
200is(stores($x), 1);
201is(stores($y), 0);
202
203is(($x &= $y), 1);
204is(fetches($x), 2);
205is(fetches($y), 1);
206is(stores($x), 1);
207is(stores($y), 0);
208
209is(($x ^= $y), 2);
210is(fetches($x), 2);
211is(fetches($y), 1);
212is(stores($x), 1);
213is(stores($y), 0);
214
215is(~$y, -4);
216is(fetches($y), 1);
217is(stores($y), 0);
218
219} # end of use integer;
220
221# stringwise double magic tests
222
223tie $x, "main", "a";
224tie $y, "main", "c";
225
226is(($x | $y), ("a" | "c"));
227is(fetches($x), 1);
228is(fetches($y), 1);
229is(stores($x), 0);
230is(stores($y), 0);
231
232is(($x & $y), ("a" & "c"));
233is(fetches($x), 1);
234is(fetches($y), 1);
235is(stores($x), 0);
236is(stores($y), 0);
237
238is(($x ^ $y), ("a" ^ "c"));
239is(fetches($x), 1);
240is(fetches($y), 1);
241is(stores($x), 0);
242is(stores($y), 0);
243
244is(($x |= $y), ("a" | "c"));
245is(fetches($x), 2);
246is(fetches($y), 1);
247is(stores($x), 1);
248is(stores($y), 0);
249
250is(($x &= $y), ("a" & "c"));
251is(fetches($x), 2);
252is(fetches($y), 1);
253is(stores($x), 1);
254is(stores($y), 0);
255
256is(($x ^= $y), ("a" ^ "c"));
257is(fetches($x), 2);
258is(fetches($y), 1);
259is(stores($x), 1);
260is(stores($y), 0);
261
262is(~~$y, "c");
263is(fetches($y), 1);
264is(stores($y), 0);
d0a21e00
GA
265
266$a = "\0\x{100}"; chop($a);
267ok(utf8::is_utf8($a)); # make sure UTF8 flag is still there
268$a = ~$a;
269is($a, "\xFF", "~ works with utf-8");
55951dd7 270ok(! utf8::is_utf8($a), " and turns off the UTF-8 flag");
80ff368f 271
b08562c6
KW
272$a = "\0\x{100}"; chop($a);
273undef $b;
274$b = $a | "\xFF";
275ok(utf8::is_utf8($b), "Verify UTF-8 | non-UTF-8 retains UTF-8 flag");
276undef $b;
277$b = "\xFF" | $a;
278ok(utf8::is_utf8($b), "Verify non-UTF-8 | UTF-8 retains UTF-8 flag");
279undef $b;
280$b = $a & "\xFF";
281ok(utf8::is_utf8($b), "Verify UTF-8 & non-UTF-8 retains UTF-8 flag");
282undef $b;
283$b = "\xFF" & $a;
284ok(utf8::is_utf8($b), "Verify non-UTF-8 & UTF-8 retains UTF-8 flag");
285undef $b;
286$b = $a ^ "\xFF";
287ok(utf8::is_utf8($b), "Verify UTF-8 ^ non-UTF-8 retains UTF-8 flag");
288undef $b;
289$b = "\xFF" ^ $a;
290ok(utf8::is_utf8($b), "Verify non-UTF-8 ^ UTF-8 retains UTF-8 flag");
291
55951dd7 292
80ff368f 293# [rt.perl.org 33003]
784fea9c
NC
294# This would cause a segfault without malloc wrap
295SKIP: {
296 skip "No malloc wrap checks" unless $Config::Config{usemallocwrap};
aaa63dae 297 like( runperl(prog => 'eval q($#a>>=1); print 1'), qr/^1\n?/ );
784fea9c 298}
1a787b95
ST
299
300# [perl #37616] Bug in &= (string) and/or m//
301{
302 $a = "aa";
303 $a &= "a";
304 ok($a =~ /a+$/, 'ASCII "a" is NUL-terminated');
305
306 $b = "bb\x{100}";
307 $b &= "b";
308 ok($b =~ /b+$/, 'Unicode "b" is NUL-terminated');
309}
794a0d33 310
8c8eee82 311
b6e8d7fe
FC
312# New string- and number-specific bitwise ops
313{
314 use feature "bitwise";
315 no warnings "experimental::bitwise";
316 is "22" & "66", 2, 'numeric & with strings';
317 is "22" | "66", 86, 'numeric | with strings';
318 is "22" ^ "66", 84, 'numeric ^ with strings';
319 is ~"22" & 0xff, 233, 'numeric ~ with string';
320 is 22 &. 66, 22, '&. with numbers';
321 is 22 |. 66, 66, '|. with numbers';
322 is 22 ^. 66, "\4\4", '^. with numbers';
b35338b6
KW
323 if ($::IS_EBCDIC) {
324 # ord('2') is 0xF2 on EBCDIC
325 is ~.22, "\x0d\x0d", '~. with number';
326 }
327 else {
328 # ord('2') is 0x32 on ASCII
329 is ~.22, "\xcd\xcd", '~. with number';
330 }
b6e8d7fe
FC
331 $_ = "22";
332 is $_ &= "66", 2, 'numeric &= with strings';
333 $_ = "22";
334 is $_ |= "66", 86, 'numeric |= with strings';
335 $_ = "22";
336 is $_ ^= "66", 84, 'numeric ^= with strings';
337 $_ = 22;
338 is $_ &.= 66, 22, '&.= with numbers';
339 $_ = 22;
340 is $_ |.= 66, 66, '|.= with numbers';
341 $_ = 22;
342 is $_ ^.= 66, "\4\4", '^.= with numbers';
343
344 # signed vs. unsigned
345 ok ((~0 > 0 && do { use integer; ~0 } == -1));
346
347 my $bits = 0;
348 for (my $i = ~0; $i; $i >>= 1) { ++$bits; }
349 my $cusp = 1 << ($bits - 1);
350
351 ok (($cusp & -1) > 0 && do { use integer; $cusp & -1 } < 0);
352 ok (($cusp | 1) > 0 && do { use integer; $cusp | 1 } < 0);
353 ok (($cusp ^ 1) > 0 && do { use integer; $cusp ^ 1 } < 0);
354 ok ((1 << ($bits - 1)) == $cusp &&
355 do { use integer; 1 << ($bits - 1) } == -$cusp);
356 ok (($cusp >> 1) == ($cusp / 2) &&
357 do { use integer; abs($cusp >> 1) } == ($cusp / 2));
358}
359
8c8eee82
BM
360# ref tests
361
362my %res;
363
364for my $str ("x", "\x{100}") {
365 for my $chr (qw/S A H G X ( * F/) {
366 for my $op (qw/| & ^/) {
367 my $co = ord $chr;
368 my $so = ord $str;
369 $res{"$chr$op$str"} = eval qq/chr($co $op $so)/;
370 }
371 }
372 $res{"undef|$str"} = $str;
373 $res{"undef&$str"} = "";
374 $res{"undef^$str"} = $str;
375}
376
377sub PVBM () { "X" } # constant-style sub (empty prototype) returning the one-character string X; NOTE(review): the index call on the next line presumably exists to make this constant a PVBM - confirm
51f0b9cd 3781 if index "foo", PVBM;
8c8eee82
BM
379
380my $warn = 0;
381local $^W = 1;
382local $SIG{__WARN__} = sub { $warn++ };
383
384sub is_first { # test helper: checks only the first character of a bitwise-op result
385 my ($got, $orig, $op, $str, $name) = @_; # result, key for the tested operand, operator, string operand, test name
386 is(substr($got, 0, 1), $res{"$orig$op$str"}, $name); # expected character is looked up in %res under the key built from orig, op and str
387}
388
389for (
390 # [object to test, first char of stringification, name]
391 [undef, "undef", "undef" ],
392 [\1, "S", "scalar ref" ],
393 [[], "A", "array ref" ],
394 [{}, "H", "hash ref" ],
395 [qr/x/, "(", "qr//" ],
396 [*foo, "*", "glob" ],
397 [\*foo, "G", "glob ref" ],
398 [PVBM, "X", "PVBM" ],
399 [\PVBM, "S", "PVBM ref" ],
400 [bless([], "Foo"), "F", "object" ],
401) {
402 my ($val, $orig, $type) = @$_;
403
5d09ee1c 404 for (["x", "string"]) {
8c8eee82
BM
405 my ($str, $desc) = @$_;
406
407 $warn = 0;
408
409 is_first($val | $str, $orig, "|", $str, "$type | $desc");
410 is_first($val & $str, $orig, "&", $str, "$type & $desc");
411 is_first($val ^ $str, $orig, "^", $str, "$type ^ $desc");
412
413 is_first($str | $val, $orig, "|", $str, "$desc | $type");
414 is_first($str & $val, $orig, "&", $str, "$desc & $type");
415 is_first($str ^ $val, $orig, "^", $str, "$desc ^ $type");
416
417 my $new;
418 ($new = $val) |= $str;
419 is_first($new, $orig, "|", $str, "$type |= $desc");
420 ($new = $val) &= $str;
421 is_first($new, $orig, "&", $str, "$type &= $desc");
422 ($new = $val) ^= $str;
423 is_first($new, $orig, "^", $str, "$type ^= $desc");
424
425 ($new = $str) |= $val;
426 is_first($new, $orig, "|", $str, "$desc |= $type");
427 ($new = $str) &= $val;
428 is_first($new, $orig, "&", $str, "$desc &= $type");
429 ($new = $str) ^= $val;
430 is_first($new, $orig, "^", $str, "$desc ^= $type");
431
432 if ($orig eq "undef") {
433 # undef |= and undef ^= don't warn
434 is($warn, 10, "no duplicate warnings");
435 }
436 else {
437 is($warn, 0, "no warnings");
438 }
439 }
440}
441
bccb768e
FC
442delete $SIG{__WARN__};
443
8c8eee82
BM
444my $strval;
445
446{
447 package Bar; # class that overloads stringification only
448 use overload q/""/ => sub { $strval }; # stringifies to whatever the file-scoped $strval currently holds
449
450 package Baz; # class that overloads the | operator only
451 use overload q/|/ => sub { "y" }; # any | involving a Baz object yields the string y
452}
453
51f0b9cd 454ok(!eval { 1 if bless([], "Bar") | "x"; 1 },"string overload can't use |");
8c8eee82
BM
455like($@, qr/no method found/, "correct error");
456is(eval { bless([], "Baz") | "x" }, "y", "| overload works");
457
458my $obj = bless [], "Bar";
459$strval = "x";
460eval { $obj |= "Q" };
461$strval = "z";
462is("$obj", "z", "|= doesn't break string overload");
1e6bda93
FC
463
464# [perl #29070]
b35338b6
KW
465$^A .= new version ~$_ for eval sprintf('"\\x%02x"', 0xff - ord("1")),
466 $::IS_EBCDIC ? v13 : v205, # 255 - ord('2')
467 eval sprintf('"\\x%02x"', 0xff - ord("3"));
1e6bda93 468is $^A, "123", '~v0 clears vstring magic on retval';
b3498293
JH
469
470{
471 my $w = $Config::Config{ivsize} * 8;
472
473 fail("unexpected w $w") unless $w == 32 || $w == 64;
474
475 is(1 << 1, 2, "UV 1 left shift 1");
476 is(1 >> 1, 0, "UV 1 right shift 1");
477
478 is(0x7b << -4, 0x007, "UV left negative shift == right shift");
479 is(0x7b >> -4, 0x7b0, "UV right negative shift == left shift");
480
481 is(0x7b << 0, 0x07b, "UV left zero shift == identity");
482 is(0x7b >> 0, 0x07b, "UV right zero shift == identity");
483
484 is(0x0 << -1, 0x0, "zero left negative shift == zero");
485 is(0x0 >> -1, 0x0, "zero right negative shift == zero");
486
487 cmp_ok(1 << $w - 1, '==', 2 ** ($w - 1), # not is() because NV stringify.
488 "UV left $w - 1 shift == 2 ** ($w - 1)");
489 is(1 << $w, 0, "UV left shift $w == zero");
490 is(1 << $w + 1, 0, "UV left shift $w + 1 == zero");
491
492 is(1 >> $w - 1, 0, "UV right shift $w - 1 == zero");
493 is(1 >> $w, 0, "UV right shift $w == zero");
494 is(1 >> $w + 1, 0, "UV right shift $w + 1 == zero");
495
496 # Negative shiftees get promoted to UVs before shifting. This is
497 # not necessarily the ideal behavior, but that is what is happening.
498 if ($w == 64) {
499 no warnings "portable";
2183d14b 500 no warnings "overflow"; # prevent compile-time warning for ivsize=4
b69687e7
JH
501 is(-1 << 1, 0xFFFF_FFFF_FFFF_FFFE,
502 "neg UV (sic) left shift = 0xFF..E");
503 is(-1 >> 1, 0x7FFF_FFFF_FFFF_FFFF,
504 "neg UV (sic) right right = 0x7F..F");
b3498293
JH
505 } elsif ($w == 32) {
506 no warnings "portable";
b69687e7
JH
507 is(-1 << 1, 0xFFFF_FFFE, "neg left shift == 0xFF..E");
508 is(-1 >> 1, 0x7FFF_FFFF, "neg right right == 0x7F..F");
b3498293
JH
509 }
510
511 {
512 # 'use integer' means use IVs instead of UVs.
513 use integer;
514
b69687e7
JH
515 # No surprises here.
516 is(1 << 1, 2, "IV 1 left shift 1 == 2");
517 is(1 >> 1, 0, "IV 1 right shift 1 == 0");
b3498293 518
b69687e7
JH
519 # A left overshift should behave the same as it does without 'use integer',
520 # that is, return zero.
521 is(1 << $w, 0, "IV 1 left shift $w == 0");
522 is(1 << $w + 1, 0, "IV 1 left shift $w + 1 == 0");
523 is(-1 << $w, 0, "IV -1 left shift $w == 0");
524 is(-1 << $w + 1, 0, "IV -1 left shift $w + 1 == 0");
b3498293 525
b69687e7
JH
526 # Even for negative IVs, left shift is multiplication.
527 # But right shift should display the stuckiness to -1.
528 is(-1 << 1, -2, "IV -1 left shift 1 == -2");
b3498293
JH
529 is(-1 >> 1, -1, "IV -1 right shift 1 == -1");
530
531 # As for UVs, negative shifting means the reverse shift.
532 is(-1 << -1, -1, "IV -1 left shift -1 == -1");
533 is(-1 >> -1, -2, "IV -1 right shift -1 == -2");
534
535 # Test also at and around wordsize, expect stuckiness to -1.
536 is(-1 >> $w - 1, -1, "IV -1 right shift $w - 1 == -1");
537 is(-1 >> $w, -1, "IV -1 right shift $w == -1");
538 is(-1 >> $w + 1, -1, "IV -1 right shift $w + 1 == -1");
539 }
540}
b43665ff
FC
541
542# [perl #129287] UTF8 & was not providing a trailing null byte.
543# This test is a bit convoluted, as we want to make sure that the string
544# allocated for &’s target contains memory initialised to something other
545# than a null byte. Uninitialised memory does not make for a reliable
546# test. So we do &. on a longer non-utf8 string first.
547for (["aaa","aaa"],[substr ("a\x{100}",0,1), "a"]) {
548 use feature "bitwise";
549 no warnings "experimental::bitwise", "pack";
550 $byte = substr unpack("P2", pack "P", $$_[0] &. $$_[1]), -1;
551}
552is $byte, "\0", "utf8 &. appends null byte";
dc529e65
TC
553
554# the failure is only visible when running under a memory sanitizer (e.g. ASan)
555fresh_perl_is('$x = "UUUUUUUV"; $y = "xxxxxxx"; $x |= $y; print $x',
a37fb5d5
KW
556 ( $::IS_EBCDIC) ? 'XXXXXXXV' : '}}}}}}}V',
557 {}, "[perl #129995] access to freed memory");
5d09ee1c
A
558
559
560#
561# Using code points above 0xFF is fatal
562#
563foreach my $op_info ([and => "&"], [or => "|"], [xor => "^"]) {
564 my ($op_name, $op) = @$op_info;
565 local $@;
566 eval '$_ = "\xFF" ' . $op . ' "\x{100}";';
567 like $@, qr /^Use of strings with code points over 0xFF as arguments (?#
568 )to bitwise $op_name \Q($op)\E operator is not allowed/,
569 "Use of code points above 0xFF as arguments to bitwise " .
570 "$op_name ($op) is not allowed";
571}
572
573{
574 local $@;
575 eval '$_ = ~ "\x{100}";';
576 like $@, qr /^Use of strings with code points over 0xFF as arguments (?#
577 )to 1's complement \(~\) operator is not allowed/,
578 "Use of code points above 0xFF as argument to 1's complement " .
579 "(~) is not allowed";
580}