detect sub attributes following a signature

[perl5.git] / t / op / bop.t
diff --git a/t/op/bop.t b/t/op/bop.t

index 09f2be9..411d253 100644 (file)
--- a/t/op/bop.t
+++ b/t/op/bop.t
@@ -4,10 +4,13 @@
  # test the bit operators '&', '|', '^', '~', '<<', and '>>'
  #
  
+use warnings;
+
  BEGIN {
      chdir 't' if -d 't';
-    @INC = '../lib';
-    require "./test.pl"; require "./charset_tools.pl";
+    require "./test.pl";
+    set_up_inc('../lib');
+    require "./charset_tools.pl";
      require Config;
  }
  
@@ -15,7 +18,7 @@ BEGIN {
  # If you find tests are failing, please try adding names to tests to track
  # down where the failure is, and supply your new names as a patch.
  # (Just-in-time test naming)
-plan tests => 194 + (10*13*2) + 5;
+plan tests => 504;
  
  # numerics
  ok ((0xdead & 0xbeef) == 0x9ead);
@@ -105,83 +108,6 @@ is ("ok \x{FF}\x{FF}\n" & "ok 22\n", "ok 22\n");
  is ("ok 23\n" | "ok \x{0}\x{0}\n", "ok 23\n");
  is ("o\x{0} \x{0}4\x{0}" ^ "\x{0}k\x{0}2\x{0}\n", "ok 24\n");
  
-#
-is (sprintf("%vd", v4095 & v801), 801);
-is (sprintf("%vd", v4095 | v801), 4095);
-is (sprintf("%vd", v4095 ^ v801), 3294);
-
-#
-is (sprintf("%vd", v4095.801.4095 & v801.4095), '801.801');
-is (sprintf("%vd", v4095.801.4095 | v801.4095), '4095.4095.4095');
-is (sprintf("%vd", v801.4095 ^ v4095.801.4095), '3294.3294.4095');
-#
-is (sprintf("%vd", v120.300 & v200.400), '72.256');
-is (sprintf("%vd", v120.300 | v200.400), '248.444');
-is (sprintf("%vd", v120.300 ^ v200.400), '176.188');
-#
-my $a = v120.300;
-my $b = v200.400;
-$a ^= $b;
-is (sprintf("%vd", $a), '176.188');
-my $a = v120.300;
-my $b = v200.400;
-$a |= $b;
-is (sprintf("%vd", $a), '248.444');
-
-#
-# UTF8 ~ behaviour
-#
-
-SKIP: {
-    skip "Complements exceed maximum representable on EBCDIC ", 5 if $::IS_EBCDIC;
-
-    my @not36;
-
-    for (0x100...0xFFF) {
-    $a = ~(chr $_);
-        push @not36, sprintf("%#03X", $_)
-            if $a ne chr(~$_) or length($a) != 1 or ~$a ne chr($_);
-    }
-    is (join (', ', @not36), '');
-
-    my @not37;
-
-    for my $i (0xEEE...0xF00) {
-        for my $j (0x0..0x120) {
-            $a = ~(chr ($i) . chr $j);
-                push @not37, sprintf("%#03X %#03X", $i, $j)
-                    if $a ne chr(~$i).chr(~$j) or
-                    length($a) != 2 or
-                    ~$a ne chr($i).chr($j);
-        }
-    }
-    is (join (', ', @not37), '');
-
-    is (~chr(~0), "\0");
-
-
-    my @not39;
-
-    for my $i (0x100..0x120) {
-        for my $j (0x100...0x120) {
-            push @not39, sprintf("%#03X %#03X", $i, $j)
-                if ~(chr($i)|chr($j)) ne (~chr($i)&~chr($j));
-        }
-    }
-    is (join (', ', @not39), '');
-
-    my @not40;
-
-    for my $i (0x100..0x120) {
-        for my $j (0x100...0x120) {
-            push @not40, sprintf("%#03X %#03X", $i, $j)
-                if ~(chr($i)&chr($j)) ne (~chr($i)|~chr($j));
-        }
-    }
-    is (join (', ', @not40), '');
-}
-
-
  # More variations on 19 and 22.
  is ("ok \xFF\x{FF}\n" & "ok 41\n", "ok 41\n");
  is ("ok \x{FF}\xFF\n" & "ok 42\n", "ok 42\n");
@@ -340,6 +266,28 @@ $a = "\0\x{100}"; chop($a);
  ok(utf8::is_utf8($a)); # make sure UTF8 flag is still there
  $a = ~$a;
  is($a, "\xFF", "~ works with utf-8");
+ok(! utf8::is_utf8($a), "    and turns off the UTF-8 flag");
+
+$a = "\0\x{100}"; chop($a);   # chop leaves "\0", still flagged as UTF-8
+undef $b;                     # reset the target before each operator
+$b = $a | "\xFF";
+ok(utf8::is_utf8($b), "Verify UTF-8 | non-UTF-8 retains UTF-8 flag");
+undef $b;
+$b = "\xFF" | $a;             # same operator, operands swapped
+ok(utf8::is_utf8($b), "Verify non-UTF-8 | UTF-8 retains UTF-8 flag");
+undef $b;
+$b = $a & "\xFF";
+ok(utf8::is_utf8($b), "Verify UTF-8 & non-UTF-8 retains UTF-8 flag");
+undef $b;
+$b = "\xFF" & $a;
+ok(utf8::is_utf8($b), "Verify non-UTF-8 & UTF-8 retains UTF-8 flag");
+undef $b;
+$b = $a ^ "\xFF";
+ok(utf8::is_utf8($b), "Verify UTF-8 ^ non-UTF-8 retains UTF-8 flag");
+undef $b;
+$b = "\xFF" ^ $a;
+ok(utf8::is_utf8($b), "Verify non-UTF-8 ^ UTF-8 retains UTF-8 flag");
+
  
  # [rt.perl.org 33003]
  # This would cause a segfault without malloc wrap
@@ -354,117 +302,12 @@ SKIP: {
      $a &= "a";
      ok($a =~ /a+$/, 'ASCII "a" is NUL-terminated');
  
-    $b = "bb\x{100}";
+    $b = "bb\x{FF}";
+    utf8::upgrade($b);
      $b &= "b";
      ok($b =~ /b+$/, 'Unicode "b" is NUL-terminated');
  }
  
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $c = $a | $b;
-    is($c, chr(0x1FF) x 0xFF . chr(0x101) x 2);
-
-    $c = $b | $a;
-    is($c, chr(0x1FF) x 0xFF . chr(0x101) x 2);
-
-    $c = $a & $b;
-    is($c, chr(0x001) x 0x0FF);
-
-    $c = $b & $a;
-    is($c, chr(0x001) x 0x0FF);
-
-    $c = $a ^ $b;
-    is($c, chr(0x1FE) x 0x0FF . chr(0x101) x 2);
-
-    $c = $b ^ $a;
-    is($c, chr(0x1FE) x 0x0FF . chr(0x101) x 2);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $a |= $b;
-    is($a, chr(0x1FF) x 0xFF . chr(0x101) x 2);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $b |= $a;
-    is($b, chr(0x1FF) x 0xFF . chr(0x101) x 2);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $a &= $b;
-    is($a, chr(0x001) x 0x0FF);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $b &= $a;
-    is($b, chr(0x001) x 0x0FF);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $a ^= $b;
-    is($a, chr(0x1FE) x 0x0FF . chr(0x101) x 2);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $b ^= $a;
-    is($b, chr(0x1FE) x 0x0FF . chr(0x101) x 2);
-}
-
-# update to pp_complement() via Coverity
-SKIP: {
-  # UTF-EBCDIC is limited to 0x7fffffff and can't encode ~0.
-  skip "Complements exceed maximum representable on EBCDIC ", 2 if $::IS_EBCDIC;
-
-  my $str = "\x{10000}\x{800}";
-  # U+10000 is four bytes in UTF-8/UTF-EBCDIC.
-  # U+0800 is three bytes in UTF-8/UTF-EBCDIC.
-
-  no warnings "utf8";
-  {
-    use bytes;
-    no warnings 'deprecated';
-    $str =~ s/\C\C\z//;
-  }
-
-  # it's really bogus that (~~malformed) is \0.
-  my $ref = "\x{10000}\0";
-  is(~~$str, $ref);
-
-  # same test, but this time with a longer replacement string that
-  # exercises a different branch in pp_subsr()
-
-  $str = "\x{10000}\x{800}";
-  {
-    use bytes;
-    no warnings 'deprecated';
-    $str =~ s/\C\C\z/\0\0\0/;
-  }
-
-  # it's also bogus that (~~malformed) is \0\0\0\0.
-  my $ref = "\x{10000}\0\0\0\0";
-  is(~~$str, $ref, "use bytes with long replacement");
-}
-
  # New string- and number-specific bitwise ops
  {
    use feature "bitwise";
@@ -512,12 +355,45 @@ SKIP: {
   ok (($cusp >> 1) == ($cusp / 2) &&
      do { use integer; abs($cusp >> 1) } == ($cusp / 2));
  }
+# Repeat some of those under 'use v5.27' (no explicit 'use feature' here)
+{
+  use v5.27;   # the version bundle is what makes &. |. ^. ~. available below
+
+  is "22" & "66", 2,    'numeric & with strings';   # plain ops act numerically
+  is "22" | "66", 86,   'numeric | with strings';
+  is "22" ^ "66", 84,   'numeric ^ with strings';
+  is ~"22" & 0xff, 233, 'numeric ~ with string';
+  is 22 &. 66, 22,     '&. with numbers';           # dotted ops act on strings
+  is 22 |. 66, 66,     '|. with numbers';
+  is 22 ^. 66, "\4\4", '^. with numbers';
+  if ($::IS_EBCDIC) {   # the complemented bytes depend on the character set
+    # ord('2') is 0xF2 on EBCDIC
+    is ~.22, "\x0d\x0d", '~. with number';
+  }
+  else {
+    # ord('2') is 0x32 on ASCII
+    is ~.22, "\xcd\xcd", '~. with number';
+  }
+  $_ = "22";   # assignment forms: $_ is re-seeded before every test
+  is $_ &= "66", 2,  'numeric &= with strings';
+  $_ = "22";
+  is $_ |= "66", 86, 'numeric |= with strings';
+  $_ = "22";
+  is $_ ^= "66", 84, 'numeric ^= with strings';
+  $_ = 22;
+  is $_ &.= 66, 22,     '&.= with numbers';
+  $_ = 22;
+  is $_ |.= 66, 66,     '|.= with numbers';
+  $_ = 22;
+  is $_ ^.= 66, "\4\4", '^.= with numbers';
+}
  
  # ref tests
  
  my %res;
  
-for my $str ("x", "\x{100}") {
+for my $str ("x", "\x{B6}") {
+    utf8::upgrade($str) if $str !~ /x/;
      for my $chr (qw/S A H G X ( * F/) {
          for my $op (qw/| & ^/) {
              my $co = ord $chr;
@@ -531,7 +407,7 @@ for my $str ("x", "\x{100}") {
  }
  
  sub PVBM () { "X" }
-index "foo", PVBM;
+1 if index "foo", PVBM;
  
  my $warn = 0;
  local $^W = 1;
@@ -557,8 +433,9 @@ for (
  ) {
      my ($val, $orig, $type) = @$_;
  
-    for (["x", "string"], ["\x{100}", "utf8"]) {
+    for (["x", "string"], ["\x{B6}", "utf8"]) {
          my ($str, $desc) = @$_;
+        utf8::upgrade($str) if $desc =~ /utf8/;
  
          $warn = 0;
  
@@ -595,6 +472,8 @@ for (
      }
  }
  
+delete $SIG{__WARN__};
+
  my $strval;
  
  {
@@ -605,7 +484,7 @@ my $strval;
      use overload q/|/ => sub { "y" };
  }
  
-ok(!eval { bless([], "Bar") | "x"; 1 },     "string overload can't use |");
+ok(!eval { 1 if bless([], "Bar") | "x"; 1 },"string overload can't use |");
  like($@, qr/no method found/,               "correct error");
  is(eval { bless([], "Baz") | "x" }, "y",    "| overload works");
  
@@ -620,3 +499,187 @@ $^A .= new version ~$_ for eval sprintf('"\\x%02x"', 0xff - ord("1")),
                             $::IS_EBCDIC ? v13 : v205, # 255 - ord('2')
                             eval sprintf('"\\x%02x"', 0xff - ord("3"));
  is $^A, "123", '~v0 clears vstring magic on retval';
+
+{   # << and >> at, around and beyond the integer width, with/without 'use integer'
+    my $w = $Config::Config{ivsize} * 8;   # ivsize * 8: integer width in bits
+
+    fail("unexpected w $w") unless $w == 32 || $w == 64;   # only widths handled below
+
+    is(1 << 1, 2, "UV 1 left shift 1");
+    is(1 >> 1, 0, "UV 1 right shift 1");
+
+    is(0x7b << -4, 0x007, "UV left negative shift == right shift");   # count < 0 reverses direction
+    is(0x7b >> -4, 0x7b0, "UV right negative shift == left shift");
+
+    is(0x7b <<  0, 0x07b, "UV left  zero shift == identity");
+    is(0x7b >>  0, 0x07b, "UV right zero shift == identity");
+
+    is(0x0 << -1, 0x0, "zero left  negative shift == zero");
+    is(0x0 >> -1, 0x0, "zero right negative shift == zero");
+
+    cmp_ok(1 << $w - 1, '==', 2 ** ($w - 1), # not is() because NV stringify.
+       "UV left $w - 1 shift == 2 ** ($w - 1)");
+    is(1 << $w,     0, "UV left shift $w     == zero");   # overshift is expected to give 0
+    is(1 << $w + 1, 0, "UV left shift $w + 1 == zero");
+
+    is(1 >> $w - 1, 0, "UV right shift $w - 1 == zero");
+    is(1 >> $w,     0, "UV right shift $w     == zero");
+    is(1 >> $w + 1, 0, "UV right shift $w + 1 == zero");
+
+    # Negative shiftees get promoted to UVs before shifting.  This is
+    # not necessarily the ideal behavior, but that is what is happening.
+    if ($w == 64) {
+        no warnings "portable";
+        no warnings "overflow"; # prevent compile-time warning for ivsize=4
+        is(-1 << 1, 0xFFFF_FFFF_FFFF_FFFE,
+           "neg UV (sic) left shift  = 0xFF..E");
+        is(-1 >> 1, 0x7FFF_FFFF_FFFF_FFFF,
+           "neg UV (sic) right right = 0x7F..F");   # NOTE(review): "right right" looks like a typo for "right shift" -- confirm
+    } elsif ($w == 32) {
+        no warnings "portable";
+        is(-1 << 1, 0xFFFF_FFFE, "neg left shift  == 0xFF..E");
+        is(-1 >> 1, 0x7FFF_FFFF, "neg right right == 0x7F..F");   # NOTE(review): same "right right" wording
+    }
+
+    {
+        # 'use integer' means use IVs instead of UVs.
+        use integer;
+
+        # No surprises here.
+        is(1 << 1, 2, "IV 1 left shift 1  == 2");
+        is(1 >> 1, 0, "IV 1 right shift 1 == 0");
+
+        # The left overshift should behave like without 'use integer',
+        # that is, return zero.
+        is(1 << $w,     0, "IV 1 left shift $w     == 0");
+        is(1 << $w + 1, 0, "IV 1 left shift $w + 1 == 0");
+        is(-1 << $w,     0, "IV -1 left shift $w     == 0");
+        is(-1 << $w + 1, 0, "IV -1 left shift $w + 1 == 0");
+
+        # Even for negative IVs, left shift is multiplication.
+        # But right shift of -1 is expected to stay stuck at -1.
+        is(-1 <<      1, -2, "IV -1 left shift       1 == -2");
+        is(-1 >>      1, -1, "IV -1 right shift      1 == -1");
+
+        # As for UVs, negative shifting means the reverse shift.
+        is(-1 <<     -1, -1, "IV -1 left shift      -1 == -1");
+        is(-1 >>     -1, -2, "IV -1 right shift     -1 == -2");
+
+        # Test also at and around wordsize; the result should stay stuck at -1.
+        is(-1 >> $w - 1, -1, "IV -1 right shift $w - 1 == -1");
+        is(-1 >> $w,     -1, "IV -1 right shift $w     == -1");
+        is(-1 >> $w + 1, -1, "IV -1 right shift $w + 1 == -1");
+    }
+}
+
+# [perl #129287] UTF8 & was not providing a trailing null byte.
+# This test is a bit convoluted, as we want to make sure that the string
+# allocated for &’s target contains memory initialised to something other
+# than a null byte.  Uninitialised memory does not make for a reliable
+# test.  So we do &. on a longer non-utf8 string first.
+for (["aaa","aaa"],[substr ("a\x{100}",0,1), "a"]) {   # 2nd pair: UTF-8 flagged "a" vs plain "a"
+    use feature "bitwise";
+    no warnings "experimental::bitwise", "pack";
+    $byte = substr unpack("P2", pack "P", $$_[0] &. $$_[1]), -1;   # 2 bytes at the result's buffer; keep the 2nd
+}
+is $byte, "\0", "utf8 &. appends null byte";   # $byte holds the value from the last (UTF-8) pair
+
+# only visible under sanitize
+fresh_perl_is('$x = "UUUUUUUV"; $y = "xxxxxxx"; $x |= $y; print $x',
+              ( $::IS_EBCDIC) ? 'XXXXXXXV' : '}}}}}}}V',
+              {}, "[perl #129995] access to freed memory");
+
+
+#
+# Using code points above 0xFF is fatal
+#
+foreach my $op_info ([and => "&"], [or => "|"], [xor => "^"]) {   # [name used in the message, operator]
+    my ($op_name, $op) = @$op_info;
+    local $@;                                      # keep the eval error local to this iteration
+    eval '$_ = "\xFF" ' . $op . ' "\x{100}";';     # string eval: a fatal error ends up in $@
+    like $@, qr /^Use of strings with code points over 0xFF as arguments (?#
+                 )to bitwise $op_name \Q($op)\E operator is not allowed/,
+         "Use of code points above 0xFF as arguments to bitwise " .
+         "$op_name ($op) is not allowed";
+}
+
+{   # same check for the unary 1's complement operator
+    local $@;                        # keep the eval error local to this block
+    eval '$_ = ~ "\x{100}";';        # string eval: a fatal error ends up in $@
+    like $@, qr /^Use of strings with code points over 0xFF as arguments (?#
+                 )to 1's complement \(~\) operator is not allowed/,
+         "Use of code points above 0xFF as argument to 1's complement " .
+         "(~) is not allowed";
+}
+
+{
+    # These are temporary, so they are rolled out rather than made into loops.
+    # Each op must work but warn exactly once; operator chars are escaped in qr//.
+    local $SIG{__WARN__} = sub { push @warnings, @_; };   # collect instead of printing
+
+    undef @warnings;
+    is("abc" & "abc\x{100}", "abc", '"abc" & "abc\x{100}" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise and \(&\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    undef @warnings;
+    is("abc" | "abc\x{100}", "abc\x{100}", '"abc" | "abc\x{100}" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise or \(\|\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    undef @warnings;
+    is("abc" ^ "abc\x{100}", "\0\0\0\x{100}", '"abc" ^ "abc\x{100}" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise xor \(\^\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    undef @warnings;
+    is("abc\x{100}" & "abc", "abc", '"abc\x{100}" & "abc" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise and \(&\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    undef @warnings;
+    is("abc\x{100}" | "abc", "abc\x{100}", '"abc\x{100}" | "abc" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise or \(\|\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    undef @warnings;
+    is("abc\x{100}" ^ "abc", "\0\0\0\x{100}", '"abc\x{100}" ^ "abc" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise xor \(\^\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    no warnings 'deprecated';   # from here on the same six ops must be silent
+    undef @warnings;
+    my $foo = "abc" & "abc\x{100}";
+    $foo = "abc" | "abc\x{100}";
+    $foo = "abc" ^ "abc\x{100}";
+    $foo = "abc\x{100}" & "abc";
+    $foo = "abc\x{100}" | "abc";
+    $foo = "abc\x{100}" ^ "abc";
+    if (! is(@warnings, 0, "... And none of the last 6 main tests warns when 'deprecated' is off")) {
+        diag join "\n", @warnings;
+    }
+}