Deprecate above \xFF in bitwise string ops

[perl5.git] / t / op / bop.t
diff --git a/t/op/bop.t b/t/op/bop.t

index 1f96e9b..411d253 100644 (file)
--- a/t/op/bop.t
+++ b/t/op/bop.t
@@ -5,7 +5,6 @@
  #
  
  use warnings;
-no warnings 'deprecated';
  
  BEGIN {
      chdir 't' if -d 't';
@@ -19,7 +18,7 @@ BEGIN {
  # If you find tests are failing, please try adding names to tests to track
  # down where the failure is, and supply your new names as a patch.
  # (Just-in-time test naming)
-plan tests => 192 + (10*13*2) + 5 + 30;
+plan tests => 504;
  
  # numerics
  ok ((0xdead & 0xbeef) == 0x9ead);
@@ -109,85 +108,6 @@ is ("ok \x{FF}\x{FF}\n" & "ok 22\n", "ok 22\n");
  is ("ok 23\n" | "ok \x{0}\x{0}\n", "ok 23\n");
  is ("o\x{0} \x{0}4\x{0}" ^ "\x{0}k\x{0}2\x{0}\n", "ok 24\n");
  
-#
-is (sprintf("%vd", v4095 & v801), 801);
-is (sprintf("%vd", v4095 | v801), 4095);
-is (sprintf("%vd", v4095 ^ v801), 3294);
-
-#
-is (sprintf("%vd", v4095.801.4095 & v801.4095), '801.801');
-is (sprintf("%vd", v4095.801.4095 | v801.4095), '4095.4095.4095');
-is (sprintf("%vd", v801.4095 ^ v4095.801.4095), '3294.3294.4095');
-#
-is (sprintf("%vd", v120.300 & v200.400), '72.256');
-is (sprintf("%vd", v120.300 | v200.400), '248.444');
-is (sprintf("%vd", v120.300 ^ v200.400), '176.188');
-#
-{
-    my $a = v120.300;
-    my $b = v200.400;
-    $a ^= $b;
-    is (sprintf("%vd", $a), '176.188');
-}
-{
-    my $a = v120.300;
-    my $b = v200.400;
-    $a |= $b;
-    is (sprintf("%vd", $a), '248.444');
-}
-
-#
-# UTF8 ~ behaviour
-#
-
-{
-    my @not36;
-
-    for (0x100...0xFFF) {
-    $a = ~(chr $_);
-        push @not36, sprintf("%#03X", $_)
-            if $a ne chr(~$_) or length($a) != 1 or ~$a ne chr($_);
-    }
-    is (join (', ', @not36), '');
-
-    my @not37;
-
-    for my $i (0xEEE...0xF00) {
-        for my $j (0x0..0x120) {
-            $a = ~(chr ($i) . chr $j);
-                push @not37, sprintf("%#03X %#03X", $i, $j)
-                    if $a ne chr(~$i).chr(~$j) or
-                    length($a) != 2 or
-                    ~$a ne chr($i).chr($j);
-        }
-    }
-    is (join (', ', @not37), '');
-
-    is (~chr(~0), "\0");
-
-
-    my @not39;
-
-    for my $i (0x100..0x120) {
-        for my $j (0x100...0x120) {
-            push @not39, sprintf("%#03X %#03X", $i, $j)
-                if ~(chr($i)|chr($j)) ne (~chr($i)&~chr($j));
-        }
-    }
-    is (join (', ', @not39), '');
-
-    my @not40;
-
-    for my $i (0x100..0x120) {
-        for my $j (0x100...0x120) {
-            push @not40, sprintf("%#03X %#03X", $i, $j)
-                if ~(chr($i)&chr($j)) ne (~chr($i)|~chr($j));
-        }
-    }
-    is (join (', ', @not40), '');
-}
-
-
  # More variations on 19 and 22.
  is ("ok \xFF\x{FF}\n" & "ok 41\n", "ok 41\n");
  is ("ok \x{FF}\xFF\n" & "ok 42\n", "ok 42\n");
@@ -346,6 +266,28 @@ $a = "\0\x{100}"; chop($a);
  ok(utf8::is_utf8($a)); # make sure UTF8 flag is still there
  $a = ~$a;
  is($a, "\xFF", "~ works with utf-8");
+ok(! utf8::is_utf8($a), "    and turns off the UTF-8 flag");
+
+$a = "\0\x{100}"; chop($a);
+undef $b;
+$b = $a | "\xFF";
+ok(utf8::is_utf8($b), "Verify UTF-8 | non-UTF-8 retains UTF-8 flag");
+undef $b;
+$b = "\xFF" | $a;
+ok(utf8::is_utf8($b), "Verify non-UTF-8 | UTF-8 retains UTF-8 flag");
+undef $b;
+$b = $a & "\xFF";
+ok(utf8::is_utf8($b), "Verify UTF-8 & non-UTF-8 retains UTF-8 flag");
+undef $b;
+$b = "\xFF" & $a;
+ok(utf8::is_utf8($b), "Verify non-UTF-8 & UTF-8 retains UTF-8 flag");
+undef $b;
+$b = $a ^ "\xFF";
+ok(utf8::is_utf8($b), "Verify UTF-8 ^ non-UTF-8 retains UTF-8 flag");
+undef $b;
+$b = "\xFF" ^ $a;
+ok(utf8::is_utf8($b), "Verify non-UTF-8 ^ UTF-8 retains UTF-8 flag");
+
  
  # [rt.perl.org 33003]
  # This would cause a segfault without malloc wrap
@@ -360,83 +302,12 @@ SKIP: {
      $a &= "a";
      ok($a =~ /a+$/, 'ASCII "a" is NUL-terminated');
  
-    $b = "bb\x{100}";
+    $b = "bb\x{FF}";
+    utf8::upgrade($b);
      $b &= "b";
      ok($b =~ /b+$/, 'Unicode "b" is NUL-terminated');
  }
  
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $c = $a | $b;
-    is($c, chr(0x1FF) x 0xFF . chr(0x101) x 2);
-
-    $c = $b | $a;
-    is($c, chr(0x1FF) x 0xFF . chr(0x101) x 2);
-
-    $c = $a & $b;
-    is($c, chr(0x001) x 0x0FF);
-
-    $c = $b & $a;
-    is($c, chr(0x001) x 0x0FF);
-
-    $c = $a ^ $b;
-    is($c, chr(0x1FE) x 0x0FF . chr(0x101) x 2);
-
-    $c = $b ^ $a;
-    is($c, chr(0x1FE) x 0x0FF . chr(0x101) x 2);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $a |= $b;
-    is($a, chr(0x1FF) x 0xFF . chr(0x101) x 2);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $b |= $a;
-    is($b, chr(0x1FF) x 0xFF . chr(0x101) x 2);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $a &= $b;
-    is($a, chr(0x001) x 0x0FF);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $b &= $a;
-    is($b, chr(0x001) x 0x0FF);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $a ^= $b;
-    is($a, chr(0x1FE) x 0x0FF . chr(0x101) x 2);
-}
-
-{
-    $a = chr(0x101) x 0x101;
-    $b = chr(0x0FF) x 0x0FF;
-
-    $b ^= $a;
-    is($b, chr(0x1FE) x 0x0FF . chr(0x101) x 2);
-}
-
-
  # New string- and number-specific bitwise ops
  {
    use feature "bitwise";
@@ -484,12 +355,45 @@ SKIP: {
   ok (($cusp >> 1) == ($cusp / 2) &&
      do { use integer; abs($cusp >> 1) } == ($cusp / 2));
  }
+# Repeat some of those, with 'use v5.27'
+{
+  use v5.27;
+
+  is "22" & "66", 2,    'numeric & with strings';
+  is "22" | "66", 86,   'numeric | with strings';
+  is "22" ^ "66", 84,   'numeric ^ with strings';
+  is ~"22" & 0xff, 233, 'numeric ~ with string';
+  is 22 &. 66, 22,     '&. with numbers';
+  is 22 |. 66, 66,     '|. with numbers';
+  is 22 ^. 66, "\4\4", '^. with numbers';
+  if ($::IS_EBCDIC) {
+    # ord('2') is 0xF2 on EBCDIC
+    is ~.22, "\x0d\x0d", '~. with number';
+  }
+  else {
+    # ord('2') is 0x32 on ASCII
+    is ~.22, "\xcd\xcd", '~. with number';
+  }
+  $_ = "22";
+  is $_ &= "66", 2,  'numeric &= with strings';
+  $_ = "22";
+  is $_ |= "66", 86, 'numeric |= with strings';
+  $_ = "22";
+  is $_ ^= "66", 84, 'numeric ^= with strings';
+  $_ = 22;
+  is $_ &.= 66, 22,     '&.= with numbers';
+  $_ = 22;
+  is $_ |.= 66, 66,     '|.= with numbers';
+  $_ = 22;
+  is $_ ^.= 66, "\4\4", '^.= with numbers';
+}
  
  # ref tests
  
  my %res;
  
-for my $str ("x", "\x{100}") {
+for my $str ("x", "\x{B6}") {
+    utf8::upgrade($str) if $str !~ /x/;
      for my $chr (qw/S A H G X ( * F/) {
          for my $op (qw/| & ^/) {
              my $co = ord $chr;
@@ -529,8 +433,9 @@ for (
  ) {
      my ($val, $orig, $type) = @$_;
  
-    for (["x", "string"], ["\x{100}", "utf8"]) {
+    for (["x", "string"], ["\x{B6}", "utf8"]) {
          my ($str, $desc) = @$_;
+        utf8::upgrade($str) if $desc =~ /utf8/;
  
          $warn = 0;
  
@@ -567,6 +472,8 @@ for (
      }
  }
  
+delete $SIG{__WARN__};
+
  my $strval;
  
  {
@@ -676,3 +583,103 @@ for (["aaa","aaa"],[substr ("a\x{100}",0,1), "a"]) {
      $byte = substr unpack("P2", pack "P", $$_[0] &. $$_[1]), -1;
  }
  is $byte, "\0", "utf8 &. appends null byte";
+
+# only visible under sanitize
+fresh_perl_is('$x = "UUUUUUUV"; $y = "xxxxxxx"; $x |= $y; print $x',
+              ( $::IS_EBCDIC) ? 'XXXXXXXV' : '}}}}}}}V',
+              {}, "[perl #129995] access to freed memory");
+
+
+#
+# Using code points above 0xFF is fatal
+#
+foreach my $op_info ([and => "&"], [or => "|"], [xor => "^"]) {
+    my ($op_name, $op) = @$op_info;
+    local $@;
+    eval '$_ = "\xFF" ' . $op . ' "\x{100}";';
+    like $@, qr /^Use of strings with code points over 0xFF as arguments (?#
+                 )to bitwise $op_name \Q($op)\E operator is not allowed/,
+         "Use of code points above 0xFF as arguments to bitwise " .
+         "$op_name ($op) is not allowed";
+}
+
+{
+    local $@;
+    eval '$_ = ~ "\x{100}";';
+    like $@, qr /^Use of strings with code points over 0xFF as arguments (?#
+                 )to 1's complement \(~\) operator is not allowed/,
+         "Use of code points above 0xFF as argument to 1's complement " .
+         "(~) is not allowed";
+}
+
+{
+    # Since these are temporary, and it was a pain to make them into loops,
+    # the code is just rolled out.
+    local $SIG{__WARN__} = sub { push @warnings, @_; };
+
+    undef @warnings;
+    is("abc" & "abc\x{100}", "abc", '"abc" & "abc\x{100}" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise and \(&\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    undef @warnings;
+    is("abc" | "abc\x{100}", "abc\x{100}", '"abc" | "abc\x{100}" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise or \(|\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    undef @warnings;
+    is("abc" ^ "abc\x{100}", "\0\0\0\x{100}", '"abc" ^ "abc\x{100}" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise xor \(\^\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    undef @warnings;
+    is("abc\x{100}" & "abc", "abc", '"abc\x{100}" & "abc" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise and \(&\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    undef @warnings;
+    is("abc\x{100}" | "abc", "abc\x{100}", '"abc\x{100}" | "abc" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise or \(|\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    undef @warnings;
+    is("abc\x{100}" ^ "abc", "\0\0\0\x{100}", '"abc\x{100}" ^ "abc" works');
+    if (! is(@warnings, 1, "... but returned a single warning")) {
+        diag join "\n", @warnings;
+    }
+    like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
+                            )arguments to bitwise xor \(\^\) operator (?#
+                            )is deprecated/,
+                        "... which is the expected warning");
+    no warnings 'deprecated';
+    undef @warnings;
+    my $foo = "abc" & "abc\x{100}";
+    $foo = "abc" | "abc\x{100}";
+    $foo = "abc" ^ "abc\x{100}";
+    $foo = "abc\x{100}" & "abc";
+    $foo = "abc\x{100}" | "abc";
+    $foo = "abc\x{100}" ^ "abc";
+    if (! is(@warnings, 0, "... And none of the last 6 main tests warns when 'deprecated' is off")) {
+        diag join "\n", @warnings;
+    }
+}