[perl #119043] Allow utf8 up/downgrade on ro COWs

[perl5.git] / lib / utf8.t
diff --git a/lib/utf8.t b/lib/utf8.t

index ae81ccd..e6c94e6 100644 (file)
--- a/lib/utf8.t
+++ b/lib/utf8.t
@@ -427,6 +427,40 @@ SKIP: {
  }
  
  {
+    # Make sure utf8::decode respects copy-on-write [perl #91834].
+    # Hash keys are the easiest way to test this.
+    my $name = "\x{c3}\x{b3}";
+    my ($k1) = keys %{ { $name=>undef } };
+    my $k2 = $name;
+    utf8::decode($k1);
+    utf8::decode($k2);
+    my $h = { $k1 => 1, $k2 => 2 };
+    is join('', keys $h), $k2, 'utf8::decode respects copy-on-write';
+}
+
+{
+    # Make sure utf8::decode does not modify read-only scalars
+    # [perl #91850].
+    
+    my $name = "\x{c3}\x{b3}";
+    Internals::SvREADONLY($name, 1);
+    eval { utf8::decode($name) };
+    like $@, qr/^Modification of a read-only/,
+       'utf8::decode respects readonliness';
+}
+
+{
+    # utf8::decode should stringify refs [perl #91852].
+
+    package eieifg { use overload '""'      => sub { "\x{c3}\x{b3}" },
+                                   fallback => 1 }
+
+    my $name = bless[], eieifg::;
+    utf8::decode($name);
+    is $name, "\xf3", 'utf8::decode flattens references';
+}
+
+{
      my $a = "456\xb6";
      utf8::upgrade($a);
  
@@ -489,9 +523,6 @@ SKIP: {
  
  for my $pos (0..5) {
  
-    my $pos1 = ($pos >= 3)  ? 2 : ($pos >= 1) ? 1 : 0;
-    my $pos2 = ($pos1 == 2) ? 3 : $pos1;
-
      my $p;
      my $s = "A\xc8\x81\xe8\xab\x86\x{100}";
      chop($s);
@@ -506,11 +537,11 @@ for my $pos (0..5) {
      is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after     utf8::downgrade");
      utf8::decode($s);
      is(length($s), 3,             "(pos $pos) len after  D; utf8::decode");
-    is(pos($s),    $pos1,         "(pos $pos) pos after  D; utf8::decode");
+    is(pos($s),    undef,         "(pos $pos) pos after  D; utf8::decode");
      is($s, "A\x{201}\x{8ac6}",    "(pos $pos) str after  D; utf8::decode");
      utf8::encode($s);
      is(length($s), 6,             "(pos $pos) len after  D; utf8::encode");
-    is(pos($s),    $pos2,         "(pos $pos) pos after  D; utf8::encode");
+    is(pos($s),    undef,         "(pos $pos) pos after  D; utf8::encode");
      is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after  D; utf8::encode");
  
      $s = "A\xc8\x81\xe8\xab\x86";
@@ -524,12 +555,37 @@ for my $pos (0..5) {
      is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after     utf8::upgrade");
      utf8::decode($s);
      is(length($s), 3,             "(pos $pos) len after  U; utf8::decode");
-    is(pos($s),    $pos1,         "(pos $pos) pos after  U; utf8::decode");
+    is(pos($s),    undef,         "(pos $pos) pos after  U; utf8::decode");
      is($s, "A\x{201}\x{8ac6}",    "(pos $pos) str after  U; utf8::decode");
      utf8::encode($s);
      is(length($s), 6,             "(pos $pos) len after  U; utf8::encode");
-    is(pos($s),    $pos2,         "(pos $pos) pos after  U; utf8::encode");
+    is(pos($s),    undef,         "(pos $pos) pos after  U; utf8::encode");
      is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after  U; utf8::encode");
  }
  
+# [perl #119043] utf8::upgrade should not croak on read-only COWs
+for(__PACKAGE__) {
+    # First make sure we have a COW, otherwise this test is useless.
+    my $copy = $_;
+    my @addrs = unpack "L!L!", pack "pp", $copy, $_;
+    if ($addrs[0] != $addrs[1]) {
+       fail("__PACKAGE__ did not produce a COW - if this change was "
+           ."intentional, please provide me with another ro COW scalar")
+    }
+    else {
+       eval { utf8::upgrade($_) };
+       is $@, "", 'no error with utf8::upgrade on read-only COW';
+    }
+}
+# This one croaks, but not because the scalar is read-only
+eval "package \x{100};\n" . <<'END'
+    for(__PACKAGE__) {
+       eval { utf8::downgrade($_) };
+       ::like $@, qr/^Wide character/,
+           'right error with utf8::downgrade on read-only COW';
+    }
+    1
+END
+or die $@;
+
  done_testing();