X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/a18d6e6e4cf998a0ba9067ceac2d75f71aedef15..c56ed9f6dbe3d89129c7f5a37b28d4fc398561e4:/lib/utf8.t diff --git a/lib/utf8.t b/lib/utf8.t index 715ca3e..e6c94e6 100644 --- a/lib/utf8.t +++ b/lib/utf8.t @@ -427,6 +427,40 @@ SKIP: { } { + # Make sure utf8::decode respects copy-on-write [perl #91834]. + # Hash keys are the easiest way to test this. + my $name = "\x{c3}\x{b3}"; + my ($k1) = keys %{ { $name=>undef } }; + my $k2 = $name; + utf8::decode($k1); + utf8::decode($k2); + my $h = { $k1 => 1, $k2 => 2 }; + is join('', keys $h), $k2, 'utf8::decode respects copy-on-write'; +} + +{ + # Make sure utf8::decode does not modify read-only scalars + # [perl #91850]. + + my $name = "\x{c3}\x{b3}"; + Internals::SvREADONLY($name, 1); + eval { utf8::decode($name) }; + like $@, qr/^Modification of a read-only/, + 'utf8::decode respects readonliness'; +} + +{ + # utf8::decode should stringify refs [perl #91852]. + + package eieifg { use overload '""' => sub { "\x{c3}\x{b3}" }, + fallback => 1 } + + my $name = bless[], eieifg::; + utf8::decode($name); + is $name, "\xf3", 'utf8::decode flattens references'; +} + +{ my $a = "456\xb6"; utf8::upgrade($a); @@ -484,4 +518,74 @@ SKIP: { } } +# #80190 update pos, and cached length/position-mapping after +# utf8 upgrade/downgrade, encode/decode + +for my $pos (0..5) { + + my $p; + my $s = "A\xc8\x81\xe8\xab\x86\x{100}"; + chop($s); + + pos($s) = $pos; + # also sets cache + is(length($s), 6, "(pos $pos) len before utf8::downgrade"); + is(pos($s), $pos, "(pos $pos) pos before utf8::downgrade"); + utf8::downgrade($s); + is(length($s), 6, "(pos $pos) len after utf8::downgrade"); + is(pos($s), $pos, "(pos $pos) pos after utf8::downgrade"); + is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after utf8::downgrade"); + utf8::decode($s); + is(length($s), 3, "(pos $pos) len after D; utf8::decode"); + is(pos($s), undef, "(pos $pos) pos after D; utf8::decode"); + is($s, "A\x{201}\x{8ac6}", "(pos $pos) str after D; utf8::decode"); + utf8::encode($s); + is(length($s), 6, "(pos $pos) len after D; utf8::encode"); + is(pos($s), undef, "(pos $pos) pos after D; utf8::encode"); + is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after D; utf8::encode"); + + $s = "A\xc8\x81\xe8\xab\x86"; + + pos($s) = $pos; + is(length($s), 6, "(pos $pos) len before utf8::upgrade"); + is(pos($s), $pos, "(pos $pos) pos before utf8::upgrade"); + utf8::upgrade($s); + is(length($s), 6, "(pos $pos) len after utf8::upgrade"); + is(pos($s), $pos, "(pos $pos) pos after utf8::upgrade"); + is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after utf8::upgrade"); + utf8::decode($s); + is(length($s), 3, "(pos $pos) len after U; utf8::decode"); + is(pos($s), undef, "(pos $pos) pos after U; utf8::decode"); + is($s, "A\x{201}\x{8ac6}", "(pos $pos) str after U; utf8::decode"); + utf8::encode($s); + is(length($s), 6, "(pos $pos) len after U; utf8::encode"); + is(pos($s), undef, "(pos $pos) pos after U; utf8::encode"); + is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after U; utf8::encode"); +} + +# [perl #119043] utf8::upgrade should not croak on read-only COWs +for(__PACKAGE__) { + # First make sure we have a COW, otherwise this test is useless. + my $copy = $_; + my @addrs = unpack "L!L!", pack "pp", $copy, $_; + if ($addrs[0] != $addrs[1]) { + fail("__PACKAGE__ did not produce a COW - if this change was " + ."intentional, please provide me with another ro COW scalar") + } + else { + eval { utf8::upgrade($_) }; + is $@, "", 'no error with utf8::upgrade on read-only COW'; + } +} +# This one croaks, but not because the scalar is read-only +eval "package \x{100};\n" . <<'END' + for(__PACKAGE__) { + eval { utf8::downgrade($_) }; + ::like $@, qr/^Wide character/, + 'right error with utf8::downgrade on read-only COW'; + } + 1 +END +or die $@; + done_testing();