X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/e4dc48dc285e86e786d9f1ca22417ef481b6daff..40f11004fb3b5fa1cd207a20090df837d721b736:/lib/utf8.t diff --git a/lib/utf8.t b/lib/utf8.t index 722c51d..b13bb53 100644 --- a/lib/utf8.t +++ b/lib/utf8.t @@ -39,8 +39,6 @@ no utf8; # Ironic, no? # # -plan tests => 157; - { # bug id 20001009.001 @@ -429,6 +427,18 @@ SKIP: { } { + # Make sure utf8::decode respects copy-on-write [perl #91834]. + # Hash keys are the easiest way to test this. + my $name = "\x{c3}\x{b3}"; + my ($k1) = keys %{ { $name=>undef } }; + my $k2 = $name; + utf8::decode($k1); + utf8::decode($k2); + my $h = { $k1 => 1, $k2 => 2 }; + is join('', keys $h), $k2, 'utf8::decode respects copy-on-write'; +} + +{ my $a = "456\xb6"; utf8::upgrade($a); @@ -465,3 +475,73 @@ SKIP: { no strict 'subs'; ok( !utf8::is_utf8( asd ), "Wasteful format - bareword" ); } + +{ + my @highest = + (undef, 0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF, 0x7FFFFFFF); + my @step = + (undef, undef, 0x40, 0x1000, 0x40000, 0x1000000, 0x40000000); + + foreach my $length (6, 5, 4, 3, 2) { + my $high = $highest[$length]; + while ($high > $highest[$length - 1]) { + my $low = $high - $step[$length] + 1; + $low = $highest[$length - 1] + 1 if $low <= $highest[$length - 1]; + ok(utf8::valid(do {no warnings 'utf8'; chr $low}), + sprintf "chr %x, length $length is valid", $low); + ok(utf8::valid(do {no warnings 'utf8'; chr $high}), + sprintf "chr %x, length $length is valid", $high); + $high -= $step[$length]; + } + } +} + +# #80190 update pos, and cached length/position-mapping after +# utf8 upgrade/downgrade, encode/decode + +for my $pos (0..5) { + + my $pos1 = ($pos >= 3) ? 2 : ($pos >= 1) ? 1 : 0; + my $pos2 = ($pos1 == 2) ? 3 : $pos1; + + my $p; + my $s = "A\xc8\x81\xe8\xab\x86\x{100}"; + chop($s); + + pos($s) = $pos; + # also sets cache + is(length($s), 6, "(pos $pos) len before utf8::downgrade"); + is(pos($s), $pos, "(pos $pos) pos before utf8::downgrade"); + utf8::downgrade($s); + is(length($s), 6, "(pos $pos) len after utf8::downgrade"); + is(pos($s), $pos, "(pos $pos) pos after utf8::downgrade"); + is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after utf8::downgrade"); + utf8::decode($s); + is(length($s), 3, "(pos $pos) len after D; utf8::decode"); + is(pos($s), $pos1, "(pos $pos) pos after D; utf8::decode"); + is($s, "A\x{201}\x{8ac6}", "(pos $pos) str after D; utf8::decode"); + utf8::encode($s); + is(length($s), 6, "(pos $pos) len after D; utf8::encode"); + is(pos($s), $pos2, "(pos $pos) pos after D; utf8::encode"); + is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after D; utf8::encode"); + + $s = "A\xc8\x81\xe8\xab\x86"; + + pos($s) = $pos; + is(length($s), 6, "(pos $pos) len before utf8::upgrade"); + is(pos($s), $pos, "(pos $pos) pos before utf8::upgrade"); + utf8::upgrade($s); + is(length($s), 6, "(pos $pos) len after utf8::upgrade"); + is(pos($s), $pos, "(pos $pos) pos after utf8::upgrade"); + is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after utf8::upgrade"); + utf8::decode($s); + is(length($s), 3, "(pos $pos) len after U; utf8::decode"); + is(pos($s), $pos1, "(pos $pos) pos after U; utf8::decode"); + is($s, "A\x{201}\x{8ac6}", "(pos $pos) str after U; utf8::decode"); + utf8::encode($s); + is(length($s), 6, "(pos $pos) len after U; utf8::encode"); + is(pos($s), $pos2, "(pos $pos) pos after U; utf8::encode"); + is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after U; utf8::encode"); +} + +done_testing();