This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix chop formats with non PV vars
authorDavid Mitchell <davem@iabyn.com>
Thu, 7 Nov 2013 12:17:26 +0000 (12:17 +0000)
committerDavid Mitchell <davem@iabyn.com>
Mon, 11 Nov 2013 11:21:40 +0000 (11:21 +0000)
[perl #119847],  [perl #119849], [perl #119851]

Strange vars like ties, overloads, or stringified refs (and in recent
perls, pure NOK vars) would generally do the wrong thing in formats
when the var is treated as a string and repeatedly chopped, as in
^<<<~~ and similar. This would manifest itself in infinite loops, utf8
errors etc. A recent change that stopped a stringified NOK getting
converted into a POK made the same badness happen for plain NVs too.

This commit contains two main fixes. First, the chopping was done
using sv_chop(), which only worked on POK strings. If its !POK, we now do
sv_setpvn() instead, which is less efficient, but will ensure the right
thing is always done.

Secondly, we make sure that the sv is accessed only once per cycle,
doing s = SvPV(sv, len) or similar. After that, all access is done only
via s and len. One place was using SvPVX(sv), and several places
were using the sv for utf8<->byte length conversions, such as
sv_pos_b2u().

It turns out that all the complex utf8 handling could be enormously
simplified. Since the code that needed to do utf8/byte length conversions
already scanned the string looking for suitable split points (such as
spaces or \n or \r), it was easiest to include any utf8 processing in the
same loop - i.e. incrementing s by UTF8SKIP(s) each time, but incrementing
the character count by 1.

The original diagnosis and reporting of this issue was done by Nicholas
Clark, who also supplied most of the tests.

pp_ctl.c
t/op/write.t

index 1ab3f42..95727f2 100644 (file)
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -466,7 +466,8 @@ PP(pp_formline)
     I32 arg;
     SV *sv = NULL; /* current item */
     const char *item = NULL;/* string value of current item */
-    I32 itemsize  = 0;     /* length of current item, possibly truncated */
+    I32 itemsize  = 0;     /* length (chars) of item, possibly truncated */
+    I32 itembytes = 0;     /* as itemsize, but length in bytes */
     I32 fieldsize = 0;     /* width of current field */
     I32 lines = 0;         /* number of lines that have been output */
     bool chopspace = (strchr(PL_chopset, ' ') != NULL); /* does $: have space */
@@ -474,7 +475,7 @@ PP(pp_formline)
     STRLEN linemark = 0;    /* pos of start of line in output */
     NV value;
     bool gotsome = FALSE;   /* seen at least one non-blank item on this line */
-    STRLEN len;
+    STRLEN len;             /* length of current sv */
     STRLEN linemax;        /* estimate of output size in bytes */
     bool item_is_utf8 = FALSE;
     bool targ_is_utf8 = FALSE;
@@ -569,133 +570,85 @@ PP(pp_formline)
 
        case FF_CHECKNL: /* find max len of item (up to \n) that fits field */
            {
-               const char *send;
                const char *s = item = SvPV_const(sv, len);
-               itemsize = len;
-               if (DO_UTF8(sv)) {
-                   itemsize = sv_len_utf8(sv);
-                   if (itemsize != (I32)len) {
-                       I32 itembytes;
-                       if (itemsize > fieldsize) {
-                           itemsize = fieldsize;
-                           itembytes = itemsize;
-                           sv_pos_u2b(sv, &itembytes, 0);
-                       }
-                       else
-                           itembytes = len;
-                       send = chophere = s + itembytes;
-                       while (s < send) {
-                           if (! isCNTRL(*s))
-                               gotsome = TRUE;
-                           else if (*s == '\n')
-                               break;
-                           s++;
-                       }
-                       item_is_utf8 = TRUE;
-                       itemsize = s - item;
-                       sv_pos_b2u(sv, &itemsize);
-                       break;
-                   }
-               }
-               item_is_utf8 = FALSE;
-               if (itemsize > fieldsize)
-                   itemsize = fieldsize;
-               send = chophere = s + itemsize;
-               while (s < send) {
-                   if (! isCNTRL(*s))
-                       gotsome = TRUE;
-                   else if (*s == '\n')
-                       break;
-                   s++;
-               }
-               itemsize = s - item;
+               const char *send = s + len;
+
+                itemsize = 0;
+               item_is_utf8 = DO_UTF8(sv);
+                while (s < send) {
+                    if (!isCNTRL(*s))
+                        gotsome = TRUE;
+                    else if (*s == '\n')
+                        break;
+
+                    if (item_is_utf8)
+                        s += UTF8SKIP(s);
+                    else
+                        s++;
+                    itemsize++;
+                    if (itemsize == fieldsize)
+                        break;
+                }
+                itembytes = s - item;
                break;
            }
 
        case FF_CHECKCHOP: /* like CHECKNL, but up to highest split point */
            {
                const char *s = item = SvPV_const(sv, len);
-               itemsize = len;
-               if (DO_UTF8(sv)) {
-                   itemsize = sv_len_utf8(sv);
-                   if (itemsize != (I32)len) {
-                       I32 itembytes;
-                       if (itemsize <= fieldsize) {
-                           const char *send = chophere = s + itemsize;
-                           while (s < send) {
-                               if (*s == '\r') {
-                                   itemsize = s - item;
-                                   chophere = s;
-                                   break;
-                               }
-                               if (! isCNTRL(*s))
-                                   gotsome = TRUE;
-                                s++;
-                           }
-                       }
-                       else {
-                           const char *send;
-                           itemsize = fieldsize;
-                           itembytes = itemsize;
-                           sv_pos_u2b(sv, &itembytes, 0);
-                           send = chophere = s + itembytes;
-                           while (s < send || (s == send && isSPACE(*s))) {
-                               if (isSPACE(*s)) {
-                                   if (chopspace)
-                                       chophere = s;
-                                   if (*s == '\r')
-                                       break;
-                               }
-                               else {
-                                   if (! isCNTRL(*s))
-                                       gotsome = TRUE;
-                                   if (strchr(PL_chopset, *s))
-                                       chophere = s + 1;
-                               }
-                               s++;
-                           }
-                           itemsize = chophere - item;
-                           sv_pos_b2u(sv, &itemsize);
-                       }
-                       item_is_utf8 = TRUE;
-                       break;
-                   }
-               }
-               item_is_utf8 = FALSE;
-               if (itemsize <= fieldsize) {
-                   const char *const send = chophere = s + itemsize;
-                   while (s < send) {
-                       if (*s == '\r') {
-                           itemsize = s - item;
-                           chophere = s;
-                           break;
-                       }
-                       if (! isCNTRL(*s))
-                           gotsome = TRUE;
+               const char *send = s + len;
+                I32 size = 0;
+
+                chophere = NULL;
+               item_is_utf8 = DO_UTF8(sv);
+                while (s < send) {
+                    /* look for a legal split position */
+                    if (isSPACE(*s)) {
+                        if (*s == '\r') {
+                            chophere = s;
+                            itemsize = size;
+                            break;
+                        }
+                        if (chopspace) {
+                            /* provisional split point */
+                            chophere = s;
+                            itemsize = size;
+                        }
+                        /* we delay testing fieldsize until after we've
+                         * processed the possible split char directly
+                         * following the last field char; so if fieldsize=3
+                         * and item="a b cdef", we consume "a b", not "a".
+                         * Ditto further down.
+                         */
+                        if (size == fieldsize)
+                            break;
+                    }
+                    else {
+                        if (strchr(PL_chopset, *s)) {
+                            /* provisional split point */
+                            /* for a non-space split char, we include
+                             * the split char; hence the '+1' */
+                            chophere = s + 1;
+                            itemsize = size;
+                        }
+                        if (size == fieldsize)
+                            break;
+                        if (!isCNTRL(*s))
+                            gotsome = TRUE;
+                    }
+
+                    if (item_is_utf8)
+                        s += UTF8SKIP(s);
+                    else
                         s++;
-                   }
-               }
-               else {
-                   const char *send;
-                   itemsize = fieldsize;
-                   send = chophere = s + itemsize;
-                   while (s < send || (s == send && isSPACE(*s))) {
-                       if (isSPACE(*s)) {
-                           if (chopspace)
-                               chophere = s;
-                           if (*s == '\r')
-                               break;
-                       }
-                       else {
-                           if (! isCNTRL(*s))
-                               gotsome = TRUE;
-                           if (strchr(PL_chopset, *s))
-                               chophere = s + 1;
-                       }
-                       s++;
-                   }
-                   itemsize = chophere - item;
-               }
+                    size++;
+                }
+                if (!chophere || s == send) {
+                    chophere = s;
+                    itemsize = size;
+                }
+                itembytes = chophere - item;
+
                break;
            }
 
@@ -719,16 +672,9 @@ PP(pp_formline)
            break;
 
        case FF_ITEM: /* append a text item, while blanking ctrl chars */
-           to_copy = itemsize;
+           to_copy = itembytes;
            source = (U8 *)item;
            trans = 1;
-           if (item_is_utf8) {
-               /* convert to_copy from chars to bytes */
-               U8 *s = source;
-               while (to_copy--)
-                  s += UTF8SKIP(s);
-               to_copy = s - source;
-           }
            goto append;
 
        case FF_CHOP: /* (for ^*) chop the current item */
@@ -738,7 +684,12 @@ PP(pp_formline)
                    while (isSPACE(*s))
                        s++;
                }
-               sv_chop(sv,s);
+                if (SvPOKp(sv))
+                    sv_chop(sv,s);
+                else
+                    /* tied, overloaded or similar strangeness.
+                     * Do it the hard way */
+                    sv_setpvn(sv, s, len - (s-item));
                SvSETMAGIC(sv);
                break;
            }
@@ -763,7 +714,7 @@ PP(pp_formline)
                while (s < send) {
                    if (*s++ == '\n') {
                        if (oneline) {
-                           to_copy = s - SvPVX_const(sv) - 1;
+                           to_copy = s - item - 1;
                            chophere = s;
                            break;
                        } else {
index 7dcf1e0..0bad2e4 100644 (file)
@@ -7,6 +7,7 @@ BEGIN {
 }
 
 use strict;    # Amazed that this hackery can be made strict ...
+use Tie::Scalar;
 
 # read in a file
 sub cat {
@@ -18,6 +19,42 @@ sub cat {
     $data;
 }
 
+# read in a utf-8 file
+#
+sub cat_utf8 {
+    my $file = shift;
+    local $/;
+    open my $fh, '<', $file or die "can't open '$file': $!";
+    binmode $fh, ':utf8';
+    my $data = <$fh> // die "Can't read from '$file': $!";
+    close $fh or die "error closing '$file': $!";
+    $data;
+}
+
+# write a format to a utf8 file, then read it back in and compare
+
+sub is_format_utf8 {
+    my ($glob, $want, $desc) = @_;
+    local $::Level = $::Level + 1;
+    my $file = 'Op_write.tmp';
+    open $glob, '>:utf8', $file or die "Can't create '$file': $!";
+    write $glob;
+    close $glob or die "Could not close '$file': $!";
+    is(cat_utf8($file), $want, $desc);
+}
+
+sub like_format_utf8 {
+    my ($glob, $want, $desc) = @_;
+    local $::Level = $::Level + 1;
+    my $file = 'Op_write.tmp';
+    open $glob, '>:utf8', $file or die "Can't create '$file': $!";
+    write $glob;
+    close $glob or die "Could not close '$file': $!";
+    like(cat_utf8($file), $want, $desc);
+}
+
+
+
 #-- testing numeric fields in all variants (WL)
 
 sub swrite {
@@ -61,7 +98,7 @@ for my $tref ( @NumTests ){
 my $bas_tests = 21;
 
 # number of tests in section 3
-my $bug_tests = 8 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 4 + 2 + 3 + 96 + 11;
+my $bug_tests = 66 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 4 + 2 + 3 + 96 + 11;
 
 # number of tests in section 4
 my $hmb_tests = 35;
@@ -351,7 +388,6 @@ $el
 {
     my $test = curr_test();
     # Bug report and testcase by Alexey Tourbin
-    use Tie::Scalar;
     my $v;
     tie $v, 'Tie::StdScalar';
     $v = $test;
@@ -560,6 +596,770 @@ $_
     close OUT21 or die "Could not close: $!";
 })[0];
 
+
+
+# [perl #119847],  [perl #119849], [perl #119851]
+# Non-real vars like tied, overloaded and refs could, when stringified,
+# fail to be processed properly, causing infinite loops on ~~, utf8
+# warnings etc, ad nauseum.
+
+
+my $u22a = "N" x 8;
+
+format OUT22a =
+'^<<<<<<<<'~~
+$u22a
+.
+
+is_format_utf8(\*OUT22a,
+               "'NNNNNNNN '\n");
+
+
+my $u22b = "N" x 8;
+utf8::upgrade($u22b);
+
+format OUT22b =
+'^<<<<<<<<'~~
+$u22b
+.
+
+is_format_utf8(\*OUT22b,
+               "'NNNNNNNN '\n");
+
+my $u22c = "\x{FF}" x 8;
+
+format OUT22c =
+'^<<<<<<<<'~~
+$u22c
+.
+
+is_format_utf8(\*OUT22c,
+               "'\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF} '\n");
+
+my $u22d = "\x{FF}" x 8;
+utf8::upgrade($u22d);
+
+format OUT22d =
+'^<<<<<<<<'~~
+$u22d
+.
+
+is_format_utf8(\*OUT22d,
+               "'\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF} '\n");
+
+my $u22e = "\x{100}" x 8;
+
+format OUT22e =
+'^<<<<<<<<'~~
+$u22e
+.
+
+is_format_utf8(\*OUT22e,
+               "'\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} '\n");
+
+
+my $u22f = "N" x 8;
+
+format OUT22f =
+'^<'~~
+$u22f
+.
+
+is_format_utf8(\*OUT22f,
+               "'NN'\n"x4);
+
+
+my $u22g = "N" x 8;
+utf8::upgrade($u22g);
+
+format OUT22g =
+'^<'~~
+$u22g
+.
+
+is_format_utf8(\*OUT22g,
+               "'NN'\n"x4);
+
+my $u22h = "\x{FF}" x 8;
+
+format OUT22h =
+'^<'~~
+$u22h
+.
+
+is_format_utf8(\*OUT22h,
+               "'\x{FF}\x{FF}'\n"x4);
+
+my $u22i = "\x{FF}" x 8;
+utf8::upgrade($u22i);
+
+format OUT22i =
+'^<'~~
+$u22i
+.
+
+is_format_utf8(\*OUT22i,
+               "'\x{FF}\x{FF}'\n"x4);
+
+my $u22j = "\x{100}" x 8;
+
+format OUT22j =
+'^<'~~
+$u22j
+.
+
+is_format_utf8(\*OUT22j,
+               "'\x{100}\x{100}'\n"x4);
+
+
+tie my $u23a, 'Tie::StdScalar';
+$u23a = "N" x 8;
+
+format OUT23a =
+'^<<<<<<<<'~~
+$u23a
+.
+
+is_format_utf8(\*OUT23a,
+               "'NNNNNNNN '\n");
+
+
+tie my $u23b, 'Tie::StdScalar';
+$u23b = "N" x 8;
+utf8::upgrade($u23b);
+
+format OUT23b =
+'^<<<<<<<<'~~
+$u23b
+.
+
+is_format_utf8(\*OUT23b,
+               "'NNNNNNNN '\n");
+
+tie my $u23c, 'Tie::StdScalar';
+$u23c = "\x{FF}" x 8;
+
+format OUT23c =
+'^<<<<<<<<'~~
+$u23c
+.
+
+is_format_utf8(\*OUT23c,
+               "'\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF} '\n");
+
+tie my $u23d, 'Tie::StdScalar';
+my $temp = "\x{FF}" x 8;
+utf8::upgrade($temp);
+$u23d = $temp;
+
+format OUT23d =
+'^<<<<<<<<'~~
+$u23d
+.
+
+is_format_utf8(\*OUT23d,
+               "'\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF} '\n");
+
+tie my $u23e, 'Tie::StdScalar';
+$u23e = "\x{100}" x 8;
+
+format OUT23e =
+'^<<<<<<<<'~~
+$u23e
+.
+
+is_format_utf8(\*OUT23e,
+               "'\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} '\n");
+
+tie my $u23f, 'Tie::StdScalar';
+$u23f = "N" x 8;
+
+format OUT23f =
+'^<'~~
+$u23f
+.
+
+is_format_utf8(\*OUT23f,
+               "'NN'\n"x4);
+
+
+tie my $u23g, 'Tie::StdScalar';
+my $temp = "N" x 8;
+utf8::upgrade($temp);
+$u23g = $temp;
+
+format OUT23g =
+'^<'~~
+$u23g
+.
+
+is_format_utf8(\*OUT23g,
+               "'NN'\n"x4);
+
+tie my $u23h, 'Tie::StdScalar';
+$u23h = "\x{FF}" x 8;
+
+format OUT23h =
+'^<'~~
+$u23h
+.
+
+is_format_utf8(\*OUT23h,
+               "'\x{FF}\x{FF}'\n"x4);
+
+$temp = "\x{FF}" x 8;
+utf8::upgrade($temp);
+tie my $u23i, 'Tie::StdScalar';
+$u23i = $temp;
+
+format OUT23i =
+'^<'~~
+$u23i
+.
+
+is_format_utf8(\*OUT23i,
+               "'\x{FF}\x{FF}'\n"x4);
+
+tie my $u23j, 'Tie::StdScalar';
+$u23j = "\x{100}" x 8;
+
+format OUT23j =
+'^<'~~
+$u23j
+.
+
+is_format_utf8(\*OUT23j,
+               "'\x{100}\x{100}'\n"x4);
+
+{
+    package UTF8Toggle;
+
+    sub TIESCALAR {
+        my $class = shift;
+        my $value = shift;
+        my $state = shift||0;
+        return bless [$value, $state], $class;
+    }
+
+    sub FETCH {
+        my $self = shift;
+        $self->[1] = ! $self->[1];
+        if ($self->[1]) {
+           utf8::downgrade($self->[0]);
+        } else {
+           utf8::upgrade($self->[0]);
+        }
+        $self->[0];
+    }
+
+   sub STORE {
+       my $self = shift;
+       $self->[0] = shift;
+    }
+}
+
+tie my $u24a, 'UTF8Toggle';
+$u24a = "N" x 8;
+
+format OUT24a =
+'^<<<<<<<<'~~
+$u24a
+.
+
+is_format_utf8(\*OUT24a,
+               "'NNNNNNNN '\n");
+
+
+tie my $u24b, 'UTF8Toggle';
+$u24b = "N" x 8;
+utf8::upgrade($u24b);
+
+format OUT24b =
+'^<<<<<<<<'~~
+$u24b
+.
+
+is_format_utf8(\*OUT24b,
+               "'NNNNNNNN '\n");
+
+tie my $u24c, 'UTF8Toggle';
+$u24c = "\x{FF}" x 8;
+
+format OUT24c =
+'^<<<<<<<<'~~
+$u24c
+.
+
+is_format_utf8(\*OUT24c,
+               "'\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF} '\n");
+
+tie my $u24d, 'UTF8Toggle', 1;
+$u24d = "\x{FF}" x 8;
+
+format OUT24d =
+'^<<<<<<<<'~~
+$u24d
+.
+
+is_format_utf8(\*OUT24d,
+               "'\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF} '\n");
+
+
+
+tie my $u24f, 'UTF8Toggle';
+$u24f = "N" x 8;
+
+format OUT24f =
+'^<'~~
+$u24f
+.
+
+is_format_utf8(\*OUT24f,
+               "'NN'\n"x4);
+
+
+tie my $u24g, 'UTF8Toggle';
+my $temp = "N" x 8;
+utf8::upgrade($temp);
+$u24g = $temp;
+
+format OUT24g =
+'^<'~~
+$u24g
+.
+
+is_format_utf8(\*OUT24g,
+               "'NN'\n"x4);
+
+tie my $u24h, 'UTF8Toggle';
+$u24h = "\x{FF}" x 8;
+
+format OUT24h =
+'^<'~~
+$u24h
+.
+
+is_format_utf8(\*OUT24h,
+               "'\x{FF}\x{FF}'\n"x4);
+
+tie my $u24i, 'UTF8Toggle', 1;
+$u24i = "\x{FF}" x 8;
+
+format OUT24i =
+'^<'~~
+$u24i
+.
+
+is_format_utf8(\*OUT24i,
+               "'\x{FF}\x{FF}'\n"x4);
+
+{
+    package OS;
+    use overload '""' => sub { ${$_[0]}; };
+
+    sub new {
+        my ($class, $value) = @_;
+        bless \$value, $class;
+    }
+}
+
+my $u25a = OS->new("N" x 8);
+
+format OUT25a =
+'^<<<<<<<<'~~
+$u25a
+.
+
+is_format_utf8(\*OUT25a,
+               "'NNNNNNNN '\n");
+
+
+my $temp = "N" x 8;
+utf8::upgrade($temp);
+my $u25b = OS->new($temp);
+
+format OUT25b =
+'^<<<<<<<<'~~
+$u25b
+.
+
+is_format_utf8(\*OUT25b,
+               "'NNNNNNNN '\n");
+
+my $u25c = OS->new("\x{FF}" x 8);
+
+format OUT25c =
+'^<<<<<<<<'~~
+$u25c
+.
+
+is_format_utf8(\*OUT25c,
+               "'\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF} '\n");
+
+$temp = "\x{FF}" x 8;
+utf8::upgrade($temp);
+my $u25d = OS->new($temp);
+
+format OUT25d =
+'^<<<<<<<<'~~
+$u25d
+.
+
+is_format_utf8(\*OUT25d,
+               "'\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF} '\n");
+
+my $u25e = OS->new("\x{100}" x 8);
+
+format OUT25e =
+'^<<<<<<<<'~~
+$u25e
+.
+
+is_format_utf8(\*OUT25e,
+               "'\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} '\n");
+
+
+my $u25f = OS->new("N" x 8);
+
+format OUT25f =
+'^<'~~
+$u25f
+.
+
+is_format_utf8(\*OUT25f,
+               "'NN'\n"x4);
+
+
+$temp = "N" x 8;
+utf8::upgrade($temp);
+my $u25g = OS->new($temp);
+
+format OUT25g =
+'^<'~~
+$u25g
+.
+
+is_format_utf8(\*OUT25g,
+               "'NN'\n"x4);
+
+my $u25h = OS->new("\x{FF}" x 8);
+
+format OUT25h =
+'^<'~~
+$u25h
+.
+
+is_format_utf8(\*OUT25h,
+               "'\x{FF}\x{FF}'\n"x4);
+
+$temp = "\x{FF}" x 8;
+utf8::upgrade($temp);
+my $u25i = OS->new($temp);
+
+format OUT25i =
+'^<'~~
+$u25i
+.
+
+is_format_utf8(\*OUT25i,
+               "'\x{FF}\x{FF}'\n"x4);
+
+my $u25j = OS->new("\x{100}" x 8);
+
+format OUT25j =
+'^<'~~
+$u25j
+.
+
+is_format_utf8(\*OUT25j,
+               "'\x{100}\x{100}'\n"x4);
+
+{
+    package OS::UTF8Toggle;
+    use overload '""' => sub {
+        my $self = shift;
+        $self->[1] = ! $self->[1];
+        if ($self->[1]) {
+            utf8::downgrade($self->[0]);
+        } else {
+            utf8::upgrade($self->[0]);
+        }
+        $self->[0];
+    };
+
+    sub new {
+        my ($class, $value, $state) = @_;
+        bless [$value, $state], $class;
+    }
+}
+
+
+my $u26a = OS::UTF8Toggle->new("N" x 8);
+
+format OUT26a =
+'^<<<<<<<<'~~
+$u26a
+.
+
+is_format_utf8(\*OUT26a,
+               "'NNNNNNNN '\n");
+
+
+my $u26b = OS::UTF8Toggle->new("N" x 8, 1);
+
+format OUT26b =
+'^<<<<<<<<'~~
+$u26b
+.
+
+is_format_utf8(\*OUT26b,
+               "'NNNNNNNN '\n");
+
+my $u26c = OS::UTF8Toggle->new("\x{FF}" x 8);
+
+format OUT26c =
+'^<<<<<<<<'~~
+$u26c
+.
+
+is_format_utf8(\*OUT26c,
+               "'\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF} '\n");
+
+my $u26d = OS::UTF8Toggle->new("\x{FF}" x 8, 1);
+
+format OUT26d =
+'^<<<<<<<<'~~
+$u26d
+.
+
+is_format_utf8(\*OUT26d,
+               "'\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF}\x{FF} '\n");
+
+
+my $u26f = OS::UTF8Toggle->new("N" x 8);
+
+format OUT26f =
+'^<'~~
+$u26f
+.
+
+is_format_utf8(\*OUT26f,
+               "'NN'\n"x4);
+
+
+my $u26g = OS::UTF8Toggle->new("N" x 8, 1);
+
+format OUT26g =
+'^<'~~
+$u26g
+.
+
+is_format_utf8(\*OUT26g,
+               "'NN'\n"x4);
+
+my $u26h = OS::UTF8Toggle->new("\x{FF}" x 8);
+
+format OUT26h =
+'^<'~~
+$u26h
+.
+
+is_format_utf8(\*OUT26h,
+               "'\x{FF}\x{FF}'\n"x4);
+
+my $u26i = OS::UTF8Toggle->new("\x{FF}" x 8, 1);
+
+format OUT26i =
+'^<'~~
+$u26i
+.
+
+is_format_utf8(\*OUT26i,
+               "'\x{FF}\x{FF}'\n"x4);
+
+
+
+{
+    my $zero = $$ - $$;
+
+    package Number;
+
+    sub TIESCALAR {
+        my $class = shift;
+        my $value = shift;
+        return bless \$value, $class;
+    }
+
+    # The return value should always be SvNOK() only:
+    sub FETCH {
+        my $self = shift;
+        # avoid "" getting converted to "0" and thus
+        # causing an infinite loop
+        return "" unless length ($$self);
+        return $$self - 0.5 + $zero + 0.5;
+    }
+
+   sub STORE {
+       my $self = shift;
+       $$self = shift;
+    }
+
+   package ONumber;
+
+   use overload '""' => sub {
+        my $self = shift;
+        return $$self - 0.5 + $zero + 0.5;
+    };
+
+    sub new {
+       my $class = shift;
+       my $value = shift;
+       return bless \$value, $class;
+   }
+}
+
+my $v27a = 1/256;
+
+format OUT27a =
+'^<<<<<<<<<'~~
+$v27a
+.
+
+is_format_utf8(\*OUT27a,
+               "'0.00390625'\n");
+
+my $v27b = 1/256;
+
+format OUT27b =
+'^<'~~
+$v27b
+.
+
+is_format_utf8(\*OUT27b,
+               "'0.'\n'00'\n'39'\n'06'\n'25'\n");
+
+tie my $v27c, 'Number', 1/256;
+
+format OUT27c =
+'^<<<<<<<<<'~~
+$v27c
+.
+
+is_format_utf8(\*OUT27c,
+               "'0.00390625'\n");
+
+my $v27d = 1/256;
+
+format OUT27d =
+'^<'~~
+$v27d
+.
+
+is_format_utf8(\*OUT27d,
+               "'0.'\n'00'\n'39'\n'06'\n'25'\n");
+
+my $v27e = ONumber->new(1/256);
+
+format OUT27e =
+'^<<<<<<<<<'~~
+$v27e
+.
+
+is_format_utf8(\*OUT27e,
+               "'0.00390625'\n");
+
+my $v27f = ONumber->new(1/256);
+
+format OUT27f =
+'^<'~~
+$v27f
+.
+
+is_format_utf8(\*OUT27f,
+               "'0.'\n'00'\n'39'\n'06'\n'25'\n");
+
+{
+    package Ref;
+    use overload '""' => sub {
+       return ${$_[0]};
+    };
+
+    sub new {
+       my $class = shift;
+       my $value = shift;
+       return bless \$value, $class;
+   }
+}
+
+my $v28a = {};
+
+format OUT28a =
+'^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'~~
+$v28a
+.
+
+
+# 'HASH(0x1716b60)     '
+my $qr_hash   = qr/^'HASH\(0x[0-9a-f]+\)\s+'\n$/;
+
+# 'HASH'
+# '(0x1'
+# '716b'
+# 'c0) '
+my $qr_hash_m = qr/^'HASH'\n('[0-9a-fx() ]{4}'\n)+$/;
+
+like_format_utf8(\*OUT28a, $qr_hash);
+
+my $v28b = {};
+
+format OUT28b =
+'^<<<'~~
+$v28b
+.
+
+like_format_utf8(\*OUT28b, $qr_hash_m);
+
+
+tie my $v28c, 'Tie::StdScalar';
+$v28c = {};
+
+format OUT28c =
+'^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'~~
+$v28c
+.
+
+like_format_utf8(\*OUT28c, $qr_hash);
+
+tie my $v28d, 'Tie::StdScalar';
+$v28d = {};
+
+format OUT28d =
+'^<<<'~~
+$v28d
+.
+
+like_format_utf8(\*OUT28d, $qr_hash_m);
+
+my $v28e = Ref->new({});
+
+format OUT28e =
+'^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'~~
+$v28e
+.
+
+like_format_utf8(\*OUT28e, $qr_hash);
+
+my $v28f = Ref->new({});
+
+format OUT28f =
+'^<<<'~~
+$v28f
+.
+
+like_format_utf8(\*OUT28f, $qr_hash_m);
+
+
+
 {
   package Count;
 
@@ -627,8 +1427,11 @@ $_
   # pp_formline attempts to set SvCUR() on an SVt_RV). I suspect that it will
   # be doing something similarly out of bounds on everything from 5.000
   my $ref = [];
-  is swrite('>^*<', $ref), ">$ref<";
-  is swrite('>@*<', $ref), ">$ref<";
+  my $exp = ">$ref<";
+  is swrite('>^*<', $ref), $exp;
+  $ref = [];
+  my $exp = ">$ref<";
+  is swrite('>@*<', $ref), $exp;
 }
 
 format EMPTY =