This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
doop.c: Refactor do_trans_complex()
authorKarl Williamson <khw@cpan.org>
Mon, 4 Nov 2019 21:38:58 +0000 (14:38 -0700)
committerKarl Williamson <khw@cpan.org>
Thu, 7 Nov 2019 04:22:24 +0000 (21:22 -0700)
I had trouble understanding how this uncommented routine worked.  And it
turned out to be broken, squeezing the pre-transliterated characters
instead of the post-transliterated ones.  This fixes the TODO test added
in the previous commit.

doop.c
t/op/tr.t

diff --git a/doop.c b/doop.c
index bd7b7c5..e0d63f1 100644 (file)
--- a/doop.c
+++ b/doop.c
@@ -165,6 +165,7 @@ S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl)
     U8 *s = (U8*)SvPV_nomg(sv, len);
     U8 * const send = s+len;
     Size_t matches = 0;
+    const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT);
 
     PERL_ARGS_ASSERT_DO_TRANS_COMPLEX;
 
@@ -173,19 +174,31 @@ S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl)
        U8 * const dstart = d;
 
        if (PL_op->op_private & OPpTRANS_SQUASH) {
-           const U8* p = send;
+
+            /* What the mapping of the previous character was to.  If the new
+             * character has the same mapping, it is squashed from the output
+             * (but still is included in the count) */
+            short previous_map = (short) TR_OOB;
+
            while (s < send) {
                const short this_map = tbl->map[*s];
                if (this_map >= 0) {
-                   *d = (U8)this_map;
-                   matches++;
-                   if (p != d - 1 || *p != *d)
-                       p = d++;
+                    matches++;
+                    if (this_map != previous_map) {
+                        *d++ = (U8)this_map;
+                        previous_map = this_map;
+                    }
                }
-               else if (this_map == (short) TR_UNMAPPED)
-                   *d++ = *s;
-               else if (ch == (short) TR_DELETE)
-                   matches++;
+               else {
+                    if (this_map == (short) TR_UNMAPPED)
+                        *d++ = *s;
+                    else {
+                        assert(this_map == (short) TR_DELETE);
+                        matches++;
+                    }
+                    previous_map = (short) TR_OOB;
+                }
+
                s++;
            }
        }
@@ -235,7 +248,11 @@ S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl)
             UV     ch;
             short sch;
 
-            sch = tbl->map[comp >= size ? size : comp];
+            sch = (comp < size)
+                  ? tbl->map[comp]
+                  : (! complement)
+                    ? (short) TR_UNMAPPED
+                    : tbl->map[size];
 
             if (sch >= 0) {
                 ch = (UV)sch;
index b7c78d1..24dd3cb 100644 (file)
--- a/t/op/tr.t
+++ b/t/op/tr.t
@@ -454,12 +454,8 @@ SKIP: {
         skip "Not valid only for EBCDIC", 4;
     }
     $s = $all255_twice;
-
-    {
-    local $TODO = 'tr/// broken for /sd';
     $c = $s =~ tr/[](){}<>\x00-\xff/[[(({{<</sd;
     is $s, "(<<[[{{", 'tr/[](){}<>\x00-\xff/[[(({{<</sd';
-    }
     is $c, 512, "count of above";
 
     $s = $all255_plus;