From 8b0c3377906a6f991cd6c21a674bf9561d85e3cb Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Wed, 5 Oct 2016 10:10:56 +0100 Subject: [PATCH] Better optimise array and hash assignment [perl #127999] Slowdown in split + list assign Re-implement the code that handles e.g. (..., @a) = (...); (..., %h) = (...); to make it a lot faster - more than reversing a performance regression introduced in 5.24.0 - and fix some bugs. In particular, it now special-cases an empty RHS, which just clears the aggregate, e.g. (..., @a) = () Getting list assignment correct is quite tricky, due to the possibility of premature frees, like @a = ($a[0]), and magic/tied values on the LHS or RHS being triggered too soon/late, which might have side-effects. This often requires making a copy of each RHS element (and indeed for assigning to an array or hash, the values need copying anyway). But copying too soon can result in leaked SVs if magic (such as calling FETCH()) dies. This usually involves mortalising all the copies, which slows things down. Further, a bug fix in 5.24.0 added the SV_NOSTEAL flag when copying SVs. This meant in something like @a = (split(...))[0,0], where the two SvTEMPs on the RHS are the same, the first copy is no longer allowed to steal the PVX buffer, which would have made the second SV undef. But this means that PVX buffers are now always copied, which resulted in the slowdown seen in RT #127999. Amongst the general rewriting and optimising, this commit does the following specific things to boost performance (and fix RT #127999). * If the SVs on the RHS are non-magical SvTEMPs with a ref count of 1, then the SV isn't copied; instead it is stored directly in the array/hash. This more than undoes the cost of SV_NOSTEAL. * The tmps stack is now used as a temporary refcounted version of the argument stack frame, meaning that args placed there will be freed on croak. In something like @a = (....), each RHS element is copied, with the copy placed on the temps stack. 
Then @a is cleared. Then the elements on the tmps stack are stored in the array, and removed from the temps stack (with the ownership of 1 reference count transferring from the temps stack to the array). Normally by the time pp_aassign() returns, there is nothing left on the tmps stack and free_tmps() isn't called - this is the novel element that distinguishes this from the normal use of mortalising. * For hash assignment, the keys and values are processed in separate loops, with keys not normally being copied. * The ENTER/SAVEFREESV(ary/hash)/LEAVE has been removed, and the array or hash kept temporarily alive by using the temps stack along with all the other copied SVs. * The main 'for each LHS element' loop has been split into two loops: the second one is run when there are no more RHS elements to consume. The second loop is much simpler, and makes things like @a = () much faster. Here are the average expr::aassign:: benchmarks for selected perls (raw numbers - lower is better) 5.6.1 5.22.0 5.24.0 5.25.5 this ------ ------ ------ ------ ------ Ir 1355.9 1497.8 1387.0 1382.0 1146.6 Dr 417.2 454.2 410.1 411.1 335.2 Dw 260.6 270.8 249.0 246.8 194.5 COND 193.5 223.2 212.0 207.7 174.4 IND 25.3 17.6 10.8 10.8 10.0 COND_m 4.1 3.1 3.1 3.7 2.8 IND_m 8.9 6.1 5.5 5.5 5.5 And this code: my @a; for my $i (1..10_000_000) { @a = (1,2,3); #@a = (); } with the empty assign is 33% faster than blead, and without is 12% faster than blead. 
--- pp_hot.c | 602 +++++++++++++++++++++++++++++++++++++----------------- t/op/aassign.t | 101 ++++++++- t/op/hash.t | 21 ++ t/op/tie.t | 16 ++ t/op/tiearray.t | 34 ++- t/perf/benchmarks | 76 +++++++ 6 files changed, 649 insertions(+), 201 deletions(-) diff --git a/pp_hot.c b/pp_hot.c index b4098d3..3f3ac10 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1250,14 +1250,10 @@ PP(pp_aassign) SV **relem; SV **lelem; - SV *sv; AV *ary; U8 gimme; HV *hash; - SSize_t i; - int magic; - U32 lval; /* PL_delaymagic is restored by JUMPENV_POP on dieing, so we * only need to save locally, not on the save stack */ U16 old_delaymagic = PL_delaymagic; @@ -1286,7 +1282,7 @@ PP(pp_aassign) if (PL_op->op_private & OPpASSIGN_COMMON_RC1) { /* skip the scan if all scalars have a ref count of 1 */ for (lelem = firstlelem; lelem <= lastlelem; lelem++) { - sv = *lelem; + SV *sv = *lelem; if (!sv || SvREFCNT(sv) == 1) continue; if (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVAV) @@ -1318,241 +1314,461 @@ PP(pp_aassign) #endif gimme = GIMME_V; - lval = (gimme == G_ARRAY) ? (PL_op->op_flags & OPf_MOD || LVRET) : 0; - relem = firstrelem; lelem = firstlelem; ary = NULL; hash = NULL; + if (relem > lastrelem) + goto no_relems; + + /* first lelem loop while there are still relems */ while (LIKELY(lelem <= lastlelem)) { bool alias = FALSE; - TAINT_NOT; /* Each item stands on its own, taintwise. */ - sv = *lelem++; - if (UNLIKELY(!sv)) { + SV *lsv = *lelem++; + + assert(relem <= lastrelem); + if (UNLIKELY(!lsv)) { alias = TRUE; - sv = *lelem++; - ASSUME(SvTYPE(sv) == SVt_PVAV); + lsv = *lelem++; + ASSUME(SvTYPE(lsv) == SVt_PVAV); } - switch (SvTYPE(sv)) { - case SVt_PVAV: { - bool already_copied = FALSE; - ary = MUTABLE_AV(sv); - magic = SvMAGICAL(ary) != 0; - ENTER; - SAVEFREESV(SvREFCNT_inc_simple_NN(sv)); - - /* We need to clear ary. The is a danger that if we do this, - * elements on the RHS may be prematurely freed, e.g. 
- * @a = ($a[0]); - * In the case of possible commonality, make a copy of each - * RHS SV *before* clearing the array, and add a reference - * from the tmps stack, so that it doesn't leak on death. - * Otherwise, make a copy of each RHS SV only as we're storing - * it into the array - that way we don't have to worry about - * it being leaked if we die, but don't incur the cost of - * mortalising everything. - */ - if ( (PL_op->op_private & OPpASSIGN_COMMON_AGG) - && (relem <= lastrelem) - && (magic || AvFILL(ary) != -1)) - { - SV **svp; - EXTEND_MORTAL(lastrelem - relem + 1); + switch (SvTYPE(lsv)) { + case SVt_PVAV: { + SV **svp; + SSize_t i; + SSize_t tmps_base; + SSize_t nelems = lastrelem - relem + 1; + + ary = MUTABLE_AV(lsv); + + /* Assigning to an aggregate is tricky. First there is the + * issue of commonality, e.g. @a = ($a[0]). Since the + * stack isn't refcounted, clearing @a prior to storing + * elements will free $a[0]. Similarly with + * sub FETCH { $status[$_[1]] } @status = @tied[0,1]; + * + * The way to avoid these issues is to make the copy of each + * SV (and we normally store a *copy* in the array) *before* + * clearing the array. But this has a problem in that + * if the code croaks during copying, the not-yet-stored copies + * could leak. One way to avoid this is to make all the copies + * mortal, but that's quite expensive. + * + * The current solution to these issues is to use a chunk + * of the tmps stack as a temporary refcounted-stack. SVs + * will be put on there during processing to avoid leaks, + * but will be removed again before the end of this block, + * so free_tmps() is never normally called. Also, the + * sv_refcnt of the SVs doesn't have to be manipulated, since + * the ownership of 1 reference count is transferred directly + * from the tmps stack to the AV when the SV is stored. 
+ * + * We disarm slots in the temps stack by storing PL_sv_undef + * there: it doesn't matter if that SV's refcount is + * repeatedly decremented during a croak. But usually this is + * only an interim measure. By the end of this code block + * we try where possible to not leave any PL_sv_undef's on the + * tmps stack e.g. by shuffling newer entries down. + * + * There is one case where we don't copy: non-magical + * SvTEMP(sv)'s with a ref count of 1. The only owner of these + * is on the tmps stack, so its safe to directly steal the SV + * rather than copying. This is common in things like function + * returns, map etc, which all return a list of such SVs. + * + * Note however something like @a = (f())[0,0], where there is + * a danger of the same SV being shared: this avoided because + * when the SV is stored as $a[0], its ref count gets bumped, + * so the RC==1 test fails and the second element is copied + * instead. + * + * We also use one slot in the tmps stack to hold an extra + * ref to the array, to ensure it doesn't get prematurely + * freed. Again, this is removed before the end of this block. + * + * Note that OPpASSIGN_COMMON_AGG is used to flag a possible + * @a = ($a[0]) case, but the current implementation uses the + * same algorithm regardless, so ignores that flag. (It *is* + * used in the hash branch below, however). + */ + + /* Reserve slots for ary, plus the elems we're about to copy, + * then protect ary and temporarily void the remaining slots + * with &PL_sv_undef */ + EXTEND_MORTAL(nelems + 1); + PL_tmps_stack[++PL_tmps_ix] = SvREFCNT_inc_simple_NN(ary); + tmps_base = PL_tmps_ix + 1; + for (i = 0; i < nelems; i++) + PL_tmps_stack[tmps_base + i] = &PL_sv_undef; + PL_tmps_ix += nelems; + + /* Make a copy of each RHS elem and save on the tmps_stack + * (or pass through where we can optimise away the copy) */ + + if (UNLIKELY(alias)) { + U32 lval = (gimme == G_ARRAY) + ? 
(PL_op->op_flags & OPf_MOD || LVRET) : 0; for (svp = relem; svp <= lastrelem; svp++) { - /* see comment in S_aassign_copy_common about SV_NOSTEAL */ - *svp = sv_mortalcopy_flags(*svp, - SV_GMAGIC|SV_DO_COW_SVSETSV|SV_NOSTEAL); - TAINT_NOT; + SV *rsv = *svp; + + SvGETMAGIC(rsv); + if (!SvROK(rsv)) + DIE(aTHX_ "Assigned value is not a reference"); + if (SvTYPE(SvRV(rsv)) > SVt_PVLV) + /* diag_listed_as: Assigned value is not %s reference */ + DIE(aTHX_ + "Assigned value is not a SCALAR reference"); + if (lval) + *svp = rsv = sv_mortalcopy(rsv); + /* XXX else check for weak refs? */ + rsv = SvREFCNT_inc_NN(SvRV(rsv)); + assert(tmps_base <= PL_tmps_max); + PL_tmps_stack[tmps_base++] = rsv; } - already_copied = TRUE; } + else { + for (svp = relem; svp <= lastrelem; svp++) { + SV *rsv = *svp; - av_clear(ary); - if (relem <= lastrelem) - av_extend(ary, lastrelem - relem); - - i = 0; - while (relem <= lastrelem) { /* gobble up all the rest */ - SV **didstore; - if (LIKELY(!alias)) { - if (already_copied) - sv = *relem; + if (SvTEMP(rsv) && !SvGMAGICAL(rsv) && SvREFCNT(rsv) == 1) { + /* can skip the copy */ + SvREFCNT_inc_simple_void_NN(rsv); + SvTEMP_off(rsv); + } else { - if (LIKELY(*relem)) - /* before newSV, in case it dies */ - SvGETMAGIC(*relem); - sv = newSV(0); + SV *nsv; + /* do get before newSV, in case it dies and leaks */ + SvGETMAGIC(rsv); + nsv = newSV(0); /* see comment in S_aassign_copy_common about * SV_NOSTEAL */ - sv_setsv_flags(sv, *relem, - (SV_DO_COW_SVSETSV|SV_NOSTEAL)); - *relem = sv; + sv_setsv_flags(nsv, rsv, + (SV_DO_COW_SVSETSV|SV_NOSTEAL)); + rsv = *svp = nsv; } - } - else { - if (!already_copied) - SvGETMAGIC(*relem); - if (!SvROK(*relem)) - DIE(aTHX_ "Assigned value is not a reference"); - if (SvTYPE(SvRV(*relem)) > SVt_PVLV) - /* diag_listed_as: Assigned value is not %s reference */ - DIE(aTHX_ - "Assigned value is not a SCALAR reference"); - if (lval && !already_copied) - *relem = sv_mortalcopy(*relem); - /* XXX else check for weak refs? 
*/ - sv = SvREFCNT_inc_NN(SvRV(*relem)); - } - relem++; - if (already_copied) - SvREFCNT_inc_simple_void_NN(sv); /* undo mortal free */ - didstore = av_store(ary,i++,sv); - if (magic) { - if (!didstore) - sv_2mortal(sv); - if (SvSMAGICAL(sv)) - mg_set(sv); - } - TAINT_NOT; - } + + assert(tmps_base <= PL_tmps_max); + PL_tmps_stack[tmps_base++] = rsv; + } + } + + if (SvRMAGICAL(ary) || AvFILLp(ary) >= 0) /* may be non-empty */ + av_clear(ary); + + /* store in the array, the SVs that are in the tmps stack */ + + tmps_base -= nelems; + + if (SvRMAGICAL(ary) || SvREADONLY(ary) || !AvREAL(ary)) { + /* for arrays we can't cheat with, use the official API */ + av_extend(ary, nelems - 1); + for (i = 0; i < nelems; i++) { + SV **svp = &(PL_tmps_stack[tmps_base + i]); + SV *rsv = *svp; + /* A tied store won't take ownership of rsv, so keep + * the 1 refcnt on the tmps stack; otherwise disarm + * the tmps stack entry */ + if (av_store(ary, i, rsv)) + *svp = &PL_sv_undef; + /* av_store() may have added set magic to rsv */; + SvSETMAGIC(rsv); + } + /* disarm ary refcount: see comments below about leak */ + PL_tmps_stack[tmps_base - 1] = &PL_sv_undef; + } + else { + /* directly access/set the guts of the AV */ + SSize_t fill = nelems - 1; + if (fill > AvMAX(ary)) + av_extend_guts(ary, fill, &AvMAX(ary), &AvALLOC(ary), + &AvARRAY(ary)); + AvFILLp(ary) = fill; + Copy(&(PL_tmps_stack[tmps_base]), AvARRAY(ary), nelems, SV*); + /* Quietly remove all the SVs from the tmps stack slots, + * since ary has now taken ownership of the refcnt. 
+ * Also remove ary: which will now leak if we die before + * the SvREFCNT_dec_NN(ary) below */ + if (UNLIKELY(PL_tmps_ix >= tmps_base + nelems)) + Move(&PL_tmps_stack[tmps_base + nelems], + &PL_tmps_stack[tmps_base - 1], + PL_tmps_ix - (tmps_base + nelems) + 1, + SV*); + PL_tmps_ix -= (nelems + 1); + } + if (UNLIKELY(PL_delaymagic & DM_ARRAY_ISA)) + /* its assumed @ISA set magic can't die and leak ary */ SvSETMAGIC(MUTABLE_SV(ary)); - LEAVE; - break; + SvREFCNT_dec_NN(ary); + + relem = lastrelem + 1; + goto no_relems; } case SVt_PVHV: { /* normal hash */ - SV *tmpstr; - int odd; - int duplicates = 0; - SV** topelem = relem; - SV **firsthashrelem = relem; - bool already_copied = FALSE; - - hash = MUTABLE_HV(sv); - magic = SvMAGICAL(hash) != 0; - - odd = ((lastrelem - firsthashrelem)&1)? 0 : 1; - if (UNLIKELY(odd)) { - do_oddball(lastrelem, firsthashrelem); - /* we have firstlelem to reuse, it's not needed anymore - */ - *(lastrelem+1) = &PL_sv_undef; + + SV **svp; + bool dirty_tmps; + SSize_t i; + SSize_t tmps_base; + SSize_t nelems = lastrelem - relem + 1; + + hash = MUTABLE_HV(lsv); + + if (UNLIKELY(nelems & 1)) { + do_oddball(lastrelem, relem); + /* we have firstlelem to reuse, it's not needed any more */ + *++lastrelem = &PL_sv_undef; + nelems++; + } + + /* See the SVt_PVAV branch above for a long description of + * how the following all works. The main difference for hashes + * is that we treat keys and values separately (and have + * separate loops for them): as for arrays, values are always + * copied (except for the SvTEMP optimisation), since they + * need to be stored in the hash; while keys are only + * processed where they might get prematurely freed or + * whatever. 
*/ + + /* tmps stack slots: + * * reserve a slot for the hash keepalive; + * * reserve slots for the hash values we're about to copy; + * * preallocate for the keys we'll possibly copy or refcount bump + * later; + * then protect hash and temporarily void the remaining + * value slots with &PL_sv_undef */ + EXTEND_MORTAL(nelems + 1); + + /* convert to number of key/value pairs */ + nelems >>= 1; + + PL_tmps_stack[++PL_tmps_ix] = SvREFCNT_inc_simple_NN(hash); + tmps_base = PL_tmps_ix + 1; + for (i = 0; i < nelems; i++) + PL_tmps_stack[tmps_base + i] = &PL_sv_undef; + PL_tmps_ix += nelems; + + /* Make a copy of each RHS hash value and save on the tmps_stack + * (or pass through where we can optimise away the copy) */ + + for (svp = relem + 1; svp <= lastrelem; svp += 2) { + SV *rsv = *svp; + + if (SvTEMP(rsv) && !SvGMAGICAL(rsv) && SvREFCNT(rsv) == 1) { + /* can skip the copy */ + SvREFCNT_inc_simple_void_NN(rsv); + SvTEMP_off(rsv); + } + else { + SV *nsv; + /* do get before newSV, in case it dies and leaks */ + SvGETMAGIC(rsv); + nsv = newSV(0); + /* see comment in S_aassign_copy_common about + * SV_NOSTEAL */ + sv_setsv_flags(nsv, rsv, + (SV_DO_COW_SVSETSV|SV_NOSTEAL)); + rsv = *svp = nsv; } - ENTER; - SAVEFREESV(SvREFCNT_inc_simple_NN(sv)); + assert(tmps_base <= PL_tmps_max); + PL_tmps_stack[tmps_base++] = rsv; + } + tmps_base -= nelems; - /* We need to clear hash. The is a danger that if we do this, - * elements on the RHS may be prematurely freed, e.g. - * %h = (foo => $h{bar}); - * In the case of possible commonality, make a copy of each - * RHS SV *before* clearing the hash, and add a reference - * from the tmps stack, so that it doesn't leak on death. - */ - if ( (PL_op->op_private & OPpASSIGN_COMMON_AGG) - && (relem <= lastrelem) - && (magic || HvUSEDKEYS(hash))) - { - SV **svp; - EXTEND_MORTAL(lastrelem - relem + 1); - for (svp = relem; svp <= lastrelem; svp++) { + /* possibly protect keys */ + + if (UNLIKELY(gimme == G_ARRAY)) { + /* handle e.g. 
+ * @a = ((%h = ($$r, 1)), $r = "x"); + * $_++ for %h = (1,2,3,4); + */ + EXTEND_MORTAL(nelems); + for (svp = relem; svp <= lastrelem; svp += 2) + *svp = sv_mortalcopy_flags(*svp, + SV_GMAGIC|SV_DO_COW_SVSETSV|SV_NOSTEAL); + } + else if (PL_op->op_private & OPpASSIGN_COMMON_AGG) { + /* for possible commonality, e.g. + * %h = ($h{a},1) + * avoid premature freeing RHS keys by mortalising + * them. + * For a magic element, make a copy so that its magic is + * called *before* the hash is emptied (which may affect + * a tied value for example). + * In theory we should check for magic keys in all + * cases, not just under OPpASSIGN_COMMON_AGG, but in + * practice, !OPpASSIGN_COMMON_AGG implies only + * constants or padtmps on the RHS. + */ + EXTEND_MORTAL(nelems); + for (svp = relem; svp <= lastrelem; svp += 2) { + SV *rsv = *svp; + if (UNLIKELY(SvGMAGICAL(rsv))) { + SSize_t n; *svp = sv_mortalcopy_flags(*svp, SV_GMAGIC|SV_DO_COW_SVSETSV|SV_NOSTEAL); - TAINT_NOT; + /* allow other branch to continue pushing + * onto tmps stack without checking each time */ + n = (lastrelem - relem) >> 1; + EXTEND_MORTAL(n); } - already_copied = TRUE; + else + PL_tmps_stack[++PL_tmps_ix] = + SvREFCNT_inc_simple_NN(rsv); } + } - hv_clear(hash); - - while (LIKELY(relem < lastrelem+odd)) { /* gobble up all the rest */ - HE *didstore; - assert(*relem); - /* Copy the key if aassign is called in lvalue context, - to avoid having the next op modify our rhs. Copy - it also if it is gmagical, lest it make the - hv_store_ent call below croak, leaking the value. */ - sv = (lval || SvGMAGICAL(*relem)) && !already_copied - ? 
sv_mortalcopy(*relem) - : *relem; - relem++; - assert(*relem); - if (already_copied) - tmpstr = *relem++; - else { - SvGETMAGIC(*relem); - tmpstr = newSV(0); - sv_setsv_nomg(tmpstr,*relem++); /* value */ - } + if (SvRMAGICAL(hash) || HvUSEDKEYS(hash)) + hv_clear(hash); - if (gimme == G_ARRAY) { - if (hv_exists_ent(hash, sv, 0)) - /* key overwrites an existing entry */ - duplicates += 2; - else { - /* copy element back: possibly to an earlier - * stack location if we encountered dups earlier, - * possibly to a later stack location if odd */ - *topelem++ = sv; - *topelem++ = tmpstr; - } - } - if (already_copied) - SvREFCNT_inc_simple_void_NN(tmpstr); /* undo mortal free */ - didstore = hv_store_ent(hash,sv,tmpstr,0); - if (magic) { - if (!didstore) sv_2mortal(tmpstr); - SvSETMAGIC(tmpstr); + /* now assign the keys and values to the hash */ + + dirty_tmps = FALSE; + + if (UNLIKELY(gimme == G_ARRAY)) { + /* @a = (%h = (...)) etc */ + SV **svp; + SV **topelem = relem; + + for (i = 0, svp = relem; svp <= lastrelem; i++, svp++) { + SV *key = *svp++; + SV *val = *svp; + /* remove duplicates from list we return */ + if (!hv_exists_ent(hash, key, 0)) { + /* copy key back: possibly to an earlier + * stack location if we encountered dups earlier, + * The values will be updated later + */ + *topelem = key; + topelem += 2; } - TAINT_NOT; - } - LEAVE; - if (duplicates && gimme == G_ARRAY) { + /* A tied store won't take ownership of val, so keep + * the 1 refcnt on the tmps stack; otherwise disarm + * the tmps stack entry */ + if (hv_store_ent(hash, key, val, 0)) + PL_tmps_stack[tmps_base + i] = &PL_sv_undef; + else + dirty_tmps = TRUE; + /* hv_store_ent() may have added set magic to val */; + SvSETMAGIC(val); + } + if (topelem < svp) { /* at this point we have removed the duplicate key/value * pairs from the stack, but the remaining values may be * wrong; i.e. 
with (a 1 a 2 b 3) on the stack we've removed * the (a 2), but the stack now probably contains * (a b 3), because { hv_save(a,1); hv_save(a,2) } * obliterates the earlier key. So refresh all values. */ - lastrelem -= duplicates; - relem = firsthashrelem; - while (relem < lastrelem+odd) { + lastrelem = topelem - 1; + while (relem < lastrelem) { HE *he; he = hv_fetch_ent(hash, *relem++, 0, 0); *relem++ = (he ? HeVAL(he) : &PL_sv_undef); } } - if (odd && gimme == G_ARRAY) lastrelem++; - } - break; + } + else { + SV **svp; + for (i = 0, svp = relem; svp <= lastrelem; i++, svp++) { + SV *key = *svp++; + SV *val = *svp; + if (hv_store_ent(hash, key, val, 0)) + PL_tmps_stack[tmps_base + i] = &PL_sv_undef; + else + dirty_tmps = TRUE; + /* hv_store_ent() may have added set magic to val */; + SvSETMAGIC(val); + } + } + + if (dirty_tmps) { + /* there are still some 'live' recounts on the tmps stack + * - usually caused by storing into a tied hash. So let + * free_tmps() do the proper but slow job later. + * Just disarm hash refcount: see comments below about leak + */ + PL_tmps_stack[tmps_base - 1] = &PL_sv_undef; + } + else { + /* Quietly remove all the SVs from the tmps stack slots, + * since hash has now taken ownership of the refcnt. 
+ * Also remove hash: which will now leak if we die before + * the SvREFCNT_dec_NN(hash) below */ + if (UNLIKELY(PL_tmps_ix >= tmps_base + nelems)) + Move(&PL_tmps_stack[tmps_base + nelems], + &PL_tmps_stack[tmps_base - 1], + PL_tmps_ix - (tmps_base + nelems) + 1, + SV*); + PL_tmps_ix -= (nelems + 1); + } + + SvREFCNT_dec_NN(hash); + + relem = lastrelem + 1; + goto no_relems; + } + default: - if (SvIMMORTAL(sv)) { - if (relem <= lastrelem) - relem++; - break; - } - if (relem <= lastrelem) { - if (UNLIKELY( - SvTEMP(sv) && !SvSMAGICAL(sv) && SvREFCNT(sv) == 1 && - (!isGV_with_GP(sv) || SvFAKE(sv)) && ckWARN(WARN_MISC) - )) - Perl_warner(aTHX_ - packWARN(WARN_MISC), - "Useless assignment to a temporary" - ); - sv_setsv(sv, *relem); - *(relem++) = sv; - } - else - sv_setsv(sv, &PL_sv_undef); - SvSETMAGIC(sv); + if (!SvIMMORTAL(lsv)) { + if (UNLIKELY( + SvTEMP(lsv) && !SvSMAGICAL(lsv) && SvREFCNT(lsv) == 1 && + (!isGV_with_GP(lsv) || SvFAKE(lsv)) && ckWARN(WARN_MISC) + )) + Perl_warner(aTHX_ + packWARN(WARN_MISC), + "Useless assignment to a temporary" + ); + sv_setsv(lsv, *relem); + *relem = lsv; + SvSETMAGIC(lsv); + } + if (++relem > lastrelem) + goto no_relems; break; + } /* switch */ + } /* while */ + + + no_relems: + + /* simplified lelem loop for when there are no relems left */ + while (LIKELY(lelem <= lastlelem)) { + SV *lsv = *lelem++; + if (UNLIKELY(!lsv)) { + lsv = *lelem++; + ASSUME(SvTYPE(lsv) == SVt_PVAV); } - } + + switch (SvTYPE(lsv)) { + case SVt_PVAV: + ary = MUTABLE_AV(lsv); + if (SvRMAGICAL(ary) || AvFILLp(ary) >= 0) { + av_clear(ary); + if (UNLIKELY(PL_delaymagic & DM_ARRAY_ISA)) + SvSETMAGIC(MUTABLE_SV(ary)); + } + break; + + case SVt_PVHV: + hash = MUTABLE_HV(lsv); + if (SvRMAGICAL(hash) || HvUSEDKEYS(hash)) + hv_clear(hash); + break; + + default: + if (!SvIMMORTAL(lsv)) { + sv_setsv(lsv, &PL_sv_undef); + SvSETMAGIC(lsv); + } + break; + } /* switch */ + } /* while */ + if (UNLIKELY(PL_delaymagic & ~DM_DELAY)) { /* Will be used to set PL_tainting 
below */ Uid_t tmp_uid = PerlProc_getuid(); @@ -1647,7 +1863,7 @@ PP(pp_aassign) else if (gimme == G_SCALAR) { dTARGET; SP = firstrelem; - SETi(lastrelem - firstrelem + 1); + SETi(firstlelem - firstrelem); } else { if (ary || hash) diff --git a/t/op/aassign.t b/t/op/aassign.t index e894841..063c5a1 100644 --- a/t/op/aassign.t +++ b/t/op/aassign.t @@ -303,6 +303,9 @@ SKIP: { # (...) = (f())[0,0] # the same TEMP RHS element may be used more than once, so when copying # it, we mustn't steal its buffer. +# DAPM 10/2016 - but in that case the SvTEMP flag is sometimes getting +# cleared: using split() instead as a source of temps seems more reliable, +# so I've added split variants too. { # a string long enough for COW and buffer stealing to be enabled @@ -311,28 +314,81 @@ SKIP: { # a sub that is intended to return a TEMP string that isn't COW # the concat returns a non-COW PADTMP; pp_leavesub sees a long # stealable string, so creates a TEMP with the stolen buffer from the - # PADTMP - hence it returns a non-COW string + # PADTMP - hence it returns a non-COW string. It also returns a couple + # of key strings for the hash tests sub f18 { my $x = "abc"; - $x . 
$long, "key1", "key2"); } - my @a; + my (@a, %h); # with @a initially empty,the code path creates a new copy of each # RHS element to store in the array @a = (f18())[0,0]; - is (substr($a[0], 0, 7), "abcdefx", 'NOSTEAL empty $a[0]'); - is (substr($a[1], 0, 7), "abcdefx", 'NOSTEAL empty $a[1]'); + is (substr($a[0], 0, 7), "abcdefx", 'NOSTEAL f18 empty $a[0]'); + is (substr($a[1], 0, 7), "abcdefx", 'NOSTEAL f18 empty $a[1]'); + @a = (split /-/, "abc-def")[0,0]; + is ($a[0], "abc", 'NOSTEAL split empty $a[0]'); + is ($a[1], "abc", 'NOSTEAL split empty $a[1]'); # with @a initially non-empty, it takes a different code path that # makes a mortal copy of each RHS element @a = 1..3; @a = (f18())[0,0]; - is (substr($a[0], 0, 7), "abcdefx", 'NOSTEAL non-empty $a[0]'); - is (substr($a[1], 0, 7), "abcdefx", 'NOSTEAL non-empty $a[1]'); + is (substr($a[0], 0, 7), "abcdefx", 'NOSTEAL f18 non-empty $a[0]'); + is (substr($a[1], 0, 7), "abcdefx", 'NOSTEAL f18 non-empty $a[1]'); + @a = 1..3; + @a = (split /-/, "abc-def")[0,0]; + is ($a[0], "abc", 'NOSTEAL split non-empty $a[0]'); + is ($a[1], "abc", 'NOSTEAL split non-empty $a[1]'); + + # similarly with PADTMPs + + @a = (); + @a = ($long . "x")[0,0]; + is (substr($a[0], 0, 4), "defx", 'NOSTEAL PADTMP empty $a[0]'); + is (substr($a[1], 0, 4), "defx", 'NOSTEAL PADTMP empty $a[1]'); + @a = 1..3; + @a = ($long . 
"x")[0,0]; + is (substr($a[0], 0, 4), "defx", 'NOSTEAL PADTMP non-empty $a[0]'); + is (substr($a[1], 0, 4), "defx", 'NOSTEAL PADTMP non-empty $a[1]'); + + # as above, but assigning to a hash + + %h = (f18())[1,0,2,0]; + is (substr($h{key1}, 0, 7), "abcdefx", 'NOSTEAL f18 empty $h{key1}'); + is (substr($h{key2}, 0, 7), "abcdefx", 'NOSTEAL f18 empty $h{key2}'); + %h = (split /-/, "key1-val-key2")[0,1,2,1]; + is ($h{key1}, "val", 'NOSTEAL split empty $h{key1}'); + is ($h{key2}, "val", 'NOSTEAL split empty $h{key2}'); + + %h = qw(key1 foo key2 bar key3 baz); + %h = (f18())[1,0,2,0]; + is (substr($h{key1}, 0, 7), "abcdefx", 'NOSTEAL f18 non-empty $h{key1}'); + is (substr($h{key2}, 0, 7), "abcdefx", 'NOSTEAL f18 non-empty $h{key2}'); + %h = qw(key1 foo key2 bar key3 baz); + %h = (split /-/, "key1-val-key2")[0,1,2,1]; + is ($h{key1}, "val", 'NOSTEAL split non-empty $h{key1}'); + is ($h{key2}, "val", 'NOSTEAL split non-empty $h{key2}'); + + %h = (); + %h = ($long . "x", "key1", "key2")[1,0,2,0]; + is (substr($h{key1}, 0, 4), "defx", 'NOSTEAL PADTMP empty $h{key1}'); + is (substr($h{key2}, 0, 4), "defx", 'NOSTEAL PADTMP empty $h{key2}'); + + %h = qw(key1 foo key2 bar key3 baz); + %h = ($long . 
"x", "key1", "key2")[1,0,2,0]; + is (substr($h{key1}, 0, 4), "defx", 'NOSTEAL PADTMP non-empty $h{key1}'); + is (substr($h{key2}, 0, 4), "defx", 'NOSTEAL PADTMP non-empty $h{key2}'); + + # both keys and values stealable + @a = (%h = (split /-/, "abc-def")[0,1,0,1]); + is (join(':', keys %h), "abc", "NOSTEAL split G_ARRAY keys"); + is (join(':', values %h), "def", "NOSTEAL split G_ARRAY values"); + is (join(':', @a), "abc:def", "NOSTEAL split G_ARRAY result"); } { @@ -395,4 +451,35 @@ SKIP: { } } +{ + # check that a second aggregate is empted but doesn't suck up + # anything random + + my (@a, @b) = qw(x y); + is(+@a, 2, "double array A len"); + is(+@b, 0, "double array B len"); + is("@a", "x y", "double array A contents"); + + @a = 1..10; + @b = 100..200; + (@a, @b) = qw(x y); + is(+@a, 2, "double array non-empty A len"); + is(+@b, 0, "double array non-empty B len"); + is("@a", "x y", "double array non-empty A contents"); + + my (%a, %b) = qw(k1 v1 k2 v2); + is(+(keys %a), 2, "double hash A len"); + is(+(keys %b), 0, "double hash B len"); + is(join(' ', sort keys %a), "k1 k2", "double hash A keys"); + is(join(' ', sort values %a), "v1 v2", "double hash A values"); + + %a = 1..10; + %b = 101..200; + (%a, %b) = qw(k1 v1 k2 v2); + is(+(keys %a), 2, "double hash non-empty A len"); + is(+(keys %b), 0, "double hash non-empty B len"); + is(join(' ', sort keys %a), "k1 k2", "double hash non-empty A keys"); + is(join(' ', sort values %a), "v1 v2", "double hash non-empty A values"); +} + done_testing(); diff --git a/t/op/hash.t b/t/op/hash.t index 1f8a550..a0e79c7 100644 --- a/t/op/hash.t +++ b/t/op/hash.t @@ -230,4 +230,25 @@ if (is_miniperl) { is(join(':', %h), 'x:', 'hash self-assign'); } +# magic keys and values should be evaluated before the hash on the LHS is +# cleared + +package Magic { + my %inner; + sub TIEHASH { bless [] } + sub FETCH { $inner{$_[1]} } + sub STORE { $inner{$_[1]} = $_[2]; } + sub CLEAR { %inner = () } + + my (%t1, %t2); + tie %t1, 'Magic'; + tie 
%t2, 'Magic'; + + %inner = qw(a x b y); + %t1 = (@t2{'a','b'}); + ::is(join( ':', %inner), "x:y", "magic keys"); +} + + + done_testing(); diff --git a/t/op/tie.t b/t/op/tie.t index 6c13bee..cbae110 100644 --- a/t/op/tie.t +++ b/t/op/tie.t @@ -1473,3 +1473,19 @@ print "b is $b\n"; EXPECT a is 3 b is 7 +######## +# when assigning to array/hash, ensure get magic is processed first +use Tie::Hash; +my %tied; +tie %tied, "Tie::StdHash"; +%tied = qw(a foo); +my @a = values %tied; +%tied = qw(b bar); # overwrites @a's contents unless magic was called +print "$a[0]\n"; +my %h = ("x", values %tied); +%tied = qw(c baz); # overwrites @a's contents unless magic was called +print "$h{x}\n"; + +EXPECT +foo +bar diff --git a/t/op/tiearray.t b/t/op/tiearray.t index 1b9149c..000e3e5 100644 --- a/t/op/tiearray.t +++ b/t/op/tiearray.t @@ -147,7 +147,7 @@ sub FETCHSIZE { -1 } package main; -plan(tests => 69); +plan(tests => 73); {my @ary; @@ -298,3 +298,35 @@ untie @ary; } is($seen{'DESTROY'}, 3); + +{ + # check that a tied element assigned to an array doesn't remain tied + + package Magical; + + my $i = 10; + + sub TIEARRAY { bless [1] } + sub TIEHASH { bless [1] } + sub FETCHSIZE { 1; } + sub FETCH { $i++ } + sub STORE { $_[0][0] = $_[1]; } + sub FIRSTKEY { 0 } + sub NEXTKEY { } + + package main; + + my (@a, @b); + tie @a, 'Magical'; + @b = @a; + is ($b[0], 10, "Magical array fetch 1"); + $b[0] = 100; + is ($b[0], 100, "Magical array fetch 2"); + + my (%a, %b); + tie %a, 'Magical'; + %b = %a; + is ($b{0}, 11, "Magical hash fetch 1"); + $b{0} = 100; + is ($b{0}, 100, "Magical hash fetch 2"); +} diff --git a/t/perf/benchmarks b/t/perf/benchmarks index 56987bc..a06921a 100644 --- a/t/perf/benchmarks +++ b/t/perf/benchmarks @@ -418,6 +418,21 @@ setup => 'my ($x, @a) = 1..4;', code => '($x, @a) = ()', }, + 'expr::aassign::mh_empty' => { + desc => 'my hash assigned empty', + setup => '', + code => 'my %h = ()', + }, + 'expr::aassign::lhx_empty' => { + desc => 'non-empty lexical hash 
assigned empty', + setup => 'my %h = 1..4;', + code => '%h = ()', + }, + 'expr::aassign::llhx_empty' => { + desc => 'non-empty lexical var and hash assigned empty', + setup => 'my ($x, %h) = 1..5;', + code => '($x, %h) = ()', + }, 'expr::aassign::3m_empty' => { desc => 'three my vars assigned empty', setup => '', @@ -461,6 +476,21 @@ setup => 'my ($x, @a) = 1..4;', code => '($x, @a) = (1,2,3)', }, + 'expr::aassign::mh_4c' => { + desc => 'my hash assigned 4 consts', + setup => '', + code => 'my %h = qw(a 1 b 2)', + }, + 'expr::aassign::lhx_4c' => { + desc => 'non-empty lexical hash assigned 4 consts', + setup => 'my %h = qw(a 1 b 2);', + code => '%h = qw(c 3 d 4)', + }, + 'expr::aassign::llhx_5c' => { + desc => 'non-empty lexical var and array assigned 5 consts', + setup => 'my ($x, %h) = (1, qw(a 1 b 2));', + code => '($x, %h) = (10, qw(c 3 d 4))', + }, 'expr::aassign::3m_3c' => { desc => 'three my vars assigned 3 consts', setup => '', @@ -781,6 +811,52 @@ code => '($x,$x) = (undef, $x)', }, + # array assign of strings + + 'expr::aassign::la_3s' => { + desc => 'assign 3 strings to empty lexical array', + setup => 'my @a', + code => '@a = (); @a = qw(abc defg hijkl);', + }, + 'expr::aassign::la_3ts' => { + desc => 'assign 3 temp strings to empty lexical array', + setup => 'my @a', + code => '@a = (); @a = map $_, qw(abc defg hijkl);', + }, + 'expr::aassign::lan_3s' => { + desc => 'assign 3 strings to non-empty lexical array', + setup => 'my @a = qw(abc defg hijkl)', + code => '@a = qw(abc defg hijkl);', + }, + 'expr::aassign::lan_3ts' => { + desc => 'assign 3 temp strings to non-empty lexical array', + setup => 'my @a = qw(abc defg hijkl)', + code => '@a = map $_, qw(abc defg hijkl);', + }, + + # hash assign of strings + + 'expr::aassign::lh_2s' => { + desc => 'assign 2 strings to empty lexical hash', + setup => 'my %h', + code => '%h = (); %h = qw(k1 abc k2 defg);', + }, + 'expr::aassign::lh_2ts' => { + desc => 'assign 2 temp strings to empty lexical hash', + setup 
=> 'my %h', + code => '%h = (); %h = map $_, qw(k1 abc k2 defg);', + }, + 'expr::aassign::lhn_2s' => { + desc => 'assign 2 strings to non-empty lexical hash', + setup => 'my %h = qw(k1 abc k2 defg);', + code => '%h = qw(k1 abc k2 defg);', + }, + 'expr::aassign::lhn_2ts' => { + desc => 'assign 2 temp strings to non-empty lexical hash', + setup => 'my %h = qw(k1 abc k2 defg);', + code => '%h = map $_, qw(k1 abc k2 defg);', + }, + 'expr::arith::add_lex_ii' => { desc => 'add two integers and assign to a lexical var', -- 1.8.3.1