+
+
+/*
+ ---------------------------------------------------------
+
+ Common vars in list assignment
+
+ There now follows some enums and static functions for detecting
+ common variables in list assignments. Here is a little essay I wrote
+ for myself when trying to get my head around this. DAPM.
+
+ ----
+
+ First some random observations:
+
+ * If a lexical var is an alias of something else, e.g.
+ for my $x ($lex, $pkg, $a[0]) {...}
+ then the act of aliasing will increase the reference count of the SV
+
+ * If a package var is an alias of something else, it may still have a
+ reference count of 1, depending on how the alias was created, e.g.
+ in *a = *b, $a may have a refcount of 1 since the GP is shared
+ with a single GvSV pointer to the SV. So If it's an alias of another
+ package var, then RC may be 1; if it's an alias of another scalar, e.g.
+ a lexical var or an array element, then it will have RC > 1.
+
+ * There are many ways to create a package alias; ultimately, XS code
+ may quite legally do GvSV(gv) = SvREFCNT_inc(sv) for example, so
+ run-time tracing mechanisms are unlikely to be able to catch all cases.
+
+ * When the LHS is all my declarations, the same vars can't appear directly
+ on the RHS, but they can indirectly via closures, aliasing and lvalue
+ subs. But those techniques all involve an increase in the lexical
+ scalar's ref count.
+
+ * When the LHS is all lexical vars (but not necessarily my declarations),
+ it is possible for the same lexicals to appear directly on the RHS, and
+ without an increased ref count, since the stack isn't refcounted.
+ This case can be detected at compile time by scanning for common lex
+ vars with PL_generation.
+
+ * lvalue subs defeat common var detection, but they do at least
+ return vars with a temporary ref count increment. Also, you can't
+ tell at compile time whether a sub call is lvalue.
+
+
+ So...
+
+ A: There are a few circumstances where there definitely can't be any
+ commonality:
+
+ LHS empty: () = (...);
+ RHS empty: (....) = ();
+ RHS contains only constants or other 'can't possibly be shared'
+ elements (e.g. ops that return PADTMPs): (...) = (1,2, length)
+ i.e. they only contain ops not marked as dangerous, whose children
+ are also not dangerous;
+ LHS ditto;
+ LHS contains a single scalar element: e.g. ($x) = (....); because
+ after $x has been modified, it won't be used again on the RHS;
+ RHS contains a single element with no aggregate on LHS: e.g.
+ ($a,$b,$c) = ($x); again, once $a has been modified, its value
+ won't be used again.
+
+ B: If LHS are all 'my' lexical var declarations (or safe ops, which
+ we can ignore):
+
+ my ($a, $b, @c) = ...;
+
+ Due to closure and goto tricks, these vars may already have content.
+ For the same reason, an element on the RHS may be a lexical or package
+ alias of one of the vars on the left, or share common elements, for
+ example:
+
+ my ($x,$y) = f(); # $x and $y on both sides
+ sub f : lvalue { ($x,$y) = (1,2); $y, $x }
+
+ and
+
+ my $ra = f();
+ my @a = @$ra; # elements of @a on both sides
+ sub f { @a = 1..4; \@a }
+
+
+ First, just consider scalar vars on LHS:
+
+ RHS is safe only if (A), or in addition,
+ * contains only lexical *scalar* vars, where neither side's
+ lexicals have been flagged as aliases
+
+ If RHS is not safe, then it's always legal to check LHS vars for
+ RC==1, since the only RHS aliases will always be associated
+ with an RC bump.
+
+ Note that in particular, RHS is not safe if:
+
+ * it contains package scalar vars; e.g.:
+
+ f();
+ my ($x, $y) = (2, $x_alias);
+ sub f { $x = 1; *x_alias = \$x; }
+
+ * It contains other general elements, such as flattened or
+ * spliced or single array or hash elements, e.g.
+
+ f();
+ my ($x,$y) = @a; # or $a[0] or @a{@b} etc
+
+ sub f {
+ ($x, $y) = (1,2);
+ use feature 'refaliasing';
+ \($a[0], $a[1]) = \($y,$x);
+ }
+
+ It doesn't matter if the array/hash is lexical or package.
+
+ * it contains a function call that happens to be an lvalue
+ sub which returns one or more of the above, e.g.
+
+ f();
+ my ($x,$y) = f();
+
+ sub f : lvalue {
+ ($x, $y) = (1,2);
+ *x1 = \$x;
+ $y, $x1;
+ }
+
+ (so a sub call on the RHS should be treated the same
+ as having a package var on the RHS).
+
+ * any other "dangerous" thing, such an op or built-in that
+ returns one of the above, e.g. pp_preinc
+
+
+ If RHS is not safe, what we can do however is at compile time flag
+ that the LHS are all my declarations, and at run time check whether
+ all the LHS have RC == 1, and if so skip the full scan.
+
+ Now consider array and hash vars on LHS: e.g. my (...,@a) = ...;
+
+ Here the issue is whether there can be elements of @a on the RHS
+ which will get prematurely freed when @a is cleared prior to
+ assignment. This is only a problem if the aliasing mechanism
+ is one which doesn't increase the refcount - only if RC == 1
+ will the RHS element be prematurely freed.
+
+ Because the array/hash is being INTROed, it or its elements
+ can't directly appear on the RHS:
+
+ my (@a) = ($a[0], @a, etc) # NOT POSSIBLE
+
+ but can indirectly, e.g.:
+
+ my $r = f();
+ my (@a) = @$r;
+ sub f { @a = 1..3; \@a }
+
+ So if the RHS isn't safe as defined by (A), we must always
+ mortalise and bump the ref count of any remaining RHS elements
+ when assigning to a non-empty LHS aggregate.
+
+ Lexical scalars on the RHS aren't safe if they've been involved in
+ aliasing, e.g.
+
+ use feature 'refaliasing';
+
+ f();
+ \(my $lex) = \$pkg;
+ my @a = ($lex,3); # equivalent to ($a[0],3)
+
+ sub f {
+ @a = (1,2);
+ \$pkg = \$a[0];
+ }
+
+ Similarly with lexical arrays and hashes on the RHS:
+
+ f();
+ my @b;
+ my @a = (@b);
+
+ sub f {
+ @a = (1,2);
+ \$b[0] = \$a[1];
+ \$b[1] = \$a[0];
+ }
+
+
+
+ C: As (B), but in addition the LHS may contain non-intro lexicals, e.g.
+ my $a; ($a, my $b) = (....);
+
+ The difference between (B) and (C) is that it is now physically
+ possible for the LHS vars to appear on the RHS too, where they
+ are not reference counted; but in this case, the compile-time
+ PL_generation sweep will detect such common vars.
+
+ So the rules for (C) differ from (B) in that if common vars are
+ detected, the runtime "test RC==1" optimisation can no longer be used,
+ and a full mark and sweep is required
+
+ D: As (C), but in addition the LHS may contain package vars.
+
+ Since package vars can be aliased without a corresponding refcount
+ increase, all bets are off. It's only safe if (A). E.g.
+
+ my ($x, $y) = (1,2);
+
+ for $x_alias ($x) {
+ ($x_alias, $y) = (3, $x); # whoops
+ }
+
+ Ditto for LHS aggregate package vars.
+
+ E: Any other dangerous ops on LHS, e.g.
+ (f(), $a[0], @$r) = (...);
+
+ this is similar to (E) in that all bets are off. In addition, it's
+ impossible to determine at compile time whether the LHS
+ contains a scalar or an aggregate, e.g.
+
+ sub f : lvalue { @a }
+ (f()) = 1..3;
+
+* ---------------------------------------------------------
+*/
+
+
+/* A set of bit flags returned by S_aassign_scan(). Each flag indicates
+ * that at least one of the things flagged was seen.
+ */
+
+enum {
+ AAS_MY_SCALAR = 0x001, /* my $scalar */
+ AAS_MY_AGG = 0x002, /* aggregate: my @array or my %hash */
+ AAS_LEX_SCALAR = 0x004, /* $lexical */
+ AAS_LEX_AGG = 0x008, /* @lexical or %lexical aggregate */
+ AAS_LEX_SCALAR_COMM = 0x010, /* $lexical seen on both sides */
+ AAS_PKG_SCALAR = 0x020, /* $scalar (where $scalar is pkg var) */
+ AAS_PKG_AGG = 0x040, /* package @array or %hash aggregate */
+ AAS_DANGEROUS = 0x080, /* an op (other than the above)
+ that's flagged OA_DANGEROUS */
+ AAS_SAFE_SCALAR = 0x100, /* produces at least one scalar SV that's
+ not in any of the categories above */
+ AAS_DEFAV = 0x200 /* contains just a single '@_' on RHS */
+};
+
+
+
+/* helper function for S_aassign_scan().
+ * check a PAD-related op for commonality and/or set its generation number.
+ * Returns a boolean indicating whether its shared */
+
+static bool
+S_aassign_padcheck(pTHX_ OP* o, bool rhs)
+{
+ if (PAD_COMPNAME_GEN(o->op_targ) == PERL_INT_MAX)
+ /* lexical used in aliasing */
+ return TRUE;
+
+ if (rhs)
+ return cBOOL(PAD_COMPNAME_GEN(o->op_targ) == (STRLEN)PL_generation);
+ else
+ PAD_COMPNAME_GEN_set(o->op_targ, PL_generation);
+
+ return FALSE;
+}
+
+
+/*
+ Helper function for OPpASSIGN_COMMON* detection in rpeep().
+ It scans the left or right hand subtree of the aassign op, and returns a
+ set of flags indicating what sorts of things it found there.
+ 'rhs' indicates whether we're scanning the LHS or RHS. If the former, we
+ set PL_generation on lexical vars; if the latter, we see if
+ PL_generation matches.
+ 'top' indicates whether we're recursing or at the top level.
+ 'scalars_p' is a pointer to a counter of the number of scalar SVs seen.
+ This fn will increment it by the number seen. It's not intended to
+ be an accurate count (especially as many ops can push a variable
+ number of SVs onto the stack); rather it's used as to test whether there
+ can be at most 1 SV pushed; so it's only meanings are "0, 1, many".
+*/
+
+static int
+S_aassign_scan(pTHX_ OP* o, bool rhs, bool top, int *scalars_p)
+{
+ int flags = 0;
+ bool kid_top = FALSE;
+
+ /* first, look for a solitary @_ on the RHS */
+ if ( rhs
+ && top
+ && (o->op_flags & OPf_KIDS)
+ && OP_TYPE_IS_OR_WAS(o, OP_LIST)
+ ) {
+ OP *kid = cUNOPo->op_first;
+ if ( ( kid->op_type == OP_PUSHMARK
+ || kid->op_type == OP_PADRANGE) /* ex-pushmark */
+ && ((kid = OpSIBLING(kid)))
+ && !OpHAS_SIBLING(kid)
+ && kid->op_type == OP_RV2AV
+ && !(kid->op_flags & OPf_REF)
+ && !(kid->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB))
+ && ((kid->op_flags & OPf_WANT) == OPf_WANT_LIST)
+ && ((kid = cUNOPx(kid)->op_first))
+ && kid->op_type == OP_GV
+ && cGVOPx_gv(kid) == PL_defgv
+ )
+ flags |= AAS_DEFAV;
+ }
+
+ switch (o->op_type) {
+ case OP_GVSV:
+ (*scalars_p)++;
+ return AAS_PKG_SCALAR;
+
+ case OP_PADAV:
+ case OP_PADHV:
+ (*scalars_p) += 2;
+ /* if !top, could be e.g. @a[0,1] */
+ if (top && (o->op_flags & OPf_REF))
+ return (o->op_private & OPpLVAL_INTRO)
+ ? AAS_MY_AGG : AAS_LEX_AGG;
+ return AAS_DANGEROUS;
+
+ case OP_PADSV:
+ {
+ int comm = S_aassign_padcheck(aTHX_ o, rhs)
+ ? AAS_LEX_SCALAR_COMM : 0;
+ (*scalars_p)++;
+ return (o->op_private & OPpLVAL_INTRO)
+ ? (AAS_MY_SCALAR|comm) : (AAS_LEX_SCALAR|comm);
+ }
+
+ case OP_RV2AV:
+ case OP_RV2HV:
+ (*scalars_p) += 2;
+ if (cUNOPx(o)->op_first->op_type != OP_GV)
+ return AAS_DANGEROUS; /* @{expr}, %{expr} */
+ /* @pkg, %pkg */
+ /* if !top, could be e.g. @a[0,1] */
+ if (top && (o->op_flags & OPf_REF))
+ return AAS_PKG_AGG;
+ return AAS_DANGEROUS;
+
+ case OP_RV2SV:
+ (*scalars_p)++;
+ if (cUNOPx(o)->op_first->op_type != OP_GV) {
+ (*scalars_p) += 2;
+ return AAS_DANGEROUS; /* ${expr} */
+ }
+ return AAS_PKG_SCALAR; /* $pkg */
+
+ case OP_SPLIT:
+ if (o->op_private & OPpSPLIT_ASSIGN) {
+ /* the assign in @a = split() has been optimised away
+ * and the @a attached directly to the split op
+ * Treat the array as appearing on the RHS, i.e.
+ * ... = (@a = split)
+ * is treated like
+ * ... = @a;
+ */
+
+ if (o->op_flags & OPf_STACKED)
+ /* @{expr} = split() - the array expression is tacked
+ * on as an extra child to split - process kid */
+ return S_aassign_scan(aTHX_ cLISTOPo->op_last, rhs,
+ top, scalars_p);
+
+ /* ... else array is directly attached to split op */
+ (*scalars_p) += 2;
+ if (PL_op->op_private & OPpSPLIT_LEX)
+ return (o->op_private & OPpLVAL_INTRO)
+ ? AAS_MY_AGG : AAS_LEX_AGG;
+ else
+ return AAS_PKG_AGG;
+ }
+ (*scalars_p)++;
+ /* other args of split can't be returned */
+ return AAS_SAFE_SCALAR;
+
+ case OP_UNDEF:
+ /* undef counts as a scalar on the RHS:
+ * (undef, $x) = ...; # only 1 scalar on LHS: always safe
+ * ($x, $y) = (undef, $x); # 2 scalars on RHS: unsafe
+ */
+ if (rhs)
+ (*scalars_p)++;
+ flags = AAS_SAFE_SCALAR;
+ break;
+
+ case OP_PUSHMARK:
+ case OP_STUB:
+ /* these are all no-ops; they don't push a potentially common SV
+ * onto the stack, so they are neither AAS_DANGEROUS nor
+ * AAS_SAFE_SCALAR */
+ return 0;
+
+ case OP_PADRANGE: /* Ignore padrange; checking its siblings is enough */
+ break;
+
+ case OP_NULL:
+ case OP_LIST:
+ /* these do nothing but may have children; but their children
+ * should also be treated as top-level */
+ kid_top = top;
+ break;
+
+ default:
+ if (PL_opargs[o->op_type] & OA_DANGEROUS) {
+ (*scalars_p) += 2;
+ flags = AAS_DANGEROUS;
+ break;
+ }
+
+ if ( (PL_opargs[o->op_type] & OA_TARGLEX)
+ && (o->op_private & OPpTARGET_MY))
+ {
+ (*scalars_p)++;
+ return S_aassign_padcheck(aTHX_ o, rhs)
+ ? AAS_LEX_SCALAR_COMM : AAS_LEX_SCALAR;
+ }
+
+ /* if its an unrecognised, non-dangerous op, assume that it
+ * it the cause of at least one safe scalar */
+ (*scalars_p)++;
+ flags = AAS_SAFE_SCALAR;
+ break;
+ }
+
+ /* XXX this assumes that all other ops are "transparent" - i.e. that
+ * they can return some of their children. While this true for e.g.
+ * sort and grep, it's not true for e.g. map. We really need a
+ * 'transparent' flag added to regen/opcodes
+ */
+ if (o->op_flags & OPf_KIDS) {
+ OP *kid;
+ for (kid = cUNOPo->op_first; kid; kid = OpSIBLING(kid))
+ flags |= S_aassign_scan(aTHX_ kid, rhs, kid_top, scalars_p);
+ }
+ return flags;
+}
+
+