X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/a83b92fa8845fe243b594cefd53ec906a9de17a6..1af9149031d9f5d2d43b6df59b628a0f2c1041f6:/op.c?ds=sidebyside diff --git a/op.c b/op.c index e8fbb1e..0f7ee62 100644 --- a/op.c +++ b/op.c @@ -419,6 +419,15 @@ Perl_Slab_to_rw(pTHX_ OPSLAB *const slab) # define PerlMemShared PerlMem #endif +/* make freed ops die if they're inadvertently executed */ +#ifdef DEBUGGING +static OP * +S_pp_freed(pTHX) +{ + DIE(aTHX_ "panic: freed op 0x%p called\n", PL_op); +} +#endif + void Perl_Slab_Free(pTHX_ void *op) { @@ -427,6 +436,10 @@ Perl_Slab_Free(pTHX_ void *op) PERL_ARGS_ASSERT_SLAB_FREE; +#ifdef DEBUGGING + o->op_ppaddr = S_pp_freed; +#endif + if (!o->op_slabbed) { if (!o->op_static) PerlMemShared_free(op); @@ -662,6 +675,7 @@ Perl_allocmy(pTHX_ const char *const name, const STRLEN len, const U32 flags) if (!(flags & SVf_UTF8 && UTF8_IS_START(name[1])) && isASCII(name[1]) && (!isPRINT(name[1]) || strchr("\t\n\r\f", name[1]))) { + /* diag_listed_as: Can't use global %s in "%s" */ yyerror(Perl_form(aTHX_ "Can't use global %c^%c%.*s in \"%s\"", name[0], toCTRL(name[1]), (int)(len - 2), name + 2, PL_parser->in_my == KEY_state ? "state" : "my")); @@ -956,6 +970,7 @@ Perl_op_clear(pTHX_ OP *o) SvREFCNT_dec(cMETHOPx(o)->op_rclass_sv); cMETHOPx(o)->op_rclass_sv = NULL; #endif + /* FALLTHROUGH */ case OP_METHOD_NAMED: case OP_METHOD_SUPER: SvREFCNT_dec(cMETHOPx(o)->op_u.op_meth_sv); @@ -1063,6 +1078,22 @@ Perl_op_clear(pTHX_ OP *o) PerlMemShared_free(cUNOP_AUXo->op_aux); break; + case OP_MULTICONCAT: + { + UNOP_AUX_item *aux = cUNOP_AUXo->op_aux; + /* aux[PERL_MULTICONCAT_IX_PLAIN_PV] and/or + * aux[PERL_MULTICONCAT_IX_UTF8_PV] point to plain and/or + * utf8 shared strings */ + char *p1 = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv; + char *p2 = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv; + if (p1) + PerlMemShared_free(p1); + if (p2 && p1 != p2) + PerlMemShared_free(p2); + PerlMemShared_free(aux); + } + break; + case OP_MULTIDEREF: { UNOP_AUX_item *items = cUNOP_AUXo->op_aux; @@ -1079,12 +1110,14 @@ Perl_op_clear(pTHX_ OP *o) case MDEREF_HV_padhv_helem: is_hash = TRUE; + /* FALLTHROUGH */ case MDEREF_AV_padav_aelem: pad_free((++items)->pad_offset); goto do_elem; case MDEREF_HV_gvhv_helem: is_hash = TRUE; + /* FALLTHROUGH */ case MDEREF_AV_gvav_aelem: #ifdef USE_ITHREADS S_op_clear_gv(aTHX_ o, &((++items)->pad_offset)); @@ -1095,6 +1128,7 @@ Perl_op_clear(pTHX_ OP *o) case MDEREF_HV_gvsv_vivify_rv2hv_helem: is_hash = TRUE; + /* FALLTHROUGH */ case MDEREF_AV_gvsv_vivify_rv2av_aelem: #ifdef USE_ITHREADS S_op_clear_gv(aTHX_ o, &((++items)->pad_offset)); @@ -1105,6 +1139,7 @@ Perl_op_clear(pTHX_ OP *o) case MDEREF_HV_padsv_vivify_rv2hv_helem: is_hash = TRUE; + /* FALLTHROUGH */ case MDEREF_AV_padsv_vivify_rv2av_aelem: pad_free((++items)->pad_offset); goto do_vivify_rv2xv_elem; @@ -1112,6 +1147,7 @@ Perl_op_clear(pTHX_ OP *o) case MDEREF_HV_pop_rv2hv_helem: case MDEREF_HV_vivify_rv2hv_helem: is_hash = TRUE; + /* FALLTHROUGH */ do_vivify_rv2xv_elem: case MDEREF_AV_pop_rv2av_aelem: case MDEREF_AV_vivify_rv2av_aelem: @@ -2450,6 +2486,906 @@ S_check_hash_fields_and_hekify(pTHX_ UNOP *rop, SVOP *key_op) } } +/* info returned by S_sprintf_is_multiconcatable() */ + +struct sprintf_ismc_info { + SSize_t nargs; /* num of args to sprintf (not including the format) */ + char *start; /* start of raw format string */ + char *end; /* bytes after end of raw format string */ + STRLEN total_len; /* total length (in bytes) of format string, not + including '%s' and half of '%%' */ + STRLEN variant; /* number of bytes by which total_len_p would grow + if upgraded to utf8 */ + bool utf8; /* whether the format is utf8 */ +}; + + +/* is the OP_SPRINTF o suitable for converting into a multiconcat op? + * i.e. its format argument is a const string with only '%s' and '%%' + * formats, and the number of args is known, e.g. + * sprintf "a=%s f=%s", $a[0], scalar(f()); + * but not + * sprintf "i=%d a=%s f=%s", $i, @a, f(); + * + * If successful, the sprintf_ismc_info struct pointed to by info will be + * populated. + */ + +STATIC bool +S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info) +{ + OP *pm, *constop, *kid; + SV *sv; + char *s, *e, *p; + SSize_t nargs, nformats; + STRLEN cur, total_len, variant; + bool utf8; + + /* if sprintf's behaviour changes, die here so that someone + * can decide whether to enhance this function or skip optimising + * under those new circumstances */ + assert(!(o->op_flags & OPf_STACKED)); + assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX)); + assert(!(o->op_private & ~OPpARG4_MASK)); + + pm = cUNOPo->op_first; + if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */ + return FALSE; + constop = OpSIBLING(pm); + if (!constop || constop->op_type != OP_CONST) + return FALSE; + sv = cSVOPx_sv(constop); + if (SvMAGICAL(sv) || !SvPOK(sv)) + return FALSE; + + s = SvPV(sv, cur); + e = s + cur; + + /* Scan format for %% and %s and work out how many %s there are. + * Abandon if other format types are found. + */ + + nformats = 0; + total_len = 0; + variant = 0; + + for (p = s; p < e; p++) { + if (*p != '%') { + total_len++; + if (!UTF8_IS_INVARIANT(*p)) + variant++; + continue; + } + p++; + if (p >= e) + return FALSE; /* lone % at end gives "Invalid conversion" */ + if (*p == '%') + total_len++; + else if (*p == 's') + nformats++; + else + return FALSE; + } + + if (!nformats || nformats > PERL_MULTICONCAT_MAXARG) + return FALSE; + + utf8 = cBOOL(SvUTF8(sv)); + if (utf8) + variant = 0; + + /* scan args; they must all be in scalar cxt */ + + nargs = 0; + kid = OpSIBLING(constop); + + while (kid) { + if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR) + return FALSE; + nargs++; + kid = OpSIBLING(kid); + } + + if (nargs != nformats) + return FALSE; /* e.g. sprintf("%s%s", $a); */ + + + info->nargs = nargs; + info->start = s; + info->end = e; + info->total_len = total_len; + info->variant = variant; + info->utf8 = utf8; + + return TRUE; +} + + + +/* S_maybe_multiconcat(): + * + * given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly + * convert it (and its children) into an OP_MULTICONCAT. See the code + * comments just before pp_multiconcat() for the full details of what + * OP_MULTICONCAT supports. + * + * Basically we're looking for an optree with a chain of OP_CONCATS down + * the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or + * OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g. + * + * $x = "$a$b-$c" + * + * looks like + * + * SASSIGN + * | + * STRINGIFY -- PADSV[$x] + * | + * | + * ex-PUSHMARK -- CONCAT/S + * | + * CONCAT/S -- PADSV[$d] + * | + * CONCAT -- CONST["-"] + * | + * PADSV[$a] -- PADSV[$b] + * + * Note that at this stage the OP_SASSIGN may have already been optimised + * away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT. + */ + +STATIC void +S_maybe_multiconcat(pTHX_ OP *o) +{ + OP *lastkidop; /* the right-most of any kids unshifted onto o */ + OP *topop; /* the top-most op in the concat tree (often equals o, + unless there are assign/stringify ops above it */ + OP *parentop; /* the parent op of topop (or itself if no parent) */ + OP *targmyop; /* the op (if any) with the OPpTARGET_MY flag */ + OP *targetop; /* the op corresponding to target=... or target.=... */ + OP *stringop; /* the OP_STRINGIFY op, if any */ + OP *nextop; /* used for recreating the op_next chain without consts */ + OP *kid; /* general-purpose op pointer */ + UNOP_AUX_item *aux; + UNOP_AUX_item *lenp; + char *const_str, *p; + struct sprintf_ismc_info sprintf_info; + + /* store info about each arg in args[]; + * toparg is the highest used slot; argp is a general + * pointer to args[] slots */ + struct { + void *p; /* initially points to const sv (or null for op); + later, set to SvPV(constsv), with ... */ + STRLEN len; /* ... len set to SvPV(..., len) */ + } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1]; + + SSize_t nargs = 0; + SSize_t nconst = 0; + STRLEN variant; + bool utf8 = FALSE; + bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op; + the last-processed arg will the LHS of one, + as args are processed in reverse order */ + U8 stacked_last = 0; /* whether the last seen concat op was STACKED */ + STRLEN total_len = 0; /* sum of the lengths of the const segments */ + U8 flags = 0; /* what will become the op_flags and ... */ + U8 private_flags = 0; /* ... op_private of the multiconcat op */ + bool is_sprintf = FALSE; /* we're optimising an sprintf */ + bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */ + + /* ----------------------------------------------------------------- + * Phase 1: + * + * Examine the optree non-destructively to determine whether it's + * suitable to be converted into an OP_MULTICONCAT. Accumulate + * information about the optree in args[]. + */ + + argp = args; + targmyop = NULL; + targetop = NULL; + stringop = NULL; + topop = o; + parentop = o; + + assert( o->op_type == OP_SASSIGN + || o->op_type == OP_CONCAT + || o->op_type == OP_SPRINTF + || o->op_type == OP_STRINGIFY); + + Zero(&sprintf_info, 1, struct sprintf_ismc_info); + + /* first see if, at the top of the tree, there is an assign, + * append and/or stringify */ + + if (topop->op_type == OP_SASSIGN) { + /* expr = ..... */ + if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN]) + return; + if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV)) + return; + assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */ + + parentop = topop; + topop = cBINOPo->op_first; + targetop = OpSIBLING(topop); + if (!targetop) /* probably some sort of syntax error */ + return; + } + else if ( topop->op_type == OP_CONCAT + && (topop->op_flags & OPf_STACKED) + && (cUNOPo->op_first->op_flags & OPf_MOD)) + { + /* expr .= ..... */ + + /* OPpTARGET_MY shouldn't be able to be set here. If it is, + * decide what to do about it */ + assert(!(o->op_private & OPpTARGET_MY)); + + /* barf on unknown flags */ + assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY))); + private_flags |= OPpMULTICONCAT_APPEND; + targetop = cBINOPo->op_first; + parentop = topop; + topop = OpSIBLING(targetop); + + /* $x .= gets optimised to rcatline instead */ + if (topop->op_type == OP_READLINE) + return; + } + + if (targetop) { + /* Can targetop (the LHS) if it's a padsv, be be optimised + * away and use OPpTARGET_MY instead? + */ + if ( (targetop->op_type == OP_PADSV) + && !(targetop->op_private & OPpDEREF) + && !(targetop->op_private & OPpPAD_STATE) + /* we don't support 'my $x .= ...' */ + && ( o->op_type == OP_SASSIGN + || !(targetop->op_private & OPpLVAL_INTRO)) + ) + is_targable = TRUE; + } + + if (topop->op_type == OP_STRINGIFY) { + if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY]) + return; + stringop = topop; + + /* barf on unknown flags */ + assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY))); + + if ((topop->op_private & OPpTARGET_MY)) { + if (o->op_type == OP_SASSIGN) + return; /* can't have two assigns */ + targmyop = topop; + } + + private_flags |= OPpMULTICONCAT_STRINGIFY; + parentop = topop; + topop = cBINOPx(topop)->op_first; + assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK)); + topop = OpSIBLING(topop); + } + + if (topop->op_type == OP_SPRINTF) { + if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF]) + return; + if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) { + nargs = sprintf_info.nargs; + total_len = sprintf_info.total_len; + variant = sprintf_info.variant; + utf8 = sprintf_info.utf8; + is_sprintf = TRUE; + private_flags |= OPpMULTICONCAT_FAKE; + toparg = argp; + /* we have an sprintf op rather than a concat optree. + * Skip most of the code below which is associated with + * processing that optree. We also skip phase 2, determining + * whether its cost effective to optimise, since for sprintf, + * multiconcat is *always* faster */ + goto create_aux; + } + /* note that even if the sprintf itself isn't multiconcatable, + * the expression as a whole may be, e.g. in + * $x .= sprintf("%d",...) + * the sprintf op will be left as-is, but the concat/S op may + * be upgraded to multiconcat + */ + } + else if (topop->op_type == OP_CONCAT) { + if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT]) + return; + + if ((topop->op_private & OPpTARGET_MY)) { + if (o->op_type == OP_SASSIGN || targmyop) + return; /* can't have two assigns */ + targmyop = topop; + } + } + + /* Is it safe to convert a sassign/stringify/concat op into + * a multiconcat? */ + assert((PL_opargs[OP_SASSIGN] & OA_CLASS_MASK) == OA_BINOP); + assert((PL_opargs[OP_CONCAT] & OA_CLASS_MASK) == OA_BINOP); + assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP); + assert((PL_opargs[OP_SPRINTF] & OA_CLASS_MASK) == OA_LISTOP); + STATIC_ASSERT_STMT( STRUCT_OFFSET(BINOP, op_last) + == STRUCT_OFFSET(UNOP_AUX, op_aux)); + STATIC_ASSERT_STMT( STRUCT_OFFSET(LISTOP, op_last) + == STRUCT_OFFSET(UNOP_AUX, op_aux)); + + /* Now scan the down the tree looking for a series of + * CONCAT/OPf_STACKED ops on the LHS (with the last one not + * stacked). For example this tree: + * + * | + * CONCAT/STACKED + * | + * CONCAT/STACKED -- EXPR5 + * | + * CONCAT/STACKED -- EXPR4 + * | + * CONCAT -- EXPR3 + * | + * EXPR1 -- EXPR2 + * + * corresponds to an expression like + * + * (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5) + * + * Record info about each EXPR in args[]: in particular, whether it is + * a stringifiable OP_CONST and if so what the const sv is. + * + * The reason why the last concat can't be STACKED is the difference + * between + * + * ((($a .= $a) .= $a) .= $a) .= $a + * + * and + * $a . $a . $a . $a . $a + * + * The main difference between the optrees for those two constructs + * is the presence of the last STACKED. As well as modifying $a, + * the former sees the changed $a between each concat, so if $s is + * initially 'a', the first returns 'a' x 16, while the latter returns + * 'a' x 5. And pp_multiconcat can't handle that kind of thing. + */ + + kid = topop; + + for (;;) { + OP *argop; + SV *sv; + bool last = FALSE; + + if ( kid->op_type == OP_CONCAT + && !kid_is_last + ) { + OP *k1, *k2; + k1 = cUNOPx(kid)->op_first; + k2 = OpSIBLING(k1); + /* shouldn't happen except maybe after compile err? */ + if (!k2) + return; + + /* avoid turning (A . B . ($lex = C) ...) into (A . B . C ...) */ + if (kid->op_private & OPpTARGET_MY) + kid_is_last = TRUE; + + stacked_last = (kid->op_flags & OPf_STACKED); + if (!stacked_last) + kid_is_last = TRUE; + + kid = k1; + argop = k2; + } + else { + argop = kid; + last = TRUE; + } + + if ( nargs > PERL_MULTICONCAT_MAXARG - 2 + || (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2) + { + /* At least two spare slots are needed to decompose both + * concat args. If there are no slots left, continue to + * examine the rest of the optree, but don't push new values + * on args[]. If the optree as a whole is legal for conversion + * (in particular that the last concat isn't STACKED), then + * the first PERL_MULTICONCAT_MAXARG elements of the optree + * can be converted into an OP_MULTICONCAT now, with the first + * child of that op being the remainder of the optree - + * which may itself later be converted to a multiconcat op + * too. + */ + if (last) { + /* the last arg is the rest of the optree */ + argp++->p = NULL; + nargs++; + } + } + else if ( argop->op_type == OP_CONST + && ((sv = cSVOPx_sv(argop))) + /* defer stringification until runtime of 'constant' + * things that might stringify variantly, e.g. the radix + * point of NVs, or overloaded RVs */ + && (SvPOK(sv) || SvIOK(sv)) + && (!SvGMAGICAL(sv)) + ) { + argp++->p = sv; + utf8 |= cBOOL(SvUTF8(sv)); + nconst++; + } + else { + argp++->p = NULL; + nargs++; + } + + if (last) + break; + } + + toparg = argp - 1; + + if (stacked_last) + return; /* we don't support ((A.=B).=C)...) */ + + /* look for two adjacent consts and don't fold them together: + * $o . "a" . "b" + * should do + * $o->concat("a")->concat("b") + * rather than + * $o->concat("ab") + * (but $o .= "a" . "b" should still fold) + */ + { + bool seen_nonconst = FALSE; + for (argp = toparg; argp >= args; argp--) { + if (argp->p == NULL) { + seen_nonconst = TRUE; + continue; + } + if (!seen_nonconst) + continue; + if (argp[1].p) { + /* both previous and current arg were constants; + * leave the current OP_CONST as-is */ + argp->p = NULL; + nconst--; + nargs++; + } + } + } + + /* ----------------------------------------------------------------- + * Phase 2: + * + * At this point we have determined that the optree *can* be converted + * into a multiconcat. Having gathered all the evidence, we now decide + * whether it *should*. + */ + + + /* we need at least one concat action, e.g.: + * + * Y . Z + * X = Y . Z + * X .= Y + * + * otherwise we could be doing something like $x = "foo", which + * if treated as as a concat, would fail to COW. + */ + if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2) + return; + + /* Benchmarking seems to indicate that we gain if: + * * we optimise at least two actions into a single multiconcat + * (e.g concat+concat, sassign+concat); + * * or if we can eliminate at least 1 OP_CONST; + * * or if we can eliminate a padsv via OPpTARGET_MY + */ + + if ( + /* eliminated at least one OP_CONST */ + nconst >= 1 + /* eliminated an OP_SASSIGN */ + || o->op_type == OP_SASSIGN + /* eliminated an OP_PADSV */ + || (!targmyop && is_targable) + ) + /* definitely a net gain to optimise */ + goto optimise; + + /* ... if not, what else? */ + + /* special-case '$lex1 = expr . $lex1' (where expr isn't lex1): + * multiconcat is faster (due to not creating a temporary copy of + * $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is + * faster. + */ + if ( nconst == 0 + && nargs == 2 + && targmyop + && topop->op_type == OP_CONCAT + ) { + PADOFFSET t = targmyop->op_targ; + OP *k1 = cBINOPx(topop)->op_first; + OP *k2 = cBINOPx(topop)->op_last; + if ( k2->op_type == OP_PADSV + && k2->op_targ == t + && ( k1->op_type != OP_PADSV + || k1->op_targ != t) + ) + goto optimise; + } + + /* need at least two concats */ + if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3) + return; + + + + /* ----------------------------------------------------------------- + * Phase 3: + * + * At this point the optree has been verified as ok to be optimised + * into an OP_MULTICONCAT. Now start changing things. + */ + + optimise: + + /* stringify all const args and determine utf8ness */ + + variant = 0; + for (argp = args; argp <= toparg; argp++) { + SV *sv = (SV*)argp->p; + if (!sv) + continue; /* not a const op */ + if (utf8 && !SvUTF8(sv)) + sv_utf8_upgrade_nomg(sv); + argp->p = SvPV_nomg(sv, argp->len); + total_len += argp->len; + + /* see if any strings would grow if converted to utf8 */ + if (!utf8) { + char *p = (char*)argp->p; + STRLEN len = argp->len; + while (len--) { + U8 c = *p++; + if (!UTF8_IS_INVARIANT(c)) + variant++; + } + } + } + + /* create and populate aux struct */ + + create_aux: + + aux = (UNOP_AUX_item*)PerlMemShared_malloc( + sizeof(UNOP_AUX_item) + * ( + PERL_MULTICONCAT_HEADER_SIZE + + ((nargs + 1) * (variant ? 2 : 1)) + ) + ); + const_str = (char *)PerlMemShared_malloc(total_len ? total_len : 1); + + /* Extract all the non-const expressions from the concat tree then + * dispose of the old tree, e.g. convert the tree from this: + * + * o => SASSIGN + * | + * STRINGIFY -- TARGET + * | + * ex-PUSHMARK -- CONCAT + * | + * CONCAT -- EXPR5 + * | + * CONCAT -- EXPR4 + * | + * CONCAT -- EXPR3 + * | + * EXPR1 -- EXPR2 + * + * + * to: + * + * o => MULTICONCAT + * | + * ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET + * + * except that if EXPRi is an OP_CONST, it's discarded. + * + * During the conversion process, EXPR ops are stripped from the tree + * and unshifted onto o. Finally, any of o's remaining original + * childen are discarded and o is converted into an OP_MULTICONCAT. + * + * In this middle of this, o may contain both: unshifted args on the + * left, and some remaining original args on the right. lastkidop + * is set to point to the right-most unshifted arg to delineate + * between the two sets. + */ + + + if (is_sprintf) { + /* create a copy of the format with the %'s removed, and record + * the sizes of the const string segments in the aux struct */ + char *q, *oldq; + lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; + + p = sprintf_info.start; + q = const_str; + oldq = q; + for (; p < sprintf_info.end; p++) { + if (*p == '%') { + p++; + if (*p != '%') { + (lenp++)->ssize = q - oldq; + oldq = q; + continue; + } + } + *q++ = *p; + } + lenp->ssize = q - oldq; + assert((STRLEN)(q - const_str) == total_len); + + /* Attach all the args (i.e. the kids of the sprintf) to o (which + * may or may not be topop) The pushmark and const ops need to be + * kept in case they're an op_next entry point. + */ + lastkidop = cLISTOPx(topop)->op_last; + kid = cUNOPx(topop)->op_first; /* pushmark */ + op_null(kid); + op_null(OpSIBLING(kid)); /* const */ + if (o != topop) { + kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */ + op_sibling_splice(o, NULL, 0, kid); /* and attach to o */ + lastkidop->op_next = o; + } + } + else { + p = const_str; + lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; + + lenp->ssize = -1; + + /* Concatenate all const strings into const_str. + * Note that args[] contains the RHS args in reverse order, so + * we scan args[] from top to bottom to get constant strings + * in L-R order + */ + for (argp = toparg; argp >= args; argp--) { + if (!argp->p) + /* not a const op */ + (++lenp)->ssize = -1; + else { + STRLEN l = argp->len; + Copy(argp->p, p, l, char); + p += l; + if (lenp->ssize == -1) + lenp->ssize = l; + else + lenp->ssize += l; + } + } + + kid = topop; + nextop = o; + lastkidop = NULL; + + for (argp = args; argp <= toparg; argp++) { + /* only keep non-const args, except keep the first-in-next-chain + * arg no matter what it is (but nulled if OP_CONST), because it + * may be the entry point to this subtree from the previous + * op_next. + */ + bool last = (argp == toparg); + OP *prev; + + /* set prev to the sibling *before* the arg to be cut out, + * e.g.: + * + * | + * kid= CONST + * | + * prev= CONST -- EXPR + * | + */ + if (argp == args && kid->op_type != OP_CONCAT) { + /* in e.g. '$x . = f(1)' there's no RHS concat tree + * so the expression to be cut isn't kid->op_last but + * kid itself */ + OP *o1, *o2; + /* find the op before kid */ + o1 = NULL; + o2 = cUNOPx(parentop)->op_first; + while (o2 && o2 != kid) { + o1 = o2; + o2 = OpSIBLING(o2); + } + assert(o2 == kid); + prev = o1; + kid = parentop; + } + else if (kid == o && lastkidop) + prev = last ? lastkidop : OpSIBLING(lastkidop); + else + prev = last ? NULL : cUNOPx(kid)->op_first; + + if (!argp->p || last) { + /* cut RH op */ + OP *aop = op_sibling_splice(kid, prev, 1, NULL); + /* and unshift to front of o */ + op_sibling_splice(o, NULL, 0, aop); + /* record the right-most op added to o: later we will + * free anything to the right of it */ + if (!lastkidop) + lastkidop = aop; + aop->op_next = nextop; + if (last) { + if (argp->p) + /* null the const at start of op_next chain */ + op_null(aop); + } + else if (prev) + nextop = prev->op_next; + } + + /* the last two arguments are both attached to the same concat op */ + if (argp < toparg - 1) + kid = prev; + } + } + + /* Populate the aux struct */ + + aux[PERL_MULTICONCAT_IX_NARGS].ssize = nargs; + aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv = utf8 ? NULL : const_str; + aux[PERL_MULTICONCAT_IX_PLAIN_LEN].ssize = utf8 ? 0 : total_len; + aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = const_str; + aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = total_len; + + /* if variant > 0, calculate a variant const string and lengths where + * the utf8 version of the string will take 'variant' more bytes than + * the plain one. */ + + if (variant) { + char *p = const_str; + STRLEN ulen = total_len + variant; + UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS; + UNOP_AUX_item *ulens = lens + (nargs + 1); + char *up = (char*)PerlMemShared_malloc(ulen); + SSize_t n; + + aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = up; + aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = ulen; + + for (n = 0; n < (nargs + 1); n++) { + SSize_t i; + char * orig_up = up; + for (i = (lens++)->ssize; i > 0; i--) { + U8 c = *p++; + append_utf8_from_native_byte(c, (U8**)&up); + } + (ulens++)->ssize = (i < 0) ? i : up - orig_up; + } + } + + if (stringop) { + /* if there was a top(ish)-level OP_STRINGIFY, we need to keep + * that op's first child - an ex-PUSHMARK - because the op_next of + * the previous op may point to it (i.e. it's the entry point for + * the o optree) + */ + OP *pmop = + (stringop == o) + ? op_sibling_splice(o, lastkidop, 1, NULL) + : op_sibling_splice(stringop, NULL, 1, NULL); + assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK)); + op_sibling_splice(o, NULL, 0, pmop); + if (!lastkidop) + lastkidop = pmop; + } + + /* Optimise + * target = A.B.C... + * target .= A.B.C... + */ + + if (targetop) { + assert(!targmyop); + + if (o->op_type == OP_SASSIGN) { + /* Move the target subtree from being the last of o's children + * to being the last of o's preserved children. + * Note the difference between 'target = ...' and 'target .= ...': + * for the former, target is executed last; for the latter, + * first. + */ + kid = OpSIBLING(lastkidop); + op_sibling_splice(o, kid, 1, NULL); /* cut target op */ + op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */ + lastkidop->op_next = kid->op_next; + lastkidop = targetop; + } + else { + /* Move the target subtree from being the first of o's + * original children to being the first of *all* o's children. + */ + if (lastkidop) { + op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */ + op_sibling_splice(o, NULL, 0, targetop); /* and paste*/ + } + else { + /* if the RHS of .= doesn't contain a concat (e.g. + * $x .= "foo"), it gets missed by the "strip ops from the + * tree and add to o" loop earlier */ + assert(topop->op_type != OP_CONCAT); + if (stringop) { + /* in e.g. $x .= "$y", move the $y expression + * from being a child of OP_STRINGIFY to being the + * second child of the OP_CONCAT + */ + assert(cUNOPx(stringop)->op_first == topop); + op_sibling_splice(stringop, NULL, 1, NULL); + op_sibling_splice(o, cUNOPo->op_first, 0, topop); + } + assert(topop == OpSIBLING(cBINOPo->op_first)); + if (toparg->p) + op_null(topop); + lastkidop = topop; + } + } + + if (is_targable) { + /* optimise + * my $lex = A.B.C... + * $lex = A.B.C... + * $lex .= A.B.C... + * The original padsv op is kept but nulled in case it's the + * entry point for the optree (which it will be for + * '$lex .= ... ' + */ + private_flags |= OPpTARGET_MY; + private_flags |= (targetop->op_private & OPpLVAL_INTRO); + o->op_targ = targetop->op_targ; + targetop->op_targ = 0; + op_null(targetop); + } + else + flags |= OPf_STACKED; + } + else if (targmyop) { + private_flags |= OPpTARGET_MY; + if (o != targmyop) { + o->op_targ = targmyop->op_targ; + targmyop->op_targ = 0; + } + } + + /* detach the emaciated husk of the sprintf/concat optree and free it */ + for (;;) { + kid = op_sibling_splice(o, lastkidop, 1, NULL); + if (!kid) + break; + op_free(kid); + } + + /* and convert o into a multiconcat */ + + o->op_flags = (flags|OPf_KIDS|stacked_last + |(o->op_flags & (OPf_WANT|OPf_PARENS))); + o->op_private = private_flags; + o->op_type = OP_MULTICONCAT; + o->op_ppaddr = PL_ppaddr[OP_MULTICONCAT]; + cUNOP_AUXo->op_aux = aux; +} + /* do all the final processing on an optree (e.g. running the peephole * optimiser on it), then attach it to cv (if cv is non-null) @@ -2472,6 +3408,7 @@ S_process_optree(pTHX_ CV *cv, OP *optree, OP* start) *startp = start; optree->op_private |= OPpREFCOUNTED; OpREFCNT_set(optree, 1); + optimize_optree(optree); CALL_PEEP(*startp); finalize_optree(optree); S_prune_chain_head(startp); @@ -2485,6 +3422,74 @@ S_process_optree(pTHX_ CV *cv, OP *optree, OP* start) /* +=for apidoc optimize_optree + +This function applies some optimisations to the optree in top-down order. +It is called before the peephole optimizer, which processes ops in +execution order. Note that finalize_optree() also does a top-down scan, +but is called *after* the peephole optimizer. + +=cut +*/ + +void +Perl_optimize_optree(pTHX_ OP* o) +{ + PERL_ARGS_ASSERT_OPTIMIZE_OPTREE; + + ENTER; + SAVEVPTR(PL_curcop); + + optimize_op(o); + + LEAVE; +} + + +/* helper for optimize_optree() which optimises on op then recurses + * to optimise any children. + */ + +STATIC void +S_optimize_op(pTHX_ OP* o) +{ + OP *kid; + + PERL_ARGS_ASSERT_OPTIMIZE_OP; + assert(o->op_type != OP_FREED); + + switch (o->op_type) { + case OP_NEXTSTATE: + case OP_DBSTATE: + PL_curcop = ((COP*)o); /* for warnings */ + break; + + + case OP_CONCAT: + case OP_SASSIGN: + case OP_STRINGIFY: + case OP_SPRINTF: + S_maybe_multiconcat(aTHX_ o); + break; + + case OP_SUBST: + if (cPMOPo->op_pmreplrootu.op_pmreplroot) + optimize_op(cPMOPo->op_pmreplrootu.op_pmreplroot); + break; + + default: + break; + } + + if (!(o->op_flags & OPf_KIDS)) + return; + + for (kid = cUNOPo->op_first; kid; kid = OpSIBLING(kid)) + optimize_op(kid); +} + + +/* =for apidoc finalize_optree This function finalizes the optree. Should be called directly after @@ -2578,8 +3583,8 @@ S_finalize_op(pTHX_ OP* o) case OP_CONST: if (cSVOPo->op_private & OPpCONST_STRICT) no_bareword_allowed(o); - /* FALLTHROUGH */ #ifdef USE_ITHREADS + /* FALLTHROUGH */ case OP_HINTSEVAL: op_relocate_sv(&cSVOPo->op_sv, &o->op_targ); #endif @@ -3686,7 +4691,8 @@ Perl_apply_attrs_string(pTHX_ const char *stashpv, CV *cv, } STATIC void -S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name) +S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name, + bool curstash) { OP *new_proto = NULL; STRLEN pvlen; @@ -3701,7 +4707,7 @@ S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name) o = *attrs; if (o->op_type == OP_CONST) { pv = SvPV(cSVOPo_sv, pvlen); - if (pvlen >= 10 && memEQ(pv, "prototype(", 10)) { + if (memBEGINs(pv, pvlen, "prototype(")) { SV * const tmpsv = newSVpvn_flags(pv + 10, pvlen - 11, SvUTF8(cSVOPo_sv)); SV ** const tmpo = cSVOPx_svp(o); SvREFCNT_dec(cSVOPo_sv); @@ -3717,7 +4723,7 @@ S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name) for (o = OpSIBLING(lasto); o; o = OpSIBLING(o)) { if (o->op_type == OP_CONST) { pv = SvPV(cSVOPo_sv, pvlen); - if (pvlen >= 10 && memEQ(pv, "prototype(", 10)) { + if (memBEGINs(pv, pvlen, "prototype(")) { SV * const tmpsv = newSVpvn_flags(pv + 10, pvlen - 11, SvUTF8(cSVOPo_sv)); SV ** const tmpo = cSVOPx_svp(o); SvREFCNT_dec(cSVOPo_sv); @@ -3760,12 +4766,20 @@ S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name) else svname = (SV *)name; if (ckWARN(WARN_ILLEGALPROTO)) - (void)validate_proto(svname, cSVOPx_sv(new_proto), TRUE); + (void)validate_proto(svname, cSVOPx_sv(new_proto), TRUE, + curstash); if (*proto && ckWARN(WARN_PROTOTYPE)) { STRLEN old_len, new_len; const char * oldp = SvPV(cSVOPx_sv(*proto), old_len); const char * newp = SvPV(cSVOPx_sv(new_proto), new_len); + if (curstash && svname == (SV *)name + && !memchr(SvPVX(svname), ':', SvCUR(svname))) { + svname = sv_2mortal(newSVsv(PL_curstname)); + sv_catpvs(svname, "::"); + sv_catsv(svname, (SV *)name); + } + Perl_warner(aTHX_ packWARN(WARN_PROTOTYPE), "Prototype '%" UTF8f "' overridden by attribute 'prototype(%" UTF8f ")'" " in %" SVf, @@ -4432,11 +5446,11 @@ static OP * S_fold_constants(pTHX_ OP *const o) { dVAR; - OP * VOL curop; + OP * volatile curop; OP *newop; - VOL I32 type = o->op_type; + volatile I32 type = o->op_type; bool is_stringify; - SV * VOL sv = NULL; + SV * volatile sv = NULL; int ret = 0; OP *old_next; SV * const oldwarnhook = PL_warnhook; @@ -5816,6 +6830,11 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor) scope->op_next = NULL; /* stop on last op */ op_null(scope); } + + if (is_compiletime) + /* runtime finalizes as part of finalizing whole tree */ + optimize_optree(o); + /* have to peep the DOs individually as we've removed it from * the op_next chain */ CALL_PEEP(o); @@ -6203,9 +7222,10 @@ Perl_newGVOP(pTHX_ I32 type, I32 flags, GV *gv) Constructs, checks, and returns an op of any type that involves an embedded C-level pointer (PV). C is the opcode. C gives -the eight bits of C. C supplies the C-level pointer, which -must have been allocated using C; the memory will -be freed when the op is destroyed. +the eight bits of C. C supplies the C-level pointer. +Depending on the op type, the memory referenced by C may be freed +when the op is destroyed. If the op is of a freeing type, C must +have been allocated using C. =cut */ @@ -6559,11 +7579,24 @@ S_assignment_type(pTHX_ const OP *o) if (!o) return TRUE; - if ((o->op_type == OP_NULL) && (o->op_flags & OPf_KIDS)) - o = cUNOPo->op_first; + if (o->op_type == OP_SREFGEN) + { + OP * const kid = cUNOPx(cUNOPo->op_first)->op_first; + type = kid->op_type; + flags = o->op_flags | kid->op_flags; + if (!(flags & OPf_PARENS) + && (kid->op_type == OP_RV2AV || kid->op_type == OP_PADAV || + kid->op_type == OP_RV2HV || kid->op_type == OP_PADHV )) + return ASSIGN_REF; + ret = ASSIGN_REF; + } else { + if ((o->op_type == OP_NULL) && (o->op_flags & OPf_KIDS)) + o = cUNOPo->op_first; + flags = o->op_flags; + type = o->op_type; + ret = 0; + } - flags = o->op_flags; - type = o->op_type; if (type == OP_COND_EXPR) { OP * const sib = OpSIBLING(cLOGOPo->op_first); const I32 t = assignment_type(sib); @@ -6576,19 +7609,6 @@ S_assignment_type(pTHX_ const OP *o) return FALSE; } - if (type == OP_SREFGEN) - { - OP * const kid = cUNOPx(cUNOPo->op_first)->op_first; - type = kid->op_type; - flags |= kid->op_flags; - if (!(flags & OPf_PARENS) - && (kid->op_type == OP_RV2AV || kid->op_type == OP_PADAV || - kid->op_type == OP_RV2HV || kid->op_type == OP_PADHV )) - return ASSIGN_REF; - ret = ASSIGN_REF; - } - else ret = 0; - if (type == OP_LIST && (flags & OPf_WANT) == OPf_WANT_SCALAR && o->op_private & OPpLVAL_INTRO) @@ -6609,6 +7629,33 @@ S_assignment_type(pTHX_ const OP *o) return ret; } +static OP * +S_newONCEOP(pTHX_ OP *initop, OP *padop) +{ + const PADOFFSET target = padop->op_targ; + OP *const other = newOP(OP_PADSV, + padop->op_flags + | ((padop->op_private & ~OPpLVAL_INTRO) << 8)); + OP *const first = newOP(OP_NULL, 0); + OP *const nullop = newCONDOP(0, first, initop, other); + /* XXX targlex disabled for now; see ticket #124160 + newCONDOP(0, first, S_maybe_targlex(aTHX_ initop), other); + */ + OP *const condop = first->op_next; + + OpTYPE_set(condop, OP_ONCE); + other->op_targ = target; + nullop->op_flags |= OPf_WANT_SCALAR; + + /* Store the initializedness of state vars in a separate + pad entry. */ + condop->op_targ = + pad_add_name_pvn("$",1,padadd_NO_DUP_CHECK|padadd_STATE,0,0); + /* hijacking PADSTALE for uninitialized state variables */ + SvPADSTALE_on(PAD_SVl(condop->op_targ)); + + return nullop; +} /* =for apidoc Am|OP *|newASSIGNOP|I32 flags|OP *left|I32 optype|OP *right @@ -6653,8 +7700,9 @@ Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right) } if ((assign_type = assignment_type(left)) == ASSIGN_LIST) { + OP *state_var_op = NULL; static const char no_list_state[] = "Initialization of state variables" - " in list context currently forbidden"; + " in list currently forbidden"; OP *curop; if (left->op_type == OP_ASLICE || left->op_type == OP_HSLICE) @@ -6668,16 +7716,29 @@ Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right) if (OP_TYPE_IS_OR_WAS(left, OP_LIST)) { - OP* lop = ((LISTOP*)left)->op_first; - while (lop) { - if ((lop->op_type == OP_PADSV || - lop->op_type == OP_PADAV || - lop->op_type == OP_PADHV || - lop->op_type == OP_PADANY) - && (lop->op_private & OPpPAD_STATE) - ) - yyerror(no_list_state); - lop = OpSIBLING(lop); + OP *lop = ((LISTOP*)left)->op_first, *vop, *eop; + if (!(left->op_flags & OPf_PARENS) && + lop->op_type == OP_PUSHMARK && + (vop = OpSIBLING(lop)) && + (vop->op_type == OP_PADAV || vop->op_type == OP_PADHV) && + !(vop->op_flags & OPf_PARENS) && + (vop->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)) == + (OPpLVAL_INTRO|OPpPAD_STATE) && + (eop = OpSIBLING(vop)) && + eop->op_type == OP_ENTERSUB && + !OpHAS_SIBLING(eop)) { + state_var_op = vop; + } else { + while (lop) { + if ((lop->op_type == OP_PADSV || + lop->op_type == OP_PADAV || + lop->op_type == OP_PADHV || + lop->op_type == OP_PADANY) + && (lop->op_private & OPpPAD_STATE) + ) + yyerror(no_list_state); + lop = OpSIBLING(lop); + } } } else if ( (left->op_private & OPpLVAL_INTRO) @@ -6697,7 +7758,10 @@ Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right) state (%a) = ... (state %a) = ... */ - yyerror(no_list_state); + if (left->op_flags & OPf_PARENS) + yyerror(no_list_state); + else + state_var_op = left; } /* optimise @a = split(...) into: @@ -6789,6 +7853,9 @@ Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right) } } } + + if (state_var_op) + o = S_newONCEOP(aTHX_ o, state_var_op); return o; } if (assign_type == ASSIGN_REF) @@ -7899,8 +8966,8 @@ S_looks_like_bool(pTHX_ const OP *o) =for apidoc Am|OP *|newGIVENOP|OP *cond|OP *block|PADOFFSET defsv_off Constructs, checks, and returns an op tree expressing a C block. -C supplies the expression that will be locally assigned to a lexical -variable, and C supplies the body of the C construct; they +C supplies the expression to whose value C<$_> will be locally +aliased, and C supplies the body of the C construct; they are consumed by this function and become part of the constructed op tree. C must be zero (it used to identity the pad slot of lexical $_). @@ -8205,6 +9272,8 @@ Perl_newMYSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block) PERL_ARGS_ASSERT_NEWMYSUB; + PL_hints |= HINT_BLOCK_SCOPE; + /* Find the pad slot for storing the new sub. We cannot use PL_comppad, as it is the pad owned by the new sub. We need to look in CvOUTSIDE and find the pad belonging to the enclos- @@ -8226,7 +9295,7 @@ Perl_newMYSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block) spot = (CV **)svspot; if (!(PL_parser && PL_parser->error_count)) - move_proto_attr(&proto, &attrs, (GV *)PadnameSV(name)); + move_proto_attr(&proto, &attrs, (GV *)PadnameSV(name), 0); if (proto) { assert(proto->op_type == OP_CONST); @@ -8533,6 +9602,85 @@ Perl_newMYSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block) return cv; } +/* +=for apidoc m|CV *|newATTRSUB_x|I32 floor|OP *o|OP *proto|OP *attrs|OP *block|bool o_is_gv + +Construct a Perl subroutine, also performing some surrounding jobs. + +This function is expected to be called in a Perl compilation context, +and some aspects of the subroutine are taken from global variables +associated with compilation. In particular, C represents +the subroutine that is currently being compiled. It must be non-null +when this function is called, and some aspects of the subroutine being +constructed are taken from it. The constructed subroutine may actually +be a reuse of the C object, but will not necessarily be so. + +If C is null then the subroutine will have no body, and for the +time being it will be an error to call it. This represents a forward +subroutine declaration such as S>. If C is +non-null then it provides the Perl code of the subroutine body, which +will be executed when the subroutine is called. This body includes +any argument unwrapping code resulting from a subroutine signature or +similar. The pad use of the code must correspond to the pad attached +to C. The code is not expected to include a C or +C op; this function will add such an op. C is consumed +by this function and will become part of the constructed subroutine. + +C specifies the subroutine's prototype, unless one is supplied +as an attribute (see below). If C is null, then the subroutine +will not have a prototype. If C is non-null, it must point to a +C op whose value is a string, and the subroutine will have that +string as its prototype. If a prototype is supplied as an attribute, the +attribute takes precedence over C, but in that case C should +preferably be null. In any case, C is consumed by this function. + +C supplies attributes to be applied the subroutine. A handful of +attributes take effect by built-in means, being applied to C +immediately when seen. Other attributes are collected up and attached +to the subroutine by this route. C may be null to supply no +attributes, or point to a C op for a single attribute, or point +to a C op whose children apart from the C are C +ops for one or more attributes. Each C op must be a string, +giving the attribute name optionally followed by parenthesised arguments, +in the manner in which attributes appear in Perl source. The attributes +will be applied to the sub by this function. C is consumed by +this function. + +If C is false and C is null, then the subroutine will +be anonymous. If C is false and C is non-null, then C +must point to a C op, which will be consumed by this function, +and its string value supplies a name for the subroutine. The name may +be qualified or unqualified, and if it is unqualified then a default +stash will be selected in some manner. If C is true, then C +doesn't point to an C at all, but is instead a cast pointer to a C +by which the subroutine will be named. + +If there is already a subroutine of the specified name, then the new +sub will either replace the existing one in the glob or be merged with +the existing one. A warning may be generated about redefinition. + +If the subroutine has one of a few special names, such as C or +C, then it will be claimed by the appropriate queue for automatic +running of phase-related subroutines. In this case the relevant glob will +be left not containing any subroutine, even if it did contain one before. +In the case of C, the subroutine will be executed and the reference +to it disposed of before this function returns. + +The function returns a pointer to the constructed subroutine. If the sub +is anonymous then ownership of one counted reference to the subroutine +is transferred to the caller. If the sub is named then the caller does +not get ownership of a reference. In most such cases, where the sub +has a non-phase name, the sub will be alive at the point it is returned +by virtue of being contained in the glob that names it. A phase-named +subroutine will usually be alive by virtue of the reference owned by the +phase's automatic run queue. But a C subroutine, having already +been executed, will quite likely have been destroyed already by the +time this function returns, making it erroneous for the caller to make +any use of the returned pointer. It is the caller's responsibility to +ensure that it knows which of these situations applies. + +=cut +*/ /* _x = extended */ CV * @@ -8580,7 +9728,7 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, sub is stored in. */ const I32 flags = ec ? GV_NOADD_NOINIT - : PL_curstash != CopSTASH(PL_curcop) + : (IN_PERL_RUNTIME && PL_curstash != CopSTASH(PL_curcop)) || memchr(name, ':', namlen) || memchr(name, '\'', namlen) ? gv_fetch_flags : GV_ADDMULTI | GV_NOINIT | GV_NOTQUAL; @@ -8603,10 +9751,10 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, if (!ec) { if (isGV(gv)) { - move_proto_attr(&proto, &attrs, gv); + move_proto_attr(&proto, &attrs, gv, 0); } else { assert(cSVOPo); - move_proto_attr(&proto, &attrs, (GV *)cSVOPo->op_sv); + move_proto_attr(&proto, &attrs, (GV *)cSVOPo->op_sv, 1); } } @@ -8635,7 +9783,7 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, PL_compcv = 0; if (name && block) { - const char *s = strrchr(name, ':'); + const char *s = (char *) my_memrchr(name, ':', namlen); s = s ? s+1 : name; if (strEQ(s, "BEGIN")) { if (PL_in_eval & EVAL_KEEPERR) @@ -8799,6 +9947,8 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, NULL, name, namlen, name_is_utf8 ? SVf_UTF8 : 0, const_sv ); + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); CvFLAGS(cv) |= CvMETHOD(PL_compcv); } else { @@ -8897,8 +10047,12 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, SvROK_on(gv); } SvRV_set(gv, (SV *)cv); + if (HvENAME_HEK(PL_curstash)) + mro_method_changed_in(PL_curstash); } } + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); if (!CvHASGV(cv)) { if (isGV(gv)) @@ -8987,12 +10141,15 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, process_special_blocks(floor, name, gv, cv); } } + assert(cv); done: + assert(!cv || evanescent || SvREFCNT((SV*)cv) != 0); if (PL_parser) PL_parser->copline = NOLINE; LEAVE_SCOPE(floor); + assert(!cv || evanescent || SvREFCNT((SV*)cv) != 0); if (!evanescent) { #ifdef PERL_DEBUG_READONLY_OPS if (slab) @@ -9107,9 +10264,11 @@ S_process_special_blocks(pTHX_ I32 floor, const char *const fullname, } /* -=for apidoc newCONSTSUB +=for apidoc Am|CV *|newCONSTSUB|HV *stash|const char *name|SV *sv -See L. +Behaves like L, except that C is nul-terminated +rather than of counted length, and no flags are set. (This means that +C is always interpreted as Latin-1.) =cut */ @@ -9121,20 +10280,71 @@ Perl_newCONSTSUB(pTHX_ HV *stash, const char *name, SV *sv) } /* -=for apidoc newCONSTSUB_flags - -Creates a constant sub equivalent to Perl S> which is -eligible for inlining at compile-time. - -Currently, the only useful value for C is C. - -The newly created subroutine takes ownership of a reference to the passed in -SV. - -Passing C for SV creates a constant sub equivalent to S>, -which won't be called if used as a destructor, but will suppress the overhead -of a call to C. (This form, however, isn't eligible for inlining at -compile time.) +=for apidoc Am|CV *|newCONSTSUB_flags|HV *stash|const char *name|STRLEN len|U32 flags|SV *sv + +Construct a constant subroutine, also performing some surrounding +jobs. A scalar constant-valued subroutine is eligible for inlining +at compile-time, and in Perl code can be created by S>. Other kinds of constant subroutine have other treatment. + +The subroutine will have an empty prototype and will ignore any arguments +when called. Its constant behaviour is determined by C. If C +is null, the subroutine will yield an empty list. If C points to a +scalar, the subroutine will always yield that scalar. If C points +to an array, the subroutine will always yield a list of the elements of +that array in list context, or the number of elements in the array in +scalar context. This function takes ownership of one counted reference +to the scalar or array, and will arrange for the object to live as long +as the subroutine does. If C points to a scalar then the inlining +assumes that the value of the scalar will never change, so the caller +must ensure that the scalar is not subsequently written to. If C +points to an array then no such assumption is made, so it is ostensibly +safe to mutate the array or its elements, but whether this is really +supported has not been determined. + +The subroutine will have C set according to C. +Other aspects of the subroutine will be left in their default state. +The caller is free to mutate the subroutine beyond its initial state +after this function has returned. + +If C is null then the subroutine will be anonymous, with its +C referring to an C<__ANON__> glob. If C is non-null then the +subroutine will be named accordingly, referenced by the appropriate glob. +C is a string of length C bytes giving a sigilless symbol +name, in UTF-8 if C has the C bit set and in Latin-1 +otherwise. The name may be either qualified or unqualified. If the +name is unqualified then it defaults to being in the stash specified by +C if that is non-null, or to C if C is null. +The symbol is always added to the stash if necessary, with C +semantics. + +C should not have bits set other than C. + +If there is already a subroutine of the specified name, then the new sub +will replace the existing one in the glob. A warning may be generated +about the redefinition. + +If the subroutine has one of a few special names, such as C or +C, then it will be claimed by the appropriate queue for automatic +running of phase-related subroutines. In this case the relevant glob will +be left not containing any subroutine, even if it did contain one before. +Execution of the subroutine will likely be a no-op, unless C was +a tied array or the caller modified the subroutine in some interesting +way before it was executed. In the case of C, the treatment is +buggy: the sub will be executed when only half built, and may be deleted +prematurely, possibly causing a crash. + +The function returns a pointer to the constructed subroutine. If the sub +is anonymous then ownership of one counted reference to the subroutine +is transferred to the caller. If the sub is named then the caller does +not get ownership of a reference. In most such cases, where the sub +has a non-phase name, the sub will be alive at the point it is returned +by virtue of being contained in the glob that names it. A phase-named +subroutine will usually be alive by virtue of the reference owned by +the phase's automatic run queue. A C subroutine may have been +destroyed already by the time this function returns, but currently bugs +occur in that case before the caller gets control. It is the caller's +responsibility to ensure that it knows which of these situations applies. =cut */ @@ -9181,6 +10391,8 @@ Perl_newCONSTSUB_flags(pTHX_ HV *stash, const char *name, STRLEN len, : const_sv_xsub, file ? file : "", "", &sv, XS_DYNAMIC_FILENAME | flags); + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); CvXSUBANY(cv).any_ptr = SvREFCNT_inc_simple(sv); CvCONST_on(cv); @@ -9227,6 +10439,78 @@ Perl_newXS_deffile(pTHX_ const char *name, XSUBADDR_t subaddr) ); } +/* +=for apidoc m|CV *|newXS_len_flags|const char *name|STRLEN len|XSUBADDR_t subaddr|const char *const filename|const char *const proto|SV **const_svp|U32 flags + +Construct an XS subroutine, also performing some surrounding jobs. + +The subroutine will have the entry point C. It will have +the prototype specified by the nul-terminated string C, or +no prototype if C is null. The prototype string is copied; +the caller can mutate the supplied string afterwards. If C +is non-null, it must be a nul-terminated filename, and the subroutine +will have its C set accordingly. By default C is set to +point directly to the supplied string, which must be static. If C +has the C bit set, then a copy of the string will +be taken instead. + +Other aspects of the subroutine will be left in their default state. +If anything else needs to be done to the subroutine for it to function +correctly, it is the caller's responsibility to do that after this +function has constructed it. However, beware of the subroutine +potentially being destroyed before this function returns, as described +below. + +If C is null then the subroutine will be anonymous, with its +C referring to an C<__ANON__> glob. If C is non-null then the +subroutine will be named accordingly, referenced by the appropriate glob. +C is a string of length C bytes giving a sigilless symbol name, +in UTF-8 if C has the C bit set and in Latin-1 otherwise. +The name may be either qualified or unqualified, with the stash defaulting +in the same manner as for C. C may contain +flag bits understood by C with the same meaning as +they have there, such as C. The symbol is always added to +the stash if necessary, with C semantics. + +If there is already a subroutine of the specified name, then the new sub +will replace the existing one in the glob. A warning may be generated +about the redefinition. If the old subroutine was C then the +decision about whether to warn is influenced by an expectation about +whether the new subroutine will become a constant of similar value. +That expectation is determined by C. (Note that the call to +this function doesn't make the new subroutine C in any case; +that is left to the caller.) If C is null then it indicates +that the new subroutine will not become a constant. If C +is non-null then it indicates that the new subroutine will become a +constant, and it points to an C that provides the constant value +that the subroutine will have. + +If the subroutine has one of a few special names, such as C or +C, then it will be claimed by the appropriate queue for automatic +running of phase-related subroutines. In this case the relevant glob will +be left not containing any subroutine, even if it did contain one before. +In the case of C, the subroutine will be executed and the reference +to it disposed of before this function returns, and also before its +prototype is set. If a C subroutine would not be sufficiently +constructed by this function to be ready for execution then the caller +must prevent this happening by giving the subroutine a different name. + +The function returns a pointer to the constructed subroutine. If the sub +is anonymous then ownership of one counted reference to the subroutine +is transferred to the caller. If the sub is named then the caller does +not get ownership of a reference. In most such cases, where the sub +has a non-phase name, the sub will be alive at the point it is returned +by virtue of being contained in the glob that names it. A phase-named +subroutine will usually be alive by virtue of the reference owned by the +phase's automatic run queue. But a C subroutine, having already +been executed, will quite likely have been destroyed already by the +time this function returns, making it erroneous for the caller to make +any use of the returned pointer. It is the caller's responsibility to +ensure that it knows which of these situations applies. + +=cut +*/ + CV * Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, XSUBADDR_t subaddr, const char *const filename, @@ -9235,6 +10519,7 @@ Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, { CV *cv; bool interleave = FALSE; + bool evanescent = FALSE; PERL_ARGS_ASSERT_NEWXS_LEN_FLAGS; @@ -9279,6 +10564,8 @@ Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, gv_method_changed(gv); /* newXS */ } } + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); CvGV_set(cv, gv); if(filename) { @@ -9306,14 +10593,17 @@ Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, #endif if (name) - process_special_blocks(0, name, gv, cv); + evanescent = process_special_blocks(0, name, gv, cv); else CvANON_on(cv); } /* <- not a conditional branch */ + assert(cv); + assert(evanescent || SvREFCNT((SV*)cv) != 0); - sv_setpv(MUTABLE_SV(cv), proto); + if (!evanescent) sv_setpv(MUTABLE_SV(cv), proto); if (interleave) LEAVE; + assert(evanescent || SvREFCNT((SV*)cv) != 0); return cv; } @@ -9805,9 +11095,13 @@ Perl_ck_concat(pTHX_ OP *o) PERL_ARGS_ASSERT_CK_CONCAT; PERL_UNUSED_CONTEXT; + /* reuse the padtmp returned by the concat child */ if (kid->op_type == OP_CONCAT && !(kid->op_private & OPpTARGET_MY) && !(kUNOP->op_first->op_flags & OPf_MOD)) + { o->op_flags |= OPf_STACKED; + o->op_private |= OPpCONCAT_NESTED; + } return o; } @@ -10731,35 +12025,13 @@ Perl_ck_sassign(pTHX_ OP *o) ) && (kkid->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)) == (OPpLVAL_INTRO|OPpPAD_STATE)) { - const PADOFFSET target = kkid->op_targ; - OP *const other = newOP(OP_PADSV, - kkid->op_flags - | ((kkid->op_private & ~OPpLVAL_INTRO) << 8)); - OP *const first = newOP(OP_NULL, 0); - OP *const nullop = - newCONDOP(0, first, o, other); - /* XXX targlex disabled for now; see ticket #124160 - newCONDOP(0, first, S_maybe_targlex(aTHX_ o), other); - */ - OP *const condop = first->op_next; - - OpTYPE_set(condop, OP_ONCE); - other->op_targ = target; - nullop->op_flags |= OPf_WANT_SCALAR; - - /* Store the initializedness of state vars in a separate - pad entry. */ - condop->op_targ = - pad_add_name_pvn("$",1,padadd_NO_DUP_CHECK|padadd_STATE,0,0); - /* hijacking PADSTALE for uninitialized state variables */ - SvPADSTALE_on(PAD_SVl(condop->op_targ)); - - return nullop; + return S_newONCEOP(aTHX_ o, kkid); } } return S_maybe_targlex(aTHX_ o); } + OP * Perl_ck_match(pTHX_ OP *o) { @@ -10786,7 +12058,9 @@ Perl_ck_method(pTHX_ OP *o) sv = kSVOP->op_sv; /* replace ' with :: */ - while ((compatptr = strchr(SvPVX(sv), '\''))) { + while ((compatptr = (char *) memchr(SvPVX(sv), '\'', + SvEND(sv) - SvPVX(sv) ))) + { *compatptr = ':'; sv_insert(sv, compatptr - SvPVX_const(sv), 0, ":", 1); } @@ -10807,13 +12081,13 @@ Perl_ck_method(pTHX_ OP *o) return newMETHOP_named(OP_METHOD_NAMED, 0, methsv); } - if (nsplit == 7 && memEQ(method, "SUPER::", nsplit)) { /* $proto->SUPER::method() */ + if (memEQs(method, nsplit, "SUPER::")) { /* $proto->SUPER::method() */ op_free(o); return newMETHOP_named(OP_METHOD_SUPER, 0, methsv); } /* $proto->MyClass::method() and $proto->MyClass::SUPER::method() */ - if (nsplit >= 9 && strnEQ(method+nsplit-9, "::SUPER::", 9)) { + if (nsplit >= 9 && strBEGINs(method+nsplit-9, "::SUPER::")) { rclass = newSVpvn_share(method, utf8*(nsplit-9), 0); new_op = newMETHOP_named(OP_METHOD_REDIR_SUPER, 0, methsv); } else { @@ -11162,10 +12436,10 @@ Perl_ck_sort(pTHX_ OP *o) SV ** const svp = hv_fetchs(hinthv, "sort", FALSE); if (svp) { const I32 sorthints = (I32)SvIV(*svp); - if ((sorthints & HINT_SORT_QUICKSORT) != 0) - o->op_private |= OPpSORT_QSORT; if ((sorthints & HINT_SORT_STABLE) != 0) o->op_private |= OPpSORT_STABLE; + if ((sorthints & HINT_SORT_UNSTABLE) != 0) + o->op_private |= OPpSORT_UNSTABLE; } } @@ -11593,11 +12867,18 @@ Perl_rv2cv_op_cv(pTHX_ OP *cvop, U32 flags) } if (SvTYPE((SV*)cv) != SVt_PVCV) return NULL; - if (flags & (RV2CVOPCV_RETURN_NAME_GV|RV2CVOPCV_MAYBE_NAME_GV)) { - if ((!CvANON(cv) || !gv) && !CvLEXICAL(cv) - && ((flags & RV2CVOPCV_RETURN_NAME_GV) || !CvNAMED(cv))) + if (flags & RV2CVOPCV_RETURN_NAME_GV) { + if ((!CvANON(cv) && !CvLEXICAL(cv)) || !gv) gv = CvGV(cv); return (CV*)gv; + } + else if (flags & RV2CVOPCV_MAYBE_NAME_GV) { + if (CvLEXICAL(cv) || CvNAMED(cv)) + return NULL; + if (!CvANON(cv) || !gv) + gv = CvGV(cv); + return (CV*)gv; + } else { return cv; } @@ -11771,7 +13052,7 @@ Perl_ck_entersub_args_proto(pTHX_ OP *entersubop, GV *namegv, SV *protosv) switch (*proto++) { case '[': if (contextclass++ == 0) { - e = strchr(proto, ']'); + e = (char *) memchr(proto, ']', proto_end - proto); if (!e || e == proto) goto oops; } @@ -12353,7 +13634,10 @@ Perl_ck_substr(pTHX_ OP *o) if (kid->op_type == OP_NULL) kid = OpSIBLING(kid); if (kid) - kid->op_flags |= OPf_MOD; + /* Historically, substr(delete $foo{bar},...) has been allowed + with 4-arg substr. Keep it working by applying entersub + lvalue context. */ + op_lvalue(kid, OP_ENTERSUB); } return o; @@ -14515,7 +15799,7 @@ Perl_rpeep(pTHX_ OP *o) o->op_flags &= ~(OPf_REF|OPf_WANT); o->op_flags |= want; o->op_private |= (o->op_type == OP_PADHV ? - OPpRV2HV_ISKEYS : OPpRV2HV_ISKEYS); + OPpPADHV_ISKEYS : OPpRV2HV_ISKEYS); /* for keys(%lex), hold onto the OP_KEYS's targ * since padhv doesn't have its own targ to return * an int with */