X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/df6b4bd56551f2d39f7c0019c23f27181d8c39c4..35ad0133df9b65a4e32f2f07a2a05b387bd79591:/op.c diff --git a/op.c b/op.c index 1e85dd1..0b46b34 100644 --- a/op.c +++ b/op.c @@ -419,6 +419,15 @@ Perl_Slab_to_rw(pTHX_ OPSLAB *const slab) # define PerlMemShared PerlMem #endif +/* make freed ops die if they're inadvertently executed */ +#ifdef DEBUGGING +static OP * +S_pp_freed(pTHX) +{ + DIE(aTHX_ "panic: freed op 0x%p called\n", PL_op); +} +#endif + void Perl_Slab_Free(pTHX_ void *op) { @@ -427,6 +436,10 @@ Perl_Slab_Free(pTHX_ void *op) PERL_ARGS_ASSERT_SLAB_FREE; +#ifdef DEBUGGING + o->op_ppaddr = S_pp_freed; +#endif + if (!o->op_slabbed) { if (!o->op_static) PerlMemShared_free(op); @@ -662,6 +675,7 @@ Perl_allocmy(pTHX_ const char *const name, const STRLEN len, const U32 flags) if (!(flags & SVf_UTF8 && UTF8_IS_START(name[1])) && isASCII(name[1]) && (!isPRINT(name[1]) || strchr("\t\n\r\f", name[1]))) { + /* diag_listed_as: Can't use global %s in "%s" */ yyerror(Perl_form(aTHX_ "Can't use global %c^%c%.*s in \"%s\"", name[0], toCTRL(name[1]), (int)(len - 2), name + 2, PL_parser->in_my == KEY_state ? "state" : "my")); @@ -956,6 +970,7 @@ Perl_op_clear(pTHX_ OP *o) SvREFCNT_dec(cMETHOPx(o)->op_rclass_sv); cMETHOPx(o)->op_rclass_sv = NULL; #endif + /* FALLTHROUGH */ case OP_METHOD_NAMED: case OP_METHOD_SUPER: SvREFCNT_dec(cMETHOPx(o)->op_u.op_meth_sv); @@ -1063,6 +1078,22 @@ Perl_op_clear(pTHX_ OP *o) PerlMemShared_free(cUNOP_AUXo->op_aux); break; + case OP_MULTICONCAT: + { + UNOP_AUX_item *aux = cUNOP_AUXo->op_aux; + /* aux[PERL_MULTICONCAT_IX_PLAIN_PV] and/or + * aux[PERL_MULTICONCAT_IX_UTF8_PV] point to plain and/or + * utf8 shared strings */ + char *p1 = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv; + char *p2 = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv; + if (p1) + PerlMemShared_free(p1); + if (p2 && p1 != p2) + PerlMemShared_free(p2); + PerlMemShared_free(aux); + } + break; + case OP_MULTIDEREF: { UNOP_AUX_item *items = cUNOP_AUXo->op_aux; @@ -1079,12 +1110,14 @@ Perl_op_clear(pTHX_ OP *o) case MDEREF_HV_padhv_helem: is_hash = TRUE; + /* FALLTHROUGH */ case MDEREF_AV_padav_aelem: pad_free((++items)->pad_offset); goto do_elem; case MDEREF_HV_gvhv_helem: is_hash = TRUE; + /* FALLTHROUGH */ case MDEREF_AV_gvav_aelem: #ifdef USE_ITHREADS S_op_clear_gv(aTHX_ o, &((++items)->pad_offset)); @@ -1095,6 +1128,7 @@ Perl_op_clear(pTHX_ OP *o) case MDEREF_HV_gvsv_vivify_rv2hv_helem: is_hash = TRUE; + /* FALLTHROUGH */ case MDEREF_AV_gvsv_vivify_rv2av_aelem: #ifdef USE_ITHREADS S_op_clear_gv(aTHX_ o, &((++items)->pad_offset)); @@ -1105,6 +1139,7 @@ Perl_op_clear(pTHX_ OP *o) case MDEREF_HV_padsv_vivify_rv2hv_helem: is_hash = TRUE; + /* FALLTHROUGH */ case MDEREF_AV_padsv_vivify_rv2av_aelem: pad_free((++items)->pad_offset); goto do_vivify_rv2xv_elem; @@ -1112,6 +1147,7 @@ Perl_op_clear(pTHX_ OP *o) case MDEREF_HV_pop_rv2hv_helem: case MDEREF_HV_vivify_rv2hv_helem: is_hash = TRUE; + /* FALLTHROUGH */ do_vivify_rv2xv_elem: case MDEREF_AV_pop_rv2av_aelem: case MDEREF_AV_vivify_rv2av_aelem: @@ -1187,8 +1223,7 @@ S_cop_free(pTHX_ COP* cop) } STATIC void -S_forget_pmop(pTHX_ PMOP *const o - ) +S_forget_pmop(pTHX_ PMOP *const o) { HV * const pmstash = PmopSTASH(o); @@ -1509,7 +1544,8 @@ Perl_alloc_LOGOP(pTHX_ I32 type, OP *first, OP* other) OpTYPE_set(logop, type); logop->op_first = first; logop->op_other = other; - logop->op_flags = OPf_KIDS; + if (first) + logop->op_flags = OPf_KIDS; while (kid && OpHAS_SIBLING(kid)) kid = OpSIBLING(kid); if (kid) @@ -1913,6 +1949,11 @@ Perl_scalarvoid(pTHX_ OP *arg) if (o->op_type == OP_REPEAT) scalar(cBINOPo->op_first); goto func_ops; + case OP_CONCAT: + if ((o->op_flags & OPf_STACKED) && + !(o->op_private & OPpCONCAT_NESTED)) + break; + goto func_ops; case OP_SUBSTR: if (o->op_private == 4) break; @@ -2450,6 +2491,915 @@ S_check_hash_fields_and_hekify(pTHX_ UNOP *rop, SVOP *key_op) } } +/* info returned by S_sprintf_is_multiconcatable() */ + +struct sprintf_ismc_info { + SSize_t nargs; /* num of args to sprintf (not including the format) */ + char *start; /* start of raw format string */ + char *end; /* bytes after end of raw format string */ + STRLEN total_len; /* total length (in bytes) of format string, not + including '%s' and half of '%%' */ + STRLEN variant; /* number of bytes by which total_len_p would grow + if upgraded to utf8 */ + bool utf8; /* whether the format is utf8 */ +}; + + +/* is the OP_SPRINTF o suitable for converting into a multiconcat op? + * i.e. its format argument is a const string with only '%s' and '%%' + * formats, and the number of args is known, e.g. + * sprintf "a=%s f=%s", $a[0], scalar(f()); + * but not + * sprintf "i=%d a=%s f=%s", $i, @a, f(); + * + * If successful, the sprintf_ismc_info struct pointed to by info will be + * populated. + */ + +STATIC bool +S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info) +{ + OP *pm, *constop, *kid; + SV *sv; + char *s, *e, *p; + SSize_t nargs, nformats; + STRLEN cur, total_len, variant; + bool utf8; + + /* if sprintf's behaviour changes, die here so that someone + * can decide whether to enhance this function or skip optimising + * under those new circumstances */ + assert(!(o->op_flags & OPf_STACKED)); + assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX)); + assert(!(o->op_private & ~OPpARG4_MASK)); + + pm = cUNOPo->op_first; + if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */ + return FALSE; + constop = OpSIBLING(pm); + if (!constop || constop->op_type != OP_CONST) + return FALSE; + sv = cSVOPx_sv(constop); + if (SvMAGICAL(sv) || !SvPOK(sv)) + return FALSE; + + s = SvPV(sv, cur); + e = s + cur; + + /* Scan format for %% and %s and work out how many %s there are. + * Abandon if other format types are found. + */ + + nformats = 0; + total_len = 0; + variant = 0; + + for (p = s; p < e; p++) { + if (*p != '%') { + total_len++; + if (!UTF8_IS_INVARIANT(*p)) + variant++; + continue; + } + p++; + if (p >= e) + return FALSE; /* lone % at end gives "Invalid conversion" */ + if (*p == '%') + total_len++; + else if (*p == 's') + nformats++; + else + return FALSE; + } + + if (!nformats || nformats > PERL_MULTICONCAT_MAXARG) + return FALSE; + + utf8 = cBOOL(SvUTF8(sv)); + if (utf8) + variant = 0; + + /* scan args; they must all be in scalar cxt */ + + nargs = 0; + kid = OpSIBLING(constop); + + while (kid) { + if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR) + return FALSE; + nargs++; + kid = OpSIBLING(kid); + } + + if (nargs != nformats) + return FALSE; /* e.g. sprintf("%s%s", $a); */ + + + info->nargs = nargs; + info->start = s; + info->end = e; + info->total_len = total_len; + info->variant = variant; + info->utf8 = utf8; + + return TRUE; +} + + + +/* S_maybe_multiconcat(): + * + * given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly + * convert it (and its children) into an OP_MULTICONCAT. See the code + * comments just before pp_multiconcat() for the full details of what + * OP_MULTICONCAT supports. + * + * Basically we're looking for an optree with a chain of OP_CONCATS down + * the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or + * OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g. + * + * $x = "$a$b-$c" + * + * looks like + * + * SASSIGN + * | + * STRINGIFY -- PADSV[$x] + * | + * | + * ex-PUSHMARK -- CONCAT/S + * | + * CONCAT/S -- PADSV[$d] + * | + * CONCAT -- CONST["-"] + * | + * PADSV[$a] -- PADSV[$b] + * + * Note that at this stage the OP_SASSIGN may have already been optimised + * away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT. + */ + +STATIC void +S_maybe_multiconcat(pTHX_ OP *o) +{ + OP *lastkidop; /* the right-most of any kids unshifted onto o */ + OP *topop; /* the top-most op in the concat tree (often equals o, + unless there are assign/stringify ops above it */ + OP *parentop; /* the parent op of topop (or itself if no parent) */ + OP *targmyop; /* the op (if any) with the OPpTARGET_MY flag */ + OP *targetop; /* the op corresponding to target=... or target.=... */ + OP *stringop; /* the OP_STRINGIFY op, if any */ + OP *nextop; /* used for recreating the op_next chain without consts */ + OP *kid; /* general-purpose op pointer */ + UNOP_AUX_item *aux; + UNOP_AUX_item *lenp; + char *const_str, *p; + struct sprintf_ismc_info sprintf_info; + + /* store info about each arg in args[]; + * toparg is the highest used slot; argp is a general + * pointer to args[] slots */ + struct { + void *p; /* initially points to const sv (or null for op); + later, set to SvPV(constsv), with ... */ + STRLEN len; /* ... len set to SvPV(..., len) */ + } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1]; + + SSize_t nargs = 0; + SSize_t nconst = 0; + SSize_t nadjconst = 0; /* adjacent consts - may be demoted to args */ + STRLEN variant; + bool utf8 = FALSE; + bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op; + the last-processed arg will the LHS of one, + as args are processed in reverse order */ + U8 stacked_last = 0; /* whether the last seen concat op was STACKED */ + STRLEN total_len = 0; /* sum of the lengths of the const segments */ + U8 flags = 0; /* what will become the op_flags and ... */ + U8 private_flags = 0; /* ... op_private of the multiconcat op */ + bool is_sprintf = FALSE; /* we're optimising an sprintf */ + bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */ + bool prev_was_const = FALSE; /* previous arg was a const */ + + /* ----------------------------------------------------------------- + * Phase 1: + * + * Examine the optree non-destructively to determine whether it's + * suitable to be converted into an OP_MULTICONCAT. Accumulate + * information about the optree in args[]. + */ + + argp = args; + targmyop = NULL; + targetop = NULL; + stringop = NULL; + topop = o; + parentop = o; + + assert( o->op_type == OP_SASSIGN + || o->op_type == OP_CONCAT + || o->op_type == OP_SPRINTF + || o->op_type == OP_STRINGIFY); + + Zero(&sprintf_info, 1, struct sprintf_ismc_info); + + /* first see if, at the top of the tree, there is an assign, + * append and/or stringify */ + + if (topop->op_type == OP_SASSIGN) { + /* expr = ..... */ + if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN]) + return; + if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV)) + return; + assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */ + + parentop = topop; + topop = cBINOPo->op_first; + targetop = OpSIBLING(topop); + if (!targetop) /* probably some sort of syntax error */ + return; + } + else if ( topop->op_type == OP_CONCAT + && (topop->op_flags & OPf_STACKED) + && (!(topop->op_private & OPpCONCAT_NESTED)) + ) + { + /* expr .= ..... */ + + /* OPpTARGET_MY shouldn't be able to be set here. If it is, + * decide what to do about it */ + assert(!(o->op_private & OPpTARGET_MY)); + + /* barf on unknown flags */ + assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY))); + private_flags |= OPpMULTICONCAT_APPEND; + targetop = cBINOPo->op_first; + parentop = topop; + topop = OpSIBLING(targetop); + + /* $x .= gets optimised to rcatline instead */ + if (topop->op_type == OP_READLINE) + return; + } + + if (targetop) { + /* Can targetop (the LHS) if it's a padsv, be be optimised + * away and use OPpTARGET_MY instead? + */ + if ( (targetop->op_type == OP_PADSV) + && !(targetop->op_private & OPpDEREF) + && !(targetop->op_private & OPpPAD_STATE) + /* we don't support 'my $x .= ...' */ + && ( o->op_type == OP_SASSIGN + || !(targetop->op_private & OPpLVAL_INTRO)) + ) + is_targable = TRUE; + } + + if (topop->op_type == OP_STRINGIFY) { + if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY]) + return; + stringop = topop; + + /* barf on unknown flags */ + assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY))); + + if ((topop->op_private & OPpTARGET_MY)) { + if (o->op_type == OP_SASSIGN) + return; /* can't have two assigns */ + targmyop = topop; + } + + private_flags |= OPpMULTICONCAT_STRINGIFY; + parentop = topop; + topop = cBINOPx(topop)->op_first; + assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK)); + topop = OpSIBLING(topop); + } + + if (topop->op_type == OP_SPRINTF) { + if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF]) + return; + if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) { + nargs = sprintf_info.nargs; + total_len = sprintf_info.total_len; + variant = sprintf_info.variant; + utf8 = sprintf_info.utf8; + is_sprintf = TRUE; + private_flags |= OPpMULTICONCAT_FAKE; + toparg = argp; + /* we have an sprintf op rather than a concat optree. + * Skip most of the code below which is associated with + * processing that optree. We also skip phase 2, determining + * whether its cost effective to optimise, since for sprintf, + * multiconcat is *always* faster */ + goto create_aux; + } + /* note that even if the sprintf itself isn't multiconcatable, + * the expression as a whole may be, e.g. in + * $x .= sprintf("%d",...) + * the sprintf op will be left as-is, but the concat/S op may + * be upgraded to multiconcat + */ + } + else if (topop->op_type == OP_CONCAT) { + if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT]) + return; + + if ((topop->op_private & OPpTARGET_MY)) { + if (o->op_type == OP_SASSIGN || targmyop) + return; /* can't have two assigns */ + targmyop = topop; + } + } + + /* Is it safe to convert a sassign/stringify/concat op into + * a multiconcat? */ + assert((PL_opargs[OP_SASSIGN] & OA_CLASS_MASK) == OA_BINOP); + assert((PL_opargs[OP_CONCAT] & OA_CLASS_MASK) == OA_BINOP); + assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP); + assert((PL_opargs[OP_SPRINTF] & OA_CLASS_MASK) == OA_LISTOP); + STATIC_ASSERT_STMT( STRUCT_OFFSET(BINOP, op_last) + == STRUCT_OFFSET(UNOP_AUX, op_aux)); + STATIC_ASSERT_STMT( STRUCT_OFFSET(LISTOP, op_last) + == STRUCT_OFFSET(UNOP_AUX, op_aux)); + + /* Now scan the down the tree looking for a series of + * CONCAT/OPf_STACKED ops on the LHS (with the last one not + * stacked). For example this tree: + * + * | + * CONCAT/STACKED + * | + * CONCAT/STACKED -- EXPR5 + * | + * CONCAT/STACKED -- EXPR4 + * | + * CONCAT -- EXPR3 + * | + * EXPR1 -- EXPR2 + * + * corresponds to an expression like + * + * (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5) + * + * Record info about each EXPR in args[]: in particular, whether it is + * a stringifiable OP_CONST and if so what the const sv is. + * + * The reason why the last concat can't be STACKED is the difference + * between + * + * ((($a .= $a) .= $a) .= $a) .= $a + * + * and + * $a . $a . $a . $a . $a + * + * The main difference between the optrees for those two constructs + * is the presence of the last STACKED. As well as modifying $a, + * the former sees the changed $a between each concat, so if $s is + * initially 'a', the first returns 'a' x 16, while the latter returns + * 'a' x 5. And pp_multiconcat can't handle that kind of thing. + */ + + kid = topop; + + for (;;) { + OP *argop; + SV *sv; + bool last = FALSE; + + if ( kid->op_type == OP_CONCAT + && !kid_is_last + ) { + OP *k1, *k2; + k1 = cUNOPx(kid)->op_first; + k2 = OpSIBLING(k1); + /* shouldn't happen except maybe after compile err? */ + if (!k2) + return; + + /* avoid turning (A . B . ($lex = C) ...) into (A . B . C ...) */ + if (kid->op_private & OPpTARGET_MY) + kid_is_last = TRUE; + + stacked_last = (kid->op_flags & OPf_STACKED); + if (!stacked_last) + kid_is_last = TRUE; + + kid = k1; + argop = k2; + } + else { + argop = kid; + last = TRUE; + } + + if ( nargs + nadjconst > PERL_MULTICONCAT_MAXARG - 2 + || (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2) + { + /* At least two spare slots are needed to decompose both + * concat args. If there are no slots left, continue to + * examine the rest of the optree, but don't push new values + * on args[]. If the optree as a whole is legal for conversion + * (in particular that the last concat isn't STACKED), then + * the first PERL_MULTICONCAT_MAXARG elements of the optree + * can be converted into an OP_MULTICONCAT now, with the first + * child of that op being the remainder of the optree - + * which may itself later be converted to a multiconcat op + * too. + */ + if (last) { + /* the last arg is the rest of the optree */ + argp++->p = NULL; + nargs++; + } + } + else if ( argop->op_type == OP_CONST + && ((sv = cSVOPx_sv(argop))) + /* defer stringification until runtime of 'constant' + * things that might stringify variantly, e.g. the radix + * point of NVs, or overloaded RVs */ + && (SvPOK(sv) || SvIOK(sv)) + && (!SvGMAGICAL(sv)) + ) { + argp++->p = sv; + utf8 |= cBOOL(SvUTF8(sv)); + nconst++; + if (prev_was_const) + /* this const may be demoted back to a plain arg later; + * make sure we have enough arg slots left */ + nadjconst++; + prev_was_const = !prev_was_const; + } + else { + argp++->p = NULL; + nargs++; + prev_was_const = FALSE; + } + + if (last) + break; + } + + toparg = argp - 1; + + if (stacked_last) + return; /* we don't support ((A.=B).=C)...) */ + + /* look for two adjacent consts and don't fold them together: + * $o . "a" . "b" + * should do + * $o->concat("a")->concat("b") + * rather than + * $o->concat("ab") + * (but $o .= "a" . "b" should still fold) + */ + { + bool seen_nonconst = FALSE; + for (argp = toparg; argp >= args; argp--) { + if (argp->p == NULL) { + seen_nonconst = TRUE; + continue; + } + if (!seen_nonconst) + continue; + if (argp[1].p) { + /* both previous and current arg were constants; + * leave the current OP_CONST as-is */ + argp->p = NULL; + nconst--; + nargs++; + } + } + } + + /* ----------------------------------------------------------------- + * Phase 2: + * + * At this point we have determined that the optree *can* be converted + * into a multiconcat. Having gathered all the evidence, we now decide + * whether it *should*. + */ + + + /* we need at least one concat action, e.g.: + * + * Y . Z + * X = Y . Z + * X .= Y + * + * otherwise we could be doing something like $x = "foo", which + * if treated as as a concat, would fail to COW. + */ + if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2) + return; + + /* Benchmarking seems to indicate that we gain if: + * * we optimise at least two actions into a single multiconcat + * (e.g concat+concat, sassign+concat); + * * or if we can eliminate at least 1 OP_CONST; + * * or if we can eliminate a padsv via OPpTARGET_MY + */ + + if ( + /* eliminated at least one OP_CONST */ + nconst >= 1 + /* eliminated an OP_SASSIGN */ + || o->op_type == OP_SASSIGN + /* eliminated an OP_PADSV */ + || (!targmyop && is_targable) + ) + /* definitely a net gain to optimise */ + goto optimise; + + /* ... if not, what else? */ + + /* special-case '$lex1 = expr . $lex1' (where expr isn't lex1): + * multiconcat is faster (due to not creating a temporary copy of + * $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is + * faster. + */ + if ( nconst == 0 + && nargs == 2 + && targmyop + && topop->op_type == OP_CONCAT + ) { + PADOFFSET t = targmyop->op_targ; + OP *k1 = cBINOPx(topop)->op_first; + OP *k2 = cBINOPx(topop)->op_last; + if ( k2->op_type == OP_PADSV + && k2->op_targ == t + && ( k1->op_type != OP_PADSV + || k1->op_targ != t) + ) + goto optimise; + } + + /* need at least two concats */ + if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3) + return; + + + + /* ----------------------------------------------------------------- + * Phase 3: + * + * At this point the optree has been verified as ok to be optimised + * into an OP_MULTICONCAT. Now start changing things. + */ + + optimise: + + /* stringify all const args and determine utf8ness */ + + variant = 0; + for (argp = args; argp <= toparg; argp++) { + SV *sv = (SV*)argp->p; + if (!sv) + continue; /* not a const op */ + if (utf8 && !SvUTF8(sv)) + sv_utf8_upgrade_nomg(sv); + argp->p = SvPV_nomg(sv, argp->len); + total_len += argp->len; + + /* see if any strings would grow if converted to utf8 */ + if (!utf8) { + char *p = (char*)argp->p; + STRLEN len = argp->len; + while (len--) { + U8 c = *p++; + if (!UTF8_IS_INVARIANT(c)) + variant++; + } + } + } + + /* create and populate aux struct */ + + create_aux: + + aux = (UNOP_AUX_item*)PerlMemShared_malloc( + sizeof(UNOP_AUX_item) + * ( + PERL_MULTICONCAT_HEADER_SIZE + + ((nargs + 1) * (variant ? 2 : 1)) + ) + ); + const_str = (char *)PerlMemShared_malloc(total_len ? total_len : 1); + + /* Extract all the non-const expressions from the concat tree then + * dispose of the old tree, e.g. convert the tree from this: + * + * o => SASSIGN + * | + * STRINGIFY -- TARGET + * | + * ex-PUSHMARK -- CONCAT + * | + * CONCAT -- EXPR5 + * | + * CONCAT -- EXPR4 + * | + * CONCAT -- EXPR3 + * | + * EXPR1 -- EXPR2 + * + * + * to: + * + * o => MULTICONCAT + * | + * ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET + * + * except that if EXPRi is an OP_CONST, it's discarded. + * + * During the conversion process, EXPR ops are stripped from the tree + * and unshifted onto o. Finally, any of o's remaining original + * childen are discarded and o is converted into an OP_MULTICONCAT. + * + * In this middle of this, o may contain both: unshifted args on the + * left, and some remaining original args on the right. lastkidop + * is set to point to the right-most unshifted arg to delineate + * between the two sets. + */ + + + if (is_sprintf) { + /* create a copy of the format with the %'s removed, and record + * the sizes of the const string segments in the aux struct */ + char *q, *oldq; + lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; + + p = sprintf_info.start; + q = const_str; + oldq = q; + for (; p < sprintf_info.end; p++) { + if (*p == '%') { + p++; + if (*p != '%') { + (lenp++)->ssize = q - oldq; + oldq = q; + continue; + } + } + *q++ = *p; + } + lenp->ssize = q - oldq; + assert((STRLEN)(q - const_str) == total_len); + + /* Attach all the args (i.e. the kids of the sprintf) to o (which + * may or may not be topop) The pushmark and const ops need to be + * kept in case they're an op_next entry point. + */ + lastkidop = cLISTOPx(topop)->op_last; + kid = cUNOPx(topop)->op_first; /* pushmark */ + op_null(kid); + op_null(OpSIBLING(kid)); /* const */ + if (o != topop) { + kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */ + op_sibling_splice(o, NULL, 0, kid); /* and attach to o */ + lastkidop->op_next = o; + } + } + else { + p = const_str; + lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; + + lenp->ssize = -1; + + /* Concatenate all const strings into const_str. + * Note that args[] contains the RHS args in reverse order, so + * we scan args[] from top to bottom to get constant strings + * in L-R order + */ + for (argp = toparg; argp >= args; argp--) { + if (!argp->p) + /* not a const op */ + (++lenp)->ssize = -1; + else { + STRLEN l = argp->len; + Copy(argp->p, p, l, char); + p += l; + if (lenp->ssize == -1) + lenp->ssize = l; + else + lenp->ssize += l; + } + } + + kid = topop; + nextop = o; + lastkidop = NULL; + + for (argp = args; argp <= toparg; argp++) { + /* only keep non-const args, except keep the first-in-next-chain + * arg no matter what it is (but nulled if OP_CONST), because it + * may be the entry point to this subtree from the previous + * op_next. + */ + bool last = (argp == toparg); + OP *prev; + + /* set prev to the sibling *before* the arg to be cut out, + * e.g. when cutting EXPR: + * + * | + * kid= CONCAT + * | + * prev= CONCAT -- EXPR + * | + */ + if (argp == args && kid->op_type != OP_CONCAT) { + /* in e.g. '$x .= f(1)' there's no RHS concat tree + * so the expression to be cut isn't kid->op_last but + * kid itself */ + OP *o1, *o2; + /* find the op before kid */ + o1 = NULL; + o2 = cUNOPx(parentop)->op_first; + while (o2 && o2 != kid) { + o1 = o2; + o2 = OpSIBLING(o2); + } + assert(o2 == kid); + prev = o1; + kid = parentop; + } + else if (kid == o && lastkidop) + prev = last ? lastkidop : OpSIBLING(lastkidop); + else + prev = last ? NULL : cUNOPx(kid)->op_first; + + if (!argp->p || last) { + /* cut RH op */ + OP *aop = op_sibling_splice(kid, prev, 1, NULL); + /* and unshift to front of o */ + op_sibling_splice(o, NULL, 0, aop); + /* record the right-most op added to o: later we will + * free anything to the right of it */ + if (!lastkidop) + lastkidop = aop; + aop->op_next = nextop; + if (last) { + if (argp->p) + /* null the const at start of op_next chain */ + op_null(aop); + } + else if (prev) + nextop = prev->op_next; + } + + /* the last two arguments are both attached to the same concat op */ + if (argp < toparg - 1) + kid = prev; + } + } + + /* Populate the aux struct */ + + aux[PERL_MULTICONCAT_IX_NARGS].ssize = nargs; + aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv = utf8 ? NULL : const_str; + aux[PERL_MULTICONCAT_IX_PLAIN_LEN].ssize = utf8 ? 0 : total_len; + aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = const_str; + aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = total_len; + + /* if variant > 0, calculate a variant const string and lengths where + * the utf8 version of the string will take 'variant' more bytes than + * the plain one. */ + + if (variant) { + char *p = const_str; + STRLEN ulen = total_len + variant; + UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS; + UNOP_AUX_item *ulens = lens + (nargs + 1); + char *up = (char*)PerlMemShared_malloc(ulen); + SSize_t n; + + aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = up; + aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = ulen; + + for (n = 0; n < (nargs + 1); n++) { + SSize_t i; + char * orig_up = up; + for (i = (lens++)->ssize; i > 0; i--) { + U8 c = *p++; + append_utf8_from_native_byte(c, (U8**)&up); + } + (ulens++)->ssize = (i < 0) ? i : up - orig_up; + } + } + + if (stringop) { + /* if there was a top(ish)-level OP_STRINGIFY, we need to keep + * that op's first child - an ex-PUSHMARK - because the op_next of + * the previous op may point to it (i.e. it's the entry point for + * the o optree) + */ + OP *pmop = + (stringop == o) + ? op_sibling_splice(o, lastkidop, 1, NULL) + : op_sibling_splice(stringop, NULL, 1, NULL); + assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK)); + op_sibling_splice(o, NULL, 0, pmop); + if (!lastkidop) + lastkidop = pmop; + } + + /* Optimise + * target = A.B.C... + * target .= A.B.C... + */ + + if (targetop) { + assert(!targmyop); + + if (o->op_type == OP_SASSIGN) { + /* Move the target subtree from being the last of o's children + * to being the last of o's preserved children. + * Note the difference between 'target = ...' and 'target .= ...': + * for the former, target is executed last; for the latter, + * first. + */ + kid = OpSIBLING(lastkidop); + op_sibling_splice(o, kid, 1, NULL); /* cut target op */ + op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */ + lastkidop->op_next = kid->op_next; + lastkidop = targetop; + } + else { + /* Move the target subtree from being the first of o's + * original children to being the first of *all* o's children. + */ + if (lastkidop) { + op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */ + op_sibling_splice(o, NULL, 0, targetop); /* and paste*/ + } + else { + /* if the RHS of .= doesn't contain a concat (e.g. + * $x .= "foo"), it gets missed by the "strip ops from the + * tree and add to o" loop earlier */ + assert(topop->op_type != OP_CONCAT); + if (stringop) { + /* in e.g. $x .= "$y", move the $y expression + * from being a child of OP_STRINGIFY to being the + * second child of the OP_CONCAT + */ + assert(cUNOPx(stringop)->op_first == topop); + op_sibling_splice(stringop, NULL, 1, NULL); + op_sibling_splice(o, cUNOPo->op_first, 0, topop); + } + assert(topop == OpSIBLING(cBINOPo->op_first)); + if (toparg->p) + op_null(topop); + lastkidop = topop; + } + } + + if (is_targable) { + /* optimise + * my $lex = A.B.C... + * $lex = A.B.C... + * $lex .= A.B.C... + * The original padsv op is kept but nulled in case it's the + * entry point for the optree (which it will be for + * '$lex .= ... ' + */ + private_flags |= OPpTARGET_MY; + private_flags |= (targetop->op_private & OPpLVAL_INTRO); + o->op_targ = targetop->op_targ; + targetop->op_targ = 0; + op_null(targetop); + } + else + flags |= OPf_STACKED; + } + else if (targmyop) { + private_flags |= OPpTARGET_MY; + if (o != targmyop) { + o->op_targ = targmyop->op_targ; + targmyop->op_targ = 0; + } + } + + /* detach the emaciated husk of the sprintf/concat optree and free it */ + for (;;) { + kid = op_sibling_splice(o, lastkidop, 1, NULL); + if (!kid) + break; + op_free(kid); + } + + /* and convert o into a multiconcat */ + + o->op_flags = (flags|OPf_KIDS|stacked_last + |(o->op_flags & (OPf_WANT|OPf_PARENS))); + o->op_private = private_flags; + o->op_type = OP_MULTICONCAT; + o->op_ppaddr = PL_ppaddr[OP_MULTICONCAT]; + cUNOP_AUXo->op_aux = aux; +} + /* do all the final processing on an optree (e.g. running the peephole * optimiser on it), then attach it to cv (if cv is non-null) @@ -2472,15 +3422,84 @@ S_process_optree(pTHX_ CV *cv, OP *optree, OP* start) *startp = start; optree->op_private |= OPpREFCOUNTED; OpREFCNT_set(optree, 1); + optimize_optree(optree); CALL_PEEP(*startp); finalize_optree(optree); S_prune_chain_head(startp); - if (cv) { - /* now that optimizer has done its work, adjust pad values */ - pad_tidy(optree->op_type == OP_LEAVEWRITE ? padtidy_FORMAT - : CvCLONE(cv) ? padtidy_SUBCLONE : padtidy_SUB); + if (cv) { + /* now that optimizer has done its work, adjust pad values */ + pad_tidy(optree->op_type == OP_LEAVEWRITE ? padtidy_FORMAT + : CvCLONE(cv) ? padtidy_SUBCLONE : padtidy_SUB); + } +} + + +/* +=for apidoc optimize_optree + +This function applies some optimisations to the optree in top-down order. +It is called before the peephole optimizer, which processes ops in +execution order. Note that finalize_optree() also does a top-down scan, +but is called *after* the peephole optimizer. + +=cut +*/ + +void +Perl_optimize_optree(pTHX_ OP* o) +{ + PERL_ARGS_ASSERT_OPTIMIZE_OPTREE; + + ENTER; + SAVEVPTR(PL_curcop); + + optimize_op(o); + + LEAVE; +} + + +/* helper for optimize_optree() which optimises on op then recurses + * to optimise any children. + */ + +STATIC void +S_optimize_op(pTHX_ OP* o) +{ + OP *kid; + + PERL_ARGS_ASSERT_OPTIMIZE_OP; + assert(o->op_type != OP_FREED); + + switch (o->op_type) { + case OP_NEXTSTATE: + case OP_DBSTATE: + PL_curcop = ((COP*)o); /* for warnings */ + break; + + + case OP_CONCAT: + case OP_SASSIGN: + case OP_STRINGIFY: + case OP_SPRINTF: + S_maybe_multiconcat(aTHX_ o); + break; + + case OP_SUBST: + if (cPMOPo->op_pmreplrootu.op_pmreplroot) + optimize_op(cPMOPo->op_pmreplrootu.op_pmreplroot); + break; + + default: + break; } + + if (!(o->op_flags & OPf_KIDS)) + return; + + for (kid = cUNOPo->op_first; kid; kid = OpSIBLING(kid)) + optimize_op(kid); } @@ -2578,8 +3597,8 @@ S_finalize_op(pTHX_ OP* o) case OP_CONST: if (cSVOPo->op_private & OPpCONST_STRICT) no_bareword_allowed(o); - /* FALLTHROUGH */ #ifdef USE_ITHREADS + /* FALLTHROUGH */ case OP_HINTSEVAL: op_relocate_sv(&cSVOPo->op_sv, &o->op_targ); #endif @@ -3060,7 +4079,10 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags) case OP_RV2HV: if (type == OP_REFGEN && o->op_flags & OPf_PARENS) { PL_modcount = RETURN_UNLIMITED_NUMBER; - return o; /* Treat \(@foo) like ordinary list. */ + /* Treat \(@foo) like ordinary list, but still mark it as modi- + fiable since some contexts need to know. */ + o->op_flags |= OPf_MOD; + return o; } /* FALLTHROUGH */ case OP_RV2GV: @@ -3092,7 +4114,7 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags) goto nomod; case OP_AVHVSWITCH: if (type == OP_LEAVESUBLV - && (o->op_private & 3) + OP_EACH == OP_KEYS) + && (o->op_private & OPpAVHVSWITCH_MASK) + OP_EACH == OP_KEYS) o->op_private |= OPpMAYBE_LVSUB; goto nomod; case OP_AV2ARYLEN: @@ -3125,7 +4147,12 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags) case OP_PADHV: PL_modcount = RETURN_UNLIMITED_NUMBER; if (type == OP_REFGEN && o->op_flags & OPf_PARENS) - return o; /* Treat \(@foo) like ordinary list. */ + { + /* Treat \(@foo) like ordinary list, but still mark it as modi- + fiable since some contexts need to know. */ + o->op_flags |= OPf_MOD; + return o; + } if (scalar_mod_type(o, type)) goto nomod; if ((o->op_flags & OPf_WANT) != OPf_WANT_SCALAR @@ -3686,7 +4713,8 @@ Perl_apply_attrs_string(pTHX_ const char *stashpv, CV *cv, } STATIC void -S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name) +S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name, + bool curstash) { OP *new_proto = NULL; STRLEN pvlen; @@ -3701,7 +4729,7 @@ S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name) o = *attrs; if (o->op_type == OP_CONST) { pv = SvPV(cSVOPo_sv, pvlen); - if (pvlen >= 10 && memEQ(pv, "prototype(", 10)) { + if (memBEGINs(pv, pvlen, "prototype(")) { SV * const tmpsv = newSVpvn_flags(pv + 10, pvlen - 11, SvUTF8(cSVOPo_sv)); SV ** const tmpo = cSVOPx_svp(o); SvREFCNT_dec(cSVOPo_sv); @@ -3717,7 +4745,7 @@ S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name) for (o = OpSIBLING(lasto); o; o = OpSIBLING(o)) { if (o->op_type == OP_CONST) { pv = SvPV(cSVOPo_sv, pvlen); - if (pvlen >= 10 && memEQ(pv, "prototype(", 10)) { + if (memBEGINs(pv, pvlen, "prototype(")) { SV * const tmpsv = newSVpvn_flags(pv + 10, pvlen - 11, SvUTF8(cSVOPo_sv)); SV ** const tmpo = cSVOPx_svp(o); SvREFCNT_dec(cSVOPo_sv); @@ -3760,12 +4788,20 @@ S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name) else svname = (SV *)name; if (ckWARN(WARN_ILLEGALPROTO)) - (void)validate_proto(svname, cSVOPx_sv(new_proto), TRUE); + (void)validate_proto(svname, cSVOPx_sv(new_proto), TRUE, + curstash); if (*proto && ckWARN(WARN_PROTOTYPE)) { STRLEN old_len, new_len; const char * oldp = SvPV(cSVOPx_sv(*proto), old_len); const char * newp = SvPV(cSVOPx_sv(new_proto), new_len); + if (curstash && svname == (SV *)name + && !memchr(SvPVX(svname), ':', SvCUR(svname))) { + svname = sv_2mortal(newSVsv(PL_curstname)); + sv_catpvs(svname, "::"); + sv_catsv(svname, (SV *)name); + } + Perl_warner(aTHX_ packWARN(WARN_PROTOTYPE), "Prototype '%" UTF8f "' overridden by attribute 'prototype(%" UTF8f ")'" " in %" SVf, @@ -4428,15 +5464,34 @@ S_op_integerize(pTHX_ OP *o) return o; } +/* This function exists solely to provide a scope to limit + setjmp/longjmp() messing with auto variables. + */ +PERL_STATIC_INLINE int +S_fold_constants_eval(pTHX) { + int ret = 0; + dJMPENV; + + JMPENV_PUSH(ret); + + if (ret == 0) { + CALLRUNOPS(aTHX); + } + + JMPENV_POP; + + return ret; +} + static OP * S_fold_constants(pTHX_ OP *const o) { dVAR; - OP * VOL curop; + OP *curop; OP *newop; - VOL I32 type = o->op_type; + I32 type = o->op_type; bool is_stringify; - SV * VOL sv = NULL; + SV *sv = NULL; int ret = 0; OP *old_next; SV * const oldwarnhook = PL_warnhook; @@ -4444,7 +5499,6 @@ S_fold_constants(pTHX_ OP *const o) COP not_compiling; U8 oldwarn = PL_dowarn; I32 old_cxix; - dJMPENV; PERL_ARGS_ASSERT_FOLD_CONSTANTS; @@ -4546,15 +5600,15 @@ S_fold_constants(pTHX_ OP *const o) assert(IN_PERL_RUNTIME); PL_warnhook = PERL_WARNHOOK_FATAL; PL_diehook = NULL; - JMPENV_PUSH(ret); /* Effective $^W=1. */ if ( ! (PL_dowarn & G_WARN_ALL_MASK)) PL_dowarn |= G_WARN_ON; + ret = S_fold_constants_eval(aTHX); + switch (ret) { case 0: - CALLRUNOPS(aTHX); sv = *(PL_stack_sp--); if (o->op_targ && sv == PAD_SV(o->op_targ)) { /* grab pad temp? */ pad_swipe(o->op_targ, FALSE); @@ -4572,7 +5626,6 @@ S_fold_constants(pTHX_ OP *const o) o->op_next = old_next; break; default: - JMPENV_POP; /* Don't expect 1 (setjmp failed) or 2 (something called my_exit) */ PL_warnhook = oldwarnhook; PL_diehook = olddiehook; @@ -4580,7 +5633,6 @@ S_fold_constants(pTHX_ OP *const o) * the stack - eg any nested evals */ Perl_croak(aTHX_ "panic: fold_constants JMPENV_PUSH returned %d", ret); } - JMPENV_POP; PL_dowarn = oldwarn; PL_warnhook = oldwarnhook; PL_diehook = olddiehook; @@ -5264,6 +6316,10 @@ Perl_newBINOP(pTHX_ I32 type, I32 flags, OP *first, OP *last) return fold_constants(op_integerize(op_std_init((OP *)binop))); } +/* Helper function for S_pmtrans(): comparison function to sort an array + * of codepoint range pairs. Sorts by start point, or if equal, by end + * point */ + static int uvcompare(const void *a, const void *b) __attribute__nonnull__(1) __attribute__nonnull__(2) @@ -5281,24 +6337,39 @@ static int uvcompare(const void *a, const void *b) return 0; } +/* Given an OP_TRANS / OP_TRANSR op o, plus OP_CONST ops expr and repl + * containing the search and replacement strings, assemble into + * a translation table attached as o->op_pv. + * Free expr and repl. + * It expects the toker to have already set the + * OPpTRANS_COMPLEMENT + * OPpTRANS_SQUASH + * OPpTRANS_DELETE + * flags as appropriate; this function may add + * OPpTRANS_FROM_UTF + * OPpTRANS_TO_UTF + * OPpTRANS_IDENTICAL + * OPpTRANS_GROWS + * flags + */ + static OP * S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) { SV * const tstr = ((SVOP*)expr)->op_sv; - SV * const rstr = - ((SVOP*)repl)->op_sv; + SV * const rstr = ((SVOP*)repl)->op_sv; STRLEN tlen; STRLEN rlen; const U8 *t = (U8*)SvPV_const(tstr, tlen); const U8 *r = (U8*)SvPV_const(rstr, rlen); - I32 i; - I32 j; - I32 grows = 0; - short *tbl; - - const I32 complement = o->op_private & OPpTRANS_COMPLEMENT; - const I32 squash = o->op_private & OPpTRANS_SQUASH; - I32 del = o->op_private & OPpTRANS_DELETE; + Size_t i, j; + bool grows = FALSE; + OPtrans_map *tbl; + SSize_t struct_size; /* malloced size of table struct */ + + const bool complement = cBOOL(o->op_private & OPpTRANS_COMPLEMENT); + const bool squash = cBOOL(o->op_private & OPpTRANS_SQUASH); + const bool del = cBOOL(o->op_private & OPpTRANS_DELETE); SV* swash; PERL_ARGS_ASSERT_PMTRANS; @@ -5312,6 +6383,14 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) o->op_private |= OPpTRANS_TO_UTF; if (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) { + + /* for utf8 translations, op_sv will be set to point to a swash + * containing codepoint ranges. This is done by first assembling + * a textual representation of the ranges in listsv then compiling + * it using swash_init(). For more details of the textual format, + * see L . + */ + SV* const listsv = newSVpvs("# comment\n"); SV* transv = NULL; const U8* tend = t + tlen; @@ -5353,15 +6432,24 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) * odd. */ if (complement) { + /* utf8 and /c: + * replace t/tlen/tend with a version that has the ranges + * complemented + */ U8 tmpbuf[UTF8_MAXBYTES+1]; UV *cp; UV nextmin = 0; Newx(cp, 2*tlen, UV); i = 0; transv = newSVpvs(""); + + /* convert search string into array of (start,end) range + * codepoint pairs stored in cp[]. Most "ranges" will start + * and end at the same char */ while (t < tend) { cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags); t += ulen; + /* the toker converts X-Y into (X, ILLEGAL_UTF8_BYTE, Y) */ if (t < tend && *t == ILLEGAL_UTF8_BYTE) { t++; cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags); @@ -5372,7 +6460,19 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) } i++; } + + /* sort the ranges */ qsort(cp, i, 2*sizeof(UV), uvcompare); + + /* Create a utf8 string containing the complement of the + * codepoint ranges. For example if cp[] contains [A,B], [C,D], + * then transv will contain the equivalent of: + * join '', map chr, 0, ILLEGAL_UTF8_BYTE, A - 1, + * B + 1, ILLEGAL_UTF8_BYTE, C - 1, + * D + 1, ILLEGAL_UTF8_BYTE, 0x7fffffff; + * A range of a single char skips the ILLEGAL_UTF8_BYTE and + * end cp. + */ for (j = 0; j < i; j++) { UV val = cp[2*j]; diff = val - nextmin; @@ -5390,6 +6490,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) if (val >= nextmin) nextmin = val + 1; } + t = uvchr_to_utf8(tmpbuf,nextmin); sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf); { @@ -5406,6 +6507,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) else if (!rlen && !del) { r = t; rlen = tlen; rend = tend; } + if (!squash) { if ((!rlen && !del) || t == r || (tlen == rlen && memEQ((char *)t, (char *)r, tlen))) @@ -5414,6 +6516,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) } } + /* extract char ranges from t and r and append them to listsv */ + while (t < tend || tfirst <= tlast) { /* see if we need more "t" chars */ if (tfirst > tlast) { @@ -5486,9 +6590,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) tfirst += diff + 1; } + /* compile listsv into a swash and attach to o */ + none = ++max; if (del) - del = ++max; + ++max; if (max > 0xffff) bits = 32; @@ -5527,50 +6633,88 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) goto warnins; } - tbl = (short*)PerlMemShared_calloc( - (o->op_private & OPpTRANS_COMPLEMENT) && - !(o->op_private & OPpTRANS_DELETE) ? 258 : 256, - sizeof(short)); + /* Non-utf8 case: set o->op_pv to point to a simple 256+ entry lookup + * table. Entries with the value -1 indicate chars not to be + * translated, while -2 indicates a search char without a + * corresponding replacement char under /d. + * + * Normally, the table has 256 slots. However, in the presence of + * /c, the search charlist has an implicit \x{100}-\x{7fffffff} + * added, and if there are enough replacement chars to start pairing + * with the \x{100},... search chars, then a larger (> 256) table + * is allocated. + * + * In addition, regardless of whether under /c, an extra slot at the + * end is used to store the final repeating char, or -3 under an empty + * replacement list, or -2 under /d; which makes the runtime code + * easier. + * + * The toker will have already expanded char ranges in t and r. + */ + + /* Initially allocate 257-slot table: 256 for basic (non /c) usage, + * plus final slot for repeat/-2/-3. Later we realloc if excess > * 0. + * The OPtrans_map struct already contains one slot; hence the -1. + */ + struct_size = sizeof(OPtrans_map) + (256 - 1 + 1)*sizeof(short); + tbl = (OPtrans_map*)PerlMemShared_calloc(struct_size, 1); + tbl->size = 256; cPVOPo->op_pv = (char*)tbl; + if (complement) { - for (i = 0; i < (I32)tlen; i++) - tbl[t[i]] = -1; + Size_t excess; + + /* in this branch, j is a count of 'consumed' (i.e. paired off + * with a search char) replacement chars (so j <= rlen always) + */ + for (i = 0; i < tlen; i++) + tbl->map[t[i]] = -1; + for (i = 0, j = 0; i < 256; i++) { - if (!tbl[i]) { - if (j >= (I32)rlen) { + if (!tbl->map[i]) { + if (j == rlen) { if (del) - tbl[i] = -2; + tbl->map[i] = -2; else if (rlen) - tbl[i] = r[j-1]; + tbl->map[i] = r[j-1]; else - tbl[i] = (short)i; + tbl->map[i] = (short)i; } else { - if (UVCHR_IS_INVARIANT(i) && ! UVCHR_IS_INVARIANT(r[j])) - grows = 1; - tbl[i] = r[j++]; + tbl->map[i] = r[j++]; } + if ( tbl->map[i] >= 0 + && UVCHR_IS_INVARIANT((UV)i) + && !UVCHR_IS_INVARIANT((UV)(tbl->map[i])) + ) + grows = TRUE; } } - if (!del) { - if (!rlen) { - j = rlen; - if (!squash) - o->op_private |= OPpTRANS_IDENTICAL; - } - else if (j >= (I32)rlen) - j = rlen - 1; - else { - tbl = - (short *) - PerlMemShared_realloc(tbl, - (0x101+rlen-j) * sizeof(short)); - cPVOPo->op_pv = (char*)tbl; - } - tbl[0x100] = (short)(rlen - j); - for (i=0; i < (I32)rlen - j; i++) - tbl[0x101+i] = r[j+i]; - } + + ASSUME(j <= rlen); + excess = rlen - j; + + if (excess) { + /* More replacement chars than search chars: + * store excess replacement chars at end of main table. + */ + + struct_size += excess; + tbl = (OPtrans_map*)PerlMemShared_realloc(tbl, + struct_size + excess * sizeof(short)); + tbl->size += excess; + cPVOPo->op_pv = (char*)tbl; + + for (i = 0; i < excess; i++) + tbl->map[i + 256] = r[j+i]; + } + else { + /* no more replacement chars than search chars */ + if (!rlen && !del && !squash) + o->op_private |= OPpTRANS_IDENTICAL; + } + + tbl->map[tbl->size] = del ? -2 : rlen ? r[rlen - 1] : -3; } else { if (!rlen && !del) { @@ -5581,26 +6725,30 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) else if (!squash && rlen == tlen && memEQ((char*)t, (char*)r, tlen)) { o->op_private |= OPpTRANS_IDENTICAL; } + for (i = 0; i < 256; i++) - tbl[i] = -1; - for (i = 0, j = 0; i < (I32)tlen; i++,j++) { - if (j >= (I32)rlen) { + tbl->map[i] = -1; + for (i = 0, j = 0; i < tlen; i++,j++) { + if (j >= rlen) { if (del) { - if (tbl[t[i]] == -1) - tbl[t[i]] = -2; + if (tbl->map[t[i]] == -1) + tbl->map[t[i]] = -2; continue; } --j; } - if (tbl[t[i]] == -1) { + if (tbl->map[t[i]] == -1) { if ( UVCHR_IS_INVARIANT(t[i]) && ! UVCHR_IS_INVARIANT(r[j])) - grows = 1; - tbl[t[i]] = r[j]; + grows = TRUE; + tbl->map[t[i]] = r[j]; } } + tbl->map[tbl->size] = del ? -1 : rlen ? -1 : -3; } + /* both non-utf8 and utf8 code paths end up here */ + warnins: if(del && rlen == tlen) { Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Useless use of /d modifier in transliteration operator"); @@ -5616,6 +6764,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) return o; } + /* =for apidoc Am|OP *|newPMOP|I32 type|I32 flags @@ -5816,6 +6965,17 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor) scope->op_next = NULL; /* stop on last op */ op_null(scope); } + + /* XXX optimize_optree() must be called on o before + * CALL_PEEP(), as currently S_maybe_multiconcat() can't + * currently cope with a peephole-optimised optree. + * Calling optimize_optree() here ensures that condition + * is met, but may mean optimize_optree() is applied + * to the same optree later (where hopefully it won't do any + * harm as it can't convert an op to multiconcat if it's + * already been converted */ + optimize_optree(o); + /* have to peep the DOs individually as we've removed it from * the op_next chain */ CALL_PEEP(o); @@ -6203,9 +7363,10 @@ Perl_newGVOP(pTHX_ I32 type, I32 flags, GV *gv) Constructs, checks, and returns an op of any type that involves an embedded C-level pointer (PV). C is the opcode. C gives -the eight bits of C. C supplies the C-level pointer, which -must have been allocated using C; the memory will -be freed when the op is destroyed. +the eight bits of C. C supplies the C-level pointer. +Depending on the op type, the memory referenced by C may be freed +when the op is destroyed. If the op is of a freeing type, C must +have been allocated using C. =cut */ @@ -6559,11 +7720,24 @@ S_assignment_type(pTHX_ const OP *o) if (!o) return TRUE; - if ((o->op_type == OP_NULL) && (o->op_flags & OPf_KIDS)) - o = cUNOPo->op_first; + if (o->op_type == OP_SREFGEN) + { + OP * const kid = cUNOPx(cUNOPo->op_first)->op_first; + type = kid->op_type; + flags = o->op_flags | kid->op_flags; + if (!(flags & OPf_PARENS) + && (kid->op_type == OP_RV2AV || kid->op_type == OP_PADAV || + kid->op_type == OP_RV2HV || kid->op_type == OP_PADHV )) + return ASSIGN_REF; + ret = ASSIGN_REF; + } else { + if ((o->op_type == OP_NULL) && (o->op_flags & OPf_KIDS)) + o = cUNOPo->op_first; + flags = o->op_flags; + type = o->op_type; + ret = 0; + } - flags = o->op_flags; - type = o->op_type; if (type == OP_COND_EXPR) { OP * const sib = OpSIBLING(cLOGOPo->op_first); const I32 t = assignment_type(sib); @@ -6576,19 +7750,6 @@ S_assignment_type(pTHX_ const OP *o) return FALSE; } - if (type == OP_SREFGEN) - { - OP * const kid = cUNOPx(cUNOPo->op_first)->op_first; - type = kid->op_type; - flags |= kid->op_flags; - if (!(flags & OPf_PARENS) - && (kid->op_type == OP_RV2AV || kid->op_type == OP_PADAV || - kid->op_type == OP_RV2HV || kid->op_type == OP_PADHV )) - return ASSIGN_REF; - ret = ASSIGN_REF; - } - else ret = 0; - if (type == OP_LIST && (flags & OPf_WANT) == OPf_WANT_SCALAR && o->op_private & OPpLVAL_INTRO) @@ -6609,6 +7770,33 @@ S_assignment_type(pTHX_ const OP *o) return ret; } +static OP * +S_newONCEOP(pTHX_ OP *initop, OP *padop) +{ + const PADOFFSET target = padop->op_targ; + OP *const other = newOP(OP_PADSV, + padop->op_flags + | ((padop->op_private & ~OPpLVAL_INTRO) << 8)); + OP *const first = newOP(OP_NULL, 0); + OP *const nullop = newCONDOP(0, first, initop, other); + /* XXX targlex disabled for now; see ticket #124160 + newCONDOP(0, first, S_maybe_targlex(aTHX_ initop), other); + */ + OP *const condop = first->op_next; + + OpTYPE_set(condop, OP_ONCE); + other->op_targ = target; + nullop->op_flags |= OPf_WANT_SCALAR; + + /* Store the initializedness of state vars in a separate + pad entry. */ + condop->op_targ = + pad_add_name_pvn("$",1,padadd_NO_DUP_CHECK|padadd_STATE,0,0); + /* hijacking PADSTALE for uninitialized state variables */ + SvPADSTALE_on(PAD_SVl(condop->op_targ)); + + return nullop; +} /* =for apidoc Am|OP *|newASSIGNOP|I32 flags|OP *left|I32 optype|OP *right @@ -6653,8 +7841,9 @@ Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right) } if ((assign_type = assignment_type(left)) == ASSIGN_LIST) { + OP *state_var_op = NULL; static const char no_list_state[] = "Initialization of state variables" - " in list context currently forbidden"; + " in list currently forbidden"; OP *curop; if (left->op_type == OP_ASLICE || left->op_type == OP_HSLICE) @@ -6668,16 +7857,29 @@ Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right) if (OP_TYPE_IS_OR_WAS(left, OP_LIST)) { - OP* lop = ((LISTOP*)left)->op_first; - while (lop) { - if ((lop->op_type == OP_PADSV || - lop->op_type == OP_PADAV || - lop->op_type == OP_PADHV || - lop->op_type == OP_PADANY) - && (lop->op_private & OPpPAD_STATE) - ) - yyerror(no_list_state); - lop = OpSIBLING(lop); + OP *lop = ((LISTOP*)left)->op_first, *vop, *eop; + if (!(left->op_flags & OPf_PARENS) && + lop->op_type == OP_PUSHMARK && + (vop = OpSIBLING(lop)) && + (vop->op_type == OP_PADAV || vop->op_type == OP_PADHV) && + !(vop->op_flags & OPf_PARENS) && + (vop->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)) == + (OPpLVAL_INTRO|OPpPAD_STATE) && + (eop = OpSIBLING(vop)) && + eop->op_type == OP_ENTERSUB && + !OpHAS_SIBLING(eop)) { + state_var_op = vop; + } else { + while (lop) { + if ((lop->op_type == OP_PADSV || + lop->op_type == OP_PADAV || + lop->op_type == OP_PADHV || + lop->op_type == OP_PADANY) + && (lop->op_private & OPpPAD_STATE) + ) + yyerror(no_list_state); + lop = OpSIBLING(lop); + } } } else if ( (left->op_private & OPpLVAL_INTRO) @@ -6697,7 +7899,10 @@ Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right) state (%a) = ... (state %a) = ... */ - yyerror(no_list_state); + if (left->op_flags & OPf_PARENS) + yyerror(no_list_state); + else + state_var_op = left; } /* optimise @a = split(...) into: @@ -6789,6 +7994,9 @@ Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right) } } } + + if (state_var_op) + o = S_newONCEOP(aTHX_ o, state_var_op); return o; } if (assign_type == ASSIGN_REF) @@ -7068,9 +8276,8 @@ S_new_logop(pTHX_ I32 type, I32 flags, OP** firstp, OP** otherp) && o2->op_private & OPpLVAL_INTRO && !(o2->op_private & OPpPAD_STATE)) { - Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), - "Deprecated use of my() in false conditional. " - "This will be a fatal error in Perl 5.30"); + Perl_croak(aTHX_ "This use of my() in false conditional is " + "no longer allowed"); } *otherp = NULL; @@ -7879,6 +9086,13 @@ S_looks_like_bool(pTHX_ const OP *o) case OP_FLOP: return TRUE; + + case OP_INDEX: + case OP_RINDEX: + /* optimised-away (index() != -1) or similar comparison */ + if (o->op_private & OPpTRUEBOOL) + return TRUE; + return FALSE; case OP_CONST: /* Detect comparisons that have been optimized away */ @@ -7888,7 +9102,6 @@ S_looks_like_bool(pTHX_ const OP *o) return TRUE; else return FALSE; - /* FALLTHROUGH */ default: return FALSE; @@ -7899,8 +9112,8 @@ S_looks_like_bool(pTHX_ const OP *o) =for apidoc Am|OP *|newGIVENOP|OP *cond|OP *block|PADOFFSET defsv_off Constructs, checks, and returns an op tree expressing a C block. -C supplies the expression that will be locally assigned to a lexical -variable, and C supplies the body of the C construct; they +C supplies the expression to whose value C<$_> will be locally +aliased, and C supplies the body of the C construct; they are consumed by this function and become part of the constructed op tree. C must be zero (it used to identity the pad slot of lexical $_). @@ -8205,6 +9418,8 @@ Perl_newMYSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block) PERL_ARGS_ASSERT_NEWMYSUB; + PL_hints |= HINT_BLOCK_SCOPE; + /* Find the pad slot for storing the new sub. We cannot use PL_comppad, as it is the pad owned by the new sub. We need to look in CvOUTSIDE and find the pad belonging to the enclos- @@ -8226,7 +9441,7 @@ Perl_newMYSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block) spot = (CV **)svspot; if (!(PL_parser && PL_parser->error_count)) - move_proto_attr(&proto, &attrs, (GV *)PadnameSV(name)); + move_proto_attr(&proto, &attrs, (GV *)PadnameSV(name), 0); if (proto) { assert(proto->op_type == OP_CONST); @@ -8533,6 +9748,85 @@ Perl_newMYSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block) return cv; } +/* +=for apidoc m|CV *|newATTRSUB_x|I32 floor|OP *o|OP *proto|OP *attrs|OP *block|bool o_is_gv + +Construct a Perl subroutine, also performing some surrounding jobs. + +This function is expected to be called in a Perl compilation context, +and some aspects of the subroutine are taken from global variables +associated with compilation. In particular, C represents +the subroutine that is currently being compiled. It must be non-null +when this function is called, and some aspects of the subroutine being +constructed are taken from it. The constructed subroutine may actually +be a reuse of the C object, but will not necessarily be so. + +If C is null then the subroutine will have no body, and for the +time being it will be an error to call it. This represents a forward +subroutine declaration such as S>. If C is +non-null then it provides the Perl code of the subroutine body, which +will be executed when the subroutine is called. This body includes +any argument unwrapping code resulting from a subroutine signature or +similar. The pad use of the code must correspond to the pad attached +to C. The code is not expected to include a C or +C op; this function will add such an op. C is consumed +by this function and will become part of the constructed subroutine. + +C specifies the subroutine's prototype, unless one is supplied +as an attribute (see below). If C is null, then the subroutine +will not have a prototype. If C is non-null, it must point to a +C op whose value is a string, and the subroutine will have that +string as its prototype. If a prototype is supplied as an attribute, the +attribute takes precedence over C, but in that case C should +preferably be null. In any case, C is consumed by this function. + +C supplies attributes to be applied the subroutine. A handful of +attributes take effect by built-in means, being applied to C +immediately when seen. Other attributes are collected up and attached +to the subroutine by this route. C may be null to supply no +attributes, or point to a C op for a single attribute, or point +to a C op whose children apart from the C are C +ops for one or more attributes. Each C op must be a string, +giving the attribute name optionally followed by parenthesised arguments, +in the manner in which attributes appear in Perl source. The attributes +will be applied to the sub by this function. C is consumed by +this function. + +If C is false and C is null, then the subroutine will +be anonymous. If C is false and C is non-null, then C +must point to a C op, which will be consumed by this function, +and its string value supplies a name for the subroutine. The name may +be qualified or unqualified, and if it is unqualified then a default +stash will be selected in some manner. If C is true, then C +doesn't point to an C at all, but is instead a cast pointer to a C +by which the subroutine will be named. + +If there is already a subroutine of the specified name, then the new +sub will either replace the existing one in the glob or be merged with +the existing one. A warning may be generated about redefinition. + +If the subroutine has one of a few special names, such as C or +C, then it will be claimed by the appropriate queue for automatic +running of phase-related subroutines. In this case the relevant glob will +be left not containing any subroutine, even if it did contain one before. +In the case of C, the subroutine will be executed and the reference +to it disposed of before this function returns. + +The function returns a pointer to the constructed subroutine. If the sub +is anonymous then ownership of one counted reference to the subroutine +is transferred to the caller. If the sub is named then the caller does +not get ownership of a reference. In most such cases, where the sub +has a non-phase name, the sub will be alive at the point it is returned +by virtue of being contained in the glob that names it. A phase-named +subroutine will usually be alive by virtue of the reference owned by the +phase's automatic run queue. But a C subroutine, having already +been executed, will quite likely have been destroyed already by the +time this function returns, making it erroneous for the caller to make +any use of the returned pointer. It is the caller's responsibility to +ensure that it knows which of these situations applies. + +=cut +*/ /* _x = extended */ CV * @@ -8578,9 +9872,12 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, Also, we may be called from load_module at run time, so PL_curstash (which sets CvSTASH) may not point to the stash the sub is stored in. */ + /* XXX This optimization is currently disabled for packages other + than main, since there was too much CPAN breakage. */ const I32 flags = ec ? GV_NOADD_NOINIT - : PL_curstash != CopSTASH(PL_curcop) + : (IN_PERL_RUNTIME && PL_curstash != CopSTASH(PL_curcop)) + || PL_curstash != PL_defstash || memchr(name, ':', namlen) || memchr(name, '\'', namlen) ? gv_fetch_flags : GV_ADDMULTI | GV_NOINIT | GV_NOTQUAL; @@ -8603,10 +9900,10 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, if (!ec) { if (isGV(gv)) { - move_proto_attr(&proto, &attrs, gv); + move_proto_attr(&proto, &attrs, gv, 0); } else { assert(cSVOPo); - move_proto_attr(&proto, &attrs, (GV *)cSVOPo->op_sv); + move_proto_attr(&proto, &attrs, (GV *)cSVOPo->op_sv, 1); } } @@ -8635,7 +9932,7 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, PL_compcv = 0; if (name && block) { - const char *s = strrchr(name, ':'); + const char *s = (char *) my_memrchr(name, ':', namlen); s = s ? s+1 : name; if (strEQ(s, "BEGIN")) { if (PL_in_eval & EVAL_KEEPERR) @@ -8799,6 +10096,8 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, NULL, name, namlen, name_is_utf8 ? SVf_UTF8 : 0, const_sv ); + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); CvFLAGS(cv) |= CvMETHOD(PL_compcv); } else { @@ -8897,8 +10196,12 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, SvROK_on(gv); } SvRV_set(gv, (SV *)cv); + if (HvENAME_HEK(PL_curstash)) + mro_method_changed_in(PL_curstash); } } + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); if (!CvHASGV(cv)) { if (isGV(gv)) @@ -8987,12 +10290,15 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, process_special_blocks(floor, name, gv, cv); } } + assert(cv); done: + assert(!cv || evanescent || SvREFCNT((SV*)cv) != 0); if (PL_parser) PL_parser->copline = NOLINE; LEAVE_SCOPE(floor); + assert(!cv || evanescent || SvREFCNT((SV*)cv) != 0); if (!evanescent) { #ifdef PERL_DEBUG_READONLY_OPS if (slab) @@ -9107,9 +10413,11 @@ S_process_special_blocks(pTHX_ I32 floor, const char *const fullname, } /* -=for apidoc newCONSTSUB +=for apidoc Am|CV *|newCONSTSUB|HV *stash|const char *name|SV *sv -See L. +Behaves like L, except that C is nul-terminated +rather than of counted length, and no flags are set. (This means that +C is always interpreted as Latin-1.) =cut */ @@ -9121,20 +10429,71 @@ Perl_newCONSTSUB(pTHX_ HV *stash, const char *name, SV *sv) } /* -=for apidoc newCONSTSUB_flags - -Creates a constant sub equivalent to Perl S> which is -eligible for inlining at compile-time. - -Currently, the only useful value for C is C. - -The newly created subroutine takes ownership of a reference to the passed in -SV. - -Passing C for SV creates a constant sub equivalent to S>, -which won't be called if used as a destructor, but will suppress the overhead -of a call to C. (This form, however, isn't eligible for inlining at -compile time.) +=for apidoc Am|CV *|newCONSTSUB_flags|HV *stash|const char *name|STRLEN len|U32 flags|SV *sv + +Construct a constant subroutine, also performing some surrounding +jobs. A scalar constant-valued subroutine is eligible for inlining +at compile-time, and in Perl code can be created by S>. Other kinds of constant subroutine have other treatment. + +The subroutine will have an empty prototype and will ignore any arguments +when called. Its constant behaviour is determined by C. If C +is null, the subroutine will yield an empty list. If C points to a +scalar, the subroutine will always yield that scalar. If C points +to an array, the subroutine will always yield a list of the elements of +that array in list context, or the number of elements in the array in +scalar context. This function takes ownership of one counted reference +to the scalar or array, and will arrange for the object to live as long +as the subroutine does. If C points to a scalar then the inlining +assumes that the value of the scalar will never change, so the caller +must ensure that the scalar is not subsequently written to. If C +points to an array then no such assumption is made, so it is ostensibly +safe to mutate the array or its elements, but whether this is really +supported has not been determined. + +The subroutine will have C set according to C. +Other aspects of the subroutine will be left in their default state. +The caller is free to mutate the subroutine beyond its initial state +after this function has returned. + +If C is null then the subroutine will be anonymous, with its +C referring to an C<__ANON__> glob. If C is non-null then the +subroutine will be named accordingly, referenced by the appropriate glob. +C is a string of length C bytes giving a sigilless symbol +name, in UTF-8 if C has the C bit set and in Latin-1 +otherwise. The name may be either qualified or unqualified. If the +name is unqualified then it defaults to being in the stash specified by +C if that is non-null, or to C if C is null. +The symbol is always added to the stash if necessary, with C +semantics. + +C should not have bits set other than C. + +If there is already a subroutine of the specified name, then the new sub +will replace the existing one in the glob. A warning may be generated +about the redefinition. + +If the subroutine has one of a few special names, such as C or +C, then it will be claimed by the appropriate queue for automatic +running of phase-related subroutines. In this case the relevant glob will +be left not containing any subroutine, even if it did contain one before. +Execution of the subroutine will likely be a no-op, unless C was +a tied array or the caller modified the subroutine in some interesting +way before it was executed. In the case of C, the treatment is +buggy: the sub will be executed when only half built, and may be deleted +prematurely, possibly causing a crash. + +The function returns a pointer to the constructed subroutine. If the sub +is anonymous then ownership of one counted reference to the subroutine +is transferred to the caller. If the sub is named then the caller does +not get ownership of a reference. In most such cases, where the sub +has a non-phase name, the sub will be alive at the point it is returned +by virtue of being contained in the glob that names it. A phase-named +subroutine will usually be alive by virtue of the reference owned by +the phase's automatic run queue. A C subroutine may have been +destroyed already by the time this function returns, but currently bugs +occur in that case before the caller gets control. It is the caller's +responsibility to ensure that it knows which of these situations applies. =cut */ @@ -9181,6 +10540,8 @@ Perl_newCONSTSUB_flags(pTHX_ HV *stash, const char *name, STRLEN len, : const_sv_xsub, file ? file : "", "", &sv, XS_DYNAMIC_FILENAME | flags); + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); CvXSUBANY(cv).any_ptr = SvREFCNT_inc_simple(sv); CvCONST_on(cv); @@ -9227,6 +10588,78 @@ Perl_newXS_deffile(pTHX_ const char *name, XSUBADDR_t subaddr) ); } +/* +=for apidoc m|CV *|newXS_len_flags|const char *name|STRLEN len|XSUBADDR_t subaddr|const char *const filename|const char *const proto|SV **const_svp|U32 flags + +Construct an XS subroutine, also performing some surrounding jobs. + +The subroutine will have the entry point C. It will have +the prototype specified by the nul-terminated string C, or +no prototype if C is null. The prototype string is copied; +the caller can mutate the supplied string afterwards. If C +is non-null, it must be a nul-terminated filename, and the subroutine +will have its C set accordingly. By default C is set to +point directly to the supplied string, which must be static. If C +has the C bit set, then a copy of the string will +be taken instead. + +Other aspects of the subroutine will be left in their default state. +If anything else needs to be done to the subroutine for it to function +correctly, it is the caller's responsibility to do that after this +function has constructed it. However, beware of the subroutine +potentially being destroyed before this function returns, as described +below. + +If C is null then the subroutine will be anonymous, with its +C referring to an C<__ANON__> glob. If C is non-null then the +subroutine will be named accordingly, referenced by the appropriate glob. +C is a string of length C bytes giving a sigilless symbol name, +in UTF-8 if C has the C bit set and in Latin-1 otherwise. +The name may be either qualified or unqualified, with the stash defaulting +in the same manner as for C. C may contain +flag bits understood by C with the same meaning as +they have there, such as C. The symbol is always added to +the stash if necessary, with C semantics. + +If there is already a subroutine of the specified name, then the new sub +will replace the existing one in the glob. A warning may be generated +about the redefinition. If the old subroutine was C then the +decision about whether to warn is influenced by an expectation about +whether the new subroutine will become a constant of similar value. +That expectation is determined by C. (Note that the call to +this function doesn't make the new subroutine C in any case; +that is left to the caller.) If C is null then it indicates +that the new subroutine will not become a constant. If C +is non-null then it indicates that the new subroutine will become a +constant, and it points to an C that provides the constant value +that the subroutine will have. + +If the subroutine has one of a few special names, such as C or +C, then it will be claimed by the appropriate queue for automatic +running of phase-related subroutines. In this case the relevant glob will +be left not containing any subroutine, even if it did contain one before. +In the case of C, the subroutine will be executed and the reference +to it disposed of before this function returns, and also before its +prototype is set. If a C subroutine would not be sufficiently +constructed by this function to be ready for execution then the caller +must prevent this happening by giving the subroutine a different name. + +The function returns a pointer to the constructed subroutine. If the sub +is anonymous then ownership of one counted reference to the subroutine +is transferred to the caller. If the sub is named then the caller does +not get ownership of a reference. In most such cases, where the sub +has a non-phase name, the sub will be alive at the point it is returned +by virtue of being contained in the glob that names it. A phase-named +subroutine will usually be alive by virtue of the reference owned by the +phase's automatic run queue. But a C subroutine, having already +been executed, will quite likely have been destroyed already by the +time this function returns, making it erroneous for the caller to make +any use of the returned pointer. It is the caller's responsibility to +ensure that it knows which of these situations applies. + +=cut +*/ + CV * Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, XSUBADDR_t subaddr, const char *const filename, @@ -9235,6 +10668,7 @@ Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, { CV *cv; bool interleave = FALSE; + bool evanescent = FALSE; PERL_ARGS_ASSERT_NEWXS_LEN_FLAGS; @@ -9279,6 +10713,8 @@ Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, gv_method_changed(gv); /* newXS */ } } + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); CvGV_set(cv, gv); if(filename) { @@ -9306,14 +10742,17 @@ Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, #endif if (name) - process_special_blocks(0, name, gv, cv); + evanescent = process_special_blocks(0, name, gv, cv); else CvANON_on(cv); } /* <- not a conditional branch */ + assert(cv); + assert(evanescent || SvREFCNT((SV*)cv) != 0); - sv_setpv(MUTABLE_SV(cv), proto); + if (!evanescent) sv_setpv(MUTABLE_SV(cv), proto); if (interleave) LEAVE; + assert(evanescent || SvREFCNT((SV*)cv) != 0); return cv; } @@ -9471,6 +10910,8 @@ Perl_oopsHV(pTHX_ OP *o) case OP_RV2SV: case OP_RV2AV: OpTYPE_set(o, OP_RV2HV); + /* rv2hv steals the bottom bit for its own uses */ + o->op_private &= ~OPpARG1_MASK; ref(o, OP_RV2HV); break; @@ -9613,6 +11054,7 @@ Perl_ck_backtick(pTHX_ OP *o) OP *newop = NULL; OP *sibl; PERL_ARGS_ASSERT_CK_BACKTICK; + o = ck_fun(o); /* qx and `` have a null pushmark; CORE::readpipe has only one kid. */ if (o->op_flags & OPf_KIDS && (sibl = OpSIBLING(cUNOPo->op_first)) && (gv = gv_override("readpipe",8))) @@ -9638,12 +11080,6 @@ Perl_ck_bitop(pTHX_ OP *o) o->op_private = (U8)(PL_hints & HINT_INTEGER); - if (o->op_type == OP_NBIT_OR || o->op_type == OP_SBIT_OR - || o->op_type == OP_NBIT_XOR || o->op_type == OP_SBIT_XOR - || o->op_type == OP_NBIT_AND || o->op_type == OP_SBIT_AND - || o->op_type == OP_NCOMPLEMENT || o->op_type == OP_SCOMPLEMENT) - Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__BITWISE), - "The bitwise feature is experimental"); if (!(o->op_flags & OPf_STACKED) /* Not an assignment */ && OP_IS_INFIX_BIT(o->op_type)) { @@ -9679,11 +11115,27 @@ is_dollar_bracket(pTHX_ const OP * const o) && strEQ(GvNAME(cGVOPx_gv(kid)), "["); } +/* for lt, gt, le, ge, eq, ne and their i_ variants */ + OP * Perl_ck_cmp(pTHX_ OP *o) { + bool is_eq; + bool neg; + bool reverse; + bool iv0; + OP *indexop, *constop, *start; + SV *sv; + IV iv; + PERL_ARGS_ASSERT_CK_CMP; - if (ckWARN(WARN_SYNTAX)) { + + is_eq = ( o->op_type == OP_EQ + || o->op_type == OP_NE + || o->op_type == OP_I_EQ + || o->op_type == OP_I_NE); + + if (!is_eq && ckWARN(WARN_SYNTAX)) { const OP *kid = cUNOPo->op_first; if (kid && ( @@ -9698,9 +11150,87 @@ Perl_ck_cmp(pTHX_ OP *o) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "$[ used in %s (did you mean $] ?)", OP_DESC(o)); } - return o; + + /* convert (index(...) == -1) and variations into + * (r)index/BOOL(,NEG) + */ + + reverse = FALSE; + + indexop = cUNOPo->op_first; + constop = OpSIBLING(indexop); + start = NULL; + if (indexop->op_type == OP_CONST) { + constop = indexop; + indexop = OpSIBLING(constop); + start = constop; + reverse = TRUE; + } + + if (indexop->op_type != OP_INDEX && indexop->op_type != OP_RINDEX) + return o; + + /* ($lex = index(....)) == -1 */ + if (indexop->op_private & OPpTARGET_MY) + return o; + + if (constop->op_type != OP_CONST) + return o; + + sv = cSVOPx_sv(constop); + if (!(sv && SvIOK_notUV(sv))) + return o; + + iv = SvIVX(sv); + if (iv != -1 && iv != 0) + return o; + iv0 = (iv == 0); + + if (o->op_type == OP_LT || o->op_type == OP_I_LT) { + if (!(iv0 ^ reverse)) + return o; + neg = iv0; + } + else if (o->op_type == OP_LE || o->op_type == OP_I_LE) { + if (iv0 ^ reverse) + return o; + neg = !iv0; + } + else if (o->op_type == OP_GE || o->op_type == OP_I_GE) { + if (!(iv0 ^ reverse)) + return o; + neg = !iv0; + } + else if (o->op_type == OP_GT || o->op_type == OP_I_GT) { + if (iv0 ^ reverse) + return o; + neg = iv0; + } + else if (o->op_type == OP_EQ || o->op_type == OP_I_EQ) { + if (iv0) + return o; + neg = TRUE; + } + else { + assert(o->op_type == OP_NE || o->op_type == OP_I_NE); + if (iv0) + return o; + neg = FALSE; + } + + indexop->op_flags &= ~OPf_PARENS; + indexop->op_flags |= (o->op_flags & OPf_PARENS); + indexop->op_private |= OPpTRUEBOOL; + if (neg) + indexop->op_private |= OPpINDEX_BOOLNEG; + /* cut out the index op and free the eq,const ops */ + (void)op_sibling_splice(o, start, 1, NULL); + op_free(o); + + return indexop; } + OP * Perl_ck_concat(pTHX_ OP *o) { @@ -9709,9 +11239,13 @@ Perl_ck_concat(pTHX_ OP *o) PERL_ARGS_ASSERT_CK_CONCAT; PERL_UNUSED_CONTEXT; + /* reuse the padtmp returned by the concat child */ if (kid->op_type == OP_CONCAT && !(kid->op_private & OPpTARGET_MY) && !(kUNOP->op_first->op_flags & OPf_MOD)) + { o->op_flags |= OPf_STACKED; + o->op_private |= OPpCONCAT_NESTED; + } return o; } @@ -9813,6 +11347,7 @@ Perl_ck_eof(pTHX_ OP *o) return o; } + OP * Perl_ck_eval(pTHX_ OP *o) { @@ -9925,6 +11460,10 @@ Perl_ck_rvconst(pTHX_ OP *o) PERL_ARGS_ASSERT_CK_RVCONST; + if (o->op_type == OP_RV2HV) + /* rv2hv steals the bottom bit for its own uses */ + o->op_private &= ~OPpARG1_MASK; + o->op_private |= (PL_hints & HINT_STRICT_REFS); if (kid->op_type == OP_CONST) { @@ -10630,35 +12169,13 @@ Perl_ck_sassign(pTHX_ OP *o) ) && (kkid->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)) == (OPpLVAL_INTRO|OPpPAD_STATE)) { - const PADOFFSET target = kkid->op_targ; - OP *const other = newOP(OP_PADSV, - kkid->op_flags - | ((kkid->op_private & ~OPpLVAL_INTRO) << 8)); - OP *const first = newOP(OP_NULL, 0); - OP *const nullop = - newCONDOP(0, first, o, other); - /* XXX targlex disabled for now; see ticket #124160 - newCONDOP(0, first, S_maybe_targlex(aTHX_ o), other); - */ - OP *const condop = first->op_next; - - OpTYPE_set(condop, OP_ONCE); - other->op_targ = target; - nullop->op_flags |= OPf_WANT_SCALAR; - - /* Store the initializedness of state vars in a separate - pad entry. */ - condop->op_targ = - pad_add_name_pvn("$",1,padadd_NO_DUP_CHECK|padadd_STATE,0,0); - /* hijacking PADSTALE for uninitialized state variables */ - SvPADSTALE_on(PAD_SVl(condop->op_targ)); - - return nullop; + return S_newONCEOP(aTHX_ o, kkid); } } return S_maybe_targlex(aTHX_ o); } + OP * Perl_ck_match(pTHX_ OP *o) { @@ -10685,7 +12202,9 @@ Perl_ck_method(pTHX_ OP *o) sv = kSVOP->op_sv; /* replace ' with :: */ - while ((compatptr = strchr(SvPVX(sv), '\''))) { + while ((compatptr = (char *) memchr(SvPVX(sv), '\'', + SvEND(sv) - SvPVX(sv) ))) + { *compatptr = ':'; sv_insert(sv, compatptr - SvPVX_const(sv), 0, ":", 1); } @@ -10706,13 +12225,13 @@ Perl_ck_method(pTHX_ OP *o) return newMETHOP_named(OP_METHOD_NAMED, 0, methsv); } - if (nsplit == 7 && memEQ(method, "SUPER::", nsplit)) { /* $proto->SUPER::method() */ + if (memEQs(method, nsplit, "SUPER::")) { /* $proto->SUPER::method() */ op_free(o); return newMETHOP_named(OP_METHOD_SUPER, 0, methsv); } /* $proto->MyClass::method() and $proto->MyClass::SUPER::method() */ - if (nsplit >= 9 && strnEQ(method+nsplit-9, "::SUPER::", 9)) { + if (nsplit >= 9 && strBEGINs(method+nsplit-9, "::SUPER::")) { rclass = newSVpvn_share(method, utf8*(nsplit-9), 0); new_op = newMETHOP_named(OP_METHOD_REDIR_SUPER, 0, methsv); } else { @@ -11061,10 +12580,10 @@ Perl_ck_sort(pTHX_ OP *o) SV ** const svp = hv_fetchs(hinthv, "sort", FALSE); if (svp) { const I32 sorthints = (I32)SvIV(*svp); - if ((sorthints & HINT_SORT_QUICKSORT) != 0) - o->op_private |= OPpSORT_QSORT; if ((sorthints & HINT_SORT_STABLE) != 0) o->op_private |= OPpSORT_STABLE; + if ((sorthints & HINT_SORT_UNSTABLE) != 0) + o->op_private |= OPpSORT_UNSTABLE; } } @@ -11492,11 +13011,18 @@ Perl_rv2cv_op_cv(pTHX_ OP *cvop, U32 flags) } if (SvTYPE((SV*)cv) != SVt_PVCV) return NULL; - if (flags & (RV2CVOPCV_RETURN_NAME_GV|RV2CVOPCV_MAYBE_NAME_GV)) { - if ((!CvANON(cv) || !gv) && !CvLEXICAL(cv) - && ((flags & RV2CVOPCV_RETURN_NAME_GV) || !CvNAMED(cv))) + if (flags & RV2CVOPCV_RETURN_NAME_GV) { + if ((!CvANON(cv) && !CvLEXICAL(cv)) || !gv) + gv = CvGV(cv); + return (CV*)gv; + } + else if (flags & RV2CVOPCV_MAYBE_NAME_GV) { + if (CvLEXICAL(cv) || CvNAMED(cv)) + return NULL; + if (!CvANON(cv) || !gv) gv = CvGV(cv); return (CV*)gv; + } else { return cv; } @@ -11670,7 +13196,7 @@ Perl_ck_entersub_args_proto(pTHX_ OP *entersubop, GV *namegv, SV *protosv) switch (*proto++) { case '[': if (contextclass++ == 0) { - e = strchr(proto, ']'); + e = (char *) memchr(proto, ']', proto_end - proto); if (!e || e == proto) goto oops; } @@ -11828,7 +13354,8 @@ Perl_ck_entersub_args_proto_or_list(pTHX_ OP *entersubop, OP * Perl_ck_entersub_args_core(pTHX_ OP *entersubop, GV *namegv, SV *protosv) { - int opnum = SvTYPE(protosv) == SVt_PVCV ? 0 : (int)SvUV(protosv); + IV cvflags = SvIVX(protosv); + int opnum = cvflags & 0xffff; OP *aop = cUNOPx(entersubop)->op_first; PERL_ARGS_ASSERT_CK_ENTERSUB_ARGS_CORE; @@ -11839,11 +13366,14 @@ Perl_ck_entersub_args_core(pTHX_ OP *entersubop, GV *namegv, SV *protosv) aop = cUNOPx(aop)->op_first; aop = OpSIBLING(aop); for (cvop = aop; OpSIBLING(cvop); cvop = OpSIBLING(cvop)) ; - if (aop != cvop) - (void)too_many_arguments_pv(entersubop, GvNAME(namegv), 0); + if (aop != cvop) { + SV *namesv = cv_name((CV *)namegv, NULL, CV_NAME_NOTQUAL); + yyerror_pv(Perl_form(aTHX_ "Too many arguments for %" SVf, + SVfARG(namesv)), SvUTF8(namesv)); + } op_free(entersubop); - switch(GvNAME(namegv)[2]) { + switch(cvflags >> 16) { case 'F': return newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)); case 'L': return newSVOP( @@ -11896,8 +13426,7 @@ Perl_ck_entersub_args_core(pTHX_ OP *entersubop, GV *namegv, SV *protosv) op_sibling_splice(parent, first, -1, NULL); op_free(entersubop); - if (opnum == OP_ENTEREVAL - && GvNAMELEN(namegv)==9 && strnEQ(GvNAME(namegv), "evalbytes", 9)) + if (cvflags == (OP_ENTEREVAL | (1<<16))) flags |= OPpEVAL_BYTES <<8; switch (PL_opargs[opnum] & OA_CLASS_MASK) { @@ -11907,7 +13436,9 @@ Perl_ck_entersub_args_core(pTHX_ OP *entersubop, GV *namegv, SV *protosv) return aop ? newUNOP(opnum,flags,aop) : newOP(opnum,flags); case OA_BASEOP: if (aop) { - (void)too_many_arguments_pv(aop, GvNAME(namegv), 0); + SV *namesv = cv_name((CV *)namegv, NULL, CV_NAME_NOTQUAL); + yyerror_pv(Perl_form(aTHX_ "Too many arguments for %" SVf, + SVfARG(namesv)), SvUTF8(namesv)); op_free(aop); } return opnum == OP_RUNCV @@ -11922,70 +13453,101 @@ Perl_ck_entersub_args_core(pTHX_ OP *entersubop, GV *namegv, SV *protosv) } /* -=for apidoc Am|void|cv_get_call_checker|CV *cv|Perl_call_checker *ckfun_p|SV **ckobj_p +=for apidoc Am|void|cv_get_call_checker_flags|CV *cv|U32 gflags|Perl_call_checker *ckfun_p|SV **ckobj_p|U32 *ckflags_p Retrieves the function that will be used to fix up a call to C. Specifically, the function is applied to an C op tree for a subroutine call, not marked with C<&>, where the callee can be identified at compile time as C. -The C-level function pointer is returned in C<*ckfun_p>, and an SV -argument for it is returned in C<*ckobj_p>. The function is intended -to be called in this manner: +The C-level function pointer is returned in C<*ckfun_p>, an SV argument +for it is returned in C<*ckobj_p>, and control flags are returned in +C<*ckflags_p>. The function is intended to be called in this manner: entersubop = (*ckfun_p)(aTHX_ entersubop, namegv, (*ckobj_p)); In this call, C is a pointer to the C op, -which may be replaced by the check function, and C is a GV -supplying the name that should be used by the check function to refer +which may be replaced by the check function, and C supplies +the name that should be used by the check function to refer to the callee of the C op if it needs to emit any diagnostics. It is permitted to apply the check function in non-standard situations, such as to a call to a different subroutine or to a method call. -By default, the function is +C may not actually be a GV. If the C +bit is clear in C<*ckflags_p>, it is permitted to pass a CV or other SV +instead, anything that can be used as the first argument to L. +If the C bit is set in C<*ckflags_p> then the +check function requires C to be a genuine GV. + +By default, the check function is L, -and the SV parameter is C itself. This implements standard -prototype processing. It can be changed, for a particular subroutine, -by L. +the SV parameter is C itself, and the C +flag is clear. This implements standard prototype processing. It can +be changed, for a particular subroutine, by L. + +If the C bit is set in C then it +indicates that the caller only knows about the genuine GV version of +C, and accordingly the corresponding bit will always be set in +C<*ckflags_p>, regardless of the check function's recorded requirements. +If the C bit is clear in C then it +indicates the caller knows about the possibility of passing something +other than a GV as C, and accordingly the corresponding bit may +be either set or clear in C<*ckflags_p>, indicating the check function's +recorded requirements. + +C is a bitset passed into C, in which +only the C bit currently has a defined meaning +(for which see above). All other bits should be clear. + +=for apidoc Am|void|cv_get_call_checker|CV *cv|Perl_call_checker *ckfun_p|SV **ckobj_p + +The original form of L, which does not return +checker flags. When using a checker function returned by this function, +it is only safe to call it with a genuine GV as its C argument. =cut */ -static void -S_cv_get_call_checker(CV *cv, Perl_call_checker *ckfun_p, SV **ckobj_p, - U8 *flagsp) +void +Perl_cv_get_call_checker_flags(pTHX_ CV *cv, U32 gflags, + Perl_call_checker *ckfun_p, SV **ckobj_p, U32 *ckflags_p) { MAGIC *callmg; + PERL_ARGS_ASSERT_CV_GET_CALL_CHECKER_FLAGS; + PERL_UNUSED_CONTEXT; callmg = SvMAGICAL((SV*)cv) ? mg_find((SV*)cv, PERL_MAGIC_checkcall) : NULL; if (callmg) { *ckfun_p = DPTR2FPTR(Perl_call_checker, callmg->mg_ptr); *ckobj_p = callmg->mg_obj; - if (flagsp) *flagsp = callmg->mg_flags; + *ckflags_p = (callmg->mg_flags | gflags) & MGf_REQUIRE_GV; } else { *ckfun_p = Perl_ck_entersub_args_proto_or_list; *ckobj_p = (SV*)cv; - if (flagsp) *flagsp = 0; + *ckflags_p = gflags & MGf_REQUIRE_GV; } } void Perl_cv_get_call_checker(pTHX_ CV *cv, Perl_call_checker *ckfun_p, SV **ckobj_p) { + U32 ckflags; PERL_ARGS_ASSERT_CV_GET_CALL_CHECKER; PERL_UNUSED_CONTEXT; - S_cv_get_call_checker(cv, ckfun_p, ckobj_p, NULL); + cv_get_call_checker_flags(cv, CALL_CHECKER_REQUIRE_GV, ckfun_p, ckobj_p, + &ckflags); } /* -=for apidoc Am|void|cv_set_call_checker_flags|CV *cv|Perl_call_checker ckfun|SV *ckobj|U32 flags +=for apidoc Am|void|cv_set_call_checker_flags|CV *cv|Perl_call_checker ckfun|SV *ckobj|U32 ckflags Sets the function that will be used to fix up a call to C. Specifically, the function is applied to an C op tree for a subroutine call, not marked with C<&>, where the callee can be identified at compile time as C. -The C-level function pointer is supplied in C, and an SV argument -for it is supplied in C. The function should be defined like this: +The C-level function pointer is supplied in C, an SV argument for +it is supplied in C, and control flags are supplied in C. +The function should be defined like this: STATIC OP * ckfun(pTHX_ OP *op, GV *namegv, SV *ckobj) @@ -12003,15 +13565,21 @@ such as to a call to a different subroutine or to a method call. C may not actually be a GV. For efficiency, perl may pass a CV or other SV instead. Whatever is passed can be used as the first argument to L. You can force perl to pass a GV by including -C in the C. +C in the C. + +C is a bitset, in which only the C +bit currently has a defined meaning (for which see above). All other +bits should be clear. The current setting for a particular CV can be retrieved by -L. +L. =for apidoc Am|void|cv_set_call_checker|CV *cv|Perl_call_checker ckfun|SV *ckobj The original form of L, which passes it the -C flag for backward-compatibility. +C flag for backward-compatibility. The effect +of that flag setting is that the check function is guaranteed to get a +genuine GV as its C argument. =cut */ @@ -12025,7 +13593,7 @@ Perl_cv_set_call_checker(pTHX_ CV *cv, Perl_call_checker ckfun, SV *ckobj) void Perl_cv_set_call_checker_flags(pTHX_ CV *cv, Perl_call_checker ckfun, - SV *ckobj, U32 flags) + SV *ckobj, U32 ckflags) { PERL_ARGS_ASSERT_CV_SET_CALL_CHECKER_FLAGS; if (ckfun == Perl_ck_entersub_args_proto_or_list && ckobj == (SV*)cv) { @@ -12047,7 +13615,7 @@ Perl_cv_set_call_checker_flags(pTHX_ CV *cv, Perl_call_checker ckfun, callmg->mg_flags |= MGf_REFCOUNTED; } callmg->mg_flags = (callmg->mg_flags &~ MGf_REQUIRE_GV) - | (U8)(flags & MGf_REQUIRE_GV) | MGf_COPY; + | (U8)(ckflags & MGf_REQUIRE_GV) | MGf_COPY; } } @@ -12128,8 +13696,8 @@ Perl_ck_subr(pTHX_ OP *o) } else { Perl_call_checker ckfun; SV *ckobj; - U8 flags; - S_cv_get_call_checker(cv, &ckfun, &ckobj, &flags); + U32 ckflags; + cv_get_call_checker_flags(cv, 0, &ckfun, &ckobj, &ckflags); if (CvISXSUB(cv) || !CvROOT(cv)) S_entersub_alloc_targ(aTHX_ o); if (!namegv) { @@ -12139,7 +13707,7 @@ Perl_ck_subr(pTHX_ OP *o) the CV’s GV, unless this is an anonymous sub. This is not ideal for lexical subs, as its stringification will include the package. But it is the best we can do. */ - if (flags & MGf_REQUIRE_GV) { + if (ckflags & CALL_CHECKER_REQUIRE_GV) { if (!CvANON(cv) && (!CvNAMED(cv) || CvNAME_HEK(cv))) namegv = CvGV(cv); } @@ -12210,7 +13778,10 @@ Perl_ck_substr(pTHX_ OP *o) if (kid->op_type == OP_NULL) kid = OpSIBLING(kid); if (kid) - kid->op_flags |= OPf_MOD; + /* Historically, substr(delete $foo{bar},...) has been allowed + with 4-arg substr. Keep it working by applying entersub + lvalue context. */ + op_lvalue(kid, OP_ENTERSUB); } return o; @@ -13151,7 +14722,7 @@ S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints) /* at this point we're looking for an OP_AELEM, OP_HELEM, * OP_EXISTS or OP_DELETE */ - /* if something like arybase (a.k.a $[ ) is in scope, + /* if a custom array/hash access checker is in scope, * abandon optimisation attempt */ if ( (o->op_type == OP_AELEM || o->op_type == OP_HELEM) && PL_check[o->op_type] != Perl_ck_null) @@ -13542,9 +15113,16 @@ static void S_check_for_bool_cxt(OP*o, bool safe_and, U8 bool_flag, U8 maybe_flag) { OP *lop; + U8 flag = 0; assert((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR); + /* OPpTARGET_MY and boolean context probably don't mix well. + * If someone finds a valid use case, maybe add an extra flag to this + * function which indicates its safe to do so for this op? */ + assert(!( (PL_opargs[o->op_type] & OA_TARGLEX) + && (o->op_private & OPpTARGET_MY))); + lop = o->op_next; while (lop) { @@ -13569,7 +15147,7 @@ S_check_for_bool_cxt(OP*o, bool safe_and, U8 bool_flag, U8 maybe_flag) case OP_XOR: case OP_COND_EXPR: case OP_GREPWHILE: - o->op_private |= bool_flag; + flag = bool_flag; lop = NULL; break; @@ -13581,7 +15159,7 @@ S_check_for_bool_cxt(OP*o, bool safe_and, U8 bool_flag, U8 maybe_flag) */ case OP_AND: if (safe_and) { - o->op_private |= bool_flag; + flag = bool_flag; lop = NULL; break; } @@ -13589,12 +15167,12 @@ S_check_for_bool_cxt(OP*o, bool safe_and, U8 bool_flag, U8 maybe_flag) case OP_OR: case OP_DOR: if ((lop->op_flags & OPf_WANT) == OPf_WANT_VOID) { - o->op_private |= bool_flag; + flag = bool_flag; lop = NULL; } else if (!(lop->op_flags & OPf_WANT)) { /* unknown context - decide at runtime */ - o->op_private |= maybe_flag; + flag = maybe_flag; lop = NULL; } break; @@ -13607,6 +15185,8 @@ S_check_for_bool_cxt(OP*o, bool safe_and, U8 bool_flag, U8 maybe_flag) if (lop) lop = lop->op_next; } + + o->op_private |= flag; } @@ -14338,15 +15918,54 @@ Perl_rpeep(pTHX_ OP *o) break; } + case OP_RV2AV: + if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) + S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); + break; + case OP_RV2HV: case OP_PADHV: + /*'keys %h' in void or scalar context: skip the OP_KEYS + * and perform the functionality directly in the RV2HV/PADHV + * op + */ + if (o->op_flags & OPf_REF) { + OP *k = o->op_next; + U8 want = (k->op_flags & OPf_WANT); + if ( k + && k->op_type == OP_KEYS + && ( want == OPf_WANT_VOID + || want == OPf_WANT_SCALAR) + && !(k->op_private & OPpMAYBE_LVSUB) + && !(k->op_flags & OPf_MOD) + ) { + o->op_next = k->op_next; + o->op_flags &= ~(OPf_REF|OPf_WANT); + o->op_flags |= want; + o->op_private |= (o->op_type == OP_PADHV ? + OPpPADHV_ISKEYS : OPpRV2HV_ISKEYS); + /* for keys(%lex), hold onto the OP_KEYS's targ + * since padhv doesn't have its own targ to return + * an int with */ + if (!(o->op_type ==OP_PADHV && want == OPf_WANT_SCALAR)) + op_null(k); + } + } + /* see if %h is used in boolean context */ if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL); + + if (o->op_type != OP_PADHV) break; /* FALLTHROUGH */ case OP_PADAV: + if ( o->op_type == OP_PADAV + && (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR + ) + S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); + /* FALLTHROUGH */ case OP_PADSV: /* Skip over state($x) in void context. */ if (oldop && o->op_private == (OPpPAD_STATE|OPpLVAL_INTRO) @@ -14467,9 +16086,12 @@ Perl_rpeep(pTHX_ OP *o) o->op_opt = 1; break; + case OP_GREPWHILE: + if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) + S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); + /* FALLTHROUGH */ case OP_COND_EXPR: case OP_MAPWHILE: - case OP_GREPWHILE: case OP_ANDASSIGN: case OP_ORASSIGN: case OP_DORASSIGN: @@ -14501,6 +16123,8 @@ Perl_rpeep(pTHX_ OP *o) break; case OP_SUBST: + if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) + S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); assert(!(cPMOP->op_pmflags & PMf_ONCE)); while (cPMOP->op_pmstashstartu.op_pmreplstart && cPMOP->op_pmstashstartu.op_pmreplstart->op_type == OP_NULL) @@ -14834,6 +16458,8 @@ Perl_rpeep(pTHX_ OP *o) o->op_private &= ~(OPpASSIGN_COMMON_SCALAR|OPpASSIGN_COMMON_RC1); + if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) + S_check_for_bool_cxt(o, 1, OPpASSIGN_TRUEBOOL, 0); break; } @@ -14843,6 +16469,21 @@ Perl_rpeep(pTHX_ OP *o) S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL); break; + case OP_LENGTH: + /* see if the op is used in known boolean context, + * but not if OA_TARGLEX optimisation is enabled */ + if ( (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR + && !(o->op_private & OPpTARGET_MY) + ) + S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); + break; + + case OP_POS: + /* see if the op is used in known boolean context */ + if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) + S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); + break; + case OP_CUSTOM: { Perl_cpeep_t cpeep = XopENTRYCUSTOM(o, xop_peep); @@ -15249,21 +16890,9 @@ C specifies which type of op is to be affected. C is a pointer to the C function that is to be added to that opcode's check chain, and C points to the storage location where a pointer to the next function in the chain will be stored. The value of -C is written into the L array, while the value +C is written into the L array, while the value previously stored there is written to C<*old_checker_p>. -The function should be defined like this: - - static OP *new_checker(pTHX_ OP *op) { ... } - -It is intended to be called in this manner: - - new_checker(aTHX_ op) - -C should be defined like this: - - static Perl_check_t old_checker_p; - L is global to an entire process, and a module wishing to hook op checking may find itself invoked more than once per process, typically in different threads. To handle that situation, this function @@ -15285,9 +16914,22 @@ decides not to do anything special with an op that it is given (which is the usual case for most uses of op check hooking), it must chain the check function referenced by C<*old_checker_p>. +Taken all together, XS code to hook an op checker should typically look +something like this: + + static Perl_check_t nxck_frob; + static OP *myck_frob(pTHX_ OP *op) { + ... + op = nxck_frob(aTHX_ op); + ... + return op; + } + BOOT: + wrap_op_checker(OP_FROB, myck_frob, &nxck_frob); + If you want to influence compilation of calls to a specific subroutine, -then use L rather than hooking checking of all -C ops. +then use L rather than hooking checking of +all C ops. =cut */