X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/572d598f754af334bea81530a84b95b5605fe3c4..700dd4f8ecb172f1dd4eff765271d599cfa3fe02:/pp_hot.c diff --git a/pp_hot.c b/pp_hot.c index 5a920d4..ca6b195 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -74,6 +74,7 @@ PP(pp_null) return NORMAL; } +/* This is sometimes called directly by pp_coreargs. */ PP(pp_pushmark) { dVAR; @@ -134,7 +135,7 @@ PP(pp_sassign) context. */ if (!got_coderef && !is_gv && GIMME_V == G_VOID) { /* Is the target symbol table currently empty? */ - GV * const gv = gv_fetchsv(right, GV_NOINIT, SVt_PVGV); + GV * const gv = gv_fetchsv_nomg(right, GV_NOINIT, SVt_PVGV); if (SvTYPE(gv) != SVt_PVGV && !SvOK(gv)) { /* Good. Create a new proxy constant subroutine in the target. The gv becomes a(nother) reference to the constant. */ @@ -152,7 +153,7 @@ PP(pp_sassign) /* Need to fix things up. */ if (!is_gv) { /* Need to fix GV. */ - right = MUTABLE_SV(gv_fetchsv(right, GV_ADD, SVt_PVGV)); + right = MUTABLE_SV(gv_fetchsv_nomg(right,GV_ADD, SVt_PVGV)); } if (!got_coderef) { @@ -197,6 +198,13 @@ PP(pp_sassign) } } + if ( + SvTEMP(right) && !SvSMAGICAL(right) && SvREFCNT(right) == 1 && + (!isGV_with_GP(right) || SvFAKE(right)) && ckWARN(WARN_MISC) + ) + Perl_warner(aTHX_ + packWARN(WARN_MISC), "Useless assignment to a temporary" + ); SvSetMagicSV(right, left); SETs(right); RETURN; @@ -304,7 +312,7 @@ PP(pp_padsv) SAVECLEARSV(PAD_SVl(PL_op->op_targ)); if (PL_op->op_private & OPpDEREF) { PUTBACK; - vivify_ref(PAD_SVl(PL_op->op_targ), PL_op->op_private & OPpDEREF); + TOPs = vivify_ref(TOPs, PL_op->op_private & OPpDEREF); SPAGAIN; } } @@ -314,9 +322,13 @@ PP(pp_padsv) PP(pp_readline) { dVAR; - dSP; SvGETMAGIC(TOPs); - tryAMAGICunTARGET(iter_amg, 0, 0); - PL_last_in_gv = MUTABLE_GV(*PL_stack_sp--); + dSP; + if (TOPs) { + SvGETMAGIC(TOPs); + tryAMAGICunTARGET(iter_amg, 0, 0); + PL_last_in_gv = MUTABLE_GV(*PL_stack_sp--); + } + else PL_last_in_gv = PL_argvgv, PL_stack_sp--; if (!isGV_with_GP(PL_last_in_gv)) { if (SvROK(PL_last_in_gv) && isGV_with_GP(SvRV(PL_last_in_gv))) PL_last_in_gv = MUTABLE_GV(SvRV(PL_last_in_gv)); @@ -334,75 +346,17 @@ PP(pp_readline) PP(pp_eq) { dVAR; dSP; + SV *left, *right; + tryAMAGICbin_MG(eq_amg, AMGf_set|AMGf_numeric); -#ifndef NV_PRESERVES_UV - if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { - SP--; - SETs(boolSV(SvRV(TOPs) == SvRV(TOPp1s))); - RETURN; - } -#endif -#ifdef PERL_PRESERVE_IVUV - SvIV_please_nomg(TOPs); - if (SvIOK(TOPs)) { - /* Unless the left argument is integer in range we are going - to have to use NV maths. Hence only attempt to coerce the - right argument if we know the left is integer. */ - SvIV_please_nomg(TOPm1s); - if (SvIOK(TOPm1s)) { - const bool auvok = SvUOK(TOPm1s); - const bool buvok = SvUOK(TOPs); - - if (auvok == buvok) { /* ## IV == IV or UV == UV ## */ - /* Casting IV to UV before comparison isn't going to matter - on 2s complement. On 1s complement or sign&magnitude - (if we have any of them) it could to make negative zero - differ from normal zero. As I understand it. (Need to - check - is negative zero implementation defined behaviour - anyway?). NWC */ - const UV buv = SvUVX(POPs); - const UV auv = SvUVX(TOPs); - - SETs(boolSV(auv == buv)); - RETURN; - } - { /* ## Mixed IV,UV ## */ - SV *ivp, *uvp; - IV iv; - - /* == is commutative so doesn't matter which is left or right */ - if (auvok) { - /* top of stack (b) is the iv */ - ivp = *SP; - uvp = *--SP; - } else { - uvp = *SP; - ivp = *--SP; - } - iv = SvIVX(ivp); - if (iv < 0) - /* As uv is a UV, it's >0, so it cannot be == */ - SETs(&PL_sv_no); - else - /* we know iv is >= 0 */ - SETs(boolSV((UV)iv == SvUVX(uvp))); - RETURN; - } - } - } -#endif - { -#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) - dPOPTOPnnrl_nomg; - if (Perl_isnan(left) || Perl_isnan(right)) - RETSETNO; - SETs(boolSV(left == right)); -#else - dPOPnv_nomg; - SETs(boolSV(SvNV_nomg(TOPs) == value)); -#endif - RETURN; - } + right = POPs; + left = TOPs; + SETs(boolSV( + (SvIOK_notUV(left) && SvIOK_notUV(right)) + ? (SvIVX(left) == SvIVX(right)) + : ( do_ncmp(left, right) == 0) + )); + RETURN; } PP(pp_preinc) @@ -664,8 +618,8 @@ PP(pp_add) PP(pp_aelemfast) { dVAR; dSP; - AV * const av = PL_op->op_flags & OPf_SPECIAL - ? MUTABLE_AV(PAD_SV(PL_op->op_targ)) : GvAV(cGVOP_gv); + AV * const av = PL_op->op_type == OP_AELEMFAST_LEX + ? MUTABLE_AV(PAD_SV(PL_op->op_targ)) : GvAVn(cGVOP_gv); const U32 lval = PL_op->op_flags & OPf_MOD; SV** const svp = av_fetch(av, PL_op->op_private, lval); SV *sv = (svp ? *svp : &PL_sv_undef); @@ -810,8 +764,7 @@ PP(pp_rv2av) const bool is_pp_rv2av = PL_op->op_type == OP_RV2AV; const svtype type = is_pp_rv2av ? SVt_PVAV : SVt_PVHV; - if (!(PL_op->op_private & OPpDEREFed)) - SvGETMAGIC(sv); + SvGETMAGIC(sv); if (SvROK(sv)) { if (SvAMAGIC(sv)) { sv = amagic_deref_call(sv, is_pp_rv2av ? to_av_amg : to_hv_amg); @@ -824,11 +777,14 @@ PP(pp_rv2av) SETs(sv); RETURN; } - else if (LVRET) { + else if (PL_op->op_private & OPpMAYBE_LVSUB) { + const I32 flags = is_lvalue_sub(); + if (flags && !(flags & OPpENTERSUB_INARGS)) { if (gimme != G_ARRAY) goto croak_cant_return; SETs(sv); RETURN; + } } else if (PL_op->op_flags & OPf_MOD && PL_op->op_private & OPpLVAL_INTRO) @@ -866,11 +822,14 @@ PP(pp_rv2av) SETs(sv); RETURN; } - else if (LVRET) { + else if (PL_op->op_private & OPpMAYBE_LVSUB) { + const I32 flags = is_lvalue_sub(); + if (flags && !(flags & OPpENTERSUB_INARGS)) { if (gimme != G_ARRAY) goto croak_cant_return; SETs(sv); RETURN; + } } } } @@ -989,8 +948,19 @@ PP(pp_aassign) /* If there's a common identifier on both sides we have to take * special care that assigning the identifier on the left doesn't * clobber a value on the right that's used later in the list. + * Don't bother if LHS is just an empty hash or array. */ - if (PL_op->op_private & (OPpASSIGN_COMMON)) { + + if ( (PL_op->op_private & OPpASSIGN_COMMON) + && ( + firstlelem != lastlelem + || ! ((sv = *firstlelem)) + || SvMAGICAL(sv) + || ! (SvTYPE(sv) == SVt_PVAV || SvTYPE(sv) == SVt_PVHV) + || (SvTYPE(sv) == SVt_PVAV && AvFILL((AV*)sv) != -1) + || (SvTYPE(sv) == SVt_PVHV && HvUSEDKEYS((HV*)sv) != 0) + ) + ) { EXTEND_MORTAL(lastrelem - firstrelem + 1); for (relem = firstrelem; relem <= lastrelem; relem++) { if ((sv = *relem)) { @@ -1094,6 +1064,14 @@ PP(pp_aassign) break; } if (relem <= lastrelem) { + if ( + SvTEMP(sv) && !SvSMAGICAL(sv) && SvREFCNT(sv) == 1 && + (!isGV_with_GP(sv) || SvFAKE(sv)) && ckWARN(WARN_MISC) + ) + Perl_warner(aTHX_ + packWARN(WARN_MISC), + "Useless assignment to a temporary" + ); sv_setsv(sv, *relem); *(relem++) = sv; } @@ -1235,8 +1213,10 @@ PP(pp_qr) (void)sv_bless(rv, stash); } - if (RX_EXTFLAGS(rx) & RXf_TAINTED) + if (RX_EXTFLAGS(rx) & RXf_TAINTED) { SvTAINTED_on(rv); + SvTAINTED_on(SvRV(rv)); + } XPUSHs(rv); RETURN; } @@ -1345,7 +1325,7 @@ PP(pp_match) if (SvSCREAM(TARG)) r_flags |= REXEC_SCREAM; -play_it_again: + play_it_again: if (global && RX_OFFS(rx)[0].start != -1) { t = s = RX_OFFS(rx)[0].end + truebase - RX_GOFS(rx); if ((s + RX_MINLEN(rx)) > strend || s < truebase) @@ -1370,22 +1350,18 @@ play_it_again: && !SvROK(TARG)) /* Cannot trust since INTUIT cannot guess ^ */ goto yup; } - if (CALLREGEXEC(rx, (char*)s, (char *)strend, (char*)truebase, - minmatch, TARG, NUM2PTR(void*, gpos), r_flags)) - { - PL_curpm = pm; - if (dynpm->op_pmflags & PMf_ONCE) { + if (!CALLREGEXEC(rx, (char*)s, (char *)strend, (char*)truebase, + minmatch, TARG, NUM2PTR(void*, gpos), r_flags)) + goto ret_no; + + PL_curpm = pm; + if (dynpm->op_pmflags & PMf_ONCE) { #ifdef USE_ITHREADS - SvREADONLY_on(PL_regex_pad[dynpm->op_pmoffset]); + SvREADONLY_on(PL_regex_pad[dynpm->op_pmoffset]); #else - dynpm->op_pmflags |= PMf_USED; + dynpm->op_pmflags |= PMf_USED; #endif - } - goto gotcha; } - else - goto ret_no; - /*NOTREACHED*/ gotcha: if (rxtainted) @@ -1763,31 +1739,6 @@ Perl_do_readline(pTHX) } } -PP(pp_enter) -{ - dVAR; dSP; - register PERL_CONTEXT *cx; - I32 gimme = OP_GIMME(PL_op, -1); - - if (gimme == -1) { - if (cxstack_ix >= 0) { - /* If this flag is set, we're just inside a return, so we should - * store the caller's context */ - gimme = (PL_op->op_flags & OPf_SPECIAL) - ? block_gimme() - : cxstack[cxstack_ix].blk_gimme; - } else - gimme = G_SCALAR; - } - - ENTER_with_name("block"); - - SAVETMPS; - PUSHBLOCK(cx, CXt_BLOCK, SP); - - RETURN; -} - PP(pp_helem) { dVAR; dSP; @@ -1845,8 +1796,10 @@ PP(pp_helem) else SAVEHDELETE(hv, keysv); } - else if (PL_op->op_private & OPpDEREF) - vivify_ref(*svp, PL_op->op_private & OPpDEREF); + else if (PL_op->op_private & OPpDEREF) { + PUSHs(vivify_ref(*svp, PL_op->op_private & OPpDEREF)); + RETURN; + } } sv = (svp ? *svp : &PL_sv_undef); /* Originally this did a conditional C; this @@ -1867,57 +1820,6 @@ PP(pp_helem) RETURN; } -PP(pp_leave) -{ - dVAR; dSP; - register PERL_CONTEXT *cx; - SV **newsp; - PMOP *newpm; - I32 gimme; - - if (PL_op->op_flags & OPf_SPECIAL) { - cx = &cxstack[cxstack_ix]; - cx->blk_oldpm = PL_curpm; /* fake block should preserve $1 et al */ - } - - POPBLOCK(cx,newpm); - - gimme = OP_GIMME(PL_op, (cxstack_ix >= 0) ? gimme : G_SCALAR); - - TAINT_NOT; - if (gimme == G_VOID) - SP = newsp; - else if (gimme == G_SCALAR) { - register SV **mark; - MARK = newsp + 1; - if (MARK <= SP) { - if (SvFLAGS(TOPs) & (SVs_PADTMP|SVs_TEMP)) - *MARK = TOPs; - else - *MARK = sv_mortalcopy(TOPs); - } else { - MEXTEND(mark,0); - *MARK = &PL_sv_undef; - } - SP = MARK; - } - else if (gimme == G_ARRAY) { - /* in case LEAVE wipes old return values */ - register SV **mark; - for (mark = newsp + 1; mark <= SP; mark++) { - if (!(SvFLAGS(*mark) & (SVs_PADTMP|SVs_TEMP))) { - *mark = sv_mortalcopy(*mark); - TAINT_NOT; /* Each item is independent */ - } - } - } - PL_curpm = newpm; /* Don't pop $1 et al till now */ - - LEAVE_with_name("block"); - - RETURN; -} - PP(pp_iter) { dVAR; dSP; @@ -2056,6 +1958,73 @@ PP(pp_iter) RETPUSHYES; } +/* +A description of how taint works in pattern matching and substitution. + +While the pattern is being assembled/concatenated and them compiled, +PL_tainted will get set if any component of the pattern is tainted, e.g. +/.*$tainted/. At the end of pattern compilation, the RXf_TAINTED flag +is set on the pattern if PL_tainted is set. + +When the pattern is copied, e.g. $r = qr/..../, the SV holding the ref to +the pattern is marked as tainted. This means that subsequent usage, such +as /x$r/, will set PL_tainted, and thus RXf_TAINTED, on the new pattern too. + +During execution of a pattern, locale-variant ops such as ALNUML set the +local flag RF_tainted. At the end of execution, the engine sets the +RXf_TAINTED_SEEN on the pattern if RF_tainted got set, or clears it +otherwise. + +In addition, RXf_TAINTED_SEEN is used post-execution by the get magic code +of $1 et al to indicate whether the returned value should be tainted. +It is the responsibility of the caller of the pattern (i.e. pp_match, +pp_subst etc) to set this flag for any other circumstances where $1 needs +to be tainted. + +The taint behaviour of pp_subst (and pp_substcont) is quite complex. + +There are three possible sources of taint + * the source string + * the pattern (both compile- and run-time, RXf_TAINTED / RXf_TAINTED_SEEN) + * the replacement string (or expression under /e) + +There are four destinations of taint and they are affected by the sources +according to the rules below: + + * the return value (not including /r): + tainted by the source string and pattern, but only for the + number-of-iterations case; boolean returns aren't tainted; + * the modified string (or modified copy under /r): + tainted by the source string, pattern, and replacement strings; + * $1 et al: + tainted by the pattern, and under 'use re "taint"', by the source + string too; + * PL_taint - i.e. whether subsequent code (e.g. in a /e block) is tainted: + should always be unset before executing subsequent code. + +The overall action of pp_subst is: + + * at the start, set bits in rxtainted indicating the taint status of + the various sources. + + * After each pattern execution, update the SUBST_TAINT_PAT bit in + rxtainted if RXf_TAINTED_SEEN has been set, to indicate that the + pattern has subsequently become tainted via locale ops. + + * If control is being passed to pp_substcont to execute a /e block, + save rxtainted in the CXt_SUBST block, for future use by + pp_substcont. + + * Whenever control is being returned to perl code (either by falling + off the "end" of pp_subst/pp_substcont, or by entering a /e block), + use the flag bits in rxtainted to make all the appropriate types of + destination taint visible; e.g. set RXf_TAINTED_SEEN so that $1 + et al will appear tainted. + +pp_match is just a simpler version of the above. + +*/ + PP(pp_subst) { dVAR; dSP; dTARG; @@ -2071,7 +2040,8 @@ PP(pp_subst) I32 maxiters; register I32 i; bool once; - U8 rxtainted; + U8 rxtainted = 0; /* holds various SUBST_TAINT_* flag bits. + See "how taint works" above */ char *orig; U8 r_flags; register REGEXP *rx = PM_GETRE(pm); @@ -2080,7 +2050,6 @@ PP(pp_subst) const I32 oldsave = PL_savestack_ix; STRLEN slen; bool doutf8 = FALSE; - I32 matched; #ifdef PERL_OLD_COPY_ON_WRITE bool is_cow; #endif @@ -2099,11 +2068,6 @@ PP(pp_subst) EXTEND(SP,1); } - /* In non-destructive replacement mode, duplicate target scalar so it - * remains unchanged. */ - if (rpm->op_pmflags & PMf_NONDESTRUCT) - TARG = sv_2mortal(newSVsv(TARG)); - #ifdef PERL_OLD_COPY_ON_WRITE /* Awooga. Awooga. "bool" types that are actually char are dangerous, because they make integers such as 256 "false". */ @@ -2112,14 +2076,14 @@ PP(pp_subst) if (SvIsCOW(TARG)) sv_force_normal_flags(TARG,0); #endif - if ( + if (!(rpm->op_pmflags & PMf_NONDESTRUCT) #ifdef PERL_OLD_COPY_ON_WRITE - !is_cow && + && !is_cow #endif - (SvREADONLY(TARG) - || ( ((SvTYPE(TARG) == SVt_PVGV && isGV_with_GP(TARG)) - || SvTYPE(TARG) > SVt_PVLV) - && !(SvTYPE(TARG) == SVt_PVGV && SvFAKE(TARG))))) + && (SvREADONLY(TARG) + || ( ((SvTYPE(TARG) == SVt_PVGV && isGV_with_GP(TARG)) + || SvTYPE(TARG) > SVt_PVLV) + && !(SvTYPE(TARG) == SVt_PVGV && SvFAKE(TARG))))) Perl_croak_no_modify(aTHX); PUTBACK; @@ -2127,11 +2091,20 @@ PP(pp_subst) s = SvPV_mutable(TARG, len); if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV) force_on_match = 1; - rxtainted = ((RX_EXTFLAGS(rx) & RXf_TAINTED) || - (PL_tainted && (pm->op_pmflags & PMf_RETAINT))); - if (PL_tainted) - rxtainted |= 2; - TAINT_NOT; + + /* only replace once? */ + once = !(rpm->op_pmflags & PMf_GLOBAL); + + /* See "how taint works" above */ + if (PL_tainting) { + rxtainted = ( + (SvTAINTED(TARG) ? SUBST_TAINT_STR : 0) + | ((RX_EXTFLAGS(rx) & RXf_TAINTED) ? SUBST_TAINT_PAT : 0) + | ((pm->op_pmflags & PMf_RETAINT) ? SUBST_TAINT_RETAINT : 0) + | ((once && !(rpm->op_pmflags & PMf_NONDESTRUCT)) + ? SUBST_TAINT_BOOLRET : 0)); + TAINT_NOT; + } RX_MATCH_UTF8_set(rx, DO_UTF8(TARG)); @@ -2173,24 +2146,33 @@ PP(pp_subst) */ } - /* only replace once? */ - once = !(rpm->op_pmflags & PMf_GLOBAL); - matched = CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL, - r_flags | REXEC_CHECKED); + if (!CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL, + r_flags | REXEC_CHECKED)) + { + ret_no: + SPAGAIN; + PUSHs(rpm->op_pmflags & PMf_NONDESTRUCT ? TARG : &PL_sv_no); + LEAVE_SCOPE(oldsave); + RETURN; + } + /* known replacement string? */ if (dstr) { + if (SvTAINTED(dstr)) + rxtainted |= SUBST_TAINT_REPL; /* Upgrade the source if the replacement is utf8 but the source is not, * but only if it matched; see * http://www.nntp.perl.org/group/perl.perl5.porters/2010/04/msg158809.html */ - if (matched && DO_UTF8(dstr) && ! DO_UTF8(TARG)) { - const STRLEN new_len = sv_utf8_upgrade(TARG); + if (DO_UTF8(dstr) && ! DO_UTF8(TARG)) { + char * const orig_pvx = SvPVX(TARG); + const STRLEN new_len = sv_utf8_upgrade_nomg(TARG); /* If the lengths are the same, the pattern contains only * invariants, can keep going; otherwise, various internal markers * could be off, so redo */ - if (new_len != len) { + if (new_len != len || orig_pvx != SvPVX(TARG)) { goto setup_match; } } @@ -2216,14 +2198,6 @@ PP(pp_subst) doutf8 = FALSE; } - if (!matched) { - ret_no: - SPAGAIN; - PUSHs(rpm->op_pmflags & PMf_NONDESTRUCT ? TARG : &PL_sv_no); - LEAVE_SCOPE(oldsave); - RETURN; - } - /* can do inplace substitution? */ if (c #ifdef PERL_OLD_COPY_ON_WRITE @@ -2231,7 +2205,8 @@ PP(pp_subst) #endif && (I32)clen <= RX_MINLENRET(rx) && (once || !(r_flags & REXEC_COPY_STR)) && !(RX_EXTFLAGS(rx) & RXf_LOOKBEHIND_SEEN) - && (!doutf8 || SvUTF8(TARG))) + && (!doutf8 || SvUTF8(TARG)) + && !(rpm->op_pmflags & PMf_NONDESTRUCT)) { #ifdef PERL_OLD_COPY_ON_WRITE @@ -2249,7 +2224,8 @@ PP(pp_subst) PL_curpm = pm; SvSCREAM_off(TARG); /* disable possible screamer */ if (once) { - rxtainted |= RX_MATCH_TAINTED(rx); + if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */ + rxtainted |= SUBST_TAINT_PAT; m = orig + RX_OFFS(rx)[0].start; d = orig + RX_OFFS(rx)[0].end; s = orig; @@ -2282,15 +2258,15 @@ PP(pp_subst) else { sv_chop(TARG, d); } - TAINT_IF(rxtainted & 1); SPAGAIN; - PUSHs(rpm->op_pmflags & PMf_NONDESTRUCT ? TARG : &PL_sv_yes); + PUSHs(&PL_sv_yes); } else { do { if (iters++ > maxiters) DIE(aTHX_ "Substitution loop"); - rxtainted |= RX_MATCH_TAINTED(rx); + if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */ + rxtainted |= SUBST_TAINT_PAT; m = RX_OFFS(rx)[0].start + orig; if ((i = m - s)) { if (s != d) @@ -2311,38 +2287,39 @@ PP(pp_subst) SvCUR_set(TARG, d - SvPVX_const(TARG) + i); Move(s, d, i+1, char); /* include the NUL */ } - TAINT_IF(rxtainted & 1); SPAGAIN; - if (rpm->op_pmflags & PMf_NONDESTRUCT) - PUSHs(TARG); - else - mPUSHi((I32)iters); + mPUSHi((I32)iters); } - (void)SvPOK_only_UTF8(TARG); - TAINT_IF(rxtainted); - SvSETMAGIC(TARG); - SvTAINT(TARG); - if (doutf8) - SvUTF8_on(TARG); - LEAVE_SCOPE(oldsave); - RETURN; } else { if (force_on_match) { force_on_match = 0; + if (rpm->op_pmflags & PMf_NONDESTRUCT) { + /* I feel that it should be possible to avoid this mortal copy + given that the code below copies into a new destination. + However, I suspect it isn't worth the complexity of + unravelling the C for the small number of + cases where it would be viable to drop into the copy code. */ + TARG = sv_2mortal(newSVsv(TARG)); + } s = SvPV_force(TARG, len); goto force_it; } #ifdef PERL_OLD_COPY_ON_WRITE have_a_cow: #endif - rxtainted |= RX_MATCH_TAINTED(rx); - dstr = newSVpvn_utf8(m, s-m, DO_UTF8(TARG)); - SAVEFREESV(dstr); + if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */ + rxtainted |= SUBST_TAINT_PAT; + dstr = newSVpvn_flags(m, s-m, SVs_TEMP | (DO_UTF8(TARG) ? SVf_UTF8 : 0)); PL_curpm = pm; if (!c) { register PERL_CONTEXT *cx; SPAGAIN; + /* note that a whole bunch of local vars are saved here for + * use by pp_substcont: here's a list of them in case you're + * searching for places in this sub that uses a particular var: + * iters maxiters r_flags oldsave rxtainted orig dstr targ + * s m strend rx once */ PUSHSUBST(cx); RETURNOP(cPMOP->op_pmreplrootu.op_pmreplroot); } @@ -2350,7 +2327,8 @@ PP(pp_subst) do { if (iters++ > maxiters) DIE(aTHX_ "Substitution loop"); - rxtainted |= RX_MATCH_TAINTED(rx); + if (RX_MATCH_TAINTED(rx)) + rxtainted |= SUBST_TAINT_PAT; if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) { m = s; s = orig; @@ -2375,42 +2353,67 @@ PP(pp_subst) else sv_catpvn(dstr, s, strend - s); + if (rpm->op_pmflags & PMf_NONDESTRUCT) { + /* From here on down we're using the copy, and leaving the original + untouched. */ + TARG = dstr; + SPAGAIN; + PUSHs(dstr); + } else { #ifdef PERL_OLD_COPY_ON_WRITE - /* The match may make the string COW. If so, brilliant, because that's - just saved us one malloc, copy and free - the regexp has donated - the old buffer, and we malloc an entirely new one, rather than the - regexp malloc()ing a buffer and copying our original, only for - us to throw it away here during the substitution. */ - if (SvIsCOW(TARG)) { - sv_force_normal_flags(TARG, SV_COW_DROP_PV); - } else + /* The match may make the string COW. If so, brilliant, because + that's just saved us one malloc, copy and free - the regexp has + donated the old buffer, and we malloc an entirely new one, rather + than the regexp malloc()ing a buffer and copying our original, + only for us to throw it away here during the substitution. */ + if (SvIsCOW(TARG)) { + sv_force_normal_flags(TARG, SV_COW_DROP_PV); + } else #endif - { - SvPV_free(TARG); - } - SvPV_set(TARG, SvPVX(dstr)); - SvCUR_set(TARG, SvCUR(dstr)); - SvLEN_set(TARG, SvLEN(dstr)); - doutf8 |= DO_UTF8(dstr); - SvPV_set(dstr, NULL); + { + SvPV_free(TARG); + } + SvPV_set(TARG, SvPVX(dstr)); + SvCUR_set(TARG, SvCUR(dstr)); + SvLEN_set(TARG, SvLEN(dstr)); + doutf8 |= DO_UTF8(dstr); + SvPV_set(dstr, NULL); - TAINT_IF(rxtainted & 1); - SPAGAIN; - if (rpm->op_pmflags & PMf_NONDESTRUCT) - PUSHs(TARG); - else + SPAGAIN; mPUSHi((I32)iters); + } + } + if (!(rpm->op_pmflags & PMf_NONDESTRUCT)) { (void)SvPOK_only_UTF8(TARG); if (doutf8) SvUTF8_on(TARG); - TAINT_IF(rxtainted); - SvSETMAGIC(TARG); + } + + /* See "how taint works" above */ + if (PL_tainting) { + if ((rxtainted & SUBST_TAINT_PAT) || + ((rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_RETAINT)) == + (SUBST_TAINT_STR|SUBST_TAINT_RETAINT)) + ) + (RX_MATCH_TAINTED_on(rx)); /* taint $1 et al */ + + if (!(rxtainted & SUBST_TAINT_BOOLRET) + && (rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_PAT)) + ) + SvTAINTED_on(TOPs); /* taint return value */ + else + SvTAINTED_off(TOPs); /* may have got tainted earlier */ + + /* needed for mg_set below */ + PL_tainted = + cBOOL(rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_PAT|SUBST_TAINT_REPL)); SvTAINT(TARG); - LEAVE_SCOPE(oldsave); - RETURN; } - /* NOTREACHED */ + SvSETMAGIC(TARG); /* PL_tainted must be correctly set for this mg_set */ + TAINT_NOT; + LEAVE_SCOPE(oldsave); + RETURN; } PP(pp_grepwhile) @@ -2486,7 +2489,7 @@ PP(pp_leavesub) MARK = newsp + 1; if (MARK <= SP) { if (cx->blk_sub.cv && CvDEPTH(cx->blk_sub.cv) > 1) { - if (SvTEMP(TOPs)) { + if (SvTEMP(TOPs) && SvREFCNT(TOPs) == 1) { *MARK = SvREFCNT_inc(TOPs); FREETMPS; sv_2mortal(*MARK); @@ -2498,8 +2501,11 @@ PP(pp_leavesub) SvREFCNT_dec(sv); } } + else if (SvTEMP(TOPs) && SvREFCNT(TOPs) == 1) { + *MARK = TOPs; + } else - *MARK = SvTEMP(TOPs) ? TOPs : sv_mortalcopy(TOPs); + *MARK = sv_mortalcopy(TOPs); } else { MEXTEND(MARK, 0); @@ -2509,7 +2515,7 @@ PP(pp_leavesub) } else if (gimme == G_ARRAY) { for (MARK = newsp + 1; MARK <= SP; MARK++) { - if (!SvTEMP(*MARK)) { + if (!SvTEMP(*MARK) || SvREFCNT(*MARK) != 1) { *MARK = sv_mortalcopy(*MARK); TAINT_NOT; /* Each item is independent */ } @@ -2526,196 +2532,6 @@ PP(pp_leavesub) return cx->blk_sub.retop; } -/* This duplicates the above code because the above code must not - * get any slower by more conditions */ -PP(pp_leavesublv) -{ - dVAR; dSP; - SV **mark; - SV **newsp; - PMOP *newpm; - I32 gimme; - register PERL_CONTEXT *cx; - SV *sv; - - if (CxMULTICALL(&cxstack[cxstack_ix])) - return 0; - - POPBLOCK(cx,newpm); - cxstack_ix++; /* temporarily protect top context */ - - TAINT_NOT; - - if (CxLVAL(cx) & OPpENTERSUB_INARGS) { - /* We are an argument to a function or grep(). - * This kind of lvalueness was legal before lvalue - * subroutines too, so be backward compatible: - * cannot report errors. */ - - /* Scalar context *is* possible, on the LHS of -> only, - * as in f()->meth(). But this is not an lvalue. */ - if (gimme == G_SCALAR) - goto temporise; - if (gimme == G_ARRAY) { - mark = newsp + 1; - /* We want an array here, but padav will have left us an arrayref for an lvalue, - * so we need to expand it */ - if(SvTYPE(*mark) == SVt_PVAV) { - AV *const av = MUTABLE_AV(*mark); - const I32 maxarg = AvFILL(av) + 1; - (void)POPs; /* get rid of the array ref */ - EXTEND(SP, maxarg); - if (SvRMAGICAL(av)) { - U32 i; - for (i=0; i < (U32)maxarg; i++) { - SV ** const svp = av_fetch(av, i, FALSE); - SP[i+1] = svp - ? SvGMAGICAL(*svp) ? (mg_get(*svp), *svp) : *svp - : &PL_sv_undef; - } - } - else { - Copy(AvARRAY(av), SP+1, maxarg, SV*); - } - SP += maxarg; - PUTBACK; - } - if (!CvLVALUE(cx->blk_sub.cv)) - goto temporise_array; - EXTEND_MORTAL(SP - newsp); - for (mark = newsp + 1; mark <= SP; mark++) { - if (SvTEMP(*mark)) - NOOP; - else if (SvFLAGS(*mark) & (SVs_PADTMP | SVf_READONLY)) - *mark = sv_mortalcopy(*mark); - else { - /* Can be a localized value subject to deletion. */ - PL_tmps_stack[++PL_tmps_ix] = *mark; - SvREFCNT_inc_void(*mark); - } - } - } - } - else if (CxLVAL(cx)) { /* Leave it as it is if we can. */ - /* Here we go for robustness, not for speed, so we change all - * the refcounts so the caller gets a live guy. Cannot set - * TEMP, so sv_2mortal is out of question. */ - if (!CvLVALUE(cx->blk_sub.cv)) { - LEAVE; - cxstack_ix--; - POPSUB(cx,sv); - PL_curpm = newpm; - LEAVESUB(sv); - DIE(aTHX_ "Can't modify non-lvalue subroutine call"); - } - if (gimme == G_SCALAR) { - MARK = newsp + 1; - EXTEND_MORTAL(1); - if (MARK == SP) { - /* Temporaries are bad unless they happen to have set magic - * attached, such as the elements of a tied hash or array */ - if ((SvFLAGS(TOPs) & (SVs_TEMP | SVs_PADTMP) || - (SvFLAGS(TOPs) & (SVf_READONLY | SVf_FAKE)) - == SVf_READONLY - ) && - !SvSMAGICAL(TOPs)) { - LEAVE; - cxstack_ix--; - POPSUB(cx,sv); - PL_curpm = newpm; - LEAVESUB(sv); - DIE(aTHX_ "Can't return %s from lvalue subroutine", - SvREADONLY(TOPs) ? (TOPs == &PL_sv_undef) ? "undef" - : "a readonly value" : "a temporary"); - } - else { /* Can be a localized value - * subject to deletion. */ - PL_tmps_stack[++PL_tmps_ix] = *mark; - SvREFCNT_inc_void(*mark); - } - } - else { /* Should not happen? */ - LEAVE; - cxstack_ix--; - POPSUB(cx,sv); - PL_curpm = newpm; - LEAVESUB(sv); - DIE(aTHX_ "%s returned from lvalue subroutine in scalar context", - (MARK > SP ? "Empty array" : "Array")); - } - SP = MARK; - } - else if (gimme == G_ARRAY) { - EXTEND_MORTAL(SP - newsp); - for (mark = newsp + 1; mark <= SP; mark++) { - if (*mark != &PL_sv_undef - && SvFLAGS(*mark) & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)) { - /* Might be flattened array after $#array = */ - PUTBACK; - LEAVE; - cxstack_ix--; - POPSUB(cx,sv); - PL_curpm = newpm; - LEAVESUB(sv); - DIE(aTHX_ "Can't return a %s from lvalue subroutine", - SvREADONLY(TOPs) ? "readonly value" : "temporary"); - } - else { - /* Can be a localized value subject to deletion. */ - PL_tmps_stack[++PL_tmps_ix] = *mark; - SvREFCNT_inc_void(*mark); - } - } - } - } - else { - if (gimme == G_SCALAR) { - temporise: - MARK = newsp + 1; - if (MARK <= SP) { - if (cx->blk_sub.cv && CvDEPTH(cx->blk_sub.cv) > 1) { - if (SvTEMP(TOPs)) { - *MARK = SvREFCNT_inc(TOPs); - FREETMPS; - sv_2mortal(*MARK); - } - else { - sv = SvREFCNT_inc(TOPs); /* FREETMPS could clobber it */ - FREETMPS; - *MARK = sv_mortalcopy(sv); - SvREFCNT_dec(sv); - } - } - else - *MARK = SvTEMP(TOPs) ? TOPs : sv_mortalcopy(TOPs); - } - else { - MEXTEND(MARK, 0); - *MARK = &PL_sv_undef; - } - SP = MARK; - } - else if (gimme == G_ARRAY) { - temporise_array: - for (MARK = newsp + 1; MARK <= SP; MARK++) { - if (!SvTEMP(*MARK)) { - *MARK = sv_mortalcopy(*MARK); - TAINT_NOT; /* Each item is independent */ - } - } - } - } - PUTBACK; - - LEAVE; - cxstack_ix--; - POPSUB(cx,sv); /* Stack values are safe: release CV and @_ ... */ - PL_curpm = newpm; /* ... and pop $1 et al */ - - LEAVESUB(sv); - return cx->blk_sub.retop; -} - PP(pp_entersub) { dVAR; dSP; dPOPss; @@ -2984,8 +2800,6 @@ PP(pp_aelem) Perl_warner(aTHX_ packWARN(WARN_MISC), "Use of reference \"%"SVf"\" as array index", SVfARG(elemsv)); - if (elem > 0) - elem -= CopARYBASE_get(PL_curcop); if (SvTYPE(av) != SVt_PVAV) RETPUSHUNDEF; @@ -3036,8 +2850,10 @@ PP(pp_aelem) else SAVEADELETE(av, elem); } - else if (PL_op->op_private & OPpDEREF) - vivify_ref(*svp, PL_op->op_private & OPpDEREF); + else if (PL_op->op_private & OPpDEREF) { + PUSHs(vivify_ref(*svp, PL_op->op_private & OPpDEREF)); + RETURN; + } } sv = (svp ? *svp : &PL_sv_undef); if (!lval && SvRMAGICAL(av) && SvGMAGICAL(sv)) /* see note in pp_helem() */ @@ -3046,7 +2862,7 @@ PP(pp_aelem) RETURN; } -void +SV* Perl_vivify_ref(pTHX_ SV *sv, U32 to_what) { PERL_ARGS_ASSERT_VIVIFY_REF; @@ -3070,6 +2886,14 @@ Perl_vivify_ref(pTHX_ SV *sv, U32 to_what) SvROK_on(sv); SvSETMAGIC(sv); } + if (SvGMAGICAL(sv)) { + /* copy the sv without magic to prevent magic from being + executed twice */ + SV* msv = sv_newmortal(); + sv_setsv_nomg(msv, sv); + return msv; + } + return sv; } PP(pp_method) @@ -3122,10 +2946,16 @@ S_method_common(pTHX_ SV* meth, U32* hashp) ob = MUTABLE_SV(SvRV(sv)); else { GV* iogv; + bool packname_is_utf8 = FALSE; /* this isn't a reference */ - if(SvOK(sv) && (packname = SvPV_const(sv, packlen))) { - const HE* const he = hv_fetch_ent(PL_stashcache, sv, 0, 0); + if(SvOK(sv) && (packname = SvPV_nomg_const(sv, packlen))) { + const HE* const he = + (const HE *)hv_common_key_len( + PL_stashcache, packname, + packlen * -(packname_is_utf8 = !!SvUTF8(sv)), 0, NULL, 0 + ); + if (he) { stash = INT2PTR(HV*,SvIV(HeVAL(he))); goto fetch; @@ -3134,7 +2964,9 @@ S_method_common(pTHX_ SV* meth, U32* hashp) if (!SvOK(sv) || !(packname) || - !(iogv = gv_fetchsv(sv, 0, SVt_PVIO)) || + !(iogv = gv_fetchpvn_flags( + packname, packlen, SVf_UTF8 * packname_is_utf8, SVt_PVIO + )) || !(ob=MUTABLE_SV(GvIO(iogv)))) { /* this isn't the name of a filehandle either */