X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/b3f91e9158d8a5c05627eba1c2848f35528571d2..88fb56ecc95f23db1630e96a259c1febfbe98e20:/pp.c diff --git a/pp.c b/pp.c index 129c948..385f1be 100644 --- a/pp.c +++ b/pp.c @@ -71,11 +71,14 @@ PP(pp_padav) if (PL_op->op_flags & OPf_REF) { PUSHs(TARG); RETURN; - } else if (LVRET) { + } else if (PL_op->op_private & OPpMAYBE_LVSUB) { + const I32 flags = is_lvalue_sub(); + if (flags && !(flags & OPpENTERSUB_INARGS)) { if (GIMME == G_SCALAR) Perl_croak(aTHX_ "Can't return array to lvalue scalar context"); PUSHs(TARG); RETURN; + } } gimme = GIMME_V; if (gimme == G_ARRAY) { @@ -114,14 +117,17 @@ PP(pp_padhv) SAVECLEARSV(PAD_SVl(PL_op->op_targ)); if (PL_op->op_flags & OPf_REF) RETURN; - else if (LVRET) { + else if (PL_op->op_private & OPpMAYBE_LVSUB) { + const I32 flags = is_lvalue_sub(); + if (flags && !(flags & OPpENTERSUB_INARGS)) { if (GIMME == G_SCALAR) Perl_croak(aTHX_ "Can't return hash to lvalue scalar context"); RETURN; + } } gimme = GIMME_V; if (gimme == G_ARRAY) { - RETURNOP(do_kv()); + RETURNOP(Perl_do_kv(aTHX)); } else if (gimme == G_SCALAR) { SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG)); @@ -139,11 +145,13 @@ PP(pp_rv2gv) { dVAR; dSP; dTOPss; - SvGETMAGIC(sv); + if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv); if (SvROK(sv)) { wasref: - tryAMAGICunDEREF(to_gv); - + if (SvAMAGIC(sv)) { + sv = amagic_deref_call(sv, to_gv_amg); + SPAGAIN; + } sv = SvRV(sv); if (SvTYPE(sv) == SVt_PVIO) { GV * const gv = MUTABLE_GV(sv_newmortal()); @@ -213,8 +221,16 @@ PP(pp_rv2gv) } sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV)); } + /* FAKE globs in the symbol table cause weird bugs (#77810) */ + if (sv) SvFAKE_off(sv); } } + if (sv && SvFAKE(sv)) { + SV *newsv = sv_newmortal(); + sv_setsv_flags(newsv, sv, 0); + SvFAKE_off(newsv); + sv = newsv; + } if (PL_op->op_private & OPpLVAL_INTRO) save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL)); SETs(sv); @@ -238,7 +254,10 @@ Perl_softref2xv(pTHX_ SV *const sv, const char *const what, Perl_die(aTHX_ PL_no_usym, what); } if (!SvOK(sv)) { - if (PL_op->op_flags & OPf_REF) + if ( + PL_op->op_flags & OPf_REF && + PL_op->op_next->op_type != OP_BOOLKEYS + ) Perl_die(aTHX_ PL_no_usym, what); if (ckWARN(WARN_UNINITIALIZED)) report_uninit(sv); @@ -275,7 +294,10 @@ PP(pp_rv2sv) if (!(PL_op->op_private & OPpDEREFed)) SvGETMAGIC(sv); if (SvROK(sv)) { - tryAMAGICunDEREF(to_sv); + if (SvAMAGIC(sv)) { + sv = amagic_deref_call(sv, to_sv_amg); + SPAGAIN; + } sv = SvRV(sv); switch (SvTYPE(sv)) { @@ -336,26 +358,21 @@ PP(pp_av2arylen) PP(pp_pos) { - dVAR; dSP; dTARGET; dPOPss; + dVAR; dSP; dPOPss; if (PL_op->op_flags & OPf_MOD || LVRET) { - if (SvTYPE(TARG) < SVt_PVLV) { - sv_upgrade(TARG, SVt_PVLV); - sv_magic(TARG, NULL, PERL_MAGIC_pos, NULL, 0); - } - - LvTYPE(TARG) = '.'; - if (LvTARG(TARG) != sv) { - SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc_simple(sv); - } - PUSHs(TARG); /* no SvSETMAGIC */ + SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ + sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0); + LvTYPE(ret) = '.'; + LvTARG(ret) = SvREFCNT_inc_simple(sv); + PUSHs(ret); /* no SvSETMAGIC */ RETURN; } else { if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global); if (mg && mg->mg_len >= 0) { + dTARGET; I32 i = mg->mg_len; if (DO_UTF8(sv)) sv_pos_b2u(sv, &i); @@ -426,7 +443,19 @@ PP(pp_prototype) goto set; } if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) { - ret = newSVpvs_flags("\\[@%]", SVs_TEMP); + ret = newSVpvs_flags("+", SVs_TEMP); + goto set; + } + if (code == -KEY_push || code == -KEY_unshift) { + ret = newSVpvs_flags("+@", SVs_TEMP); + goto set; + } + if (code == -KEY_pop || code == -KEY_shift) { + ret = newSVpvs_flags(";+", SVs_TEMP); + goto set; + } + if (code == -KEY_splice) { + ret = newSVpvs_flags("+;$$@", SVs_TEMP); goto set; } if (code == -KEY_tied || code == -KEY_untie) { @@ -689,15 +718,18 @@ PP(pp_study) RETPUSHYES; } s = (unsigned char*)(SvPV(sv, len)); - pos = len; - if (pos <= 0 || !SvPOK(sv) || SvUTF8(sv)) { + if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) { /* No point in studying a zero length string, and not safe to study anything that doesn't appear to be a simple scalar (and hence might change between now and when the regexp engine runs without our set magic ever running) such as a reference to an object with overloaded - stringification. */ + stringification. Also refuse to study an FBM scalar, as this gives + more flexibility in SV flag usage. No real-world code would ever + end up studying an FBM scalar, so this isn't a real pessimisation. + */ RETPUSHNO; } + pos = len; if (PL_lastscream) { SvSCREAM_off(PL_lastscream); @@ -705,10 +737,6 @@ PP(pp_study) } PL_lastscream = SvREFCNT_inc_simple(sv); - s = (unsigned char*)(SvPV(sv, len)); - pos = len; - if (pos <= 0) - RETPUSHNO; if (pos > PL_maxscream) { if (PL_maxscream < 0) { PL_maxscream = pos + 80; @@ -760,16 +788,192 @@ PP(pp_trans) EXTEND(SP,1); } TARG = sv_newmortal(); - PUSHi(do_trans(sv)); + if(PL_op->op_type == OP_TRANSR) { + SV * const newsv = newSVsv(sv); + do_trans(newsv); + mPUSHs(newsv); + } + else PUSHi(do_trans(sv)); RETURN; } /* Lvalue operators. */ +static void +S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping) +{ + dVAR; + STRLEN len; + char *s; + + PERL_ARGS_ASSERT_DO_CHOMP; + + if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs))) + return; + if (SvTYPE(sv) == SVt_PVAV) { + I32 i; + AV *const av = MUTABLE_AV(sv); + const I32 max = AvFILL(av); + + for (i = 0; i <= max; i++) { + sv = MUTABLE_SV(av_fetch(av, i, FALSE)); + if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) + do_chomp(retval, sv, chomping); + } + return; + } + else if (SvTYPE(sv) == SVt_PVHV) { + HV* const hv = MUTABLE_HV(sv); + HE* entry; + (void)hv_iterinit(hv); + while ((entry = hv_iternext(hv))) + do_chomp(retval, hv_iterval(hv,entry), chomping); + return; + } + else if (SvREADONLY(sv)) { + if (SvFAKE(sv)) { + /* SV is copy-on-write */ + sv_force_normal_flags(sv, 0); + } + if (SvREADONLY(sv)) + Perl_croak_no_modify(aTHX); + } + + if (PL_encoding) { + if (!SvUTF8(sv)) { + /* XXX, here sv is utf8-ized as a side-effect! + If encoding.pm is used properly, almost string-generating + operations, including literal strings, chr(), input data, etc. + should have been utf8-ized already, right? + */ + sv_recode_to_utf8(sv, PL_encoding); + } + } + + s = SvPV(sv, len); + if (chomping) { + char *temp_buffer = NULL; + SV *svrecode = NULL; + + if (s && len) { + s += --len; + if (RsPARA(PL_rs)) { + if (*s != '\n') + goto nope; + ++SvIVX(retval); + while (len && s[-1] == '\n') { + --len; + --s; + ++SvIVX(retval); + } + } + else { + STRLEN rslen, rs_charlen; + const char *rsptr = SvPV_const(PL_rs, rslen); + + rs_charlen = SvUTF8(PL_rs) + ? sv_len_utf8(PL_rs) + : rslen; + + if (SvUTF8(PL_rs) != SvUTF8(sv)) { + /* Assumption is that rs is shorter than the scalar. */ + if (SvUTF8(PL_rs)) { + /* RS is utf8, scalar is 8 bit. */ + bool is_utf8 = TRUE; + temp_buffer = (char*)bytes_from_utf8((U8*)rsptr, + &rslen, &is_utf8); + if (is_utf8) { + /* Cannot downgrade, therefore cannot possibly match + */ + assert (temp_buffer == rsptr); + temp_buffer = NULL; + goto nope; + } + rsptr = temp_buffer; + } + else if (PL_encoding) { + /* RS is 8 bit, encoding.pm is used. + * Do not recode PL_rs as a side-effect. */ + svrecode = newSVpvn(rsptr, rslen); + sv_recode_to_utf8(svrecode, PL_encoding); + rsptr = SvPV_const(svrecode, rslen); + rs_charlen = sv_len_utf8(svrecode); + } + else { + /* RS is 8 bit, scalar is utf8. */ + temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen); + rsptr = temp_buffer; + } + } + if (rslen == 1) { + if (*s != *rsptr) + goto nope; + ++SvIVX(retval); + } + else { + if (len < rslen - 1) + goto nope; + len -= rslen - 1; + s -= rslen - 1; + if (memNE(s, rsptr, rslen)) + goto nope; + SvIVX(retval) += rs_charlen; + } + } + s = SvPV_force_nolen(sv); + SvCUR_set(sv, len); + *SvEND(sv) = '\0'; + SvNIOK_off(sv); + SvSETMAGIC(sv); + } + nope: + + SvREFCNT_dec(svrecode); + + Safefree(temp_buffer); + } else { + if (len && !SvPOK(sv)) + s = SvPV_force_nomg(sv, len); + if (DO_UTF8(sv)) { + if (s && len) { + char * const send = s + len; + char * const start = s; + s = send - 1; + while (s > start && UTF8_IS_CONTINUATION(*s)) + s--; + if (is_utf8_string((U8*)s, send - s)) { + sv_setpvn(retval, s, send - s); + *s = '\0'; + SvCUR_set(sv, s - start); + SvNIOK_off(sv); + SvUTF8_on(retval); + } + } + else + sv_setpvs(retval, ""); + } + else if (s && len) { + s += --len; + sv_setpvn(retval, s, 1); + *s = '\0'; + SvCUR_set(sv, len); + SvUTF8_off(sv); + SvNIOK_off(sv); + } + else + sv_setpvs(retval, ""); + SvSETMAGIC(sv); + } +} + PP(pp_schop) { dVAR; dSP; dTARGET; - do_chop(TARG, TOPs); + const bool chomping = PL_op->op_type == OP_SCHOMP; + + if (chomping) + sv_setiv(TARG, 0); + do_chomp(TARG, TOPs, chomping); SETTARG; RETURN; } @@ -777,31 +981,17 @@ PP(pp_schop) PP(pp_chop) { dVAR; dSP; dMARK; dTARGET; dORIGMARK; + const bool chomping = PL_op->op_type == OP_CHOMP; + + if (chomping) + sv_setiv(TARG, 0); while (MARK < SP) - do_chop(TARG, *++MARK); + do_chomp(TARG, *++MARK, chomping); SP = ORIGMARK; XPUSHTARG; RETURN; } -PP(pp_schomp) -{ - dVAR; dSP; dTARGET; - SETi(do_chomp(TOPs)); - RETURN; -} - -PP(pp_chomp) -{ - dVAR; dSP; dMARK; dTARGET; - register I32 count = 0; - - while (SP > MARK) - count += do_chomp(POPs); - XPUSHi(count); - RETURN; -} - PP(pp_undef) { dVAR; dSP; @@ -850,21 +1040,38 @@ PP(pp_undef) GP *gp; HV *stash; - /* undef *Foo:: */ - if((stash = GvHV((const GV *)sv)) && HvNAME_get(stash)) - mro_isa_changed_in(stash); /* undef *Pkg::meth_name ... */ - else if(GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv)) - && HvNAME_get(stash)) - mro_method_changed_in(stash); + bool method_changed + = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv)) + && HvENAME_get(stash); + /* undef *Foo:: */ + if((stash = GvHV((const GV *)sv))) { + if(HvENAME_get(stash)) + SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash)); + else stash = NULL; + } gp_free(MUTABLE_GV(sv)); Newxz(gp, 1, GP); - GvGP(sv) = gp_ref(gp); + GvGP_set(sv, gp_ref(gp)); GvSV(sv) = newSV(0); GvLINE(sv) = CopLINE(PL_curcop); GvEGV(sv) = MUTABLE_GV(sv); GvMULTI_on(sv); + + if(stash) + mro_package_moved(NULL, stash, (const GV *)sv, 0); + stash = NULL; + /* undef *Foo::ISA */ + if( strEQ(GvNAME((const GV *)sv), "ISA") + && (stash = GvSTASH((const GV *)sv)) + && (method_changed || HvENAME(stash)) ) + mro_isa_changed_in(stash); + else if(method_changed) + mro_method_changed_in( + GvSTASH((const GV *)sv) + ); + break; } /* FALL THROUGH */ @@ -903,6 +1110,8 @@ PP(pp_postinc) dVAR; dSP; dTARGET; if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs)) Perl_croak_no_modify(aTHX); + if (SvROK(TOPs)) + TARG = sv_newmortal(); sv_setsv(TARG, TOPs); if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MAX) @@ -925,6 +1134,8 @@ PP(pp_postdec) dVAR; dSP; dTARGET; if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs)) Perl_croak_no_modify(aTHX); + if (SvROK(TOPs)) + TARG = sv_newmortal(); sv_setsv(TARG, TOPs); if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MIN) @@ -1292,7 +1503,7 @@ PP(pp_divide) warning before dieing, hence this test goes here. If it were immediately before the second SvIV_please, then DIE() would be invoked before left was even inspected, so - no inpsection would give no warning. */ + no inspection would give no warning. */ if (right == 0) DIE(aTHX_ "Illegal division by zero"); @@ -1752,7 +1963,7 @@ PP(pp_subtract) PP(pp_left_shift) { dVAR; dSP; dATARGET; SV *svl, *svr; - tryAMAGICbin_MG(lshift_amg, AMGf_assign); + tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric); svr = POPs; svl = TOPs; { @@ -1772,7 +1983,7 @@ PP(pp_left_shift) PP(pp_right_shift) { dVAR; dSP; dATARGET; SV *svl, *svr; - tryAMAGICbin_MG(rshift_amg, AMGf_assign); + tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric); svr = POPs; svl = TOPs; { @@ -1792,7 +2003,7 @@ PP(pp_right_shift) PP(pp_lt) { dVAR; dSP; - tryAMAGICbin_MG(lt_amg, AMGf_set); + tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -1875,7 +2086,7 @@ PP(pp_lt) PP(pp_gt) { dVAR; dSP; - tryAMAGICbin_MG(gt_amg, AMGf_set); + tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -1959,7 +2170,7 @@ PP(pp_gt) PP(pp_le) { dVAR; dSP; - tryAMAGICbin_MG(le_amg, AMGf_set); + tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -2043,7 +2254,7 @@ PP(pp_le) PP(pp_ge) { dVAR; dSP; - tryAMAGICbin_MG(ge_amg,AMGf_set); + tryAMAGICbin_MG(ge_amg,AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -2127,7 +2338,7 @@ PP(pp_ge) PP(pp_ne) { dVAR; dSP; - tryAMAGICbin_MG(ne_amg,AMGf_set); + tryAMAGICbin_MG(ne_amg,AMGf_set|AMGf_numeric); #ifndef NV_PRESERVES_UV if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { SP--; @@ -2204,7 +2415,7 @@ PP(pp_ne) PP(pp_ncmp) { dVAR; dSP; dTARGET; - tryAMAGICbin_MG(ncmp_amg, 0); + tryAMAGICbin_MG(ncmp_amg, AMGf_numeric); #ifndef NV_PRESERVES_UV if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { const UV right = PTR2UV(SvRV(POPs)); @@ -2337,8 +2548,8 @@ PP(pp_sle) { dPOPTOPssrl; const int cmp = (IN_LOCALE_RUNTIME - ? sv_cmp_locale(left, right) - : sv_cmp(left, right)); + ? sv_cmp_locale_flags(left, right, 0) + : sv_cmp_flags(left, right, 0)); SETs(boolSV(cmp * multiplier < rhs)); RETURN; } @@ -2350,7 +2561,7 @@ PP(pp_seq) tryAMAGICbin_MG(seq_amg, AMGf_set); { dPOPTOPssrl; - SETs(boolSV(sv_eq(left, right))); + SETs(boolSV(sv_eq_flags(left, right, 0))); RETURN; } } @@ -2361,7 +2572,7 @@ PP(pp_sne) tryAMAGICbin_MG(sne_amg, AMGf_set); { dPOPTOPssrl; - SETs(boolSV(!sv_eq(left, right))); + SETs(boolSV(!sv_eq_flags(left, right, 0))); RETURN; } } @@ -2373,8 +2584,8 @@ PP(pp_scmp) { dPOPTOPssrl; const int cmp = (IN_LOCALE_RUNTIME - ? sv_cmp_locale(left, right) - : sv_cmp(left, right)); + ? sv_cmp_locale_flags(left, right, 0) + : sv_cmp_flags(left, right, 0)); SETi( cmp ); RETURN; } @@ -2387,6 +2598,8 @@ PP(pp_bit_and) { dPOPTOPssrl; if (SvNIOKp(left) || SvNIOKp(right)) { + const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left); + const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right); if (PL_op->op_private & HINT_INTEGER) { const IV i = SvIV_nomg(left) & SvIV_nomg(right); SETi(i); @@ -2395,6 +2608,8 @@ PP(pp_bit_and) const UV u = SvUV_nomg(left) & SvUV_nomg(right); SETu(u); } + if (left_ro_nonnum) SvNIOK_off(left); + if (right_ro_nonnum) SvNIOK_off(right); } else { do_vop(PL_op->op_type, TARG, left, right); @@ -2413,6 +2628,8 @@ PP(pp_bit_or) { dPOPTOPssrl; if (SvNIOKp(left) || SvNIOKp(right)) { + const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left); + const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right); if (PL_op->op_private & HINT_INTEGER) { const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0); const IV r = SvIV_nomg(right); @@ -2425,6 +2642,8 @@ PP(pp_bit_or) const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r); SETu(result); } + if (left_ro_nonnum) SvNIOK_off(left); + if (right_ro_nonnum) SvNIOK_off(right); } else { do_vop(op_type, TARG, left, right); @@ -2441,6 +2660,11 @@ PP(pp_negate) { SV * const sv = TOPs; const int flags = SvFLAGS(sv); + + if( !SvNIOK( sv ) && looks_like_number( sv ) ){ + SvIV_please( sv ); + } + if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { /* It's publicly an integer, or privately an integer-not-float */ oops_its_an_int: @@ -2508,14 +2732,14 @@ PP(pp_not) { dVAR; dSP; tryAMAGICun_MG(not_amg, AMGf_set); - *PL_stack_sp = boolSV(!SvTRUE(*PL_stack_sp)); + *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp)); return NORMAL; } PP(pp_complement) { dVAR; dSP; dTARGET; - tryAMAGICun_MG(compl_amg, 0); + tryAMAGICun_MG(compl_amg, AMGf_numeric); { dTOPss; if (SvNIOKp(sv)) { @@ -2950,12 +3174,19 @@ PP(pp_rand) PP(pp_srand) { - dVAR; dSP; + dVAR; dSP; dTARGET; const UV anum = (MAXARG < 1) ? seed() : POPu; (void)seedDrand01((Rand_seed_t)anum); PL_srand_called = TRUE; - EXTEND(SP, 1); - RETPUSHYES; + if (anum) + XPUSHu(anum); + else { + /* Historically srand always returned true. We can avoid breaking + that like this: */ + sv_setpvs(TARG, "0 but true"); + XPUSHTARG; + } + RETURN; } PP(pp_int) @@ -3103,8 +3334,13 @@ PP(pp_length) = sv_2pv_flags(sv, &len, SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC); - if (!p) + if (!p) { + if (!SvPADTMP(TARG)) { + sv_setsv(TARG, &PL_sv_undef); + SETTARG; + } SETs(&PL_sv_undef); + } else if (DO_UTF8(sv)) { SETi(utf8_length((U8*)p, (U8*)p + len)); } @@ -3117,6 +3353,10 @@ PP(pp_length) else SETi(sv_len(sv)); } else { + if (!SvPADTMP(TARG)) { + sv_setsv_nomg(TARG, &PL_sv_undef); + SETTARG; + } SETs(&PL_sv_undef); } RETURN; @@ -3146,8 +3386,6 @@ PP(pp_substr) bool repl_need_utf8_upgrade = FALSE; bool repl_is_utf8 = FALSE; - SvTAINTED_off(TARG); /* decontaminate */ - SvUTF8_off(TARG); /* decontaminate */ if (num_args > 2) { if (num_args > 3) { repl_sv = POPs; @@ -3255,26 +3493,46 @@ PP(pp_substr) STRLEN byte_pos = utf8_curlen ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos; - tmps += byte_pos; - /* we either return a PV or an LV. If the TARG hasn't been used - * before, or is of that type, reuse it; otherwise use a mortal - * instead. Note that LVs can have an extended lifetime, so also - * dont reuse if refcount > 1 (bug #20933) */ - if (SvTYPE(TARG) > SVt_NULL) { - if ( (SvTYPE(TARG) == SVt_PVLV) - ? (!lvalue || SvREFCNT(TARG) > 1) - : lvalue) - { - TARG = sv_newmortal(); + if (lvalue && !repl) { + SV * ret; + + if (!SvGMAGICAL(sv)) { + if (SvROK(sv)) { + SvPV_force_nolen(sv); + Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), + "Attempt to use reference as lvalue in substr"); + } + if (isGV_with_GP(sv)) + SvPV_force_nolen(sv); + else if (SvOK(sv)) /* is it defined ? */ + (void)SvPOK_only_UTF8(sv); + else + sv_setpvs(sv, ""); /* avoid lexical reincarnation */ } + + ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ + sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0); + LvTYPE(ret) = 'x'; + LvTARG(ret) = SvREFCNT_inc_simple(sv); + LvTARGOFF(ret) = pos; + LvTARGLEN(ret) = len; + + SPAGAIN; + PUSHs(ret); /* avoid SvSETMAGIC here */ + RETURN; } + SvTAINTED_off(TARG); /* decontaminate */ + SvUTF8_off(TARG); /* decontaminate */ + + tmps += byte_pos; sv_setpvn(TARG, tmps, byte_len); #ifdef USE_LOCALE_COLLATE sv_unmagic(TARG, PERL_MAGIC_collxfrm); #endif if (utf8_curlen) SvUTF8_on(TARG); + if (repl) { SV* repl_sv_copy = NULL; @@ -3291,37 +3549,10 @@ PP(pp_substr) SvUTF8_on(sv); SvREFCNT_dec(repl_sv_copy); } - else if (lvalue) { /* it's an lvalue! */ - if (!SvGMAGICAL(sv)) { - if (SvROK(sv)) { - SvPV_force_nolen(sv); - Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), - "Attempt to use reference as lvalue in substr"); - } - if (isGV_with_GP(sv)) - SvPV_force_nolen(sv); - else if (SvOK(sv)) /* is it defined ? */ - (void)SvPOK_only_UTF8(sv); - else - sv_setpvs(sv, ""); /* avoid lexical reincarnation */ - } - - if (SvTYPE(TARG) < SVt_PVLV) { - sv_upgrade(TARG, SVt_PVLV); - sv_magic(TARG, NULL, PERL_MAGIC_substr, NULL, 0); - } - - LvTYPE(TARG) = 'x'; - if (LvTARG(TARG) != sv) { - SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc_simple(sv); - } - LvTARGOFF(TARG) = pos; - LvTARGLEN(TARG) = len; - } } SPAGAIN; - PUSHs(TARG); /* avoid SvSETMAGIC here */ + SvSETMAGIC(TARG); + PUSHs(TARG); RETURN; bound_fail: @@ -3333,31 +3564,29 @@ bound_fail: PP(pp_vec) { - dVAR; dSP; dTARGET; + dVAR; dSP; register const IV size = POPi; register const IV offset = POPi; register SV * const src = POPs; const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; + SV * ret; - SvTAINTED_off(TARG); /* decontaminate */ if (lvalue) { /* it's an lvalue! */ - if (SvREFCNT(TARG) > 1) /* don't share the TARG (#20933) */ - TARG = sv_newmortal(); - if (SvTYPE(TARG) < SVt_PVLV) { - sv_upgrade(TARG, SVt_PVLV); - sv_magic(TARG, NULL, PERL_MAGIC_vec, NULL, 0); - } - LvTYPE(TARG) = 'v'; - if (LvTARG(TARG) != src) { - SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc_simple(src); - } - LvTARGOFF(TARG) = offset; - LvTARGLEN(TARG) = size; + ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ + sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0); + LvTYPE(ret) = 'v'; + LvTARG(ret) = SvREFCNT_inc_simple(src); + LvTARGOFF(ret) = offset; + LvTARGLEN(ret) = size; + } + else { + dTARGET; + SvTAINTED_off(TARG); /* decontaminate */ + ret = TARG; } - sv_setuv(TARG, do_vecget(src, offset, size)); - PUSHs(TARG); + sv_setuv(ret, do_vecget(src, offset, size)); + PUSHs(ret); RETURN; } @@ -3484,8 +3713,6 @@ PP(pp_index) PP(pp_sprintf) { dVAR; dSP; dMARK; dORIGMARK; dTARGET; - if (SvTAINTED(MARK[1])) - TAINT_PROPER("sprintf"); SvTAINTED_off(TARG); do_sprintf(TARG, SP-MARK, MARK+1); TAINT_IF(SvTAINTED(TARG)); @@ -3624,12 +3851,6 @@ PP(pp_crypt) /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */ -/* Both the characters below can be stored in two UTF-8 bytes. In UTF-8 the max - * character that 2 bytes can hold is U+07FF, and in UTF-EBCDIC it is U+03FF. - * See http://www.unicode.org/unicode/reports/tr16 */ -#define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */ -#define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case of MICRON */ - /* Below are several macros that generate code */ /* Generates code to store a unicode codepoint c that is known to occupy * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */ @@ -3774,7 +3995,7 @@ PP(pp_ucfirst) /* Convert the two source bytes to a single Unicode code point * value, change case and save for below */ - chr = UTF8_ACCUMULATE(*s, *(s+1)); + chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)); if (op_type == OP_LCFIRST) { /* lower casing is easy */ U8 lower = toLOWER_LATIN1(chr); STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower); @@ -3993,6 +4214,8 @@ PP(pp_ucfirst) SvCUR_set(dest, need - 1); } } + if (dest != source && SvTAINTED(source)) + SvTAINT(dest); SvSETMAGIC(dest); RETURN; } @@ -4099,10 +4322,10 @@ PP(pp_uc) /* Likewise, if it fits in a byte, its case change is in our * table */ - U8 orig = UTF8_ACCUMULATE(*s, *(s+1)); + U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++); U8 upper = toUPPER_LATIN1_MOD(orig); CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper); - s += 2; + s++; } else { #else @@ -4263,6 +4486,8 @@ PP(pp_uc) SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); } } /* End of isn't utf8 */ + if (dest != source && SvTAINTED(source)) + SvTAINT(dest); SvSETMAGIC(dest); RETURN; } @@ -4337,9 +4562,9 @@ PP(pp_lc) else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { /* As do the ones in the Latin1 range */ - U8 lower = toLOWER_LATIN1(UTF8_ACCUMULATE(*s, *(s+1))); + U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++)); CAT_UNI_TO_UTF8_TWO_BYTE(d, lower); - s += 2; + s++; } else { #endif @@ -4485,6 +4710,8 @@ PP(pp_lc) SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); } } + if (dest != source && SvTAINTED(source)) + SvTAINT(dest); SvSETMAGIC(dest); RETURN; } @@ -4607,6 +4834,43 @@ PP(pp_aslice) RETURN; } +/* Smart dereferencing for keys, values and each */ +PP(pp_rkeys) +{ + dVAR; + dSP; + dPOPss; + + SvGETMAGIC(sv); + + if ( + !SvROK(sv) + || (sv = SvRV(sv), + (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV) + || SvOBJECT(sv) + ) + ) { + DIE(aTHX_ + "Type of argument to %s must be unblessed hashref or arrayref", + PL_op_desc[PL_op->op_type] ); + } + + if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV) + DIE(aTHX_ + "Can't modify %s in %s", + PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type] + ); + + /* Delegate to correct function for op type */ + PUSHs(sv); + if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) { + return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX); + } + else { + return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX); + } +} + PP(pp_aeach) { dVAR; @@ -4652,7 +4916,7 @@ PP(pp_akeys) EXTEND(SP, n + 1); - if (PL_op->op_type == OP_AKEYS) { + if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) { n += i; for (; i <= n; i++) { mPUSHi(i); @@ -5137,10 +5401,40 @@ PP(pp_anonhash) RETURN; } +static AV * +S_deref_plain_array(pTHX_ AV *ary) +{ + if (SvTYPE(ary) == SVt_PVAV) return ary; + SvGETMAGIC((SV *)ary); + if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV) + Perl_die(aTHX_ "Not an ARRAY reference"); + else if (SvOBJECT(SvRV(ary))) + Perl_die(aTHX_ "Not an unblessed ARRAY reference"); + return (AV *)SvRV(ary); +} + +#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) +# define DEREF_PLAIN_ARRAY(ary) \ + ({ \ + AV *aRrRay = ary; \ + SvTYPE(aRrRay) == SVt_PVAV \ + ? aRrRay \ + : S_deref_plain_array(aTHX_ aRrRay); \ + }) +#else +# define DEREF_PLAIN_ARRAY(ary) \ + ( \ + PL_Sv = (SV *)(ary), \ + SvTYPE(PL_Sv) == SVt_PVAV \ + ? (AV *)PL_Sv \ + : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \ + ) +#endif + PP(pp_splice) { dVAR; dSP; dMARK; dORIGMARK; - register AV *ary = MUTABLE_AV(*++MARK); + register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK)); register SV **src; register SV **dst; register I32 i; @@ -5152,14 +5446,9 @@ PP(pp_splice) const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); if (mg) { - *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg); - PUSHMARK(MARK); - PUTBACK; - ENTER_with_name("call_SPLICE"); - call_method("SPLICE",GIMME_V); - LEAVE_with_name("call_SPLICE"); - SPAGAIN; - RETURN; + return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg, + GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK, + sp - mark); } SP++; @@ -5226,7 +5515,7 @@ PP(pp_splice) if (AvREAL(ary)) { EXTEND_MORTAL(length); for (i = length, dst = MARK; i; i--) { - sv_2mortal(*dst); /* free them eventualy */ + sv_2mortal(*dst); /* free them eventually */ dst++; } } @@ -5318,7 +5607,7 @@ PP(pp_splice) if (AvREAL(ary)) { EXTEND_MORTAL(length); for (i = length, dst = MARK; i; i--) { - sv_2mortal(*dst); /* free them eventualy */ + sv_2mortal(*dst); /* free them eventually */ dst++; } } @@ -5337,6 +5626,10 @@ PP(pp_splice) *MARK = &PL_sv_undef; Safefree(tmparyval); } + + if (SvMAGICAL(ary)) + mg_set(MUTABLE_SV(ary)); + SP = MARK; RETURN; } @@ -5344,7 +5637,7 @@ PP(pp_splice) PP(pp_push) { dVAR; dSP; dMARK; dORIGMARK; dTARGET; - register AV * const ary = MUTABLE_AV(*++MARK); + register AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK)); const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); if (mg) { @@ -5381,7 +5674,7 @@ PP(pp_shift) dVAR; dSP; AV * const av = PL_op->op_flags & OPf_SPECIAL - ? MUTABLE_AV(GvAV(PL_defgv)) : MUTABLE_AV(POPs); + ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs)); SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av); EXTEND(SP, 1); assert (sv); @@ -5394,7 +5687,7 @@ PP(pp_shift) PP(pp_unshift) { dVAR; dSP; dMARK; dORIGMARK; dTARGET; - register AV *ary = MUTABLE_AV(*++MARK); + register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK)); const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); if (mg) { @@ -5592,7 +5885,7 @@ PP(pp_split) DIE(aTHX_ "panic: pp_split"); rx = PM_GETRE(pm); - TAINT_IF((RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) && + TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); RX_MATCH_UTF8_set(rx, do_utf8); @@ -5638,7 +5931,7 @@ PP(pp_split) while (*s == ' ' || is_utf8_space((U8*)s)) s += UTF8SKIP(s); } - else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (isSPACE_LC(*s)) s++; } @@ -5647,7 +5940,7 @@ PP(pp_split) s++; } } - if (RX_EXTFLAGS(rx) & PMf_MULTILINE) { + if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) { multiline = 1; } @@ -5668,7 +5961,8 @@ PP(pp_split) else m += t; } - } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + } + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (m < strend && !isSPACE_LC(*m)) ++m; } else { @@ -5700,7 +5994,8 @@ PP(pp_split) if (do_utf8) { while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) )) s += UTF8SKIP(s); - } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + } + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (s < strend && isSPACE_LC(*s)) ++s; } else { @@ -6017,8 +6312,20 @@ PP(pp_lock) PP(unimplemented_op) { dVAR; - DIE(aTHX_ "panic: unimplemented op %s (#%d) called", OP_NAME(PL_op), - PL_op->op_type); + const Optype op_type = PL_op->op_type; + /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope + with out of range op numbers - it only "special" cases op_custom. + Secondly, as the three ops we "panic" on are padmy, mapstart and custom, + if we get here for a custom op then that means that the custom op didn't + have an implementation. Given that OP_NAME() looks up the custom op + by its pp_addr, likely it will return NULL, unless someone (unhelpfully) + registers &PL_unimplemented_op as the address of their custom op. + NULL doesn't generate a useful error message. "custom" does. */ + const char *const name = op_type >= OP_max + ? "[out of range]" : PL_op_name[PL_op->op_type]; + if(OP_IS_SOCKET(op_type)) + DIE(aTHX_ PL_no_sock_func, name); + DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type); } PP(pp_boolkeys) @@ -6027,6 +6334,8 @@ PP(pp_boolkeys) dSP; HV * const hv = (HV*)POPs; + if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; } + if (SvRMAGICAL(hv)) { MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied); if (mg) { @@ -6035,7 +6344,7 @@ PP(pp_boolkeys) } } - XPUSHs(boolSV(HvKEYS(hv) != 0)); + XPUSHs(boolSV(HvUSEDKEYS(hv) != 0)); RETURN; }