X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/0824d66743a706cd268ace8fc9df03d7374c6886..226963cb8d530d52829bccdad84e085bd2f00cb7:/pp.c diff --git a/pp.c b/pp.c index 937fdfd..751a0bf 100644 --- a/pp.c +++ b/pp.c @@ -121,7 +121,7 @@ PP(pp_padhv) } gimme = GIMME_V; if (gimme == G_ARRAY) { - RETURNOP(do_kv()); + RETURNOP(Perl_do_kv(aTHX)); } else if (gimme == G_SCALAR) { SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG)); @@ -139,11 +139,13 @@ PP(pp_rv2gv) { dVAR; dSP; dTOPss; - SvGETMAGIC(sv); + if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv); if (SvROK(sv)) { wasref: - tryAMAGICunDEREF(to_gv); - + if (SvAMAGIC(sv)) { + sv = amagic_deref_call(sv, to_gv_amg); + SPAGAIN; + } sv = SvRV(sv); if (SvTYPE(sv) == SVt_PVIO) { GV * const gv = MUTABLE_GV(sv_newmortal()); @@ -162,7 +164,7 @@ PP(pp_rv2gv) * NI-S 1999/05/07 */ if (SvREADONLY(sv)) - Perl_croak(aTHX_ "%s", PL_no_modify); + Perl_croak_no_modify(aTHX); if (PL_op->op_private & OPpDEREF) { GV *gv; if (cUNOP->op_targ) { @@ -213,8 +215,16 @@ PP(pp_rv2gv) } sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV)); } + /* FAKE globs in the symbol table cause weird bugs (#77810) */ + if (sv) SvFAKE_off(sv); } } + if (sv && SvFAKE(sv)) { + SV *newsv = sv_newmortal(); + sv_setsv_flags(newsv, sv, 0); + SvFAKE_off(newsv); + sv = newsv; + } if (PL_op->op_private & OPpLVAL_INTRO) save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL)); SETs(sv); @@ -238,7 +248,10 @@ Perl_softref2xv(pTHX_ SV *const sv, const char *const what, Perl_die(aTHX_ PL_no_usym, what); } if (!SvOK(sv)) { - if (PL_op->op_flags & OPf_REF) + if ( + PL_op->op_flags & OPf_REF && + PL_op->op_next->op_type != OP_BOOLKEYS + ) Perl_die(aTHX_ PL_no_usym, what); if (ckWARN(WARN_UNINITIALIZED)) report_uninit(sv); @@ -275,7 +288,10 @@ PP(pp_rv2sv) if (!(PL_op->op_private & OPpDEREFed)) SvGETMAGIC(sv); if (SvROK(sv)) { - tryAMAGICunDEREF(to_sv); + if (SvAMAGIC(sv)) { + sv = amagic_deref_call(sv, to_sv_amg); + SPAGAIN; + } sv = SvRV(sv); switch (SvTYPE(sv)) { @@ -336,26 +352,21 @@ PP(pp_av2arylen) PP(pp_pos) { - dVAR; dSP; dTARGET; dPOPss; + dVAR; dSP; dPOPss; if (PL_op->op_flags & OPf_MOD || LVRET) { - if (SvTYPE(TARG) < SVt_PVLV) { - sv_upgrade(TARG, SVt_PVLV); - sv_magic(TARG, NULL, PERL_MAGIC_pos, NULL, 0); - } - - LvTYPE(TARG) = '.'; - if (LvTARG(TARG) != sv) { - SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc_simple(sv); - } - PUSHs(TARG); /* no SvSETMAGIC */ + SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ + sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0); + LvTYPE(ret) = '.'; + LvTARG(ret) = SvREFCNT_inc_simple(sv); + PUSHs(ret); /* no SvSETMAGIC */ RETURN; } else { if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global); if (mg && mg->mg_len >= 0) { + dTARGET; I32 i = mg->mg_len; if (DO_UTF8(sv)) sv_pos_b2u(sv, &i); @@ -426,7 +437,27 @@ PP(pp_prototype) goto set; } if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) { - ret = newSVpvs_flags("\\[@%]", SVs_TEMP); + ret = newSVpvs_flags("+", SVs_TEMP); + goto set; + } + if (code == -KEY_push || code == -KEY_unshift) { + ret = newSVpvs_flags("+@", SVs_TEMP); + goto set; + } + if (code == -KEY_pop || code == -KEY_shift) { + ret = newSVpvs_flags(";+", SVs_TEMP); + goto set; + } + if (code == -KEY_splice) { + ret = newSVpvs_flags("+;$$@", SVs_TEMP); + goto set; + } + if (code == -KEY_tied || code == -KEY_untie) { + ret = newSVpvs_flags("\\[$@%*]", SVs_TEMP); + goto set; + } + if (code == -KEY_tie) { + ret = newSVpvs_flags("\\[$@%*]$@", SVs_TEMP); goto set; } if (code == -KEY_readpipe) { @@ -752,16 +783,192 @@ PP(pp_trans) EXTEND(SP,1); } TARG = sv_newmortal(); - PUSHi(do_trans(sv)); + if(PL_op->op_type == OP_TRANSR) { + SV * const newsv = newSVsv(sv); + do_trans(newsv); + mPUSHs(newsv); + } + else PUSHi(do_trans(sv)); RETURN; } /* Lvalue operators. */ +static void +S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping) +{ + dVAR; + STRLEN len; + char *s; + + PERL_ARGS_ASSERT_DO_CHOMP; + + if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs))) + return; + if (SvTYPE(sv) == SVt_PVAV) { + I32 i; + AV *const av = MUTABLE_AV(sv); + const I32 max = AvFILL(av); + + for (i = 0; i <= max; i++) { + sv = MUTABLE_SV(av_fetch(av, i, FALSE)); + if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) + do_chomp(retval, sv, chomping); + } + return; + } + else if (SvTYPE(sv) == SVt_PVHV) { + HV* const hv = MUTABLE_HV(sv); + HE* entry; + (void)hv_iterinit(hv); + while ((entry = hv_iternext(hv))) + do_chomp(retval, hv_iterval(hv,entry), chomping); + return; + } + else if (SvREADONLY(sv)) { + if (SvFAKE(sv)) { + /* SV is copy-on-write */ + sv_force_normal_flags(sv, 0); + } + if (SvREADONLY(sv)) + Perl_croak_no_modify(aTHX); + } + + if (PL_encoding) { + if (!SvUTF8(sv)) { + /* XXX, here sv is utf8-ized as a side-effect! + If encoding.pm is used properly, almost string-generating + operations, including literal strings, chr(), input data, etc. + should have been utf8-ized already, right? + */ + sv_recode_to_utf8(sv, PL_encoding); + } + } + + s = SvPV(sv, len); + if (chomping) { + char *temp_buffer = NULL; + SV *svrecode = NULL; + + if (s && len) { + s += --len; + if (RsPARA(PL_rs)) { + if (*s != '\n') + goto nope; + ++SvIVX(retval); + while (len && s[-1] == '\n') { + --len; + --s; + ++SvIVX(retval); + } + } + else { + STRLEN rslen, rs_charlen; + const char *rsptr = SvPV_const(PL_rs, rslen); + + rs_charlen = SvUTF8(PL_rs) + ? sv_len_utf8(PL_rs) + : rslen; + + if (SvUTF8(PL_rs) != SvUTF8(sv)) { + /* Assumption is that rs is shorter than the scalar. */ + if (SvUTF8(PL_rs)) { + /* RS is utf8, scalar is 8 bit. */ + bool is_utf8 = TRUE; + temp_buffer = (char*)bytes_from_utf8((U8*)rsptr, + &rslen, &is_utf8); + if (is_utf8) { + /* Cannot downgrade, therefore cannot possibly match + */ + assert (temp_buffer == rsptr); + temp_buffer = NULL; + goto nope; + } + rsptr = temp_buffer; + } + else if (PL_encoding) { + /* RS is 8 bit, encoding.pm is used. + * Do not recode PL_rs as a side-effect. */ + svrecode = newSVpvn(rsptr, rslen); + sv_recode_to_utf8(svrecode, PL_encoding); + rsptr = SvPV_const(svrecode, rslen); + rs_charlen = sv_len_utf8(svrecode); + } + else { + /* RS is 8 bit, scalar is utf8. */ + temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen); + rsptr = temp_buffer; + } + } + if (rslen == 1) { + if (*s != *rsptr) + goto nope; + ++SvIVX(retval); + } + else { + if (len < rslen - 1) + goto nope; + len -= rslen - 1; + s -= rslen - 1; + if (memNE(s, rsptr, rslen)) + goto nope; + SvIVX(retval) += rs_charlen; + } + } + s = SvPV_force_nolen(sv); + SvCUR_set(sv, len); + *SvEND(sv) = '\0'; + SvNIOK_off(sv); + SvSETMAGIC(sv); + } + nope: + + SvREFCNT_dec(svrecode); + + Safefree(temp_buffer); + } else { + if (len && !SvPOK(sv)) + s = SvPV_force_nomg(sv, len); + if (DO_UTF8(sv)) { + if (s && len) { + char * const send = s + len; + char * const start = s; + s = send - 1; + while (s > start && UTF8_IS_CONTINUATION(*s)) + s--; + if (is_utf8_string((U8*)s, send - s)) { + sv_setpvn(retval, s, send - s); + *s = '\0'; + SvCUR_set(sv, s - start); + SvNIOK_off(sv); + SvUTF8_on(retval); + } + } + else + sv_setpvs(retval, ""); + } + else if (s && len) { + s += --len; + sv_setpvn(retval, s, 1); + *s = '\0'; + SvCUR_set(sv, len); + SvUTF8_off(sv); + SvNIOK_off(sv); + } + else + sv_setpvs(retval, ""); + SvSETMAGIC(sv); + } +} + PP(pp_schop) { dVAR; dSP; dTARGET; - do_chop(TARG, TOPs); + const bool chomping = PL_op->op_type == OP_SCHOMP; + + if (chomping) + sv_setiv(TARG, 0); + do_chomp(TARG, TOPs, chomping); SETTARG; RETURN; } @@ -769,31 +976,17 @@ PP(pp_schop) PP(pp_chop) { dVAR; dSP; dMARK; dTARGET; dORIGMARK; + const bool chomping = PL_op->op_type == OP_CHOMP; + + if (chomping) + sv_setiv(TARG, 0); while (MARK < SP) - do_chop(TARG, *++MARK); + do_chomp(TARG, *++MARK, chomping); SP = ORIGMARK; XPUSHTARG; RETURN; } -PP(pp_schomp) -{ - dVAR; dSP; dTARGET; - SETi(do_chomp(TOPs)); - RETURN; -} - -PP(pp_chomp) -{ - dVAR; dSP; dMARK; dTARGET; - register I32 count = 0; - - while (SP > MARK) - count += do_chomp(POPs); - XPUSHi(count); - RETURN; -} - PP(pp_undef) { dVAR; dSP; @@ -830,7 +1023,7 @@ PP(pp_undef) /* let user-undef'd sub keep its identity */ GV* const gv = CvGV((const CV *)sv); cv_undef(MUTABLE_CV(sv)); - CvGV((const CV *)sv) = gv; + CvGV_set(MUTABLE_CV(sv), gv); } break; case SVt_PVGV: @@ -842,21 +1035,38 @@ PP(pp_undef) GP *gp; HV *stash; - /* undef *Foo:: */ - if((stash = GvHV((const GV *)sv)) && HvNAME_get(stash)) - mro_isa_changed_in(stash); /* undef *Pkg::meth_name ... */ - else if(GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv)) - && HvNAME_get(stash)) - mro_method_changed_in(stash); + bool method_changed + = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv)) + && HvENAME_get(stash); + /* undef *Foo:: */ + if((stash = GvHV((const GV *)sv))) { + if(HvENAME_get(stash)) + SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash)); + else stash = NULL; + } gp_free(MUTABLE_GV(sv)); Newxz(gp, 1, GP); - GvGP(sv) = gp_ref(gp); + GvGP_set(sv, gp_ref(gp)); GvSV(sv) = newSV(0); GvLINE(sv) = CopLINE(PL_curcop); GvEGV(sv) = MUTABLE_GV(sv); GvMULTI_on(sv); + + if(stash) + mro_package_moved(NULL, stash, (const GV *)sv, 0); + stash = NULL; + /* undef *Foo::ISA */ + if( strEQ(GvNAME((const GV *)sv), "ISA") + && (stash = GvSTASH((const GV *)sv)) + && (method_changed || HvENAME(stash)) ) + mro_isa_changed_in(stash); + else if(method_changed) + mro_method_changed_in( + GvSTASH((const GV *)sv) + ); + break; } /* FALL THROUGH */ @@ -877,7 +1087,7 @@ PP(pp_predec) { dVAR; dSP; if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs)) - DIE(aTHX_ "%s", PL_no_modify); + Perl_croak_no_modify(aTHX); if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MIN) { @@ -894,7 +1104,9 @@ PP(pp_postinc) { dVAR; dSP; dTARGET; if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs)) - DIE(aTHX_ "%s", PL_no_modify); + Perl_croak_no_modify(aTHX); + if (SvROK(TOPs)) + TARG = sv_newmortal(); sv_setsv(TARG, TOPs); if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MAX) @@ -916,7 +1128,9 @@ PP(pp_postdec) { dVAR; dSP; dTARGET; if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs)) - DIE(aTHX_ "%s", PL_no_modify); + Perl_croak_no_modify(aTHX); + if (SvROK(TOPs)) + TARG = sv_newmortal(); sv_setsv(TARG, TOPs); if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MIN) @@ -1284,7 +1498,7 @@ PP(pp_divide) warning before dieing, hence this test goes here. If it were immediately before the second SvIV_please, then DIE() would be invoked before left was even inspected, so - no inpsection would give no warning. */ + no inspection would give no warning. */ if (right == 0) DIE(aTHX_ "Illegal division by zero"); @@ -1744,7 +1958,7 @@ PP(pp_subtract) PP(pp_left_shift) { dVAR; dSP; dATARGET; SV *svl, *svr; - tryAMAGICbin_MG(lshift_amg, AMGf_assign); + tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric); svr = POPs; svl = TOPs; { @@ -1764,7 +1978,7 @@ PP(pp_left_shift) PP(pp_right_shift) { dVAR; dSP; dATARGET; SV *svl, *svr; - tryAMAGICbin_MG(rshift_amg, AMGf_assign); + tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric); svr = POPs; svl = TOPs; { @@ -1784,7 +1998,7 @@ PP(pp_right_shift) PP(pp_lt) { dVAR; dSP; - tryAMAGICbin_MG(lt_amg, AMGf_set); + tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -1867,7 +2081,7 @@ PP(pp_lt) PP(pp_gt) { dVAR; dSP; - tryAMAGICbin_MG(gt_amg, AMGf_set); + tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -1951,7 +2165,7 @@ PP(pp_gt) PP(pp_le) { dVAR; dSP; - tryAMAGICbin_MG(le_amg, AMGf_set); + tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -2035,7 +2249,7 @@ PP(pp_le) PP(pp_ge) { dVAR; dSP; - tryAMAGICbin_MG(ge_amg,AMGf_set); + tryAMAGICbin_MG(ge_amg,AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -2119,7 +2333,7 @@ PP(pp_ge) PP(pp_ne) { dVAR; dSP; - tryAMAGICbin_MG(ne_amg,AMGf_set); + tryAMAGICbin_MG(ne_amg,AMGf_set|AMGf_numeric); #ifndef NV_PRESERVES_UV if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { SP--; @@ -2196,7 +2410,7 @@ PP(pp_ne) PP(pp_ncmp) { dVAR; dSP; dTARGET; - tryAMAGICbin_MG(ncmp_amg, 0); + tryAMAGICbin_MG(ncmp_amg, AMGf_numeric); #ifndef NV_PRESERVES_UV if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { const UV right = PTR2UV(SvRV(POPs)); @@ -2329,8 +2543,8 @@ PP(pp_sle) { dPOPTOPssrl; const int cmp = (IN_LOCALE_RUNTIME - ? sv_cmp_locale(left, right) - : sv_cmp(left, right)); + ? sv_cmp_locale_flags(left, right, 0) + : sv_cmp_flags(left, right, 0)); SETs(boolSV(cmp * multiplier < rhs)); RETURN; } @@ -2342,7 +2556,7 @@ PP(pp_seq) tryAMAGICbin_MG(seq_amg, AMGf_set); { dPOPTOPssrl; - SETs(boolSV(sv_eq(left, right))); + SETs(boolSV(sv_eq_flags(left, right, 0))); RETURN; } } @@ -2353,7 +2567,7 @@ PP(pp_sne) tryAMAGICbin_MG(sne_amg, AMGf_set); { dPOPTOPssrl; - SETs(boolSV(!sv_eq(left, right))); + SETs(boolSV(!sv_eq_flags(left, right, 0))); RETURN; } } @@ -2365,8 +2579,8 @@ PP(pp_scmp) { dPOPTOPssrl; const int cmp = (IN_LOCALE_RUNTIME - ? sv_cmp_locale(left, right) - : sv_cmp(left, right)); + ? sv_cmp_locale_flags(left, right, 0) + : sv_cmp_flags(left, right, 0)); SETi( cmp ); RETURN; } @@ -2379,6 +2593,8 @@ PP(pp_bit_and) { dPOPTOPssrl; if (SvNIOKp(left) || SvNIOKp(right)) { + const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left); + const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right); if (PL_op->op_private & HINT_INTEGER) { const IV i = SvIV_nomg(left) & SvIV_nomg(right); SETi(i); @@ -2387,6 +2603,8 @@ PP(pp_bit_and) const UV u = SvUV_nomg(left) & SvUV_nomg(right); SETu(u); } + if (left_ro_nonnum) SvNIOK_off(left); + if (right_ro_nonnum) SvNIOK_off(right); } else { do_vop(PL_op->op_type, TARG, left, right); @@ -2405,6 +2623,8 @@ PP(pp_bit_or) { dPOPTOPssrl; if (SvNIOKp(left) || SvNIOKp(right)) { + const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left); + const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right); if (PL_op->op_private & HINT_INTEGER) { const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0); const IV r = SvIV_nomg(right); @@ -2417,6 +2637,8 @@ PP(pp_bit_or) const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r); SETu(result); } + if (left_ro_nonnum) SvNIOK_off(left); + if (right_ro_nonnum) SvNIOK_off(right); } else { do_vop(op_type, TARG, left, right); @@ -2433,6 +2655,11 @@ PP(pp_negate) { SV * const sv = TOPs; const int flags = SvFLAGS(sv); + + if( !SvNIOK( sv ) && looks_like_number( sv ) ){ + SvIV_please( sv ); + } + if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { /* It's publicly an integer, or privately an integer-not-float */ oops_its_an_int: @@ -2500,14 +2727,14 @@ PP(pp_not) { dVAR; dSP; tryAMAGICun_MG(not_amg, AMGf_set); - *PL_stack_sp = boolSV(!SvTRUE(*PL_stack_sp)); + *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp)); return NORMAL; } PP(pp_complement) { dVAR; dSP; dTARGET; - tryAMAGICun_MG(compl_amg, 0); + tryAMAGICun_MG(compl_amg, AMGf_numeric); { dTOPss; if (SvNIOKp(sv)) { @@ -2942,12 +3169,19 @@ PP(pp_rand) PP(pp_srand) { - dVAR; dSP; + dVAR; dSP; dTARGET; const UV anum = (MAXARG < 1) ? seed() : POPu; (void)seedDrand01((Rand_seed_t)anum); PL_srand_called = TRUE; - EXTEND(SP, 1); - RETPUSHYES; + if (anum) + XPUSHu(anum); + else { + /* Historically srand always returned true. We can avoid breaking + that like this: */ + sv_setpvs(TARG, "0 but true"); + XPUSHTARG; + } + RETURN; } PP(pp_int) @@ -3059,11 +3293,11 @@ PP(pp_oct) tmps++, len--; if (*tmps == '0') tmps++, len--; - if (*tmps == 'x') { + if (*tmps == 'x' || *tmps == 'X') { hex: result_uv = grok_hex (tmps, &len, &flags, &result_nv); } - else if (*tmps == 'b') + else if (*tmps == 'b' || *tmps == 'B') result_uv = grok_bin (tmps, &len, &flags, &result_nv); else result_uv = grok_oct (tmps, &len, &flags, &result_nv); @@ -3095,8 +3329,13 @@ PP(pp_length) = sv_2pv_flags(sv, &len, SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC); - if (!p) + if (!p) { + if (!SvPADTMP(TARG)) { + sv_setsv(TARG, &PL_sv_undef); + SETTARG; + } SETs(&PL_sv_undef); + } else if (DO_UTF8(sv)) { SETi(utf8_length((U8*)p, (U8*)p + len)); } @@ -3109,6 +3348,10 @@ PP(pp_length) else SETi(sv_len(sv)); } else { + if (!SvPADTMP(TARG)) { + sv_setsv_nomg(TARG, &PL_sv_undef); + SETTARG; + } SETs(&PL_sv_undef); } RETURN; @@ -3138,8 +3381,6 @@ PP(pp_substr) bool repl_need_utf8_upgrade = FALSE; bool repl_is_utf8 = FALSE; - SvTAINTED_off(TARG); /* decontaminate */ - SvUTF8_off(TARG); /* decontaminate */ if (num_args > 2) { if (num_args > 3) { repl_sv = POPs; @@ -3247,26 +3488,46 @@ PP(pp_substr) STRLEN byte_pos = utf8_curlen ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos; - tmps += byte_pos; - /* we either return a PV or an LV. If the TARG hasn't been used - * before, or is of that type, reuse it; otherwise use a mortal - * instead. Note that LVs can have an extended lifetime, so also - * dont reuse if refcount > 1 (bug #20933) */ - if (SvTYPE(TARG) > SVt_NULL) { - if ( (SvTYPE(TARG) == SVt_PVLV) - ? (!lvalue || SvREFCNT(TARG) > 1) - : lvalue) - { - TARG = sv_newmortal(); + if (lvalue && !repl) { + SV * ret; + + if (!SvGMAGICAL(sv)) { + if (SvROK(sv)) { + SvPV_force_nolen(sv); + Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), + "Attempt to use reference as lvalue in substr"); + } + if (isGV_with_GP(sv)) + SvPV_force_nolen(sv); + else if (SvOK(sv)) /* is it defined ? */ + (void)SvPOK_only_UTF8(sv); + else + sv_setpvs(sv, ""); /* avoid lexical reincarnation */ } + + ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ + sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0); + LvTYPE(ret) = 'x'; + LvTARG(ret) = SvREFCNT_inc_simple(sv); + LvTARGOFF(ret) = pos; + LvTARGLEN(ret) = len; + + SPAGAIN; + PUSHs(ret); /* avoid SvSETMAGIC here */ + RETURN; } + SvTAINTED_off(TARG); /* decontaminate */ + SvUTF8_off(TARG); /* decontaminate */ + + tmps += byte_pos; sv_setpvn(TARG, tmps, byte_len); #ifdef USE_LOCALE_COLLATE sv_unmagic(TARG, PERL_MAGIC_collxfrm); #endif if (utf8_curlen) SvUTF8_on(TARG); + if (repl) { SV* repl_sv_copy = NULL; @@ -3283,37 +3544,10 @@ PP(pp_substr) SvUTF8_on(sv); SvREFCNT_dec(repl_sv_copy); } - else if (lvalue) { /* it's an lvalue! */ - if (!SvGMAGICAL(sv)) { - if (SvROK(sv)) { - SvPV_force_nolen(sv); - Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), - "Attempt to use reference as lvalue in substr"); - } - if (isGV_with_GP(sv)) - SvPV_force_nolen(sv); - else if (SvOK(sv)) /* is it defined ? */ - (void)SvPOK_only_UTF8(sv); - else - sv_setpvs(sv, ""); /* avoid lexical reincarnation */ - } - - if (SvTYPE(TARG) < SVt_PVLV) { - sv_upgrade(TARG, SVt_PVLV); - sv_magic(TARG, NULL, PERL_MAGIC_substr, NULL, 0); - } - - LvTYPE(TARG) = 'x'; - if (LvTARG(TARG) != sv) { - SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc_simple(sv); - } - LvTARGOFF(TARG) = pos; - LvTARGLEN(TARG) = len; - } } SPAGAIN; - PUSHs(TARG); /* avoid SvSETMAGIC here */ + SvSETMAGIC(TARG); + PUSHs(TARG); RETURN; bound_fail: @@ -3325,31 +3559,29 @@ bound_fail: PP(pp_vec) { - dVAR; dSP; dTARGET; + dVAR; dSP; register const IV size = POPi; register const IV offset = POPi; register SV * const src = POPs; const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; + SV * ret; - SvTAINTED_off(TARG); /* decontaminate */ if (lvalue) { /* it's an lvalue! */ - if (SvREFCNT(TARG) > 1) /* don't share the TARG (#20933) */ - TARG = sv_newmortal(); - if (SvTYPE(TARG) < SVt_PVLV) { - sv_upgrade(TARG, SVt_PVLV); - sv_magic(TARG, NULL, PERL_MAGIC_vec, NULL, 0); - } - LvTYPE(TARG) = 'v'; - if (LvTARG(TARG) != src) { - SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc_simple(src); - } - LvTARGOFF(TARG) = offset; - LvTARGLEN(TARG) = size; + ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ + sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0); + LvTYPE(ret) = 'v'; + LvTARG(ret) = SvREFCNT_inc_simple(src); + LvTARGOFF(ret) = offset; + LvTARGLEN(ret) = size; + } + else { + dTARGET; + SvTAINTED_off(TARG); /* decontaminate */ + ret = TARG; } - sv_setuv(TARG, do_vecget(src, offset, size)); - PUSHs(TARG); + sv_setuv(ret, do_vecget(src, offset, size)); + PUSHs(ret); RETURN; } @@ -3476,8 +3708,6 @@ PP(pp_index) PP(pp_sprintf) { dVAR; dSP; dMARK; dORIGMARK; dTARGET; - if (SvTAINTED(MARK[1])) - TAINT_PROPER("sprintf"); SvTAINTED_off(TARG); do_sprintf(TARG, SP-MARK, MARK+1); TAINT_IF(SvTAINTED(TARG)); @@ -3610,19 +3840,12 @@ PP(pp_crypt) #else DIE(aTHX_ "The crypt() function is unimplemented due to excessive paranoia."); - return NORMAL; #endif } /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */ -/* Both the characters below can be stored in two UTF-8 bytes. In UTF-8 the max - * character that 2 bytes can hold is U+07FF, and in UTF-EBCDIC it is U+03FF. - * See http://www.unicode.org/unicode/reports/tr16 */ -#define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */ -#define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case of MICRON */ - /* Below are several macros that generate code */ /* Generates code to store a unicode codepoint c that is known to occupy * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */ @@ -3767,7 +3990,7 @@ PP(pp_ucfirst) /* Convert the two source bytes to a single Unicode code point * value, change case and save for below */ - chr = UTF8_ACCUMULATE(*s, *(s+1)); + chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)); if (op_type == OP_LCFIRST) { /* lower casing is easy */ U8 lower = toLOWER_LATIN1(chr); STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower); @@ -4056,18 +4279,19 @@ PP(pp_uc) const U8 *const send = s + len; U8 tmpbuf[UTF8_MAXBYTES+1]; -/* This is ifdefd out because it needs more work and thought. It isn't clear - * that we should do it. These are hard-coded rules from the Unicode standard, - * and may change. 5.2 gives new guidance on the iota subscript, for example, - * which has not been checked against this; and secondly it may be that we are - * passed a subset of the context, via a \U...\E, for example, and its not - * clear what the best approach is to that */ -#ifdef CONTEXT_DEPENDENT_CASING + /* All occurrences of these are to be moved to follow any other marks. + * This is context-dependent. We may not be passed enough context to + * move the iota subscript beyond all of them, but we do the best we can + * with what we're given. The result is always better than if we + * hadn't done this. And, the problem would only arise if we are + * passed a character without all its combining marks, which would be + * the caller's mistake. The information this is based on comes from a + * comment in Unicode SpecialCasing.txt, (and the Standard's text + * itself) and so can't be checked properly to see if it ever gets + * revised. But the likelihood of it changing is remote */ bool in_iota_subscript = FALSE; -#endif while (s < send) { -#ifdef CONTEXT_DEPENDENT_CASING if (in_iota_subscript && ! is_utf8_mark(s)) { /* A non-mark. Time to output the iota subscript */ #define GREEK_CAPITAL_LETTER_IOTA 0x0399 @@ -4076,7 +4300,6 @@ PP(pp_uc) CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); in_iota_subscript = FALSE; } -#endif /* See comments at the first instance in this file of this ifdef */ @@ -4092,10 +4315,10 @@ PP(pp_uc) /* Likewise, if it fits in a byte, its case change is in our * table */ - U8 orig = UTF8_ACCUMULATE(*s, *(s+1)); + U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++); U8 upper = toUPPER_LATIN1_MOD(orig); CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper); - s += 2; + s++; } else { #else @@ -4108,15 +4331,13 @@ PP(pp_uc) const STRLEN u = UTF8SKIP(s); STRLEN ulen; -#ifndef CONTEXT_DEPENDENT_CASING - toUPPER_utf8(s, tmpbuf, &ulen); -#else const UV uv = toUPPER_utf8(s, tmpbuf, &ulen); - if (uv == GREEK_CAPITAL_LETTER_IOTA && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI) { + if (uv == GREEK_CAPITAL_LETTER_IOTA + && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI) + { in_iota_subscript = TRUE; } else { -#endif if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { /* If the eventually required minimum size outgrows * the available space, we need to grow. */ @@ -4125,26 +4346,25 @@ PP(pp_uc) /* If someone uppercases one million U+03B0s we * SvGROW() one million times. Or we could try * guessing how much to allocate without allocating too - * much. Such is life. See corresponding comment in lc code - * for another option */ + * much. Such is life. See corresponding comment in + * lc code for another option */ SvGROW(dest, min); d = (U8*)SvPVX(dest) + o; } Copy(tmpbuf, d, ulen, U8); d += ulen; -#ifdef CONTEXT_DEPENDENT_CASING } -#endif s += u; } } -#ifdef CONTEXT_DEPENDENT_CASING - if (in_iota_subscript) CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); -#endif + if (in_iota_subscript) { + CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); + } SvUTF8_on(dest); *d = '\0'; SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); - } else { /* Not UTF-8 */ + } + else { /* Not UTF-8 */ if (len) { const U8 *const send = s + len; @@ -4333,9 +4553,9 @@ PP(pp_lc) else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { /* As do the ones in the Latin1 range */ - U8 lower = toLOWER_LATIN1(UTF8_ACCUMULATE(*s, *(s+1))); + U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++)); CAT_UNI_TO_UTF8_TWO_BYTE(d, lower); - s += 2; + s++; } else { #endif @@ -4345,12 +4565,23 @@ PP(pp_lc) const STRLEN u = UTF8SKIP(s); STRLEN ulen; -/* See comments at the first instance in this file of this ifdef */ #ifndef CONTEXT_DEPENDENT_CASING toLOWER_utf8(s, tmpbuf, &ulen); #else - /* Here is context dependent casing, not compiled in currently; - * needs more thought and work */ +/* This is ifdefd out because it needs more work and thought. It isn't clear + * that we should do it. + * A minor objection is that this is based on a hard-coded rule from the + * Unicode standard, and may change, but this is not very likely at all. + * mktables should check and warn if it does. + * More importantly, if the sigma occurs at the end of the string, we don't + * have enough context to know whether it is part of a larger string or going + * to be or not. It may be that we are passed a subset of the context, via + * a \U...\E, for example, and we could conceivably know the larger context if + * code were changed to pass that in. But, if the string passed in is an + * intermediate result, and the user concatenates two strings together + * after we have made a final sigma, that would be wrong. If the final sigma + * occurs in the middle of the string we are working on, then we know that it + * should be a final sigma, but otherwise we can't be sure. */ const UV uv = toLOWER_utf8(s, tmpbuf, &ulen); @@ -4592,6 +4823,71 @@ PP(pp_aslice) RETURN; } +/* Smart dereferencing for keys, values and each */ +PP(pp_rkeys) +{ + dVAR; + dSP; + dPOPss; + + if (!SvOK(sv)) + RETURN; + + if (SvROK(sv)) { + SvGETMAGIC(sv); + if (SvAMAGIC(sv)) { + /* N.B.: AMG macros return sv if no overloading is found */ + SV *maybe_hv = AMG_CALLunary(sv, to_hv_amg); + SV *maybe_av = AMG_CALLunary(sv, to_av_amg); + if ( maybe_hv != sv && maybe_av != sv ) { + Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "%s", + Perl_form(aTHX_ "Ambiguous overloaded argument to %s resolved as %%{}", + PL_op_desc[PL_op->op_type] + ) + ); + sv = maybe_hv; + } + else if ( maybe_av != sv ) { + if ( SvTYPE(SvRV(sv)) == SVt_PVHV ) { + /* @{} overload, but underlying reftype is HV */ + Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "%s", + Perl_form(aTHX_ "Ambiguous overloaded argument to %s resolved as @{}", + PL_op_desc[PL_op->op_type] + ) + ); + } + sv = maybe_av; + } + else if ( maybe_hv != sv ) { + if ( SvTYPE(SvRV(sv)) == SVt_PVAV ) { + /* %{} overload, but underlying reftype is AV */ + Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "%s", + Perl_form(aTHX_ "Ambiguous overloaded argument to %s resolved as %%{}", + PL_op_desc[PL_op->op_type] + ) + ); + } + sv = maybe_hv; + } + } + sv = SvRV(sv); + } + + if ( SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV ) { + DIE(aTHX_ "Type of argument to %s must be hashref or arrayref", + PL_op_desc[PL_op->op_type] ); + } + + /* Delegate to correct function for op type */ + PUSHs(sv); + if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) { + return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX); + } + else { + return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX); + } +} + PP(pp_aeach) { dVAR; @@ -4637,7 +4933,7 @@ PP(pp_akeys) EXTEND(SP, n + 1); - if (PL_op->op_type == OP_AKEYS) { + if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) { n += i; for (; i <= n; i++) { mPUSHi(i); @@ -5137,14 +5433,9 @@ PP(pp_splice) const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); if (mg) { - *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg); - PUSHMARK(MARK); - PUTBACK; - ENTER_with_name("call_SPLICE"); - call_method("SPLICE",GIMME_V); - LEAVE_with_name("call_SPLICE"); - SPAGAIN; - RETURN; + return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg, + GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK, + sp - mark); } SP++; @@ -5211,7 +5502,7 @@ PP(pp_splice) if (AvREAL(ary)) { EXTEND_MORTAL(length); for (i = length, dst = MARK; i; i--) { - sv_2mortal(*dst); /* free them eventualy */ + sv_2mortal(*dst); /* free them eventually */ dst++; } } @@ -5303,7 +5594,7 @@ PP(pp_splice) if (AvREAL(ary)) { EXTEND_MORTAL(length); for (i = length, dst = MARK; i; i--) { - sv_2mortal(*dst); /* free them eventualy */ + sv_2mortal(*dst); /* free them eventually */ dst++; } } @@ -5322,6 +5613,10 @@ PP(pp_splice) *MARK = &PL_sv_undef; Safefree(tmparyval); } + + if (SvMAGICAL(ary)) + mg_set(MUTABLE_SV(ary)); + SP = MARK; RETURN; } @@ -5349,7 +5644,7 @@ PP(pp_push) sv_setsv(sv, *MARK); av_store(ary, AvFILLp(ary)+1, sv); } - if (PL_delaymagic & DM_ARRAY) + if (PL_delaymagic & DM_ARRAY_ISA) mg_set(MUTABLE_SV(ary)); PL_delaymagic = 0; @@ -5489,19 +5784,12 @@ PP(pp_reverse) register I32 tmp; dTARGET; STRLEN len; - PADOFFSET padoff_du; SvUTF8_off(TARG); /* decontaminate */ if (SP - MARK > 1) do_join(TARG, &PL_sv_no, MARK, SP); else { - sv_setsv(TARG, (SP > MARK) - ? *SP - : (padoff_du = find_rundefsvoffset(), - (padoff_du == NOT_IN_PAD - || PAD_COMPNAME_FLAGS_isOUR(padoff_du)) - ? DEFSV : PAD_SVl(padoff_du))); - + sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv()); if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED)) report_uninit(TARG); } @@ -5584,7 +5872,7 @@ PP(pp_split) DIE(aTHX_ "panic: pp_split"); rx = PM_GETRE(pm); - TAINT_IF((RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) && + TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); RX_MATCH_UTF8_set(rx, do_utf8); @@ -5630,7 +5918,7 @@ PP(pp_split) while (*s == ' ' || is_utf8_space((U8*)s)) s += UTF8SKIP(s); } - else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (isSPACE_LC(*s)) s++; } @@ -5639,7 +5927,7 @@ PP(pp_split) s++; } } - if (RX_EXTFLAGS(rx) & PMf_MULTILINE) { + if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) { multiline = 1; } @@ -5660,7 +5948,8 @@ PP(pp_split) else m += t; } - } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + } + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (m < strend && !isSPACE_LC(*m)) ++m; } else { @@ -5692,7 +5981,8 @@ PP(pp_split) if (do_utf8) { while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) )) s += UTF8SKIP(s); - } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + } + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (s < strend && isSPACE_LC(*s)) ++s; } else { @@ -6009,9 +6299,20 @@ PP(pp_lock) PP(unimplemented_op) { dVAR; - DIE(aTHX_ "panic: unimplemented op %s (#%d) called", OP_NAME(PL_op), - PL_op->op_type); - return NORMAL; + const Optype op_type = PL_op->op_type; + /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope + with out of range op numbers - it only "special" cases op_custom. + Secondly, as the three ops we "panic" on are padmy, mapstart and custom, + if we get here for a custom op then that means that the custom op didn't + have an implementation. Given that OP_NAME() looks up the custom op + by its pp_addr, likely it will return NULL, unless someone (unhelpfully) + registers &PL_unimplemented_op as the address of their custom op. + NULL doesn't generate a useful error message. "custom" does. */ + const char *const name = op_type >= OP_max + ? "[out of range]" : PL_op_name[PL_op->op_type]; + if(OP_IS_SOCKET(op_type)) + DIE(aTHX_ PL_no_sock_func, name); + DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type); } PP(pp_boolkeys) @@ -6020,6 +6321,8 @@ PP(pp_boolkeys) dSP; HV * const hv = (HV*)POPs; + if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; } + if (SvRMAGICAL(hv)) { MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied); if (mg) {