X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/d8906c052fcf764bce3ec89556a62718884c9ac4..577914ca141fdfb73ca33f7a427a69f89de59840:/pp.c diff --git a/pp.c b/pp.c index 9a74280..899f2b8 100644 --- a/pp.c +++ b/pp.c @@ -121,7 +121,7 @@ PP(pp_padhv) } gimme = GIMME_V; if (gimme == G_ARRAY) { - RETURNOP(do_kv()); + RETURNOP(Perl_do_kv(aTHX)); } else if (gimme == G_SCALAR) { SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG)); @@ -142,9 +142,10 @@ PP(pp_rv2gv) if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv); if (SvROK(sv)) { wasref: - sv = amagic_deref_call(sv, to_gv_amg); - SPAGAIN; - + if (SvAMAGIC(sv)) { + sv = amagic_deref_call(sv, to_gv_amg); + SPAGAIN; + } sv = SvRV(sv); if (SvTYPE(sv) == SVt_PVIO) { GV * const gv = MUTABLE_GV(sv_newmortal()); @@ -247,7 +248,10 @@ Perl_softref2xv(pTHX_ SV *const sv, const char *const what, Perl_die(aTHX_ PL_no_usym, what); } if (!SvOK(sv)) { - if (PL_op->op_flags & OPf_REF) + if ( + PL_op->op_flags & OPf_REF && + PL_op->op_next->op_type != OP_BOOLKEYS + ) Perl_die(aTHX_ PL_no_usym, what); if (ckWARN(WARN_UNINITIALIZED)) report_uninit(sv); @@ -284,8 +288,10 @@ PP(pp_rv2sv) if (!(PL_op->op_private & OPpDEREFed)) SvGETMAGIC(sv); if (SvROK(sv)) { - sv = amagic_deref_call(sv, to_sv_amg); - SPAGAIN; + if (SvAMAGIC(sv)) { + sv = amagic_deref_call(sv, to_sv_amg); + SPAGAIN; + } sv = SvRV(sv); switch (SvTYPE(sv)) { @@ -788,10 +794,181 @@ PP(pp_trans) /* Lvalue operators. */ +static void +S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping) +{ + dVAR; + STRLEN len; + char *s; + + PERL_ARGS_ASSERT_DO_CHOMP; + + if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs))) + return; + if (SvTYPE(sv) == SVt_PVAV) { + I32 i; + AV *const av = MUTABLE_AV(sv); + const I32 max = AvFILL(av); + + for (i = 0; i <= max; i++) { + sv = MUTABLE_SV(av_fetch(av, i, FALSE)); + if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) + do_chomp(retval, sv, chomping); + } + return; + } + else if (SvTYPE(sv) == SVt_PVHV) { + HV* const hv = MUTABLE_HV(sv); + HE* entry; + (void)hv_iterinit(hv); + while ((entry = hv_iternext(hv))) + do_chomp(retval, hv_iterval(hv,entry), chomping); + return; + } + else if (SvREADONLY(sv)) { + if (SvFAKE(sv)) { + /* SV is copy-on-write */ + sv_force_normal_flags(sv, 0); + } + if (SvREADONLY(sv)) + Perl_croak_no_modify(aTHX); + } + + if (PL_encoding) { + if (!SvUTF8(sv)) { + /* XXX, here sv is utf8-ized as a side-effect! + If encoding.pm is used properly, almost string-generating + operations, including literal strings, chr(), input data, etc. + should have been utf8-ized already, right? + */ + sv_recode_to_utf8(sv, PL_encoding); + } + } + + s = SvPV(sv, len); + if (chomping) { + char *temp_buffer = NULL; + SV *svrecode = NULL; + + if (s && len) { + s += --len; + if (RsPARA(PL_rs)) { + if (*s != '\n') + goto nope; + ++SvIVX(retval); + while (len && s[-1] == '\n') { + --len; + --s; + ++SvIVX(retval); + } + } + else { + STRLEN rslen, rs_charlen; + const char *rsptr = SvPV_const(PL_rs, rslen); + + rs_charlen = SvUTF8(PL_rs) + ? sv_len_utf8(PL_rs) + : rslen; + + if (SvUTF8(PL_rs) != SvUTF8(sv)) { + /* Assumption is that rs is shorter than the scalar. */ + if (SvUTF8(PL_rs)) { + /* RS is utf8, scalar is 8 bit. */ + bool is_utf8 = TRUE; + temp_buffer = (char*)bytes_from_utf8((U8*)rsptr, + &rslen, &is_utf8); + if (is_utf8) { + /* Cannot downgrade, therefore cannot possibly match + */ + assert (temp_buffer == rsptr); + temp_buffer = NULL; + goto nope; + } + rsptr = temp_buffer; + } + else if (PL_encoding) { + /* RS is 8 bit, encoding.pm is used. + * Do not recode PL_rs as a side-effect. */ + svrecode = newSVpvn(rsptr, rslen); + sv_recode_to_utf8(svrecode, PL_encoding); + rsptr = SvPV_const(svrecode, rslen); + rs_charlen = sv_len_utf8(svrecode); + } + else { + /* RS is 8 bit, scalar is utf8. */ + temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen); + rsptr = temp_buffer; + } + } + if (rslen == 1) { + if (*s != *rsptr) + goto nope; + ++SvIVX(retval); + } + else { + if (len < rslen - 1) + goto nope; + len -= rslen - 1; + s -= rslen - 1; + if (memNE(s, rsptr, rslen)) + goto nope; + SvIVX(retval) += rs_charlen; + } + } + s = SvPV_force_nolen(sv); + SvCUR_set(sv, len); + *SvEND(sv) = '\0'; + SvNIOK_off(sv); + SvSETMAGIC(sv); + } + nope: + + SvREFCNT_dec(svrecode); + + Safefree(temp_buffer); + } else { + if (len && !SvPOK(sv)) + s = SvPV_force_nomg(sv, len); + if (DO_UTF8(sv)) { + if (s && len) { + char * const send = s + len; + char * const start = s; + s = send - 1; + while (s > start && UTF8_IS_CONTINUATION(*s)) + s--; + if (is_utf8_string((U8*)s, send - s)) { + sv_setpvn(retval, s, send - s); + *s = '\0'; + SvCUR_set(sv, s - start); + SvNIOK_off(sv); + SvUTF8_on(retval); + } + } + else + sv_setpvs(retval, ""); + } + else if (s && len) { + s += --len; + sv_setpvn(retval, s, 1); + *s = '\0'; + SvCUR_set(sv, len); + SvUTF8_off(sv); + SvNIOK_off(sv); + } + else + sv_setpvs(retval, ""); + SvSETMAGIC(sv); + } +} + PP(pp_schop) { dVAR; dSP; dTARGET; - do_chop(TARG, TOPs); + const bool chomping = PL_op->op_type == OP_SCHOMP; + + if (chomping) + sv_setiv(TARG, 0); + do_chomp(TARG, TOPs, chomping); SETTARG; RETURN; } @@ -799,31 +976,17 @@ PP(pp_schop) PP(pp_chop) { dVAR; dSP; dMARK; dTARGET; dORIGMARK; + const bool chomping = PL_op->op_type == OP_CHOMP; + + if (chomping) + sv_setiv(TARG, 0); while (MARK < SP) - do_chop(TARG, *++MARK); + do_chomp(TARG, *++MARK, chomping); SP = ORIGMARK; XPUSHTARG; RETURN; } -PP(pp_schomp) -{ - dVAR; dSP; dTARGET; - SETi(do_chomp(TOPs)); - RETURN; -} - -PP(pp_chomp) -{ - dVAR; dSP; dMARK; dTARGET; - register I32 count = 0; - - while (SP > MARK) - count += do_chomp(POPs); - XPUSHi(count); - RETURN; -} - PP(pp_undef) { dVAR; dSP; @@ -885,7 +1048,7 @@ PP(pp_undef) gp_free(MUTABLE_GV(sv)); Newxz(gp, 1, GP); - GvGP(sv) = gp_ref(gp); + GvGP_set(sv, gp_ref(gp)); GvSV(sv) = newSV(0); GvLINE(sv) = CopLINE(PL_curcop); GvEGV(sv) = MUTABLE_GV(sv); @@ -1335,7 +1498,7 @@ PP(pp_divide) warning before dieing, hence this test goes here. If it were immediately before the second SvIV_please, then DIE() would be invoked before left was even inspected, so - no inpsection would give no warning. */ + no inspection would give no warning. */ if (right == 0) DIE(aTHX_ "Illegal division by zero"); @@ -1795,7 +1958,7 @@ PP(pp_subtract) PP(pp_left_shift) { dVAR; dSP; dATARGET; SV *svl, *svr; - tryAMAGICbin_MG(lshift_amg, AMGf_assign); + tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric); svr = POPs; svl = TOPs; { @@ -1815,7 +1978,7 @@ PP(pp_left_shift) PP(pp_right_shift) { dVAR; dSP; dATARGET; SV *svl, *svr; - tryAMAGICbin_MG(rshift_amg, AMGf_assign); + tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric); svr = POPs; svl = TOPs; { @@ -1835,7 +1998,7 @@ PP(pp_right_shift) PP(pp_lt) { dVAR; dSP; - tryAMAGICbin_MG(lt_amg, AMGf_set); + tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -1918,7 +2081,7 @@ PP(pp_lt) PP(pp_gt) { dVAR; dSP; - tryAMAGICbin_MG(gt_amg, AMGf_set); + tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -2002,7 +2165,7 @@ PP(pp_gt) PP(pp_le) { dVAR; dSP; - tryAMAGICbin_MG(le_amg, AMGf_set); + tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -2086,7 +2249,7 @@ PP(pp_le) PP(pp_ge) { dVAR; dSP; - tryAMAGICbin_MG(ge_amg,AMGf_set); + tryAMAGICbin_MG(ge_amg,AMGf_set|AMGf_numeric); #ifdef PERL_PRESERVE_IVUV SvIV_please_nomg(TOPs); if (SvIOK(TOPs)) { @@ -2170,7 +2333,7 @@ PP(pp_ge) PP(pp_ne) { dVAR; dSP; - tryAMAGICbin_MG(ne_amg,AMGf_set); + tryAMAGICbin_MG(ne_amg,AMGf_set|AMGf_numeric); #ifndef NV_PRESERVES_UV if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { SP--; @@ -2247,7 +2410,7 @@ PP(pp_ne) PP(pp_ncmp) { dVAR; dSP; dTARGET; - tryAMAGICbin_MG(ncmp_amg, 0); + tryAMAGICbin_MG(ncmp_amg, AMGf_numeric); #ifndef NV_PRESERVES_UV if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { const UV right = PTR2UV(SvRV(POPs)); @@ -2571,7 +2734,7 @@ PP(pp_not) PP(pp_complement) { dVAR; dSP; dTARGET; - tryAMAGICun_MG(compl_amg, 0); + tryAMAGICun_MG(compl_amg, AMGf_numeric); { dTOPss; if (SvNIOKp(sv)) { @@ -2875,7 +3038,7 @@ PP(pp_i_ne) dVAR; dSP; tryAMAGICbin_MG(ne_amg, AMGf_set); { - dPOPTOPiirl_nomg; + dPOPTOPiirl_halfmg; SETs(boolSV(left != right)); RETURN; } @@ -2886,7 +3049,7 @@ PP(pp_i_ncmp) dVAR; dSP; dTARGET; tryAMAGICbin_MG(ncmp_amg, 0); { - dPOPTOPiirl_nomg; + dPOPTOPiirl_halfmg; I32 value; if (left > right) @@ -2919,7 +3082,7 @@ PP(pp_atan2) dVAR; dSP; dTARGET; tryAMAGICbin_MG(atan2_amg, 0); { - dPOPTOPnnrl_nomg; + dPOPTOPnnrl_halfmg; SETn(Perl_atan2(left, right)); RETURN; } @@ -3167,8 +3330,11 @@ PP(pp_length) SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC); if (!p) { - sv_setsv(TARG, &PL_sv_undef); - SETTARG; + if (!SvPADTMP(TARG)) { + sv_setsv(TARG, &PL_sv_undef); + SETTARG; + } + SETs(&PL_sv_undef); } else if (DO_UTF8(sv)) { SETi(utf8_length((U8*)p, (U8*)p + len)); @@ -3182,8 +3348,11 @@ PP(pp_length) else SETi(sv_len(sv)); } else { - sv_setsv_nomg(TARG, &PL_sv_undef); - SETTARG; + if (!SvPADTMP(TARG)) { + sv_setsv_nomg(TARG, &PL_sv_undef); + SETTARG; + } + SETs(&PL_sv_undef); } RETURN; } @@ -3377,7 +3546,8 @@ PP(pp_substr) } } SPAGAIN; - PUSHs(TARG); /* avoid SvSETMAGIC here */ + SvSETMAGIC(TARG); + PUSHs(TARG); RETURN; bound_fail: @@ -3538,8 +3708,6 @@ PP(pp_index) PP(pp_sprintf) { dVAR; dSP; dMARK; dORIGMARK; dTARGET; - if (SvTAINTED(MARK[1])) - TAINT_PROPER("sprintf"); SvTAINTED_off(TARG); do_sprintf(TARG, SP-MARK, MARK+1); TAINT_IF(SvTAINTED(TARG)); @@ -3678,12 +3846,6 @@ PP(pp_crypt) /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */ -/* Both the characters below can be stored in two UTF-8 bytes. In UTF-8 the max - * character that 2 bytes can hold is U+07FF, and in UTF-EBCDIC it is U+03FF. - * See http://www.unicode.org/unicode/reports/tr16 */ -#define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */ -#define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case of MICRON */ - /* Below are several macros that generate code */ /* Generates code to store a unicode codepoint c that is known to occupy * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */ @@ -3828,7 +3990,7 @@ PP(pp_ucfirst) /* Convert the two source bytes to a single Unicode code point * value, change case and save for below */ - chr = UTF8_ACCUMULATE(*s, *(s+1)); + chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)); if (op_type == OP_LCFIRST) { /* lower casing is easy */ U8 lower = toLOWER_LATIN1(chr); STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower); @@ -4047,6 +4209,8 @@ PP(pp_ucfirst) SvCUR_set(dest, need - 1); } } + if (dest != source && SvTAINTED(source)) + SvTAINT(dest); SvSETMAGIC(dest); RETURN; } @@ -4153,10 +4317,10 @@ PP(pp_uc) /* Likewise, if it fits in a byte, its case change is in our * table */ - U8 orig = UTF8_ACCUMULATE(*s, *(s+1)); + U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++); U8 upper = toUPPER_LATIN1_MOD(orig); CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper); - s += 2; + s++; } else { #else @@ -4317,6 +4481,8 @@ PP(pp_uc) SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); } } /* End of isn't utf8 */ + if (dest != source && SvTAINTED(source)) + SvTAINT(dest); SvSETMAGIC(dest); RETURN; } @@ -4391,9 +4557,9 @@ PP(pp_lc) else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { /* As do the ones in the Latin1 range */ - U8 lower = toLOWER_LATIN1(UTF8_ACCUMULATE(*s, *(s+1))); + U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++)); CAT_UNI_TO_UTF8_TWO_BYTE(d, lower); - s += 2; + s++; } else { #endif @@ -4539,6 +4705,8 @@ PP(pp_lc) SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); } } + if (dest != source && SvTAINTED(source)) + SvTAINT(dest); SvSETMAGIC(dest); RETURN; } @@ -4675,8 +4843,8 @@ PP(pp_rkeys) SvGETMAGIC(sv); if (SvAMAGIC(sv)) { /* N.B.: AMG macros return sv if no overloading is found */ - SV *maybe_hv = AMG_CALLun(sv,to_hv); - SV *maybe_av = AMG_CALLun(sv,to_av); + SV *maybe_hv = AMG_CALLunary(sv, to_hv_amg); + SV *maybe_av = AMG_CALLunary(sv, to_av_amg); if ( maybe_hv != sv && maybe_av != sv ) { Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "%s", Perl_form(aTHX_ "Ambiguous overloaded argument to %s resolved as %%{}", @@ -4712,8 +4880,8 @@ PP(pp_rkeys) } if ( SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV ) { - DIE(aTHX_ Perl_form(aTHX_ "Type of argument to %s must be hashref or arrayref", - PL_op_desc[PL_op->op_type] )); + DIE(aTHX_ "Type of argument to %s must be hashref or arrayref", + PL_op_desc[PL_op->op_type] ); } /* Delegate to correct function for op type */ @@ -5271,14 +5439,9 @@ PP(pp_splice) const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); if (mg) { - *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg); - PUSHMARK(MARK); - PUTBACK; - ENTER_with_name("call_SPLICE"); - call_method("SPLICE",GIMME_V); - LEAVE_with_name("call_SPLICE"); - SPAGAIN; - RETURN; + return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg, + GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK, + sp - mark); } SP++; @@ -5345,7 +5508,7 @@ PP(pp_splice) if (AvREAL(ary)) { EXTEND_MORTAL(length); for (i = length, dst = MARK; i; i--) { - sv_2mortal(*dst); /* free them eventualy */ + sv_2mortal(*dst); /* free them eventually */ dst++; } } @@ -5437,7 +5600,7 @@ PP(pp_splice) if (AvREAL(ary)) { EXTEND_MORTAL(length); for (i = length, dst = MARK; i; i--) { - sv_2mortal(*dst); /* free them eventualy */ + sv_2mortal(*dst); /* free them eventually */ dst++; } } @@ -5715,7 +5878,7 @@ PP(pp_split) DIE(aTHX_ "panic: pp_split"); rx = PM_GETRE(pm); - TAINT_IF((RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) && + TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); RX_MATCH_UTF8_set(rx, do_utf8); @@ -5761,7 +5924,7 @@ PP(pp_split) while (*s == ' ' || is_utf8_space((U8*)s)) s += UTF8SKIP(s); } - else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (isSPACE_LC(*s)) s++; } @@ -5770,7 +5933,7 @@ PP(pp_split) s++; } } - if (RX_EXTFLAGS(rx) & PMf_MULTILINE) { + if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) { multiline = 1; } @@ -5791,7 +5954,8 @@ PP(pp_split) else m += t; } - } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + } + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (m < strend && !isSPACE_LC(*m)) ++m; } else { @@ -5823,7 +5987,8 @@ PP(pp_split) if (do_utf8) { while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) )) s += UTF8SKIP(s); - } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + } + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (s < strend && isSPACE_LC(*s)) ++s; } else { @@ -6140,8 +6305,20 @@ PP(pp_lock) PP(unimplemented_op) { dVAR; - DIE(aTHX_ "panic: unimplemented op %s (#%d) called", OP_NAME(PL_op), - PL_op->op_type); + const Optype op_type = PL_op->op_type; + /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope + with out of range op numbers - it only "special" cases op_custom. + Secondly, as the three ops we "panic" on are padmy, mapstart and custom, + if we get here for a custom op then that means that the custom op didn't + have an implementation. Given that OP_NAME() looks up the custom op + by its pp_addr, likely it will return NULL, unless someone (unhelpfully) + registers &PL_unimplemented_op as the address of their custom op. + NULL doesn't generate a useful error message. "custom" does. */ + const char *const name = op_type >= OP_max + ? "[out of range]" : PL_op_name[PL_op->op_type]; + if(OP_IS_SOCKET(op_type)) + DIE(aTHX_ PL_no_sock_func, name); + DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type); } PP(pp_boolkeys) @@ -6150,6 +6327,8 @@ PP(pp_boolkeys) dSP; HV * const hv = (HV*)POPs; + if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; } + if (SvRMAGICAL(hv)) { MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied); if (mg) {