X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/93d7320bf7eac5942274854396d4ccc6268106c7..72de20cdcc84ae35e0d8a55c7a92950fece19347:/pp.c diff --git a/pp.c b/pp.c index 112f4f5..98d6482 100644 --- a/pp.c +++ b/pp.c @@ -71,11 +71,14 @@ PP(pp_padav) if (PL_op->op_flags & OPf_REF) { PUSHs(TARG); RETURN; - } else if (LVRET) { + } else if (PL_op->op_private & OPpMAYBE_LVSUB) { + const I32 flags = is_lvalue_sub(); + if (flags && !(flags & OPpENTERSUB_INARGS)) { if (GIMME == G_SCALAR) Perl_croak(aTHX_ "Can't return array to lvalue scalar context"); PUSHs(TARG); RETURN; + } } gimme = GIMME_V; if (gimme == G_ARRAY) { @@ -114,14 +117,17 @@ PP(pp_padhv) SAVECLEARSV(PAD_SVl(PL_op->op_targ)); if (PL_op->op_flags & OPf_REF) RETURN; - else if (LVRET) { + else if (PL_op->op_private & OPpMAYBE_LVSUB) { + const I32 flags = is_lvalue_sub(); + if (flags && !(flags & OPpENTERSUB_INARGS)) { if (GIMME == G_SCALAR) Perl_croak(aTHX_ "Can't return hash to lvalue scalar context"); RETURN; + } } gimme = GIMME_V; if (gimme == G_ARRAY) { - RETURNOP(do_kv()); + RETURNOP(Perl_do_kv(aTHX)); } else if (gimme == G_SCALAR) { SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG)); @@ -248,7 +254,10 @@ Perl_softref2xv(pTHX_ SV *const sv, const char *const what, Perl_die(aTHX_ PL_no_usym, what); } if (!SvOK(sv)) { - if (PL_op->op_flags & OPf_REF) + if ( + PL_op->op_flags & OPf_REF && + PL_op->op_next->op_type != OP_BOOLKEYS + ) Perl_die(aTHX_ PL_no_usym, what); if (ckWARN(WARN_UNINITIALIZED)) report_uninit(sv); @@ -698,71 +707,84 @@ PP(pp_study) { dVAR; dSP; dPOPss; register unsigned char *s; - register I32 pos; - register I32 ch; - register I32 *sfirst; - register I32 *snext; + char *sfirst_raw; STRLEN len; + MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL; + U8 quanta; + STRLEN size; + + if (mg && SvSCREAM(sv)) + RETPUSHYES; - if (sv == PL_lastscream) { - if (SvSCREAM(sv)) - RETPUSHYES; - } s = (unsigned char*)(SvPV(sv, len)); - pos = len; - if (pos <= 0 || !SvPOK(sv) || SvUTF8(sv)) { + if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) { /* No point in studying a zero length string, and not safe to study anything that doesn't appear to be a simple scalar (and hence might change between now and when the regexp engine runs without our set magic ever running) such as a reference to an object with overloaded - stringification. */ + stringification. Also refuse to study an FBM scalar, as this gives + more flexibility in SV flag usage. No real-world code would ever + end up studying an FBM scalar, so this isn't a real pessimisation. + Endemic use of I32 in Perl_screaminstr makes it hard to safely push + the study length limit from I32_MAX to U32_MAX - 1. + */ RETPUSHNO; } - if (PL_lastscream) { - SvSCREAM_off(PL_lastscream); - SvREFCNT_dec(PL_lastscream); - } - PL_lastscream = SvREFCNT_inc_simple(sv); - - s = (unsigned char*)(SvPV(sv, len)); - pos = len; - if (pos <= 0) - RETPUSHNO; - if (pos > PL_maxscream) { - if (PL_maxscream < 0) { - PL_maxscream = pos + 80; - Newx(PL_screamfirst, 256, I32); - Newx(PL_screamnext, PL_maxscream, I32); - } - else { - PL_maxscream = pos + pos / 4; - Renew(PL_screamnext, PL_maxscream, I32); - } - } + if (len < 0xFF) { + quanta = 1; + } else if (len < 0xFFFF) { + quanta = 2; + } else + quanta = 4; - sfirst = PL_screamfirst; - snext = PL_screamnext; + size = (256 + len) * quanta; + sfirst_raw = (char *)safemalloc(size); - if (!sfirst || !snext) + if (!sfirst_raw) DIE(aTHX_ "do_study: out of memory"); - for (ch = 256; ch; --ch) - *sfirst++ = -1; - sfirst -= 256; - - while (--pos >= 0) { - register const I32 ch = s[pos]; - if (sfirst[ch] >= 0) - snext[pos] = sfirst[ch] - pos; - else - snext[pos] = -pos; - sfirst[ch] = pos; + SvSCREAM_on(sv); + if (!mg) + mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0); + mg->mg_ptr = sfirst_raw; + mg->mg_len = size; + mg->mg_private = quanta; + + memset(sfirst_raw, ~0, 256 * quanta); + + /* The assumption here is that most studied strings are fairly short, hence + the pain of the extra code is worth it, given the memory savings. + 80 character string, 336 bytes as U8, down from 1344 as U32 + 800 character string, 2112 bytes as U16, down from 4224 as U32 + */ + + if (quanta == 1) { + U8 *const sfirst = (U8 *)sfirst_raw; + U8 *const snext = sfirst + 256; + while (len-- > 0) { + const U8 ch = s[len]; + snext[len] = sfirst[ch]; + sfirst[ch] = len; + } + } else if (quanta == 2) { + U16 *const sfirst = (U16 *)sfirst_raw; + U16 *const snext = sfirst + 256; + while (len-- > 0) { + const U8 ch = s[len]; + snext[len] = sfirst[ch]; + sfirst[ch] = len; + } + } else { + U32 *const sfirst = (U32 *)sfirst_raw; + U32 *const snext = sfirst + 256; + while (len-- > 0) { + const U8 ch = s[len]; + snext[len] = sfirst[ch]; + sfirst[ch] = len; + } } - SvSCREAM_on(sv); - /* piggyback on m//g magic */ - sv_magic(sv, NULL, PERL_MAGIC_regex_global, NULL, 0); RETPUSHYES; } @@ -791,10 +813,181 @@ PP(pp_trans) /* Lvalue operators. */ +static void +S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping) +{ + dVAR; + STRLEN len; + char *s; + + PERL_ARGS_ASSERT_DO_CHOMP; + + if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs))) + return; + if (SvTYPE(sv) == SVt_PVAV) { + I32 i; + AV *const av = MUTABLE_AV(sv); + const I32 max = AvFILL(av); + + for (i = 0; i <= max; i++) { + sv = MUTABLE_SV(av_fetch(av, i, FALSE)); + if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) + do_chomp(retval, sv, chomping); + } + return; + } + else if (SvTYPE(sv) == SVt_PVHV) { + HV* const hv = MUTABLE_HV(sv); + HE* entry; + (void)hv_iterinit(hv); + while ((entry = hv_iternext(hv))) + do_chomp(retval, hv_iterval(hv,entry), chomping); + return; + } + else if (SvREADONLY(sv)) { + if (SvFAKE(sv)) { + /* SV is copy-on-write */ + sv_force_normal_flags(sv, 0); + } + if (SvREADONLY(sv)) + Perl_croak_no_modify(aTHX); + } + + if (PL_encoding) { + if (!SvUTF8(sv)) { + /* XXX, here sv is utf8-ized as a side-effect! + If encoding.pm is used properly, almost string-generating + operations, including literal strings, chr(), input data, etc. + should have been utf8-ized already, right? + */ + sv_recode_to_utf8(sv, PL_encoding); + } + } + + s = SvPV(sv, len); + if (chomping) { + char *temp_buffer = NULL; + SV *svrecode = NULL; + + if (s && len) { + s += --len; + if (RsPARA(PL_rs)) { + if (*s != '\n') + goto nope; + ++SvIVX(retval); + while (len && s[-1] == '\n') { + --len; + --s; + ++SvIVX(retval); + } + } + else { + STRLEN rslen, rs_charlen; + const char *rsptr = SvPV_const(PL_rs, rslen); + + rs_charlen = SvUTF8(PL_rs) + ? sv_len_utf8(PL_rs) + : rslen; + + if (SvUTF8(PL_rs) != SvUTF8(sv)) { + /* Assumption is that rs is shorter than the scalar. */ + if (SvUTF8(PL_rs)) { + /* RS is utf8, scalar is 8 bit. */ + bool is_utf8 = TRUE; + temp_buffer = (char*)bytes_from_utf8((U8*)rsptr, + &rslen, &is_utf8); + if (is_utf8) { + /* Cannot downgrade, therefore cannot possibly match + */ + assert (temp_buffer == rsptr); + temp_buffer = NULL; + goto nope; + } + rsptr = temp_buffer; + } + else if (PL_encoding) { + /* RS is 8 bit, encoding.pm is used. + * Do not recode PL_rs as a side-effect. */ + svrecode = newSVpvn(rsptr, rslen); + sv_recode_to_utf8(svrecode, PL_encoding); + rsptr = SvPV_const(svrecode, rslen); + rs_charlen = sv_len_utf8(svrecode); + } + else { + /* RS is 8 bit, scalar is utf8. */ + temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen); + rsptr = temp_buffer; + } + } + if (rslen == 1) { + if (*s != *rsptr) + goto nope; + ++SvIVX(retval); + } + else { + if (len < rslen - 1) + goto nope; + len -= rslen - 1; + s -= rslen - 1; + if (memNE(s, rsptr, rslen)) + goto nope; + SvIVX(retval) += rs_charlen; + } + } + s = SvPV_force_nolen(sv); + SvCUR_set(sv, len); + *SvEND(sv) = '\0'; + SvNIOK_off(sv); + SvSETMAGIC(sv); + } + nope: + + SvREFCNT_dec(svrecode); + + Safefree(temp_buffer); + } else { + if (len && !SvPOK(sv)) + s = SvPV_force_nomg(sv, len); + if (DO_UTF8(sv)) { + if (s && len) { + char * const send = s + len; + char * const start = s; + s = send - 1; + while (s > start && UTF8_IS_CONTINUATION(*s)) + s--; + if (is_utf8_string((U8*)s, send - s)) { + sv_setpvn(retval, s, send - s); + *s = '\0'; + SvCUR_set(sv, s - start); + SvNIOK_off(sv); + SvUTF8_on(retval); + } + } + else + sv_setpvs(retval, ""); + } + else if (s && len) { + s += --len; + sv_setpvn(retval, s, 1); + *s = '\0'; + SvCUR_set(sv, len); + SvUTF8_off(sv); + SvNIOK_off(sv); + } + else + sv_setpvs(retval, ""); + SvSETMAGIC(sv); + } +} + PP(pp_schop) { dVAR; dSP; dTARGET; - do_chop(TARG, TOPs); + const bool chomping = PL_op->op_type == OP_SCHOMP; + + if (chomping) + sv_setiv(TARG, 0); + do_chomp(TARG, TOPs, chomping); SETTARG; RETURN; } @@ -802,31 +995,17 @@ PP(pp_schop) PP(pp_chop) { dVAR; dSP; dMARK; dTARGET; dORIGMARK; + const bool chomping = PL_op->op_type == OP_CHOMP; + + if (chomping) + sv_setiv(TARG, 0); while (MARK < SP) - do_chop(TARG, *++MARK); + do_chomp(TARG, *++MARK, chomping); SP = ORIGMARK; XPUSHTARG; RETURN; } -PP(pp_schomp) -{ - dVAR; dSP; dTARGET; - SETi(do_chomp(TOPs)); - RETURN; -} - -PP(pp_chomp) -{ - dVAR; dSP; dMARK; dTARGET; - register I32 count = 0; - - while (SP > MARK) - count += do_chomp(POPs); - XPUSHi(count); - RETURN; -} - PP(pp_undef) { dVAR; dSP; @@ -888,7 +1067,7 @@ PP(pp_undef) gp_free(MUTABLE_GV(sv)); Newxz(gp, 1, GP); - GvGP(sv) = gp_ref(gp); + GvGP_set(sv, gp_ref(gp)); GvSV(sv) = newSV(0); GvLINE(sv) = CopLINE(PL_curcop); GvEGV(sv) = MUTABLE_GV(sv); @@ -1338,7 +1517,7 @@ PP(pp_divide) warning before dieing, hence this test goes here. If it were immediately before the second SvIV_please, then DIE() would be invoked before left was even inspected, so - no inpsection would give no warning. */ + no inspection would give no warning. */ if (right == 0) DIE(aTHX_ "Illegal division by zero"); @@ -1838,518 +2017,178 @@ PP(pp_right_shift) PP(pp_lt) { dVAR; dSP; + SV *left, *right; + tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric); -#ifdef PERL_PRESERVE_IVUV - SvIV_please_nomg(TOPs); - if (SvIOK(TOPs)) { - SvIV_please_nomg(TOPm1s); - if (SvIOK(TOPm1s)) { - bool auvok = SvUOK(TOPm1s); - bool buvok = SvUOK(TOPs); - - if (!auvok && !buvok) { /* ## IV < IV ## */ - const IV aiv = SvIVX(TOPm1s); - const IV biv = SvIVX(TOPs); - - SP--; - SETs(boolSV(aiv < biv)); - RETURN; - } - if (auvok && buvok) { /* ## UV < UV ## */ - const UV auv = SvUVX(TOPm1s); - const UV buv = SvUVX(TOPs); - - SP--; - SETs(boolSV(auv < buv)); - RETURN; - } - if (auvok) { /* ## UV < IV ## */ - UV auv; - const IV biv = SvIVX(TOPs); - SP--; - if (biv < 0) { - /* As (a) is a UV, it's >=0, so it cannot be < */ - SETs(&PL_sv_no); - RETURN; - } - auv = SvUVX(TOPs); - SETs(boolSV(auv < (UV)biv)); - RETURN; - } - { /* ## IV < UV ## */ - const IV aiv = SvIVX(TOPm1s); - UV buv; - - if (aiv < 0) { - /* As (b) is a UV, it's >=0, so it must be < */ - SP--; - SETs(&PL_sv_yes); - RETURN; - } - buv = SvUVX(TOPs); - SP--; - SETs(boolSV((UV)aiv < buv)); - RETURN; - } - } - } -#endif -#ifndef NV_PRESERVES_UV -#ifdef PERL_PRESERVE_IVUV - else -#endif - if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { - SP--; - SETs(boolSV(SvRV(TOPs) < SvRV(TOPp1s))); - RETURN; - } -#endif - { -#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) - dPOPTOPnnrl_nomg; - if (Perl_isnan(left) || Perl_isnan(right)) - RETSETNO; - SETs(boolSV(left < right)); -#else - dPOPnv_nomg; - SETs(boolSV(SvNV_nomg(TOPs) < value)); -#endif - RETURN; - } + right = POPs; + left = TOPs; + SETs(boolSV( + (SvIOK_notUV(left) && SvIOK_notUV(right)) + ? (SvIVX(left) < SvIVX(right)) + : (do_ncmp(left, right) == -1) + )); + RETURN; } PP(pp_gt) { dVAR; dSP; - tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric); -#ifdef PERL_PRESERVE_IVUV - SvIV_please_nomg(TOPs); - if (SvIOK(TOPs)) { - SvIV_please_nomg(TOPm1s); - if (SvIOK(TOPm1s)) { - bool auvok = SvUOK(TOPm1s); - bool buvok = SvUOK(TOPs); - - if (!auvok && !buvok) { /* ## IV > IV ## */ - const IV aiv = SvIVX(TOPm1s); - const IV biv = SvIVX(TOPs); + SV *left, *right; - SP--; - SETs(boolSV(aiv > biv)); - RETURN; - } - if (auvok && buvok) { /* ## UV > UV ## */ - const UV auv = SvUVX(TOPm1s); - const UV buv = SvUVX(TOPs); - - SP--; - SETs(boolSV(auv > buv)); - RETURN; - } - if (auvok) { /* ## UV > IV ## */ - UV auv; - const IV biv = SvIVX(TOPs); - - SP--; - if (biv < 0) { - /* As (a) is a UV, it's >=0, so it must be > */ - SETs(&PL_sv_yes); - RETURN; - } - auv = SvUVX(TOPs); - SETs(boolSV(auv > (UV)biv)); - RETURN; - } - { /* ## IV > UV ## */ - const IV aiv = SvIVX(TOPm1s); - UV buv; - - if (aiv < 0) { - /* As (b) is a UV, it's >=0, so it cannot be > */ - SP--; - SETs(&PL_sv_no); - RETURN; - } - buv = SvUVX(TOPs); - SP--; - SETs(boolSV((UV)aiv > buv)); - RETURN; - } - } - } -#endif -#ifndef NV_PRESERVES_UV -#ifdef PERL_PRESERVE_IVUV - else -#endif - if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { - SP--; - SETs(boolSV(SvRV(TOPs) > SvRV(TOPp1s))); - RETURN; - } -#endif - { -#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) - dPOPTOPnnrl_nomg; - if (Perl_isnan(left) || Perl_isnan(right)) - RETSETNO; - SETs(boolSV(left > right)); -#else - dPOPnv_nomg; - SETs(boolSV(SvNV_nomg(TOPs) > value)); -#endif - RETURN; - } + tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric); + right = POPs; + left = TOPs; + SETs(boolSV( + (SvIOK_notUV(left) && SvIOK_notUV(right)) + ? (SvIVX(left) > SvIVX(right)) + : (do_ncmp(left, right) == 1) + )); + RETURN; } PP(pp_le) { dVAR; dSP; - tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric); -#ifdef PERL_PRESERVE_IVUV - SvIV_please_nomg(TOPs); - if (SvIOK(TOPs)) { - SvIV_please_nomg(TOPm1s); - if (SvIOK(TOPm1s)) { - bool auvok = SvUOK(TOPm1s); - bool buvok = SvUOK(TOPs); - - if (!auvok && !buvok) { /* ## IV <= IV ## */ - const IV aiv = SvIVX(TOPm1s); - const IV biv = SvIVX(TOPs); - - SP--; - SETs(boolSV(aiv <= biv)); - RETURN; - } - if (auvok && buvok) { /* ## UV <= UV ## */ - UV auv = SvUVX(TOPm1s); - UV buv = SvUVX(TOPs); - - SP--; - SETs(boolSV(auv <= buv)); - RETURN; - } - if (auvok) { /* ## UV <= IV ## */ - UV auv; - const IV biv = SvIVX(TOPs); - - SP--; - if (biv < 0) { - /* As (a) is a UV, it's >=0, so a cannot be <= */ - SETs(&PL_sv_no); - RETURN; - } - auv = SvUVX(TOPs); - SETs(boolSV(auv <= (UV)biv)); - RETURN; - } - { /* ## IV <= UV ## */ - const IV aiv = SvIVX(TOPm1s); - UV buv; + SV *left, *right; - if (aiv < 0) { - /* As (b) is a UV, it's >=0, so a must be <= */ - SP--; - SETs(&PL_sv_yes); - RETURN; - } - buv = SvUVX(TOPs); - SP--; - SETs(boolSV((UV)aiv <= buv)); - RETURN; - } - } - } -#endif -#ifndef NV_PRESERVES_UV -#ifdef PERL_PRESERVE_IVUV - else -#endif - if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { - SP--; - SETs(boolSV(SvRV(TOPs) <= SvRV(TOPp1s))); - RETURN; - } -#endif - { -#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) - dPOPTOPnnrl_nomg; - if (Perl_isnan(left) || Perl_isnan(right)) - RETSETNO; - SETs(boolSV(left <= right)); -#else - dPOPnv_nomg; - SETs(boolSV(SvNV_nomg(TOPs) <= value)); -#endif - RETURN; - } + tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric); + right = POPs; + left = TOPs; + SETs(boolSV( + (SvIOK_notUV(left) && SvIOK_notUV(right)) + ? (SvIVX(left) <= SvIVX(right)) + : (do_ncmp(left, right) <= 0) + )); + RETURN; } PP(pp_ge) { dVAR; dSP; - tryAMAGICbin_MG(ge_amg,AMGf_set|AMGf_numeric); -#ifdef PERL_PRESERVE_IVUV - SvIV_please_nomg(TOPs); - if (SvIOK(TOPs)) { - SvIV_please_nomg(TOPm1s); - if (SvIOK(TOPm1s)) { - bool auvok = SvUOK(TOPm1s); - bool buvok = SvUOK(TOPs); - - if (!auvok && !buvok) { /* ## IV >= IV ## */ - const IV aiv = SvIVX(TOPm1s); - const IV biv = SvIVX(TOPs); + SV *left, *right; + + tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric); + right = POPs; + left = TOPs; + SETs(boolSV( + (SvIOK_notUV(left) && SvIOK_notUV(right)) + ? (SvIVX(left) >= SvIVX(right)) + : ( (do_ncmp(left, right) & 2) == 0) + )); + RETURN; +} - SP--; - SETs(boolSV(aiv >= biv)); - RETURN; - } - if (auvok && buvok) { /* ## UV >= UV ## */ - const UV auv = SvUVX(TOPm1s); - const UV buv = SvUVX(TOPs); +PP(pp_ne) +{ + dVAR; dSP; + SV *left, *right; + + tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric); + right = POPs; + left = TOPs; + SETs(boolSV( + (SvIOK_notUV(left) && SvIOK_notUV(right)) + ? (SvIVX(left) != SvIVX(right)) + : (do_ncmp(left, right) != 0) + )); + RETURN; +} - SP--; - SETs(boolSV(auv >= buv)); - RETURN; - } - if (auvok) { /* ## UV >= IV ## */ - UV auv; - const IV biv = SvIVX(TOPs); +/* compare left and right SVs. Returns: + * -1: < + * 0: == + * 1: > + * 2: left or right was a NaN + */ +I32 +Perl_do_ncmp(pTHX_ SV* const left, SV * const right) +{ + dVAR; - SP--; - if (biv < 0) { - /* As (a) is a UV, it's >=0, so it must be >= */ - SETs(&PL_sv_yes); - RETURN; + PERL_ARGS_ASSERT_DO_NCMP; +#ifdef PERL_PRESERVE_IVUV + SvIV_please_nomg(right); + /* Fortunately it seems NaN isn't IOK */ + if (SvIOK(right)) { + SvIV_please_nomg(left); + if (SvIOK(left)) { + if (!SvUOK(left)) { + const IV leftiv = SvIVX(left); + if (!SvUOK(right)) { + /* ## IV <=> IV ## */ + const IV rightiv = SvIVX(right); + return (leftiv > rightiv) - (leftiv < rightiv); } - auv = SvUVX(TOPs); - SETs(boolSV(auv >= (UV)biv)); - RETURN; - } - { /* ## IV >= UV ## */ - const IV aiv = SvIVX(TOPm1s); - UV buv; - - if (aiv < 0) { - /* As (b) is a UV, it's >=0, so a cannot be >= */ - SP--; - SETs(&PL_sv_no); - RETURN; + /* ## IV <=> UV ## */ + if (leftiv < 0) + /* As (b) is a UV, it's >=0, so it must be < */ + return -1; + { + const UV rightuv = SvUVX(right); + return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv); } - buv = SvUVX(TOPs); - SP--; - SETs(boolSV((UV)aiv >= buv)); - RETURN; } - } - } -#endif -#ifndef NV_PRESERVES_UV -#ifdef PERL_PRESERVE_IVUV - else -#endif - if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { - SP--; - SETs(boolSV(SvRV(TOPs) >= SvRV(TOPp1s))); - RETURN; - } -#endif - { -#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) - dPOPTOPnnrl_nomg; - if (Perl_isnan(left) || Perl_isnan(right)) - RETSETNO; - SETs(boolSV(left >= right)); -#else - dPOPnv_nomg; - SETs(boolSV(SvNV_nomg(TOPs) >= value)); -#endif - RETURN; - } -} -PP(pp_ne) -{ - dVAR; dSP; - tryAMAGICbin_MG(ne_amg,AMGf_set|AMGf_numeric); -#ifndef NV_PRESERVES_UV - if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { - SP--; - SETs(boolSV(SvRV(TOPs) != SvRV(TOPp1s))); - RETURN; - } -#endif -#ifdef PERL_PRESERVE_IVUV - SvIV_please_nomg(TOPs); - if (SvIOK(TOPs)) { - SvIV_please_nomg(TOPm1s); - if (SvIOK(TOPm1s)) { - const bool auvok = SvUOK(TOPm1s); - const bool buvok = SvUOK(TOPs); - - if (auvok == buvok) { /* ## IV == IV or UV == UV ## */ - /* Casting IV to UV before comparison isn't going to matter - on 2s complement. On 1s complement or sign&magnitude - (if we have any of them) it could make negative zero - differ from normal zero. As I understand it. (Need to - check - is negative zero implementation defined behaviour - anyway?). NWC */ - const UV buv = SvUVX(POPs); - const UV auv = SvUVX(TOPs); - - SETs(boolSV(auv != buv)); - RETURN; + if (SvUOK(right)) { + /* ## UV <=> UV ## */ + const UV leftuv = SvUVX(left); + const UV rightuv = SvUVX(right); + return (leftuv > rightuv) - (leftuv < rightuv); } - { /* ## Mixed IV,UV ## */ - IV iv; - UV uv; - - /* != is commutative so swap if needed (save code) */ - if (auvok) { - /* swap. top of stack (b) is the iv */ - iv = SvIVX(TOPs); - SP--; - if (iv < 0) { - /* As (a) is a UV, it's >0, so it cannot be == */ - SETs(&PL_sv_yes); - RETURN; - } - uv = SvUVX(TOPs); - } else { - iv = SvIVX(TOPm1s); - SP--; - if (iv < 0) { - /* As (b) is a UV, it's >0, so it cannot be == */ - SETs(&PL_sv_yes); - RETURN; - } - uv = SvUVX(*(SP+1)); /* Do I want TOPp1s() ? */ + /* ## UV <=> IV ## */ + { + const IV rightiv = SvIVX(right); + if (rightiv < 0) + /* As (a) is a UV, it's >=0, so it cannot be < */ + return 1; + { + const UV leftuv = SvUVX(left); + return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv); } - SETs(boolSV((UV)iv != uv)); - RETURN; } + /* NOTREACHED */ } } #endif { + NV const rnv = SvNV_nomg(right); + NV const lnv = SvNV_nomg(left); + #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) - dPOPTOPnnrl_nomg; - if (Perl_isnan(left) || Perl_isnan(right)) - RETSETYES; - SETs(boolSV(left != right)); + if (Perl_isnan(lnv) || Perl_isnan(rnv)) { + return 2; + } + return (lnv > rnv) - (lnv < rnv); #else - dPOPnv_nomg; - SETs(boolSV(SvNV_nomg(TOPs) != value)); + if (lnv < rnv) + return -1; + if (lnv > rnv) + return 1; + if (lnv == rnv) + return 0; + return 2; #endif - RETURN; } } + PP(pp_ncmp) { - dVAR; dSP; dTARGET; + dVAR; dSP; + SV *left, *right; + I32 value; tryAMAGICbin_MG(ncmp_amg, AMGf_numeric); -#ifndef NV_PRESERVES_UV - if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) { - const UV right = PTR2UV(SvRV(POPs)); - const UV left = PTR2UV(SvRV(TOPs)); - SETi((left > right) - (left < right)); - RETURN; - } -#endif -#ifdef PERL_PRESERVE_IVUV - /* Fortunately it seems NaN isn't IOK */ - SvIV_please_nomg(TOPs); - if (SvIOK(TOPs)) { - SvIV_please_nomg(TOPm1s); - if (SvIOK(TOPm1s)) { - const bool leftuvok = SvUOK(TOPm1s); - const bool rightuvok = SvUOK(TOPs); - I32 value; - if (!leftuvok && !rightuvok) { /* ## IV <=> IV ## */ - const IV leftiv = SvIVX(TOPm1s); - const IV rightiv = SvIVX(TOPs); - - if (leftiv > rightiv) - value = 1; - else if (leftiv < rightiv) - value = -1; - else - value = 0; - } else if (leftuvok && rightuvok) { /* ## UV <=> UV ## */ - const UV leftuv = SvUVX(TOPm1s); - const UV rightuv = SvUVX(TOPs); - - if (leftuv > rightuv) - value = 1; - else if (leftuv < rightuv) - value = -1; - else - value = 0; - } else if (leftuvok) { /* ## UV <=> IV ## */ - const IV rightiv = SvIVX(TOPs); - if (rightiv < 0) { - /* As (a) is a UV, it's >=0, so it cannot be < */ - value = 1; - } else { - const UV leftuv = SvUVX(TOPm1s); - if (leftuv > (UV)rightiv) { - value = 1; - } else if (leftuv < (UV)rightiv) { - value = -1; - } else { - value = 0; - } - } - } else { /* ## IV <=> UV ## */ - const IV leftiv = SvIVX(TOPm1s); - if (leftiv < 0) { - /* As (b) is a UV, it's >=0, so it must be < */ - value = -1; - } else { - const UV rightuv = SvUVX(TOPs); - if ((UV)leftiv > rightuv) { - value = 1; - } else if ((UV)leftiv < rightuv) { - value = -1; - } else { - value = 0; - } - } - } - SP--; - SETi(value); - RETURN; - } - } -#endif - { - dPOPTOPnnrl_nomg; - I32 value; - -#ifdef Perl_isnan - if (Perl_isnan(left) || Perl_isnan(right)) { - SETs(&PL_sv_undef); - RETURN; - } - value = (left > right) - (left < right); -#else - if (left == right) - value = 0; - else if (left < right) - value = -1; - else if (left > right) - value = 1; - else { + right = POPs; + left = TOPs; + value = do_ncmp(left, right); + if (value == 2) { SETs(&PL_sv_undef); - RETURN; - } -#endif - SETi(value); - RETURN; } + else { + dTARGET; + SETi(value); + } + RETURN; } PP(pp_sle) @@ -3170,8 +3009,11 @@ PP(pp_length) SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC); if (!p) { - sv_setsv(TARG, &PL_sv_undef); - SETTARG; + if (!SvPADTMP(TARG)) { + sv_setsv(TARG, &PL_sv_undef); + SETTARG; + } + SETs(&PL_sv_undef); } else if (DO_UTF8(sv)) { SETi(utf8_length((U8*)p, (U8*)p + len)); @@ -3185,8 +3027,11 @@ PP(pp_length) else SETi(sv_len(sv)); } else { - sv_setsv_nomg(TARG, &PL_sv_undef); - SETTARG; + if (!SvPADTMP(TARG)) { + sv_setsv_nomg(TARG, &PL_sv_undef); + SETTARG; + } + SETs(&PL_sv_undef); } RETURN; } @@ -3380,7 +3225,8 @@ PP(pp_substr) } } SPAGAIN; - PUSHs(TARG); /* avoid SvSETMAGIC here */ + SvSETMAGIC(TARG); + PUSHs(TARG); RETURN; bound_fail: @@ -3541,8 +3387,6 @@ PP(pp_index) PP(pp_sprintf) { dVAR; dSP; dMARK; dORIGMARK; dTARGET; - if (SvTAINTED(MARK[1])) - TAINT_PROPER("sprintf"); SvTAINTED_off(TARG); do_sprintf(TARG, SP-MARK, MARK+1); TAINT_IF(SvTAINTED(TARG)); @@ -3681,12 +3525,6 @@ PP(pp_crypt) /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */ -/* Both the characters below can be stored in two UTF-8 bytes. In UTF-8 the max - * character that 2 bytes can hold is U+07FF, and in UTF-EBCDIC it is U+03FF. - * See http://www.unicode.org/unicode/reports/tr16 */ -#define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */ -#define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case of MICRON */ - /* Below are several macros that generate code */ /* Generates code to store a unicode codepoint c that is known to occupy * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */ @@ -4050,6 +3888,8 @@ PP(pp_ucfirst) SvCUR_set(dest, need - 1); } } + if (dest != source && SvTAINTED(source)) + SvTAINT(dest); SvSETMAGIC(dest); RETURN; } @@ -4320,6 +4160,8 @@ PP(pp_uc) SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); } } /* End of isn't utf8 */ + if (dest != source && SvTAINTED(source)) + SvTAINT(dest); SvSETMAGIC(dest); RETURN; } @@ -4542,6 +4384,8 @@ PP(pp_lc) SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); } } + if (dest != source && SvTAINTED(source)) + SvTAINT(dest); SvSETMAGIC(dest); RETURN; } @@ -4671,54 +4515,26 @@ PP(pp_rkeys) dSP; dPOPss; - if (!SvOK(sv)) - RETURN; - - if (SvROK(sv)) { - SvGETMAGIC(sv); - if (SvAMAGIC(sv)) { - /* N.B.: AMG macros return sv if no overloading is found */ - SV *maybe_hv = AMG_CALLun(sv,to_hv); - SV *maybe_av = AMG_CALLun(sv,to_av); - if ( maybe_hv != sv && maybe_av != sv ) { - Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "%s", - Perl_form(aTHX_ "Ambiguous overloaded argument to %s resolved as %%{}", - PL_op_desc[PL_op->op_type] - ) - ); - sv = maybe_hv; - } - else if ( maybe_av != sv ) { - if ( SvTYPE(SvRV(sv)) == SVt_PVHV ) { - /* @{} overload, but underlying reftype is HV */ - Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "%s", - Perl_form(aTHX_ "Ambiguous overloaded argument to %s resolved as @{}", - PL_op_desc[PL_op->op_type] - ) - ); - } - sv = maybe_av; - } - else if ( maybe_hv != sv ) { - if ( SvTYPE(SvRV(sv)) == SVt_PVAV ) { - /* %{} overload, but underlying reftype is AV */ - Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "%s", - Perl_form(aTHX_ "Ambiguous overloaded argument to %s resolved as %%{}", - PL_op_desc[PL_op->op_type] - ) - ); - } - sv = maybe_hv; - } - } - sv = SvRV(sv); - } - - if ( SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV ) { - DIE(aTHX_ "Type of argument to %s must be hashref or arrayref", + SvGETMAGIC(sv); + + if ( + !SvROK(sv) + || (sv = SvRV(sv), + (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV) + || SvOBJECT(sv) + ) + ) { + DIE(aTHX_ + "Type of argument to %s must be unblessed hashref or arrayref", PL_op_desc[PL_op->op_type] ); } + if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV) + DIE(aTHX_ + "Can't modify %s in %s", + PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type] + ); + /* Delegate to correct function for op type */ PUSHs(sv); if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) { @@ -5259,10 +5075,41 @@ PP(pp_anonhash) RETURN; } +static AV * +S_deref_plain_array(pTHX_ AV *ary) +{ + if (SvTYPE(ary) == SVt_PVAV) return ary; + SvGETMAGIC((SV *)ary); + if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV) + Perl_die(aTHX_ "Not an ARRAY reference"); + else if (SvOBJECT(SvRV(ary))) + Perl_die(aTHX_ "Not an unblessed ARRAY reference"); + return (AV *)SvRV(ary); +} + +#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) +# define DEREF_PLAIN_ARRAY(ary) \ + ({ \ + AV *aRrRay = ary; \ + SvTYPE(aRrRay) == SVt_PVAV \ + ? aRrRay \ + : S_deref_plain_array(aTHX_ aRrRay); \ + }) +#else +# define DEREF_PLAIN_ARRAY(ary) \ + ( \ + PL_Sv = (SV *)(ary), \ + SvTYPE(PL_Sv) == SVt_PVAV \ + ? (AV *)PL_Sv \ + : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \ + ) +#endif + PP(pp_splice) { dVAR; dSP; dMARK; dORIGMARK; - register AV *ary = MUTABLE_AV(*++MARK); + int num_args = (SP - MARK); + register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK)); register SV **src; register SV **dst; register I32 i; @@ -5274,14 +5121,9 @@ PP(pp_splice) const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); if (mg) { - *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg); - PUSHMARK(MARK); - PUTBACK; - ENTER_with_name("call_SPLICE"); - call_method("SPLICE",GIMME_V); - LEAVE_with_name("call_SPLICE"); - SPAGAIN; - RETURN; + return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg, + GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK, + sp - mark); } SP++; @@ -5310,7 +5152,8 @@ PP(pp_splice) length = AvMAX(ary) + 1; } if (offset > AvFILLp(ary) + 1) { - Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" ); + if (num_args > 2) + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" ); offset = AvFILLp(ary) + 1; } after = AvFILLp(ary) + 1 - (offset + length); @@ -5348,7 +5191,7 @@ PP(pp_splice) if (AvREAL(ary)) { EXTEND_MORTAL(length); for (i = length, dst = MARK; i; i--) { - sv_2mortal(*dst); /* free them eventualy */ + sv_2mortal(*dst); /* free them eventually */ dst++; } } @@ -5440,7 +5283,7 @@ PP(pp_splice) if (AvREAL(ary)) { EXTEND_MORTAL(length); for (i = length, dst = MARK; i; i--) { - sv_2mortal(*dst); /* free them eventualy */ + sv_2mortal(*dst); /* free them eventually */ dst++; } } @@ -5470,7 +5313,7 @@ PP(pp_splice) PP(pp_push) { dVAR; dSP; dMARK; dORIGMARK; dTARGET; - register AV * const ary = MUTABLE_AV(*++MARK); + register AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK)); const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); if (mg) { @@ -5507,7 +5350,7 @@ PP(pp_shift) dVAR; dSP; AV * const av = PL_op->op_flags & OPf_SPECIAL - ? MUTABLE_AV(GvAV(PL_defgv)) : MUTABLE_AV(POPs); + ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs)); SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av); EXTEND(SP, 1); assert (sv); @@ -5520,7 +5363,7 @@ PP(pp_shift) PP(pp_unshift) { dVAR; dSP; dMARK; dORIGMARK; dTARGET; - register AV *ary = MUTABLE_AV(*++MARK); + register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK)); const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); if (mg) { @@ -5718,7 +5561,7 @@ PP(pp_split) DIE(aTHX_ "panic: pp_split"); rx = PM_GETRE(pm); - TAINT_IF((RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) && + TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); RX_MATCH_UTF8_set(rx, do_utf8); @@ -5764,7 +5607,7 @@ PP(pp_split) while (*s == ' ' || is_utf8_space((U8*)s)) s += UTF8SKIP(s); } - else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (isSPACE_LC(*s)) s++; } @@ -5773,7 +5616,7 @@ PP(pp_split) s++; } } - if (RX_EXTFLAGS(rx) & PMf_MULTILINE) { + if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) { multiline = 1; } @@ -5794,7 +5637,8 @@ PP(pp_split) else m += t; } - } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + } + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (m < strend && !isSPACE_LC(*m)) ++m; } else { @@ -5826,7 +5670,8 @@ PP(pp_split) if (do_utf8) { while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) )) s += UTF8SKIP(s); - } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + } + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (s < strend && isSPACE_LC(*s)) ++s; } else { @@ -5981,7 +5826,7 @@ PP(pp_split) I32 rex_return; PUTBACK; rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 , - sv, NULL, 0); + sv, NULL, SvSCREAM(sv) ? REXEC_SCREAM : 0); SPAGAIN; if (rex_return == 0) break; @@ -6143,8 +5988,20 @@ PP(pp_lock) PP(unimplemented_op) { dVAR; - DIE(aTHX_ "panic: unimplemented op %s (#%d) called", OP_NAME(PL_op), - PL_op->op_type); + const Optype op_type = PL_op->op_type; + /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope + with out of range op numbers - it only "special" cases op_custom. + Secondly, as the three ops we "panic" on are padmy, mapstart and custom, + if we get here for a custom op then that means that the custom op didn't + have an implementation. Given that OP_NAME() looks up the custom op + by its pp_addr, likely it will return NULL, unless someone (unhelpfully) + registers &PL_unimplemented_op as the address of their custom op. + NULL doesn't generate a useful error message. "custom" does. */ + const char *const name = op_type >= OP_max + ? "[out of range]" : PL_op_name[PL_op->op_type]; + if(OP_IS_SOCKET(op_type)) + DIE(aTHX_ PL_no_sock_func, name); + DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type); } PP(pp_boolkeys) @@ -6153,6 +6010,8 @@ PP(pp_boolkeys) dSP; HV * const hv = (HV*)POPs; + if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; } + if (SvRMAGICAL(hv)) { MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied); if (mg) { @@ -6161,7 +6020,7 @@ PP(pp_boolkeys) } } - XPUSHs(boolSV(HvKEYS(hv) != 0)); + XPUSHs(boolSV(HvUSEDKEYS(hv) != 0)); RETURN; }