X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/347f3823c84af87f209fefadfdec9cbe013b7f19..6fa2dc60899234df7bde728354b58afa2779105e:/doop.c diff --git a/doop.c b/doop.c index faf74c5..dbf26d6 100644 --- a/doop.c +++ b/doop.c @@ -30,7 +30,6 @@ STATIC I32 S_do_trans_simple(pTHX_ SV * const sv) { - dVAR; I32 matches = 0; STRLEN len; U8 *s = (U8*)SvPV_nomg(sv,len); @@ -99,7 +98,6 @@ S_do_trans_simple(pTHX_ SV * const sv) STATIC I32 S_do_trans_count(pTHX_ SV * const sv) { - dVAR; STRLEN len; const U8 *s = (const U8*)SvPV_nomg_const(sv, len); const U8 * const send = s + len; @@ -137,7 +135,6 @@ S_do_trans_count(pTHX_ SV * const sv) STATIC I32 S_do_trans_complex(pTHX_ SV * const sv) { - dVAR; STRLEN len; U8 *s = (U8*)SvPV_nomg(sv, len); U8 * const send = s+len; @@ -301,7 +298,6 @@ S_do_trans_complex(pTHX_ SV * const sv) STATIC I32 S_do_trans_simple_utf8(pTHX_ SV * const sv) { - dVAR; U8 *s; U8 *send; U8 *d; @@ -331,7 +327,7 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv) const U8 * const e = s + len; while (t < e) { const U8 ch = *t++; - hibit = !NATIVE_IS_INVARIANT(ch); + hibit = !NATIVE_BYTE_IS_INVARIANT(ch); if (hibit) { s = bytes_to_utf8(s, &len); break; @@ -361,7 +357,7 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv) if (uv < none) { s += UTF8SKIP(s); matches++; - d = uvuni_to_utf8(d, uv); + d = uvchr_to_utf8(d, uv); } else if (uv == none) { const int i = UTF8SKIP(s); @@ -372,7 +368,7 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv) else if (uv == extra) { s += UTF8SKIP(s); matches++; - d = uvuni_to_utf8(d, final); + d = uvchr_to_utf8(d, final); } else s += UTF8SKIP(s); @@ -406,7 +402,6 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv) STATIC I32 S_do_trans_count_utf8(pTHX_ SV * const sv) { - dVAR; const U8 *s; const U8 *start = NULL; const U8 *send; @@ -432,7 +427,7 @@ S_do_trans_count_utf8(pTHX_ SV * const sv) const U8 * const e = s + len; while (t < e) { const U8 ch = *t++; - hibit = !NATIVE_IS_INVARIANT(ch); + hibit = !NATIVE_BYTE_IS_INVARIANT(ch); if (hibit) { start = s = bytes_to_utf8(s, &len); break; @@ -456,7 +451,6 @@ S_do_trans_count_utf8(pTHX_ SV * const sv) STATIC I32 S_do_trans_complex_utf8(pTHX_ SV * const sv) { - dVAR; U8 *start, *send; U8 *d; I32 matches = 0; @@ -487,7 +481,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) const U8 * const e = s + len; while (t < e) { const U8 ch = *t++; - hibit = !NATIVE_IS_INVARIANT(ch); + hibit = !NATIVE_BYTE_IS_INVARIANT(ch); if (hibit) { s = bytes_to_utf8(s, &len); break; @@ -532,7 +526,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) matches++; s += UTF8SKIP(s); if (uv != puv) { - d = uvuni_to_utf8(d, uv); + d = uvchr_to_utf8(d, uv); puv = uv; } continue; @@ -550,13 +544,13 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) if (havefinal) { s += UTF8SKIP(s); if (puv != final) { - d = uvuni_to_utf8(d, final); + d = uvchr_to_utf8(d, final); puv = final; } } else { STRLEN len; - uv = utf8n_to_uvuni(s, send - s, &len, UTF8_ALLOW_DEFAULT); + uv = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT); if (uv != puv) { Move(s, d, len, U8); d += len; @@ -585,7 +579,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) if (uv < none) { matches++; s += UTF8SKIP(s); - d = uvuni_to_utf8(d, uv); + d = uvchr_to_utf8(d, uv); continue; } else if (uv == none) { /* "none" is unmapped character */ @@ -598,7 +592,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) else if (uv == extra && !del) { matches++; s += UTF8SKIP(s); - d = uvuni_to_utf8(d, final); + d = uvchr_to_utf8(d, final); continue; } matches++; /* "none+1" is delete character */ @@ -624,21 +618,19 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) I32 Perl_do_trans(pTHX_ SV *sv) { - dVAR; STRLEN len; - const I32 hasutf = (PL_op->op_private & - (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)); + const I32 flags = PL_op->op_private; + const I32 hasutf = flags & (OPpTRANS_FROM_UTF | OPpTRANS_TO_UTF); PERL_ARGS_ASSERT_DO_TRANS; - if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) { - if (!SvIsCOW(sv)) - Perl_croak_no_modify(); + if (SvREADONLY(sv) && !(flags & OPpTRANS_IDENTICAL)) { + Perl_croak_no_modify(); } (void)SvPV_const(sv, len); if (!len) return 0; - if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) { + if (!(flags & OPpTRANS_IDENTICAL)) { if (!SvPOKp(sv) || SvTHINKFIRST(sv)) (void)SvPV_force_nomg(sv, len); (void)SvPOK_only_UTF8(sv); @@ -646,44 +638,29 @@ Perl_do_trans(pTHX_ SV *sv) DEBUG_t( Perl_deb(aTHX_ "2.TBL\n")); - switch (PL_op->op_private & ~hasutf & ( - OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL| - OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) { - case 0: - if (hasutf) - return do_trans_simple_utf8(sv); - else - return do_trans_simple(sv); - - case OPpTRANS_IDENTICAL: - case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT: - if (hasutf) - return do_trans_count_utf8(sv); - else - return do_trans_count(sv); - - default: - if (hasutf) - return do_trans_complex_utf8(sv); - else - return do_trans_complex(sv); + /* If we use only OPpTRANS_IDENTICAL to bypass the READONLY check, + * we must also rely on it to choose the readonly strategy. + */ + if (flags & OPpTRANS_IDENTICAL) { + return hasutf ? do_trans_count_utf8(sv) : do_trans_count(sv); + } else if (flags & (OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) { + return hasutf ? do_trans_complex_utf8(sv) : do_trans_complex(sv); + } else { + return hasutf ? do_trans_simple_utf8(sv) : do_trans_simple(sv); } } void Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) { - dVAR; SV ** const oldmark = mark; I32 items = sp - mark; STRLEN len; STRLEN delimlen; + const char * const delims = SvPV_const(delim, delimlen); PERL_ARGS_ASSERT_DO_JOIN; - (void) SvPV_const(delim, delimlen); /* stringify and get the delimlen */ - /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */ - mark++; len = (items > 0 ? (delimlen * (items - 1) ) : 0); SvUPGRADE(sv, SVt_PV); @@ -717,14 +694,24 @@ Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) } if (delimlen) { + const U32 delimflag = DO_UTF8(delim) ? SV_CATUTF8 : SV_CATBYTES; for (; items > 0; items--,mark++) { - sv_catsv_nomg(sv,delim); - sv_catsv(sv,*mark); + STRLEN len; + const char *s; + sv_catpvn_flags(sv,delims,delimlen,delimflag); + s = SvPV_const(*mark,len); + sv_catpvn_flags(sv,s,len, + DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); } } else { for (; items > 0; items--,mark++) - sv_catsv(sv,*mark); + { + STRLEN len; + const char *s = SvPV_const(*mark,len); + sv_catpvn_flags(sv,s,len, + DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); + } } SvSETMAGIC(sv); } @@ -732,7 +719,6 @@ Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) void Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg) { - dVAR; STRLEN patlen; const char * const pat = SvPV_const(*sarg, patlen); bool do_taint = FALSE; @@ -760,15 +746,15 @@ Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg) UV Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size) { - dVAR; STRLEN srclen, len, uoffset, bitoffs = 0; - const unsigned char *s = (const unsigned char *) SvPV_flags_const(sv, srclen, - SV_GMAGIC | ((PL_op->op_flags & OPf_MOD || LVRET) - ? SV_UNDEF_RETURNS_NULL : 0)); + const I32 svpv_flags = ((PL_op->op_flags & OPf_MOD || LVRET) + ? SV_UNDEF_RETURNS_NULL : 0); + unsigned char *s = (unsigned char *) + SvPV_flags(sv, srclen, (svpv_flags|SV_GMAGIC)); UV retnum = 0; if (!s) { - s = (const unsigned char *)""; + s = (unsigned char *)""; } PERL_ARGS_ASSERT_DO_VECGET; @@ -778,8 +764,11 @@ Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size) if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ Perl_croak(aTHX_ "Illegal number of bits in vec"); - if (SvUTF8(sv)) + if (SvUTF8(sv)) { (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE); + /* PVX may have changed */ + s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags); + } if (size < 8) { bitoffs = ((offset%8)*size)%8; @@ -847,7 +836,7 @@ Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size) ((UV) s[uoffset + 1] << 48) + ((UV) s[uoffset + 2] << 40) + ((UV) s[uoffset + 3] << 32) + - ( s[uoffset + 4] << 24); + ((UV) s[uoffset + 4] << 24); else if (uoffset + 6 >= srclen) retnum = ((UV) s[uoffset ] << 56) + @@ -864,7 +853,7 @@ Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size) ((UV) s[uoffset + 3] << 32) + ((UV) s[uoffset + 4] << 24) + ((UV) s[uoffset + 5] << 16) + - ( s[uoffset + 6] << 8); + ((UV) s[uoffset + 6] << 8); } #endif } @@ -911,7 +900,6 @@ Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size) void Perl_do_vecset(pTHX_ SV *sv) { - dVAR; SSize_t offset, bitoffs = 0; int size; unsigned char *s; @@ -998,7 +986,6 @@ Perl_do_vecset(pTHX_ SV *sv) void Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) { - dVAR; #ifdef LIBERAL long *dl; long *ll; @@ -1015,6 +1002,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) const char *rsave; bool left_utf; bool right_utf; + bool do_warn_above_ff = ckWARN_d(WARN_DEPRECATED); STRLEN needlen = 0; PERL_ARGS_ASSERT_DO_VOP; @@ -1030,7 +1018,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) } rsave = rc = SvPV_nomg_const(right, rightlen); - /* This need to come after SvPV to ensure that string overloading has + /* This needs to come after SvPV to ensure that string overloading has fired off. */ left_utf = DO_UTF8(left); @@ -1095,6 +1083,12 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) rulen -= ulen; duc = luc & ruc; dc = (char*)uvchr_to_utf8((U8*)dc, duc); + if (do_warn_above_ff && (luc > 0xff || ruc > 0xff)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + deprecated_above_ff_msg, PL_op_desc[optype]); + /* Warn only once per operation */ + do_warn_above_ff = FALSE; + } } if (sv == left || sv == right) (void)sv_usepvn(sv, dcorig, needlen); @@ -1110,6 +1104,11 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) rulen -= ulen; duc = luc ^ ruc; dc = (char*)uvchr_to_utf8((U8*)dc, duc); + if (do_warn_above_ff && (luc > 0xff || ruc > 0xff)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + deprecated_above_ff_msg, PL_op_desc[optype]); + do_warn_above_ff = FALSE; + } } goto mop_up_utf; case OP_BIT_OR: @@ -1122,6 +1121,11 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) rulen -= ulen; duc = luc | ruc; dc = (char*)uvchr_to_utf8((U8*)dc, duc); + if (do_warn_above_ff && (luc > 0xff || ruc > 0xff)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + deprecated_above_ff_msg, PL_op_desc[optype]); + do_warn_above_ff = FALSE; + } } mop_up_utf: if (rulen) @@ -1220,22 +1224,25 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) break; } } -finish: + finish: SvTAINT(sv); } + +/* used for: pp_keys(), pp_values() */ + OP * Perl_do_kv(pTHX) { - dVAR; dSP; HV * const keys = MUTABLE_HV(POPs); HE *entry; + SSize_t extend_size; const I32 gimme = GIMME_V; const I32 dokv = (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV); /* op_type is OP_RKEYS/OP_RVALUES if pp_rkeys delegated to here */ - const I32 dokeys = dokv || (PL_op->op_type == OP_KEYS || PL_op->op_type == OP_RKEYS); - const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES || PL_op->op_type == OP_RVALUES); + const I32 dokeys = dokv || (PL_op->op_type == OP_KEYS); + const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES); (void)hv_iterinit(keys); /* always reset iterator regardless */ @@ -1266,37 +1273,28 @@ Perl_do_kv(pTHX) RETURN; } - EXTEND(SP, HvUSEDKEYS(keys) * (dokeys + dovalues)); + /* 2*HvUSEDKEYS() should never be big enough to truncate or wrap */ + assert(HvUSEDKEYS(keys) <= (SSize_t_MAX >> 1)); + extend_size = (SSize_t)HvUSEDKEYS(keys) * (dokeys + dovalues); + EXTEND(SP, extend_size); - PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */ while ((entry = hv_iternext(keys))) { - SPAGAIN; if (dokeys) { SV* const sv = hv_iterkeysv(entry); - XPUSHs(sv); /* won't clobber stack_sp */ + XPUSHs(sv); } if (dovalues) { - SV *tmpstr; - PUTBACK; - tmpstr = hv_iterval(keys,entry); + SV *tmpstr = hv_iterval(keys,entry); DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu", (unsigned long)HeHASH(entry), (int)HvMAX(keys)+1, (unsigned long)(HeHASH(entry) & HvMAX(keys)))); - SPAGAIN; XPUSHs(tmpstr); } - PUTBACK; } - return NORMAL; + RETURN; } /* - * Local variables: - * c-indentation-style: bsd - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * * ex: set ts=8 sts=4 sw=4 et: */